refactor(subtitles): inject MediaProber/FilesystemScanner ports into domain services

Domain services no longer call subprocess or pathlib directly.

Introduces two Protocol ports in domain/shared/ports/:
  - MediaProber.list_subtitle_streams(video) -> list[SubtitleStreamInfo]
  - FilesystemScanner.scan_dir / stat / read_text -> list[FileEntry] | ...

Concrete adapters live in infrastructure/:
  - FfprobeMediaProber (wraps subprocess + ffprobe + JSON)
  - PathlibFilesystemScanner (wraps pathlib + os reads)

SubtitleIdentifier and PatternDetector now take (kb, prober, scanner) at construction time. Their internals work over FileEntry snapshots and SubtitleStreamInfo records — no more ad-hoc Path.is_file/iterdir/stat or embedded subprocess.run loops.

_count_entries now takes raw SRT text (returned by scanner.read_text) so SRT-only entry counting stays out of the FS layer.

The manage_subtitles use case instantiates the two adapters once and injects them into both services.

Tests pass real adapters and patch `alfred.infrastructure.probe.ffprobe_prober.subprocess.run` for the ffprobe-failure cases. _classify_single tests build FileEntry via a small helper.

Domain is now free of subprocess / direct filesystem reads in the subtitle pipeline. The only remaining I/O hooks are the FilePath VO convenience methods (exists/is_file/is_dir), which stay as a deliberate affordance on the value object.
2026-05-19 14:52:24 +02:00
parent ced72547f7
commit e6ee700825
11 changed files with 432 additions and 211 deletions
@@ -22,8 +22,8 @@ from unittest.mock import patch

 import pytest

+from alfred.domain.shared.ports import FileEntry
 from alfred.domain.subtitles.entities import SubtitleCandidate
-from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
 from alfred.domain.subtitles.services.identifier import (
    SubtitleIdentifier,
    _count_entries,
@@ -37,6 +37,19 @@ from alfred.domain.subtitles.value_objects import (
    SubtitleType,
    TypeDetectionMethod,
 )
+from alfred.infrastructure.filesystem.scanner import PathlibFilesystemScanner
+from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
+from alfred.infrastructure.probe.ffprobe_prober import FfprobeMediaProber
+
+
+def _file_entry(path) -> FileEntry:
+    """Helper: build a FileEntry from a real tmp_path Path."""
+    return FileEntry(
+        path=path,
+        is_file=path.is_file(),
+        is_dir=path.is_dir(),
+        size_kb=(path.stat().st_size / 1024) if path.is_file() else None,
+    )


@pytest.fixture(scope="module")
@@ -46,7 +59,7 @@ def kb():

@pytest.fixture
 def identifier(kb):
-    return SubtitleIdentifier(kb)
+    return SubtitleIdentifier(kb, FfprobeMediaProber(), PathlibFilesystemScanner())


 def _pattern(
@@ -103,23 +116,19 @@ class TestTokenize:


 class TestCountEntries:
-    def test_last_cue_number(self, tmp_path):
-        srt = tmp_path / "x.srt"
-        srt.write_text(
+    def test_last_cue_number(self):
+        text = (
            "1\n00:00:01,000 --> 00:00:02,000\nHello\n\n"
            "2\n00:00:03,000 --> 00:00:04,000\nWorld\n\n"
-            "42\n00:00:05,000 --> 00:00:06,000\nLast\n",
-            encoding="utf-8",
+            "42\n00:00:05,000 --> 00:00:06,000\nLast\n"
        )
-        assert _count_entries(srt) == 42
+        assert _count_entries(text) == 42

-    def test_missing_file_returns_zero(self, tmp_path):
-        assert _count_entries(tmp_path / "nope.srt") == 0
+    def test_missing_file_returns_none(self):
+        assert _count_entries(None) is None

-    def test_empty_file_returns_zero(self, tmp_path):
-        f = tmp_path / "x.srt"
-        f.write_text("")
-        assert _count_entries(f) == 0
+    def test_empty_file_returns_zero(self):
+        assert _count_entries("") == 0


 # --------------------------------------------------------------------------- #
@@ -135,7 +144,7 @@ class TestEmbedded:
        video = tmp_path / "v.mkv"
        video.write_bytes(b"")
        with patch(
-            "alfred.domain.subtitles.services.identifier.subprocess.run",
+            "alfred.infrastructure.probe.ffprobe_prober.subprocess.run",
            side_effect=FileNotFoundError("no ffprobe"),
        ):
            assert identifier._scan_embedded(video) == []
@@ -156,7 +165,7 @@ class TestEmbedded:
            stdout = fake_output

        with patch(
-            "alfred.domain.subtitles.services.identifier.subprocess.run",
+            "alfred.infrastructure.probe.ffprobe_prober.subprocess.run",
            return_value=FakeResult(),
        ):
            tracks = identifier._scan_embedded(video)
@@ -256,7 +265,7 @@ class TestClassify:
    def test_classifies_language_and_format(self, identifier, tmp_path):
        f = tmp_path / "Show.S01E01.English.srt"
        f.write_text("1\n00:00:01,000 --> 00:00:02,000\nHi\n")
-        track = identifier._classify_single(f)
+        track = identifier._classify_single(_file_entry(f))
        assert track.language.code == "eng"
        assert track.format.id == "srt"
        assert track.confidence > 0
@@ -265,13 +274,13 @@ class TestClassify:
    def test_classifies_type_token(self, identifier, tmp_path):
        f = tmp_path / "Show.S01E01.English.sdh.srt"
        f.write_text("")
-        track = identifier._classify_single(f)
+        track = identifier._classify_single(_file_entry(f))
        assert track.subtitle_type == SubtitleType.SDH

    def test_unknown_tokens_lower_confidence(self, identifier, tmp_path):
        f = tmp_path / "Show.S01E01.gibberish.srt"
        f.write_text("")
-        track = identifier._classify_single(f)
+        track = identifier._classify_single(_file_entry(f))
        # No lang/type recognized → confidence is 0 or very low.
        assert track.language is None
        assert track.confidence < 0.5
@@ -279,7 +288,9 @@ class TestClassify:
    def test_episode_stem_prefix_stripped(self, identifier, tmp_path):
        f = tmp_path / "Show.S01E01.English.srt"
        f.write_text("")
-        track = identifier._classify_single(f, episode_stem="Show.S01E01")
+        track = identifier._classify_single(
+            _file_entry(f), episode_stem="Show.S01E01"
+        )
        # Only "english" remains as meaningful token → confidence == 1.0
        assert track.language.code == "eng"
        assert track.confidence == 1.0