refactor(subtitles): inject MediaProber/FilesystemScanner ports into domain services

Domain services no longer call subprocess or pathlib directly. This
commit introduces two Protocol ports in domain/shared/ports/:

  MediaProber.list_subtitle_streams(video) -> list[SubtitleStreamInfo]
  FilesystemScanner.scan_dir / stat / read_text  -> list[FileEntry] | ...

Concrete adapters live in infrastructure/:

  FfprobeMediaProber          (wraps subprocess + ffprobe + JSON)
  PathlibFilesystemScanner    (wraps pathlib + os reads)

SubtitleIdentifier and PatternDetector now take (kb, prober, scanner) at
construction time. Their internals work over FileEntry snapshots and
SubtitleStreamInfo records — no more ad-hoc Path.is_file/iterdir/stat
calls or embedded subprocess.run loops. _count_entries now takes raw SRT
text (as returned by scanner.read_text) instead of a path, so SRT-only
entry counting stays out of the FS layer; given None it now returns None
rather than 0.

The manage_subtitles use case instantiates the two adapters once and
injects them into both services. Tests pass real adapters and patch
`alfred.infrastructure.probe.ffprobe_prober.subprocess.run` for the
embedded-stream cases (both the ffprobe-failure case and the case with
faked ffprobe output). The _classify_single tests build FileEntry via a
small helper (_file_entry).

The domain is now free of subprocess calls and direct filesystem reads
in the subtitle pipeline. The only remaining I/O hooks are the FilePath
VO convenience methods (exists/is_file/is_dir), which stay as a
deliberate affordance on the value object.
This commit is contained in:
2026-05-19 14:52:24 +02:00
parent ced72547f7
commit e6ee700825
11 changed files with 432 additions and 211 deletions
+31 -20
View File
@@ -22,8 +22,8 @@ from unittest.mock import patch
import pytest
from alfred.domain.shared.ports import FileEntry
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.services.identifier import (
SubtitleIdentifier,
_count_entries,
@@ -37,6 +37,19 @@ from alfred.domain.subtitles.value_objects import (
SubtitleType,
TypeDetectionMethod,
)
from alfred.infrastructure.filesystem.scanner import PathlibFilesystemScanner
from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
from alfred.infrastructure.probe.ffprobe_prober import FfprobeMediaProber
def _file_entry(path) -> FileEntry:
"""Helper: build a FileEntry from a real tmp_path Path."""
return FileEntry(
path=path,
is_file=path.is_file(),
is_dir=path.is_dir(),
size_kb=(path.stat().st_size / 1024) if path.is_file() else None,
)
@pytest.fixture(scope="module")
@@ -46,7 +59,7 @@ def kb():
@pytest.fixture
def identifier(kb):
return SubtitleIdentifier(kb)
return SubtitleIdentifier(kb, FfprobeMediaProber(), PathlibFilesystemScanner())
def _pattern(
@@ -103,23 +116,19 @@ class TestTokenize:
class TestCountEntries:
def test_last_cue_number(self, tmp_path):
srt = tmp_path / "x.srt"
srt.write_text(
def test_last_cue_number(self):
text = (
"1\n00:00:01,000 --> 00:00:02,000\nHello\n\n"
"2\n00:00:03,000 --> 00:00:04,000\nWorld\n\n"
"42\n00:00:05,000 --> 00:00:06,000\nLast\n",
encoding="utf-8",
"42\n00:00:05,000 --> 00:00:06,000\nLast\n"
)
assert _count_entries(srt) == 42
assert _count_entries(text) == 42
def test_missing_file_returns_zero(self, tmp_path):
assert _count_entries(tmp_path / "nope.srt") == 0
def test_missing_file_returns_none(self):
assert _count_entries(None) is None
def test_empty_file_returns_zero(self, tmp_path):
f = tmp_path / "x.srt"
f.write_text("")
assert _count_entries(f) == 0
def test_empty_file_returns_zero(self):
assert _count_entries("") == 0
# --------------------------------------------------------------------------- #
@@ -135,7 +144,7 @@ class TestEmbedded:
video = tmp_path / "v.mkv"
video.write_bytes(b"")
with patch(
"alfred.domain.subtitles.services.identifier.subprocess.run",
"alfred.infrastructure.probe.ffprobe_prober.subprocess.run",
side_effect=FileNotFoundError("no ffprobe"),
):
assert identifier._scan_embedded(video) == []
@@ -156,7 +165,7 @@ class TestEmbedded:
stdout = fake_output
with patch(
"alfred.domain.subtitles.services.identifier.subprocess.run",
"alfred.infrastructure.probe.ffprobe_prober.subprocess.run",
return_value=FakeResult(),
):
tracks = identifier._scan_embedded(video)
@@ -256,7 +265,7 @@ class TestClassify:
def test_classifies_language_and_format(self, identifier, tmp_path):
f = tmp_path / "Show.S01E01.English.srt"
f.write_text("1\n00:00:01,000 --> 00:00:02,000\nHi\n")
track = identifier._classify_single(f)
track = identifier._classify_single(_file_entry(f))
assert track.language.code == "eng"
assert track.format.id == "srt"
assert track.confidence > 0
@@ -265,13 +274,13 @@ class TestClassify:
def test_classifies_type_token(self, identifier, tmp_path):
f = tmp_path / "Show.S01E01.English.sdh.srt"
f.write_text("")
track = identifier._classify_single(f)
track = identifier._classify_single(_file_entry(f))
assert track.subtitle_type == SubtitleType.SDH
def test_unknown_tokens_lower_confidence(self, identifier, tmp_path):
f = tmp_path / "Show.S01E01.gibberish.srt"
f.write_text("")
track = identifier._classify_single(f)
track = identifier._classify_single(_file_entry(f))
# No lang/type recognized → confidence is 0 or very low.
assert track.language is None
assert track.confidence < 0.5
@@ -279,7 +288,9 @@ class TestClassify:
def test_episode_stem_prefix_stripped(self, identifier, tmp_path):
f = tmp_path / "Show.S01E01.English.srt"
f.write_text("")
track = identifier._classify_single(f, episode_stem="Show.S01E01")
track = identifier._classify_single(
_file_entry(f), episode_stem="Show.S01E01"
)
# Only "english" remains as meaningful token → confidence == 1.0
assert track.language.code == "eng"
assert track.confidence == 1.0