Files
alfred/tests/domain/test_subtitle_pattern_detector.py
francwa e6ee700825 refactor(subtitles): inject MediaProber/FilesystemScanner ports into domain services
Domain services no longer call subprocess or pathlib directly. Introduces
two Protocol ports in domain/shared/ports/:

  MediaProber.list_subtitle_streams(video) -> list[SubtitleStreamInfo]
  FilesystemScanner.scan_dir / stat / read_text  -> list[FileEntry] | ...

Concrete adapters live in infrastructure/:

  FfprobeMediaProber          (wraps subprocess + ffprobe + JSON)
  PathlibFilesystemScanner    (wraps pathlib + os reads)

SubtitleIdentifier and PatternDetector now take (kb, prober, scanner) at
construction time. Their internals work over FileEntry snapshots and
SubtitleStreamInfo records — no more ad-hoc Path.is_file/iterdir/stat or
embedded subprocess.run loops. _count_entries now takes raw SRT text
(returned by scanner.read_text) so SRT-only entry counting stays out of
the FS layer.

manage_subtitles use case instantiates the two adapters once and injects
them into both services. Tests pass real adapters and patch
`alfred.infrastructure.probe.ffprobe_prober.subprocess.run` for the
ffprobe-failure cases. _classify_single tests build FileEntry via a
small helper.

Domain is now free of subprocess / direct filesystem reads in the
subtitle pipeline. The only remaining I/O hooks are FilePath VO
convenience methods (exists/is_file/is_dir) which stay as a deliberate
affordance on the value object.
2026-05-19 14:52:24 +02:00

193 lines
7.5 KiB
Python

"""Tests for ``alfred.domain.subtitles.services.pattern_detector.PatternDetector``.
The detector inspects a release folder and returns the best-matching known
pattern + a confidence score.
Coverage:
- ``TestEmbeddedDetection`` — ``PatternDetector._has_embedded_subtitles`` is patched;
the ``embedded`` pattern wins when there are no external subs and embedded tracks are reported.
- ``TestAdjacentDetection`` — .srt next to the video → ``adjacent``.
- ``TestFlatSubsFolder`` — ``Subs/*.srt`` → ``subs_flat``.
- ``TestEpisodeSubfolder`` — ``Subs/{ep}/*.srt`` → ``episode_subfolder``.
- ``TestNothingFound`` — empty release: the description reports that no external subtitle files were found (a pattern may or may not be returned).
- ``TestDescribe`` — human-readable description mentions the right cues.
Uses the real ``SubtitleKnowledgeBase`` (loaded from the live builtin
``patterns/`` folder) since rebuilding all four patterns by hand would
just duplicate fixture state.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
from alfred.infrastructure.filesystem.scanner import PathlibFilesystemScanner
from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
from alfred.infrastructure.probe.ffprobe_prober import FfprobeMediaProber
@pytest.fixture(scope="module")
def kb():
    """Provide a single ``SubtitleKnowledgeBase`` shared by every test in this module."""
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base
@pytest.fixture
def detector(kb):
    """Build a ``PatternDetector`` from *kb* and the concrete prober/scanner adapters."""
    prober = FfprobeMediaProber()
    scanner = PathlibFilesystemScanner()
    return PatternDetector(kb, prober, scanner)
def _make_video(folder: Path, name: str = "Show.S01E01.mkv") -> Path:
    """Write an empty placeholder video file called *name* inside *folder*.

    Returns the path of the file that was written.
    """
    video_path = folder.joinpath(name)
    video_path.write_bytes(b"")
    return video_path
# --------------------------------------------------------------------------- #
# Embedded #
# --------------------------------------------------------------------------- #
class TestEmbeddedDetection:
    """Releases whose only subtitles are reported as embedded in the video."""

    def test_embedded_only(self, detector, tmp_path):
        """A video with no external ``.srt`` files resolves to ``embedded``."""
        # The release folder holds the video and nothing else.
        sample = _make_video(tmp_path)

        # Make the embedded-subtitle check answer True for this call.
        embedded_present = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        )
        with embedded_present:
            outcome = detector.detect(tmp_path, sample)

        pattern = outcome["detected"]
        assert pattern is not None
        assert pattern.id == "embedded"
        assert outcome["confidence"] > 0
        assert "embedded" in outcome["description"].lower()
# --------------------------------------------------------------------------- #
# Adjacent #
# --------------------------------------------------------------------------- #
class TestAdjacentDetection:
    """Subtitle files sitting in the same folder as the video."""

    def test_srt_next_to_video(self, detector, tmp_path):
        """Two ``.srt`` siblings of the video resolve to ``adjacent``."""
        sample = _make_video(tmp_path)
        for subtitle_name in ("Show.S01E01.English.srt", "Show.S01E01.French.srt"):
            tmp_path.joinpath(subtitle_name).write_text("")

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        pattern = outcome["detected"]
        assert pattern is not None
        assert pattern.id == "adjacent"
        assert "adjacent" in outcome["description"]
# --------------------------------------------------------------------------- #
# Subs flat folder #
# --------------------------------------------------------------------------- #
class TestFlatSubsFolder:
    """External subtitles placed directly inside a ``Subs/`` directory."""

    def test_flat_subs_folder_adjacent_to_video(self, detector, tmp_path):
        """``Subs/`` beside the video, holding ``.srt`` files, is ``subs_flat``."""
        sample = _make_video(tmp_path)
        subs_dir = tmp_path.joinpath("Subs")
        subs_dir.mkdir()
        for subtitle_name in ("Show.S01E01.English.srt", "Show.S01E01.French.srt"):
            subs_dir.joinpath(subtitle_name).write_text("")

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        pattern = outcome["detected"]
        assert pattern is not None
        assert pattern.id == "subs_flat"
        assert "flat" in outcome["description"]

    def test_flat_subs_folder_at_release_root(self, detector, tmp_path):
        """``Subs/`` at the release root is ``subs_flat`` even with a nested video."""
        # The sample video sits one directory below the release root,
        # while Subs/ is created at the root itself.
        season_dir = tmp_path.joinpath("Season.01")
        season_dir.mkdir()
        sample = _make_video(season_dir)
        subs_dir = tmp_path.joinpath("Subs")
        subs_dir.mkdir()
        subs_dir.joinpath("ep01.English.srt").write_text("")

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        pattern = outcome["detected"]
        assert pattern is not None
        assert pattern.id == "subs_flat"
# --------------------------------------------------------------------------- #
# Episode subfolder #
# --------------------------------------------------------------------------- #
class TestEpisodeSubfolder:
    """External subtitles stored under ``Subs/<episode>/``."""

    def test_per_episode_subfolder(self, detector, tmp_path):
        """A per-episode folder of numbered ``.srt`` files is ``episode_subfolder``."""
        sample = _make_video(tmp_path, name="Show.S01E01.mkv")
        episode_dir = tmp_path.joinpath("Subs", "Show.S01E01")
        episode_dir.mkdir(parents=True)
        for subtitle_name in ("2_English.srt", "3_French.srt"):
            episode_dir.joinpath(subtitle_name).write_text("")

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        pattern = outcome["detected"]
        assert pattern is not None
        assert pattern.id == "episode_subfolder"

        summary = outcome["description"]
        assert "episode_subfolder" in summary
        # The description should also mention the numeric-prefix cue.
        assert "numeric prefix" in summary
# --------------------------------------------------------------------------- #
# Nothing #
# --------------------------------------------------------------------------- #
class TestNothingFound:
    """A release with a video but no subtitles of any kind."""

    def test_empty_release_no_pattern(self, detector, tmp_path):
        """The description states that no external subtitle files were found."""
        sample = _make_video(tmp_path)

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        # With neither external nor embedded subtitles the adjacent strategy
        # still scores 0.5 (no Subs folder bonus), so whether a best pattern
        # is returned depends on the threshold (0.4). Only the description is
        # checked here: it must reflect that nothing was found.
        summary = outcome["description"]
        assert "no external subtitle files" in summary
# --------------------------------------------------------------------------- #
# Describe #
# --------------------------------------------------------------------------- #
class TestDescribe:
    """Checks on the human-readable ``description`` returned by ``detect``."""

    def test_describe_includes_language_token_cue(self, detector, tmp_path):
        """A language-named file in ``Subs/`` yields a "language tokens" mention."""
        sample = _make_video(tmp_path)
        subs_dir = tmp_path.joinpath("Subs")
        subs_dir.mkdir()
        subs_dir.joinpath("ep01.English.srt").write_text("")

        # Make the embedded-subtitle check answer False for this call.
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )
        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        assert "language tokens" in outcome["description"]

    def test_describe_combines_external_and_embedded(self, detector, tmp_path):
        """With an adjacent ``.srt`` and embedded tracks, both are mentioned."""
        sample = _make_video(tmp_path)
        tmp_path.joinpath("Show.S01E01.English.srt").write_text("")

        # Make the embedded-subtitle check answer True for this call.
        embedded_present = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        )
        with embedded_present:
            outcome = detector.detect(tmp_path, sample)

        summary = outcome["description"]
        assert "adjacent" in summary
        assert "embedded" in summary.lower()