chore: sprint cleanup — language unification, parser unification, fossil removal
Several weeks of work accumulated without being committed. Grouped here for clarity; see CHANGELOG.md [Unreleased] for the user-facing summary. Highlights ---------- P1 #2 — ISO 639-2/B canonical migration - New Language VO + LanguageRegistry (alfred/domain/shared/knowledge/). - iso_languages.yaml as single source of truth for language codes. - SubtitleKnowledgeBase now delegates lookup to LanguageRegistry; subtitles.yaml only declares subtitle-specific tokens (vostfr, vf, vff, …). - SubtitlePreferences default → ["fre", "eng"]; subtitle filenames written as {iso639_2b}.srt (legacy fr.srt still read via alias). - Scanner: dropped _LANG_KEYWORDS / _SDH_TOKENS / _FORCED_TOKENS / SUBTITLE_EXTENSIONS hardcoded dicts. - Fixed: 'hi' token no longer marks SDH (conflicted with Hindi alias). - Added settings.min_movie_size_bytes (was a module constant). P1 #3 — Release parser unification + data-driven tokenizer - parse_release() is now the single source of truth for release-name parsing. - alfred/knowledge/release/separators.yaml declares the token separators used by the tokenizer (., space, [, ], (, ), _). New conventions can be added without code changes. - Tokenizer now splits on any configured separator instead of name.split('.'). Releases like 'The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]' parse via the direct path without sanitization fallback. - Site-tag extraction always runs first; well-formedness only rejects truly forbidden chars. - _parse_season_episode() extended with NxNN / NxNNxNN alt forms. - Removed dead helpers: _sanitize, _normalize. Domain cleanup - Deleted fossil services with zero production callers: alfred/domain/movies/services.py alfred/domain/tv_shows/services.py alfred/domain/subtitles/services.py (replaced by subtitles/services/ package) alfred/domain/subtitles/repositories.py - Split monolithic subtitle services into a package (identifier, matcher, placer, pattern_detector, utils) + dedicated knowledge/ package. 
- MediaInfo split into dedicated package (alfred/domain/shared/media/: audio, video, subtitle, info, matching). Persistence cleanup - Removed dead JSON repositories (movie/subtitle/tvshow_repository.py). Tests - Major expansion of the test suite organized to mirror the source tree. - Removed obsolete *_edge_cases test files superseded by structured tests. - Suite: 990 passed, 8 skipped. Misc - .gitignore: exclude env_backup/ and *.bak. - Adjustments across agent/llm, app.py, application/filesystem, and infrastructure/filesystem to align with the new domain layout.
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
"""Tests for ``alfred.domain.subtitles.services.pattern_detector.PatternDetector``.
|
||||
|
||||
The detector inspects a release folder and returns the best-matching known
|
||||
pattern + a confidence score.
|
||||
|
||||
Coverage:
|
||||
|
||||
- ``TestEmbeddedDetection`` — ffprobe is mocked; ``embedded`` pattern wins
|
||||
when no external subs and ffprobe reports tracks.
|
||||
- ``TestAdjacentDetection`` — .srt next to the video → ``adjacent``.
|
||||
- ``TestFlatSubsFolder`` — ``Subs/*.srt`` → ``subs_flat``.
|
||||
- ``TestEpisodeSubfolder`` — ``Subs/{ep}/*.srt`` → ``episode_subfolder``.
|
||||
- ``TestNothingFound`` — empty release is described as having no external subtitle files.
|
||||
- ``TestDescribe`` — human-readable description mentions the right cues.
|
||||
|
||||
Uses the real ``SubtitleKnowledgeBase`` (loaded from the live builtin
|
||||
``patterns/`` folder) since rebuilding all four patterns by hand would
|
||||
just duplicate fixture state.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
|
||||
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def kb() -> SubtitleKnowledgeBase:
    """Provide one ``SubtitleKnowledgeBase`` shared by every test in this module.

    The knowledge base is built with its default constructor arguments; the
    module scope means it is created once rather than once per test.
    """
    return SubtitleKnowledgeBase()
|
||||
|
||||
|
||||
@pytest.fixture
def detector(kb: SubtitleKnowledgeBase) -> PatternDetector:
    """Provide a fresh ``PatternDetector`` per test, backed by the shared ``kb``."""
    return PatternDetector(kb)
|
||||
|
||||
|
||||
def _make_video(folder: Path, name: str = "Show.S01E01.mkv") -> Path:
    """Create an empty placeholder video file and return its path.

    Args:
        folder: Existing directory in which the file is created.
        name: File name of the placeholder video.

    Returns:
        The path ``folder / name``, pointing at a zero-byte file.
    """
    video_path = folder / name
    # The tests only need the file to exist; its content is never read here.
    video_path.write_bytes(b"")
    return video_path
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Embedded #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestEmbeddedDetection:
    """Detection when the only subtitle source is tracks inside the video."""

    def test_embedded_only(self, detector, tmp_path):
        """A lone video with embedded tracks resolves to the ``embedded`` pattern."""
        # Folder has video but no external .srt files anywhere.
        video = _make_video(tmp_path)
        # Stub the embedded-track probe so the test does not depend on a real
        # media file.
        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        ):
            result = detector.detect(tmp_path, video)

        assert result["detected"] is not None
        assert result["detected"].id == "embedded"
        assert result["confidence"] > 0
        assert "embedded" in result["description"].lower()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Adjacent #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestAdjacentDetection:
    """Detection when subtitle files sit in the same folder as the video."""

    def test_srt_next_to_video(self, detector, tmp_path):
        """``.srt`` files beside the video resolve to the ``adjacent`` pattern."""
        video = _make_video(tmp_path)
        (tmp_path / "Show.S01E01.English.srt").write_text("")
        (tmp_path / "Show.S01E01.French.srt").write_text("")

        # Embedded tracks are reported absent so only external files count.
        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        assert result["detected"] is not None
        assert result["detected"].id == "adjacent"
        assert "adjacent" in result["description"]
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Subs flat folder #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestFlatSubsFolder:
    """Detection when subtitle files live directly inside a ``Subs/`` folder."""

    def test_flat_subs_folder_adjacent_to_video(self, detector, tmp_path):
        """``Subs/*.srt`` next to the video resolves to ``subs_flat``."""
        video = _make_video(tmp_path)
        subs = tmp_path / "Subs"
        subs.mkdir()
        (subs / "Show.S01E01.English.srt").write_text("")
        (subs / "Show.S01E01.French.srt").write_text("")

        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        assert result["detected"] is not None
        assert result["detected"].id == "subs_flat"
        assert "flat" in result["description"]

    def test_flat_subs_folder_at_release_root(self, detector, tmp_path):
        """``Subs/`` at the release root is found even if the video is nested."""
        # Sample video lives one level deep; Subs/ is at the release root.
        season_dir = tmp_path / "Season.01"
        season_dir.mkdir()
        video = _make_video(season_dir)
        subs = tmp_path / "Subs"
        subs.mkdir()
        (subs / "ep01.English.srt").write_text("")

        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        assert result["detected"] is not None
        assert result["detected"].id == "subs_flat"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Episode subfolder #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestEpisodeSubfolder:
    """Detection when each episode has its own folder under ``Subs/``."""

    def test_per_episode_subfolder(self, detector, tmp_path):
        """``Subs/{ep}/*.srt`` resolves to ``episode_subfolder``."""
        video = _make_video(tmp_path, name="Show.S01E01.mkv")
        # The subfolder is named after the episode (video name minus extension).
        subs = tmp_path / "Subs" / "Show.S01E01"
        subs.mkdir(parents=True)
        (subs / "2_English.srt").write_text("")
        (subs / "3_French.srt").write_text("")

        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        assert result["detected"] is not None
        assert result["detected"].id == "episode_subfolder"
        desc = result["description"]
        assert "episode_subfolder" in desc
        # Numeric-prefix cue should be reported.
        assert "numeric prefix" in desc
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Nothing #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestNothingFound:
    """Detection on a release that has no subtitles of any kind."""

    def test_empty_release_no_pattern(self, detector, tmp_path):
        """With no subtitles at all, the description reports the emptiness."""
        video = _make_video(tmp_path)

        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        # No external subs and no embedded → adjacent strategy still scores
        # 0.5 (no Subs folder bonus). Best pattern may exist or not depending
        # on threshold (0.4). Either way the description must reflect emptiness.
        # That is why ``result["detected"]`` is deliberately not asserted here.
        assert "no external subtitle files" in result["description"]
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Describe #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestDescribe:
    """Checks on the human-readable ``description`` returned by ``detect``."""

    def test_describe_includes_language_token_cue(self, detector, tmp_path):
        """A language-named subtitle file makes the description cite language tokens."""
        video = _make_video(tmp_path)
        subs = tmp_path / "Subs"
        subs.mkdir()
        (subs / "ep01.English.srt").write_text("")

        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        ):
            result = detector.detect(tmp_path, video)

        assert "language tokens" in result["description"]

    def test_describe_combines_external_and_embedded(self, detector, tmp_path):
        """External and embedded sources are both mentioned when both exist."""
        video = _make_video(tmp_path)
        (tmp_path / "Show.S01E01.English.srt").write_text("")

        # Embedded tracks are reported present on top of the adjacent .srt.
        with patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        ):
            result = detector.detect(tmp_path, video)

        desc = result["description"]
        assert "adjacent" in desc
        assert "embedded" in desc.lower()
|
||||
Reference in New Issue
Block a user