"""Tests for ``alfred.domain.subtitles.services.matcher.SubtitleMatcher``.

The matcher filters classified subtitle tracks against effective rules,
returning ``(matched, unresolved)``.

Coverage:

- ``TestUnresolved`` — None language or low confidence → unresolved.
- ``TestLanguageFilter`` / ``TestFormatFilter`` / ``TestTypeFilter`` —
  rule-based exclusion.
- ``TestEmbeddedTracks`` — embedded tracks are skipped entirely.
- ``TestFormatPriority`` — conflict between two same-(lang, type) tracks is
  resolved by ``format_priority``.
- ``TestNoConflict`` — different (lang, type) keys never collide.

Uses lightweight, hand-built value objects — no KB dependency.
"""

from __future__ import annotations

import pytest

from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
from alfred.domain.subtitles.value_objects import (
    SubtitleFormat,
    SubtitleLanguage,
    SubtitleMatchingRules,
    SubtitleType,
)

# Hand-built value objects shared by every test in this module.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
ASS = SubtitleFormat(id="ass", extensions=[".ass"])

FRA = SubtitleLanguage(code="fra", tokens=["fr"])
ENG = SubtitleLanguage(code="eng", tokens=["en"])
SPA = SubtitleLanguage(code="spa", tokens=["es"])


def _track(
    lang: SubtitleLanguage | None = FRA,
    fmt: SubtitleFormat | None = SRT,
    stype: SubtitleType = SubtitleType.STANDARD,
    confidence: float = 1.0,
    is_embedded: bool = False,
) -> SubtitleCandidate:
    """Build a ``SubtitleCandidate`` with convenient defaults.

    With no arguments the result is an external, full-confidence, standard
    French SRT track; each test overrides only the field it exercises.
    """
    return SubtitleCandidate(
        language=lang,
        format=fmt,
        subtitle_type=stype,
        is_embedded=is_embedded,
        confidence=confidence,
    )


@pytest.fixture
def matcher() -> SubtitleMatcher:
    """Provide a fresh ``SubtitleMatcher`` for each test."""
    return SubtitleMatcher()


# --------------------------------------------------------------------------- #
# Unresolved                                                                  #
# --------------------------------------------------------------------------- #


class TestUnresolved:
    """Tracks lacking a language or enough confidence land in ``unresolved``."""

    def test_none_language_unresolved(self, matcher: SubtitleMatcher) -> None:
        """A track whose language is ``None`` is reported as unresolved."""
        t = _track(lang=None)
        rules = SubtitleMatchingRules(min_confidence=0.7)
        matched, unresolved = matcher.match([t], rules)
        assert matched == []
        assert unresolved == [t]

    def test_low_confidence_unresolved(self, matcher: SubtitleMatcher) -> None:
        """A track below ``min_confidence`` is reported as unresolved."""
        t = _track(confidence=0.3)
        rules = SubtitleMatchingRules(min_confidence=0.7)
        matched, unresolved = matcher.match([t], rules)
        assert matched == []
        assert unresolved == [t]

    def test_threshold_exact_passes(self, matcher: SubtitleMatcher) -> None:
        """A confidence exactly equal to ``min_confidence`` is accepted."""
        t = _track(confidence=0.7)
        rules = SubtitleMatchingRules(
            min_confidence=0.7, preferred_languages=["fra"]
        )
        matched, unresolved = matcher.match([t], rules)
        assert matched == [t]
        # A track that passes the threshold must not also be reported as
        # unresolved.
        assert unresolved == []


# --------------------------------------------------------------------------- #
# Filters                                                                     #
# --------------------------------------------------------------------------- #


class TestLanguageFilter:
    """Filtering driven by ``preferred_languages``."""

    def test_preferred_languages_filters_out(
        self, matcher: SubtitleMatcher
    ) -> None:
        """A language outside ``preferred_languages`` is not matched."""
        t_eng = _track(lang=ENG)
        rules = SubtitleMatchingRules(
            preferred_languages=["fra"], min_confidence=0.0
        )
        matched, _ = matcher.match([t_eng], rules)
        assert matched == []

    def test_preferred_language_match_passes(
        self, matcher: SubtitleMatcher
    ) -> None:
        """A language listed in ``preferred_languages`` is matched."""
        t_fra = _track(lang=FRA)
        rules = SubtitleMatchingRules(
            preferred_languages=["fra"], min_confidence=0.0
        )
        matched, _ = matcher.match([t_fra], rules)
        assert matched == [t_fra]

    def test_empty_preferred_allows_all(self, matcher: SubtitleMatcher) -> None:
        """Without ``preferred_languages`` every language is accepted."""
        t_fra = _track(lang=FRA)
        t_eng = _track(lang=ENG)
        rules = SubtitleMatchingRules(min_confidence=0.0)
        matched, _ = matcher.match([t_fra, t_eng], rules)
        # No language filter → both pass (different keys → no conflict).
        assert len(matched) == 2


class TestFormatFilter:
    """Filtering driven by ``preferred_formats``."""

    def test_format_outside_preferred_filtered(
        self, matcher: SubtitleMatcher
    ) -> None:
        """A format outside ``preferred_formats`` is not matched."""
        t = _track(fmt=ASS)
        rules = SubtitleMatchingRules(
            preferred_formats=["srt"], min_confidence=0.0
        )
        matched, _ = matcher.match([t], rules)
        assert matched == []

    def test_no_format_attribute_filtered_when_pref_set(
        self, matcher: SubtitleMatcher
    ) -> None:
        """A track with no format is dropped when a preference is set."""
        t = _track(fmt=None)
        rules = SubtitleMatchingRules(
            preferred_formats=["srt"], min_confidence=0.0
        )
        matched, _ = matcher.match([t], rules)
        assert matched == []


class TestTypeFilter:
    """Filtering driven by ``allowed_types``."""

    def test_disallowed_type_excluded(self, matcher: SubtitleMatcher) -> None:
        """A subtitle type missing from ``allowed_types`` is not matched."""
        t = _track(stype=SubtitleType.SDH)
        rules = SubtitleMatchingRules(
            allowed_types=["standard", "forced"], min_confidence=0.0
        )
        matched, _ = matcher.match([t], rules)
        assert matched == []

    def test_allowed_type_passes(self, matcher: SubtitleMatcher) -> None:
        """A subtitle type listed in ``allowed_types`` is matched."""
        t = _track(stype=SubtitleType.STANDARD)
        rules = SubtitleMatchingRules(
            allowed_types=["standard"], min_confidence=0.0
        )
        matched, _ = matcher.match([t], rules)
        assert matched == [t]


# --------------------------------------------------------------------------- #
# Embedded handling                                                           #
# --------------------------------------------------------------------------- #


class TestEmbeddedTracks:
    """Embedded tracks appear in neither output list."""

    def test_embedded_track_skipped_entirely(
        self, matcher: SubtitleMatcher
    ) -> None:
        """An embedded track is neither matched nor unresolved."""
        e = _track(is_embedded=True)
        rules = SubtitleMatchingRules(min_confidence=0.0)
        matched, unresolved = matcher.match([e], rules)
        # Embedded tracks are not the matcher's concern.
        assert matched == []
        assert unresolved == []


# --------------------------------------------------------------------------- #
# Conflict resolution                                                         #
# --------------------------------------------------------------------------- #


class TestFormatPriority:
    """Two tracks sharing (lang, type) are reduced to a single winner."""

    def test_higher_priority_format_wins(self, matcher: SubtitleMatcher) -> None:
        """The format listed first in ``format_priority`` is kept."""
        # Same (lang, type) but different formats → priority decides.
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["srt", "ass"],
        )
        matched, _ = matcher.match([t_ass, t_srt], rules)
        assert len(matched) == 1
        assert matched[0].format.id == "srt"

    def test_first_seen_kept_when_no_priority(
        self, matcher: SubtitleMatcher
    ) -> None:
        """Without ``format_priority`` the first track in input order is kept."""
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(min_confidence=0.0)
        matched, _ = matcher.match([t_ass, t_srt], rules)
        # No priority → ass came first → kept.
        assert len(matched) == 1
        assert matched[0].format.id == "ass"

    def test_priority_order_reversed(self, matcher: SubtitleMatcher) -> None:
        """Reversing ``format_priority`` flips the winner, whatever the input order."""
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["ass", "srt"],
        )
        matched, _ = matcher.match([t_srt, t_ass], rules)
        # Mirror the sibling priority test: exactly one winner survives, so
        # indexing ``matched[0]`` cannot mask a leftover duplicate.
        assert len(matched) == 1
        assert matched[0].format.id == "ass"


class TestNoConflict:
    """Tracks with different (lang, type) keys never displace each other."""

    def test_different_languages_both_kept(
        self, matcher: SubtitleMatcher
    ) -> None:
        """Two tracks differing only by language are both matched."""
        t_fra = _track(lang=FRA)
        t_eng = _track(lang=ENG)
        rules = SubtitleMatchingRules(min_confidence=0.0)
        matched, _ = matcher.match([t_fra, t_eng], rules)
        assert len(matched) == 2

    def test_different_types_both_kept(self, matcher: SubtitleMatcher) -> None:
        """Two tracks differing only by subtitle type are both matched."""
        t_std = _track(stype=SubtitleType.STANDARD)
        t_sdh = _track(stype=SubtitleType.SDH)
        rules = SubtitleMatchingRules(min_confidence=0.0)
        matched, _ = matcher.match([t_std, t_sdh], rules)
        assert len(matched) == 2