88f156b7a4
The old name conflated 'might become a placed subtitle' with 'what a scan pass produced'. The class is the output of a scan/identify pass — language/format may still be None while classification is in progress, confidence reflects classifier certainty, raw_tokens holds filename fragments under analysis. SubtitleScanResult says that directly. Pure rename + refreshed docstring; no behavior change. Touches the domain entity, the matcher/identifier/utils services, the manage_subtitles use case, the placer, the metadata store, the shared-media cross-ref comment, and 7 test modules.
209 lines
7.5 KiB
Python
209 lines
7.5 KiB
Python
"""Tests for ``alfred.domain.subtitles.services.matcher.SubtitleMatcher``.
|
|
|
|
The matcher filters classified subtitle tracks against effective rules,
|
|
returning ``(matched, unresolved)``. Coverage:
|
|
|
|
- ``TestUnresolved`` — None language or low confidence → unresolved.
|
|
- ``TestLanguageFilter`` / ``TestFormatFilter`` / ``TestTypeFilter`` —
|
|
rule-based exclusion.
|
|
- ``TestEmbeddedTracks`` — embedded tracks are skipped entirely.
|
|
- ``TestFormatPriority`` — conflict between two same-(lang, type) tracks
|
|
is resolved by ``format_priority``.
|
|
- ``TestNoConflict`` — different (lang, type) keys never collide.
|
|
|
|
Uses lightweight, hand-built value objects — no KB dependency.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from alfred.domain.subtitles.entities import SubtitleScanResult
|
|
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
|
|
from alfred.domain.subtitles.value_objects import (
|
|
SubtitleFormat,
|
|
SubtitleLanguage,
|
|
SubtitleMatchingRules,
|
|
SubtitleType,
|
|
)
|
|
|
|
# Hand-built language value objects shared by every test in this module.
FRA = SubtitleLanguage(code="fra", tokens=["fr"])
ENG = SubtitleLanguage(code="eng", tokens=["en"])
SPA = SubtitleLanguage(code="spa", tokens=["es"])

# Hand-built format value objects shared by every test in this module.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
ASS = SubtitleFormat(id="ass", extensions=[".ass"])
|
|
|
|
|
|
def _track(
    lang: SubtitleLanguage | None = FRA,
    fmt: SubtitleFormat | None = SRT,
    stype: SubtitleType = SubtitleType.STANDARD,
    confidence: float = 1.0,
    is_embedded: bool = False,
) -> SubtitleScanResult:
    """Build a ``SubtitleScanResult`` for a test case.

    With no arguments the result is a non-embedded French SRT track of
    standard type with confidence ``1.0``; each test overrides only the
    field it is exercising.

    Args:
        lang: Language of the track, or ``None`` for an unclassified one.
        fmt: Format of the track, or ``None`` when unknown.
        stype: Subtitle type of the track.
        confidence: Confidence value stored on the scan result.
        is_embedded: Whether the track is flagged as embedded.

    Returns:
        The freshly constructed scan result.
    """
    scan_fields = {
        "language": lang,
        "format": fmt,
        "subtitle_type": stype,
        "is_embedded": is_embedded,
        "confidence": confidence,
    }
    return SubtitleScanResult(**scan_fields)
|
|
|
|
|
|
@pytest.fixture
def matcher():
    """Provide a newly constructed ``SubtitleMatcher`` to the requesting test."""
    instance = SubtitleMatcher()
    return instance
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Unresolved #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestUnresolved:
    """Tracks lacking a language or enough confidence land in ``unresolved``."""

    def test_none_language_unresolved(self, matcher):
        """A track without a language is unresolved rather than matched."""
        track = _track(lang=None)
        threshold_rules = SubtitleMatchingRules(min_confidence=0.7)

        matched, unresolved = matcher.match([track], threshold_rules)

        assert matched == []
        assert unresolved == [track]

    def test_low_confidence_unresolved(self, matcher):
        """A track below ``min_confidence`` is unresolved rather than matched."""
        track = _track(confidence=0.3)
        threshold_rules = SubtitleMatchingRules(min_confidence=0.7)

        matched, unresolved = matcher.match([track], threshold_rules)

        assert matched == []
        assert unresolved == [track]

    def test_threshold_exact_passes(self, matcher):
        """A confidence exactly equal to ``min_confidence`` is accepted."""
        track = _track(confidence=0.7)
        threshold_rules = SubtitleMatchingRules(
            min_confidence=0.7,
            preferred_languages=["fra"],
        )

        matched, _unresolved = matcher.match([track], threshold_rules)

        assert matched == [track]
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Filters #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestLanguageFilter:
    """``preferred_languages`` decides which track languages may match."""

    def test_preferred_languages_filters_out(self, matcher):
        """An English track is not matched when only ``fra`` is preferred."""
        english = _track(lang=ENG)
        french_only = SubtitleMatchingRules(
            preferred_languages=["fra"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([english], french_only)

        assert matched == []

    def test_preferred_language_match_passes(self, matcher):
        """A French track is matched when ``fra`` is preferred."""
        french = _track(lang=FRA)
        french_only = SubtitleMatchingRules(
            preferred_languages=["fra"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([french], french_only)

        assert matched == [french]

    def test_empty_preferred_allows_all(self, matcher):
        """Without a language preference, tracks of every language match."""
        french = _track(lang=FRA)
        english = _track(lang=ENG)
        unrestricted = SubtitleMatchingRules(min_confidence=0.0)

        matched, _unresolved = matcher.match([french, english], unrestricted)

        # No language filter, and the two tracks have different keys, so
        # neither is dropped by conflict resolution.
        assert len(matched) == 2
|
|
|
|
|
|
class TestFormatFilter:
    """``preferred_formats`` excludes tracks in any other format."""

    def test_format_outside_preferred_filtered(self, matcher):
        """An ASS track is not matched when only ``srt`` is preferred."""
        ass_track = _track(fmt=ASS)
        srt_only = SubtitleMatchingRules(
            preferred_formats=["srt"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([ass_track], srt_only)

        assert matched == []

    def test_no_format_attribute_filtered_when_pref_set(self, matcher):
        """A track whose format is ``None`` is not matched under a preference."""
        formatless = _track(fmt=None)
        srt_only = SubtitleMatchingRules(
            preferred_formats=["srt"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([formatless], srt_only)

        assert matched == []
|
|
|
|
|
|
class TestTypeFilter:
    """``allowed_types`` excludes tracks of any other subtitle type."""

    def test_disallowed_type_excluded(self, matcher):
        """An SDH track is not matched when only standard/forced are allowed."""
        sdh_track = _track(stype=SubtitleType.SDH)
        standard_or_forced = SubtitleMatchingRules(
            allowed_types=["standard", "forced"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([sdh_track], standard_or_forced)

        assert matched == []

    def test_allowed_type_passes(self, matcher):
        """A standard track is matched when ``standard`` is allowed."""
        standard_track = _track(stype=SubtitleType.STANDARD)
        standard_only = SubtitleMatchingRules(
            allowed_types=["standard"],
            min_confidence=0.0,
        )

        matched, _unresolved = matcher.match([standard_track], standard_only)

        assert matched == [standard_track]
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Embedded handling #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestEmbeddedTracks:
    """Embedded tracks appear in neither output list."""

    def test_embedded_track_skipped_entirely(self, matcher):
        """An embedded track is neither matched nor reported as unresolved."""
        embedded = _track(is_embedded=True)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        matched, unresolved = matcher.match([embedded], permissive)

        # Embedded tracks are not the matcher's concern.
        assert matched == []
        assert unresolved == []
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Conflict resolution #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestFormatPriority:
    """Two tracks with the same (lang, type) key collapse to a single winner."""

    def test_higher_priority_format_wins(self, matcher):
        """The format listed first in ``format_priority`` beats an earlier track."""
        # Same (lang, type) but different formats → priority decides.
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["srt", "ass"],
        )

        matched, _ = matcher.match([t_ass, t_srt], rules)

        assert len(matched) == 1
        assert matched[0].format.id == "srt"

    def test_first_seen_kept_when_no_priority(self, matcher):
        """Without ``format_priority`` the first track seen is the one kept."""
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(min_confidence=0.0)

        matched, _ = matcher.match([t_ass, t_srt], rules)

        # No priority → ass came first → kept.
        assert len(matched) == 1
        assert matched[0].format.id == "ass"

    def test_priority_order_reversed(self, matcher):
        """Reversing ``format_priority`` makes ASS win over an earlier SRT."""
        t_srt = _track(fmt=SRT)
        t_ass = _track(fmt=ASS)
        rules = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["ass", "srt"],
        )

        matched, _ = matcher.match([t_srt, t_ass], rules)

        # Assert the loser was dropped too: checking only matched[0] would
        # also pass if both tracks were kept and merely ordered by priority.
        assert len(matched) == 1
        assert matched[0].format.id == "ass"
|
|
|
|
|
|
class TestNoConflict:
    """Tracks with different (lang, type) keys are all kept."""

    def test_different_languages_both_kept(self, matcher):
        """A French and an English track of the same type both match."""
        french = _track(lang=FRA)
        english = _track(lang=ENG)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        matched, _unresolved = matcher.match([french, english], permissive)

        assert len(matched) == 2

    def test_different_types_both_kept(self, matcher):
        """A standard and an SDH track of the same language both match."""
        standard = _track(stype=SubtitleType.STANDARD)
        sdh = _track(stype=SubtitleType.SDH)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        matched, _unresolved = matcher.match([standard, sdh], permissive)

        assert len(matched) == 2
|