Files
alfred/alfred/domain/subtitles/services/matcher.py
T
francwa 88f156b7a4 refactor(subtitles): rename SubtitleCandidate → SubtitleScanResult
The old name conflated 'might become a placed subtitle' with 'what a
scan pass produced'. The class is the output of a scan/identify pass —
language/format may still be None while classification is in progress,
confidence reflects classifier certainty, raw_tokens holds filename
fragments under analysis. SubtitleScanResult says that directly.

Pure rename + refreshed docstring; no behavior change. Touches the
domain entity, the matcher/identifier/utils services, the
manage_subtitles use case, the placer, the metadata store, the
shared-media cross-ref comment, and 7 test modules.
2026-05-21 08:05:46 +02:00

121 lines
4.0 KiB
Python

"""SubtitleMatcher — filters tracks against resolved rules."""
import logging
from ..entities import SubtitleScanResult
from ..value_objects import SubtitleMatchingRules
# Module-level logger named after this module (via __name__).
logger = logging.getLogger(__name__)
class SubtitleMatcher:
    """
    Filters a list of SubtitleScanResult against effective SubtitleMatchingRules.

    Returns matched tracks (pass all filters, confidence >= min_confidence)
    and unresolved tracks (need user clarification).

    Conflict resolution: when two tracks share the same language + type,
    format_priority decides which one to keep.
    """

    def match(
        self,
        tracks: list[SubtitleScanResult],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleScanResult], list[SubtitleScanResult]]:
        """
        Split ``tracks`` into ``(matched, unresolved)``.

        - Embedded tracks are skipped and end up in neither list.
        - Tracks without a language, or whose confidence is below
          ``rules.min_confidence``, are returned as unresolved.
        - Remaining tracks that fail the language/format/type filters are
          dropped (logged at debug level).
        - Survivors are de-duplicated per (language, type) before returning.
        """
        matched: list[SubtitleScanResult] = []
        unresolved: list[SubtitleScanResult] = []

        for track in tracks:
            if track.is_embedded:
                continue

            if track.language is None or track.confidence < rules.min_confidence:
                unresolved.append(track)
                continue

            if not self._passes_filters(track, rules):
                logger.debug("SubtitleMatcher: filtered out %s", track)
                continue

            matched.append(track)

        matched = self._resolve_conflicts(matched, rules)

        logger.info(
            "SubtitleMatcher: %d matched, %d unresolved",
            len(matched),
            len(unresolved),
        )
        return matched, unresolved

    def _passes_filters(
        self, track: SubtitleScanResult, rules: SubtitleMatchingRules
    ) -> bool:
        """
        Return True if ``track`` satisfies every filter that ``rules`` defines.

        A filter whose rule value is empty/falsy is treated as "no restriction".
        """
        # Language filter
        if rules.preferred_languages:
            if not track.language:
                return False
            if track.language.code not in rules.preferred_languages:
                return False

        # Format filter (only for external files)
        if rules.preferred_formats and not track.is_embedded:
            if not track.format:
                return False
            if track.format.id not in rules.preferred_formats:
                return False

        # Type filter
        if rules.allowed_types:
            if track.subtitle_type.value not in rules.allowed_types:
                return False

        return True

    def _resolve_conflicts(
        self,
        tracks: list[SubtitleScanResult],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleScanResult]:
        """
        When multiple tracks have same language + type, keep only the best one
        according to format_priority. If no format_priority applies, keep the first.

        The returned list is ordered by the first appearance of each
        (language, type) key in ``tracks``.
        """
        seen: dict[tuple, SubtitleScanResult] = {}

        for track in tracks:
            lang = track.language.code if track.language else None
            stype = track.subtitle_type.value
            key = (lang, stype)

            if key not in seen:
                seen[key] = track
                continue

            existing = seen[key]
            if self._prefer(track, existing, rules.format_priority):
                # Single format string so the key and the verb stay separated
                # in the rendered message.
                logger.debug(
                    "SubtitleMatcher: conflict %s preferring %s over %s",
                    key,
                    track.format.id if track.format else "embedded",
                    existing.format.id if existing.format else "embedded",
                )
                seen[key] = track

        return list(seen.values())

    def _prefer(
        self,
        candidate: SubtitleScanResult,
        existing: SubtitleScanResult,
        format_priority: list[str],
    ) -> bool:
        """
        Return True if candidate is preferable to existing.

        Preference is decided solely by the position of each track's format id
        in ``format_priority`` (earlier is better). With an empty
        ``format_priority``, or on a tie, the existing track is kept.
        """
        if not format_priority:
            return False

        # Formats missing from the list rank after every listed one. Using the
        # list length (rather than a fixed sentinel) keeps that true no matter
        # how many entries the list has.
        unranked = len(format_priority)

        c_fmt = candidate.format.id if candidate.format else ""
        e_fmt = existing.format.id if existing.format else ""
        c_rank = format_priority.index(c_fmt) if c_fmt in format_priority else unranked
        e_rank = format_priority.index(e_fmt) if e_fmt in format_priority else unranked
        return c_rank < e_rank