6e252d1e81
aggregates.py used to call SubtitleKnowledgeBase().default_rules() via a DEFAULT_RULES() helper, which silently pulled the infrastructure layer (YAML loader) into the domain on every resolve. Make the dependency explicit: resolve() now takes the default rules as a parameter, and the caller (the ManageSubtitles use case) loads them from the KB once and passes them in. Domain stays I/O-free. - Drop DEFAULT_RULES helper and the SubtitleKnowledgeBase import from alfred/domain/subtitles/aggregates.py - SubtitleRuleSet.resolve(default_rules: SubtitleMatchingRules) - manage_subtitles use case passes kb.default_rules() at the call site - Tests use a local SubtitleMatchingRules stand-in instead of relying on KB defaults
309 lines
10 KiB
Python
309 lines
10 KiB
Python
"""ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from alfred.domain.shared.value_objects import ImdbId
|
|
from alfred.domain.subtitles.entities import SubtitleCandidate
|
|
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
|
|
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
|
|
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
|
|
from alfred.application.subtitles.placer import (
|
|
PlacedTrack,
|
|
SubtitlePlacer,
|
|
_build_dest_name,
|
|
)
|
|
from alfred.domain.subtitles.services.utils import available_subtitles
|
|
from alfred.domain.subtitles.value_objects import ScanStrategy
|
|
from alfred.infrastructure.filesystem.scanner import PathlibFilesystemScanner
|
|
from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
|
|
from alfred.infrastructure.knowledge.subtitles.loader import KnowledgeLoader
|
|
from alfred.infrastructure.persistence.context import get_memory
|
|
from alfred.infrastructure.probe.ffprobe_prober import FfprobeMediaProber
|
|
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
|
|
from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
|
|
|
|
from .dto import (
|
|
AvailableSubtitle,
|
|
ManageSubtitlesResponse,
|
|
PlacedSubtitle,
|
|
UnresolvedTrack,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _infer_library_root(dest_video: Path, media_type: str) -> Path:
|
|
"""
|
|
Infer the media library root folder from the destination video path.
|
|
|
|
TV show: video → Season 01 → The X-Files (3 levels up)
|
|
Movie: video → Inception (2010) (1 level up)
|
|
"""
|
|
if media_type == "tv_show":
|
|
return dest_video.parent.parent
|
|
return dest_video.parent
|
|
|
|
|
|
def _to_imdb_id(raw: str | None) -> ImdbId | None:
|
|
if not raw:
|
|
return None
|
|
try:
|
|
return ImdbId(raw)
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
class ManageSubtitlesUseCase:
    """
    Full subtitle pipeline:

    1. Load knowledge base
    2. Detect (or confirm) the release pattern
    3. Identify all tracks (ffprobe + filesystem scan)
    4. Load + resolve rules for this media
    5. Match tracks against rules
    6. If any tracks are unresolved → return needs_clarification (don't place yet)
    7. Place matched tracks via hard-link
    8. Persist to .alfred/metadata.yaml

    The use case is stateless — all dependencies are instantiated inline.
    """

    # Minimum confidence an auto-detected pattern must reach to be accepted.
    _MIN_DETECTION_CONFIDENCE = 0.6

    def execute(
        self,
        source_video: str,
        destination_video: str,
        imdb_id: str | None = None,
        media_type: str = "tv_show",
        release_group: str | None = None,
        season: int | None = None,
        episode: int | None = None,
        confirmed_pattern_id: str | None = None,
        dry_run: bool = False,
    ) -> ManageSubtitlesResponse:
        """
        Run the subtitle pipeline for one video.

        Args:
            source_video: Path of the video inside the release; its parent
                folder is used as the release root for pattern detection.
            destination_video: Path of the video in the library. Used to infer
                the library root and as the placement target; echoed back as
                ``video_path`` in the response.
            imdb_id: Optional raw IMDb id; values that cannot be converted are
                treated as absent.
            media_type: "tv_show" places the library root two folders above
                the destination video; any other value uses its parent folder.
            release_group: Passed to track identification, rule loading and
                the history record.
            season: Recorded in the history entry.
            episode: Recorded in the history entry.
            confirmed_pattern_id: Explicit pattern id that takes precedence
                over the stored and auto-detected patterns.
            dry_run: When true, compute the destination names of the matched
                tracks without placing anything or writing history.

        Returns:
            A ManageSubtitlesResponse whose status is:
            - "error" with error "source_not_found" or "pattern_not_found";
            - "needs_clarification" when the matcher left tracks unresolved
              (nothing is placed);
            - "ok" otherwise (no tracks found, embedded pattern, nothing
              matched, dry run, or tracks placed).
        """
        source_path = Path(source_video)
        dest_path = Path(destination_video)

        # Only bail out when neither the file nor its folder exists; a missing
        # file whose folder is still present is let through.
        # NOTE(review): presumably so subtitles can still be picked up from the
        # release folder — confirm.
        if not source_path.exists() and not source_path.parent.exists():
            return ManageSubtitlesResponse(
                status="error",
                error="source_not_found",
                message=f"Source video not found: {source_video}",
            )

        kb = SubtitleKnowledgeBase(KnowledgeLoader())
        prober = FfprobeMediaProber()
        scanner = PathlibFilesystemScanner()
        library_root = _infer_library_root(dest_path, media_type)
        store = SubtitleMetadataStore(library_root)
        repo = RuleSetRepository(library_root)

        # --- Pattern resolution ---
        pattern = self._resolve_pattern(
            kb,
            prober,
            scanner,
            store,
            source_path,
            confirmed_pattern_id,
            release_group,
        )
        if pattern is None:
            return ManageSubtitlesResponse(
                status="error",
                error="pattern_not_found",
                message="Could not determine subtitle pattern for this release.",
            )

        # --- Identify ---
        media_id = _to_imdb_id(imdb_id)
        identifier = SubtitleIdentifier(kb, prober, scanner)
        metadata = identifier.identify(
            video_path=source_path,
            pattern=pattern,
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
        )

        if metadata.total_count == 0:
            logger.info(
                f"ManageSubtitles: no subtitle tracks found for {source_path.name}"
            )
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
            )

        # --- Embedded short-circuit ---
        # Embedded patterns skip matching and placement; the response only
        # lists the embedded tracks returned by available_subtitles().
        if pattern.scan_strategy == ScanStrategy.EMBEDDED:
            logger.info("ManageSubtitles: embedded pattern — skipping matcher")
            available = [
                AvailableSubtitle(
                    language=t.language.code if t.language else "?",
                    subtitle_type=t.subtitle_type.value,
                )
                for t in available_subtitles(metadata.embedded_tracks)
            ]
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
                available=available,
            )

        # --- Match (external only) ---
        subtitle_prefs = None
        try:
            memory = get_memory()
            subtitle_prefs = memory.ltm.subtitle_preferences
        except Exception:
            # Preferences are optional: keep going without them, but leave a
            # trace instead of discarding the failure silently.
            logger.debug(
                "ManageSubtitles: could not load subtitle preferences from memory",
                exc_info=True,
            )

        # Default rules are loaded from the knowledge base here and handed to
        # resolve() explicitly.
        rules = repo.load(release_group, subtitle_prefs).resolve(kb.default_rules())
        matcher = SubtitleMatcher()
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        if unresolved:
            logger.info(
                f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
            )
            return ManageSubtitlesResponse(
                status="needs_clarification",
                video_path=destination_video,
                placed=[],
                unresolved=[_to_unresolved_dto(t) for t in unresolved],
            )

        if not matched:
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=metadata.total_count,
            )

        # --- Dry run: skip placement ---
        if dry_run:
            placed_dtos = []
            for t in matched:
                # Tracks without a file, or for which no destination name can
                # be built, are left out of the preview.
                if not t.file_path:
                    continue
                try:
                    filename = _build_dest_name(t, dest_path.stem)
                except ValueError:
                    continue
                placed_dtos.append(
                    PlacedSubtitle(
                        source=str(t.file_path),
                        destination=str(dest_path.parent / filename),
                        filename=filename,
                    )
                )
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=placed_dtos,
                skipped_count=0,
            )

        # --- Place ---
        placer = SubtitlePlacer()
        place_result = placer.place(matched, dest_path)

        # --- Persist ---
        # History is only written when at least one track was actually placed.
        if place_result.placed:
            pairs = _pair_placed_with_tracks(place_result.placed, matched)
            store.append_history(pairs, season, episode, release_group)

        placed_dtos = [
            PlacedSubtitle(
                source=str(p.source),
                destination=str(p.destination),
                filename=p.filename,
            )
            for p in place_result.placed
        ]

        return ManageSubtitlesResponse(
            status="ok",
            video_path=destination_video,
            placed=placed_dtos,
            skipped_count=place_result.skipped_count,
        )

    def _resolve_pattern(
        self,
        kb: SubtitleKnowledgeBase,
        prober: FfprobeMediaProber,
        scanner: PathlibFilesystemScanner,
        store: SubtitleMetadataStore,
        source_path: Path,
        confirmed_pattern_id: str | None,
        release_group: str | None,
    ):
        """
        Pick the subtitle pattern to use, trying in order:

        1. ``confirmed_pattern_id`` from the caller, if the knowledge base
           knows it (an unknown id is logged and ignored).
        2. The pattern id previously confirmed in the metadata store.
        3. Auto-detection on the folder containing ``source_path``, accepted
           only at or above ``_MIN_DETECTION_CONFIDENCE``.
        4. The knowledge base's "adjacent" pattern.

        Returns whatever the knowledge base or detector yields for the chosen
        step; the caller treats a ``None`` result as "pattern not found".
        ``release_group`` is accepted but not read by this method.
        """
        # 1. Explicit override from caller
        if confirmed_pattern_id:
            p = kb.pattern(confirmed_pattern_id)
            if p:
                return p
            logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

        # 2. Previously confirmed in metadata store
        stored_id = store.confirmed_pattern()
        if stored_id:
            p = kb.pattern(stored_id)
            if p:
                logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                return p

        # 3. Auto-detect
        release_root = source_path.parent
        detector = PatternDetector(kb, prober, scanner)
        result = detector.detect(release_root, source_path)

        if (
            result["detected"]
            and result["confidence"] >= self._MIN_DETECTION_CONFIDENCE
        ):
            logger.info(
                f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                f"(confidence={result['confidence']:.2f})"
            )
            return result["detected"]

        # 4. Fallback — adjacent (safest default)
        logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
        return kb.pattern("adjacent")
|
|
|
|
|
|
def _to_unresolved_dto(
    track: SubtitleCandidate, min_confidence: float = 0.7
) -> UnresolvedTrack:
    """
    Build the UnresolvedTrack DTO for a track that could not be resolved.

    The reason is "unknown_language" when the track has no language and
    "low_confidence" otherwise. ``min_confidence`` is accepted but not read
    by this function.
    """
    if track.language is None:
        reason = "unknown_language"
    else:
        reason = "low_confidence"

    # A track without a file is reported with file_path=None rather than "None".
    path_text = str(track.file_path) if track.file_path else None

    return UnresolvedTrack(
        raw_tokens=track.raw_tokens,
        file_path=path_text,
        file_size_kb=track.file_size_kb,
        reason=reason,
    )
|
|
|
|
|
|
def _pair_placed_with_tracks(
|
|
placed: list[PlacedTrack],
|
|
tracks: list[SubtitleCandidate],
|
|
) -> list[tuple[PlacedTrack, SubtitleCandidate]]:
|
|
"""
|
|
Pair each PlacedTrack with its originating SubtitleCandidate by source path.
|
|
Falls back to positional matching if paths don't align.
|
|
"""
|
|
track_by_path = {t.file_path: t for t in tracks if t.file_path}
|
|
pairs = []
|
|
for p in placed:
|
|
track = track_by_path.get(p.source)
|
|
if track is None and tracks:
|
|
track = tracks[0] # positional fallback
|
|
if track:
|
|
pairs.append((p, track))
|
|
return pairs
|