feat: split resolve_destination, persona-driven prompts, qBittorrent relocation

Destination resolution
- Replace the single ResolveDestinationUseCase with four dedicated functions, one per release type:
    resolve_season_destination  (season pack, folder move)
    resolve_episode_destination (single episode, file move)
    resolve_movie_destination   (movie, file move)
    resolve_series_destination  (multi-season pack, folder move)
- Each returns a dedicated DTO carrying only the fields relevant to that release type — no more polymorphic ResolvedDestination with half the fields unused depending on the case.
- Looser series folder matching: an exact computed-name match is reused silently; any deviation (different group, multiple candidates) now prompts the user with all options, including the computed name.

Agent tools
- Four new tools wrapping the use cases above; the old resolve_destination is removed from the registry.
- New move_to_destination tool: create_folder + move, chained — used after a resolve_* call to perform the actual relocation.
- Low-level filesystem_operations module (create_folder, move via mv) for instant same-FS renames (ZFS).

Prompt & persona
- New PromptBuilder (alfred/agent/prompt.py) replacing prompts.py: identity + personality block, situational expressions, memory schema, episodic/STM/config context, tool catalogue.
- Per-user expression system: knowledge/users/common.yaml + {username}.yaml are merged at runtime; one phrase per situation (greeting/success/error/...) is sampled into the system prompt.

qBittorrent integration
- Credentials now come from settings (qbittorrent_url/username/password) instead of hardcoded defaults.
- New client methods: find_by_name, set_location, recheck — the trio needed to update a torrent's save path and re-verify after a move.
- Host→container path translation settings (qbittorrent_host_path / qbittorrent_container_path) for docker-mounted setups.
Subtitles
- Identifier: strip parenthesized qualifiers (simplified, brazil…) at tokenization; new _tokenize_suffix used for the episode_subfolder pattern so episode-stem tokens no longer pollute language detection.
- Placer: extract _build_dest_name so it can be reused by the new dry_run path in ManageSubtitlesUseCase.
- Knowledge: add yue, ell, ind, msa, rus, vie, heb, tam, tel, tha, hin, ukr; add 'fre' to fra; add 'simplified'/'traditional' to zho.

Misc
- LTM workspace: add 'trash' folder slot.
- Default LLM provider switched to deepseek.
- testing/debug_release.py: CLI to parse a release, hit TMDB, and dry-run the destination resolution end-to-end.
2026-05-14 05:01:59 +02:00
parent 1723b9fa53
commit e45465d52d
81 changed files with 2904 additions and 896 deletions
@@ -1,9 +1,9 @@
 """SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""

+import json
 import logging
 import re
 import subprocess
-import json
 from pathlib import Path

 from ...shared.value_objects import ImdbId
@@ -15,10 +15,28 @@ logger = logging.getLogger(__name__)


 def _tokenize(name: str) -> list[str]:
-    """Split a filename stem into lowercase tokens."""
+    """Split a filename stem into lowercase tokens, dropping parenthesized qualifiers."""
+    # Strip parenthesized qualifiers like (simplified), (canada), (brazil)
+    name = re.sub(r"\([^)]*\)", "", name)
    return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]


+def _tokenize_suffix(stem: str, episode_stem: str) -> list[str]:
+    """
+    For episode_subfolder pattern: the filename is {episode_stem}.{lang_tokens}.
+    Return only the tokens that come after the episode stem portion.
+    Falls back to full tokenization if the stem doesn't start with episode_stem (case-insensitive) or no tokens follow it.
+    """
+    stem_lower = stem.lower()
+    prefix = episode_stem.lower()
+    if stem_lower.startswith(prefix):
+        suffix = stem[len(prefix) :]
+        tokens = _tokenize(suffix)
+        if tokens:
+            return tokens
+    return _tokenize(stem)
+
+
 def _count_entries(path: Path) -> int:
    """Return the entry count of an SRT file by finding the last cue number."""
    try:
@@ -79,17 +97,29 @@ class SubtitleIdentifier:
        try:
            result = subprocess.run(
                [
-                    "ffprobe", "-v", "quiet",
-                    "-print_format", "json",
+                    "ffprobe",
+                    "-v",
+                    "quiet",
+                    "-print_format",
+                    "json",
                    "-show_streams",
-                    "-select_streams", "s",
+                    "-select_streams",
+                    "s",
                    str(video_path),
                ],
-                capture_output=True, text=True, timeout=30,
+                capture_output=True,
+                text=True,
+                timeout=30,
            )
            data = json.loads(result.stdout)
-        except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError) as e:
-            logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
+        except (
+            subprocess.TimeoutExpired,
+            json.JSONDecodeError,
+            FileNotFoundError,
+        ) as e:
+            logger.debug(
+                f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}"
+            )
            return []

        tracks = []
@@ -108,39 +138,50 @@ class SubtitleIdentifier:
            else:
                stype = SubtitleType.STANDARD

-            tracks.append(SubtitleTrack(
-                language=lang,
-                format=None,
-                subtitle_type=stype,
-                is_embedded=True,
-                raw_tokens=[lang_code] if lang_code else [],
-            ))
+            tracks.append(
+                SubtitleTrack(
+                    language=lang,
+                    format=None,
+                    subtitle_type=stype,
+                    is_embedded=True,
+                    raw_tokens=[lang_code] if lang_code else [],
+                )
+            )

-        logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
+        logger.debug(
+            f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}"
+        )
        return tracks

    # ------------------------------------------------------------------
    # External tracks — filesystem scan per pattern strategy
    # ------------------------------------------------------------------

-    def _scan_external(self, video_path: Path, pattern: SubtitlePattern) -> list[SubtitleTrack]:
+    def _scan_external(
+        self, video_path: Path, pattern: SubtitlePattern
+    ) -> list[SubtitleTrack]:
        strategy = pattern.scan_strategy
+        episode_stem: str | None = None

        if strategy == ScanStrategy.ADJACENT:
            candidates = self._find_adjacent(video_path)
        elif strategy == ScanStrategy.FLAT:
            candidates = self._find_flat(video_path, pattern.root_folder or "Subs")
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
-            candidates = self._find_episode_subfolder(video_path, pattern.root_folder or "Subs")
+            candidates, episode_stem = self._find_episode_subfolder(
+                video_path, pattern.root_folder or "Subs"
+            )
        else:
            return []

-        return self._classify_files(candidates, pattern)
+        return self._classify_files(candidates, pattern, episode_stem=episode_stem)

    def _find_adjacent(self, video_path: Path) -> list[Path]:
        return [
-            p for p in sorted(video_path.parent.iterdir())
-            if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
+            p
+            for p in sorted(video_path.parent.iterdir())
+            if p.is_file()
+            and p.suffix.lower() in self.kb.known_extensions()
            and p.stem != video_path.stem
        ]

@@ -152,17 +193,22 @@ class SubtitleIdentifier:
        if not subs_dir.is_dir():
            return []
        return [
-            p for p in sorted(subs_dir.iterdir())
+            p
+            for p in sorted(subs_dir.iterdir())
            if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
        ]

-    def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
+    def _find_episode_subfolder(
+        self, video_path: Path, root_folder: str
+    ) -> tuple[list[Path], str]:
        """
        Look for Subs/{episode_stem}/*.srt

        Checks two locations:
          1. Adjacent to the video: video_path.parent / root_folder / video_path.stem
          2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem
+
+        Returns (files, episode_stem) so the classifier can strip the prefix.
        """
        episode_stem = video_path.stem
        candidates_dirs = [
@@ -172,22 +218,30 @@ class SubtitleIdentifier:
        for subs_dir in candidates_dirs:
            if subs_dir.is_dir():
                files = [
-                    p for p in sorted(subs_dir.iterdir())
+                    p
+                    for p in sorted(subs_dir.iterdir())
                    if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
                ]
                if files:
-                    logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
-                    return files
-        return []
+                    logger.debug(
+                        f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}"
+                    )
+                    return files, episode_stem
+        return [], episode_stem

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

-    def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
+    def _classify_files(
+        self,
+        paths: list[Path],
+        pattern: SubtitlePattern,
+        episode_stem: str | None = None,
+    ) -> list[SubtitleTrack]:
        tracks = []
        for path in paths:
-            track = self._classify_single(path)
+            track = self._classify_single(path, episode_stem=episode_stem)
            tracks.append(track)

        # Post-process: if multiple tracks share same language but type is ambiguous,
@@ -197,9 +251,15 @@ class SubtitleIdentifier:

        return tracks

-    def _classify_single(self, path: Path) -> SubtitleTrack:
+    def _classify_single(
+        self, path: Path, episode_stem: str | None = None
+    ) -> SubtitleTrack:
        fmt = self.kb.format_for_extension(path.suffix)
-        tokens = _tokenize(path.stem)
+        tokens = (
+            _tokenize_suffix(path.stem, episode_stem)
+            if episode_stem
+            else _tokenize(path.stem)
+        )

        language = None
        subtitle_type = SubtitleType.UNKNOWN
@@ -250,7 +310,6 @@ class SubtitleIdentifier:

        Only applied when type_detection = size_and_count.
        """
-        from itertools import groupby

        # Group by language code
        lang_groups: dict[str, list[SubtitleTrack]] = {}
@@ -3,7 +3,7 @@
 import logging

 from ..entities import SubtitleTrack
-from ..value_objects import SubtitleMatchingRules, SubtitleType
+from ..value_objects import SubtitleMatchingRules

 logger = logging.getLogger(__name__)

@@ -50,7 +50,9 @@ class SubtitleMatcher:
        )
        return matched, unresolved

-    def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
+    def _passes_filters(
+        self, track: SubtitleTrack, rules: SubtitleMatchingRules
+    ) -> bool:
        # Language filter
        if rules.preferred_languages:
            if not track.language:
@@ -49,13 +49,19 @@ class PatternDetector:
        try:
            result = subprocess.run(
                [
-                    "ffprobe", "-v", "quiet",
-                    "-print_format", "json",
+                    "ffprobe",
+                    "-v",
+                    "quiet",
+                    "-print_format",
+                    "json",
                    "-show_streams",
-                    "-select_streams", "s",
+                    "-select_streams",
+                    "s",
                    str(video_path),
                ],
-                capture_output=True, text=True, timeout=30,
+                capture_output=True,
+                text=True,
+                timeout=30,
            )
            data = json.loads(result.stdout)
            return len(data.get("streams", [])) > 0
@@ -67,7 +73,7 @@ class PatternDetector:
        known_exts = self.kb.known_extensions()
        findings: dict = {
            "has_subs_folder": False,
-            "subs_strategy": None,       # "flat" | "episode_subfolder"
+            "subs_strategy": None,  # "flat" | "episode_subfolder"
            "subs_root": None,
            "adjacent_subs": False,
            "has_embedded": self._has_embedded_subtitles(sample_video),
@@ -87,15 +93,22 @@ class PatternDetector:

                # Is it flat or episode_subfolder?
                children = list(subs_candidate.iterdir())
-                sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
+                sub_files = [
+                    c
+                    for c in children
+                    if c.is_file() and c.suffix.lower() in known_exts
+                ]
                sub_dirs = [c for c in children if c.is_dir()]

                if sub_dirs and not sub_files:
                    findings["subs_strategy"] = "episode_subfolder"
                    # Count files in a sample subfolder
                    sample_sub = sub_dirs[0]
-                    sample_files = [f for f in sample_sub.iterdir()
-                                    if f.is_file() and f.suffix.lower() in known_exts]
+                    sample_files = [
+                        f
+                        for f in sample_sub.iterdir()
+                        if f.is_file() and f.suffix.lower() in known_exts
+                    ]
                    findings["files_per_episode"] = len(sample_files)
                    # Check naming conventions
                    for f in sample_files:
@@ -103,22 +116,27 @@ class PatternDetector:
                        parts = stem.split("_")
                        if parts[0].isdigit():
                            findings["has_numeric_prefix"] = True
-                        if any(self.kb.is_known_lang_token(t.lower())
-                               for t in stem.replace("_", ".").split(".")):
+                        if any(
+                            self.kb.is_known_lang_token(t.lower())
+                            for t in stem.replace("_", ".").split(".")
+                        ):
                            findings["has_lang_tokens"] = True
                else:
                    findings["subs_strategy"] = "flat"
                    findings["files_per_episode"] = len(sub_files)
                    for f in sub_files:
-                        if any(self.kb.is_known_lang_token(t.lower())
-                               for t in f.stem.replace("_", ".").split(".")):
+                        if any(
+                            self.kb.is_known_lang_token(t.lower())
+                            for t in f.stem.replace("_", ".").split(".")
+                        ):
                            findings["has_lang_tokens"] = True
                break

        # Check adjacent subs (next to the video)
        if not findings["has_subs_folder"]:
            adjacent = [
-                p for p in sample_video.parent.iterdir()
+                p
+                for p in sample_video.parent.iterdir()
                if p.is_file() and p.suffix.lower() in known_exts
            ]
            if adjacent:
@@ -157,7 +175,9 @@ class PatternDetector:
            total += 1
            if findings.get("has_embedded"):
                score += 1.0
-            if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
+            if not findings.get("has_subs_folder") and not findings.get(
+                "adjacent_subs"
+            ):
                score += 0.5
                total += 0.5

@@ -10,6 +10,28 @@ from ..entities import SubtitleTrack
 logger = logging.getLogger(__name__)


+def _build_dest_name(track: SubtitleTrack, video_stem: str) -> str:
+    """
+    Build the destination filename for a subtitle track.
+
+    Format: {video_stem}.{lang}.{ext}
+            {video_stem}.{lang}.sdh.{ext}
+            {video_stem}.{lang}.forced.{ext}
+    """
+    from ..value_objects import SubtitleType
+
+    if not track.language or not track.format:
+        raise ValueError("Cannot compute destination name: language or format missing")
+
+    ext = track.format.extensions[0].lstrip(".")
+    parts = [video_stem, track.language.code]
+    if track.subtitle_type == SubtitleType.SDH:
+        parts.append("sdh")
+    elif track.subtitle_type == SubtitleType.FORCED:
+        parts.append("forced")
+    return ".".join(parts) + "." + ext
+
+
@dataclass
 class PlacedTrack:
    source: Path
@@ -20,7 +42,7 @@ class PlacedTrack:
@dataclass
 class PlaceResult:
    placed: list[PlacedTrack]
-    skipped: list[tuple[SubtitleTrack, str]]   # (track, reason)
+    skipped: list[tuple[SubtitleTrack, str]]  # (track, reason)

    @property
    def placed_count(self) -> int:
@@ -62,7 +84,7 @@ class SubtitlePlacer:
                continue

            try:
-                dest_name = track.destination_name
+                dest_name = _build_dest_name(track, destination_video.stem)
            except ValueError as e:
                skipped.append((track, str(e)))
                continue
@@ -76,11 +98,13 @@ class SubtitlePlacer:

            try:
                os.link(track.file_path, dest_path)
-                placed.append(PlacedTrack(
-                    source=track.file_path,
-                    destination=dest_path,
-                    filename=dest_name,
-                ))
+                placed.append(
+                    PlacedTrack(
+                        source=track.file_path,
+                        destination=dest_path,
+                        filename=dest_name,
+                    )
+                )
                logger.info(f"SubtitlePlacer: placed {dest_name}")
            except OSError as e:
                logger.warning(f"SubtitlePlacer: failed to place {dest_name}: {e}")