feat: split resolve_destination, persona-driven prompts, qBittorrent relocation

Destination resolution
- Replace the single ResolveDestinationUseCase with four dedicated
  functions, one per release type:
    resolve_season_destination    (season pack, folder move)
    resolve_episode_destination   (single episode, file move)
    resolve_movie_destination     (movie, file move)
    resolve_series_destination    (multi-season pack, folder move)
- Each returns a dedicated DTO carrying only the fields relevant to
  that release type — no more polymorphic ResolvedDestination with
  half the fields unused depending on the case.
- Looser series folder matching: exact computed-name match is reused
  silently; any deviation (different group, multiple candidates) now
  prompts the user with all options including the computed name.

Agent tools
- Four new tools wrapping the use cases above; old resolve_destination
  removed from the registry.
- New move_to_destination tool: create_folder + move, chained — used
  after a resolve_* call to perform the actual relocation.
- Low-level filesystem_operations module (create_folder, move via mv)
  for instant same-FS renames (ZFS).

Prompt & persona
- New PromptBuilder (alfred/agent/prompt.py) replacing prompts.py:
  identity + personality block, situational expressions, memory
  schema, episodic/STM/config context, tool catalogue.
- Per-user expression system: knowledge/users/common.yaml +
  {username}.yaml are merged at runtime; one phrase per situation
  (greeting/success/error/...) is sampled into the system prompt.

qBittorrent integration
- Credentials now come from settings (qbittorrent_url/username/password)
  instead of hardcoded defaults.
- New client methods: find_by_name, set_location, recheck — the trio
  needed to update a torrent's save path and re-verify after a move.
- Host→container path translation settings (qbittorrent_host_path /
  qbittorrent_container_path) for docker-mounted setups.

Subtitles
- Identifier: strip parenthesized qualifiers (simplified, brazil…) at
  tokenization; new _tokenize_suffix used for the episode_subfolder
  pattern so episode-stem tokens no longer pollute language detection.
- Placer: extract _build_dest_name so it can be reused by the new
  dry_run path in ManageSubtitlesUseCase.
- Knowledge: add yue, ell, ind, msa, rus, vie, heb, tam, tel, tha,
  hin, ukr; add 'fre' to fra; add 'simplified'/'traditional' to zho.

Misc
- LTM workspace: add 'trash' folder slot.
- Default LLM provider switched to deepseek.
- testing/debug_release.py: CLI to parse a release, hit TMDB, and
  dry-run the destination resolution end-to-end.
This commit is contained in:
2026-05-14 05:01:59 +02:00
parent 1723b9fa53
commit e45465d52d
81 changed files with 2904 additions and 896 deletions
+91 -32
View File
@@ -1,9 +1,9 @@
"""SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
import json
import logging
import re
import subprocess
import json
from pathlib import Path
from ...shared.value_objects import ImdbId
@@ -15,10 +15,28 @@ logger = logging.getLogger(__name__)
def _tokenize(name: str) -> list[str]:
"""Split a filename stem into lowercase tokens."""
"""Split a filename stem into lowercase tokens, stripping parentheses."""
# Strip parenthesized qualifiers like (simplified), (canada), (brazil)
name = re.sub(r"\([^)]*\)", "", name)
return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]
def _tokenize_suffix(stem: str, episode_stem: str) -> list[str]:
    """
    Tokenize only the part of a subtitle stem that follows the episode stem.

    For the episode_subfolder pattern the filename is
    {episode_stem}.{lang_tokens}; only the trailing language tokens are
    wanted.

    Args:
        stem: Stem of the subtitle file.
        episode_stem: Stem of the video file the subtitle belongs to.

    Returns:
        The tokens found after the episode stem (compared case-insensitively).
        Falls back to tokenizing the whole stem when it does not start with
        episode_stem, or when nothing tokenizable follows the prefix.
    """
    stem_lower = stem.lower()
    prefix = episode_stem.lower()
    if stem_lower.startswith(prefix):
        # Slice the lowercased stem, not the original one: str.lower() can
        # change the length of a string (e.g. U+0130), so len(prefix) is only
        # a valid offset into stem_lower. _tokenize lowercases anyway.
        tokens = _tokenize(stem_lower[len(prefix) :])
        if tokens:
            return tokens
    return _tokenize(stem)
def _count_entries(path: Path) -> int:
"""Return the entry count of an SRT file by finding the last cue number."""
try:
@@ -79,17 +97,29 @@ class SubtitleIdentifier:
try:
result = subprocess.run(
[
"ffprobe", "-v", "quiet",
"-print_format", "json",
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-select_streams", "s",
"-select_streams",
"s",
str(video_path),
],
capture_output=True, text=True, timeout=30,
capture_output=True,
text=True,
timeout=30,
)
data = json.loads(result.stdout)
except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError) as e:
logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
except (
subprocess.TimeoutExpired,
json.JSONDecodeError,
FileNotFoundError,
) as e:
logger.debug(
f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}"
)
return []
tracks = []
@@ -108,39 +138,50 @@ class SubtitleIdentifier:
else:
stype = SubtitleType.STANDARD
tracks.append(SubtitleTrack(
language=lang,
format=None,
subtitle_type=stype,
is_embedded=True,
raw_tokens=[lang_code] if lang_code else [],
))
tracks.append(
SubtitleTrack(
language=lang,
format=None,
subtitle_type=stype,
is_embedded=True,
raw_tokens=[lang_code] if lang_code else [],
)
)
logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
logger.debug(
f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}"
)
return tracks
# ------------------------------------------------------------------
# External tracks — filesystem scan per pattern strategy
# ------------------------------------------------------------------
def _scan_external(
    self, video_path: Path, pattern: SubtitlePattern
) -> list[SubtitleTrack]:
    """
    Gather external subtitle files using the pattern's scan strategy and
    classify them.

    The episode stem is forwarded to the classifier only for the
    episode-subfolder strategy; the other strategies pass None. The
    subtitles folder defaults to "Subs" when the pattern has no root_folder.
    An unrecognized strategy yields an empty list.
    """
    mode = pattern.scan_strategy

    if mode == ScanStrategy.ADJACENT:
        return self._classify_files(
            self._find_adjacent(video_path), pattern, episode_stem=None
        )

    if mode == ScanStrategy.FLAT:
        flat_files = self._find_flat(video_path, pattern.root_folder or "Subs")
        return self._classify_files(flat_files, pattern, episode_stem=None)

    if mode == ScanStrategy.EPISODE_SUBFOLDER:
        nested_files, stem = self._find_episode_subfolder(
            video_path, pattern.root_folder or "Subs"
        )
        return self._classify_files(nested_files, pattern, episode_stem=stem)

    return []
def _find_adjacent(self, video_path: Path) -> list[Path]:
return [
p for p in sorted(video_path.parent.iterdir())
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
p
for p in sorted(video_path.parent.iterdir())
if p.is_file()
and p.suffix.lower() in self.kb.known_extensions()
and p.stem != video_path.stem
]
@@ -152,17 +193,22 @@ class SubtitleIdentifier:
if not subs_dir.is_dir():
return []
return [
p for p in sorted(subs_dir.iterdir())
p
for p in sorted(subs_dir.iterdir())
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
]
def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
def _find_episode_subfolder(
self, video_path: Path, root_folder: str
) -> tuple[list[Path], str]:
"""
Look for Subs/{episode_stem}/*.srt
Checks two locations:
1. Adjacent to the video: video_path.parent / root_folder / video_path.stem
2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem
Returns (files, episode_stem) so the classifier can strip the prefix.
"""
episode_stem = video_path.stem
candidates_dirs = [
@@ -172,22 +218,30 @@ class SubtitleIdentifier:
for subs_dir in candidates_dirs:
if subs_dir.is_dir():
files = [
p for p in sorted(subs_dir.iterdir())
p
for p in sorted(subs_dir.iterdir())
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
]
if files:
logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
return files
return []
logger.debug(
f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}"
)
return files, episode_stem
return [], episode_stem
# ------------------------------------------------------------------
# Classification
# ------------------------------------------------------------------
def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
def _classify_files(
self,
paths: list[Path],
pattern: SubtitlePattern,
episode_stem: str | None = None,
) -> list[SubtitleTrack]:
tracks = []
for path in paths:
track = self._classify_single(path)
track = self._classify_single(path, episode_stem=episode_stem)
tracks.append(track)
# Post-process: if multiple tracks share same language but type is ambiguous,
@@ -197,9 +251,15 @@ class SubtitleIdentifier:
return tracks
def _classify_single(self, path: Path) -> SubtitleTrack:
def _classify_single(
self, path: Path, episode_stem: str | None = None
) -> SubtitleTrack:
fmt = self.kb.format_for_extension(path.suffix)
tokens = _tokenize(path.stem)
tokens = (
_tokenize_suffix(path.stem, episode_stem)
if episode_stem
else _tokenize(path.stem)
)
language = None
subtitle_type = SubtitleType.UNKNOWN
@@ -250,7 +310,6 @@ class SubtitleIdentifier:
Only applied when type_detection = size_and_count.
"""
from itertools import groupby
# Group by language code
lang_groups: dict[str, list[SubtitleTrack]] = {}
+4 -2
View File
@@ -3,7 +3,7 @@
import logging
from ..entities import SubtitleTrack
from ..value_objects import SubtitleMatchingRules, SubtitleType
from ..value_objects import SubtitleMatchingRules
logger = logging.getLogger(__name__)
@@ -50,7 +50,9 @@ class SubtitleMatcher:
)
return matched, unresolved
def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
def _passes_filters(
self, track: SubtitleTrack, rules: SubtitleMatchingRules
) -> bool:
# Language filter
if rules.preferred_languages:
if not track.language:
@@ -49,13 +49,19 @@ class PatternDetector:
try:
result = subprocess.run(
[
"ffprobe", "-v", "quiet",
"-print_format", "json",
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-select_streams", "s",
"-select_streams",
"s",
str(video_path),
],
capture_output=True, text=True, timeout=30,
capture_output=True,
text=True,
timeout=30,
)
data = json.loads(result.stdout)
return len(data.get("streams", [])) > 0
@@ -67,7 +73,7 @@ class PatternDetector:
known_exts = self.kb.known_extensions()
findings: dict = {
"has_subs_folder": False,
"subs_strategy": None, # "flat" | "episode_subfolder"
"subs_strategy": None, # "flat" | "episode_subfolder"
"subs_root": None,
"adjacent_subs": False,
"has_embedded": self._has_embedded_subtitles(sample_video),
@@ -87,15 +93,22 @@ class PatternDetector:
# Is it flat or episode_subfolder?
children = list(subs_candidate.iterdir())
sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
sub_files = [
c
for c in children
if c.is_file() and c.suffix.lower() in known_exts
]
sub_dirs = [c for c in children if c.is_dir()]
if sub_dirs and not sub_files:
findings["subs_strategy"] = "episode_subfolder"
# Count files in a sample subfolder
sample_sub = sub_dirs[0]
sample_files = [f for f in sample_sub.iterdir()
if f.is_file() and f.suffix.lower() in known_exts]
sample_files = [
f
for f in sample_sub.iterdir()
if f.is_file() and f.suffix.lower() in known_exts
]
findings["files_per_episode"] = len(sample_files)
# Check naming conventions
for f in sample_files:
@@ -103,22 +116,27 @@ class PatternDetector:
parts = stem.split("_")
if parts[0].isdigit():
findings["has_numeric_prefix"] = True
if any(self.kb.is_known_lang_token(t.lower())
for t in stem.replace("_", ".").split(".")):
if any(
self.kb.is_known_lang_token(t.lower())
for t in stem.replace("_", ".").split(".")
):
findings["has_lang_tokens"] = True
else:
findings["subs_strategy"] = "flat"
findings["files_per_episode"] = len(sub_files)
for f in sub_files:
if any(self.kb.is_known_lang_token(t.lower())
for t in f.stem.replace("_", ".").split(".")):
if any(
self.kb.is_known_lang_token(t.lower())
for t in f.stem.replace("_", ".").split(".")
):
findings["has_lang_tokens"] = True
break
# Check adjacent subs (next to the video)
if not findings["has_subs_folder"]:
adjacent = [
p for p in sample_video.parent.iterdir()
p
for p in sample_video.parent.iterdir()
if p.is_file() and p.suffix.lower() in known_exts
]
if adjacent:
@@ -157,7 +175,9 @@ class PatternDetector:
total += 1
if findings.get("has_embedded"):
score += 1.0
if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
if not findings.get("has_subs_folder") and not findings.get(
"adjacent_subs"
):
score += 0.5
total += 0.5
+31 -7
View File
@@ -10,6 +10,28 @@ from ..entities import SubtitleTrack
logger = logging.getLogger(__name__)
def _build_dest_name(track: SubtitleTrack, video_stem: str) -> str:
"""
Build the destination filename for a subtitle track.
Format: {video_stem}.{lang}.{ext}
{video_stem}.{lang}.sdh.{ext}
{video_stem}.{lang}.forced.{ext}
"""
from ..value_objects import SubtitleType
if not track.language or not track.format:
raise ValueError("Cannot compute destination name: language or format missing")
ext = track.format.extensions[0].lstrip(".")
parts = [video_stem, track.language.code]
if track.subtitle_type == SubtitleType.SDH:
parts.append("sdh")
elif track.subtitle_type == SubtitleType.FORCED:
parts.append("forced")
return ".".join(parts) + "." + ext
@dataclass
class PlacedTrack:
source: Path
@@ -20,7 +42,7 @@ class PlacedTrack:
@dataclass
class PlaceResult:
placed: list[PlacedTrack]
skipped: list[tuple[SubtitleTrack, str]] # (track, reason)
skipped: list[tuple[SubtitleTrack, str]] # (track, reason)
@property
def placed_count(self) -> int:
@@ -62,7 +84,7 @@ class SubtitlePlacer:
continue
try:
dest_name = track.destination_name
dest_name = _build_dest_name(track, destination_video.stem)
except ValueError as e:
skipped.append((track, str(e)))
continue
@@ -76,11 +98,13 @@ class SubtitlePlacer:
try:
os.link(track.file_path, dest_path)
placed.append(PlacedTrack(
source=track.file_path,
destination=dest_path,
filename=dest_name,
))
placed.append(
PlacedTrack(
source=track.file_path,
destination=dest_path,
filename=dest_name,
)
)
logger.info(f"SubtitlePlacer: placed {dest_name}")
except OSError as e:
logger.warning(f"SubtitlePlacer: failed to place {dest_name}: {e}")