feat: split resolve_destination, persona-driven prompts, qBittorrent relocation
Destination resolution
- Replace the single ResolveDestinationUseCase with four dedicated
functions, one per release type:
resolve_season_destination (season pack, folder move)
resolve_episode_destination (single episode, file move)
resolve_movie_destination (movie, file move)
resolve_series_destination (multi-season pack, folder move)
- Each returns a dedicated DTO carrying only the fields relevant to
that release type — no more polymorphic ResolvedDestination with
half the fields unused depending on the case.
- Looser series folder matching: exact computed-name match is reused
silently; any deviation (different group, multiple candidates) now
prompts the user with all options including the computed name.
Agent tools
- Four new tools wrapping the use cases above; old resolve_destination
removed from the registry.
- New move_to_destination tool: create_folder + move, chained — used
after a resolve_* call to perform the actual relocation.
- Low-level filesystem_operations module (create_folder, move via mv)
for instant same-FS renames (ZFS).
Prompt & persona
- New PromptBuilder (alfred/agent/prompt.py) replacing prompts.py:
identity + personality block, situational expressions, memory
schema, episodic/STM/config context, tool catalogue.
- Per-user expression system: knowledge/users/common.yaml +
{username}.yaml are merged at runtime; one phrase per situation
(greeting/success/error/...) is sampled into the system prompt.
qBittorrent integration
- Credentials now come from settings (qbittorrent_url/username/password)
instead of hardcoded defaults.
- New client methods: find_by_name, set_location, recheck — the trio
needed to update a torrent's save path and re-verify after a move.
- Host→container path translation settings (qbittorrent_host_path /
qbittorrent_container_path) for docker-mounted setups.
Subtitles
- Identifier: strip parenthesized qualifiers (simplified, brazil…) at
tokenization; new _tokenize_suffix used for the episode_subfolder
pattern so episode-stem tokens no longer pollute language detection.
- Placer: extract _build_dest_name so it can be reused by the new
dry_run path in ManageSubtitlesUseCase.
- Knowledge: add yue, ell, ind, msa, rus, vie, heb, tam, tel, tha,
hin, ukr; add 'fre' to fra; add 'simplified'/'traditional' to zho.
Misc
- LTM workspace: add 'trash' folder slot.
- Default LLM provider switched to deepseek.
- testing/debug_release.py: CLI to parse a release, hit TMDB, and
dry-run the destination resolution end-to-end.
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
"""SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from ...shared.value_objects import ImdbId
|
||||
@@ -15,10 +15,28 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _tokenize(name: str) -> list[str]:
|
||||
"""Split a filename stem into lowercase tokens."""
|
||||
"""Split a filename stem into lowercase tokens, stripping parentheses."""
|
||||
# Strip parenthesized qualifiers like (simplified), (canada), (brazil)
|
||||
name = re.sub(r"\([^)]*\)", "", name)
|
||||
return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]
|
||||
|
||||
|
||||
def _tokenize_suffix(stem: str, episode_stem: str) -> list[str]:
    """
    Tokenize only the part of a subtitle stem that follows the episode stem.

    For the episode_subfolder pattern the filename is
    {episode_stem}.{lang_tokens}; tokenizing just the trailing part keeps
    tokens from the episode name out of the result.

    The prefix comparison is case-insensitive. Falls back to tokenizing the
    whole stem when it does not start with episode_stem, or when nothing
    tokenizable remains after the prefix.
    """
    stem_lower = stem.lower()
    prefix = episode_stem.lower()
    if stem_lower.startswith(prefix):
        # Slice the lowercased stem, not the original one: lower() can change
        # a string's length (e.g. "İ" becomes two code points), so len(prefix)
        # is only a valid offset into stem_lower. _tokenize lowercases every
        # token anyway, so the result is unchanged for ordinary names.
        tokens = _tokenize(stem_lower[len(prefix) :])
        if tokens:
            return tokens
    return _tokenize(stem)
|
||||
|
||||
|
||||
def _count_entries(path: Path) -> int:
|
||||
"""Return the entry count of an SRT file by finding the last cue number."""
|
||||
try:
|
||||
@@ -79,17 +97,29 @@ class SubtitleIdentifier:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
"ffprobe", "-v", "quiet",
|
||||
"-print_format", "json",
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_streams",
|
||||
"-select_streams", "s",
|
||||
"-select_streams",
|
||||
"s",
|
||||
str(video_path),
|
||||
],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
data = json.loads(result.stdout)
|
||||
except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError) as e:
|
||||
logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
|
||||
except (
|
||||
subprocess.TimeoutExpired,
|
||||
json.JSONDecodeError,
|
||||
FileNotFoundError,
|
||||
) as e:
|
||||
logger.debug(
|
||||
f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}"
|
||||
)
|
||||
return []
|
||||
|
||||
tracks = []
|
||||
@@ -108,39 +138,50 @@ class SubtitleIdentifier:
|
||||
else:
|
||||
stype = SubtitleType.STANDARD
|
||||
|
||||
tracks.append(SubtitleTrack(
|
||||
language=lang,
|
||||
format=None,
|
||||
subtitle_type=stype,
|
||||
is_embedded=True,
|
||||
raw_tokens=[lang_code] if lang_code else [],
|
||||
))
|
||||
tracks.append(
|
||||
SubtitleTrack(
|
||||
language=lang,
|
||||
format=None,
|
||||
subtitle_type=stype,
|
||||
is_embedded=True,
|
||||
raw_tokens=[lang_code] if lang_code else [],
|
||||
)
|
||||
)
|
||||
|
||||
logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
|
||||
logger.debug(
|
||||
f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}"
|
||||
)
|
||||
return tracks
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# External tracks — filesystem scan per pattern strategy
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _scan_external(
    self, video_path: Path, pattern: SubtitlePattern
) -> list[SubtitleTrack]:
    """Collect and classify external subtitle files for a video.

    The lookup used depends on ``pattern.scan_strategy``:

    - ADJACENT: files returned by ``_find_adjacent``.
    - FLAT: files returned by ``_find_flat`` for ``pattern.root_folder``
      (``"Subs"`` when unset).
    - EPISODE_SUBFOLDER: files returned by ``_find_episode_subfolder`` for
      the same root folder; the episode stem it returns is forwarded to the
      classifier.

    Any other strategy yields an empty list.
    """
    mode = pattern.scan_strategy

    if mode == ScanStrategy.ADJACENT:
        found = self._find_adjacent(video_path)
        return self._classify_files(found, pattern, episode_stem=None)

    if mode == ScanStrategy.FLAT:
        found = self._find_flat(video_path, pattern.root_folder or "Subs")
        return self._classify_files(found, pattern, episode_stem=None)

    if mode == ScanStrategy.EPISODE_SUBFOLDER:
        # Only this strategy produces an episode stem for the classifier.
        found, stem = self._find_episode_subfolder(
            video_path, pattern.root_folder or "Subs"
        )
        return self._classify_files(found, pattern, episode_stem=stem)

    return []
|
||||
|
||||
def _find_adjacent(self, video_path: Path) -> list[Path]:
|
||||
return [
|
||||
p for p in sorted(video_path.parent.iterdir())
|
||||
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
|
||||
p
|
||||
for p in sorted(video_path.parent.iterdir())
|
||||
if p.is_file()
|
||||
and p.suffix.lower() in self.kb.known_extensions()
|
||||
and p.stem != video_path.stem
|
||||
]
|
||||
|
||||
@@ -152,17 +193,22 @@ class SubtitleIdentifier:
|
||||
if not subs_dir.is_dir():
|
||||
return []
|
||||
return [
|
||||
p for p in sorted(subs_dir.iterdir())
|
||||
p
|
||||
for p in sorted(subs_dir.iterdir())
|
||||
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
|
||||
]
|
||||
|
||||
def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
|
||||
def _find_episode_subfolder(
|
||||
self, video_path: Path, root_folder: str
|
||||
) -> tuple[list[Path], str]:
|
||||
"""
|
||||
Look for Subs/{episode_stem}/*.srt
|
||||
|
||||
Checks two locations:
|
||||
1. Adjacent to the video: video_path.parent / root_folder / video_path.stem
|
||||
2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem
|
||||
|
||||
Returns (files, episode_stem) so the classifier can strip the prefix.
|
||||
"""
|
||||
episode_stem = video_path.stem
|
||||
candidates_dirs = [
|
||||
@@ -172,22 +218,30 @@ class SubtitleIdentifier:
|
||||
for subs_dir in candidates_dirs:
|
||||
if subs_dir.is_dir():
|
||||
files = [
|
||||
p for p in sorted(subs_dir.iterdir())
|
||||
p
|
||||
for p in sorted(subs_dir.iterdir())
|
||||
if p.is_file() and p.suffix.lower() in self.kb.known_extensions()
|
||||
]
|
||||
if files:
|
||||
logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
|
||||
return files
|
||||
return []
|
||||
logger.debug(
|
||||
f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}"
|
||||
)
|
||||
return files, episode_stem
|
||||
return [], episode_stem
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Classification
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
|
||||
def _classify_files(
|
||||
self,
|
||||
paths: list[Path],
|
||||
pattern: SubtitlePattern,
|
||||
episode_stem: str | None = None,
|
||||
) -> list[SubtitleTrack]:
|
||||
tracks = []
|
||||
for path in paths:
|
||||
track = self._classify_single(path)
|
||||
track = self._classify_single(path, episode_stem=episode_stem)
|
||||
tracks.append(track)
|
||||
|
||||
# Post-process: if multiple tracks share same language but type is ambiguous,
|
||||
@@ -197,9 +251,15 @@ class SubtitleIdentifier:
|
||||
|
||||
return tracks
|
||||
|
||||
def _classify_single(self, path: Path) -> SubtitleTrack:
|
||||
def _classify_single(
|
||||
self, path: Path, episode_stem: str | None = None
|
||||
) -> SubtitleTrack:
|
||||
fmt = self.kb.format_for_extension(path.suffix)
|
||||
tokens = _tokenize(path.stem)
|
||||
tokens = (
|
||||
_tokenize_suffix(path.stem, episode_stem)
|
||||
if episode_stem
|
||||
else _tokenize(path.stem)
|
||||
)
|
||||
|
||||
language = None
|
||||
subtitle_type = SubtitleType.UNKNOWN
|
||||
@@ -250,7 +310,6 @@ class SubtitleIdentifier:
|
||||
|
||||
Only applied when type_detection = size_and_count.
|
||||
"""
|
||||
from itertools import groupby
|
||||
|
||||
# Group by language code
|
||||
lang_groups: dict[str, list[SubtitleTrack]] = {}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import logging
|
||||
|
||||
from ..entities import SubtitleTrack
|
||||
from ..value_objects import SubtitleMatchingRules, SubtitleType
|
||||
from ..value_objects import SubtitleMatchingRules
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -50,7 +50,9 @@ class SubtitleMatcher:
|
||||
)
|
||||
return matched, unresolved
|
||||
|
||||
def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
|
||||
def _passes_filters(
|
||||
self, track: SubtitleTrack, rules: SubtitleMatchingRules
|
||||
) -> bool:
|
||||
# Language filter
|
||||
if rules.preferred_languages:
|
||||
if not track.language:
|
||||
|
||||
@@ -49,13 +49,19 @@ class PatternDetector:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
"ffprobe", "-v", "quiet",
|
||||
"-print_format", "json",
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_streams",
|
||||
"-select_streams", "s",
|
||||
"-select_streams",
|
||||
"s",
|
||||
str(video_path),
|
||||
],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
data = json.loads(result.stdout)
|
||||
return len(data.get("streams", [])) > 0
|
||||
@@ -67,7 +73,7 @@ class PatternDetector:
|
||||
known_exts = self.kb.known_extensions()
|
||||
findings: dict = {
|
||||
"has_subs_folder": False,
|
||||
"subs_strategy": None, # "flat" | "episode_subfolder"
|
||||
"subs_strategy": None, # "flat" | "episode_subfolder"
|
||||
"subs_root": None,
|
||||
"adjacent_subs": False,
|
||||
"has_embedded": self._has_embedded_subtitles(sample_video),
|
||||
@@ -87,15 +93,22 @@ class PatternDetector:
|
||||
|
||||
# Is it flat or episode_subfolder?
|
||||
children = list(subs_candidate.iterdir())
|
||||
sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
|
||||
sub_files = [
|
||||
c
|
||||
for c in children
|
||||
if c.is_file() and c.suffix.lower() in known_exts
|
||||
]
|
||||
sub_dirs = [c for c in children if c.is_dir()]
|
||||
|
||||
if sub_dirs and not sub_files:
|
||||
findings["subs_strategy"] = "episode_subfolder"
|
||||
# Count files in a sample subfolder
|
||||
sample_sub = sub_dirs[0]
|
||||
sample_files = [f for f in sample_sub.iterdir()
|
||||
if f.is_file() and f.suffix.lower() in known_exts]
|
||||
sample_files = [
|
||||
f
|
||||
for f in sample_sub.iterdir()
|
||||
if f.is_file() and f.suffix.lower() in known_exts
|
||||
]
|
||||
findings["files_per_episode"] = len(sample_files)
|
||||
# Check naming conventions
|
||||
for f in sample_files:
|
||||
@@ -103,22 +116,27 @@ class PatternDetector:
|
||||
parts = stem.split("_")
|
||||
if parts[0].isdigit():
|
||||
findings["has_numeric_prefix"] = True
|
||||
if any(self.kb.is_known_lang_token(t.lower())
|
||||
for t in stem.replace("_", ".").split(".")):
|
||||
if any(
|
||||
self.kb.is_known_lang_token(t.lower())
|
||||
for t in stem.replace("_", ".").split(".")
|
||||
):
|
||||
findings["has_lang_tokens"] = True
|
||||
else:
|
||||
findings["subs_strategy"] = "flat"
|
||||
findings["files_per_episode"] = len(sub_files)
|
||||
for f in sub_files:
|
||||
if any(self.kb.is_known_lang_token(t.lower())
|
||||
for t in f.stem.replace("_", ".").split(".")):
|
||||
if any(
|
||||
self.kb.is_known_lang_token(t.lower())
|
||||
for t in f.stem.replace("_", ".").split(".")
|
||||
):
|
||||
findings["has_lang_tokens"] = True
|
||||
break
|
||||
|
||||
# Check adjacent subs (next to the video)
|
||||
if not findings["has_subs_folder"]:
|
||||
adjacent = [
|
||||
p for p in sample_video.parent.iterdir()
|
||||
p
|
||||
for p in sample_video.parent.iterdir()
|
||||
if p.is_file() and p.suffix.lower() in known_exts
|
||||
]
|
||||
if adjacent:
|
||||
@@ -157,7 +175,9 @@ class PatternDetector:
|
||||
total += 1
|
||||
if findings.get("has_embedded"):
|
||||
score += 1.0
|
||||
if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
|
||||
if not findings.get("has_subs_folder") and not findings.get(
|
||||
"adjacent_subs"
|
||||
):
|
||||
score += 0.5
|
||||
total += 0.5
|
||||
|
||||
|
||||
@@ -10,6 +10,28 @@ from ..entities import SubtitleTrack
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_dest_name(track: SubtitleTrack, video_stem: str) -> str:
    """
    Compute the filename a subtitle track gets next to its video.

    Produces one of:
        {video_stem}.{lang}.{ext}
        {video_stem}.{lang}.sdh.{ext}
        {video_stem}.{lang}.forced.{ext}

    {lang} is track.language.code and {ext} is the first entry of
    track.format.extensions with leading dots removed.

    Raises:
        ValueError: if the track has no language or no format.
    """
    from ..value_objects import SubtitleType

    if not (track.language and track.format):
        raise ValueError("Cannot compute destination name: language or format missing")

    extension = track.format.extensions[0].lstrip(".")
    segments = [video_stem, track.language.code]

    # Only SDH and forced tracks carry a type marker in the name.
    kind = track.subtitle_type
    if kind == SubtitleType.SDH:
        segments.append("sdh")
    elif kind == SubtitleType.FORCED:
        segments.append("forced")

    segments.append(extension)
    return ".".join(segments)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PlacedTrack:
|
||||
source: Path
|
||||
@@ -20,7 +42,7 @@ class PlacedTrack:
|
||||
@dataclass
|
||||
class PlaceResult:
|
||||
placed: list[PlacedTrack]
|
||||
skipped: list[tuple[SubtitleTrack, str]] # (track, reason)
|
||||
skipped: list[tuple[SubtitleTrack, str]] # (track, reason)
|
||||
|
||||
@property
|
||||
def placed_count(self) -> int:
|
||||
@@ -62,7 +84,7 @@ class SubtitlePlacer:
|
||||
continue
|
||||
|
||||
try:
|
||||
dest_name = track.destination_name
|
||||
dest_name = _build_dest_name(track, destination_video.stem)
|
||||
except ValueError as e:
|
||||
skipped.append((track, str(e)))
|
||||
continue
|
||||
@@ -76,11 +98,13 @@ class SubtitlePlacer:
|
||||
|
||||
try:
|
||||
os.link(track.file_path, dest_path)
|
||||
placed.append(PlacedTrack(
|
||||
source=track.file_path,
|
||||
destination=dest_path,
|
||||
filename=dest_name,
|
||||
))
|
||||
placed.append(
|
||||
PlacedTrack(
|
||||
source=track.file_path,
|
||||
destination=dest_path,
|
||||
filename=dest_name,
|
||||
)
|
||||
)
|
||||
logger.info(f"SubtitlePlacer: placed {dest_name}")
|
||||
except OSError as e:
|
||||
logger.warning(f"SubtitlePlacer: failed to place {dest_name}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user