Files
alfred/alfred/domain/subtitles/knowledge/base.py
T
francwa e45465d52d feat: split resolve_destination, persona-driven prompts, qBittorrent relocation
Destination resolution
- Replace the single ResolveDestinationUseCase with four dedicated
  functions, one per release type:
    resolve_season_destination    (pack season, folder move)
    resolve_episode_destination   (single episode, file move)
    resolve_movie_destination     (movie, file move)
    resolve_series_destination    (multi-season pack, folder move)
- Each returns a dedicated DTO carrying only the fields relevant to
  that release type — no more polymorphic ResolvedDestination with
  half the fields unused depending on the case.
- Looser series folder matching: exact computed-name match is reused
  silently; any deviation (different group, multiple candidates) now
  prompts the user with all options including the computed name.

Agent tools
- Four new tools wrapping the use cases above; old resolve_destination
  removed from the registry.
- New move_to_destination tool: create_folder + move, chained — used
  after a resolve_* call to perform the actual relocation.
- Low-level filesystem_operations module (create_folder, move via mv)
  for instant same-FS renames (ZFS).

Prompt & persona
- New PromptBuilder (alfred/agent/prompt.py) replacing prompts.py:
  identity + personality block, situational expressions, memory
  schema, episodic/STM/config context, tool catalogue.
- Per-user expression system: knowledge/users/common.yaml +
  {username}.yaml are merged at runtime; one phrase per situation
  (greeting/success/error/...) is sampled into the system prompt.

qBittorrent integration
- Credentials now come from settings (qbittorrent_url/username/password)
  instead of hardcoded defaults.
- New client methods: find_by_name, set_location, recheck — the trio
  needed to update a torrent's save path and re-verify after a move.
- Host→container path translation settings (qbittorrent_host_path /
  qbittorrent_container_path) for docker-mounted setups.

Subtitles
- Identifier: strip parenthesized qualifiers (simplified, brazil…) at
  tokenization; new _tokenize_suffix used for the episode_subfolder
  pattern so episode-stem tokens no longer pollute language detection.
- Placer: extract _build_dest_name so it can be reused by the new
  dry_run path in ManageSubtitlesUseCase.
- Knowledge: add yue, ell, ind, msa, rus, vie, heb, tam, tel, tha,
  hin, ukr; add 'fre' to fra; add 'simplified'/'traditional' to zho.

Misc
- LTM workspace: add 'trash' folder slot.
- Default LLM provider switched to deepseek.
- testing/debug_release.py: CLI to parse a release, hit TMDB, and
  dry-run the destination resolution end-to-end.
2026-05-14 05:01:59 +02:00

151 lines
5.0 KiB
Python

"""SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
import logging
from ..value_objects import (
ScanStrategy,
SubtitleFormat,
SubtitleLanguage,
SubtitleMatchingRules,
SubtitlePattern,
SubtitleType,
TypeDetectionMethod,
)
from .loader import KnowledgeLoader
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).

    Built from KnowledgeLoader — call kb.reload() to pick up newly learned
    entries without restarting.

    Token lookups are case-insensitive: tokens are lower-cased both when the
    reverse maps are built and when they are queried.
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        """Create the knowledge base and build all lookup tables.

        Args:
            loader: Source of the raw knowledge data. A default
                ``KnowledgeLoader()`` is created when omitted (or falsy).
        """
        self._loader = loader or KnowledgeLoader()
        self._build()

    def _build(self) -> None:
        """(Re)populate every lookup table from the current loader.

        Entries that cannot be converted to their enum-like value object
        (types and patterns) are skipped with a warning so that one bad
        entry does not take the whole knowledge base down.
        """
        data = self._loader.subtitles()

        self._formats: dict[str, SubtitleFormat] = {
            fid: SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )
            for fid, fdata in data.get("formats", {}).items()
        }

        self._languages: dict[str, SubtitleLanguage] = {
            code: SubtitleLanguage(code=code, tokens=ldata.get("tokens", []))
            for code, ldata in data.get("languages", {}).items()
        }

        # Reverse map: lower-cased token -> language code. When two languages
        # declare the same token, the one defined last wins.
        self._lang_token_map: dict[str, str] = {}
        for code, lang in self._languages.items():
            for token in lang.tokens:
                self._lang_token_map[token.lower()] = code

        # Reverse map: lower-cased token -> subtitle type.
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in data.get("types", {}).items():
            try:
                stype = SubtitleType(type_id)
            except ValueError as e:
                # Same policy as for patterns below: an unknown type id is
                # skipped instead of aborting the whole build.
                logger.warning(
                    "SubtitleKnowledgeBase: skipping type '%s': %s", type_id, e
                )
                continue
            for token in tdata.get("tokens", []):
                self._type_token_map[token.lower()] = stype

        # Matching rules, with hard-coded fallbacks for any missing key.
        defaults = data.get("defaults", {})
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=defaults.get("languages", ["fra", "eng"]),
            preferred_formats=defaults.get("formats", ["srt"]),
            allowed_types=defaults.get("types", ["standard", "forced"]),
            format_priority=defaults.get("format_priority", ["srt", "ass"]),
            min_confidence=defaults.get("min_confidence", 0.7),
        )

        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in self._loader.patterns().items():
            try:
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(
                        pdata.get("scan_strategy", "adjacent")
                    ),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        pdata.get("type_detection", {}).get(
                            "method", "token_in_name"
                        )
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(
                    "SubtitleKnowledgeBase: skipping pattern '%s': %s", pid, e
                )

    def reload(self) -> None:
        """Rebuild all tables from a freshly created default loader."""
        # NOTE(review): this replaces a loader injected through __init__ with
        # a default KnowledgeLoader() — confirm that is intended for callers
        # that pass a custom loader.
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the default matching rules built from the ``defaults`` section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return the format table keyed by format id (the internal dict, not a copy)."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose ``matches_extension(ext)`` is true, else None."""
        return next(
            (fmt for fmt in self._formats.values() if fmt.matches_extension(ext)),
            None,
        )

    def known_extensions(self) -> set[str]:
        """Return the union of the extensions declared by all known formats."""
        return {ext for fmt in self._formats.values() for ext in fmt.extensions}

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return the language table keyed by language code (the internal dict, not a copy)."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language that declares *token* (case-insensitive), else None."""
        code = self._lang_token_map.get(token.lower())
        return self._languages.get(code) if code else None

    def is_known_lang_token(self, token: str) -> bool:
        """Return True if *token* (case-insensitive) is a known language token."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type that declares *token* (case-insensitive), else None."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Return True if *token* (case-insensitive) is a known type token."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return the pattern table keyed by pattern id (the internal dict, not a copy)."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern registered under *pattern_id*, else None."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """Return the loaded patterns listed under a release group's ``known_patterns``.

        Returns an empty list when the loader has no data for *group_name*.
        Pattern ids that were not loaded (unknown or skipped during the
        build) are silently omitted; order follows ``known_patterns``.
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]