e45465d52d
Destination resolution
- Replace the single ResolveDestinationUseCase with four dedicated
functions, one per release type:
resolve_season_destination (pack season, folder move)
resolve_episode_destination (single episode, file move)
resolve_movie_destination (movie, file move)
resolve_series_destination (multi-season pack, folder move)
- Each returns a dedicated DTO carrying only the fields relevant to
that release type — no more polymorphic ResolvedDestination with
half the fields unused depending on the case.
- Looser series folder matching: exact computed-name match is reused
silently; any deviation (different group, multiple candidates) now
prompts the user with all options including the computed name.
Agent tools
- Four new tools wrapping the use cases above; old resolve_destination
removed from the registry.
- New move_to_destination tool: create_folder + move, chained — used
after a resolve_* call to perform the actual relocation.
- Low-level filesystem_operations module (create_folder, move via mv)
for instant same-FS renames (ZFS).
Prompt & persona
- New PromptBuilder (alfred/agent/prompt.py) replacing prompts.py:
identity + personality block, situational expressions, memory
schema, episodic/STM/config context, tool catalogue.
- Per-user expression system: knowledge/users/common.yaml +
{username}.yaml are merged at runtime; one phrase per situation
(greeting/success/error/...) is sampled into the system prompt.
qBittorrent integration
- Credentials now come from settings (qbittorrent_url/username/password)
instead of hardcoded defaults.
- New client methods: find_by_name, set_location, recheck — the trio
needed to update a torrent's save path and re-verify after a move.
- Host→container path translation settings (qbittorrent_host_path /
qbittorrent_container_path) for docker-mounted setups.
Subtitles
- Identifier: strip parenthesized qualifiers (simplified, brazil…) at
tokenization; new _tokenize_suffix used for the episode_subfolder
pattern so episode-stem tokens no longer pollute language detection.
- Placer: extract _build_dest_name so it can be reused by the new
dry_run path in ManageSubtitlesUseCase.
- Knowledge: add yue, ell, ind, msa, rus, vie, heb, tam, tel, tha,
hin, ukr; add 'fre' to fra; add 'simplified'/'traditional' to zho.
Misc
- LTM workspace: add 'trash' folder slot.
- Default LLM provider switched to deepseek.
- testing/debug_release.py: CLI to parse a release, hit TMDB, and
dry-run the destination resolution end-to-end.
151 lines
5.0 KiB
Python
151 lines
5.0 KiB
Python
"""SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
|
|
|
|
import logging
|
|
|
|
from ..value_objects import (
|
|
ScanStrategy,
|
|
SubtitleFormat,
|
|
SubtitleLanguage,
|
|
SubtitleMatchingRules,
|
|
SubtitlePattern,
|
|
SubtitleType,
|
|
TypeDetectionMethod,
|
|
)
|
|
from .loader import KnowledgeLoader
|
|
|
|
# Module-level logger, named after this module; the knowledge base reports
# skipped entries and reloads through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).

    Built from a KnowledgeLoader — call kb.reload() to pick up newly learned
    entries without restarting.

    At build time the raw knowledge is converted into:
      * format id -> SubtitleFormat
      * language code -> SubtitleLanguage
      * lower-cased token -> language code (reverse map)
      * lower-cased token -> SubtitleType (reverse map)
      * the default SubtitleMatchingRules
      * pattern id -> SubtitlePattern

    All token lookups are case-insensitive (tokens are lower-cased both when
    the maps are built and when they are queried).
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        """
        Args:
            loader: Source of the raw knowledge. When None, a default
                KnowledgeLoader() is created.
        """
        # Explicit None check: a caller-supplied loader must not be swapped
        # for a default one merely because it evaluates as falsy.
        self._loader = loader if loader is not None else KnowledgeLoader()
        self._build()

    # --- Construction ---

    def _build(self) -> None:
        """
        Parse the loader's raw data into the typed lookup structures.

        Everything is assembled in local variables first and assigned to
        ``self`` only at the very end, so an exception raised midway leaves
        any previously built state untouched.
        """
        data = self._loader.subtitles()

        formats = self._parse_formats(data)
        languages = self._parse_languages(data)
        # Reverse token -> language code map. When two languages declare the
        # same token, the language listed last wins.
        lang_token_map = {
            token.lower(): code
            for code, lang in languages.items()
            for token in lang.tokens
        }
        type_token_map = self._parse_type_tokens(data)
        default_rules = self._parse_default_rules(data)
        patterns = self._parse_patterns(self._loader.patterns())

        self._formats: dict[str, SubtitleFormat] = formats
        self._languages: dict[str, SubtitleLanguage] = languages
        self._lang_token_map: dict[str, str] = lang_token_map
        self._type_token_map: dict[str, SubtitleType] = type_token_map
        self._default_rules = default_rules
        self._patterns: dict[str, SubtitlePattern] = patterns

    @staticmethod
    def _parse_formats(data: dict) -> dict[str, SubtitleFormat]:
        """Build the format id -> SubtitleFormat map from the "formats" section."""
        return {
            fid: SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )
            for fid, fdata in data.get("formats", {}).items()
        }

    @staticmethod
    def _parse_languages(data: dict) -> dict[str, SubtitleLanguage]:
        """Build the language code -> SubtitleLanguage map from the "languages" section."""
        return {
            code: SubtitleLanguage(code=code, tokens=ldata.get("tokens", []))
            for code, ldata in data.get("languages", {}).items()
        }

    @staticmethod
    def _parse_type_tokens(data: dict) -> dict[str, SubtitleType]:
        """
        Build the reverse token -> SubtitleType map from the "types" section.

        A type id for which SubtitleType(type_id) raises ValueError is skipped
        with a warning, mirroring how invalid patterns are handled, instead of
        aborting the whole build.
        """
        token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in data.get("types", {}).items():
            try:
                stype = SubtitleType(type_id)
            except ValueError as e:
                logger.warning(
                    "SubtitleKnowledgeBase: skipping type '%s': %s", type_id, e
                )
                continue
            for token in tdata.get("tokens", []):
                token_map[token.lower()] = stype
        return token_map

    @staticmethod
    def _parse_default_rules(data: dict) -> SubtitleMatchingRules:
        """Build the default matching rules from the "defaults" section."""
        d = data.get("defaults", {})
        return SubtitleMatchingRules(
            preferred_languages=d.get("languages", ["fra", "eng"]),
            preferred_formats=d.get("formats", ["srt"]),
            allowed_types=d.get("types", ["standard", "forced"]),
            format_priority=d.get("format_priority", ["srt", "ass"]),
            min_confidence=d.get("min_confidence", 0.7),
        )

    @staticmethod
    def _parse_patterns(raw_patterns: dict) -> dict[str, SubtitlePattern]:
        """
        Build the pattern id -> SubtitlePattern map.

        A pattern whose construction raises ValueError (for instance from
        ScanStrategy(...) or TypeDetectionMethod(...)) is skipped with a
        warning; the remaining patterns are still loaded.
        """
        patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in raw_patterns.items():
            try:
                patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(pdata.get("scan_strategy", "adjacent")),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        pdata.get("type_detection", {}).get("method", "token_in_name")
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(
                    "SubtitleKnowledgeBase: skipping pattern '%s': %s", pid, e
                )
        return patterns

    def reload(self) -> None:
        """
        Rebuild the knowledge base from a fresh KnowledgeLoader().

        If the rebuild raises, the previous loader is put back (the previously
        built data is left as it was by _build) and the exception is re-raised.
        """
        # NOTE(review): this always switches to a default KnowledgeLoader(),
        # even when a custom loader was passed to __init__ — confirm that is
        # intended.
        previous_loader = self._loader
        self._loader = KnowledgeLoader()
        try:
            self._build()
        except Exception:
            # Keep loader and parsed data consistent with each other.
            self._loader = previous_loader
            raise
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the default matching rules built from the "defaults" section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return the internal format id -> SubtitleFormat map (not a copy)."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first known format whose matches_extension(ext) is truthy, else None."""
        return next(
            (fmt for fmt in self._formats.values() if fmt.matches_extension(ext)),
            None,
        )

    def known_extensions(self) -> set[str]:
        """Return the union of the extensions declared by every known format."""
        return {ext for fmt in self._formats.values() for ext in fmt.extensions}

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return the internal language code -> SubtitleLanguage map (not a copy)."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language a token maps to (case-insensitive), or None if unknown."""
        code = self._lang_token_map.get(token.lower())
        if code is None:
            return None
        return self._languages.get(code)

    def is_known_lang_token(self, token: str) -> bool:
        """Return True when the token (case-insensitive) is a known language token."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type a token maps to (case-insensitive), or None if unknown."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Return True when the token (case-insensitive) is a known type token."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return the internal pattern id -> SubtitlePattern map (not a copy)."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with the given id, or None if it is not loaded."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """
        Return the loaded patterns listed under a release group's "known_patterns".

        The order of "known_patterns" is preserved; ids that are not loaded
        are silently dropped. An unknown (or empty) group yields [].
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]
|