feat: major architectural refactor

- Refactor memory system (episodic/STM/LTM with components)
- Implement complete subtitle domain (scanner, matcher, placer)
- Add YAML workflow infrastructure
- Externalize knowledge base (patterns, release groups)
- Add comprehensive testing suite
- Create manual testing CLIs
This commit is contained in:
2026-05-11 21:33:37 +02:00
parent 62b5d0b998
commit 249c5de76a
103 changed files with 8559 additions and 1346 deletions
+21 -1
View File
@@ -1,12 +1,32 @@
"""Filesystem use cases."""
from .dto import ListFolderResponse, SetFolderPathResponse
from .create_seed_links import CreateSeedLinksUseCase
from .dto import (
CreateSeedLinksResponse,
ListFolderResponse,
ManageSubtitlesResponse,
MoveMediaResponse,
PlacedSubtitle,
SetFolderPathResponse,
)
from .list_folder import ListFolderUseCase
from .manage_subtitles import ManageSubtitlesUseCase
from .move_media import MoveMediaUseCase
from .resolve_destination import ResolveDestinationUseCase, ResolvedDestination
from .set_folder_path import SetFolderPathUseCase
# Public names of this package: the use-case classes followed by the
# response/DTO types they return.
__all__ = [
    "SetFolderPathUseCase",
    "ListFolderUseCase",
    "CreateSeedLinksUseCase",
    "MoveMediaUseCase",
    "ManageSubtitlesUseCase",
    "ResolveDestinationUseCase",
    "ResolvedDestination",
    "SetFolderPathResponse",
    "ListFolderResponse",
    "CreateSeedLinksResponse",
    "MoveMediaResponse",
    "ManageSubtitlesResponse",
    "PlacedSubtitle",
]
@@ -0,0 +1,54 @@
"""CreateSeedLinksUseCase — prepares a torrent folder for continued seeding."""
import logging
from alfred.infrastructure.filesystem import FileManager
from alfred.infrastructure.persistence import get_memory
from .dto import CreateSeedLinksResponse
logger = logging.getLogger(__name__)
class CreateSeedLinksUseCase:
    """
    Rebuild a torrent subfolder so a moved release can keep being seeded.

    The filesystem work is delegated to ``FileManager.create_seed_links``;
    this class only looks up the configured torrent folder and translates the
    result dict into a ``CreateSeedLinksResponse``.

    NOTE(review): the original description states that the file manager
    hard-links the library video back into ``torrents/<original_folder>/`` and
    then copies the remaining files (subs, nfo, ...) from the original
    download folder — confirm against ``FileManager``.
    """

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(
        self, library_file: str, original_download_folder: str
    ) -> CreateSeedLinksResponse:
        """
        Create the seed links for one library file.

        Args:
            library_file: Path of the video file in the library.
            original_download_folder: Folder the release was downloaded into.

        Returns:
            A ``CreateSeedLinksResponse`` with status "ok" and the link/copy
            details, or status "error" when the torrent folder is not
            configured or the file manager reports a failure.
        """
        torrent_folder = get_memory().ltm.workspace.torrent
        if not torrent_folder:
            return CreateSeedLinksResponse(
                status="error",
                error="torrent_folder_not_set",
                message="Torrent folder is not configured. Use set_path_for_folder to set it.",
            )

        outcome = self.file_manager.create_seed_links(
            library_file, original_download_folder, torrent_folder
        )

        # Anything other than an explicit "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return CreateSeedLinksResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return CreateSeedLinksResponse(
            status="ok",
            torrent_subfolder=outcome.get("torrent_subfolder"),
            linked_file=outcome.get("linked_file"),
            copied_files=outcome.get("copied_files"),
            copied_count=outcome.get("copied_count", 0),
            skipped=outcome.get("skipped"),
        )
+149 -1
View File
@@ -1,6 +1,56 @@
"""Filesystem application DTOs."""
from dataclasses import dataclass
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class CopyMediaResponse:
    """Outcome of copying a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response to a plain dict.

        When ``error`` is truthy only status/error/message are emitted;
        otherwise the copy details are emitted and error/message are left out.
        """
        if self.error:
            keys = ("status", "error", "message")
        else:
            keys = ("status", "source", "destination", "filename", "size")
        return {key: getattr(self, key) for key in keys}
@dataclass
class MoveMediaResponse:
    """Outcome of moving a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response to a plain dict.

        A truthy ``error`` selects the short error form (status, error,
        message); otherwise the move details are returned.
        """
        if not self.error:
            return dict(
                status=self.status,
                source=self.source,
                destination=self.destination,
                filename=self.filename,
                size=self.size,
            )
        return dict(status=self.status, error=self.error, message=self.message)
@dataclass
@@ -29,6 +79,104 @@ class SetFolderPathResponse:
return result
@dataclass
class PlacedSubtitle:
    """A single subtitle file that was placed successfully."""

    source: str
    destination: str
    filename: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict."""
        return {
            name: getattr(self, name)
            for name in ("source", "destination", "filename")
        }
@dataclass
class UnresolvedTrack:
    """A subtitle track that needs clarification from the agent before placement."""

    raw_tokens: list[str]
    file_path: str | None = None
    file_size_kb: float | None = None
    reason: str = ""  # "unknown_language" | "low_confidence"

    def to_dict(self) -> dict:
        """Return all four fields as a plain dict (``raw_tokens`` is not copied)."""
        field_names = ("raw_tokens", "file_path", "file_size_kb", "reason")
        return {name: getattr(self, name) for name in field_names}
@dataclass
class AvailableSubtitle:
    """One subtitle track reported as available on an embedded media item."""

    language: str  # ISO 639-2 code
    subtitle_type: str  # "standard" | "sdh" | "forced" | "unknown"

    def to_dict(self) -> dict:
        """Return a plain dict; ``subtitle_type`` is exposed under the key "type"."""
        return dict(language=self.language, type=self.subtitle_type)
@dataclass
class ManageSubtitlesResponse:
    """Result returned by the manage_subtitles use case."""

    status: str  # "ok" | "needs_clarification" | "error"
    video_path: str | None = None
    placed: list[PlacedSubtitle] | None = None
    skipped_count: int = 0
    unresolved: list[UnresolvedTrack] | None = None
    available: list[AvailableSubtitle] | None = None  # embedded tracks summary
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response to a plain dict.

        A truthy ``error`` yields only status/error/message. Otherwise the
        placement summary is returned; the "unresolved"/"unresolved_count" and
        "available" keys are added only when those lists are non-empty.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        placed_items = self.placed or []
        payload = {
            "status": self.status,
            "video_path": self.video_path,
            "placed": [item.to_dict() for item in placed_items],
            "placed_count": len(placed_items),
            "skipped_count": self.skipped_count,
        }

        if self.unresolved:
            payload["unresolved"] = [track.to_dict() for track in self.unresolved]
            payload["unresolved_count"] = len(self.unresolved)

        if self.available:
            payload["available"] = [track.to_dict() for track in self.available]

        return payload
@dataclass
class CreateSeedLinksResponse:
    """Result of creating seed links for a torrent."""

    status: str
    torrent_subfolder: str | None = None
    linked_file: str | None = None
    copied_files: list[str] | None = None
    copied_count: int = 0
    skipped: list[str] | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response to a plain dict.

        A truthy ``error`` yields only status/error/message. Otherwise the
        link details are returned, with missing or empty ``copied_files`` /
        ``skipped`` rendered as empty lists.
        """
        if self.error:
            return dict(status=self.status, error=self.error, message=self.message)

        payload = {
            "status": self.status,
            "torrent_subfolder": self.torrent_subfolder,
            "linked_file": self.linked_file,
        }
        payload["copied_files"] = self.copied_files if self.copied_files else []
        payload["copied_count"] = self.copied_count
        payload["skipped"] = self.skipped if self.skipped else []
        return payload
@dataclass
class ListFolderResponse:
"""Response from listing a folder."""
@@ -0,0 +1,258 @@
"""ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
import logging
from pathlib import Path
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
from alfred.domain.subtitles.services.placer import PlacedTrack, SubtitlePlacer
from alfred.domain.subtitles.services.utils import available_subtitles
from alfred.domain.subtitles.value_objects import ScanStrategy
from alfred.infrastructure.persistence.context import get_memory
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
from .dto import AvailableSubtitle, ManageSubtitlesResponse, PlacedSubtitle, UnresolvedTrack
logger = logging.getLogger(__name__)
def _infer_library_root(dest_video: Path, media_type: str) -> Path:
    """
    Infer the media's root folder in the library from the destination video path.

    TV show: <show>/<season>/<video>  → returns <show>   (two levels up)
    Other:   <movie folder>/<video>   → returns the folder (one level up)

    Only ``media_type == "tv_show"`` selects the two-level form; every other
    value is treated like a movie.
    """
    containing_folder = dest_video.parent
    if media_type != "tv_show":
        return containing_folder
    return containing_folder.parent
def _to_imdb_id(raw: str | None) -> ImdbId | None:
    """
    Convert a raw id string into an ``ImdbId``, or return None.

    None is returned for a missing/empty value and whenever constructing the
    ``ImdbId`` raises — an unusable id is simply ignored.
    """
    if raw:
        try:
            return ImdbId(raw)
        except Exception:
            pass
    return None
class ManageSubtitlesUseCase:
    """
    Full subtitle pipeline:
      1. Load knowledge base
      2. Detect (or confirm) the release pattern
      3. Identify all tracks (ffprobe + filesystem scan)
      4. Load + resolve rules for this media
      5. Match tracks against rules
      6. If any tracks are unresolved → return needs_clarification (don't place yet)
      7. Place matched tracks via hard-link
      8. Persist to .alfred/metadata.yaml

    The use case is stateless — all dependencies are instantiated inline.
    """

    def execute(
        self,
        source_video: str,
        destination_video: str,
        imdb_id: str | None = None,
        media_type: str = "tv_show",
        release_group: str | None = None,
        season: int | None = None,
        episode: int | None = None,
        confirmed_pattern_id: str | None = None,
    ) -> ManageSubtitlesResponse:
        """
        Run the subtitle pipeline for one video.

        Args:
            source_video: Path of the video in its release folder; must exist.
            destination_video: Path of the video in the library. Used to infer
                the library root and as the placement target.
            imdb_id: Optional raw IMDb id; values that cannot be converted are
                ignored.
            media_type: "tv_show" puts the library root two levels above the
                destination video, any other value one level above.
            release_group: Forwarded to track identification, rule loading and
                the placement history.
            season: Recorded with the placement history.
            episode: Recorded with the placement history.
            confirmed_pattern_id: Pattern id to use instead of the stored or
                auto-detected one.

        Returns:
            A ``ManageSubtitlesResponse`` whose status is "ok",
            "needs_clarification" (nothing placed, unresolved tracks listed)
            or "error" (missing source video or no usable pattern).
        """
        source_path = Path(source_video)
        dest_path = Path(destination_video)

        if not source_path.exists():
            return ManageSubtitlesResponse(
                status="error",
                error="source_not_found",
                message=f"Source video not found: {source_video}",
            )

        kb = SubtitleKnowledgeBase(KnowledgeLoader())
        library_root = _infer_library_root(dest_path, media_type)
        store = SubtitleMetadataStore(library_root)
        repo = RuleSetRepository(library_root)

        # --- Pattern resolution ---
        pattern = self._resolve_pattern(
            kb, store, source_path, confirmed_pattern_id, release_group
        )
        if pattern is None:
            return ManageSubtitlesResponse(
                status="error",
                error="pattern_not_found",
                message="Could not determine subtitle pattern for this release.",
            )

        # --- Identify ---
        media_id = _to_imdb_id(imdb_id)
        identifier = SubtitleIdentifier(kb)
        metadata = identifier.identify(
            video_path=source_path,
            pattern=pattern,
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
        )

        if metadata.total_count == 0:
            logger.info(f"ManageSubtitles: no subtitle tracks found for {source_path.name}")
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
            )

        # --- Embedded short-circuit ---
        # Embedded patterns have nothing to place: report what is available.
        if pattern.scan_strategy == ScanStrategy.EMBEDDED:
            logger.info("ManageSubtitles: embedded pattern — skipping matcher")
            available = [
                AvailableSubtitle(
                    language=t.language.code if t.language else "?",
                    subtitle_type=t.subtitle_type.value,
                )
                for t in available_subtitles(metadata.embedded_tracks)
            ]
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
                available=available,
            )

        # --- Match (external only) ---
        subtitle_prefs = None
        try:
            memory = get_memory()
            subtitle_prefs = memory.ltm.subtitle_preferences
        except Exception as exc:
            # Best effort: preferences are optional, so keep going without
            # them — but leave a trace instead of hiding the failure.
            logger.warning(
                "ManageSubtitles: could not load subtitle preferences (%s) — "
                "continuing without them",
                exc,
            )

        rules = repo.load(release_group, subtitle_prefs).resolve()
        matcher = SubtitleMatcher()
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        if unresolved:
            logger.info(
                f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
            )
            return ManageSubtitlesResponse(
                status="needs_clarification",
                video_path=destination_video,
                placed=[],
                unresolved=[_to_unresolved_dto(t) for t in unresolved],
            )

        if not matched:
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=metadata.total_count,
            )

        # --- Place ---
        placer = SubtitlePlacer()
        place_result = placer.place(matched, dest_path)

        # --- Persist ---
        if place_result.placed:
            pairs = _pair_placed_with_tracks(place_result.placed, matched)
            store.append_history(pairs, season, episode, release_group)

        placed_dtos = [
            PlacedSubtitle(
                source=str(p.source),
                destination=str(p.destination),
                filename=p.filename,
            )
            for p in place_result.placed
        ]

        return ManageSubtitlesResponse(
            status="ok",
            video_path=destination_video,
            placed=placed_dtos,
            skipped_count=place_result.skipped_count,
        )

    def _resolve_pattern(
        self,
        kb: SubtitleKnowledgeBase,
        store: SubtitleMetadataStore,
        source_path: Path,
        confirmed_pattern_id: str | None,
        release_group: str | None,
    ):
        """
        Pick the subtitle pattern to use, trying in order:

          1. the caller's ``confirmed_pattern_id`` (an unknown id is logged
             and the search continues),
          2. the pattern previously confirmed in the metadata store,
          3. auto-detection on the release folder, accepted at a confidence
             of 0.6 or higher,
          4. the "adjacent" pattern as the default.

        Returns whatever the knowledge base yields for the chosen id, which
        the caller treats as an error when it is None. ``release_group`` is
        accepted but not used by the resolution.
        """
        # 1. Explicit override from caller
        if confirmed_pattern_id:
            p = kb.pattern(confirmed_pattern_id)
            if p:
                return p
            logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

        # 2. Previously confirmed in metadata store
        stored_id = store.confirmed_pattern()
        if stored_id:
            p = kb.pattern(stored_id)
            if p:
                logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                return p

        # 3. Auto-detect
        release_root = source_path.parent
        detector = PatternDetector(kb)
        result = detector.detect(release_root, source_path)
        if result["detected"] and result["confidence"] >= 0.6:
            logger.info(
                f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                f"(confidence={result['confidence']:.2f})"
            )
            return result["detected"]

        # 4. Fallback — adjacent (safest default)
        logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
        return kb.pattern("adjacent")
def _to_unresolved_dto(track: SubtitleTrack, min_confidence: float = 0.7) -> UnresolvedTrack:
    """
    Build the ``UnresolvedTrack`` DTO for a track the matcher could not resolve.

    The reason is "unknown_language" when the track has no language and
    "low_confidence" otherwise. ``min_confidence`` is accepted for interface
    compatibility but is not consulted.
    """
    if track.language is None:
        reason = "unknown_language"
    else:
        reason = "low_confidence"

    path_text = str(track.file_path) if track.file_path else None
    return UnresolvedTrack(
        raw_tokens=track.raw_tokens,
        file_path=path_text,
        file_size_kb=track.file_size_kb,
        reason=reason,
    )
def _pair_placed_with_tracks(
    placed: list[PlacedTrack],
    tracks: list[SubtitleTrack],
) -> list[tuple[PlacedTrack, SubtitleTrack]]:
    """
    Pair each PlacedTrack with its originating SubtitleTrack.

    A track is looked up by source path first. When a placed entry's source
    matches no track path, the track at the same index is used instead
    (positional fallback). An entry with neither a path match nor a track at
    its index is left out of the result rather than being attributed to an
    unrelated track.

    Args:
        placed: Placement results, each exposing ``source``.
        tracks: Candidate tracks, each exposing ``file_path``.

    Returns:
        ``(placed, track)`` tuples in the order of ``placed``.
    """
    track_by_path = {t.file_path: t for t in tracks if t.file_path}
    pairs = []
    for index, p in enumerate(placed):
        track = track_by_path.get(p.source)
        if track is None and index < len(tracks):
            # Positional fallback: align by index, not always the first track.
            track = tracks[index]
        if track is not None:
            pairs.append((p, track))
    return pairs
@@ -0,0 +1,43 @@
"""Move media use case."""
import logging
from alfred.infrastructure.filesystem import FileManager
from .dto import MoveMediaResponse
logger = logging.getLogger(__name__)
class MoveMediaUseCase:
    """
    Use case for moving a media file to a destination.

    The move itself is performed by ``FileManager.move_file``; the original
    summary describes it as copy + delete source (confirm against
    ``FileManager``).
    """

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(self, source: str, destination: str) -> MoveMediaResponse:
        """
        Move a media file from source to destination.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            MoveMediaResponse with status "ok" and the file details when the
            file manager reports success, otherwise status "error" with the
            file manager's error and message.
        """
        outcome = self.file_manager.move_file(source, destination)

        # Anything other than an explicit "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return MoveMediaResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return MoveMediaResponse(
            status="ok",
            source=outcome.get("source"),
            destination=outcome.get("destination"),
            filename=outcome.get("filename"),
            size=outcome.get("size"),
        )
@@ -0,0 +1,246 @@
"""
ResolveDestinationUseCase — compute the library destination path for a release.
Steps:
1. Parse the release name
2. Look up TMDB for title + year (+ episode title if single episode)
3. Scan the library for an existing series folder
4. Apply group-conflict rules
5. Return the computed paths (or needs_clarification if ambiguous)
"""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from alfred.domain.media.release_parser import ParsedRelease, parse_release
from alfred.infrastructure.persistence import get_memory
logger = logging.getLogger(__name__)
# Characters forbidden in Windows file names (the library is served via NFS).
# "/" is included: besides being forbidden on Windows, it would split a title
# such as "Face/Off" into nested directories once joined onto a Path.
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\/]')


def _sanitise(text: str) -> str:
    """Return *text* with every forbidden file-name character removed."""
    return _WIN_FORBIDDEN.sub("", text)
# ---------------------------------------------------------------------------
# DTOs
# ---------------------------------------------------------------------------
@dataclass
class ResolvedDestination:
    """Every path computed for a release, ready to be handed to move_media."""

    status: str  # "ok" | "needs_clarification" | "error"

    # Populated on "ok"
    library_file: str | None = None  # absolute path of the destination video file
    series_folder: str | None = None  # absolute path of the series root folder
    season_folder: str | None = None  # absolute path of the season subfolder
    series_folder_name: str | None = None  # just the folder name (for display)
    season_folder_name: str | None = None
    filename: str | None = None
    is_new_series_folder: bool = False  # True if we're creating the folder

    # Populated on "needs_clarification"
    question: str | None = None
    options: list[str] | None = None  # existing group folder names to pick from

    # Populated on "error"
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise to a plain dict whose shape depends on ``status``.

        "error" → status/error/message; "needs_clarification" →
        status/question/options (options defaults to an empty list); any other
        status → the computed paths.
        """
        if self.status == "needs_clarification":
            return {
                "status": self.status,
                "question": self.question,
                "options": self.options or [],
            }

        if self.status == "error":
            keys = ("status", "error", "message")
        else:
            keys = (
                "status",
                "library_file",
                "series_folder",
                "season_folder",
                "series_folder_name",
                "season_folder_name",
                "filename",
                "is_new_series_folder",
            )
        return {key: getattr(self, key) for key in keys}
# ---------------------------------------------------------------------------
# Use case
# ---------------------------------------------------------------------------
class ResolveDestinationUseCase:
    """
    Work out where in the library a media file should end up.

    Inputs supplied by the caller:
      - release_name: the raw release folder/file name
      - source_file: path to the actual video file (only its extension is used)
      - tmdb_title: canonical title from TMDB
      - tmdb_year: release year from TMDB
      - tmdb_episode_title: episode title from TMDB (None for movies / season packs)
      - confirmed_folder: the folder name chosen by the user after an earlier
        needs_clarification answer; when given, the folder lookup result is
        not used to choose the folder

    The result is always a ResolvedDestination.
    """

    def execute(
        self,
        release_name: str,
        source_file: str,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None = None,
        confirmed_folder: str | None = None,
    ) -> ResolvedDestination:
        """Parse the release name and dispatch to the movie or TV-show resolver."""
        release = parse_release(release_name)
        extension = Path(source_file).suffix  # ".mkv"

        if not release.is_movie:
            return self._resolve_tvshow(
                release,
                tmdb_title,
                tmdb_year,
                tmdb_episode_title,
                extension,
                confirmed_folder,
            )
        return self._resolve_movie(release, tmdb_title, tmdb_year, extension)

    # ------------------------------------------------------------------
    # Movie
    # ------------------------------------------------------------------

    def _resolve_movie(
        self, parsed: ParsedRelease, tmdb_title: str, tmdb_year: int, ext: str
    ) -> ResolvedDestination:
        """
        Compute the folder and file path of a movie under the "movie" library.

        Returns an "error" result when no movie library path is configured.
        The movie folder is reported through the ``series_folder*`` fields.
        """
        movies_root = get_memory().ltm.library_paths.get("movie")
        if not movies_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="Movie library path is not configured.",
            )

        folder_name = _sanitise(parsed.movie_folder_name(tmdb_title, tmdb_year))
        filename = _sanitise(parsed.movie_filename(tmdb_title, tmdb_year, ext))
        movie_dir = Path(movies_root) / folder_name

        return ResolvedDestination(
            status="ok",
            library_file=str(movie_dir / filename),
            series_folder=str(movie_dir),
            series_folder_name=folder_name,
            filename=filename,
            is_new_series_folder=not movie_dir.exists(),
        )

    # ------------------------------------------------------------------
    # TV show
    # ------------------------------------------------------------------

    def _resolve_tvshow(
        self,
        parsed: ParsedRelease,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None,
        ext: str,
        confirmed_folder: str | None,
    ) -> ResolvedDestination:
        """
        Compute series/season/file paths under the "tv_show" library.

        Folder choice: ``confirmed_folder`` wins when given; otherwise no
        existing match means a new folder, exactly one match is reused, and
        several matches produce a "needs_clarification" result listing them.
        Returns an "error" result when no TV library path is configured.
        """
        tv_root = get_memory().ltm.library_paths.get("tv_show")
        if not tv_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="TV show library path is not configured.",
            )

        tv_root_path = Path(tv_root)

        # --- Find existing series folders for this title ---
        candidates = _find_existing_series_folders(tv_root_path, tmdb_title, tmdb_year)

        # --- Ambiguous and not yet confirmed: ask the user ---
        if not confirmed_folder and len(candidates) > 1:
            return ResolvedDestination(
                status="needs_clarification",
                question=(
                    f"Multiple folders found for '{tmdb_title}' in your library. "
                    f"Which one should I use for this release ({parsed.group})?"
                ),
                options=candidates,
            )

        # --- Determine series folder name ---
        if confirmed_folder:
            series_folder_name = confirmed_folder
            is_new = not (tv_root_path / confirmed_folder).exists()
        elif candidates:
            # Exactly one match — use it regardless of group
            series_folder_name = candidates[0]
            is_new = False
        else:
            # No existing folder — create with release group
            series_folder_name = _sanitise(parsed.show_folder_name(tmdb_title, tmdb_year))
            is_new = True

        # --- Build paths ---
        season_folder_name = parsed.season_folder_name()
        if parsed.is_season_pack:
            raw_filename = parsed.season_folder_name() + ext
        else:
            raw_filename = parsed.episode_filename(tmdb_episode_title, ext)
        filename = _sanitise(raw_filename)

        series_path = tv_root_path / series_folder_name
        season_path = series_path / season_folder_name

        return ResolvedDestination(
            status="ok",
            library_file=str(season_path / filename),
            series_folder=str(series_path),
            season_folder=str(season_path),
            series_folder_name=series_folder_name,
            season_folder_name=season_folder_name,
            filename=filename,
            is_new_series_folder=is_new,
        )
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _find_existing_series_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]:
    """
    List the folders in ``tv_root`` whose name matches the given title + year.

    The sanitised title with spaces turned into dots, followed by "." and the
    year (e.g. "Oz.1997"), must appear at the start of the folder name; the
    comparison is case-insensitive. Only directories are considered.

    Returns the matching folder names sorted, or an empty list when
    ``tv_root`` does not exist.
    """
    if not tv_root.exists():
        return []

    dotted_title = _sanitise(tmdb_title).replace(" ", ".")
    wanted_prefix = f"{dotted_title}.{tmdb_year}".lower()

    return sorted(
        child.name
        for child in tv_root.iterdir()
        if child.is_dir() and child.name.lower().startswith(wanted_prefix)
    )