feat: major architectural refactor

- Refactor memory system (episodic/STM/LTM with components) - Implement complete subtitle domain (scanner, matcher, placer) - Add YAML workflow infrastructure - Externalize knowledge base (patterns, release groups) - Add comprehensive testing suite - Create manual testing CLIs
2026-05-11 21:33:37 +02:00
parent 62b5d0b998
commit 249c5de76a
103 changed files with 8559 additions and 1346 deletions
@@ -4,6 +4,7 @@ import json
 from typing import Any

 from alfred.infrastructure.persistence import get_memory
+from alfred.infrastructure.persistence.memory import MemoryRegistry

 from .registry import Tool

@@ -13,6 +14,7 @@ class PromptBuilder:

    def __init__(self, tools: dict[str, Tool]):
        self.tools = tools
+        self._memory_registry = MemoryRegistry()

    def build_tools_spec(self) -> list[dict[str, Any]]:
        """Build the tool specification for the LLM API."""
@@ -109,11 +111,30 @@ class PromptBuilder:

        return "\n".join(lines)

+    def _format_memory_schema(self) -> str:
+        """Describe available memory components so the agent knows what to read/write and when."""
+        schema = self._memory_registry.schema()
+        tier_labels = {"ltm": "LONG-TERM (persisted)", "stm": "SHORT-TERM (session)", "episodic": "EPISODIC (volatile)"}
+        lines = ["MEMORY COMPONENTS:"]
+
+        for tier, components in schema.items():
+            if not components:
+                continue
+            lines.append(f"\n  [{tier_labels.get(tier, tier.upper())}]")
+            for c in components:
+                access = c.get("access", "read")
+                lines.append(f"  {c['name']} ({access}): {c['description']}")
+                for field_name, field_desc in c.get("fields", {}).items():
+                    lines.append(f"    · {field_name}: {field_desc}")
+
+        return "\n".join(lines)
+
    def _format_config_context(self, memory) -> str:
        """Format configuration context."""
        lines = ["CURRENT CONFIGURATION:"]
-        if memory.ltm.config:
-            for key, value in memory.ltm.config.items():
+        folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()}
+        if folders:
+            for key, value in folders.items():
                lines.append(f"  - {key}: {value}")
        else:
            lines.append("  (no configuration set)")
@@ -138,6 +159,9 @@ class PromptBuilder:
        tools_desc = self._format_tools_description()
        tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""

+        # Memory schema
+        memory_schema = self._format_memory_schema()
+
        # Configuration
        config_section = self._format_config_context(memory)
        if config_section:
@@ -172,6 +196,8 @@ EXAMPLES:

 {language_instruction}
 {tools_section}
+
+{memory_schema}
 {config_section}
 {stm_context}
 {episodic_context}
@@ -97,6 +97,11 @@ def make_tools(settings) -> dict[str, Tool]:
    tool_functions = [
        fs_tools.set_path_for_folder,
        fs_tools.list_folder,
+        fs_tools.resolve_destination,
+        fs_tools.move_media,
+        fs_tools.manage_subtitles,
+        fs_tools.create_seed_links,
+        fs_tools.learn,
        api_tools.find_media_imdb_id,
        api_tools.find_torrent,
        api_tools.add_torrent_by_index,
@@ -1,10 +1,200 @@
 """Filesystem tools for folder management."""

+from pathlib import Path
 from typing import Any

-from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase
+import alfred as _alfred_pkg
+import yaml
+
+from alfred.application.filesystem import (
+    CreateSeedLinksUseCase,
+    ListFolderUseCase,
+    ManageSubtitlesUseCase,
+    MoveMediaUseCase,
+    ResolveDestinationUseCase,
+    SetFolderPathUseCase,
+)
 from alfred.infrastructure.filesystem import FileManager

+_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
+
+
+def move_media(source: str, destination: str) -> dict[str, Any]:
+    """
+    Move a media file to a destination path.
+
+    Copies the file safely first (with integrity check), then deletes the source.
+    Use this to organise a downloaded file into the media library.
+
+    Args:
+        source: Absolute path to the source file.
+        destination: Absolute path to the destination file (must not already exist).
+
+    Returns:
+        Dict with status, source, destination, filename, and size — or error details.
+    """
+    file_manager = FileManager()
+    use_case = MoveMediaUseCase(file_manager)
+    return use_case.execute(source, destination).to_dict()
+
+
+def resolve_destination(
+    release_name: str,
+    source_file: str,
+    tmdb_title: str,
+    tmdb_year: int,
+    tmdb_episode_title: str | None = None,
+    confirmed_folder: str | None = None,
+) -> dict[str, Any]:
+    """
+    Compute the destination path in the media library for a release.
+
+    Call this before move_media to get the correct library path. Handles:
+    - Parsing the release name (quality, codec, group, season/episode)
+    - Looking up any existing series folder in the library
+    - Applying group-conflict rules (asks user if ambiguous)
+    - Building the full destination path with correct naming conventions
+
+    Args:
+        release_name: Raw release folder or file name
+                      (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST").
+        source_file: Absolute path to the source video file (used for extension).
+        tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz").
+        tmdb_year: Release/start year from TMDB (e.g. 1997).
+        tmdb_episode_title: Episode title from TMDB for single-episode releases
+                            (e.g. "The Routine"). Omit for season packs and movies.
+        confirmed_folder: If a previous call returned needs_clarification, pass
+                          the user-chosen folder name here to proceed.
+
+    Returns:
+        On success: dict with status, library_file, series_folder, season_folder,
+                    series_folder_name, season_folder_name, filename,
+                    is_new_series_folder.
+        On ambiguity: dict with status="needs_clarification", question, options.
+        On error: dict with status="error", error, message.
+    """
+    use_case = ResolveDestinationUseCase()
+    return use_case.execute(
+        release_name=release_name,
+        source_file=source_file,
+        tmdb_title=tmdb_title,
+        tmdb_year=tmdb_year,
+        tmdb_episode_title=tmdb_episode_title,
+        confirmed_folder=confirmed_folder,
+    ).to_dict()
+
+
+def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
+    """
+    Prepare a torrent subfolder so qBittorrent can keep seeding after a move.
+
+    Hard-links the video file from the library into torrents/<original_folder_name>/,
+    then copies all remaining files from the original download folder (subtitles,
+    .nfo, .jpg, .txt, …) so the torrent data is complete.
+
+    Call this after move_media when the user wants to keep seeding.
+
+    Args:
+        library_file: Absolute path to the video file now in the library.
+        original_download_folder: Absolute path to the original download folder
+            (may still contain subs, nfo, and other release files).
+
+    Returns:
+        Dict with status, torrent_subfolder, linked_file, copied_files,
+        copied_count, skipped — or error details.
+    """
+    file_manager = FileManager()
+    use_case = CreateSeedLinksUseCase(file_manager)
+    return use_case.execute(library_file, original_download_folder).to_dict()
+
+
+def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
+    """
+    Place subtitle files alongside an organised video file.
+
+    Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source
+    video, filters them according to the user's SubtitlePreferences (languages,
+    min size, SDH, forced), and hard-links the passing files next to the
+    destination video with the correct naming convention:
+        fr.srt / fr.sdh.srt / fr.forced.srt / en.srt …
+
+    Call this right after move_media or copy_media, passing the same source and
+    destination paths. If no subtitles are found, returns ok with placed_count=0.
+
+    Args:
+        source_video: Absolute path to the original video file (in the download folder).
+        destination_video: Absolute path to the placed video file (in the library).
+
+    Returns:
+        Dict with status, placed list (source, destination, filename), placed_count,
+        skipped_count — or error details.
+    """
+    file_manager = FileManager()
+    use_case = ManageSubtitlesUseCase(file_manager)
+    return use_case.execute(source_video, destination_video).to_dict()
+
+
+def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
+    """
+    Teach Alfred a new token mapping and persist it to the learned knowledge pack.
+
+    Use this when a subtitle file contains an unrecognised token — after confirming
+    with the user what the token means, call learn() to persist it so Alfred
+    recognises it in future scans.
+
+    Args:
+        pack: Knowledge pack name. Currently only "subtitles" is supported.
+        category: Category within the pack: "languages", "types", or "formats".
+        key: The entry key — e.g. ISO 639-1 language code ("es"), type id ("sdh").
+        values: List of tokens to add — e.g. ["spanish", "espanol", "spa"].
+
+    Returns:
+        Dict with status, added_count, and the updated token list.
+    """
+    _VALID_PACKS = {"subtitles"}
+    _VALID_CATEGORIES = {"languages", "types", "formats"}
+
+    if pack not in _VALID_PACKS:
+        return {"status": "error", "error": "unknown_pack", "message": f"Unknown pack '{pack}'. Valid: {sorted(_VALID_PACKS)}"}
+
+    if category not in _VALID_CATEGORIES:
+        return {"status": "error", "error": "unknown_category", "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}"}
+
+    learned_path = _LEARNED_ROOT / "subtitles_learned.yaml"
+    _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)
+
+    data: dict = {}
+    if learned_path.exists():
+        try:
+            with open(learned_path, encoding="utf-8") as f:
+                data = yaml.safe_load(f) or {}
+        except Exception as e:
+            return {"status": "error", "error": "read_failed", "message": str(e)}
+
+    cat_data = data.setdefault(category, {})
+    entry = cat_data.setdefault(key, {"tokens": []})
+    existing = entry.get("tokens", [])
+    new_tokens = [v for v in values if v not in existing]
+    entry["tokens"] = existing + new_tokens
+
+    tmp = learned_path.with_suffix(".yaml.tmp")
+    try:
+        with open(tmp, "w", encoding="utf-8") as f:
+            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
+        tmp.rename(learned_path)
+    except Exception as e:
+        tmp.unlink(missing_ok=True)
+        return {"status": "error", "error": "write_failed", "message": str(e)}
+
+    return {
+        "status": "ok",
+        "pack": pack,
+        "category": category,
+        "key": key,
+        "added_count": len(new_tokens),
+        "tokens": entry["tokens"],
+    }
+

 def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
    """
@@ -0,0 +1,3 @@
+from .loader import WorkflowLoader
+
+__all__ = ["WorkflowLoader"]
@@ -0,0 +1,52 @@
+"""WorkflowLoader — autodiscovers and loads workflow YAML files.
+
+Scans the workflows/ directory for all .yaml files and exposes them
+as dicts. No manual registration needed — drop a new .yaml file and
+it will be picked up automatically.
+"""
+
+import logging
+from pathlib import Path
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+_WORKFLOWS_DIR = Path(__file__).parent
+
+
+class WorkflowLoader:
+    """
+    Loads all workflow definitions from the workflows/ directory.
+
+    Usage:
+        loader = WorkflowLoader()
+        all_workflows = loader.all()
+        workflow = loader.get("organize_media")
+    """
+
+    def __init__(self):
+        self._workflows: dict[str, dict] = {}
+        self._load()
+
+    def _load(self) -> None:
+        for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
+            try:
+                data = yaml.safe_load(path.read_text(encoding="utf-8"))
+                name = data.get("name") or path.stem
+                self._workflows[name] = data
+                logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
+            except Exception as e:
+                logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
+
+    def all(self) -> dict[str, dict]:
+        """Return all loaded workflows keyed by name."""
+        return self._workflows
+
+    def get(self, name: str) -> dict | None:
+        """Return a specific workflow by name, or None if not found."""
+        return self._workflows.get(name)
+
+    def names(self) -> list[str]:
+        """Return all available workflow names."""
+        return list(self._workflows.keys())
@@ -0,0 +1,69 @@
+name: manage_subtitles
+description: >
+  Place subtitle files alongside a video that has just been organised into the library.
+  Detects the release pattern automatically, identifies and classifies all tracks,
+  filters by user rules, and hard-links matching files to the destination.
+  If any tracks are unrecognised, asks the user and optionally teaches Alfred.
+
+trigger:
+  examples:
+    - "handle subtitles for The X-Files S01E01"
+    - "place the subs next to the file"
+    - "subtitles are in the Subs/ folder"
+    - "add subtitles"
+
+tools:
+  - manage_subtitles
+  - learn
+
+memory:
+  SubtitlePreferences: read
+  Workflow: read-write
+
+steps:
+  - id: place_subtitles
+    tool: manage_subtitles
+    description: >
+      Detect release pattern, identify and classify all subtitle tracks,
+      filter by rules, hard-link matching files next to the destination video.
+      Reads SubtitlePreferences from LTM for language/type/format filtering.
+    params:
+      source_video: "{source_video}"
+      destination_video: "{destination_video}"
+      imdb_id: "{imdb_id}"
+      media_type: "{media_type}"
+      release_group: "{release_group}"
+      season: "{season}"
+      episode: "{episode}"
+    on_result:
+      ok_placed_zero: skip               # no subtitles found — not an error
+      needs_clarification: ask_user      # unrecognised tokens found
+
+  - id: ask_user
+    description: >
+      Some tracks could not be classified. Show the user the unresolved tokens
+      and ask if they want to teach Alfred what they mean.
+      If yes → go to learn_tokens. If no → end workflow.
+    ask_user:
+      question: >
+        I could not identify some tokens in the subtitle files: {unresolved}.
+        Do you want to teach me what they mean?
+      answers:
+        # Keys are quoted on purpose: a YAML 1.1 parser such as PyYAML resolves
+        # bare yes/no to booleans, so the answers would never match as strings.
+        "yes": { next_step: learn_tokens }
+        "no":  { next_step: end }
+
+  - id: learn_tokens
+    tool: learn
+    description: >
+      Persist a new token mapping to the learned knowledge pack so Alfred
+      recognises it in future scans without asking again.
+    params:
+      pack: "subtitles"
+      category: "{token_category}"   # "languages", "types", or "formats"
+      key: "{token_key}"             # e.g. "es", "de"
+      values: "{token_values}"       # e.g. ["spanish", "espanol"]
+
+subtitle_naming:
+  standard:  "{lang}.{ext}"
+  sdh:       "{lang}.sdh.{ext}"
+  forced:    "{lang}.forced.{ext}"
@@ -0,0 +1,82 @@
+name: organize_media
+description: >
+  Organise a downloaded series or movie into the media library.
+  Triggered when the user asks to move/organize a specific title.
+  Always moves the video file. Optionally creates seed links in the
+  torrents folder so qBittorrent can keep seeding.
+
+trigger:
+  examples:
+    - "organize Breaking Bad"
+    - "organise Severance season 2"
+    - "move Inception to my library"
+    - "organize Breaking Bad season 1, keep seeding"
+
+tools:
+  - list_folder
+  - find_media_imdb_id
+  - resolve_destination
+  - move_media
+  - manage_subtitles
+  - create_seed_links
+
+memory:
+  WorkspacePaths: read
+  LibraryPaths: read
+  Library: read-write
+  Workflow: read-write
+  Entities: read-write
+
+steps:
+  - id: list_downloads
+    tool: list_folder
+    description: List the download folder to find the target files.
+    params:
+      folder_type: download
+
+  - id: identify_media
+    tool: find_media_imdb_id
+    description: Confirm title, type (series/movie), and metadata via TMDB.
+
+  - id: resolve_destination
+    tool: resolve_destination
+    description: >
+      Compute the correct destination path in the library.
+      Uses the release name + TMDB metadata to build folder and file names.
+      If multiple series folders exist for this title, returns
+      needs_clarification and the user must pick one (re-call with confirmed_folder).
+
+  - id: move_file
+    tool: move_media
+    description: >
+      Move the video file to library_file returned by resolve_destination.
+
+  - id: handle_subtitles
+    tool: manage_subtitles
+    description: >
+      Place subtitle files alongside the video in the library.
+      Pass the original source path and the new library destination path.
+    on_missing: skip
+
+  - id: ask_seeding
+    ask_user:
+      question: "Do you want to keep seeding this torrent?"
+      answers:
+        "yes": { next_step: create_seed_links }
+        "no":  { next_step: update_library }
+
+  - id: create_seed_links
+    tool: create_seed_links
+    description: >
+      Hard-link the library video file back into torrents/<original_folder>/
+      and copy all remaining files from the original download folder
+      (subs, nfo, jpg, …) so the torrent stays complete for seeding.
+
+  - id: update_library
+    memory_write: Library
+    description: Add the entry to the LTM library after a successful move.
+
+naming_convention:
+  # Resolved by domain entities (Movie, Episode) — not hardcoded here
+  tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
+  movie: "{title} ({year})/{title}.{year}.{ext}"