feat: major architectural refactor

- Refactor memory system (episodic/STM/LTM with components)
- Implement complete subtitle domain (scanner, matcher, placer)
- Add YAML workflow infrastructure
- Externalize knowledge base (patterns, release groups)
- Add comprehensive testing suite
- Create manual testing CLIs
This commit is contained in:
2026-05-11 21:33:37 +02:00
parent 62b5d0b998
commit 249c5de76a
103 changed files with 8559 additions and 1346 deletions
+28 -2
View File
@@ -4,6 +4,7 @@ import json
from typing import Any
from alfred.infrastructure.persistence import get_memory
from alfred.infrastructure.persistence.memory import MemoryRegistry
from .registry import Tool
@@ -13,6 +14,7 @@ class PromptBuilder:
def __init__(self, tools: dict[str, Tool]):
self.tools = tools
self._memory_registry = MemoryRegistry()
def build_tools_spec(self) -> list[dict[str, Any]]:
"""Build the tool specification for the LLM API."""
@@ -109,11 +111,30 @@ class PromptBuilder:
return "\n".join(lines)
def _format_memory_schema(self) -> str:
    """Render the memory registry schema as a text section for the prompt.

    The registry schema is walked tier by tier. Tiers without any component
    are omitted; each remaining tier gets a bracketed heading followed by one
    line per component (name, access mode, description) and one line per
    documented field.

    Returns:
        The section as a single newline-joined string, always starting with
        the "MEMORY COMPONENTS:" header.
    """
    labels_by_tier = {
        "ltm": "LONG-TERM (persisted)",
        "stm": "SHORT-TERM (session)",
        "episodic": "EPISODIC (volatile)",
    }
    out = ["MEMORY COMPONENTS:"]

    for tier, components in self._memory_registry.schema().items():
        if components:
            # Tiers without a friendly label fall back to their upper-cased key.
            heading = labels_by_tier.get(tier, tier.upper())
            out.append(f"\n [{heading}]")
            for component in components:
                # Components that do not declare an access mode are shown as "read".
                mode = component.get("access", "read")
                out.append(f" {component['name']} ({mode}): {component['description']}")
                out.extend(
                    f" · {field_name}: {field_desc}"
                    for field_name, field_desc in component.get("fields", {}).items()
                )

    return "\n".join(out)
def _format_config_context(self, memory) -> str:
"""Format configuration context."""
lines = ["CURRENT CONFIGURATION:"]
if memory.ltm.config:
for key, value in memory.ltm.config.items():
folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()}
if folders:
for key, value in folders.items():
lines.append(f" - {key}: {value}")
else:
lines.append(" (no configuration set)")
@@ -138,6 +159,9 @@ class PromptBuilder:
tools_desc = self._format_tools_description()
tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
# Memory schema
memory_schema = self._format_memory_schema()
# Configuration
config_section = self._format_config_context(memory)
if config_section:
@@ -172,6 +196,8 @@ EXAMPLES:
{language_instruction}
{tools_section}
{memory_schema}
{config_section}
{stm_context}
{episodic_context}
+5
View File
@@ -97,6 +97,11 @@ def make_tools(settings) -> dict[str, Tool]:
tool_functions = [
fs_tools.set_path_for_folder,
fs_tools.list_folder,
fs_tools.resolve_destination,
fs_tools.move_media,
fs_tools.manage_subtitles,
fs_tools.create_seed_links,
fs_tools.learn,
api_tools.find_media_imdb_id,
api_tools.find_torrent,
api_tools.add_torrent_by_index,
+191 -1
View File
@@ -1,10 +1,200 @@
"""Filesystem tools for folder management."""
from pathlib import Path
from typing import Any
from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase
import alfred as _alfred_pkg
import yaml
from alfred.application.filesystem import (
CreateSeedLinksUseCase,
ListFolderUseCase,
ManageSubtitlesUseCase,
MoveMediaUseCase,
ResolveDestinationUseCase,
SetFolderPathUseCase,
)
from alfred.infrastructure.filesystem import FileManager
# Directory where learn() persists learned knowledge packs. It is resolved
# relative to the alfred package location: two levels above the package's
# __file__, then "data/knowledge".
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
def move_media(source: str, destination: str) -> dict[str, Any]:
    """
    Move a media file to a destination path.

    Copies the file safely first (with integrity check), then deletes the source.
    Use this to organise a downloaded file into the media library.

    Args:
        source: Absolute path to the source file.
        destination: Absolute path to the destination file (must not already exist).

    Returns:
        Dict with status, source, destination, filename, and size — or error details.
    """
    # The use case does the work; this tool only wires it up and serialises
    # the response into a plain dict.
    outcome = MoveMediaUseCase(FileManager()).execute(source, destination)
    return outcome.to_dict()
def resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """
    Compute the destination path in the media library for a release.

    Call this before move_media to get the correct library path. Handles:
      - Parsing the release name (quality, codec, group, season/episode)
      - Looking up any existing series folder in the library
      - Applying group-conflict rules (asks user if ambiguous)
      - Building the full destination path with correct naming conventions

    Args:
        release_name: Raw release folder or file name
            (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST").
        source_file: Absolute path to the source video file (used for extension).
        tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz").
        tmdb_year: Release/start year from TMDB (e.g. 1997).
        tmdb_episode_title: Episode title from TMDB for single-episode releases
            (e.g. "The Routine"). Omit for season packs and movies.
        confirmed_folder: If a previous call returned needs_clarification, pass
            the user-chosen folder name here to proceed.

    Returns:
        On success: dict with status, library_file, series_folder, season_folder,
            series_folder_name, season_folder_name, filename,
            is_new_series_folder.
        On ambiguity: dict with status="needs_clarification", question, options.
        On error: dict with status="error", error, message.
    """
    # Every argument is forwarded by keyword, unchanged, to the use case.
    outcome = ResolveDestinationUseCase().execute(
        release_name=release_name,
        source_file=source_file,
        tmdb_title=tmdb_title,
        tmdb_year=tmdb_year,
        tmdb_episode_title=tmdb_episode_title,
        confirmed_folder=confirmed_folder,
    )
    return outcome.to_dict()
def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
    """
    Prepare a torrent subfolder so qBittorrent can keep seeding after a move.

    Hard-links the video file from the library into torrents/<original_folder_name>/,
    then copies all remaining files from the original download folder (subtitles,
    .nfo, .jpg, .txt, …) so the torrent data is complete.

    Call this after move_media when the user wants to keep seeding.

    Args:
        library_file: Absolute path to the video file now in the library.
        original_download_folder: Absolute path to the original download folder
            (may still contain subs, nfo, and other release files).

    Returns:
        Dict with status, torrent_subfolder, linked_file, copied_files,
        copied_count, skipped — or error details.
    """
    # Delegate to the use case and hand its response back as a plain dict.
    outcome = CreateSeedLinksUseCase(FileManager()).execute(
        library_file, original_download_folder
    )
    return outcome.to_dict()
def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
    """
    Place subtitle files alongside an organised video file.

    Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source
    video, filters them according to the user's SubtitlePreferences (languages,
    min size, SDH, forced), and hard-links the passing files next to the
    destination video with the correct naming convention:
        fr.srt / fr.sdh.srt / fr.forced.srt / en.srt …

    Call this right after move_media or copy_media, passing the same source and
    destination paths. If no subtitles are found, returns ok with placed_count=0.

    Args:
        source_video: Absolute path to the original video file (in the download folder).
        destination_video: Absolute path to the placed video file (in the library).

    Returns:
        Dict with status, placed list (source, destination, filename), placed_count,
        skipped_count — or error details.
    """
    # Delegate to the use case and hand its response back as a plain dict.
    outcome = ManageSubtitlesUseCase(FileManager()).execute(
        source_video, destination_video
    )
    return outcome.to_dict()
def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
    """
    Teach Alfred a new token mapping and persist it to the learned knowledge pack.

    Use this when a subtitle file contains an unrecognised token — after confirming
    with the user what the token means, call learn() to persist it so Alfred
    recognises it in future scans.

    The learned pack is read, merged with the new tokens (existing tokens and
    repeated values are not added twice), written to a temporary file and then
    swapped into place, so a failed write never truncates the existing pack.

    Args:
        pack: Knowledge pack name. Currently only "subtitles" is supported.
        category: Category within the pack: "languages", "types", or "formats".
        key: The entry key — e.g. ISO 639-1 language code ("es"), type id ("sdh").
        values: List of tokens to add — e.g. ["spanish", "espanol", "spa"].
            A single bare string is accepted and treated as a one-item list.

    Returns:
        On success: dict with status="ok", pack, category, key, added_count,
        and the updated token list under "tokens".
        On failure: dict with status="error", an "error" code (unknown_pack,
        unknown_category, invalid_values, read_failed, write_failed) and a
        human-readable "message".
    """
    _VALID_PACKS = {"subtitles"}
    _VALID_CATEGORIES = {"languages", "types", "formats"}

    if pack not in _VALID_PACKS:
        return {"status": "error", "error": "unknown_pack", "message": f"Unknown pack '{pack}'. Valid: {sorted(_VALID_PACKS)}"}
    if category not in _VALID_CATEGORIES:
        return {"status": "error", "error": "unknown_category", "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}"}

    # A bare string would otherwise be iterated character by character and
    # persisted as one token per letter.
    if isinstance(values, str):
        values = [values]
    if not isinstance(values, (list, tuple)) or not all(isinstance(v, str) for v in values):
        return {"status": "error", "error": "invalid_values", "message": "'values' must be a list of strings"}

    learned_path = _LEARNED_ROOT / "subtitles_learned.yaml"

    data: dict = {}
    if learned_path.exists():
        # Tool boundary: any read/parse failure is reported to the caller as
        # a structured error instead of propagating.
        try:
            with open(learned_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            return {"status": "error", "error": "read_failed", "message": str(e)}

    # The file is user-editable: refuse to touch it when its shape is not the
    # expected mapping -> mapping -> {"tokens": [...]} rather than crashing or
    # silently overwriting what is there. Null placeholders count as empty.
    malformed = {
        "status": "error",
        "error": "read_failed",
        "message": f"Unexpected structure in {learned_path.name}",
    }
    if not isinstance(data, dict):
        return malformed
    cat_data = data.get(category) or {}
    if not isinstance(cat_data, dict):
        return malformed
    entry = cat_data.get(key) or {}
    if not isinstance(entry, dict):
        return malformed
    existing = entry.get("tokens") or []
    if not isinstance(existing, list):
        return malformed

    # Keep first-seen order; skip tokens already stored and repeats in `values`.
    new_tokens: list[str] = []
    for token in values:
        if token not in existing and token not in new_tokens:
            new_tokens.append(token)

    entry["tokens"] = existing + new_tokens
    cat_data[key] = entry
    data[category] = cat_data

    tmp = learned_path.with_suffix(".yaml.tmp")
    try:
        _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        # replace() overwrites an existing target on every platform, whereas
        # rename() raises on Windows once the learned file already exists.
        tmp.replace(learned_path)
    except Exception as e:
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Cleanup is best-effort; the original failure is what gets reported.
            pass
        return {"status": "error", "error": "write_failed", "message": str(e)}

    return {
        "status": "ok",
        "pack": pack,
        "category": category,
        "key": key,
        "added_count": len(new_tokens),
        "tokens": entry["tokens"],
    }
def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
"""
+3
View File
@@ -0,0 +1,3 @@
from .loader import WorkflowLoader
__all__ = ["WorkflowLoader"]
+52
View File
@@ -0,0 +1,52 @@
"""WorkflowLoader — autodiscovers and loads workflow YAML files.
Scans the workflows/ directory for all .yaml files and exposes them
as dicts. No manual registration needed — drop a new .yaml file and
it will be picked up automatically.
"""
import logging
from pathlib import Path
import yaml
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Directory containing this module; WorkflowLoader globs its *.yaml files from here.
_WORKFLOWS_DIR = Path(__file__).parent
class WorkflowLoader:
"""
Loads all workflow definitions from the workflows/ directory.
Usage:
loader = WorkflowLoader()
all_workflows = loader.all()
workflow = loader.get("organize_media")
"""
def __init__(self):
self._workflows: dict[str, dict] = {}
self._load()
def _load(self) -> None:
for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
try:
data = yaml.safe_load(path.read_text(encoding="utf-8"))
name = data.get("name") or path.stem
self._workflows[name] = data
logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
except Exception as e:
logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
def all(self) -> dict[str, dict]:
"""Return all loaded workflows keyed by name."""
return self._workflows
def get(self, name: str) -> dict | None:
"""Return a specific workflow by name, or None if not found."""
return self._workflows.get(name)
def names(self) -> list[str]:
"""Return all available workflow names."""
return list(self._workflows.keys())
@@ -0,0 +1,69 @@
name: manage_subtitles
description: >
Place subtitle files alongside a video that has just been organised into the library.
Detects the release pattern automatically, identifies and classifies all tracks,
filters by user rules, and hard-links matching files to the destination.
If any tracks are unrecognised, asks the user and optionally teaches Alfred.
trigger:
examples:
- "handle subtitles for The X-Files S01E01"
- "place the subs next to the file"
- "subtitles are in the Subs/ folder"
- "add subtitles"
tools:
- manage_subtitles
- learn
memory:
SubtitlePreferences: read
Workflow: read-write
steps:
- id: place_subtitles
tool: manage_subtitles
description: >
Detect release pattern, identify and classify all subtitle tracks,
filter by rules, hard-link matching files next to the destination video.
Reads SubtitlePreferences from LTM for language/type/format filtering.
params:
source_video: "{source_video}"
destination_video: "{destination_video}"
imdb_id: "{imdb_id}"
media_type: "{media_type}"
release_group: "{release_group}"
season: "{season}"
episode: "{episode}"
on_result:
ok_placed_zero: skip # no subtitles found — not an error
needs_clarification: ask_user # unrecognised tokens found
- id: ask_user
description: >
Some tracks could not be classified. Show the user the unresolved tokens
and ask if they want to teach Alfred what they mean.
If yes → go to learn_tokens. If no → end workflow.
ask_user:
question: >
I could not identify some tokens in the subtitle files: {unresolved}.
Do you want to teach me what they mean?
answers:
"yes": { next_step: learn_tokens }
"no": { next_step: end }
- id: learn_tokens
tool: learn
description: >
Persist a new token mapping to the learned knowledge pack so Alfred
recognises it in future scans without asking again.
params:
pack: "subtitles"
category: "{token_category}" # "languages", "types", or "formats"
key: "{token_key}" # e.g. "es", "de"
values: "{token_values}" # e.g. ["spanish", "espanol"]
subtitle_naming:
standard: "{lang}.{ext}"
sdh: "{lang}.sdh.{ext}"
forced: "{lang}.forced.{ext}"
@@ -0,0 +1,82 @@
name: organize_media
description: >
Organise a downloaded series or movie into the media library.
Triggered when the user asks to move/organize a specific title.
Always moves the video file. Optionally creates seed links in the
torrents folder so qBittorrent can keep seeding.
trigger:
examples:
- "organize Breaking Bad"
- "organise Severance season 2"
- "move Inception to my library"
- "organize Breaking Bad season 1, keep seeding"
tools:
- list_folder
- find_media_imdb_id
- resolve_destination
- move_media
- manage_subtitles
- create_seed_links
memory:
WorkspacePaths: read
LibraryPaths: read
Library: read-write
Workflow: read-write
Entities: read-write
steps:
- id: list_downloads
tool: list_folder
description: List the download folder to find the target files.
params:
folder_type: download
- id: identify_media
tool: find_media_imdb_id
description: Confirm title, type (series/movie), and metadata via TMDB.
- id: resolve_destination
tool: resolve_destination
description: >
Compute the correct destination path in the library.
Uses the release name + TMDB metadata to build folder and file names.
If multiple series folders exist for this title, returns
needs_clarification and the user must pick one (re-call with confirmed_folder).
- id: move_file
tool: move_media
description: >
Move the video file to library_file returned by resolve_destination.
- id: handle_subtitles
tool: manage_subtitles
description: >
Place subtitle files alongside the video in the library.
Pass the original source path and the new library destination path.
on_missing: skip
- id: ask_seeding
ask_user:
question: "Do you want to keep seeding this torrent?"
answers:
"yes": { next_step: create_seed_links }
"no": { next_step: update_library }
- id: create_seed_links
tool: create_seed_links
description: >
Hard-link the library video file back into torrents/<original_folder>/
and copy all remaining files from the original download folder
(subs, nfo, jpg, …) so the torrent stays complete for seeding.
- id: update_library
memory_write: Library
description: Add the entry to the LTM library after a successful move.
naming_convention:
# Resolved by domain entities (Movie, Episode) — not hardcoded here
tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
movie: "{title} ({year})/{title}.{year}.{ext}"