From ba6f016d492ecde6719f5209f7db2573ebc301db Mon Sep 17 00:00:00 2001 From: Francwa Date: Fri, 15 May 2026 11:02:25 +0200 Subject: [PATCH] feat: generic MetadataStore + read_release_metadata + query_library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract MetadataStore from SubtitleMetadataStore (alfred/infrastructure/metadata/). Generic load/save + typed update helpers (update_parse, update_probe, update_tmdb) for the per-release .alfred/metadata.yaml. - SubtitleMetadataStore becomes a thin facade — owns subtitle_history shape, delegates I/O to MetadataStore. - Agent._execute_tool_call auto-persists successful analyze_release / probe_media / find_media_imdb_id results to the release's .alfred file. find_media_imdb_id follows release_focus when it has no path argument. - New tools: · read_release_metadata(release_path) — cacheable, key=release_path. Returns the .alfred content or has_metadata=false. · query_library(name) — substring scan across configured library roots. - Both new tools added to CORE_TOOLS (always visible). 
--- alfred/agent/agent.py | 64 ++++++ alfred/agent/prompt.py | 2 + alfred/agent/registry.py | 2 + alfred/agent/tools/filesystem.py | 75 +++++++ alfred/agent/tools/specs/query_library.yaml | 54 ++++++ .../tools/specs/read_release_metadata.yaml | 55 ++++++ alfred/infrastructure/metadata/__init__.py | 5 + alfred/infrastructure/metadata/store.py | 183 ++++++++++++++++++ .../infrastructure/subtitle/metadata_store.py | 115 +++-------- 9 files changed, 466 insertions(+), 89 deletions(-) create mode 100644 alfred/agent/tools/specs/query_library.yaml create mode 100644 alfred/agent/tools/specs/read_release_metadata.yaml create mode 100644 alfred/infrastructure/metadata/__init__.py create mode 100644 alfred/infrastructure/metadata/store.py diff --git a/alfred/agent/agent.py b/alfred/agent/agent.py index cfa9ebe..3a35461 100644 --- a/alfred/agent/agent.py +++ b/alfred/agent/agent.py @@ -3,8 +3,10 @@ import json import logging from collections.abc import AsyncGenerator +from pathlib import Path from typing import Any +from alfred.infrastructure.metadata import MetadataStore from alfred.infrastructure.persistence import get_memory from alfred.settings import settings @@ -243,6 +245,7 @@ class Agent: Today: - Update release_focus when a path-keyed inspector runs. + - Persist inspector results into the release's `.alfred/metadata.yaml`. - Refresh episodic.last_search_results on find_torrent cache hits so get_torrent_by_index keeps pointing at the right list. """ @@ -255,6 +258,11 @@ class Agent: if isinstance(path, str) and path: memory.stm.release_focus.focus(path) + # Persist inspector results to .alfred/metadata.yaml (skip on cache + # hit — the file is already up to date from the original run). + if not from_cache: + self._maybe_update_alfred(tool_name, args, result) + # Episodic refresh when find_torrent's cache short-circuits the call. 
if from_cache and tool_name == "find_torrent": torrents = result.get("torrents") or [] @@ -263,6 +271,62 @@ class Agent: query=query, results=torrents, search_type="torrent" ) + def _maybe_update_alfred( + self, + tool_name: str, + args: dict[str, Any], + result: dict[str, Any], + ) -> None: + """ + Persist a successful inspector result into the release's + `.alfred/metadata.yaml`. No-op when the release root can't be resolved. + """ + if tool_name not in {"analyze_release", "probe_media", "find_media_imdb_id"}: + return + + release_root = self._resolve_release_root(tool_name, args) + if release_root is None: + return + + try: + store = MetadataStore(release_root) + if tool_name == "analyze_release": + store.update_parse(result) + elif tool_name == "probe_media": + store.update_probe(result) + elif tool_name == "find_media_imdb_id": + store.update_tmdb(result) + except Exception as e: + logger.warning( + f"Failed to update .alfred for {tool_name} at {release_root}: {e}" + ) + + @staticmethod + def _resolve_release_root( + tool_name: str, + args: dict[str, Any], + ) -> Path | None: + """ + Figure out which release folder owns this call. + + - analyze_release / probe_media: derived from source_path + (folder kept as-is, file walked up to its parent). + - find_media_imdb_id: follow the current release focus in STM. + """ + if tool_name in {"analyze_release", "probe_media"}: + raw = args.get("source_path") + if not isinstance(raw, str) or not raw: + return None + path = Path(raw) + return path if path.is_dir() else path.parent + + # find_media_imdb_id has no path arg — rely on release focus. 
+ focus = get_memory().stm.release_focus.current_release_path + if not focus: + return None + path = Path(focus) + return path if path.is_dir() else path.parent + async def step_streaming( self, user_input: str, completion_id: str, created_ts: int, model: str ) -> AsyncGenerator[dict[str, Any]]: diff --git a/alfred/agent/prompt.py b/alfred/agent/prompt.py index 06ca073..9a72c70 100644 --- a/alfred/agent/prompt.py +++ b/alfred/agent/prompt.py @@ -17,6 +17,8 @@ CORE_TOOLS: tuple[str, ...] = ( "set_language", "set_path_for_folder", "list_folder", + "read_release_metadata", + "query_library", "start_workflow", "end_workflow", ) diff --git a/alfred/agent/registry.py b/alfred/agent/registry.py index d36b0ee..5346380 100644 --- a/alfred/agent/registry.py +++ b/alfred/agent/registry.py @@ -138,6 +138,8 @@ def make_tools(settings) -> dict[str, Tool]: tool_functions = [ fs_tools.set_path_for_folder, fs_tools.list_folder, + fs_tools.read_release_metadata, + fs_tools.query_library, fs_tools.analyze_release, fs_tools.probe_media, fs_tools.resolve_season_destination, diff --git a/alfred/agent/tools/filesystem.py b/alfred/agent/tools/filesystem.py index c29ac62..9abadec 100644 --- a/alfred/agent/tools/filesystem.py +++ b/alfred/agent/tools/filesystem.py @@ -30,6 +30,8 @@ from alfred.application.filesystem.resolve_destination import ( from alfred.infrastructure.filesystem import FileManager, create_folder, move from alfred.infrastructure.filesystem.ffprobe import probe from alfred.infrastructure.filesystem.find_video import find_video_file +from alfred.infrastructure.metadata import MetadataStore +from alfred.infrastructure.persistence import get_memory _LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" @@ -288,3 +290,76 @@ def list_folder(folder_type: str, path: str = ".") -> dict[str, Any]: use_case = ListFolderUseCase(file_manager) response = use_case.execute(folder_type, path) return response.to_dict() + + +def read_release_metadata(release_path: 
def read_release_metadata(release_path: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/read_release_metadata.yaml.

    Resolves `release_path` to a release folder (a folder is used as-is, a
    file resolves to its parent) and returns the content of that folder's
    `.alfred/metadata.yaml`.

    Returns a `status="ok"` dict with `release_path`, `has_metadata` and
    `metadata` (empty dict when the file is absent), or a `status="error"`
    dict with `error="not_found"` when the path is empty, not a string, or
    does not exist on disk.
    """
    # Path("") is the current directory and "exists", so an empty argument
    # would silently read whatever .alfred sits in the process cwd.
    if not isinstance(release_path, str) or not release_path.strip():
        return {
            "status": "error",
            "error": "not_found",
            "message": "release_path must be a non-empty string",
        }

    path = Path(release_path)
    if not path.exists():
        return {
            "status": "error",
            "error": "not_found",
            "message": f"{release_path} does not exist",
        }

    root = path if path.is_dir() else path.parent
    store = MetadataStore(root)
    has_metadata = store.exists()
    return {
        "status": "ok",
        "release_path": str(root),
        "has_metadata": has_metadata,
        "metadata": store.load() if has_metadata else {},
    }


def _matching_release_folders(
    collection: str, root: Any, needle: str
) -> list[dict[str, Any]]:
    """Return the depth-1 folders of `root` whose name contains `needle`.

    `needle` must already be lower-cased. Entries are sorted by name
    (case-insensitive) so the caller gets a stable, indexable list.
    A missing, unset or unreadable root yields an empty list.
    """
    if not root:
        return []
    root_path = Path(root)
    found: list[dict[str, Any]] = []
    try:
        if not root_path.is_dir():
            return []
        entries = sorted(root_path.iterdir(), key=lambda e: e.name.lower())
        for entry in entries:
            if needle not in entry.name.lower() or not entry.is_dir():
                continue
            found.append(
                {
                    "collection": collection,
                    "name": entry.name,
                    "path": str(entry),
                    "has_metadata": MetadataStore(entry).exists(),
                }
            )
    except OSError:
        # One unreadable root (permissions, unmounted share) must not abort
        # the scan of the other libraries; keep what was collected so far.
        pass
    return found


def query_library(name: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/query_library.yaml.

    Scans every configured library root at depth 1 and reports the folders
    whose name contains `name` (case-insensitive substring).

    Returns a `status="ok"` dict with `query`, `match_count` and `matches`
    (each `{collection, name, path, has_metadata}`), or a `status="error"`
    dict with `error="empty_name"` (blank or non-string query) or
    `error="no_libraries"` (no library path configured).
    """
    needle = name.strip().lower() if isinstance(name, str) else ""
    if not needle:
        return {
            "status": "error",
            "error": "empty_name",
            "message": "name must be a non-empty string",
        }

    memory = get_memory()
    roots = memory.ltm.library_paths.to_dict() or {}
    if not roots:
        return {
            "status": "error",
            "error": "no_libraries",
            "message": "No library paths configured — call set_path_for_folder first.",
        }

    matches: list[dict[str, Any]] = []
    for collection, root in roots.items():
        matches.extend(_matching_release_folders(collection, root, needle))

    return {
        "status": "ok",
        "query": name,
        "match_count": len(matches),
        "matches": matches,
    }
roots whose name + contains a substring (case-insensitive). + +description: | + Scans every configured library root (movies, tv_shows, …) at depth 1 + and returns folders whose name contains the query. For each match, + reports whether a `.alfred/metadata.yaml` exists — handy to spot + releases that have not been inspected yet. Does not recurse into + seasons / episodes; one entry per release folder. + +when_to_use: | + - To answer "do I already have X?" without listing whole library + roots one by one. + - To pick the release_path to feed read_release_metadata or any + inspector tool. + +when_not_to_use: | + - To list the *whole* library — that scan should live behind a + dedicated tool (not implemented yet). + - To browse a single root — use list_folder instead, it's cheaper + and doesn't open every library. + +next_steps: | + - When one match is found: feed its path to read_release_metadata or + analyze_release. + - When several match: surface the indexed list to the user and ask + which one they mean. + +parameters: + name: + description: Case-insensitive substring of the release name to look for. + why_needed: | + Library folders are named after the release (Title.Year.... or + Title (Year)). A substring is enough to catch typical user + phrasings ("foundation", "inception 2010"). + example: foundation + +returns: + ok: + description: Scan completed (possibly zero matches). + fields: + status: "'ok'" + query: The query string as received. + match_count: Number of matching folders. + matches: "List of {collection, name, path, has_metadata}." + + error: + description: Scan could not run. + fields: + error: Short error code (no_libraries, empty_name). + message: Human-readable explanation. 
diff --git a/alfred/agent/tools/specs/read_release_metadata.yaml b/alfred/agent/tools/specs/read_release_metadata.yaml new file mode 100644 index 0000000..43e1d76 --- /dev/null +++ b/alfred/agent/tools/specs/read_release_metadata.yaml @@ -0,0 +1,55 @@ +name: read_release_metadata + +summary: > + Read the `.alfred/metadata.yaml` file for a release folder. + +description: | + Returns whatever has been previously persisted by inspector tools + (analyze_release, probe_media, find_media_imdb_id) and by the subtitle + pipeline. Works for any folder — download or library — as long as the + release has been touched at least once. Missing metadata is not an + error: the tool returns `has_metadata=false` with an empty dict. + +when_to_use: | + - Before re-running analyze_release / probe_media on a release you + might have already seen — saves a full re-inspection. + - To answer "what do we know about X?" without scanning. + - To list which releases in a library have no `.alfred` yet (loop + + `has_metadata`). + +when_not_to_use: | + - To search a library by name — use query_library. + - When you need a fresh probe/parse — call the inspector directly, + the result will be persisted automatically. + +next_steps: | + - If `has_metadata=false`, decide whether to inspect now + (analyze_release / probe_media). + - If `has_metadata=true`, read `metadata.parse`, `metadata.probe`, + `metadata.tmdb` blocks before deciding next actions. + +cache: + key: release_path + +parameters: + release_path: + description: Absolute path to the release folder (or a file directly inside it). + why_needed: | + The store lives at `{release_root}/.alfred/metadata.yaml`. A file + path is auto-resolved to its parent folder. + example: /mnt/library/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG + +returns: + ok: + description: Release inspected (file may or may not exist). + fields: + status: "'ok'" + release_path: Absolute path of the release folder. + has_metadata: True if `.alfred/metadata.yaml` exists. 
+ metadata: Full content of the file, or empty dict. + + error: + description: Path does not exist on disk. + fields: + error: Short error code (not_found). + message: Human-readable explanation. diff --git a/alfred/infrastructure/metadata/__init__.py b/alfred/infrastructure/metadata/__init__.py new file mode 100644 index 0000000..ee32815 --- /dev/null +++ b/alfred/infrastructure/metadata/__init__.py @@ -0,0 +1,5 @@ +"""Per-release `.alfred/metadata.yaml` persistence.""" + +from .store import MetadataStore + +__all__ = ["MetadataStore"] diff --git a/alfred/infrastructure/metadata/store.py b/alfred/infrastructure/metadata/store.py new file mode 100644 index 0000000..ee49cbf --- /dev/null +++ b/alfred/infrastructure/metadata/store.py @@ -0,0 +1,183 @@ +""" +MetadataStore — reads/writes the `.alfred/metadata.yaml` file colocated with +a release folder. + +The store is intentionally domain-agnostic: it knows how to atomically +load/save the YAML and exposes typed update helpers for the broad facts a +release carries (parse, probe, TMDB lookup, detected pattern). Subtitle +history lives next to the same file but is appended through a dedicated +helper kept under `alfred/infrastructure/subtitle/` so the subtitle pipeline +keeps full ownership of its payload shape. + +The file layout: + + {release_root}/ + .alfred/ + metadata.yaml + +The store never raises on a missing file — it returns empty defaults. Writes +are atomic (write to .tmp then rename). 
+""" + +from __future__ import annotations + +import logging +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import yaml + +logger = logging.getLogger(__name__) + + +class MetadataStore: + """Manages `.alfred/metadata.yaml` for one release folder.""" + + def __init__(self, release_root: str | Path): + self._root = Path(release_root) + self._alfred_dir = self._root / ".alfred" + self._metadata_path = self._alfred_dir / "metadata.yaml" + + # ------------------------------------------------------------------ + # Identity + # ------------------------------------------------------------------ + + @property + def release_root(self) -> Path: + return self._root + + @property + def metadata_path(self) -> Path: + return self._metadata_path + + def exists(self) -> bool: + return self._metadata_path.exists() + + # ------------------------------------------------------------------ + # Load / Save + # ------------------------------------------------------------------ + + def load(self) -> dict: + """Return the full metadata dict. Empty dict if file absent.""" + if not self._metadata_path.exists(): + return {} + try: + with open(self._metadata_path, encoding="utf-8") as f: + return yaml.safe_load(f) or {} + except Exception as e: + logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}") + return {} + + def save(self, data: dict) -> None: + """Atomically write metadata.yaml. 
Creates .alfred/ if needed.""" + self._alfred_dir.mkdir(parents=True, exist_ok=True) + tmp = self._metadata_path.with_suffix(".yaml.tmp") + try: + with open(tmp, "w", encoding="utf-8") as f: + yaml.safe_dump( + data, + f, + allow_unicode=True, + default_flow_style=False, + sort_keys=False, + ) + tmp.rename(self._metadata_path) + except Exception as e: + logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}") + tmp.unlink(missing_ok=True) + raise + + # ------------------------------------------------------------------ + # Generic update helper + # ------------------------------------------------------------------ + + def update_section(self, section: str, payload: dict[str, Any]) -> None: + """ + Merge `payload` into the top-level `section` block and stamp it. + + The section is replaced wholesale (not deep-merged) so the last + successful tool run reflects the current truth. A `_updated_at` + ISO-8601 timestamp is added inside the section. + """ + data = self.load() + stamped = dict(payload) + stamped["_updated_at"] = datetime.now(UTC).isoformat() + data[section] = stamped + self.save(data) + + # ------------------------------------------------------------------ + # Typed update helpers — one per inspector tool + # ------------------------------------------------------------------ + + def update_parse(self, parse_result: dict[str, Any]) -> None: + """Persist the result of analyze_release.""" + clean = {k: v for k, v in parse_result.items() if k != "status"} + self.update_section("parse", clean) + + def update_probe(self, probe_result: dict[str, Any]) -> None: + """Persist the result of probe_media.""" + clean = {k: v for k, v in probe_result.items() if k != "status"} + self.update_section("probe", clean) + + def update_tmdb(self, tmdb_result: dict[str, Any]) -> None: + """Persist the result of find_media_imdb_id.""" + clean = {k: v for k, v in tmdb_result.items() if k != "status"} + self.update_section("tmdb", clean) + # Also promote core 
identity fields to the top level so they are + # cheap to read without parsing the full tmdb block. + data = self.load() + for key in ("imdb_id", "tmdb_id", "media_type"): + if key in clean and clean[key] is not None: + data[key] = clean[key] + if "title" in clean and clean["title"]: + data.setdefault("title", clean["title"]) + self.save(data) + + # ------------------------------------------------------------------ + # Pattern (used by the subtitle pipeline) + # ------------------------------------------------------------------ + + def confirmed_pattern(self) -> str | None: + """Return the confirmed pattern_id, or None.""" + data = self.load() + if data.get("pattern_confirmed"): + return data.get("detected_pattern") + return None + + def mark_pattern_confirmed( + self, pattern_id: str, media_info: dict | None = None + ) -> None: + """Persist detected_pattern + pattern_confirmed=true.""" + data = self.load() + data["detected_pattern"] = pattern_id + data["pattern_confirmed"] = True + if media_info: + data.setdefault("media_type", media_info.get("media_type")) + data.setdefault("imdb_id", media_info.get("imdb_id")) + data.setdefault("title", media_info.get("title")) + self.save(data) + logger.info( + f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}" + ) + + # ------------------------------------------------------------------ + # Subtitle history (kept for backwards compatibility with the + # subtitle pipeline — payload shape is owned by the caller). 
+ # ------------------------------------------------------------------ + + def append_subtitle_history_entry(self, entry: dict[str, Any]) -> None: + """Append one entry (raw dict) to subtitle_history.""" + data = self.load() + history = data.setdefault("subtitle_history", []) + history.append(entry) + rg = entry.get("release_group") + if rg: + groups = data.setdefault("release_groups", []) + if rg not in groups: + groups.append(rg) + self.save(data) + + def subtitle_history(self) -> list[dict]: + """Return the raw subtitle history list.""" + return self.load().get("subtitle_history", []) diff --git a/alfred/infrastructure/subtitle/metadata_store.py b/alfred/infrastructure/subtitle/metadata_store.py index 77c217e..b19e9c2 100644 --- a/alfred/infrastructure/subtitle/metadata_store.py +++ b/alfred/infrastructure/subtitle/metadata_store.py @@ -1,98 +1,47 @@ -"""SubtitleMetadataStore — reads/writes .alfred/metadata.yaml colocated with media.""" +""" +SubtitleMetadataStore — subtitle-specific helper on top of MetadataStore. + +Owns the shape of `subtitle_history` entries (track-level fields, type +inference from the destination filename) and delegates all I/O to the +generic MetadataStore. +""" + +from __future__ import annotations import logging from datetime import UTC, datetime from pathlib import Path from typing import Any -import yaml - from alfred.domain.subtitles.entities import SubtitleTrack from alfred.domain.subtitles.services.placer import PlacedTrack +from alfred.infrastructure.metadata.store import MetadataStore logger = logging.getLogger(__name__) class SubtitleMetadataStore: """ - Manages the .alfred/metadata.yaml file that lives inside the media library folder. + Subtitle-pipeline view of the per-release `.alfred/metadata.yaml`. - For TV shows: /media/tv_shows/The X-Files/.alfred/metadata.yaml - For movies: /media/movies/Inception (2010)/.alfred/metadata.yaml - - The store never raises on a missing file — it returns empty defaults. 
- Writes are atomic (write to .tmp then rename). + Backed by a generic MetadataStore; this class only knows how to build + a subtitle_history entry from PlacedTrack/SubtitleTrack pairs. """ def __init__(self, library_root: Path): - self._root = library_root - self._alfred_dir = library_root / ".alfred" - self._metadata_path = self._alfred_dir / "metadata.yaml" + self._store = MetadataStore(library_root) - # ------------------------------------------------------------------ - # Load / Save - # ------------------------------------------------------------------ - - def load(self) -> dict: - """Return the full metadata dict. Empty dict if file absent.""" - if not self._metadata_path.exists(): - return {} - try: - with open(self._metadata_path, encoding="utf-8") as f: - return yaml.safe_load(f) or {} - except Exception as e: - logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}") - return {} - - def save(self, data: dict) -> None: - """Atomically write metadata.yaml. Creates .alfred/ if needed.""" - self._alfred_dir.mkdir(parents=True, exist_ok=True) - tmp = self._metadata_path.with_suffix(".yaml.tmp") - try: - with open(tmp, "w", encoding="utf-8") as f: - yaml.safe_dump( - data, - f, - allow_unicode=True, - default_flow_style=False, - sort_keys=False, - ) - tmp.rename(self._metadata_path) - except Exception as e: - logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}") - tmp.unlink(missing_ok=True) - raise - - # ------------------------------------------------------------------ - # Pattern - # ------------------------------------------------------------------ + # ---- Pattern ----------------------------------------------------- def confirmed_pattern(self) -> str | None: - """Return the confirmed pattern_id, or None.""" - data = self.load() - if data.get("pattern_confirmed"): - return data.get("detected_pattern") - return None + return self._store.confirmed_pattern() def mark_pattern_confirmed( self, pattern_id: str, 
media_info: dict | None = None ) -> None: - """Persist detected_pattern + pattern_confirmed=true.""" - data = self.load() - data["detected_pattern"] = pattern_id - data["pattern_confirmed"] = True - if media_info: - data.setdefault("media_type", media_info.get("media_type")) - data.setdefault("imdb_id", media_info.get("imdb_id")) - data.setdefault("title", media_info.get("title")) - self.save(data) - logger.info( - f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}" - ) + self._store.mark_pattern_confirmed(pattern_id, media_info) - # ------------------------------------------------------------------ - # Subtitle history - # ------------------------------------------------------------------ + # ---- History ----------------------------------------------------- def append_history( self, @@ -105,15 +54,10 @@ class SubtitleMetadataStore: if not placed_pairs: return - data = self.load() - history = data.setdefault("subtitle_history", []) - tracks_data: list[dict[str, Any]] = [] for placed, track in placed_pairs: # Infer type from destination filename parts (e.g. 
en.sdh.srt → sdh) - parts = placed.filename.rsplit( - ".", 2 - ) # ["en", "sdh", "srt"] or ["en", "srt"] + parts = placed.filename.rsplit(".", 2) inferred_type = parts[1] if len(parts) == 3 else "standard" tracks_data.append( @@ -138,21 +82,14 @@ class SubtitleMetadataStore: if episode is not None: entry["episode"] = episode - history.append(entry) - - # Update release_groups list - if release_group: - groups = data.setdefault("release_groups", []) - if release_group not in groups: - groups.append(release_group) - - self.save(data) + self._store.append_subtitle_history_entry(entry) + marker = ( + f"S{season:02d}E{episode:02d}" if season and episode else "movie" + ) logger.info( - f"MetadataStore: appended history " - f"({'S%02dE%02d' % (season, episode) if season and episode else 'movie'}) " - f"— {len(tracks_data)} track(s)" + f"SubtitleMetadataStore: appended history " + f"({marker}) — {len(tracks_data)} track(s)" ) def history(self) -> list[dict]: - """Return the raw history list.""" - return self.load().get("subtitle_history", []) + return self._store.subtitle_history()