feat: generic MetadataStore + read_release_metadata + query_library

- Extract MetadataStore from SubtitleMetadataStore (alfred/infrastructure/metadata/).
  Generic load/save + typed update helpers (update_parse, update_probe, update_tmdb)
  for the per-release .alfred/metadata.yaml.
- SubtitleMetadataStore becomes a thin facade — owns subtitle_history shape,
  delegates I/O to MetadataStore.
- Agent._execute_tool_call auto-persists successful analyze_release / probe_media /
  find_media_imdb_id results to the release's .alfred file. find_media_imdb_id
  follows release_focus when it has no path argument.
- New tools:
  · read_release_metadata(release_path) — cacheable, key=release_path.
    Returns the .alfred content or has_metadata=false.
  · query_library(name) — substring scan across configured library roots.
- Both new tools added to CORE_TOOLS (always visible).
This commit is contained in:
2026-05-15 11:02:25 +02:00
parent 3c7c6695f2
commit ba6f016d49
9 changed files with 466 additions and 89 deletions
+64
View File
@@ -3,8 +3,10 @@
import json
import logging
from collections.abc import AsyncGenerator
from pathlib import Path
from typing import Any
from alfred.infrastructure.metadata import MetadataStore
from alfred.infrastructure.persistence import get_memory
from alfred.settings import settings
@@ -243,6 +245,7 @@ class Agent:
Today:
- Update release_focus when a path-keyed inspector runs.
- Persist inspector results into the release's `.alfred/metadata.yaml`.
- Refresh episodic.last_search_results on find_torrent cache hits so
get_torrent_by_index keeps pointing at the right list.
"""
@@ -255,6 +258,11 @@ class Agent:
if isinstance(path, str) and path:
memory.stm.release_focus.focus(path)
# Persist inspector results to .alfred/metadata.yaml (skip on cache
# hit — the file is already up to date from the original run).
if not from_cache:
self._maybe_update_alfred(tool_name, args, result)
# Episodic refresh when find_torrent's cache short-circuits the call.
if from_cache and tool_name == "find_torrent":
torrents = result.get("torrents") or []
@@ -263,6 +271,62 @@ class Agent:
query=query, results=torrents, search_type="torrent"
)
def _maybe_update_alfred(
    self,
    tool_name: str,
    args: dict[str, Any],
    result: dict[str, Any],
) -> None:
    """
    Persist a successful inspector result into the release's
    `.alfred/metadata.yaml`.

    No-op when:
    - `tool_name` is not one of the persisted inspectors,
    - `result` reports ``status == "error"`` (a failed run must not
      overwrite a previously persisted, valid section),
    - the release root can't be resolved.

    Persistence is best-effort: any failure while writing is logged as a
    warning and swallowed so the tool call itself is never broken by it.
    """
    # One MetadataStore updater per inspector tool.
    updaters = {
        "analyze_release": MetadataStore.update_parse,
        "probe_media": MetadataStore.update_probe,
        "find_media_imdb_id": MetadataStore.update_tmdb,
    }
    persist = updaters.get(tool_name)
    if persist is None:
        return

    # Only successful results are worth persisting; the update helpers
    # replace their section wholesale, so an error payload would erase
    # the last good run.
    if not isinstance(result, dict) or result.get("status") == "error":
        return

    release_root = self._resolve_release_root(tool_name, args)
    if release_root is None:
        return

    try:
        persist(MetadataStore(release_root), result)
    except Exception as e:  # best-effort boundary: log, never propagate
        logger.warning(
            "Failed to update .alfred for %s at %s: %s",
            tool_name,
            release_root,
            e,
        )
@staticmethod
def _resolve_release_root(
    tool_name: str,
    args: dict[str, Any],
) -> Path | None:
    """
    Work out which release folder a tool call belongs to.

    - analyze_release / probe_media: taken from the `source_path`
      argument; returns None when it is missing, empty or not a string.
    - any other tool (in practice find_media_imdb_id, which has no path
      argument): taken from the current release focus in STM; returns
      None when no focus is set.

    In both cases a directory is returned as-is, anything else is
    replaced by its parent.
    """
    if tool_name in {"analyze_release", "probe_media"}:
        candidate = args.get("source_path")
        if not (isinstance(candidate, str) and candidate):
            return None
    else:
        # No path argument available — fall back to the release focus.
        candidate = get_memory().stm.release_focus.current_release_path
        if not candidate:
            return None

    location = Path(candidate)
    if location.is_dir():
        return location
    return location.parent
async def step_streaming(
self, user_input: str, completion_id: str, created_ts: int, model: str
) -> AsyncGenerator[dict[str, Any]]:
+2
View File
@@ -17,6 +17,8 @@ CORE_TOOLS: tuple[str, ...] = (
"set_language",
"set_path_for_folder",
"list_folder",
"read_release_metadata",
"query_library",
"start_workflow",
"end_workflow",
)
+2
View File
@@ -138,6 +138,8 @@ def make_tools(settings) -> dict[str, Tool]:
tool_functions = [
fs_tools.set_path_for_folder,
fs_tools.list_folder,
fs_tools.read_release_metadata,
fs_tools.query_library,
fs_tools.analyze_release,
fs_tools.probe_media,
fs_tools.resolve_season_destination,
+75
View File
@@ -30,6 +30,8 @@ from alfred.application.filesystem.resolve_destination import (
from alfred.infrastructure.filesystem import FileManager, create_folder, move
from alfred.infrastructure.filesystem.ffprobe import probe
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.metadata import MetadataStore
from alfred.infrastructure.persistence import get_memory
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
@@ -288,3 +290,76 @@ def list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
use_case = ListFolderUseCase(file_manager)
response = use_case.execute(folder_type, path)
return response.to_dict()
def read_release_metadata(release_path: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/read_release_metadata.yaml.

    Returns an ``error``/``not_found`` payload when `release_path` does not
    exist. Otherwise returns an ``ok`` payload for the release root (the
    path itself if it is a directory, else its parent) with
    `has_metadata` and the loaded `metadata` (empty dict when absent).
    """
    target = Path(release_path)
    if not target.exists():
        return {
            "status": "error",
            "error": "not_found",
            "message": f"{release_path} does not exist",
        }

    # A file path resolves to the folder that contains it.
    if target.is_dir():
        release_root = target
    else:
        release_root = target.parent

    store = MetadataStore(release_root)
    found = store.exists()
    return {
        "status": "ok",
        "release_path": str(release_root),
        "has_metadata": found,
        # Only touch the file when it is actually there.
        "metadata": store.load() if found else {},
    }
def query_library(name: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/query_library.yaml.

    Scans the direct children of every configured library root and
    returns the folders whose name contains `name` (case-insensitive,
    surrounding whitespace ignored).

    Returns:
        - ``error``/``empty_name`` when `name` is blank.
        - ``error``/``no_libraries`` when no library path is configured.
        - ``ok`` with `query`, `match_count` and `matches`
          (``{collection, name, path, has_metadata}``) otherwise. Matches
          are sorted by folder name within each root so the output is
          deterministic.

    Roots that are unset, are not directories, or cannot be read are
    skipped so one bad root does not abort the whole scan.
    """
    import logging  # local import: this block must not rely on a module logger

    needle = name.strip().lower()
    if not needle:
        return {
            "status": "error",
            "error": "empty_name",
            "message": "name must be a non-empty string",
        }

    memory = get_memory()
    roots = memory.ltm.library_paths.to_dict() or {}
    if not roots:
        return {
            "status": "error",
            "error": "no_libraries",
            "message": "No library paths configured — call set_path_for_folder first.",
        }

    matches: list[dict[str, Any]] = []
    for collection, root in roots.items():
        # An unset root must be skipped explicitly: Path("") is the current
        # working directory and Path(None) raises TypeError.
        if not root:
            continue
        root_path = Path(root)
        if not root_path.is_dir():
            continue

        try:
            # iterdir() order is arbitrary — sort for a stable result list.
            folders = sorted(
                (
                    entry
                    for entry in root_path.iterdir()
                    if entry.is_dir() and needle in entry.name.lower()
                ),
                key=lambda entry: entry.name.lower(),
            )
        except OSError as exc:
            # Unreadable root (permissions, unmounted share, …): keep going
            # with the other libraries instead of failing the whole query.
            logging.getLogger(__name__).warning(
                "query_library: could not scan %s (%s): %s",
                root_path,
                collection,
                exc,
            )
            continue

        matches.extend(
            {
                "collection": collection,
                "name": folder.name,
                "path": str(folder),
                "has_metadata": MetadataStore(folder).exists(),
            }
            for folder in folders
        )

    return {
        "status": "ok",
        "query": name,
        "match_count": len(matches),
        "matches": matches,
    }
@@ -0,0 +1,54 @@
name: query_library
summary: >
Find release folders across all configured library roots whose name
contains a substring (case-insensitive).
description: |
Scans every configured library root (movies, tv_shows, …) at depth 1
and returns folders whose name contains the query. For each match,
reports whether a `.alfred/metadata.yaml` exists — handy to spot
releases that have not been inspected yet. Does not recurse into
seasons / episodes; one entry per release folder.
when_to_use: |
- To answer "do I already have X?" without listing whole library
roots one by one.
- To pick the release_path to feed read_release_metadata or any
inspector tool.
when_not_to_use: |
- To list the *whole* library — that scan should live behind a
dedicated tool (not implemented yet).
- To browse a single root — use list_folder instead, it's cheaper
and doesn't open every library.
next_steps: |
- When one match is found: feed its path to read_release_metadata or
analyze_release.
- When several match: surface the indexed list to the user and ask
which one they mean.
parameters:
name:
description: Case-insensitive substring of the release name to look for.
why_needed: |
Library folders are named after the release (Title.Year.... or
Title (Year)). A substring is enough to catch typical user
phrasings ("foundation", "inception 2010").
example: foundation
returns:
ok:
description: Scan completed (possibly zero matches).
fields:
status: "'ok'"
query: The query string as received.
match_count: Number of matching folders.
matches: "List of {collection, name, path, has_metadata}."
error:
description: Scan could not run.
fields:
error: Short error code (no_libraries, empty_name).
message: Human-readable explanation.
@@ -0,0 +1,55 @@
name: read_release_metadata
summary: >
Read the `.alfred/metadata.yaml` file for a release folder.
description: |
Returns whatever has been previously persisted by inspector tools
(analyze_release, probe_media, find_media_imdb_id) and by the subtitle
pipeline. Works for any folder — download or library — as long as the
release has been touched at least once. Missing metadata is not an
error: the tool returns `has_metadata=false` with an empty dict.
when_to_use: |
- Before re-running analyze_release / probe_media on a release you
might have already seen — saves a full re-inspection.
- To answer "what do we know about X?" without scanning.
- To list which releases in a library have no `.alfred` yet (loop +
`has_metadata`).
when_not_to_use: |
- To search a library by name — use query_library.
- When you need a fresh probe/parse — call the inspector directly,
the result will be persisted automatically.
next_steps: |
- If `has_metadata=false`, decide whether to inspect now
(analyze_release / probe_media).
- If `has_metadata=true`, read `metadata.parse`, `metadata.probe`,
`metadata.tmdb` blocks before deciding next actions.
cache:
key: release_path
parameters:
release_path:
description: Absolute path to the release folder (or a file directly inside it).
why_needed: |
The store lives at `<release_root>/.alfred/metadata.yaml`. A file
path is auto-resolved to its parent folder.
example: /mnt/library/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG
returns:
ok:
description: Release inspected (file may or may not exist).
fields:
status: "'ok'"
release_path: Absolute path of the release folder.
has_metadata: True if `.alfred/metadata.yaml` exists.
metadata: Full content of the file, or empty dict.
error:
description: Path does not exist on disk.
fields:
error: Short error code (not_found).
message: Human-readable explanation.
@@ -0,0 +1,5 @@
"""Per-release `.alfred/metadata.yaml` persistence."""
from .store import MetadataStore
__all__ = ["MetadataStore"]
+183
View File
@@ -0,0 +1,183 @@
"""
MetadataStore — reads/writes the `.alfred/metadata.yaml` file colocated with
a release folder.
The store is intentionally domain-agnostic: it knows how to atomically
load/save the YAML and exposes typed update helpers for the broad facts a
release carries (parse, probe, TMDB lookup, detected pattern). Subtitle
history lives next to the same file but is appended through a dedicated
helper kept under `alfred/infrastructure/subtitle/` so the subtitle pipeline
keeps full ownership of its payload shape.
The file layout:
<release_root>/
.alfred/
metadata.yaml
The store never raises on a missing file — it returns empty defaults. Writes
are atomic (write to .tmp then rename).
"""
from __future__ import annotations
import logging
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
import yaml
logger = logging.getLogger(__name__)
class MetadataStore:
"""Manages `.alfred/metadata.yaml` for one release folder."""
def __init__(self, release_root: str | Path):
self._root = Path(release_root)
self._alfred_dir = self._root / ".alfred"
self._metadata_path = self._alfred_dir / "metadata.yaml"
# ------------------------------------------------------------------
# Identity
# ------------------------------------------------------------------
@property
def release_root(self) -> Path:
return self._root
@property
def metadata_path(self) -> Path:
return self._metadata_path
def exists(self) -> bool:
return self._metadata_path.exists()
# ------------------------------------------------------------------
# Load / Save
# ------------------------------------------------------------------
def load(self) -> dict:
"""Return the full metadata dict. Empty dict if file absent."""
if not self._metadata_path.exists():
return {}
try:
with open(self._metadata_path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except Exception as e:
logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}")
return {}
def save(self, data: dict) -> None:
"""Atomically write metadata.yaml. Creates .alfred/ if needed."""
self._alfred_dir.mkdir(parents=True, exist_ok=True)
tmp = self._metadata_path.with_suffix(".yaml.tmp")
try:
with open(tmp, "w", encoding="utf-8") as f:
yaml.safe_dump(
data,
f,
allow_unicode=True,
default_flow_style=False,
sort_keys=False,
)
tmp.rename(self._metadata_path)
except Exception as e:
logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}")
tmp.unlink(missing_ok=True)
raise
# ------------------------------------------------------------------
# Generic update helper
# ------------------------------------------------------------------
def update_section(self, section: str, payload: dict[str, Any]) -> None:
"""
Merge `payload` into the top-level `section` block and stamp it.
The section is replaced wholesale (not deep-merged) so the last
successful tool run reflects the current truth. A `_updated_at`
ISO-8601 timestamp is added inside the section.
"""
data = self.load()
stamped = dict(payload)
stamped["_updated_at"] = datetime.now(UTC).isoformat()
data[section] = stamped
self.save(data)
# ------------------------------------------------------------------
# Typed update helpers — one per inspector tool
# ------------------------------------------------------------------
def update_parse(self, parse_result: dict[str, Any]) -> None:
"""Persist the result of analyze_release."""
clean = {k: v for k, v in parse_result.items() if k != "status"}
self.update_section("parse", clean)
def update_probe(self, probe_result: dict[str, Any]) -> None:
"""Persist the result of probe_media."""
clean = {k: v for k, v in probe_result.items() if k != "status"}
self.update_section("probe", clean)
def update_tmdb(self, tmdb_result: dict[str, Any]) -> None:
"""Persist the result of find_media_imdb_id."""
clean = {k: v for k, v in tmdb_result.items() if k != "status"}
self.update_section("tmdb", clean)
# Also promote core identity fields to the top level so they are
# cheap to read without parsing the full tmdb block.
data = self.load()
for key in ("imdb_id", "tmdb_id", "media_type"):
if key in clean and clean[key] is not None:
data[key] = clean[key]
if "title" in clean and clean["title"]:
data.setdefault("title", clean["title"])
self.save(data)
# ------------------------------------------------------------------
# Pattern (used by the subtitle pipeline)
# ------------------------------------------------------------------
def confirmed_pattern(self) -> str | None:
"""Return the confirmed pattern_id, or None."""
data = self.load()
if data.get("pattern_confirmed"):
return data.get("detected_pattern")
return None
def mark_pattern_confirmed(
self, pattern_id: str, media_info: dict | None = None
) -> None:
"""Persist detected_pattern + pattern_confirmed=true."""
data = self.load()
data["detected_pattern"] = pattern_id
data["pattern_confirmed"] = True
if media_info:
data.setdefault("media_type", media_info.get("media_type"))
data.setdefault("imdb_id", media_info.get("imdb_id"))
data.setdefault("title", media_info.get("title"))
self.save(data)
logger.info(
f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}"
)
# ------------------------------------------------------------------
# Subtitle history (kept for backwards compatibility with the
# subtitle pipeline — payload shape is owned by the caller).
# ------------------------------------------------------------------
def append_subtitle_history_entry(self, entry: dict[str, Any]) -> None:
"""Append one entry (raw dict) to subtitle_history."""
data = self.load()
history = data.setdefault("subtitle_history", [])
history.append(entry)
rg = entry.get("release_group")
if rg:
groups = data.setdefault("release_groups", [])
if rg not in groups:
groups.append(rg)
self.save(data)
def subtitle_history(self) -> list[dict]:
"""Return the raw subtitle history list."""
return self.load().get("subtitle_history", [])
@@ -1,98 +1,47 @@
"""SubtitleMetadataStore — reads/writes .alfred/metadata.yaml colocated with media."""
"""
SubtitleMetadataStore — subtitle-specific helper on top of MetadataStore.
Owns the shape of `subtitle_history` entries (track-level fields, type
inference from the destination filename) and delegates all I/O to the
generic MetadataStore.
"""
from __future__ import annotations
import logging
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
import yaml
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.services.placer import PlacedTrack
from alfred.infrastructure.metadata.store import MetadataStore
logger = logging.getLogger(__name__)
class SubtitleMetadataStore:
"""
Manages the .alfred/metadata.yaml file that lives inside the media library folder.
Subtitle-pipeline view of the per-release `.alfred/metadata.yaml`.
For TV shows: /media/tv_shows/The X-Files/.alfred/metadata.yaml
For movies: /media/movies/Inception (2010)/.alfred/metadata.yaml
The store never raises on a missing file — it returns empty defaults.
Writes are atomic (write to .tmp then rename).
Backed by a generic MetadataStore; this class only knows how to build
a subtitle_history entry from PlacedTrack/SubtitleTrack pairs.
"""
def __init__(self, library_root: Path):
self._root = library_root
self._alfred_dir = library_root / ".alfred"
self._metadata_path = self._alfred_dir / "metadata.yaml"
self._store = MetadataStore(library_root)
# ------------------------------------------------------------------
# Load / Save
# ------------------------------------------------------------------
def load(self) -> dict:
"""Return the full metadata dict. Empty dict if file absent."""
if not self._metadata_path.exists():
return {}
try:
with open(self._metadata_path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except Exception as e:
logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}")
return {}
def save(self, data: dict) -> None:
"""Atomically write metadata.yaml. Creates .alfred/ if needed."""
self._alfred_dir.mkdir(parents=True, exist_ok=True)
tmp = self._metadata_path.with_suffix(".yaml.tmp")
try:
with open(tmp, "w", encoding="utf-8") as f:
yaml.safe_dump(
data,
f,
allow_unicode=True,
default_flow_style=False,
sort_keys=False,
)
tmp.rename(self._metadata_path)
except Exception as e:
logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}")
tmp.unlink(missing_ok=True)
raise
# ------------------------------------------------------------------
# Pattern
# ------------------------------------------------------------------
# ---- Pattern -----------------------------------------------------
def confirmed_pattern(self) -> str | None:
"""Return the confirmed pattern_id, or None."""
data = self.load()
if data.get("pattern_confirmed"):
return data.get("detected_pattern")
return None
return self._store.confirmed_pattern()
def mark_pattern_confirmed(
self, pattern_id: str, media_info: dict | None = None
) -> None:
"""Persist detected_pattern + pattern_confirmed=true."""
data = self.load()
data["detected_pattern"] = pattern_id
data["pattern_confirmed"] = True
if media_info:
data.setdefault("media_type", media_info.get("media_type"))
data.setdefault("imdb_id", media_info.get("imdb_id"))
data.setdefault("title", media_info.get("title"))
self.save(data)
logger.info(
f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}"
)
self._store.mark_pattern_confirmed(pattern_id, media_info)
# ------------------------------------------------------------------
# Subtitle history
# ------------------------------------------------------------------
# ---- History -----------------------------------------------------
def append_history(
self,
@@ -105,15 +54,10 @@ class SubtitleMetadataStore:
if not placed_pairs:
return
data = self.load()
history = data.setdefault("subtitle_history", [])
tracks_data: list[dict[str, Any]] = []
for placed, track in placed_pairs:
# Infer type from destination filename parts (e.g. en.sdh.srt → sdh)
parts = placed.filename.rsplit(
".", 2
) # ["en", "sdh", "srt"] or ["en", "srt"]
parts = placed.filename.rsplit(".", 2)
inferred_type = parts[1] if len(parts) == 3 else "standard"
tracks_data.append(
@@ -138,21 +82,14 @@ class SubtitleMetadataStore:
if episode is not None:
entry["episode"] = episode
history.append(entry)
# Update release_groups list
if release_group:
groups = data.setdefault("release_groups", [])
if release_group not in groups:
groups.append(release_group)
self.save(data)
self._store.append_subtitle_history_entry(entry)
marker = (
f"S{season:02d}E{episode:02d}" if season and episode else "movie"
)
logger.info(
f"MetadataStore: appended history "
f"({'S%02dE%02d' % (season, episode) if season and episode else 'movie'}) "
f"{len(tracks_data)} track(s)"
f"SubtitleMetadataStore: appended history "
f"({marker}) — {len(tracks_data)} track(s)"
)
def history(self) -> list[dict]:
"""Return the raw history list."""
return self.load().get("subtitle_history", [])
return self._store.subtitle_history()