feat: release parser, media type detection, ffprobe integration

Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
  "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
  languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
  _strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
  HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
  video, editions, sites/c411

New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
  tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder

New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo

New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file

New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)

Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
This commit is contained in:
2026-05-12 16:14:20 +02:00
parent 249c5de76a
commit 1723b9fa53
32 changed files with 2323 additions and 562 deletions
+99 -32
View File
@@ -79,24 +79,67 @@ def kv(key: str, val: str) -> None:
# Dry-run tool stubs
# ---------------------------------------------------------------------------
def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
return {
"status": "ok",
"folder_type": folder_type,
"path": path,
"entries": ["[dry-run — no real listing]"],
"count": 1,
}
def _real_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
    """Call the real list_folder (read-only, safe in dry-run).

    If the real tool raises, or reports status="error" for the "download"
    folder type, the download folder is listed directly from a hardcoded
    root instead. For any other folder type a placeholder stub is returned.

    Args:
        folder_type: Logical folder key forwarded to ``list_folder``.
        path: Path relative to that folder; "." means the folder itself.

    Returns:
        The real tool's result dict when it succeeds; otherwise a dict with
        the keys status / folder_type / path / entries / count built by the
        fallback, or a status="error" dict when the fallback directory
        cannot be listed or ``path`` points outside the hardcoded root.
    """
    # TODO: remove hardcoded fallback once download path is configured in LTM
    _HARDCODED_DOWNLOAD_ROOT = "/mnt/testipool/downloads"
    try:
        from alfred.infrastructure.persistence import get_memory, init_memory

        # Make sure the memory layer is initialised before the tool is used.
        try:
            get_memory()
        except Exception:
            init_memory()

        from alfred.agent.tools.filesystem import list_folder

        result = list_folder(folder_type=folder_type, path=path)
        # An error result for "download" is routed to the fallback below.
        if result.get("status") == "error" and folder_type == "download":
            raise RuntimeError(result.get("message", "not configured"))
        return result
    except Exception as e:
        if folder_type == "download":
            warn(f"list_folder: {e} — using hardcoded download root: {_HARDCODED_DOWNLOAD_ROOT}")
            import os

            root = os.path.normpath(_HARDCODED_DOWNLOAD_ROOT)
            # os.path.join discards the root when `path` is absolute, and
            # ".." segments can climb out of it; normalise and verify the
            # result is still inside the root before listing anything.
            resolved = os.path.normpath(os.path.join(root, path))
            if os.path.commonpath([root, resolved]) != root:
                return {
                    "status": "error",
                    "error": "forbidden_path",
                    "message": f"path escapes download root: {path}",
                }
            try:
                entries = sorted(os.listdir(resolved))
            except OSError as oe:
                return {"status": "error", "error": "os_error", "message": str(oe)}
            return {
                "status": "ok",
                "folder_type": folder_type,
                "path": resolved,
                "entries": entries,
                "count": len(entries),
            }
        warn(f"list_folder: filesystem unavailable ({e}), falling back to stub")
        return {
            "status": "ok",
            "folder_type": folder_type,
            "path": path,
            "entries": ["[stub — filesystem unavailable]"],
            "count": 1,
        }
def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]:
return {
"status": "ok",
"imdb_id": kwargs.get("imdb_id") or "tt0000000",
"title": "Dry Run Show",
"type": "tv_show",
"year": 2024,
}
def _real_find_media_imdb_id(media_title: str, **kwargs) -> dict[str, Any]:
    """Call the real TMDB API even in dry-run (read-only, no filesystem side effects).

    Args:
        media_title: Title forwarded to the real ``find_media_imdb_id`` tool.
        **kwargs: Accepted for call compatibility; not forwarded to the tool.

    Returns:
        The real tool's result, or a placeholder status="ok" dict echoing
        ``media_title`` when the lookup raises.
    """
    try:
        from alfred.infrastructure.persistence import get_memory, init_memory

        # Make sure the memory layer is initialised before the tool is used.
        try:
            get_memory()
        except Exception:
            init_memory()

        from alfred.agent.tools.api import find_media_imdb_id

        lookup = find_media_imdb_id(media_title=media_title)
        return lookup
    except Exception as e:
        warn(f"find_media_imdb_id: TMDB unavailable ({e}), falling back to stub")
        return dict(
            status="ok",
            imdb_id="tt0000000",
            title=media_title,
            media_type="tv_show",
            year=2024,
        )
def _dry_resolve_destination(
@@ -107,7 +150,7 @@ def _dry_resolve_destination(
tmdb_episode_title: str | None = None,
confirmed_folder: str | None = None,
) -> dict[str, Any]:
from alfred.domain.media.release_parser import parse_release
from alfred.domain.release import parse_release
parsed = parse_release(release_name)
ext = Path(source_file).suffix
if parsed.is_movie:
@@ -170,8 +213,8 @@ def _dry_create_seed_links(library_file: str, original_download_folder: str) ->
DRY_RUN_TOOLS: dict[str, Any] = {
"list_folder": _dry_list_folder,
"find_media_imdb_id": _dry_find_media_imdb_id,
"list_folder": _real_list_folder,
"find_media_imdb_id": _real_find_media_imdb_id,
"resolve_destination": _dry_resolve_destination,
"move_media": _dry_move_media,
"manage_subtitles": _dry_manage_subtitles,
@@ -316,10 +359,22 @@ class WorkflowRunner:
self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}})
return
self._print_result(result)
self._print_result(result, tool_name=tool_name)
self.context[step_id] = result
self.step_results.append({"id": step_id, "result": result})
# After list_downloads: confirm the requested media folder exists in downloads
if tool_name == "list_folder" and result.get("status") == "ok" and self.args.source:
folder_path = result.get("path", "")
entries = result.get("entries", [])
if self.args.source in entries:
media_folder = str(Path(folder_path) / self.args.source)
self.context["media_folder"] = media_folder
print()
print(f" {c('Dossier media trouvé:', BOLD, GREEN)} {c(media_folder, CYAN, BOLD)}")
else:
warn(f"Dossier '{self.args.source}' introuvable dans {folder_path}")
def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]:
"""Build tool kwargs from step params + CLI args + previous context."""
# Start from step-level params (static defaults from YAML)
@@ -335,12 +390,13 @@ class WorkflowRunner:
kwargs["imdb_id"] = a.imdb_id
elif tool_name == "resolve_destination":
media_folder = self.context.get("media_folder")
if a.release:
kwargs["release_name"] = a.release
elif a.source:
kwargs.setdefault("release_name", Path(a.source).parent.name)
if a.source:
kwargs["source_file"] = a.source
kwargs.setdefault("release_name", a.source)
if media_folder:
kwargs["source_file"] = media_folder
if a.tmdb_title:
kwargs["tmdb_title"] = a.tmdb_title
if a.tmdb_year:
@@ -351,16 +407,18 @@ class WorkflowRunner:
elif tool_name == "move_media":
# If resolve_destination ran, use its library_file as destination
resolved = self.context.get("resolve_destination", {})
if a.source:
kwargs["source"] = a.source
media_folder = self.context.get("media_folder")
if media_folder:
kwargs["source"] = media_folder
dest = a.dest or resolved.get("library_file")
if dest:
kwargs["destination"] = dest
elif tool_name == "manage_subtitles":
resolved = self.context.get("resolve_destination", {})
if a.source:
kwargs["source_video"] = a.source
media_folder = self.context.get("media_folder")
if media_folder:
kwargs["source_video"] = media_folder
dest = a.dest or resolved.get("library_file")
if dest:
kwargs["destination_video"] = dest
@@ -372,12 +430,16 @@ class WorkflowRunner:
kwargs["library_file"] = library_file
if a.download_folder:
kwargs["original_download_folder"] = a.download_folder
elif a.source:
kwargs.setdefault("original_download_folder", str(Path(a.source).parent))
else:
# Use the resolved folder path from list_downloads context
list_result = self.context.get("list_downloads", {})
folder_path = list_result.get("path")
if folder_path:
kwargs.setdefault("original_download_folder", folder_path)
return kwargs
def _print_result(self, result: dict) -> None:
def _print_result(self, result: dict, tool_name: str = "") -> None:
status = result.get("status", "?")
if status == "ok":
ok(f"status={c('ok', GREEN)}")
@@ -387,6 +449,11 @@ class WorkflowRunner:
err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}")
return
# Highlight resolved folder path for list_folder
if tool_name == "list_folder" and result.get("path"):
print()
print(f" {c('Dossier résolu:', BOLD, GREEN)} {c(result['path'], CYAN, BOLD)}")
# Pretty-print notable fields
skip = {"status", "error", "message"}
for k, v in result.items():
@@ -420,8 +487,8 @@ def parse_args() -> argparse.Namespace:
help="Simulate steps without executing tools (default)")
parser.add_argument("--live", action="store_true",
help="Actually execute tools against the real filesystem")
parser.add_argument("--source", metavar="PATH",
help="Source video file (in download folder)")
parser.add_argument("--source", metavar="FOLDER_NAME",
help="Release folder name inside the download root (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)")
parser.add_argument("--dest", metavar="PATH",
help="Destination video file (in library, overrides resolve_destination)")
parser.add_argument("--download-folder", metavar="PATH",