feat: release parser, media type detection, ffprobe integration
Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
"documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
_strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
video, editions, sites/c411
New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder
New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo
New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file
New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)
Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
This commit is contained in:
@@ -79,24 +79,67 @@ def kv(key: str, val: str) -> None:
|
||||
# Dry-run tool stubs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"folder_type": folder_type,
|
||||
"path": path,
|
||||
"entries": ["[dry-run — no real listing]"],
|
||||
"count": 1,
|
||||
}
|
||||
def _real_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
    """Run the real ``list_folder`` tool, degrading gracefully when it fails.

    The real tool is treated as read-only and therefore acceptable in dry-run
    mode. Persistence memory is initialised on demand when ``get_memory()``
    raises.

    Fallback behaviour when anything in the real path raises (including an
    ``"error"`` status reported for the ``"download"`` folder type):

    * ``folder_type == "download"``: list the hardcoded download root
      (joined with ``path`` unless ``path`` is ``"."``) directly with
      ``os.listdir`` and return the sorted entry names.
    * any other folder type: return a one-entry stub result.

    Args:
        folder_type: Folder category forwarded to ``list_folder``.
        path: Path forwarded to ``list_folder`` (defaults to ``"."``).

    Returns:
        The real tool's result dict, a directory listing of the hardcoded
        download root, an ``os_error`` error dict if that listing fails, or
        a stub result.
    """
    # TODO: remove hardcoded fallback once download path is configured in LTM
    fallback_root = "/mnt/testipool/downloads"

    try:
        from alfred.infrastructure.persistence import get_memory, init_memory

        # Probe for existing memory; initialise it only if the probe fails.
        try:
            get_memory()
        except Exception:
            init_memory()

        from alfred.agent.tools.filesystem import list_folder

        listing = list_folder(folder_type=folder_type, path=path)
        download_failed = (
            listing.get("status") == "error" and folder_type == "download"
        )
        if download_failed:
            # Route a reported error through the same fallback as an exception.
            raise RuntimeError(listing.get("message", "not configured"))
        return listing
    except Exception as exc:
        if folder_type != "download":
            warn(f"list_folder: filesystem unavailable ({exc}), falling back to stub")
            return {
                "status": "ok",
                "folder_type": folder_type,
                "path": path,
                "entries": ["[stub — filesystem unavailable]"],
                "count": 1,
            }

        warn(f"list_folder: {exc} — using hardcoded download root: {fallback_root}")
        import os

        if path == ".":
            target_dir = fallback_root
        else:
            target_dir = os.path.join(fallback_root, path)

        try:
            names = sorted(os.listdir(target_dir))
        except OSError as os_exc:
            return {"status": "error", "error": "os_error", "message": str(os_exc)}

        return {
            "status": "ok",
            "folder_type": folder_type,
            "path": target_dir,
            "entries": names,
            "count": len(names),
        }
|
||||
|
||||
|
||||
def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"imdb_id": kwargs.get("imdb_id") or "tt0000000",
|
||||
"title": "Dry Run Show",
|
||||
"type": "tv_show",
|
||||
"year": 2024,
|
||||
}
|
||||
def _real_find_media_imdb_id(media_title: str, **kwargs) -> dict[str, Any]:
    """Look up *media_title* with the real ``find_media_imdb_id`` tool.

    The real lookup is used even in dry-run mode because it is treated as
    read-only with no filesystem side effects. Persistence memory is
    initialised on demand when ``get_memory()`` raises. If anything in the
    real path raises, a warning is emitted and a stub result is returned.

    Args:
        media_title: Title passed to the real tool.
        **kwargs: Accepted but not used by this function.

    Returns:
        The real tool's result, or a stub dict echoing ``media_title``.
    """
    # NOTE(review): extra keyword arguments (e.g. an ``imdb_id``) are neither
    # forwarded to the real tool nor reflected in the stub — confirm intended.
    try:
        from alfred.infrastructure.persistence import get_memory, init_memory

        # Probe for existing memory; initialise it only if the probe fails.
        try:
            get_memory()
        except Exception:
            init_memory()

        from alfred.agent.tools.api import find_media_imdb_id

        lookup = find_media_imdb_id(media_title=media_title)
    except Exception as exc:
        warn(f"find_media_imdb_id: TMDB unavailable ({exc}), falling back to stub")
        # NOTE(review): this stub uses the key "media_type" whereas
        # _dry_find_media_imdb_id uses "type" — confirm which one callers read.
        return {
            "status": "ok",
            "imdb_id": "tt0000000",
            "title": media_title,
            "media_type": "tv_show",
            "year": 2024,
        }
    else:
        return lookup
|
||||
|
||||
|
||||
def _dry_resolve_destination(
|
||||
@@ -107,7 +150,7 @@ def _dry_resolve_destination(
|
||||
tmdb_episode_title: str | None = None,
|
||||
confirmed_folder: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
from alfred.domain.media.release_parser import parse_release
|
||||
from alfred.domain.release import parse_release
|
||||
parsed = parse_release(release_name)
|
||||
ext = Path(source_file).suffix
|
||||
if parsed.is_movie:
|
||||
@@ -170,8 +213,8 @@ def _dry_create_seed_links(library_file: str, original_download_folder: str) ->
|
||||
|
||||
|
||||
DRY_RUN_TOOLS: dict[str, Any] = {
|
||||
"list_folder": _dry_list_folder,
|
||||
"find_media_imdb_id": _dry_find_media_imdb_id,
|
||||
"list_folder": _real_list_folder,
|
||||
"find_media_imdb_id": _real_find_media_imdb_id,
|
||||
"resolve_destination": _dry_resolve_destination,
|
||||
"move_media": _dry_move_media,
|
||||
"manage_subtitles": _dry_manage_subtitles,
|
||||
@@ -316,10 +359,22 @@ class WorkflowRunner:
|
||||
self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}})
|
||||
return
|
||||
|
||||
self._print_result(result)
|
||||
self._print_result(result, tool_name=tool_name)
|
||||
self.context[step_id] = result
|
||||
self.step_results.append({"id": step_id, "result": result})
|
||||
|
||||
# After list_downloads: confirm the requested media folder exists in downloads
|
||||
if tool_name == "list_folder" and result.get("status") == "ok" and self.args.source:
|
||||
folder_path = result.get("path", "")
|
||||
entries = result.get("entries", [])
|
||||
if self.args.source in entries:
|
||||
media_folder = str(Path(folder_path) / self.args.source)
|
||||
self.context["media_folder"] = media_folder
|
||||
print()
|
||||
print(f" {c('Dossier media trouvé:', BOLD, GREEN)} {c(media_folder, CYAN, BOLD)}")
|
||||
else:
|
||||
warn(f"Dossier '{self.args.source}' introuvable dans {folder_path}")
|
||||
|
||||
def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]:
|
||||
"""Build tool kwargs from step params + CLI args + previous context."""
|
||||
# Start from step-level params (static defaults from YAML)
|
||||
@@ -335,12 +390,13 @@ class WorkflowRunner:
|
||||
kwargs["imdb_id"] = a.imdb_id
|
||||
|
||||
elif tool_name == "resolve_destination":
|
||||
media_folder = self.context.get("media_folder")
|
||||
if a.release:
|
||||
kwargs["release_name"] = a.release
|
||||
elif a.source:
|
||||
kwargs.setdefault("release_name", Path(a.source).parent.name)
|
||||
if a.source:
|
||||
kwargs["source_file"] = a.source
|
||||
kwargs.setdefault("release_name", a.source)
|
||||
if media_folder:
|
||||
kwargs["source_file"] = media_folder
|
||||
if a.tmdb_title:
|
||||
kwargs["tmdb_title"] = a.tmdb_title
|
||||
if a.tmdb_year:
|
||||
@@ -351,16 +407,18 @@ class WorkflowRunner:
|
||||
elif tool_name == "move_media":
|
||||
# If resolve_destination ran, use its library_file as destination
|
||||
resolved = self.context.get("resolve_destination", {})
|
||||
if a.source:
|
||||
kwargs["source"] = a.source
|
||||
media_folder = self.context.get("media_folder")
|
||||
if media_folder:
|
||||
kwargs["source"] = media_folder
|
||||
dest = a.dest or resolved.get("library_file")
|
||||
if dest:
|
||||
kwargs["destination"] = dest
|
||||
|
||||
elif tool_name == "manage_subtitles":
|
||||
resolved = self.context.get("resolve_destination", {})
|
||||
if a.source:
|
||||
kwargs["source_video"] = a.source
|
||||
media_folder = self.context.get("media_folder")
|
||||
if media_folder:
|
||||
kwargs["source_video"] = media_folder
|
||||
dest = a.dest or resolved.get("library_file")
|
||||
if dest:
|
||||
kwargs["destination_video"] = dest
|
||||
@@ -372,12 +430,16 @@ class WorkflowRunner:
|
||||
kwargs["library_file"] = library_file
|
||||
if a.download_folder:
|
||||
kwargs["original_download_folder"] = a.download_folder
|
||||
elif a.source:
|
||||
kwargs.setdefault("original_download_folder", str(Path(a.source).parent))
|
||||
else:
|
||||
# Use the resolved folder path from list_downloads context
|
||||
list_result = self.context.get("list_downloads", {})
|
||||
folder_path = list_result.get("path")
|
||||
if folder_path:
|
||||
kwargs.setdefault("original_download_folder", folder_path)
|
||||
|
||||
return kwargs
|
||||
|
||||
def _print_result(self, result: dict) -> None:
|
||||
def _print_result(self, result: dict, tool_name: str = "") -> None:
|
||||
status = result.get("status", "?")
|
||||
if status == "ok":
|
||||
ok(f"status={c('ok', GREEN)}")
|
||||
@@ -387,6 +449,11 @@ class WorkflowRunner:
|
||||
err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}")
|
||||
return
|
||||
|
||||
# Highlight resolved folder path for list_folder
|
||||
if tool_name == "list_folder" and result.get("path"):
|
||||
print()
|
||||
print(f" {c('Dossier résolu:', BOLD, GREEN)} {c(result['path'], CYAN, BOLD)}")
|
||||
|
||||
# Pretty-print notable fields
|
||||
skip = {"status", "error", "message"}
|
||||
for k, v in result.items():
|
||||
@@ -420,8 +487,8 @@ def parse_args() -> argparse.Namespace:
|
||||
help="Simulate steps without executing tools (default)")
|
||||
parser.add_argument("--live", action="store_true",
|
||||
help="Actually execute tools against the real filesystem")
|
||||
parser.add_argument("--source", metavar="PATH",
|
||||
help="Source video file (in download folder)")
|
||||
parser.add_argument("--source", metavar="FOLDER_NAME",
|
||||
help="Release folder name inside the download root (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)")
|
||||
parser.add_argument("--dest", metavar="PATH",
|
||||
help="Destination video file (in library, overrides resolve_destination)")
|
||||
parser.add_argument("--download-folder", metavar="PATH",
|
||||
|
||||
Reference in New Issue
Block a user