feat: release parser, media type detection, ffprobe integration

Replace the old domain/media release parser with a full rewrite under domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" | "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path, languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize, _strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…), HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio, video, editions, sites/c411

New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder

New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo

New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file

New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)

Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
2026-05-12 16:14:20 +02:00
parent 249c5de76a
commit 1723b9fa53
32 changed files with 2323 additions and 562 deletions
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""
+probe_video.py — Display MediaInfo extracted by ffprobe for a video file.
+
+Usage:
+    uv run testing/probe_video.py /path/to/video.mkv
+    uv run testing/probe_video.py /path/to/video.mkv --no-color
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+# Put the directory one level above this script's folder on sys.path so the
+# `alfred` package imported in main() resolves when the script is run
+# directly (see the usage lines in the module docstring).
+_PROJECT_ROOT = Path(__file__).resolve().parents[1]
+if str(_PROJECT_ROOT) not in sys.path:
+    # Prepend so this checkout is found before other sys.path entries.
+    sys.path.insert(0, str(_PROJECT_ROOT))
+
+# ---------------------------------------------------------------------------
+# Colours
+# ---------------------------------------------------------------------------
+
+# ANSI SGR escape sequences used by c(); RESET clears every active attribute.
+RESET  = "\033[0m"
+BOLD   = "\033[1m"
+DIM    = "\033[2m"
+GREEN  = "\033[32m"
+YELLOW = "\033[33m"
+RED    = "\033[31m"
+CYAN   = "\033[36m"
+BLUE   = "\033[34m"
+
+# Global switch read by c(); main() turns it off for --no-color or when
+# stdout is not a TTY.
+USE_COLOR = True
+
+
+def c(text: str, *codes: str) -> str:
+    """Return *text* wrapped in ANSI escape codes.
+
+    Args:
+        text: Value to render; it is passed through ``str()``.
+        *codes: Escape sequences emitted, in order, before the text.
+
+    Returns:
+        The codes, the text and a trailing ``RESET`` when ``USE_COLOR`` is
+        true; otherwise just the stringified text.
+    """
+    plain = str(text)
+    if USE_COLOR:
+        prefix = "".join(codes)
+        return f"{prefix}{plain}{RESET}"
+    return plain
+
+
+def kv(key: str, val: str, indent: int = 4, color: str = CYAN) -> None:
+    """Print one ``key: value`` line.
+
+    Args:
+        key: Label; printed bold with a trailing colon.
+        val: Value text; printed in *color*.
+        indent: Number of leading spaces.
+        color: Escape code applied to the value.
+    """
+    pad = " " * indent
+    label = c(key + ":", BOLD)
+    value = c(val, color)
+    print(f"{pad}{label} {value}")
+
+
+def section(title: str) -> None:
+    """Print a blank line followed by a bold blue ``▸ title`` heading."""
+    print()
+    heading = c("▸ " + title, BOLD, BLUE)
+    print("  " + heading)
+
+
+def hr() -> None:
+    """Print a dim horizontal rule 70 characters wide."""
+    rule = "─" * 70
+    print(c(rule, DIM))
+
+
+# ---------------------------------------------------------------------------
+# Formatting helpers
+# ---------------------------------------------------------------------------
+
+def fmt_duration(seconds: float) -> str:
+    """Format a duration in seconds as ``"Hh MMm SSs"`` or ``"Mm SSs"``.
+
+    Fractional seconds are truncated. The hours part is omitted when it is
+    zero.
+
+    Args:
+        seconds: Duration in seconds.
+
+    Returns:
+        The formatted duration, or ``"—"`` when *seconds* is negative or not
+        finite (NaN, ±inf), since such values have no meaningful rendering.
+    """
+    import math  # local import keeps this helper self-contained
+
+    # Floor-based arithmetic on a negative value yields nonsense such as
+    # "-1h 59m 55s", and int() raises on NaN/inf, so reject both up front.
+    if not math.isfinite(seconds) or seconds < 0:
+        return "—"
+
+    minutes, secs = divmod(int(seconds), 60)
+    hours, minutes = divmod(minutes, 60)
+    if hours:
+        return f"{hours}h {minutes:02d}m {secs:02d}s"
+    return f"{minutes}m {secs:02d}s"
+
+
+def fmt_channels(channels: int | None, layout: str | None) -> str:
+    """Describe an audio channel configuration, e.g. ``"6ch (5.1)"``.
+
+    Args:
+        channels: Channel count, or ``None`` when unknown.
+        layout: Layout name, or ``None``/empty when unknown.
+
+    Returns:
+        The known parts joined by a space, or ``"—"`` when neither is known.
+    """
+    count = None if channels is None else str(channels) + "ch"
+    shape = f"({layout})" if layout else None
+    pieces = [piece for piece in (count, shape) if piece is not None]
+    if not pieces:
+        return "—"
+    return " ".join(pieces)
+
+
+def flag(val: bool) -> str:
+    """Render a boolean as a green ``yes`` or a dim ``no``."""
+    if val:
+        return c("yes", GREEN)
+    return c("no", DIM)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def _print_header(path: Path) -> None:
+    """Print the banner showing the file name and the path as given."""
+    print()
+    print(c("━" * 70, BOLD))
+    print(c(f"  {path.name}", BOLD, CYAN))
+    print(c(f"  {path}", DIM))
+    print(c("━" * 70, BOLD))
+
+
+def _print_video(info) -> None:
+    """Print the "Video" section; optional metrics are skipped when unset."""
+    section("Video")
+    kv("codec", info.video_codec or c("—", DIM))
+    kv("resolution", info.resolution or c("—", DIM))
+    if info.width and info.height:
+        kv("dimensions", f"{info.width} × {info.height}")
+    if info.duration_seconds is not None:
+        kv("duration", fmt_duration(info.duration_seconds))
+    if info.bitrate_kbps is not None:
+        kv("bitrate", f"{info.bitrate_kbps} kbps")
+
+
+def _print_audio(info) -> None:
+    """Print the "Audio" section, one entry per audio track."""
+    section(f"Audio  {c(str(len(info.audio_tracks)) + ' track(s)', DIM)}")
+    if not info.audio_tracks:
+        print(f"    {c('no audio tracks found', DIM)}")
+    for track in info.audio_tracks:
+        lang = track.language or "und"
+        default_marker = f"  {c('default', GREEN, DIM)}" if track.is_default else ""
+        print(f"    {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{default_marker}")
+        kv("codec", track.codec or c("—", DIM), indent=8)
+        kv("channels", fmt_channels(track.channels, track.channel_layout), indent=8)
+
+
+def _print_subtitles(info) -> None:
+    """Print the "Subtitles" section, one entry per embedded subtitle track."""
+    section(f"Subtitles  {c(str(len(info.subtitle_tracks)) + ' track(s)', DIM)}")
+    if not info.subtitle_tracks:
+        print(f"    {c('no embedded subtitle tracks', DIM)}")
+    for track in info.subtitle_tracks:
+        lang = track.language or "und"
+        markers = []
+        if track.is_default:
+            markers.append(c("default", GREEN, DIM))
+        if track.is_forced:
+            markers.append(c("forced", YELLOW, DIM))
+        marker_str = ("  " + "  ".join(markers)) if markers else ""
+        print(f"    {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{marker_str}")
+        kv("codec", track.codec or c("—", DIM), indent=8)
+
+
+def _print_summary(info) -> None:
+    """Print the closing summary line framed by two horizontal rules."""
+    print()
+    hr()
+    multi = flag(info.is_multi_audio)
+    langs = ", ".join(info.audio_languages) if info.audio_languages else c("—", DIM)
+    print(f"  {c('multi-audio:', BOLD)} {multi}   {c('languages:', BOLD)} {c(langs, CYAN)}")
+    hr()
+    print()
+
+
+def main() -> None:
+    """Parse the command line, probe the file and print its media info.
+
+    Colour is disabled when ``--no-color`` is given or stdout is not a TTY.
+    Exits with status 1 (message on stderr) when the path does not exist, is
+    not a regular file, or ``probe()`` returns ``None``.
+    """
+    global USE_COLOR
+
+    parser = argparse.ArgumentParser(description="Probe a video file with ffprobe")
+    parser.add_argument("file", help="Path to the video file")
+    parser.add_argument("--no-color", action="store_true")
+    args = parser.parse_args()
+
+    if args.no_color or not sys.stdout.isatty():
+        USE_COLOR = False
+
+    path = Path(args.file)
+    if not path.exists():
+        print(c(f"Error: {path} does not exist", RED), file=sys.stderr)
+        sys.exit(1)
+    # A directory also "exists"; reject it here with a precise message
+    # instead of handing it to ffprobe.
+    if not path.is_file():
+        print(c(f"Error: {path} is not a file", RED), file=sys.stderr)
+        sys.exit(1)
+
+    # Imported here, after argument validation, as in the original layout;
+    # the module resolves via the sys.path bootstrap at the top of the file.
+    from alfred.infrastructure.filesystem.ffprobe import probe
+
+    info = probe(path)
+    if info is None:
+        print(c("Error: ffprobe failed to probe the file", RED), file=sys.stderr)
+        sys.exit(1)
+
+    _print_header(path)
+    _print_video(info)
+    _print_audio(info)
+    _print_subtitles(info)
+    _print_summary(info)
+
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()