feat: release parser, media type detection, ffprobe integration

Replace the old domain/media release parser with a full rewrite under domain/release/: - ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" | "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path, languages, audio_codec, audio_channels, bit_depth, hdr_format, edition - Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize, _strip_site_tag) before token-level parsing - Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…), HDR (DV.HDR10…) and editions (DIRECTORS.CUT…) - Knowledge YAML: file_extensions, release_format, languages, audio, video, editions, sites/c411 New infrastructure: - ffprobe.py — single-pass probe returning MediaInfo (video, audio tracks, subtitle tracks) - find_video.py — locate first video file in a release folder New application helpers: - detect_media_type — filesystem-based type refinement - enrich_from_probe — fill missing ParsedRelease fields from MediaInfo New agent tools: - analyze_release — parse + detect type + ffprobe in one call - probe_media — standalone ffprobe for a specific file New domain value object: - MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py) Testing CLIs: - recognize_folders_in_downloads.py — full pipeline with colored output - probe_video.py — display MediaInfo for a video file
2026-05-12 16:14:20 +02:00
parent 249c5de76a
commit 1723b9fa53
32 changed files with 2323 additions and 562 deletions
@@ -0,0 +1,98 @@
+"""ffprobe — infrastructure adapter for extracting MediaInfo from a video file."""
+
+from __future__ import annotations
+
+import json
+import logging
+import subprocess
+from pathlib import Path
+
+from alfred.domain.shared.media_info import AudioTrack, MediaInfo, SubtitleTrack
+
+logger = logging.getLogger(__name__)
+
+# Base ffprobe invocation; the media path is appended as the last argument.
+# "-v error" is used instead of "-v quiet": stdout still carries only the JSON
+# document, but ffprobe may now write the reason for a failure to stderr, so
+# the warning logged on a non-zero exit status is no longer always empty.
+_FFPROBE_CMD = [
+    "ffprobe",
+    "-v", "error",
+    "-print_format", "json",
+    "-show_streams",
+    "-show_format",
+]
+
+
+def probe(path: Path) -> MediaInfo | None:
+    """
+    Run ffprobe on path and return a MediaInfo.
+
+    Returns None, after logging a warning, when ffprobe cannot be launched
+    (for example because it is not installed), does not finish within
+    30 seconds, exits with a non-zero status, or prints output that is not
+    valid JSON.
+    """
+    try:
+        result = subprocess.run(
+            [*_FFPROBE_CMD, str(path)],
+            capture_output=True,
+            text=True,
+            # ffprobe writes UTF-8; do not depend on the locale's encoding.
+            encoding="utf-8",
+            errors="replace",
+            timeout=30,
+        )
+    except OSError as exc:
+        # Covers FileNotFoundError (binary missing from PATH) and
+        # PermissionError (binary present but not executable).
+        logger.warning("ffprobe could not be launched for %s: %s", path, exc)
+        return None
+    except subprocess.TimeoutExpired:
+        logger.warning("ffprobe timed out on %s", path)
+        return None
+
+    if result.returncode != 0:
+        logger.warning("ffprobe failed on %s: %s", path, result.stderr.strip())
+        return None
+
+    try:
+        data = json.loads(result.stdout)
+    except json.JSONDecodeError:
+        logger.warning("ffprobe returned invalid JSON for %s", path)
+        return None
+
+    return _parse(data)
+
+
+def _parse(data: dict) -> MediaInfo:
+    """
+    Build a MediaInfo from ffprobe's decoded JSON document.
+
+    Reads the "format" object for duration and bitrate and the "streams"
+    list for the first real video stream plus every audio and subtitle
+    stream. Missing keys are tolerated; format-level numbers that cannot be
+    converted are skipped rather than raising.
+    """
+    streams = data.get("streams", [])
+    fmt = data.get("format", {})
+
+    info = MediaInfo()
+
+    # Format-level. A value that is not a parseable number raises ValueError
+    # for strings and TypeError for non-strings such as a JSON null; in both
+    # cases the field is simply left untouched.
+    if "duration" in fmt:
+        try:
+            info.duration_seconds = float(fmt["duration"])
+        except (TypeError, ValueError):
+            pass
+    if "bit_rate" in fmt:
+        try:
+            # ffprobe reports bits per second; store kilobits per second.
+            info.bitrate_kbps = int(fmt["bit_rate"]) // 1000
+        except (TypeError, ValueError):
+            pass
+
+    for stream in streams:
+        codec_type = stream.get("codec_type")
+        disposition = stream.get("disposition", {})
+        tags = stream.get("tags", {})
+
+        if codec_type == "video":
+            # Embedded cover art is reported as a video stream flagged
+            # attached_pic; it must not be taken for the actual video track.
+            is_cover_art = disposition.get("attached_pic", 0) == 1
+            # Only the first real video stream populates the video fields.
+            if info.video_codec is None and not is_cover_art:
+                info.video_codec = stream.get("codec_name")
+                info.width = stream.get("width")
+                info.height = stream.get("height")
+
+        elif codec_type == "audio":
+            info.audio_tracks.append(AudioTrack(
+                # Fall back to the track's position when ffprobe gives no index.
+                index=stream.get("index", len(info.audio_tracks)),
+                codec=stream.get("codec_name"),
+                channels=stream.get("channels"),
+                channel_layout=stream.get("channel_layout"),
+                language=tags.get("language"),
+                is_default=disposition.get("default", 0) == 1,
+            ))
+
+        elif codec_type == "subtitle":
+            info.subtitle_tracks.append(SubtitleTrack(
+                # Fall back to the track's position when ffprobe gives no index.
+                index=stream.get("index", len(info.subtitle_tracks)),
+                codec=stream.get("codec_name"),
+                language=tags.get("language"),
+                is_default=disposition.get("default", 0) == 1,
+                is_forced=disposition.get("forced", 0) == 1,
+            ))
+
+    return info