feat: release parser, media type detection, ffprobe integration

Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
  "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
  languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
  _strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
  HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
  video, editions, sites/c411

New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
  tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder

New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo

New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file

New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)

Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
This commit is contained in:
2026-05-12 16:14:20 +02:00
parent 249c5de76a
commit 1723b9fa53
32 changed files with 2323 additions and 562 deletions
+160
View File
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
probe_video.py — Display MediaInfo extracted by ffprobe for a video file.
Usage:
uv run testing/probe_video.py /path/to/video.mkv
uv run testing/probe_video.py /path/to/video.mkv --no-color
"""
import argparse
import sys
from pathlib import Path
# Directory one level above this script's own folder; put first on sys.path
# (only if not already listed) before the project import done in main().
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
if not any(entry == str(_PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(_PROJECT_ROOT))
# ---------------------------------------------------------------------------
# Colours
# ---------------------------------------------------------------------------
# Text attributes (ANSI SGR escape sequences).
RESET: str = "\033[0m"  # appended by c() after every coloured fragment
BOLD: str = "\033[1m"
DIM: str = "\033[2m"

# Foreground colours.
RED: str = "\033[31m"
GREEN: str = "\033[32m"
YELLOW: str = "\033[33m"
BLUE: str = "\033[34m"
CYAN: str = "\033[36m"

# Global switch read by c(); main() turns it off for --no-color / non-TTY output.
USE_COLOR: bool = True
def c(text: str, *codes: str) -> str:
    """Return *text* wrapped in the given escape codes and a trailing RESET.

    When the module-level ``USE_COLOR`` switch is off, the text is returned
    as a plain string with no escape sequences at all.
    """
    plain = str(text)
    if USE_COLOR:
        prefix = "".join(codes)
        return prefix + plain + RESET
    return plain
def kv(key: str, val: str, indent: int = 4, color: str = CYAN) -> None:
    """Print one ``key: value`` line.

    Args:
        key: Label; printed in bold with a colon appended.
        val: Value text; printed in *color*.
        indent: Number of leading spaces.
        color: Escape code applied to the value.
    """
    padding = " " * indent
    label = c(key + ":", BOLD)
    value = c(val, color)
    print(f"{padding}{label} {value}")
def section(title: str) -> None:
    """Print a blank line, then *title* as a bold blue section heading."""
    # The prefix literal prepended to the title is an empty string here.
    heading = c("" + title, BOLD, BLUE)
    print()
    print(f" {heading}")
def hr() -> None:
    """Print a dim horizontal rule 70 characters wide."""
    # The rule character must be non-empty: ``"" * 70`` is just "" and would
    # print a blank line. ASCII is used so any stdout encoding can render it.
    print(c("-" * 70, DIM))
# ---------------------------------------------------------------------------
# Formatting helpers
# ---------------------------------------------------------------------------
def fmt_duration(seconds: float) -> str:
    """Format a duration in seconds as ``"Hh MMm SSs"`` or ``"Mm SSs"``.

    Fractional seconds are truncated. The hour part is omitted when it is
    zero. Negative inputs are clamped to zero: floor division would otherwise
    borrow from the hour field and render e.g. ``-5`` as ``"-1h 59m 55s"``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The human-readable duration string.
    """
    total = max(0, int(seconds))
    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours}h {minutes:02d}m {secs:02d}s"
    return f"{minutes}m {secs:02d}s"
def fmt_channels(channels: int | None, layout: str | None) -> str:
parts = []
if channels is not None:
parts.append(str(channels) + "ch")
if layout:
parts.append(f"({layout})")
return " ".join(parts) if parts else ""
def flag(val: bool) -> str:
    """Render a boolean as a green ``yes`` or a dim ``no``."""
    if val:
        return c("yes", GREEN)
    return c("no", DIM)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> None:
    """Parse the command line, probe the given file and print a report.

    The report has a header with the file name and path, then Video, Audio
    and Subtitles sections, then a one-line summary of audio languages.

    Exits with status 1, after printing a message on stderr, when the path
    does not exist, is a directory, or when ``probe`` returns ``None``.
    """
    global USE_COLOR

    parser = argparse.ArgumentParser(description="Probe a video file with ffprobe")
    parser.add_argument("file", help="Path to the video file")
    parser.add_argument("--no-color", action="store_true")
    args = parser.parse_args()

    # Colour is switched off on request or when stdout is not a terminal.
    if args.no_color or not sys.stdout.isatty():
        USE_COLOR = False

    path = Path(args.file)
    if not path.exists():
        print(c(f"Error: {path} does not exist", RED), file=sys.stderr)
        sys.exit(1)
    # A directory passes exists() but cannot be probed; report it explicitly
    # instead of falling through to the generic "ffprobe failed" message.
    if path.is_dir():
        print(c(f"Error: {path} is a directory, not a file", RED), file=sys.stderr)
        sys.exit(1)

    # NOTE(review): presumably kept local so it runs after the sys.path
    # setup at the top of the module — confirm before hoisting.
    from alfred.infrastructure.filesystem.ffprobe import probe

    info = probe(path)
    if info is None:
        print(c("Error: ffprobe failed to probe the file", RED), file=sys.stderr)
        sys.exit(1)

    # Visible stand-ins: an empty placeholder or rule character prints nothing.
    missing = c("-", DIM)
    banner = c("=" * 70, BOLD)

    print()
    print(banner)
    print(c(f" {path.name}", BOLD, CYAN))
    print(c(f" {path}", DIM))
    print(banner)

    # --- Video ---
    section("Video")
    kv("codec", info.video_codec or missing)
    kv("resolution", info.resolution or missing)
    if info.width and info.height:
        kv("dimensions", f"{info.width} × {info.height}")
    if info.duration_seconds is not None:
        kv("duration", fmt_duration(info.duration_seconds))
    if info.bitrate_kbps is not None:
        kv("bitrate", f"{info.bitrate_kbps} kbps")

    # --- Audio ---
    section(f"Audio {c(str(len(info.audio_tracks)) + ' track(s)', DIM)}")
    if not info.audio_tracks:
        print(f" {c('no audio tracks found', DIM)}")
    for track in info.audio_tracks:
        lang = track.language or "und"
        default_marker = f" {c('default', GREEN, DIM)}" if track.is_default else ""
        print(f" {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{default_marker}")
        kv("codec", track.codec or missing, indent=8)
        kv(
            "channels",
            fmt_channels(track.channels, track.channel_layout) or missing,
            indent=8,
        )

    # --- Subtitles ---
    section(f"Subtitles {c(str(len(info.subtitle_tracks)) + ' track(s)', DIM)}")
    if not info.subtitle_tracks:
        print(f" {c('no embedded subtitle tracks', DIM)}")
    for track in info.subtitle_tracks:
        lang = track.language or "und"
        markers = []
        if track.is_default:
            markers.append(c("default", GREEN, DIM))
        if track.is_forced:
            markers.append(c("forced", YELLOW, DIM))
        marker_str = (" " + " ".join(markers)) if markers else ""
        print(f" {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{marker_str}")
        kv("codec", track.codec or missing, indent=8)

    # --- Summary ---
    print()
    hr()
    multi = flag(info.is_multi_audio)  # reuse the shared yes/no renderer
    langs = ", ".join(info.audio_languages) if info.audio_languages else missing
    print(f" {c('multi-audio:', BOLD)} {multi} {c('languages:', BOLD)} {c(langs, CYAN)}")
    hr()
    print()
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()