feat: release parser, media type detection, ffprobe integration
Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
"documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
_strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
video, editions, sites/c411
New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder
New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo
New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file
New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)
Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
"""ffprobe — infrastructure adapter for extracting MediaInfo from a video file."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from alfred.domain.shared.media_info import AudioTrack, MediaInfo, SubtitleTrack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Base ffprobe command line. The path of the file to inspect is appended as
# the final argument by probe().
_FFPROBE_CMD = [
    "ffprobe",
    # Log level "quiet": keep ffprobe's own diagnostics out of the output.
    "-v",
    "quiet",
    # Emit the report as JSON so it can be decoded with json.loads.
    "-print_format",
    "json",
    # Include the per-stream section (video / audio / subtitle streams).
    "-show_streams",
    # Include the container-level section (duration, overall bit rate, ...).
    "-show_format",
]
|
||||
|
||||
|
||||
def probe(path: Path) -> MediaInfo | None:
    """
    Run ffprobe on path and return a MediaInfo.

    Args:
        path: File to inspect; appended to ``_FFPROBE_CMD`` as the last
            command-line argument.

    Returns:
        The MediaInfo built by ``_parse`` from ffprobe's JSON report, or None
        when ffprobe cannot be launched (e.g. it is not installed), does not
        finish within 30 seconds, exits with a non-zero status, or prints
        output that is not valid JSON. Every failure is logged as a warning.
    """
    try:
        result = subprocess.run(
            [*_FFPROBE_CMD, str(path)],
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 instead of the locale encoding, and
            # replace undecodable bytes, so stream tags containing non-ASCII
            # text cannot raise UnicodeDecodeError.
            encoding="utf-8",
            errors="replace",
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        logger.warning("ffprobe timed out on %s", path)
        return None
    except OSError as exc:
        # FileNotFoundError / PermissionError: the ffprobe executable is
        # missing or cannot be run. Honour the "returns None if ffprobe is
        # not available" contract instead of propagating the exception.
        logger.warning("ffprobe could not be executed for %s: %s", path, exc)
        return None

    if result.returncode != 0:
        logger.warning("ffprobe failed on %s: %s", path, result.stderr.strip())
        return None

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        logger.warning("ffprobe returned invalid JSON for %s", path)
        return None

    return _parse(data)
|
||||
|
||||
|
||||
def _parse(data: dict) -> MediaInfo:
    """
    Convert ffprobe's decoded JSON report into a MediaInfo.

    Args:
        data: The JSON object printed by ffprobe; its "format" and "streams"
            entries are read, and either may be absent.

    Returns:
        A MediaInfo with:
        - duration_seconds / bitrate_kbps taken from the "format" section
          when present and numeric (left untouched otherwise);
        - video_codec / width / height taken from the first real video
          stream (embedded cover art is ignored);
        - one AudioTrack per audio stream and one SubtitleTrack per subtitle
          stream, in the order ffprobe lists them.
    """
    # `or` also covers keys that are present but null in the JSON.
    fmt = data.get("format") or {}
    streams = data.get("streams") or []

    info = MediaInfo()

    # Format-level. These fields are best-effort: a missing, null or
    # non-numeric value simply leaves the attribute at its default.
    if "duration" in fmt:
        try:
            info.duration_seconds = float(fmt["duration"])
        except (TypeError, ValueError):
            pass
    if "bit_rate" in fmt:
        try:
            # Integer division by 1000 to store the value in the kbps field.
            info.bitrate_kbps = int(fmt["bit_rate"]) // 1000
        except (TypeError, ValueError):
            pass

    for stream in streams:
        codec_type = stream.get("codec_type")
        disposition = stream.get("disposition") or {}
        tags = stream.get("tags") or {}

        if codec_type == "video":
            # Embedded cover art / thumbnails are reported by ffprobe as video
            # streams flagged attached_pic; they must not be mistaken for the
            # main video track.
            if disposition.get("attached_pic", 0) == 1:
                continue
            # Only the first video stream is recorded.
            # NOTE(review): relies on MediaInfo.video_codec defaulting to
            # None — confirm against domain/shared/media_info.py.
            if info.video_codec is None:
                info.video_codec = stream.get("codec_name")
                info.width = stream.get("width")
                info.height = stream.get("height")

        elif codec_type == "audio":
            info.audio_tracks.append(AudioTrack(
                # Fall back to the position among audio tracks when ffprobe
                # gives no stream index.
                index=stream.get("index", len(info.audio_tracks)),
                codec=stream.get("codec_name"),
                channels=stream.get("channels"),
                channel_layout=stream.get("channel_layout"),
                language=tags.get("language"),
                is_default=disposition.get("default", 0) == 1,
            ))

        elif codec_type == "subtitle":
            info.subtitle_tracks.append(SubtitleTrack(
                # Same fallback as for audio, counted among subtitle tracks.
                index=stream.get("index", len(info.subtitle_tracks)),
                codec=stream.get("codec_name"),
                language=tags.get("language"),
                is_default=disposition.get("default", 0) == 1,
                is_forced=disposition.get("forced", 0) == 1,
            ))

    return info
|
||||
Reference in New Issue
Block a user