feat: release parser, media type detection, ffprobe integration
Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
"documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
_strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
video, editions, sites/c411
New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder
New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo
New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file
New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)
Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
This commit is contained in:
@@ -0,0 +1,76 @@
|
||||
"""enrich_from_probe — fill missing ParsedRelease fields from MediaInfo."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from alfred.domain.release.value_objects import ParsedRelease
|
||||
from alfred.domain.shared.media_info import MediaInfo
|
||||
|
||||
# Translation table: ffprobe video codec name (lower-case) -> scene-style
# codec token as it would appear in a release name.
# Both "hevc" and "h265" resolve to the same "x265" token.
_VIDEO_CODEC_MAP = dict(
    hevc="x265",
    h264="x264",
    h265="x265",
    av1="AV1",
    vp9="VP9",
    mpeg4="XviD",
)
|
||||
|
||||
# Map ffprobe audio codec names (lower-case) to scene-style tokens.
# PCM keys use ffprobe's full codec names, which carry an endianness suffix
# ("pcm_s16le", "pcm_s24le"); the previous "pcm_s16l"/"pcm_s24l" keys could
# never match a real probe result.
_AUDIO_CODEC_MAP = {
    "eac3": "EAC3",
    "ac3": "AC3",
    "dts": "DTS",
    "truehd": "TrueHD",
    "aac": "AAC",
    "flac": "FLAC",
    "opus": "OPUS",
    "mp3": "MP3",
    "pcm_s16le": "PCM",
    "pcm_s24le": "PCM",
}
|
||||
|
||||
# Channel count -> conventional layout label ("5.1", "7.1", ...).
# Listed from the largest layout down to mono.
_CHANNEL_MAP = dict(
    (
        (8, "7.1"),
        (6, "5.1"),
        (2, "2.0"),
        (1, "1.0"),
    )
)
|
||||
|
||||
|
||||
def enrich_from_probe(parsed: ParsedRelease, info: MediaInfo) -> None:
    """Fill missing fields of *parsed* using data from ffprobe *info*.

    Scalar fields (quality, codec, audio_codec, audio_channels) are only
    written when they are currently ``None`` — values parsed from the
    release name always take priority. Languages are merged instead:
    probe languages not already present are appended.

    Mutates *parsed* in place and returns ``None``.

    Args:
        parsed: Release description to enrich.
        info: Probe result providing resolution, video codec, audio tracks
            and audio languages.
    """
    if parsed.quality is None and info.resolution:
        parsed.quality = info.resolution

    if parsed.codec is None and info.video_codec:
        # Unknown codec names fall back to the upper-cased ffprobe name.
        parsed.codec = _VIDEO_CODEC_MAP.get(
            info.video_codec.lower(), info.video_codec.upper()
        )

    # bit_depth is intentionally left untouched: ffprobe exposes it via
    # pix_fmt, which MediaInfo does not carry yet.

    # Audio — use the default track, fall back to the first one.
    default_track = next((t for t in info.audio_tracks if t.is_default), None)
    track = default_track or (info.audio_tracks[0] if info.audio_tracks else None)

    if track:
        if parsed.audio_codec is None and track.codec:
            parsed.audio_codec = _AUDIO_CODEC_MAP.get(
                track.codec.lower(), track.codec.upper()
            )

        if parsed.audio_channels is None and track.channels:
            # Layouts without a standard label are rendered as e.g. "3ch".
            parsed.audio_channels = _CHANNEL_MAP.get(
                track.channels, f"{track.channels}ch"
            )

    # Languages — merge ffprobe languages with token-level ones.
    if info.audio_languages:
        # Compare case-insensitively and record every appended language, so
        # that several tracks in the same language (or a second call on the
        # same object) never produce duplicate entries.
        seen = {existing.upper() for existing in parsed.languages}
        for lang in info.audio_languages:
            if not lang:
                # Defensive: skip empty/missing language tags.
                continue
            key = lang.upper()
            # "und" = undetermined, not useful.
            if key == "UND" or key in seen:
                continue
            # The value is stored exactly as ffprobe reported it.
            parsed.languages.append(lang)
            seen.add(key)
|
||||
Reference in New Issue
Block a user