refactor(probe): consolidate full probe() into MediaProber port

Add probe(video) -> MediaInfo | None to the MediaProber Protocol and
implement it on FfprobeMediaProber. The standalone
alfred/infrastructure/filesystem/ffprobe.py module is removed; all
callers (analyze_release / probe_media tools, testing scripts) now go
through the adapter.

Tests for the probe path moved to tests/infrastructure/test_ffprobe_prober.py
(patching subprocess.run at the adapter module level).

Unblocks the upcoming inspect_release orchestrator, which needs the
port — not a free function — to compose parse + main-video selection
+ probe in one shot.
This commit is contained in:
2026-05-20 09:11:24 +02:00
parent 5db350a1df
commit c303efea48
10 changed files with 311 additions and 278 deletions
+17
View File
@@ -15,6 +15,23 @@ callers).
## [Unreleased]
### Changed
- **`MediaProber` port now covers full media probing**: added
`probe(video) -> MediaInfo | None` alongside the existing
`list_subtitle_streams`. `FfprobeMediaProber` (in
`alfred/infrastructure/probe/`) implements both methods and is now
the single adapter shelling out to `ffprobe`. The standalone
`alfred/infrastructure/filesystem/ffprobe.py` module was removed —
all callers (tools, testing scripts) instantiate
`FfprobeMediaProber` instead. Unblocks the upcoming
`inspect_release` orchestrator, which depends on the port.
### Removed
- `alfred/infrastructure/filesystem/ffprobe.py` (folded into the
`FfprobeMediaProber` adapter).
---
## [2026-05-20] — Release parser confidence scoring + exclusion
+5 -3
View File
@@ -28,10 +28,12 @@ from alfred.application.filesystem.resolve_destination import (
resolve_series_destination as _resolve_series_destination,
)
from alfred.infrastructure.filesystem import FileManager, create_folder, move
from alfred.infrastructure.filesystem.ffprobe import probe
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.metadata import MetadataStore
from alfred.infrastructure.persistence import get_memory
from alfred.infrastructure.probe import FfprobeMediaProber
_PROBER = FfprobeMediaProber()
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
@@ -201,7 +203,7 @@ def analyze_release(release_name: str, source_path: str) -> dict[str, Any]:
if parsed.media_type not in ("unknown", "other"):
video_file = find_video_file(path, _KB)
if video_file:
media_info = probe(video_file)
media_info = _PROBER.probe(video_file)
if media_info:
enrich_from_probe(parsed, media_info)
probe_used = True
@@ -241,7 +243,7 @@ def probe_media(source_path: str) -> dict[str, Any]:
"message": f"{source_path} does not exist",
}
media_info = probe(path)
media_info = _PROBER.probe(path)
if media_info is None:
return {
"status": "error",
+14 -1
View File
@@ -9,7 +9,10 @@ from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Protocol
from typing import TYPE_CHECKING, Protocol
if TYPE_CHECKING:
from alfred.domain.shared.media import MediaInfo
@dataclass(frozen=True)
@@ -37,3 +40,13 @@ class MediaProber(Protocol):
no subtitle streams. Adapters must not raise.
"""
...
def probe(self, video: Path) -> MediaInfo | None:
"""Return the full :class:`MediaInfo` for ``video``, or ``None``.
Covers all stream families (video, audio, subtitle) plus
file-level duration / bitrate. ``None`` signals that ffprobe is
unavailable or the file can't be read — adapters must not
raise.
"""
...
-121
View File
@@ -1,121 +0,0 @@
"""ffprobe — infrastructure adapter for extracting MediaInfo from a video file."""
from __future__ import annotations
import json
import logging
import subprocess
from pathlib import Path
from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack
logger = logging.getLogger(__name__)
_FFPROBE_CMD = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-show_format",
]
def probe(path: Path) -> MediaInfo | None:
"""
Run ffprobe on path and return a MediaInfo.
Returns None if ffprobe is not available or the file cannot be probed.
"""
try:
result = subprocess.run(
[*_FFPROBE_CMD, str(path)],
capture_output=True,
text=True,
timeout=30,
check=False,
)
except subprocess.TimeoutExpired:
logger.warning("ffprobe timed out on %s", path)
return None
if result.returncode != 0:
logger.warning("ffprobe failed on %s: %s", path, result.stderr.strip())
return None
try:
data = json.loads(result.stdout)
except json.JSONDecodeError:
logger.warning("ffprobe returned invalid JSON for %s", path)
return None
return _parse(data)
def _parse(data: dict) -> MediaInfo:
streams = data.get("streams", [])
fmt = data.get("format", {})
# File-level duration/bitrate (ffprobe ``format`` block — independent of streams)
duration_seconds: float | None = None
bitrate_kbps: int | None = None
if "duration" in fmt:
try:
duration_seconds = float(fmt["duration"])
except ValueError:
pass
if "bit_rate" in fmt:
try:
bitrate_kbps = int(fmt["bit_rate"]) // 1000
except ValueError:
pass
video_tracks: list[VideoTrack] = []
audio_tracks: list[AudioTrack] = []
subtitle_tracks: list[SubtitleTrack] = []
for stream in streams:
codec_type = stream.get("codec_type")
if codec_type == "video":
video_tracks.append(
VideoTrack(
index=stream.get("index", len(video_tracks)),
codec=stream.get("codec_name"),
width=stream.get("width"),
height=stream.get("height"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
)
)
elif codec_type == "audio":
audio_tracks.append(
AudioTrack(
index=stream.get("index", len(audio_tracks)),
codec=stream.get("codec_name"),
channels=stream.get("channels"),
channel_layout=stream.get("channel_layout"),
language=stream.get("tags", {}).get("language"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
)
)
elif codec_type == "subtitle":
subtitle_tracks.append(
SubtitleTrack(
index=stream.get("index", len(subtitle_tracks)),
codec=stream.get("codec_name"),
language=stream.get("tags", {}).get("language"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
is_forced=stream.get("disposition", {}).get("forced", 0) == 1,
)
)
return MediaInfo(
video_tracks=tuple(video_tracks),
audio_tracks=tuple(audio_tracks),
subtitle_tracks=tuple(subtitle_tracks),
duration_seconds=duration_seconds,
bitrate_kbps=bitrate_kbps,
)
@@ -7,12 +7,23 @@ import logging
import subprocess
from pathlib import Path
from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack
from alfred.domain.shared.ports import SubtitleStreamInfo
logger = logging.getLogger(__name__)
_FFPROBE_TIMEOUT_SECONDS = 30
_FFPROBE_FULL_CMD = [
"ffprobe",
"-v",
"quiet",
"-print_format",
"json",
"-show_streams",
"-show_format",
]
class FfprobeMediaProber:
"""Inspect media files by shelling out to ``ffprobe``.
@@ -63,3 +74,101 @@ class FfprobeMediaProber:
)
)
return streams
def probe(self, video: Path) -> MediaInfo | None:
"""Run ffprobe on ``video`` and return a :class:`MediaInfo`.
Returns ``None`` when ffprobe is not available, times out, or
the file cannot be parsed. Never raises.
"""
try:
result = subprocess.run(
[*_FFPROBE_FULL_CMD, str(video)],
capture_output=True,
text=True,
timeout=_FFPROBE_TIMEOUT_SECONDS,
check=False,
)
except (subprocess.TimeoutExpired, FileNotFoundError) as e:
logger.warning("ffprobe failed on %s: %s", video, e)
return None
if result.returncode != 0:
logger.warning("ffprobe failed on %s: %s", video, result.stderr.strip())
return None
try:
data = json.loads(result.stdout)
except json.JSONDecodeError:
logger.warning("ffprobe returned invalid JSON for %s", video)
return None
return _parse_media_info(data)
def _parse_media_info(data: dict) -> MediaInfo:
"""Translate raw ffprobe JSON into a :class:`MediaInfo` snapshot."""
streams = data.get("streams", [])
fmt = data.get("format", {})
duration_seconds: float | None = None
bitrate_kbps: int | None = None
if "duration" in fmt:
try:
duration_seconds = float(fmt["duration"])
except ValueError:
pass
if "bit_rate" in fmt:
try:
bitrate_kbps = int(fmt["bit_rate"]) // 1000
except ValueError:
pass
video_tracks: list[VideoTrack] = []
audio_tracks: list[AudioTrack] = []
subtitle_tracks: list[SubtitleTrack] = []
for stream in streams:
codec_type = stream.get("codec_type")
if codec_type == "video":
video_tracks.append(
VideoTrack(
index=stream.get("index", len(video_tracks)),
codec=stream.get("codec_name"),
width=stream.get("width"),
height=stream.get("height"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
)
)
elif codec_type == "audio":
audio_tracks.append(
AudioTrack(
index=stream.get("index", len(audio_tracks)),
codec=stream.get("codec_name"),
channels=stream.get("channels"),
channel_layout=stream.get("channel_layout"),
language=stream.get("tags", {}).get("language"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
)
)
elif codec_type == "subtitle":
subtitle_tracks.append(
SubtitleTrack(
index=stream.get("index", len(subtitle_tracks)),
codec=stream.get("codec_name"),
language=stream.get("tags", {}).get("language"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
is_forced=stream.get("disposition", {}).get("forced", 0) == 1,
)
)
return MediaInfo(
video_tracks=tuple(video_tracks),
audio_tracks=tuple(audio_tracks),
subtitle_tracks=tuple(subtitle_tracks),
duration_seconds=duration_seconds,
bitrate_kbps=bitrate_kbps,
)
+2 -2
View File
@@ -88,13 +88,13 @@ def analyze(release_name: str, source_path: str | None = None) -> None:
if not path.exists():
print(" (chemin inexistant, probe skipped)")
else:
from alfred.infrastructure.filesystem.ffprobe import probe
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.probe import FfprobeMediaProber
video = find_video_file(path) if path.is_dir() else path
if video:
print(f" video file: {video.name}")
info = probe(video)
info = FfprobeMediaProber().probe(video)
if info:
print(f" codec: {info.video_codec}")
print(f" resolution: {info.resolution}")
+2 -2
View File
@@ -98,9 +98,9 @@ def main() -> None:
print(c(f"Error: {path} does not exist", RED), file=sys.stderr)
sys.exit(1)
from alfred.infrastructure.filesystem.ffprobe import probe
from alfred.infrastructure.probe import FfprobeMediaProber
info = probe(path)
info = FfprobeMediaProber().probe(path)
if info is None:
print(c("Error: ffprobe failed to probe the file", RED), file=sys.stderr)
sys.exit(1)
+4 -2
View File
@@ -103,8 +103,10 @@ def main() -> None:
from alfred.application.filesystem.detect_media_type import detect_media_type
from alfred.application.filesystem.enrich_from_probe import enrich_from_probe
from alfred.domain.release.services import parse_release
from alfred.infrastructure.filesystem.ffprobe import probe
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.probe import FfprobeMediaProber
_prober = FfprobeMediaProber()
entries = sorted(downloads.iterdir(), key=lambda p: p.name.lower())
total = len(entries)
@@ -126,7 +128,7 @@ def main() -> None:
if p.media_type not in ("unknown", "other"):
video_file = find_video_file(entry)
if video_file:
media_info = probe(video_file)
media_info = _prober.probe(video_file)
if media_info:
enrich_from_probe(p, media_info)
warnings = _assess(p)
+155
View File
@@ -0,0 +1,155 @@
"""Tests for :class:`FfprobeMediaProber`.
Covers the full-probe path (``probe()`` returning a ``MediaInfo``) by
patching ``subprocess.run`` at the adapter module level. The
subtitle-streams path is exercised by the subtitle domain tests via
the same adapter.
"""
from __future__ import annotations
import json
import subprocess
from unittest.mock import MagicMock, patch
from alfred.infrastructure.probe import FfprobeMediaProber
_PROBER = FfprobeMediaProber()
_PATCH_TARGET = "alfred.infrastructure.probe.ffprobe_prober.subprocess.run"
def _ffprobe_result(returncode=0, stdout="{}", stderr="") -> MagicMock:
return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)
class TestProbe:
def test_timeout_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
_PATCH_TARGET,
side_effect=subprocess.TimeoutExpired(cmd="ffprobe", timeout=30),
):
assert _PROBER.probe(f) is None
def test_nonzero_returncode_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(returncode=1, stderr="not a media file"),
):
assert _PROBER.probe(f) is None
def test_invalid_json_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(stdout="not json {"),
):
assert _PROBER.probe(f) is None
def test_parses_format_duration_and_bitrate(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {"duration": "1234.5", "bit_rate": "5000000"},
"streams": [],
}
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = _PROBER.probe(f)
assert info is not None
assert info.duration_seconds == 1234.5
assert info.bitrate_kbps == 5000 # bit_rate // 1000
def test_invalid_numeric_format_fields_skipped(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {"duration": "garbage", "bit_rate": "also-bad"},
"streams": [],
}
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = _PROBER.probe(f)
assert info is not None
assert info.duration_seconds is None
assert info.bitrate_kbps is None
def test_parses_streams(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {},
"streams": [
{
"index": 0,
"codec_type": "video",
"codec_name": "h264",
"width": 1920,
"height": 1080,
},
{
"index": 1,
"codec_type": "audio",
"codec_name": "ac3",
"channels": 6,
"channel_layout": "5.1",
"tags": {"language": "eng"},
"disposition": {"default": 1},
},
{
"index": 2,
"codec_type": "audio",
"codec_name": "aac",
"channels": 2,
"tags": {"language": "fra"},
},
{
"index": 3,
"codec_type": "subtitle",
"codec_name": "subrip",
"tags": {"language": "fra"},
"disposition": {"forced": 1},
},
],
}
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = _PROBER.probe(f)
assert info.video_codec == "h264"
assert info.width == 1920 and info.height == 1080
assert len(info.audio_tracks) == 2
eng = info.audio_tracks[0]
assert eng.language == "eng"
assert eng.is_default is True
assert info.audio_tracks[1].is_default is False
assert len(info.subtitle_tracks) == 1
assert info.subtitle_tracks[0].is_forced is True
def test_first_video_stream_wins(self, tmp_path):
# The implementation only fills video_codec on the FIRST video stream.
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {},
"streams": [
{"codec_type": "video", "codec_name": "h264", "width": 1920},
{"codec_type": "video", "codec_name": "hevc", "width": 3840},
],
}
with patch(
_PATCH_TARGET,
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = _PROBER.probe(f)
assert info.video_codec == "h264"
assert info.width == 1920
+3 -147
View File
@@ -1,21 +1,19 @@
"""Tests for the smaller ``alfred.infrastructure.filesystem`` helpers.
Covers four siblings of ``FileManager`` that had near-zero coverage:
Covers three siblings of ``FileManager`` that had near-zero coverage:
- ``ffprobe.probe`` — wraps ``ffprobe`` JSON output into a ``MediaInfo``.
- ``filesystem_operations.create_folder`` / ``move`` — thin
``mkdir`` / ``mv`` wrappers returning dict-shaped responses.
- ``organizer.MediaOrganizer`` — computes destination paths for movies
and TV episodes; creates folders for them.
- ``find_video.find_video_file`` — first-video lookup in a folder.
External commands (``ffprobe`` / ``mv``) are patched via ``subprocess.run``.
(``ffprobe`` coverage now lives in ``test_ffprobe_prober.py`` alongside
its adapter.)
"""
from __future__ import annotations
import json
import subprocess
from unittest.mock import MagicMock, patch
from alfred.domain.movies.entities import Movie
@@ -27,7 +25,6 @@ from alfred.domain.tv_shows.value_objects import (
SeasonNumber,
ShowStatus,
)
from alfred.infrastructure.filesystem import ffprobe
from alfred.infrastructure.filesystem.filesystem_operations import (
create_folder,
move,
@@ -38,147 +35,6 @@ from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
_KB = YamlReleaseKnowledge()
# --------------------------------------------------------------------------- #
# ffprobe.probe #
# --------------------------------------------------------------------------- #
def _ffprobe_result(returncode=0, stdout="{}", stderr="") -> MagicMock:
return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)
class TestFfprobe:
def test_timeout_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
side_effect=subprocess.TimeoutExpired(cmd="ffprobe", timeout=30),
):
assert ffprobe.probe(f) is None
def test_nonzero_returncode_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(returncode=1, stderr="not a media file"),
):
assert ffprobe.probe(f) is None
def test_invalid_json_returns_none(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(stdout="not json {"),
):
assert ffprobe.probe(f) is None
def test_parses_format_duration_and_bitrate(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {"duration": "1234.5", "bit_rate": "5000000"},
"streams": [],
}
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = ffprobe.probe(f)
assert info is not None
assert info.duration_seconds == 1234.5
assert info.bitrate_kbps == 5000 # bit_rate // 1000
def test_invalid_numeric_format_fields_skipped(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {"duration": "garbage", "bit_rate": "also-bad"},
"streams": [],
}
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = ffprobe.probe(f)
assert info is not None
assert info.duration_seconds is None
assert info.bitrate_kbps is None
def test_parses_streams(self, tmp_path):
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {},
"streams": [
{
"index": 0,
"codec_type": "video",
"codec_name": "h264",
"width": 1920,
"height": 1080,
},
{
"index": 1,
"codec_type": "audio",
"codec_name": "ac3",
"channels": 6,
"channel_layout": "5.1",
"tags": {"language": "eng"},
"disposition": {"default": 1},
},
{
"index": 2,
"codec_type": "audio",
"codec_name": "aac",
"channels": 2,
"tags": {"language": "fra"},
},
{
"index": 3,
"codec_type": "subtitle",
"codec_name": "subrip",
"tags": {"language": "fra"},
"disposition": {"forced": 1},
},
],
}
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = ffprobe.probe(f)
assert info.video_codec == "h264"
assert info.width == 1920 and info.height == 1080
assert len(info.audio_tracks) == 2
eng = info.audio_tracks[0]
assert eng.language == "eng"
assert eng.is_default is True
assert info.audio_tracks[1].is_default is False
assert len(info.subtitle_tracks) == 1
assert info.subtitle_tracks[0].is_forced is True
def test_first_video_stream_wins(self, tmp_path):
# The implementation only fills video_codec on the FIRST video stream.
f = tmp_path / "x.mkv"
f.write_bytes(b"")
payload = {
"format": {},
"streams": [
{"codec_type": "video", "codec_name": "h264", "width": 1920},
{"codec_type": "video", "codec_name": "hevc", "width": 3840},
],
}
with patch(
"alfred.infrastructure.filesystem.ffprobe.subprocess.run",
return_value=_ffprobe_result(stdout=json.dumps(payload)),
):
info = ffprobe.probe(f)
assert info.video_codec == "h264"
assert info.width == 1920
# --------------------------------------------------------------------------- #
# filesystem_operations #