diff --git a/CHANGELOG.md b/CHANGELOG.md index d46a86a..db99a90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,23 @@ callers). ## [Unreleased] +### Changed + +- **`MediaProber` port now covers full media probing**: added + `probe(video) -> MediaInfo | None` alongside the existing + `list_subtitle_streams`. `FfprobeMediaProber` (in + `alfred/infrastructure/probe/`) implements both methods and is now + the single adapter shelling out to `ffprobe`. The standalone + `alfred/infrastructure/filesystem/ffprobe.py` module was removed — + all callers (tools, testing scripts) instantiate + `FfprobeMediaProber` instead. Unblocks the upcoming + `inspect_release` orchestrator, which depends on the port. + +### Removed + +- `alfred/infrastructure/filesystem/ffprobe.py` (folded into the + `FfprobeMediaProber` adapter). + --- ## [2026-05-20] — Release parser confidence scoring + exclusion diff --git a/alfred/agent/tools/filesystem.py b/alfred/agent/tools/filesystem.py index 3f73f17..fc50a6a 100644 --- a/alfred/agent/tools/filesystem.py +++ b/alfred/agent/tools/filesystem.py @@ -28,10 +28,12 @@ from alfred.application.filesystem.resolve_destination import ( resolve_series_destination as _resolve_series_destination, ) from alfred.infrastructure.filesystem import FileManager, create_folder, move -from alfred.infrastructure.filesystem.ffprobe import probe from alfred.infrastructure.filesystem.find_video import find_video_file from alfred.infrastructure.metadata import MetadataStore from alfred.infrastructure.persistence import get_memory +from alfred.infrastructure.probe import FfprobeMediaProber + +_PROBER = FfprobeMediaProber() _LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" @@ -201,7 +203,7 @@ def analyze_release(release_name: str, source_path: str) -> dict[str, Any]: if parsed.media_type not in ("unknown", "other"): video_file = find_video_file(path, _KB) if video_file: - media_info = probe(video_file) + media_info = _PROBER.probe(video_file) if media_info: enrich_from_probe(parsed, media_info) probe_used = True @@ -241,7 +243,7 @@ def probe_media(source_path: str) -> dict[str, Any]: "message": f"{source_path} does not exist", } - media_info = probe(path) + media_info = _PROBER.probe(path) if media_info is None: return { "status": "error", diff --git a/alfred/domain/shared/ports/media_prober.py b/alfred/domain/shared/ports/media_prober.py index d06df09..8dca6c2 100644 --- a/alfred/domain/shared/ports/media_prober.py +++ b/alfred/domain/shared/ports/media_prober.py @@ -9,7 +9,10 @@ from __future__ import annotations from dataclasses import dataclass from pathlib import Path -from typing import Protocol +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from alfred.domain.shared.media import MediaInfo @dataclass(frozen=True) @@ -37,3 +40,13 @@ class MediaProber(Protocol): no subtitle streams. Adapters must not raise. """ ... + + def probe(self, video: Path) -> MediaInfo | None: + """Return the full :class:`MediaInfo` for ``video``, or ``None``. + + Covers all stream families (video, audio, subtitle) plus + file-level duration / bitrate. ``None`` signals that ffprobe is + unavailable or the file can't be read — adapters must not + raise. + """ + ... diff --git a/alfred/infrastructure/filesystem/ffprobe.py b/alfred/infrastructure/filesystem/ffprobe.py deleted file mode 100644 index df12878..0000000 --- a/alfred/infrastructure/filesystem/ffprobe.py +++ /dev/null @@ -1,121 +0,0 @@ -"""ffprobe — infrastructure adapter for extracting MediaInfo from a video file.""" - -from __future__ import annotations - -import json -import logging -import subprocess -from pathlib import Path - -from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack - -logger = logging.getLogger(__name__) - -_FFPROBE_CMD = [ - "ffprobe", - "-v", - "quiet", - "-print_format", - "json", - "-show_streams", - "-show_format", -] - - -def probe(path: Path) -> MediaInfo | None: - """ - Run ffprobe on path and return a MediaInfo. - - Returns None if ffprobe is not available or the file cannot be probed. - """ - try: - result = subprocess.run( - [*_FFPROBE_CMD, str(path)], - capture_output=True, - text=True, - timeout=30, - check=False, - ) - except subprocess.TimeoutExpired: - logger.warning("ffprobe timed out on %s", path) - return None - - if result.returncode != 0: - logger.warning("ffprobe failed on %s: %s", path, result.stderr.strip()) - return None - - try: - data = json.loads(result.stdout) - except json.JSONDecodeError: - logger.warning("ffprobe returned invalid JSON for %s", path) - return None - - return _parse(data) - - -def _parse(data: dict) -> MediaInfo: - streams = data.get("streams", []) - fmt = data.get("format", {}) - - # File-level duration/bitrate (ffprobe ``format`` block — independent of streams) - duration_seconds: float | None = None - bitrate_kbps: int | None = None - if "duration" in fmt: - try: - duration_seconds = float(fmt["duration"]) - except ValueError: - pass - if "bit_rate" in fmt: - try: - bitrate_kbps = int(fmt["bit_rate"]) // 1000 - except ValueError: - pass - - video_tracks: list[VideoTrack] = [] - audio_tracks: list[AudioTrack] = [] - subtitle_tracks: list[SubtitleTrack] = [] - - for stream in streams: - codec_type = stream.get("codec_type") - - if codec_type == "video": - video_tracks.append( - VideoTrack( - index=stream.get("index", len(video_tracks)), - codec=stream.get("codec_name"), - width=stream.get("width"), - height=stream.get("height"), - is_default=stream.get("disposition", {}).get("default", 0) == 1, - ) - ) - - elif codec_type == "audio": - audio_tracks.append( - AudioTrack( - index=stream.get("index", len(audio_tracks)), - codec=stream.get("codec_name"), - channels=stream.get("channels"), - channel_layout=stream.get("channel_layout"), - language=stream.get("tags", {}).get("language"), - is_default=stream.get("disposition", {}).get("default", 0) == 1, - ) - ) - - elif codec_type == "subtitle": - subtitle_tracks.append( - SubtitleTrack( - index=stream.get("index", len(subtitle_tracks)), - codec=stream.get("codec_name"), - language=stream.get("tags", {}).get("language"), - is_default=stream.get("disposition", {}).get("default", 0) == 1, - is_forced=stream.get("disposition", {}).get("forced", 0) == 1, - ) - ) - - return MediaInfo( - video_tracks=tuple(video_tracks), - audio_tracks=tuple(audio_tracks), - subtitle_tracks=tuple(subtitle_tracks), - duration_seconds=duration_seconds, - bitrate_kbps=bitrate_kbps, - ) diff --git a/alfred/infrastructure/probe/ffprobe_prober.py b/alfred/infrastructure/probe/ffprobe_prober.py index 5ae4017..a8cea9b 100644 --- a/alfred/infrastructure/probe/ffprobe_prober.py +++ b/alfred/infrastructure/probe/ffprobe_prober.py @@ -7,12 +7,23 @@ import logging import subprocess from pathlib import Path +from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack from alfred.domain.shared.ports import SubtitleStreamInfo logger = logging.getLogger(__name__) _FFPROBE_TIMEOUT_SECONDS = 30 +_FFPROBE_FULL_CMD = [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_streams", + "-show_format", +] + class FfprobeMediaProber: """Inspect media files by shelling out to ``ffprobe``. @@ -63,3 +74,101 @@ class FfprobeMediaProber: ) ) return streams + + def probe(self, video: Path) -> MediaInfo | None: + """Run ffprobe on ``video`` and return a :class:`MediaInfo`. + + Returns ``None`` when ffprobe is not available, times out, or + the file cannot be parsed. Never raises. + """ + try: + result = subprocess.run( + [*_FFPROBE_FULL_CMD, str(video)], + capture_output=True, + text=True, + timeout=_FFPROBE_TIMEOUT_SECONDS, + check=False, + ) + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + logger.warning("ffprobe failed on %s: %s", video, e) + return None + + if result.returncode != 0: + logger.warning("ffprobe failed on %s: %s", video, result.stderr.strip()) + return None + + try: + data = json.loads(result.stdout) + except json.JSONDecodeError: + logger.warning("ffprobe returned invalid JSON for %s", video) + return None + + return _parse_media_info(data) + + +def _parse_media_info(data: dict) -> MediaInfo: + """Translate raw ffprobe JSON into a :class:`MediaInfo` snapshot.""" + streams = data.get("streams", []) + fmt = data.get("format", {}) + + duration_seconds: float | None = None + bitrate_kbps: int | None = None + if "duration" in fmt: + try: + duration_seconds = float(fmt["duration"]) + except ValueError: + pass + if "bit_rate" in fmt: + try: + bitrate_kbps = int(fmt["bit_rate"]) // 1000 + except ValueError: + pass + + video_tracks: list[VideoTrack] = [] + audio_tracks: list[AudioTrack] = [] + subtitle_tracks: list[SubtitleTrack] = [] + + for stream in streams: + codec_type = stream.get("codec_type") + + if codec_type == "video": + video_tracks.append( + VideoTrack( + index=stream.get("index", len(video_tracks)), + codec=stream.get("codec_name"), + width=stream.get("width"), + height=stream.get("height"), + is_default=stream.get("disposition", {}).get("default", 0) == 1, + ) + ) + + elif codec_type == "audio": + audio_tracks.append( + AudioTrack( + index=stream.get("index", len(audio_tracks)), + codec=stream.get("codec_name"), + channels=stream.get("channels"), + channel_layout=stream.get("channel_layout"), + language=stream.get("tags", {}).get("language"), + is_default=stream.get("disposition", {}).get("default", 0) == 1, + ) + ) + + elif codec_type == "subtitle": + subtitle_tracks.append( + SubtitleTrack( + index=stream.get("index", len(subtitle_tracks)), + codec=stream.get("codec_name"), + language=stream.get("tags", {}).get("language"), + is_default=stream.get("disposition", {}).get("default", 0) == 1, + is_forced=stream.get("disposition", {}).get("forced", 0) == 1, + ) + ) + + return MediaInfo( + video_tracks=tuple(video_tracks), + audio_tracks=tuple(audio_tracks), + subtitle_tracks=tuple(subtitle_tracks), + duration_seconds=duration_seconds, + bitrate_kbps=bitrate_kbps, + ) diff --git a/testing/debug_release.py b/testing/debug_release.py index fa28591..e639b4a 100644 --- a/testing/debug_release.py +++ b/testing/debug_release.py @@ -88,13 +88,13 @@ def analyze(release_name: str, source_path: str | None = None) -> None: if not path.exists(): print(" (chemin inexistant, probe skipped)") else: - from alfred.infrastructure.filesystem.ffprobe import probe from alfred.infrastructure.filesystem.find_video import find_video_file + from alfred.infrastructure.probe import FfprobeMediaProber video = find_video_file(path) if path.is_dir() else path if video: print(f" video file: {video.name}") - info = probe(video) + info = FfprobeMediaProber().probe(video) if info: print(f" codec: {info.video_codec}") print(f" resolution: {info.resolution}") diff --git a/testing/probe_video.py b/testing/probe_video.py index 078a6bd..6dd84e2 100644 --- a/testing/probe_video.py +++ b/testing/probe_video.py @@ -98,9 +98,9 @@ def main() -> None: print(c(f"Error: {path} does not exist", RED), file=sys.stderr) sys.exit(1) - from alfred.infrastructure.filesystem.ffprobe import probe + from alfred.infrastructure.probe import FfprobeMediaProber - info = probe(path) + info = FfprobeMediaProber().probe(path) if info is None: print(c("Error: ffprobe failed to probe the file", RED), file=sys.stderr) sys.exit(1) diff --git a/testing/recognize_folders_in_downloads.py b/testing/recognize_folders_in_downloads.py index 498716d..ac3e4f4 100644 --- a/testing/recognize_folders_in_downloads.py +++ b/testing/recognize_folders_in_downloads.py @@ -103,8 +103,10 @@ def main() -> None: from alfred.application.filesystem.detect_media_type import detect_media_type from alfred.application.filesystem.enrich_from_probe import enrich_from_probe from alfred.domain.release.services import parse_release - from alfred.infrastructure.filesystem.ffprobe import probe from alfred.infrastructure.filesystem.find_video import find_video_file + from alfred.infrastructure.probe import FfprobeMediaProber + + _prober = FfprobeMediaProber() entries = sorted(downloads.iterdir(), key=lambda p: p.name.lower()) total = len(entries) @@ -126,7 +128,7 @@ def main() -> None: if p.media_type not in ("unknown", "other"): video_file = find_video_file(entry) if video_file: - media_info = probe(video_file) + media_info = _prober.probe(video_file) if media_info: enrich_from_probe(p, media_info) warnings = _assess(p) diff --git a/tests/infrastructure/test_ffprobe_prober.py b/tests/infrastructure/test_ffprobe_prober.py new file mode 100644 index 0000000..c791208 --- /dev/null +++ b/tests/infrastructure/test_ffprobe_prober.py @@ -0,0 +1,155 @@ +"""Tests for :class:`FfprobeMediaProber`. + +Covers the full-probe path (``probe()`` returning a ``MediaInfo``) by +patching ``subprocess.run`` at the adapter module level. The +subtitle-streams path is exercised by the subtitle domain tests via +the same adapter. +""" + +from __future__ import annotations + +import json +import subprocess +from unittest.mock import MagicMock, patch + +from alfred.infrastructure.probe import FfprobeMediaProber + +_PROBER = FfprobeMediaProber() +_PATCH_TARGET = "alfred.infrastructure.probe.ffprobe_prober.subprocess.run" + + +def _ffprobe_result(returncode=0, stdout="{}", stderr="") -> MagicMock: + return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr) + + +class TestProbe: + def test_timeout_returns_none(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + with patch( + _PATCH_TARGET, + side_effect=subprocess.TimeoutExpired(cmd="ffprobe", timeout=30), + ): + assert _PROBER.probe(f) is None + + def test_nonzero_returncode_returns_none(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(returncode=1, stderr="not a media file"), + ): + assert _PROBER.probe(f) is None + + def test_invalid_json_returns_none(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(stdout="not json {"), + ): + assert _PROBER.probe(f) is None + + def test_parses_format_duration_and_bitrate(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + payload = { + "format": {"duration": "1234.5", "bit_rate": "5000000"}, + "streams": [], + } + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(stdout=json.dumps(payload)), + ): + info = _PROBER.probe(f) + assert info is not None + assert info.duration_seconds == 1234.5 + assert info.bitrate_kbps == 5000 # bit_rate // 1000 + + def test_invalid_numeric_format_fields_skipped(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + payload = { + "format": {"duration": "garbage", "bit_rate": "also-bad"}, + "streams": [], + } + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(stdout=json.dumps(payload)), + ): + info = _PROBER.probe(f) + assert info is not None + assert info.duration_seconds is None + assert info.bitrate_kbps is None + + def test_parses_streams(self, tmp_path): + f = tmp_path / "x.mkv" + f.write_bytes(b"") + payload = { + "format": {}, + "streams": [ + { + "index": 0, + "codec_type": "video", + "codec_name": "h264", + "width": 1920, + "height": 1080, + }, + { + "index": 1, + "codec_type": "audio", + "codec_name": "ac3", + "channels": 6, + "channel_layout": "5.1", + "tags": {"language": "eng"}, + "disposition": {"default": 1}, + }, + { + "index": 2, + "codec_type": "audio", + "codec_name": "aac", + "channels": 2, + "tags": {"language": "fra"}, + }, + { + "index": 3, + "codec_type": "subtitle", + "codec_name": "subrip", + "tags": {"language": "fra"}, + "disposition": {"forced": 1}, + }, + ], + } + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(stdout=json.dumps(payload)), + ): + info = _PROBER.probe(f) + assert info.video_codec == "h264" + assert info.width == 1920 and info.height == 1080 + assert len(info.audio_tracks) == 2 + eng = info.audio_tracks[0] + assert eng.language == "eng" + assert eng.is_default is True + assert info.audio_tracks[1].is_default is False + assert len(info.subtitle_tracks) == 1 + assert info.subtitle_tracks[0].is_forced is True + + def test_first_video_stream_wins(self, tmp_path): + # The implementation only fills video_codec on the FIRST video stream. + f = tmp_path / "x.mkv" + f.write_bytes(b"") + payload = { + "format": {}, + "streams": [ + {"codec_type": "video", "codec_name": "h264", "width": 1920}, + {"codec_type": "video", "codec_name": "hevc", "width": 3840}, + ], + } + with patch( + _PATCH_TARGET, + return_value=_ffprobe_result(stdout=json.dumps(payload)), + ): + info = _PROBER.probe(f) + assert info.video_codec == "h264" + assert info.width == 1920 diff --git a/tests/infrastructure/test_filesystem_extras.py b/tests/infrastructure/test_filesystem_extras.py index 6988489..ba867ae 100644 --- a/tests/infrastructure/test_filesystem_extras.py +++ b/tests/infrastructure/test_filesystem_extras.py @@ -1,21 +1,19 @@ """Tests for the smaller ``alfred.infrastructure.filesystem`` helpers. -Covers four siblings of ``FileManager`` that had near-zero coverage: +Covers three siblings of ``FileManager`` that had near-zero coverage: -- ``ffprobe.probe`` — wraps ``ffprobe`` JSON output into a ``MediaInfo``. - ``filesystem_operations.create_folder`` / ``move`` — thin ``mkdir`` / ``mv`` wrappers returning dict-shaped responses. - ``organizer.MediaOrganizer`` — computes destination paths for movies and TV episodes; creates folders for them. - ``find_video.find_video_file`` — first-video lookup in a folder. -External commands (``ffprobe`` / ``mv``) are patched via ``subprocess.run``. +(``ffprobe`` coverage now lives in ``test_ffprobe_prober.py`` alongside +its adapter.) """ from __future__ import annotations -import json -import subprocess from unittest.mock import MagicMock, patch from alfred.domain.movies.entities import Movie @@ -27,7 +25,6 @@ from alfred.domain.tv_shows.value_objects import ( SeasonNumber, ShowStatus, ) -from alfred.infrastructure.filesystem import ffprobe from alfred.infrastructure.filesystem.filesystem_operations import ( create_folder, move, @@ -38,147 +35,6 @@ from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge _KB = YamlReleaseKnowledge() -# --------------------------------------------------------------------------- # -# ffprobe.probe # -# --------------------------------------------------------------------------- # - - -def _ffprobe_result(returncode=0, stdout="{}", stderr="") -> MagicMock: - return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr) - - -class TestFfprobe: - def test_timeout_returns_none(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - side_effect=subprocess.TimeoutExpired(cmd="ffprobe", timeout=30), - ): - assert ffprobe.probe(f) is None - - def test_nonzero_returncode_returns_none(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(returncode=1, stderr="not a media file"), - ): - assert ffprobe.probe(f) is None - - def test_invalid_json_returns_none(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(stdout="not json {"), - ): - assert ffprobe.probe(f) is None - - def test_parses_format_duration_and_bitrate(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - payload = { - "format": {"duration": "1234.5", "bit_rate": "5000000"}, - "streams": [], - } - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(stdout=json.dumps(payload)), - ): - info = ffprobe.probe(f) - assert info is not None - assert info.duration_seconds == 1234.5 - assert info.bitrate_kbps == 5000 # bit_rate // 1000 - - def test_invalid_numeric_format_fields_skipped(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - payload = { - "format": {"duration": "garbage", "bit_rate": "also-bad"}, - "streams": [], - } - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(stdout=json.dumps(payload)), - ): - info = ffprobe.probe(f) - assert info is not None - assert info.duration_seconds is None - assert info.bitrate_kbps is None - - def test_parses_streams(self, tmp_path): - f = tmp_path / "x.mkv" - f.write_bytes(b"") - payload = { - "format": {}, - "streams": [ - { - "index": 0, - "codec_type": "video", - "codec_name": "h264", - "width": 1920, - "height": 1080, - }, - { - "index": 1, - "codec_type": "audio", - "codec_name": "ac3", - "channels": 6, - "channel_layout": "5.1", - "tags": {"language": "eng"}, - "disposition": {"default": 1}, - }, - { - "index": 2, - "codec_type": "audio", - "codec_name": "aac", - "channels": 2, - "tags": {"language": "fra"}, - }, - { - "index": 3, - "codec_type": "subtitle", - "codec_name": "subrip", - "tags": {"language": "fra"}, - "disposition": {"forced": 1}, - }, - ], - } - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(stdout=json.dumps(payload)), - ): - info = ffprobe.probe(f) - assert info.video_codec == "h264" - assert info.width == 1920 and info.height == 1080 - assert len(info.audio_tracks) == 2 - eng = info.audio_tracks[0] - assert eng.language == "eng" - assert eng.is_default is True - assert info.audio_tracks[1].is_default is False - assert len(info.subtitle_tracks) == 1 - assert info.subtitle_tracks[0].is_forced is True - - def test_first_video_stream_wins(self, tmp_path): - # The implementation only fills video_codec on the FIRST video stream. - f = tmp_path / "x.mkv" - f.write_bytes(b"") - payload = { - "format": {}, - "streams": [ - {"codec_type": "video", "codec_name": "h264", "width": 1920}, - {"codec_type": "video", "codec_name": "hevc", "width": 3840}, - ], - } - with patch( - "alfred.infrastructure.filesystem.ffprobe.subprocess.run", - return_value=_ffprobe_result(stdout=json.dumps(payload)), - ): - info = ffprobe.probe(f) - assert info.video_codec == "h264" - assert info.width == 1920 - # --------------------------------------------------------------------------- # # filesystem_operations #