From 9ddd85929ea791c0892082720235b7451b3bf4fa Mon Sep 17 00:00:00 2001 From: Francwa Date: Wed, 20 May 2026 01:34:32 +0200 Subject: [PATCH] feat(release): pre-pipeline exclusion helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the application-layer helpers that decide which files are worth parsing, sitting one notch above parse_release. - is_supported_video(path, kb): extension-only check against kb.video_extensions. Lowercased suffix lookup. Directories and broken symlinks return False. - find_main_video(folder, kb): top-level scan only (no recursion into subdirectories — releases that wrap their video in Sample/ are PATH_OF_PAIN territory). Lexicographically-first eligible file wins when several qualify (deterministic, no size-based ranking). A bare file as folder argument is supported for single-file releases. No size threshold and no filename heuristics ('sample' / 'trailer'): the parser's job is to extract structure, not to second-guess non-standard release shapes. PoP catches the rest. 17 tests under tests/application/test_supported_media.py. --- CHANGELOG.md | 9 ++ alfred/application/release/__init__.py | 11 ++ alfred/application/release/supported_media.py | 74 ++++++++++ tests/application/test_supported_media.py | 130 ++++++++++++++++++ 4 files changed, 224 insertions(+) create mode 100644 alfred/application/release/__init__.py create mode 100644 alfred/application/release/supported_media.py create mode 100644 tests/application/test_supported_media.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27c8a2c..47198e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,15 @@ callers). 
### Added +- **Pre-pipeline exclusion helpers** (`alfred/application/release/supported_media.py`): + `is_supported_video(path, kb)` (extension-only check against + `kb.video_extensions`) and `find_main_video(folder, kb)` (top-level + scan, lexicographically-first eligible file, returns `None` when no + video qualifies; accepts a bare file as folder for single-file + releases). No size threshold, no filename heuristics — + PATH_OF_PAIN handles the exotic cases. Foundation for the future + `inspect_release` orchestrator. + - **Release parser — parse-confidence scoring** (`alfred/domain/release/parser/scoring.py`, `alfred/knowledge/release/scoring.yaml`). `parse_release` now returns `(ParsedRelease, ParseReport)`. The new `ParseReport` frozen VO diff --git a/alfred/application/release/__init__.py b/alfred/application/release/__init__.py new file mode 100644 index 0000000..c00e603 --- /dev/null +++ b/alfred/application/release/__init__.py @@ -0,0 +1,11 @@ +"""Release application layer — orchestrators sitting between domain +parsing and infrastructure I/O. + +Today it exposes the pre-pipeline exclusion helpers +(:mod:`supported_media`). Phase C will add the ``inspect_release`` +orchestrator here. +""" + +from .supported_media import find_main_video, is_supported_video + +__all__ = ["find_main_video", "is_supported_video"] diff --git a/alfred/application/release/supported_media.py b/alfred/application/release/supported_media.py new file mode 100644 index 0000000..aa1a59b --- /dev/null +++ b/alfred/application/release/supported_media.py @@ -0,0 +1,74 @@ +"""Pre-pipeline exclusion — decide which files are worth parsing. + +These helpers live one notch above the domain: they touch the +filesystem (``Path.iterdir``, ``Path.is_file``) but carry no parsing +logic of their own. The goal is to filter out non-video files and pick +the canonical "main video" from a release folder *before* anything +hits :func:`~alfred.domain.release.parse_release`. 
+ +Design notes (Phase A bis, 2026-05-20): + +- **Extension is the sole eligibility criterion.** A file is supported + iff its suffix is in ``kb.video_extensions``. No size threshold, no + filename heuristics ("sample", "trailer", …). If a release packs a + bloated featurette or names its sample alphabetically before the + main feature, that's PATH_OF_PAIN territory — not this layer's job. + +- **Top-level scan only.** ``find_main_video`` does not descend into + subdirectories. Releases that wrap the main video in ``Sample/`` or + similar are non-scene-standard and handled by the orchestrator + upstream. + +- **Lexicographic tie-break.** When several candidates qualify + (legitimate for season packs), we return the first by alphabetical + order. Deterministic, no size-based ranking. + +- **Direct ``Path`` I/O.** No ``FilesystemScanner`` port — this layer + is application, not domain. If isolation becomes necessary for + testing scale, we'll introduce a port then. +""" + +from __future__ import annotations + +from pathlib import Path + +from alfred.domain.release.ports.knowledge import ReleaseKnowledge + + +def is_supported_video(path: Path, kb: ReleaseKnowledge) -> bool: + """Return True when ``path`` is a video file the parser should + consider. + + The check is purely extension-based: ``path.suffix.lower()`` must + belong to ``kb.video_extensions``. ``path`` must also be a regular + file — directories and broken symlinks return False. + """ + if not path.is_file(): + return False + return path.suffix.lower() in kb.video_extensions + + +def find_main_video(folder: Path, kb: ReleaseKnowledge) -> Path | None: + """Return the canonical main video file inside ``folder``, or + ``None`` if there isn't one. + + Behavior: + + - Top-level scan only — subdirectories are ignored. + - Eligibility is :func:`is_supported_video`. + - When several files qualify, the lexicographically first one wins. 
+ - When ``folder`` itself is a video file, it is returned as-is + (single-file releases are valid). + - When ``folder`` doesn't exist or isn't a directory (and isn't a + video file either), returns ``None``. + """ + if folder.is_file(): + return folder if is_supported_video(folder, kb) else None + + if not folder.is_dir(): + return None + + candidates = sorted( + child for child in folder.iterdir() if is_supported_video(child, kb) + ) + return candidates[0] if candidates else None diff --git a/tests/application/test_supported_media.py b/tests/application/test_supported_media.py new file mode 100644 index 0000000..f52fe6c --- /dev/null +++ b/tests/application/test_supported_media.py @@ -0,0 +1,130 @@ +"""Tests for the pre-pipeline exclusion helpers (Phase A bis).""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from alfred.application.release.supported_media import ( + find_main_video, + is_supported_video, +) +from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge + +_KB = YamlReleaseKnowledge() + + +# --------------------------------------------------------------------- # +# is_supported_video # +# --------------------------------------------------------------------- # + + +class TestIsSupportedVideo: + def test_mkv_is_supported(self, tmp_path: Path) -> None: + f = tmp_path / "movie.mkv" + f.touch() + assert is_supported_video(f, _KB) is True + + def test_mp4_is_supported(self, tmp_path: Path) -> None: + f = tmp_path / "movie.mp4" + f.touch() + assert is_supported_video(f, _KB) is True + + def test_uppercase_extension_is_supported(self, tmp_path: Path) -> None: + # File systems can return mixed case; we lowercase the suffix. 
+ f = tmp_path / "movie.MKV" + f.touch() + assert is_supported_video(f, _KB) is True + + def test_srt_is_not_video(self, tmp_path: Path) -> None: + f = tmp_path / "movie.srt" + f.touch() + assert is_supported_video(f, _KB) is False + + def test_nfo_is_not_video(self, tmp_path: Path) -> None: + f = tmp_path / "movie.nfo" + f.touch() + assert is_supported_video(f, _KB) is False + + def test_no_extension_is_not_video(self, tmp_path: Path) -> None: + f = tmp_path / "README" + f.touch() + assert is_supported_video(f, _KB) is False + + def test_directory_is_not_video(self, tmp_path: Path) -> None: + d = tmp_path / "subdir.mkv" # even with a video extension + d.mkdir() + assert is_supported_video(d, _KB) is False + + def test_nonexistent_path_is_not_video(self, tmp_path: Path) -> None: + assert is_supported_video(tmp_path / "ghost.mkv", _KB) is False + + +# --------------------------------------------------------------------- # +# find_main_video # +# --------------------------------------------------------------------- # + + +class TestFindMainVideo: + def test_single_video_file_in_folder(self, tmp_path: Path) -> None: + main = tmp_path / "Movie.2020.mkv" + main.touch() + assert find_main_video(tmp_path, _KB) == main + + def test_returns_lexicographically_first_among_multiple( + self, tmp_path: Path + ) -> None: + # Legitimate for season packs: pick the first episode by name. + ep2 = tmp_path / "Show.S01E02.mkv" + ep1 = tmp_path / "Show.S01E01.mkv" + ep2.touch() + ep1.touch() + assert find_main_video(tmp_path, _KB) == ep1 + + def test_skips_non_video_files(self, tmp_path: Path) -> None: + # Non-video siblings (.nfo, .srt) must be filtered out, never returned. + (tmp_path / "Movie.nfo").touch() + (tmp_path / "Movie.srt").touch() + vid = tmp_path / "Movie.mkv" + vid.touch() + assert find_main_video(tmp_path, _KB) == vid + + def test_ignores_subdirectories(self, tmp_path: Path) -> None: + # A Sample/ subdir must NOT be descended into. 
+ sample_dir = tmp_path / "Sample" + sample_dir.mkdir() + (sample_dir / "sample.mkv").touch() + main = tmp_path / "Movie.mkv" + main.touch() + assert find_main_video(tmp_path, _KB) == main + + def test_only_subdirectory_with_video_returns_none( + self, tmp_path: Path + ) -> None: + # No top-level video, only one inside a subdir → None. + sub = tmp_path / "Sample" + sub.mkdir() + (sub / "video.mkv").touch() + assert find_main_video(tmp_path, _KB) is None + + def test_empty_folder_returns_none(self, tmp_path: Path) -> None: + assert find_main_video(tmp_path, _KB) is None + + def test_nonexistent_folder_returns_none(self, tmp_path: Path) -> None: + assert find_main_video(tmp_path / "ghost", _KB) is None + + def test_single_file_release_passed_as_folder_arg( + self, tmp_path: Path + ) -> None: + # Some releases are a bare .mkv with no enclosing folder. + f = tmp_path / "Movie.2020.1080p.mkv" + f.touch() + assert find_main_video(f, _KB) == f + + def test_single_file_non_video_passed_as_folder_arg( + self, tmp_path: Path + ) -> None: + f = tmp_path / "README.nfo" + f.touch() + assert find_main_video(f, _KB) is None