feat(release): pre-pipeline exclusion helpers

Add the application-layer helpers that decide which files are worth
parsing, sitting one notch above parse_release.

- is_supported_video(path, kb): extension-only check against
  kb.video_extensions. Lowercased suffix lookup. Directories and
  broken symlinks return False.
- find_main_video(folder, kb): top-level scan only (no recursion into
  subdirectories — releases that wrap their video in Sample/ are
  PATH_OF_PAIN territory). The lexicographically first eligible file
  wins when several qualify (deterministic, no size-based ranking).
  Passing a bare file as the folder argument is supported for
  single-file releases.

No size threshold and no filename heuristics ('sample' / 'trailer'):
the parser's job is to extract structure, not to second-guess
non-standard release shapes. PoP catches the rest.

17 tests under tests/application/test_supported_media.py.
This commit is contained in:
2026-05-20 01:34:32 +02:00
parent ed7680b58f
commit 9ddd85929e
4 changed files with 224 additions and 0 deletions
+9
View File
@@ -17,6 +17,15 @@ callers).
### Added
- **Pre-pipeline exclusion helpers** (`alfred/application/release/supported_media.py`):
`is_supported_video(path, kb)` (extension-only check against
`kb.video_extensions`) and `find_main_video(folder, kb)` (top-level
scan, lexicographically-first eligible file, returns `None` when no
video qualifies; accepts a bare file as folder for single-file
releases). No size threshold, no filename heuristics —
PATH_OF_PAIN handles the exotic cases. Foundation for the future
`inspect_release` orchestrator.
- **Release parser — parse-confidence scoring** (`alfred/domain/release/parser/scoring.py`,
`alfred/knowledge/release/scoring.yaml`). `parse_release` now returns
`(ParsedRelease, ParseReport)`. The new `ParseReport` frozen VO
+11
View File
@@ -0,0 +1,11 @@
"""Release application layer — orchestrators sitting between domain
parsing and infrastructure I/O.
Today it exposes the pre-pipeline exclusion helpers
(:mod:`supported_media`). Phase C will add the ``inspect_release``
orchestrator here.
"""
from .supported_media import find_main_video, is_supported_video
# Public surface of the package: the two helpers re-exported from
# ``supported_media``.
__all__ = ["find_main_video", "is_supported_video"]
@@ -0,0 +1,74 @@
"""Pre-pipeline exclusion — decide which files are worth parsing.
These helpers live one notch above the domain: they touch the
filesystem (``Path.iterdir``, ``Path.suffix``) but carry no parsing
logic of their own. The goal is to filter out non-video files and pick
the canonical "main video" from a release folder *before* anything
hits :func:`~alfred.domain.release.parse_release`.
Design notes (Phase A bis, 2026-05-20):
- **Extension is the sole eligibility criterion.** A file is supported
iff its suffix is in ``kb.video_extensions``. No size threshold, no
filename heuristics ("sample", "trailer", …). If a release packs a
bloated featurette or names its sample alphabetically before the
main feature, that's PATH_OF_PAIN territory — not this layer's job.
- **Top-level scan only.** ``find_main_video`` does not descend into
subdirectories. Releases that wrap the main video in ``Sample/`` or
similar are non-scene-standard and handled by the orchestrator
upstream.
- **Lexicographic tie-break.** When several candidates qualify
(legitimate for season packs), we return the first by alphabetical
order. Deterministic, no size-based ranking.
- **Direct ``Path`` I/O.** No ``FilesystemScanner`` port — this layer
is application, not domain. If isolating the filesystem becomes
necessary to test at scale, we'll introduce a port then.
"""
from __future__ import annotations
from pathlib import Path
from alfred.domain.release.ports.knowledge import ReleaseKnowledge
def is_supported_video(path: Path, kb: ReleaseKnowledge) -> bool:
    """Tell whether ``path`` is a video file worth handing to the parser.

    Two conditions must hold:

    - ``path`` is a regular file on disk (directories, missing paths and
      broken symlinks are rejected);
    - its lowercased suffix is a member of ``kb.video_extensions``.

    Nothing else is inspected — no size threshold, no filename heuristics.
    """
    # Computing the suffix is pure string work; the filesystem is only
    # touched by ``is_file()``, and the knowledge base is only consulted
    # when that check succeeds.
    extension = path.suffix.lower()
    return path.is_file() and extension in kb.video_extensions
def find_main_video(folder: Path, kb: ReleaseKnowledge) -> Path | None:
    """Pick the main video of a release, or ``None`` when nothing qualifies.

    Rules:

    - ``folder`` may itself be a file (single-file release): it is
      returned unchanged when :func:`is_supported_video` accepts it,
      otherwise the result is ``None``.
    - A path that is neither a file nor a directory (including one that
      does not exist) yields ``None``.
    - For a directory, only its direct children are examined —
      subdirectories are never descended into.
    - Among the eligible children, the smallest one in ``Path`` ordering
      (i.e. the lexicographically first) is returned.
    """
    # Single-file release: the "folder" is the only candidate.
    if folder.is_file():
        if is_supported_video(folder, kb):
            return folder
        return None

    # Missing path, or something that is not a directory.
    if not folder.is_dir():
        return None

    eligible = (
        entry for entry in folder.iterdir() if is_supported_video(entry, kb)
    )
    # ``min`` selects the same element a full sort would put first, and
    # ``default=None`` covers the "no eligible file" case.
    return min(eligible, default=None)
+130
View File
@@ -0,0 +1,130 @@
"""Tests for the pre-pipeline exclusion helpers (Phase A bis)."""
from __future__ import annotations
from pathlib import Path
import pytest
from alfred.application.release.supported_media import (
find_main_video,
is_supported_video,
)
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
# Single knowledge-base instance shared by every test in this module.
# NOTE(review): built with no arguments — presumably loads the bundled
# default YAML knowledge; confirm against YamlReleaseKnowledge.
_KB = YamlReleaseKnowledge()
# --------------------------------------------------------------------- #
# is_supported_video #
# --------------------------------------------------------------------- #
class TestIsSupportedVideo:
    """Checks for :func:`is_supported_video` against real temporary paths."""

    @staticmethod
    def _touch(parent: Path, name: str) -> Path:
        """Create an empty file called ``name`` in ``parent`` and return it."""
        created = parent / name
        created.touch()
        return created

    def test_mkv_is_supported(self, tmp_path: Path) -> None:
        video = self._touch(tmp_path, "movie.mkv")
        assert is_supported_video(video, _KB) is True

    def test_mp4_is_supported(self, tmp_path: Path) -> None:
        video = self._touch(tmp_path, "movie.mp4")
        assert is_supported_video(video, _KB) is True

    def test_uppercase_extension_is_supported(self, tmp_path: Path) -> None:
        # Suffixes may come back in any case; the helper lowercases them
        # before the lookup.
        video = self._touch(tmp_path, "movie.MKV")
        assert is_supported_video(video, _KB) is True

    def test_srt_is_not_video(self, tmp_path: Path) -> None:
        subtitle = self._touch(tmp_path, "movie.srt")
        assert is_supported_video(subtitle, _KB) is False

    def test_nfo_is_not_video(self, tmp_path: Path) -> None:
        info = self._touch(tmp_path, "movie.nfo")
        assert is_supported_video(info, _KB) is False

    def test_no_extension_is_not_video(self, tmp_path: Path) -> None:
        bare = self._touch(tmp_path, "README")
        assert is_supported_video(bare, _KB) is False

    def test_directory_is_not_video(self, tmp_path: Path) -> None:
        # A directory is rejected even when its name ends in a video
        # extension.
        directory = tmp_path / "subdir.mkv"
        directory.mkdir()
        assert is_supported_video(directory, _KB) is False

    def test_nonexistent_path_is_not_video(self, tmp_path: Path) -> None:
        missing = tmp_path / "ghost.mkv"
        assert is_supported_video(missing, _KB) is False
# --------------------------------------------------------------------- #
# find_main_video #
# --------------------------------------------------------------------- #
class TestFindMainVideo:
    """Checks for :func:`find_main_video` against real temporary folders."""

    @staticmethod
    def _touch(parent: Path, name: str) -> Path:
        """Create an empty file called ``name`` in ``parent`` and return it."""
        created = parent / name
        created.touch()
        return created

    def test_single_video_file_in_folder(self, tmp_path: Path) -> None:
        expected = self._touch(tmp_path, "Movie.2020.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_returns_lexicographically_first_among_multiple(
        self, tmp_path: Path
    ) -> None:
        # Season-pack shape: the first episode by name must win, whatever
        # order the files were created in (second episode is made first).
        self._touch(tmp_path, "Show.S01E02.mkv")
        first_episode = self._touch(tmp_path, "Show.S01E01.mkv")
        assert find_main_video(tmp_path, _KB) == first_episode

    def test_skips_non_video_files(self, tmp_path: Path) -> None:
        # The .nfo and .srt siblings are not videos and must not be
        # selected in place of the .mkv.
        self._touch(tmp_path, "Movie.nfo")
        self._touch(tmp_path, "Movie.srt")
        expected = self._touch(tmp_path, "Movie.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_ignores_subdirectories(self, tmp_path: Path) -> None:
        # The scan must stay at the top level: Sample/ is not entered.
        nested = tmp_path / "Sample"
        nested.mkdir()
        self._touch(nested, "sample.mkv")
        expected = self._touch(tmp_path, "Movie.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_only_subdirectory_with_video_returns_none(
        self, tmp_path: Path
    ) -> None:
        # The only video lives inside a subdirectory, so nothing at the
        # top level qualifies.
        nested = tmp_path / "Sample"
        nested.mkdir()
        self._touch(nested, "video.mkv")
        assert find_main_video(tmp_path, _KB) is None

    def test_empty_folder_returns_none(self, tmp_path: Path) -> None:
        assert find_main_video(tmp_path, _KB) is None

    def test_nonexistent_folder_returns_none(self, tmp_path: Path) -> None:
        missing = tmp_path / "ghost"
        assert find_main_video(missing, _KB) is None

    def test_single_file_release_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        # A release can be a bare .mkv with no enclosing folder; the file
        # is then passed where a folder is expected.
        release = self._touch(tmp_path, "Movie.2020.1080p.mkv")
        assert find_main_video(release, _KB) == release

    def test_single_file_non_video_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        not_a_video = self._touch(tmp_path, "README.nfo")
        assert find_main_video(not_a_video, _KB) is None