feat(release): pre-pipeline exclusion helpers
Add the application-layer helpers that decide which files are worth
parsing, sitting one notch above parse_release.
- is_supported_video(path, kb): extension-only check against
kb.video_extensions. Lowercased suffix lookup. Directories and
broken symlinks return False.
- find_main_video(folder, kb): top-level scan only (no recursion into
subdirectories — releases that wrap their video in Sample/ are
PATH_OF_PAIN territory). Lexicographically-first eligible file wins
when several qualify (deterministic, no size-based ranking). A bare
file as folder argument is supported for single-file releases.
No size threshold and no filename heuristics ('sample' / 'trailer'):
the parser's job is to extract structure, not to second-guess
non-standard release shapes. PATH_OF_PAIN (PoP) catches the rest.
17 tests under tests/application/test_supported_media.py.
This commit is contained in:
@@ -17,6 +17,15 @@ callers).
|
||||
|
||||
### Added
|
||||
|
||||
- **Pre-pipeline exclusion helpers** (`alfred/application/release/supported_media.py`):
|
||||
`is_supported_video(path, kb)` (extension-only check against
|
||||
`kb.video_extensions`) and `find_main_video(folder, kb)` (top-level
|
||||
scan, lexicographically-first eligible file, returns `None` when no
|
||||
video qualifies; accepts a bare file as folder for single-file
|
||||
releases). No size threshold, no filename heuristics —
|
||||
PATH_OF_PAIN handles the exotic cases. Foundation for the future
|
||||
`inspect_release` orchestrator.
|
||||
|
||||
- **Release parser — parse-confidence scoring** (`alfred/domain/release/parser/scoring.py`,
|
||||
`alfred/knowledge/release/scoring.yaml`). `parse_release` now returns
|
||||
`(ParsedRelease, ParseReport)`. The new `ParseReport` frozen VO
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
"""Release application layer — orchestrators sitting between domain
|
||||
parsing and infrastructure I/O.
|
||||
|
||||
Today it exposes the pre-pipeline exclusion helpers
|
||||
(:mod:`supported_media`). Phase C will add the ``inspect_release``
|
||||
orchestrator here.
|
||||
"""
|
||||
|
||||
from .supported_media import find_main_video, is_supported_video
|
||||
|
||||
__all__ = ["find_main_video", "is_supported_video"]
|
||||
@@ -0,0 +1,74 @@
|
||||
"""Pre-pipeline exclusion — decide which files are worth parsing.
|
||||
|
||||
These helpers live one notch above the domain: they touch the
|
||||
filesystem (``Path.iterdir``, ``Path.suffix``) but carry no parsing
|
||||
logic of their own. The goal is to filter out non-video files and pick
|
||||
the canonical "main video" from a release folder *before* anything
|
||||
hits :func:`~alfred.domain.release.parse_release`.
|
||||
|
||||
Design notes (Phase A bis, 2026-05-20):
|
||||
|
||||
- **Extension is the sole eligibility criterion.** A file is supported
|
||||
iff its suffix is in ``kb.video_extensions``. No size threshold, no
|
||||
filename heuristics ("sample", "trailer", …). If a release packs a
|
||||
bloated featurette or names its sample alphabetically before the
|
||||
main feature, that's PATH_OF_PAIN territory — not this layer's job.
|
||||
|
||||
- **Top-level scan only.** ``find_main_video`` does not descend into
|
||||
subdirectories. Releases that wrap the main video in ``Sample/`` or
|
||||
similar are non-scene-standard and handled by the orchestrator
|
||||
upstream.
|
||||
|
||||
- **Lexicographic tie-break.** When several candidates qualify
|
||||
(legitimate for season packs), we return the first by alphabetical
|
||||
order. Deterministic, no size-based ranking.
|
||||
|
||||
- **Direct ``Path`` I/O.** No ``FilesystemScanner`` port — this layer
|
||||
is application, not domain. If isolation becomes necessary for
|
||||
testing at scale, we'll introduce a port then.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from alfred.domain.release.ports.knowledge import ReleaseKnowledge
|
||||
|
||||
|
||||
def is_supported_video(path: Path, kb: ReleaseKnowledge) -> bool:
    """Tell whether ``path`` is a video file worth handing to the parser.

    Eligibility is decided by extension alone: the lowercased
    ``path.suffix`` has to be a member of ``kb.video_extensions``.
    Anything that is not a regular file (a directory, a broken symlink,
    a path that does not exist) is rejected regardless of its name.
    """
    extension = path.suffix.lower()
    # Short-circuit: the knowledge base is only consulted for real files.
    return path.is_file() and extension in kb.video_extensions
|
||||
|
||||
|
||||
def find_main_video(folder: Path, kb: ReleaseKnowledge) -> Path | None:
    """Pick the main video of a release, or ``None`` when it has none.

    Rules:

    - Only the direct children of ``folder`` are examined;
      subdirectories are never entered.
    - A child qualifies when :func:`is_supported_video` accepts it.
    - Among several qualifying children, the smallest path in sort
      order (i.e. the lexicographically first) is returned.
    - ``folder`` may itself be a file (single-file release): it is
      returned unchanged when it qualifies, otherwise ``None``.
    - A ``folder`` that is neither a file nor a directory (for instance
      because it does not exist) yields ``None``.
    """
    if folder.is_file():
        # Single-file release: the argument is its own only candidate.
        if is_supported_video(folder, kb):
            return folder
        return None

    if not folder.is_dir():
        return None

    eligible = (entry for entry in folder.iterdir() if is_supported_video(entry, kb))
    # The minimum of the candidates is the first one in sorted order.
    return min(eligible, default=None)
|
||||
@@ -0,0 +1,130 @@
|
||||
"""Tests for the pre-pipeline exclusion helpers (Phase A bis)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.application.release.supported_media import (
|
||||
find_main_video,
|
||||
is_supported_video,
|
||||
)
|
||||
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
||||
|
||||
_KB = YamlReleaseKnowledge()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- #
|
||||
# is_supported_video #
|
||||
# --------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestIsSupportedVideo:
    """Checks for the extension-only eligibility test on a single path."""

    @staticmethod
    def _touch(directory: Path, name: str) -> Path:
        """Create an empty file called ``name`` in ``directory``."""
        created = directory / name
        created.touch()
        return created

    def test_mkv_is_supported(self, tmp_path: Path) -> None:
        video = self._touch(tmp_path, "movie.mkv")
        assert is_supported_video(video, _KB) is True

    def test_mp4_is_supported(self, tmp_path: Path) -> None:
        video = self._touch(tmp_path, "movie.mp4")
        assert is_supported_video(video, _KB) is True

    def test_uppercase_extension_is_supported(self, tmp_path: Path) -> None:
        # Suffix case varies between file systems; the helper lowercases it.
        video = self._touch(tmp_path, "movie.MKV")
        assert is_supported_video(video, _KB) is True

    def test_srt_is_not_video(self, tmp_path: Path) -> None:
        subtitle = self._touch(tmp_path, "movie.srt")
        assert is_supported_video(subtitle, _KB) is False

    def test_nfo_is_not_video(self, tmp_path: Path) -> None:
        info = self._touch(tmp_path, "movie.nfo")
        assert is_supported_video(info, _KB) is False

    def test_no_extension_is_not_video(self, tmp_path: Path) -> None:
        bare = self._touch(tmp_path, "README")
        assert is_supported_video(bare, _KB) is False

    def test_directory_is_not_video(self, tmp_path: Path) -> None:
        # A directory is rejected even when its name ends in a video extension.
        directory = tmp_path / "subdir.mkv"
        directory.mkdir()
        assert is_supported_video(directory, _KB) is False

    def test_nonexistent_path_is_not_video(self, tmp_path: Path) -> None:
        missing = tmp_path / "ghost.mkv"
        assert is_supported_video(missing, _KB) is False
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- #
|
||||
# find_main_video #
|
||||
# --------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestFindMainVideo:
    """Checks for main-video selection inside a release folder."""

    @staticmethod
    def _touch(directory: Path, name: str) -> Path:
        """Create an empty file called ``name`` in ``directory``."""
        created = directory / name
        created.touch()
        return created

    def test_single_video_file_in_folder(self, tmp_path: Path) -> None:
        expected = self._touch(tmp_path, "Movie.2020.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_returns_lexicographically_first_among_multiple(
        self, tmp_path: Path
    ) -> None:
        # Season packs legitimately hold several videos: the first episode
        # by name wins, whatever order the files were created in.
        self._touch(tmp_path, "Show.S01E02.mkv")
        first_episode = self._touch(tmp_path, "Show.S01E01.mkv")
        assert find_main_video(tmp_path, _KB) == first_episode

    def test_skips_non_video_files(self, tmp_path: Path) -> None:
        # The nfo and srt files are not videos and must never be selected.
        self._touch(tmp_path, "Movie.nfo")
        self._touch(tmp_path, "Movie.srt")
        expected = self._touch(tmp_path, "Movie.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_ignores_subdirectories(self, tmp_path: Path) -> None:
        # The scan must not descend into a Sample/ subdirectory.
        nested = tmp_path / "Sample"
        nested.mkdir()
        self._touch(nested, "sample.mkv")
        expected = self._touch(tmp_path, "Movie.mkv")
        assert find_main_video(tmp_path, _KB) == expected

    def test_only_subdirectory_with_video_returns_none(
        self, tmp_path: Path
    ) -> None:
        # The only video lives in a subdirectory, so nothing qualifies.
        nested = tmp_path / "Sample"
        nested.mkdir()
        self._touch(nested, "video.mkv")
        assert find_main_video(tmp_path, _KB) is None

    def test_empty_folder_returns_none(self, tmp_path: Path) -> None:
        assert find_main_video(tmp_path, _KB) is None

    def test_nonexistent_folder_returns_none(self, tmp_path: Path) -> None:
        missing = tmp_path / "ghost"
        assert find_main_video(missing, _KB) is None

    def test_single_file_release_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        # A release can be a bare .mkv with no enclosing folder.
        release = self._touch(tmp_path, "Movie.2020.1080p.mkv")
        assert find_main_video(release, _KB) == release

    def test_single_file_non_video_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        not_a_video = self._touch(tmp_path, "README.nfo")
        assert find_main_video(not_a_video, _KB) is None
|
||||
Reference in New Issue
Block a user