feat(release): pre-pipeline exclusion helpers
Add the application-layer helpers that decide which files are worth
parsing, sitting one notch above parse_release.
- is_supported_video(path, kb): extension-only check against
kb.video_extensions. Lowercased suffix lookup. Directories and
broken symlinks return False.
- find_main_video(folder, kb): top-level scan only (no recursion into
subdirectories — releases that wrap their video in Sample/ are
PATH_OF_PAIN territory). Lexicographically-first eligible file wins
when several qualify (deterministic, no size-based ranking). A bare
file as folder argument is supported for single-file releases.
No size threshold and no filename heuristics ('sample' / 'trailer'):
the parser's job is to extract structure, not to second-guess
non-standard release shapes. PoP catches the rest.
17 tests under tests/application/test_supported_media.py.
This commit is contained in:
@@ -17,6 +17,15 @@ callers).
|
|||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
|
- **Pre-pipeline exclusion helpers** (`alfred/application/release/supported_media.py`):
|
||||||
|
`is_supported_video(path, kb)` (extension-only check against
|
||||||
|
`kb.video_extensions`) and `find_main_video(folder, kb)` (top-level
|
||||||
|
scan, lexicographically-first eligible file, returns `None` when no
|
||||||
|
video qualifies; accepts a bare file as folder for single-file
|
||||||
|
releases). No size threshold, no filename heuristics —
|
||||||
|
PATH_OF_PAIN handles the exotic cases. Foundation for the future
|
||||||
|
`inspect_release` orchestrator.
|
||||||
|
|
||||||
- **Release parser — parse-confidence scoring** (`alfred/domain/release/parser/scoring.py`,
|
- **Release parser — parse-confidence scoring** (`alfred/domain/release/parser/scoring.py`,
|
||||||
`alfred/knowledge/release/scoring.yaml`). `parse_release` now returns
|
`alfred/knowledge/release/scoring.yaml`). `parse_release` now returns
|
||||||
`(ParsedRelease, ParseReport)`. The new `ParseReport` frozen VO
|
`(ParsedRelease, ParseReport)`. The new `ParseReport` frozen VO
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
"""Release application layer — orchestrators sitting between domain
|
||||||
|
parsing and infrastructure I/O.
|
||||||
|
|
||||||
|
Today it exposes the pre-pipeline exclusion helpers
|
||||||
|
(:mod:`supported_media`). Phase C will add the ``inspect_release``
|
||||||
|
orchestrator here.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .supported_media import find_main_video, is_supported_video
|
||||||
|
|
||||||
|
# Public names re-exported by this package: the two helpers imported
# from ``.supported_media`` above.
__all__ = ["find_main_video", "is_supported_video"]
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
"""Pre-pipeline exclusion — decide which files are worth parsing.
|
||||||
|
|
||||||
|
These helpers live one notch above the domain: they touch the
|
||||||
|
filesystem (``Path.iterdir``, ``Path.is_file``) but carry no parsing
|
||||||
|
logic of their own. The goal is to filter out non-video files and pick
|
||||||
|
the canonical "main video" from a release folder *before* anything
|
||||||
|
hits :func:`~alfred.domain.release.parse_release`.
|
||||||
|
|
||||||
|
Design notes (Phase A bis, 2026-05-20):
|
||||||
|
|
||||||
|
- **Extension is the sole eligibility criterion.** A file is supported
|
||||||
|
iff its suffix is in ``kb.video_extensions``. No size threshold, no
|
||||||
|
filename heuristics ("sample", "trailer", …). If a release packs a
|
||||||
|
bloated featurette or names its sample alphabetically before the
|
||||||
|
main feature, that's PATH_OF_PAIN territory — not this layer's job.
|
||||||
|
|
||||||
|
- **Top-level scan only.** ``find_main_video`` does not descend into
|
||||||
|
subdirectories. Releases that wrap the main video in ``Sample/`` or
|
||||||
|
similar are non-scene-standard and handled by the orchestrator
|
||||||
|
upstream.
|
||||||
|
|
||||||
|
- **Lexicographic tie-break.** When several candidates qualify
|
||||||
|
(legitimate for season packs), we return the first by alphabetical
|
||||||
|
order. Deterministic, no size-based ranking.
|
||||||
|
|
||||||
|
- **Direct ``Path`` I/O.** No ``FilesystemScanner`` port — this layer
|
||||||
|
is application, not domain. If isolation becomes necessary for
|
||||||
|
testing at scale, we'll introduce a port then.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from alfred.domain.release.ports.knowledge import ReleaseKnowledge
|
||||||
|
|
||||||
|
|
||||||
|
def is_supported_video(path: Path, kb: ReleaseKnowledge) -> bool:
    """Tell whether ``path`` points at a video file worth parsing.

    Two conditions must both hold:

    - ``path`` is a regular file according to ``Path.is_file`` — so
      directories, missing paths and broken symlinks are rejected;
    - its lowercased suffix is a member of ``kb.video_extensions``.

    Nothing else is inspected: neither the file size nor its name.
    """
    # Lowercase the suffix so "movie.MKV" matches a ".mkv" entry.
    extension = path.suffix.lower()
    # ``and`` short-circuits: the knowledge base is only consulted for
    # paths that really are regular files.
    return path.is_file() and extension in kb.video_extensions
|
||||||
|
|
||||||
|
|
||||||
|
def find_main_video(folder: Path, kb: ReleaseKnowledge) -> Path | None:
    """Pick the main video of a release, or ``None`` when none qualifies.

    Rules:

    - Only the direct children of ``folder`` are examined; nothing
      inside subdirectories is considered.
    - A child is eligible when :func:`is_supported_video` accepts it.
    - With several eligible children, the one that sorts first wins.
    - ``folder`` may itself be a file (single-file release): it is
      returned unchanged when it is a supported video, else ``None``.
    - A path that is neither a file nor a directory (for instance a
      path that does not exist) yields ``None``.
    """
    # Single-file release: the argument itself is the only candidate.
    if folder.is_file():
        if is_supported_video(folder, kb):
            return folder
        return None

    # Missing path, or something that is neither a file nor a directory.
    if not folder.is_dir():
        return None

    # The smallest eligible path is exactly the one a full sort would
    # put first; ``default`` covers the "no eligible child" case.
    eligible = (entry for entry in folder.iterdir() if is_supported_video(entry, kb))
    return min(eligible, default=None)
|
||||||
@@ -0,0 +1,130 @@
|
|||||||
|
"""Tests for the pre-pipeline exclusion helpers (Phase A bis)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from alfred.application.release.supported_media import (
|
||||||
|
find_main_video,
|
||||||
|
is_supported_video,
|
||||||
|
)
|
||||||
|
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
||||||
|
|
||||||
|
# Knowledge base shared by every test in this module; instantiated once
# when the module is imported.
_KB = YamlReleaseKnowledge()
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------- #
|
||||||
|
# is_supported_video #
|
||||||
|
# --------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsSupportedVideo:
    """Eligibility checks performed by ``is_supported_video``."""

    def test_mkv_is_supported(self, tmp_path: Path) -> None:
        """An existing ``.mkv`` file is accepted."""
        video = tmp_path / "movie.mkv"
        video.touch()
        assert is_supported_video(video, _KB) is True

    def test_mp4_is_supported(self, tmp_path: Path) -> None:
        """An existing ``.mp4`` file is accepted."""
        video = tmp_path / "movie.mp4"
        video.touch()
        assert is_supported_video(video, _KB) is True

    def test_uppercase_extension_is_supported(self, tmp_path: Path) -> None:
        """Suffix matching ignores case."""
        # Names on disk may carry any case; the helper lowercases the suffix.
        video = tmp_path / "movie.MKV"
        video.touch()
        assert is_supported_video(video, _KB) is True

    def test_srt_is_not_video(self, tmp_path: Path) -> None:
        """A subtitle file is rejected."""
        subtitle = tmp_path / "movie.srt"
        subtitle.touch()
        assert is_supported_video(subtitle, _KB) is False

    def test_nfo_is_not_video(self, tmp_path: Path) -> None:
        """An ``.nfo`` file is rejected."""
        info = tmp_path / "movie.nfo"
        info.touch()
        assert is_supported_video(info, _KB) is False

    def test_no_extension_is_not_video(self, tmp_path: Path) -> None:
        """A file without any suffix is rejected."""
        readme = tmp_path / "README"
        readme.touch()
        assert is_supported_video(readme, _KB) is False

    def test_directory_is_not_video(self, tmp_path: Path) -> None:
        """A directory is rejected even when its name ends in a video suffix."""
        disguised_dir = tmp_path / "subdir.mkv"
        disguised_dir.mkdir()
        assert is_supported_video(disguised_dir, _KB) is False

    def test_nonexistent_path_is_not_video(self, tmp_path: Path) -> None:
        """A path that was never created is rejected."""
        missing = tmp_path / "ghost.mkv"
        assert is_supported_video(missing, _KB) is False
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------- #
|
||||||
|
# find_main_video #
|
||||||
|
# --------------------------------------------------------------------- #
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindMainVideo:
    """Selection rules applied by ``find_main_video``."""

    def test_single_video_file_in_folder(self, tmp_path: Path) -> None:
        """The only video in the folder is returned."""
        only_video = tmp_path / "Movie.2020.mkv"
        only_video.touch()
        assert find_main_video(tmp_path, _KB) == only_video

    def test_returns_lexicographically_first_among_multiple(
        self, tmp_path: Path
    ) -> None:
        """With several videos, the one that sorts first by name wins."""
        # Season-pack shape. Episode 2 is created before episode 1 so the
        # expected winner is not also the first file created.
        second_episode = tmp_path / "Show.S01E02.mkv"
        first_episode = tmp_path / "Show.S01E01.mkv"
        second_episode.touch()
        first_episode.touch()
        assert find_main_video(tmp_path, _KB) == first_episode

    def test_skips_non_video_files(self, tmp_path: Path) -> None:
        """Companion files never win, whatever their sort position."""
        # Neither the .nfo nor the .srt companion may be picked.
        for companion in ("Movie.nfo", "Movie.srt"):
            (tmp_path / companion).touch()
        video = tmp_path / "Movie.mkv"
        video.touch()
        assert find_main_video(tmp_path, _KB) == video

    def test_ignores_subdirectories(self, tmp_path: Path) -> None:
        """A video inside ``Sample/`` does not compete with a top-level one."""
        # The scan must stay at the top level of the release folder.
        nested = tmp_path / "Sample"
        nested.mkdir()
        (nested / "sample.mkv").touch()
        top_level = tmp_path / "Movie.mkv"
        top_level.touch()
        assert find_main_video(tmp_path, _KB) == top_level

    def test_only_subdirectory_with_video_returns_none(
        self, tmp_path: Path
    ) -> None:
        """A video that exists only inside a subdirectory is not found."""
        # Nothing eligible at the top level → None.
        nested = tmp_path / "Sample"
        nested.mkdir()
        (nested / "video.mkv").touch()
        assert find_main_video(tmp_path, _KB) is None

    def test_empty_folder_returns_none(self, tmp_path: Path) -> None:
        """An empty folder yields ``None``."""
        assert find_main_video(tmp_path, _KB) is None

    def test_nonexistent_folder_returns_none(self, tmp_path: Path) -> None:
        """A folder that does not exist yields ``None``."""
        missing = tmp_path / "ghost"
        assert find_main_video(missing, _KB) is None

    def test_single_file_release_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        """A bare video file passed as ``folder`` is returned as-is."""
        # Single-file releases come without an enclosing folder.
        bare_video = tmp_path / "Movie.2020.1080p.mkv"
        bare_video.touch()
        assert find_main_video(bare_video, _KB) == bare_video

    def test_single_file_non_video_passed_as_folder_arg(
        self, tmp_path: Path
    ) -> None:
        """A bare non-video file passed as ``folder`` yields ``None``."""
        bare_info = tmp_path / "README.nfo"
        bare_info.touch()
        assert find_main_video(bare_info, _KB) is None
|
||||||
Reference in New Issue
Block a user