3737f66851
Replace the ~480-line legacy heuristic block in services.py with a small dict-driven pass in pipeline._annotate_shitty: each token is looked up against the kb buckets (resolutions / sources / codecs / distributors / year / sxxexx) with first-match-wins semantics, the leftmost contiguous UNKNOWN run becomes the title, done. SHITTY's scope is intentionally narrow — releases that *look* like scene names but don't have a registered group schema. Anything more exotic (parenthesized tech, bare-dashed title fragments, YT slugs, franchise boxes) is PATH OF PAIN territory and stays out of here. - annotate() no longer returns None; SHITTY is the always-on fallback - services.py shrunk from ~525 to ~85 lines (legacy extractors gone) - 4 fixtures get xfail markers documenting PoP-grade pathologies (deutschland franchise box, sleaford YT slug, super_mario bilingual, predator space-separators — the last one moved from shitty/ → pop/) - ReleaseFixture grows xfail_reason; the parametrized suite wires the pytest.mark.xfail(strict=False) automatically
217 lines
8.3 KiB
Python
217 lines
8.3 KiB
Python
"""EASY-path tests for the v2 annotate-based pipeline.

These tests assert that the **v2 pipeline itself** produces the correct
annotated stream and assembled fields for releases from known groups
(KONTRAST, ELiTE, …) — without going through ``parse_release``. The
fixtures suite (``tests/domain/test_release_fixtures.py``) already
locks the user-visible ``ParsedRelease`` contract; here we cover the
internal pipeline behavior so a future refactor of ``parse_release``
can't quietly drop EASY without us noticing.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from alfred.domain.release.parser import TokenRole
|
|
from alfred.domain.release.parser.pipeline import (
|
|
_detect_group,
|
|
annotate,
|
|
assemble,
|
|
tokenize,
|
|
)
|
|
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
|
|
|
# Knowledge-base instance built once at import time and passed to every
# tokenize / annotate / assemble call in this module.
_KB = YamlReleaseKnowledge()
|
|
|
|
|
|
class TestDetectGroup:
    """Exercise ``_detect_group`` directly on freshly tokenized release names."""

    def test_codec_group(self) -> None:
        """A trailing ``codec-GROUP`` token yields the group name and its index."""
        release = "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        tokens, _ = tokenize(release, _KB)

        group, position = _detect_group(tokens, _KB)

        assert group == "KONTRAST"
        # "x265-KONTRAST" is the 7th token, i.e. zero-based index 6.
        assert position == 6

    def test_unknown_when_no_dash(self) -> None:
        """Without any dash in the name, no group is detected."""
        release = "Some.Movie.2020.1080p.WEBRip.x265.KONTRAST"
        tokens, _ = tokenize(release, _KB)

        group, position = _detect_group(tokens, _KB)

        assert position is None
        assert group == "UNKNOWN"

    def test_skips_dashed_source(self) -> None:
        """A dashed source such as "Web-DL" must not be taken for the group."""
        release = "Movie.2020.1080p.Web-DL.x265-GRP"
        tokens, _ = tokenize(release, _KB)

        # Only the detected name is checked here; the index is ignored.
        group, _ = _detect_group(tokens, _KB)

        assert group == "GRP"
|
|
|
|
|
|
class TestAnnotateEasy:
    """Check the roles ``annotate`` assigns to tokenized release names.

    Covers releases from known groups (KONTRAST, ELiTE) plus one release
    whose group is not registered and therefore goes through the SHITTY
    fallback pass.
    """

    def test_kontrast_movie(self) -> None:
        """A KONTRAST movie release gets exactly one expected role per token."""
        # The second value returned by tokenize() is not needed in this test.
        tokens, _ = tokenize(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB
        )
        annotated = annotate(tokens, _KB)
        # NOTE(review): per the comment in test_unknown_group_falls_to_shitty,
        # annotate() has no None sentinel anymore, so this guard alone cannot
        # prove the EASY path was taken. The exact role list below is the
        # meaningful check.
        assert annotated is not None, "KONTRAST should hit the EASY path"

        roles = [t.role for t in annotated]
        assert roles == [
            TokenRole.TITLE,  # Back
            TokenRole.TITLE,  # in
            TokenRole.TITLE,  # Action
            TokenRole.YEAR,
            TokenRole.RESOLUTION,
            TokenRole.SOURCE,
            TokenRole.CODEC,  # x265-KONTRAST → CODEC with extra.group=KONTRAST
        ]
        assert annotated[-1].extra["group"] == "KONTRAST"
        assert annotated[-1].extra["codec"] == "x265"

    def test_kontrast_tv_episode(self) -> None:
        """A TV episode release carries SEASON_EPISODE and no YEAR role."""
        tokens, _ = tokenize(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST", _KB
        )
        annotated = annotate(tokens, _KB)
        assert annotated is not None

        # Year is optional and absent → skipped. Season_episode present.
        roles = [t.role for t in annotated]
        assert TokenRole.SEASON_EPISODE in roles
        assert TokenRole.YEAR not in roles

    def test_elite_no_source(self) -> None:
        """An ELiTE release without a source token is still annotated."""
        # ELiTE schema marks source as optional — Foundation.S02 omits it.
        tokens, _ = tokenize("Foundation.S02.1080p.x265-ELiTE", _KB)
        annotated = annotate(tokens, _KB)
        # NOTE(review): with the fallback always on, this guard and the role
        # membership checks below may also hold if ELiTE fell through to the
        # fallback pass — confirm whether an EASY-only signal can be asserted.
        assert annotated is not None, "ELiTE optional source must be tolerated"

        roles = [t.role for t in annotated]
        assert TokenRole.SOURCE not in roles
        assert TokenRole.RESOLUTION in roles
        assert TokenRole.CODEC in roles

    def test_unknown_group_falls_to_shitty(self) -> None:
        """An unregistered group still yields a fully populated token list."""
        tokens, _ = tokenize("Some.Movie.2020.1080p.WEBRip.x264-RANDOM", _KB)
        # RANDOM is not in our release_groups/ — annotate() now falls
        # through to the in-pipeline SHITTY pass and returns a populated
        # token list (no None sentinel anymore).
        annotated = annotate(tokens, _KB)
        assert annotated is not None

        roles = [t.role for t in annotated]
        # Title is "Some.Movie", then YEAR, RESOLUTION, SOURCE, CODEC
        # carrying the group in extra.
        assert TokenRole.TITLE in roles
        assert TokenRole.YEAR in roles
        assert TokenRole.RESOLUTION in roles
        assert TokenRole.SOURCE in roles
        assert TokenRole.CODEC in roles

        codec_tok = next(t for t in annotated if t.role is TokenRole.CODEC)
        assert codec_tok.extra.get("group") == "RANDOM"
|
|
|
|
|
|
class TestAssemble:
    """Check the field mapping ``assemble`` builds from an annotated stream."""

    @staticmethod
    def _fields_for(release_name: str):
        """Tokenize, annotate and assemble ``release_name`` with the shared KB."""
        tokens, tag = tokenize(release_name, _KB)
        annotated = annotate(tokens, _KB)
        return assemble(annotated, tag, release_name, _KB)

    def test_kontrast_movie_fields(self) -> None:
        """Movie release: every core field is filled, season and site tag are None."""
        fields = self._fields_for(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        )

        expected = {
            "title": "Back.in.Action",
            "year": 2025,
            "quality": "1080p",
            "source": "WEBRip",
            "codec": "x265",
            "group": "KONTRAST",
            "tech_string": "1080p.WEBRip.x265",
            "media_type": "movie",
        }
        for key, value in expected.items():
            assert fields[key] == value
        assert fields["season"] is None
        assert fields["site_tag"] is None

    def test_kontrast_tv_fields(self) -> None:
        """TV episode release: season/episode are parsed and there is no year."""
        fields = self._fields_for(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST"
        )

        expected = {
            "title": "Slow.Horses",
            "season": 5,
            "episode": 1,
            "media_type": "tv_show",
            "group": "KONTRAST",
        }
        for key, value in expected.items():
            assert fields[key] == value
        assert fields["year"] is None

    def test_elite_season_pack(self) -> None:
        """Season pack from ELiTE: no episode number and no source."""
        fields = self._fields_for("Foundation.S02.1080p.x265-ELiTE")

        expected = {
            "title": "Foundation",
            "season": 2,
            "tech_string": "1080p.x265",
            "group": "ELiTE",
        }
        for key, value in expected.items():
            assert fields[key] == value
        assert fields["episode"] is None  # season pack
        assert fields["source"] is None  # ELiTE omits it
|
|
|
|
|
|
class TestEnrichers:
    """Roles without a fixed position, filled in next to the structural walk.

    Before the enricher pass existed, the leftover unknown tokens in these
    releases would have forced a fallback out of the v2 EASY path. They
    now go through v2 and come out with rich metadata.
    """

    def test_bit_depth_and_audio(self) -> None:
        """Bit depth, audio codec and channel layout are all extracted."""
        release = "Back.in.Action.2025.1080p.WEBRip.10bit.DDP.5.1.x265-KONTRAST"
        tokens, tag = tokenize(release, _KB)
        stream = annotate(tokens, _KB)
        assert stream is not None

        result = assemble(stream, tag, release, _KB)

        assert result["title"] == "Back.in.Action"
        assert result["bit_depth"] == "10bit"
        assert result["audio_codec"] == "DDP"
        assert result["audio_channels"] == "5.1"

    def test_hdr_sequence(self) -> None:
        """Multi-token HDR, audio and edition sequences are joined per field."""
        # One release combining a DV.HDR10 sequence, a TrueHD.Atmos
        # sequence, 7.1 channels and a DIRECTORS.CUT edition.
        release = (
            "Some.Movie.2024.DIRECTORS.CUT.2160p.BluRay.DV.HDR10."
            "TrueHD.Atmos.7.1.x265-KONTRAST"
        )
        tokens, tag = tokenize(release, _KB)
        stream = annotate(tokens, _KB)
        assert stream is not None

        result = assemble(stream, tag, release, _KB)

        assert result["edition"] == "DIRECTORS.CUT"
        assert result["hdr_format"] == "DV.HDR10"
        assert result["audio_codec"] == "TrueHD.Atmos"
        assert result["audio_channels"] == "7.1"

    def test_multiple_languages(self) -> None:
        """Several language tokens are collected in order of appearance."""
        release = "Movie.2020.FRENCH.MULTI.1080p.WEBRip.DTS.HD.MA.5.1.x265-KONTRAST"
        tokens, tag = tokenize(release, _KB)
        stream = annotate(tokens, _KB)
        assert stream is not None

        result = assemble(stream, tag, release, _KB)

        assert result["languages"] == ["FRENCH", "MULTI"]
        assert result["audio_codec"] == "DTS-HD.MA"
        assert result["audio_channels"] == "5.1"

    def test_tv_with_language(self) -> None:
        """A language token after SxxEyy does not disturb the TV fields."""
        release = "Show.S01E05.FRENCH.1080p.WEBRip.x265-KONTRAST"
        tokens, tag = tokenize(release, _KB)
        stream = annotate(tokens, _KB)
        assert stream is not None

        result = assemble(stream, tag, release, _KB)

        assert result["title"] == "Show"
        assert result["season"] == 1
        assert result["episode"] == 5
        assert result["languages"] == ["FRENCH"]
        assert result["media_type"] == "tv_show"
|