feat(release): wire v2 EASY path for known release groups
The annotate-based v2 pipeline now handles releases ending in -KONTRAST, -ELiTE, or -RARBG. Unknown groups still fall through to the legacy SHITTY heuristic in services.py — nothing changes for them. Pipeline (alfred/domain/release/parser/pipeline.py): - tokenize(): string-ops separator split, strips [site.tag] first. - annotate(): right-to-left group detection (priority to codec-GROUP shape, fallback to any non-source dashed token), GroupSchema lookup via the kb port, then lockstep walk of tokens against schema chunks. Optional chunks skip on mismatch; mandatory mismatches return None so the caller falls back gracefully. CODEC pre-consumed by a codec-GROUP trailing token correctly skips the CODEC chunk in the body walk. - assemble(): folds annotated tokens into a ParsedRelease-compatible dict (title joined by '.', group from the codec-GROUP token's extras). Schema (alfred/domain/release/parser/schema.py): - GroupSchema + SchemaChunk frozen value objects. - TokenRole.GROUP added. Port + adapter: - ReleaseKnowledge.group_schema(name) lookup added (case-insensitive). - YamlReleaseKnowledge loads alfred/knowledge/release/release_groups/*.yaml at construction time; learned overrides in data/knowledge/release/release_groups/ are also picked up. Knowledge: - release_groups/kontrast.yaml, elite.yaml, rarbg.yaml declare the canonical chunk_order. ELiTE marks source as optional (Foundation.S02 has no WEBRip token). Services: - parse_release tries the v2 path first; on None falls through to the legacy implementation untouched. Tests: - tests/domain/release/test_parser_v2_easy.py (10 cases) cover group detection (codec-GROUP, dashed-source skip, no-dash → unknown), schema-driven annotation (movie, TV episode, season pack with optional source, unknown group returns None), and field assembly. - Existing tests/domain/test_release_fixtures.py (30 cases) stay green: 5 EASY fixtures now produced by v2, 25 SHITTY/PATH OF PAIN fixtures still produced by the legacy path.
Verified via spy on v2.assemble. Suite: 1007 passed, 8 skipped. Refs: project_release_parser_v2_specs (memory)
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
"""EASY-path tests for the v2 annotate-based pipeline.
|
||||
|
||||
These tests assert that the **v2 pipeline itself** produces the correct
|
||||
annotated stream and assembled fields for releases from known groups
|
||||
(KONTRAST, ELiTE, …) — without going through ``parse_release``. The
|
||||
fixtures suite (``tests/domain/test_release_fixtures.py``) already
|
||||
locks the user-visible ``ParsedRelease`` contract; here we cover the
|
||||
internal pipeline behavior so a future refactor of ``parse_release``
|
||||
can't quietly drop EASY without us noticing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from alfred.domain.release.parser import TokenRole
|
||||
from alfred.domain.release.parser.pipeline import (
|
||||
_detect_group,
|
||||
annotate,
|
||||
assemble,
|
||||
tokenize,
|
||||
)
|
||||
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
||||
|
||||
# Shared knowledge-base instance: built once when this module is imported
# and passed to every tokenize/annotate/assemble call in the tests below.
_KB = YamlReleaseKnowledge()
|
||||
|
||||
|
||||
class TestDetectGroup:
    """Checks on ``_detect_group`` applied to a tokenized release name."""

    def test_codec_group(self) -> None:
        """A trailing ``codec-GROUP`` token gives both the group and its index."""
        release = "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        tokens, _ = tokenize(release, _KB)

        group_name, position = _detect_group(tokens, _KB)

        assert group_name == "KONTRAST"
        # x265-KONTRAST is the 7th token, i.e. index 6.
        assert position == 6

    def test_unknown_when_no_dash(self) -> None:
        """Without any dash in the name, no group is detected."""
        release = "Some.Movie.2020.1080p.WEBRip.x265.KONTRAST"
        tokens, _ = tokenize(release, _KB)

        group_name, position = _detect_group(tokens, _KB)

        assert position is None
        assert group_name == "UNKNOWN"

    def test_skips_dashed_source(self) -> None:
        """A dashed source such as "Web-DL" must not be taken for the group."""
        release = "Movie.2020.1080p.Web-DL.x265-GRP"
        tokens, _ = tokenize(release, _KB)

        group_name, _ = _detect_group(tokens, _KB)

        assert group_name == "GRP"
|
||||
|
||||
|
||||
class TestAnnotateEasy:
    """Checks on ``annotate`` for releases whose group has a known schema."""

    def test_kontrast_movie(self) -> None:
        """A KONTRAST movie is annotated token by token, group carried as extra."""
        release = "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        tokens, _ = tokenize(release, _KB)

        annotated = annotate(tokens, _KB)
        assert annotated is not None, "KONTRAST should hit the EASY path"

        # Back / in / Action, then one role per remaining token.
        expected_roles = [TokenRole.TITLE] * 3 + [
            TokenRole.YEAR,
            TokenRole.RESOLUTION,
            TokenRole.SOURCE,
            TokenRole.CODEC,  # x265-KONTRAST → CODEC with extra.group=KONTRAST
        ]
        assert [tok.role for tok in annotated] == expected_roles

        trailing = annotated[-1]
        assert trailing.extra["group"] == "KONTRAST"
        assert trailing.extra["codec"] == "x265"

    def test_kontrast_tv_episode(self) -> None:
        """A TV episode carries a season/episode role and no year role."""
        release = "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST"
        tokens, _ = tokenize(release, _KB)

        annotated = annotate(tokens, _KB)
        assert annotated is not None

        # The year is absent from the name, so only SEASON_EPISODE shows up.
        seen_roles = [tok.role for tok in annotated]
        assert TokenRole.SEASON_EPISODE in seen_roles
        assert TokenRole.YEAR not in seen_roles

    def test_elite_no_source(self) -> None:
        """An ELiTE season pack without a source token is still annotated."""
        # Foundation.S02 has no source token in its name.
        release = "Foundation.S02.1080p.x265-ELiTE"
        tokens, _ = tokenize(release, _KB)

        annotated = annotate(tokens, _KB)
        assert annotated is not None, "ELiTE optional source must be tolerated"

        seen_roles = [tok.role for tok in annotated]
        assert TokenRole.SOURCE not in seen_roles
        assert TokenRole.RESOLUTION in seen_roles
        assert TokenRole.CODEC in seen_roles

    def test_unknown_group_returns_none(self) -> None:
        """A group with no schema makes ``annotate`` return ``None``."""
        release = "Some.Movie.2020.1080p.WEBRip.x264-RANDOM"
        tokens, _ = tokenize(release, _KB)

        # RANDOM has no release_groups/ entry, so the caller is expected to
        # fall back to the SHITTY path.
        outcome = annotate(tokens, _KB)
        assert outcome is None
|
||||
|
||||
|
||||
class TestAssemble:
    """Checks on the fields ``assemble`` builds from an annotated stream."""

    @staticmethod
    def _assemble(name: str) -> dict:
        """Run tokenize → annotate → assemble on *name* and return the fields.

        ``annotate`` returns ``None`` when a release does not take the EASY
        path. Asserting on that here makes such a regression fail with a
        readable message instead of an error raised from inside ``assemble``.
        """
        tokens, tag = tokenize(name, _KB)
        annotated = annotate(tokens, _KB)
        assert annotated is not None, f"{name!r} should hit the EASY path"
        return assemble(annotated, tag, name, _KB)

    def test_kontrast_movie_fields(self) -> None:
        """Every field of a KONTRAST movie release is populated as expected."""
        fields = self._assemble("Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST")

        assert fields["title"] == "Back.in.Action"
        assert fields["year"] == 2025
        assert fields["season"] is None
        assert fields["quality"] == "1080p"
        assert fields["source"] == "WEBRip"
        assert fields["codec"] == "x265"
        assert fields["group"] == "KONTRAST"
        assert fields["tech_string"] == "1080p.WEBRip.x265"
        assert fields["media_type"] == "movie"
        assert fields["site_tag"] is None

    def test_kontrast_tv_fields(self) -> None:
        """A KONTRAST episode yields season/episode numbers and no year."""
        fields = self._assemble("Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST")

        assert fields["title"] == "Slow.Horses"
        assert fields["year"] is None
        assert fields["season"] == 5
        assert fields["episode"] == 1
        assert fields["media_type"] == "tv_show"
        assert fields["group"] == "KONTRAST"

    def test_elite_season_pack(self) -> None:
        """An ELiTE season pack has a season, no episode and no source."""
        fields = self._assemble("Foundation.S02.1080p.x265-ELiTE")

        assert fields["title"] == "Foundation"
        assert fields["season"] == 2
        assert fields["episode"] is None  # season pack
        assert fields["source"] is None  # ELiTE omits it
        assert fields["tech_string"] == "1080p.x265"
        assert fields["group"] == "ELiTE"
|
||||
Reference in New Issue
Block a user