feat(release): wire v2 EASY path for known release groups

The annotate-based v2 pipeline now handles releases ending in -KONTRAST,
-ELiTE, or -RARBG. Unknown groups still fall through to the legacy
SHITTY heuristic in services.py — nothing changes for them.

Pipeline (alfred/domain/release/parser/pipeline.py):
- tokenize(): string-ops separator split, strips [site.tag] first.
- annotate(): right-to-left group detection (priority to codec-GROUP
  shape, fallback to any non-source dashed token), GroupSchema lookup
  via the kb port, then lockstep walk of tokens against schema chunks.
  Optional chunks are skipped on mismatch; a mandatory mismatch returns
  None so the caller falls back gracefully. When the trailing codec-GROUP
  token has already consumed the CODEC, the body walk skips the CODEC
  chunk.
- assemble(): folds annotated tokens into a ParsedRelease-compatible
  dict (title joined by '.', group from the codec-GROUP token's extras).

Schema (alfred/domain/release/parser/schema.py):
- GroupSchema + SchemaChunk frozen value objects.
- TokenRole.GROUP added.

Port + adapter:
- ReleaseKnowledge.group_schema(name) lookup added (case-insensitive).
- YamlReleaseKnowledge loads alfred/knowledge/release/release_groups/
  *.yaml at construction time; learned overrides in
  data/knowledge/release/release_groups/ also picked up.

Knowledge:
- release_groups/kontrast.yaml, elite.yaml, rarbg.yaml declare the
  canonical chunk_order. ELiTE marks source as optional (Foundation.S02
  has no WEBRip token).

Services:
- parse_release tries the v2 path first; on None falls through to the
  legacy implementation untouched.

Tests:
- tests/domain/release/test_parser_v2_easy.py (10 cases) cover group
  detection (codec-GROUP, dashed-source skip, no-dash → unknown),
  schema-driven annotation (movie, TV episode, season pack with
  optional source, unknown group returns None), and field assembly.
- Existing tests/domain/test_release_fixtures.py (30 cases) stay green:
  5 EASY fixtures now produced by v2, 25 SHITTY/PATH OF PAIN fixtures
  still produced by the legacy path. Verified via spy on v2.assemble.

Suite: 1007 passed, 8 skipped.

Refs: project_release_parser_v2_specs (memory)
This commit is contained in:
2026-05-20 00:21:11 +02:00
parent a2c917618f
commit 075a827b0e
12 changed files with 730 additions and 70 deletions
+142
View File
@@ -0,0 +1,142 @@
"""EASY-path tests for the v2 annotate-based pipeline.
These tests assert that the **v2 pipeline itself** produces the correct
annotated stream and assembled fields for releases from known groups
(KONTRAST, ELiTE, …) — without going through ``parse_release``. The
fixtures suite (``tests/domain/test_release_fixtures.py``) already
locks the user-visible ``ParsedRelease`` contract; here we cover the
internal pipeline behavior so a future refactor of ``parse_release``
can't quietly drop EASY without us noticing.
"""
from __future__ import annotations
from alfred.domain.release.parser import TokenRole
from alfred.domain.release.parser.pipeline import (
_detect_group,
annotate,
assemble,
tokenize,
)
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
# One knowledge base shared by every test in this module. Per the commit
# notes, YamlReleaseKnowledge loads the release_groups YAML files at
# construction time, so it is built once at import rather than per test.
_KB = YamlReleaseKnowledge()
class TestDetectGroup:
    """Group detection on the raw token stream via ``_detect_group``."""

    def test_codec_group(self) -> None:
        """A trailing ``codec-GROUP`` token yields the group and its index."""
        tokens, _ = tokenize(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB
        )
        name, idx = _detect_group(tokens, _KB)
        assert name == "KONTRAST"
        assert idx == 6  # x265-KONTRAST is the 7th token

    def test_unknown_when_no_dash(self) -> None:
        """Without any dashed token the result is ``("UNKNOWN", None)``."""
        tokens, _ = tokenize("Some.Movie.2020.1080p.WEBRip.x265.KONTRAST", _KB)
        # No dash anywhere → no group detected.
        name, idx = _detect_group(tokens, _KB)
        assert idx is None
        assert name == "UNKNOWN"

    def test_skips_dashed_source(self) -> None:
        """A dashed source token is not picked up as the release group."""
        # "Web-DL" must not be mistaken for a group token.
        tokens, _ = tokenize("Movie.2020.1080p.Web-DL.x265-GRP", _KB)
        name, idx = _detect_group(tokens, _KB)
        assert name == "GRP"
        # The match must be the trailing x265-GRP token, not the earlier
        # Web-DL one; previously ``idx`` was bound but never checked.
        assert idx == len(tokens) - 1
class TestAnnotateEasy:
    """Schema-driven annotation for releases from known groups."""

    def test_kontrast_movie(self) -> None:
        """A KONTRAST movie release is annotated token by token."""
        # The site tag is irrelevant here, so discard it like the sibling
        # tests do instead of binding an unused local.
        tokens, _ = tokenize(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB
        )
        annotated = annotate(tokens, _KB)
        assert annotated is not None, "KONTRAST should hit the EASY path"
        roles = [t.role for t in annotated]
        assert roles == [
            TokenRole.TITLE,  # Back
            TokenRole.TITLE,  # in
            TokenRole.TITLE,  # Action
            TokenRole.YEAR,
            TokenRole.RESOLUTION,
            TokenRole.SOURCE,
            TokenRole.CODEC,  # x265-KONTRAST → CODEC with extra.group=KONTRAST
        ]
        assert annotated[-1].extra["group"] == "KONTRAST"
        assert annotated[-1].extra["codec"] == "x265"

    def test_kontrast_tv_episode(self) -> None:
        """A TV episode without a year still annotates successfully."""
        tokens, _ = tokenize(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST", _KB
        )
        annotated = annotate(tokens, _KB)
        assert annotated is not None
        # Year is optional and absent → skipped. Season_episode present.
        roles = [t.role for t in annotated]
        assert TokenRole.SEASON_EPISODE in roles
        assert TokenRole.YEAR not in roles

    def test_elite_no_source(self) -> None:
        """An ELiTE release with no source token is still accepted."""
        # ELiTE schema marks source as optional — Foundation.S02 omits it.
        tokens, _ = tokenize("Foundation.S02.1080p.x265-ELiTE", _KB)
        annotated = annotate(tokens, _KB)
        assert annotated is not None, "ELiTE optional source must be tolerated"
        roles = [t.role for t in annotated]
        assert TokenRole.SOURCE not in roles
        assert TokenRole.RESOLUTION in roles
        assert TokenRole.CODEC in roles

    def test_unknown_group_returns_none(self) -> None:
        """A group with no schema makes ``annotate`` return ``None``."""
        tokens, _ = tokenize("Some.Movie.2020.1080p.WEBRip.x264-RANDOM", _KB)
        # RANDOM is not in our release_groups/ → annotate returns None
        # and the caller falls back to SHITTY.
        assert annotate(tokens, _KB) is None
class TestAssemble:
    """Field assembly from an annotated token stream (``assemble``)."""

    @staticmethod
    def _assemble_fields(name: str) -> dict[str, object]:
        """Run tokenize → annotate → assemble on *name* and return the fields.

        ``annotate`` returns ``None`` when a release does not hit the EASY
        path. Checking that here makes a regression fail with a clear
        message instead of an unrelated error raised inside ``assemble``.
        """
        tokens, tag = tokenize(name, _KB)
        annotated = annotate(tokens, _KB)
        assert annotated is not None, f"{name!r} should hit the EASY path"
        return assemble(annotated, tag, name, _KB)

    def test_kontrast_movie_fields(self) -> None:
        """A KONTRAST movie release assembles into the full field set."""
        fields = self._assemble_fields(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        )
        assert fields["title"] == "Back.in.Action"
        assert fields["year"] == 2025
        assert fields["season"] is None
        assert fields["quality"] == "1080p"
        assert fields["source"] == "WEBRip"
        assert fields["codec"] == "x265"
        assert fields["group"] == "KONTRAST"
        assert fields["tech_string"] == "1080p.WEBRip.x265"
        assert fields["media_type"] == "movie"
        assert fields["site_tag"] is None

    def test_kontrast_tv_fields(self) -> None:
        """A KONTRAST TV episode assembles season and episode, but no year."""
        fields = self._assemble_fields(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST"
        )
        assert fields["title"] == "Slow.Horses"
        assert fields["year"] is None
        assert fields["season"] == 5
        assert fields["episode"] == 1
        assert fields["media_type"] == "tv_show"
        assert fields["group"] == "KONTRAST"

    def test_elite_season_pack(self) -> None:
        """An ELiTE season pack has a season but no episode and no source."""
        fields = self._assemble_fields("Foundation.S02.1080p.x265-ELiTE")
        assert fields["title"] == "Foundation"
        assert fields["season"] == 2
        assert fields["episode"] is None  # season pack
        assert fields["source"] is None  # ELiTE omits it
        assert fields["tech_string"] == "1080p.x265"
        assert fields["group"] == "ELiTE"