Files
alfred/tests/domain/release/test_parser_v2_easy.py
T
francwa 7dc7f0c241 feat(release): v2 enricher pass for audio/video-meta/edition/language
The EASY pipeline now extracts the full ParsedRelease surface from
known-group releases, not just the structural backbone. Behavior is
unchanged for releases that don't carry these tokens.

Pipeline (parser/pipeline.py):
- Structural walk (renamed _annotate_structural): no longer requires
  body to be fully consumed. Tokens passed over between schema chunks
  remain UNKNOWN so the enricher pass can claim them.
- _find_chunk(): scans forward in the body for the next token matching
  a given role, skipping already-annotated tokens. Lets optional and
  mandatory chunks both tolerate intercalated enricher tokens.
- _annotate_enrichers(): new non-positional pass. Walks UNKNOWN tokens
  and tags AUDIO_CODEC / AUDIO_CHANNELS / BIT_DEPTH / HDR / EDITION /
  LANGUAGE. Multi-token sequences from kb.audio / kb.video_meta /
  kb.editions are matched first (longest-first ordering preserved from
  the YAML), single tokens after.
- _apply_sequences(): mutates the token list, tagging the first token
  of a matched sequence with extra['sequence']=<canonical value> and
  trailing members with extra['sequence_member']='True' so assemble
  skips them.
- _detect_channel_pairs(): handles the '5.1' / '7.1' case where the
  '.' separator splits the layout into two tokens. Strips a trailing
  '-GROUP' suffix from the second token before joining the pair.

Assemble:
- New fields populated: languages (list), audio_codec, audio_channels,
  bit_depth, hdr_format, edition. Each role-handler skips
  sequence_member tokens.
- media_type heuristic extended: edition in {COMPLETE, INTEGRALE,
  COLLECTION} + no season → tv_complete (mirrors legacy).

Tests:
- 4 new TestEnrichers cases covering bit_depth+audio_codec+channels,
  HDR sequence + edition sequence + TrueHD.Atmos + 7.1, multi-language
  with DTS-HD.MA sequence, TV episode with single language.
- All 14 v2 tests + 30 fixture tests still green. Suite: 1011 passed,
  8 skipped.

Refs: project_release_parser_v2_specs (memory)
2026-05-20 00:26:05 +02:00

205 lines
7.7 KiB
Python

"""EASY-path tests for the v2 annotate-based pipeline.
These tests assert that the **v2 pipeline itself** produces the correct
annotated stream and assembled fields for releases from known groups
(KONTRAST, ELiTE, …) — without going through ``parse_release``. The
fixtures suite (``tests/domain/test_release_fixtures.py``) already
locks the user-visible ``ParsedRelease`` contract; here we cover the
internal pipeline behavior so a future refactor of ``parse_release``
can't quietly drop EASY without us noticing.
"""
from __future__ import annotations
from alfred.domain.release.parser import TokenRole
from alfred.domain.release.parser.pipeline import (
_detect_group,
annotate,
assemble,
tokenize,
)
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
# Knowledge base instance created once at import time and passed to every
# tokenize / annotate / assemble / _detect_group call in this module.
_KB = YamlReleaseKnowledge()
class TestDetectGroup:
    """Checks on ``_detect_group`` applied to tokenized release names."""

    def test_codec_group(self) -> None:
        """A trailing ``codec-GROUP`` token yields the group and its index."""
        toks, _ = tokenize(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB
        )
        group, position = _detect_group(toks, _KB)
        assert group == "KONTRAST"
        # Zero-based index: x265-KONTRAST is the 7th token.
        assert position == 6

    def test_unknown_when_no_dash(self) -> None:
        """A name without any dash reports no index and the UNKNOWN group."""
        toks, _ = tokenize("Some.Movie.2020.1080p.WEBRip.x265.KONTRAST", _KB)
        group, position = _detect_group(toks, _KB)
        # Nothing is dash-separated here, so there is no group to find.
        assert position is None
        assert group == "UNKNOWN"

    def test_skips_dashed_source(self) -> None:
        """The dashed source ``Web-DL`` is not taken for the group token."""
        toks, _ = tokenize("Movie.2020.1080p.Web-DL.x265-GRP", _KB)
        # Only the detected name is checked; the index is not asserted.
        group, _ = _detect_group(toks, _KB)
        assert group == "GRP"
class TestAnnotateEasy:
    """Role annotation produced by ``annotate`` for known and unknown groups."""

    def test_kontrast_movie(self) -> None:
        """A KONTRAST movie is annotated token by token, group on the codec."""
        toks, _ = tokenize(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB
        )
        stream = annotate(toks, _KB)
        assert stream is not None, "KONTRAST should hit the EASY path"

        title_roles = [TokenRole.TITLE] * 3  # Back / in / Action
        expected_roles = title_roles + [
            TokenRole.YEAR,
            TokenRole.RESOLUTION,
            TokenRole.SOURCE,
            # x265-KONTRAST → CODEC with extra.group=KONTRAST
            TokenRole.CODEC,
        ]
        observed_roles = [tok.role for tok in stream]
        assert observed_roles == expected_roles

        assert stream[-1].extra["group"] == "KONTRAST"
        assert stream[-1].extra["codec"] == "x265"

    def test_kontrast_tv_episode(self) -> None:
        """An episode without a year gets SEASON_EPISODE and no YEAR role."""
        toks, _ = tokenize(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST", _KB
        )
        stream = annotate(toks, _KB)
        assert stream is not None

        # The year chunk is optional and missing here, so it is skipped,
        # while the season/episode token is present.
        observed_roles = [tok.role for tok in stream]
        assert TokenRole.SEASON_EPISODE in observed_roles
        assert TokenRole.YEAR not in observed_roles

    def test_elite_no_source(self) -> None:
        """ELiTE releases may omit the source chunk entirely."""
        # The ELiTE schema marks source as optional; Foundation.S02 has none.
        toks, _ = tokenize("Foundation.S02.1080p.x265-ELiTE", _KB)
        stream = annotate(toks, _KB)
        assert stream is not None, "ELiTE optional source must be tolerated"

        observed_roles = [tok.role for tok in stream]
        assert TokenRole.SOURCE not in observed_roles
        assert TokenRole.RESOLUTION in observed_roles
        assert TokenRole.CODEC in observed_roles

    def test_unknown_group_returns_none(self) -> None:
        """A group missing from the knowledge base makes annotate return None."""
        toks, _ = tokenize("Some.Movie.2020.1080p.WEBRip.x264-RANDOM", _KB)
        # RANDOM is not in our release_groups/, so annotate gives up with
        # None and the caller falls back to SHITTY.
        outcome = annotate(toks, _KB)
        assert outcome is None
class TestAssemble:
    """Field assembly for releases that take the EASY path.

    Every test runs tokenize → annotate → assemble on a release name and
    checks individual keys of the resulting field mapping.
    """

    @staticmethod
    def _assemble(name: str):
        """Run tokenize → annotate → assemble on *name* and return the fields.

        Asserts that ``annotate`` did not return ``None`` (as the
        ``TestEnrichers`` cases do) so an EASY-path miss is reported
        explicitly rather than through whatever ``assemble`` does when it
        is handed ``None``.
        """
        tokens, tag = tokenize(name, _KB)
        annotated = annotate(tokens, _KB)
        assert annotated is not None, f"{name} should hit the EASY path"
        return assemble(annotated, tag, name, _KB)

    def test_kontrast_movie_fields(self) -> None:
        """A KONTRAST movie yields title, year and the full tech string."""
        fields = self._assemble(
            "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"
        )
        assert fields["title"] == "Back.in.Action"
        assert fields["year"] == 2025
        assert fields["season"] is None
        assert fields["quality"] == "1080p"
        assert fields["source"] == "WEBRip"
        assert fields["codec"] == "x265"
        assert fields["group"] == "KONTRAST"
        assert fields["tech_string"] == "1080p.WEBRip.x265"
        assert fields["media_type"] == "movie"
        assert fields["site_tag"] is None

    def test_kontrast_tv_fields(self) -> None:
        """A KONTRAST episode yields season/episode numbers and no year."""
        fields = self._assemble(
            "Slow.Horses.S05E01.1080p.WEBRip.x265-KONTRAST"
        )
        assert fields["title"] == "Slow.Horses"
        assert fields["year"] is None
        assert fields["season"] == 5
        assert fields["episode"] == 1
        assert fields["media_type"] == "tv_show"
        assert fields["group"] == "KONTRAST"

    def test_elite_season_pack(self) -> None:
        """An ELiTE season pack has a season but no episode and no source."""
        fields = self._assemble("Foundation.S02.1080p.x265-ELiTE")
        assert fields["title"] == "Foundation"
        assert fields["season"] == 2
        assert fields["episode"] is None  # season pack
        assert fields["source"] is None  # ELiTE omits it
        assert fields["tech_string"] == "1080p.x265"
        assert fields["group"] == "ELiTE"
class TestEnrichers:
    """Roles filled in by the non-positional pass next to the structural walk.

    Before the enricher pass existed, the leftover unknown tokens in these
    names would have forced a fallback out of the v2 EASY path. They are
    now handled by v2 and come back with rich metadata.
    """

    def test_bit_depth_and_audio(self) -> None:
        """Bit depth, audio codec and a split ``5.1`` layout are extracted."""
        release = "Back.in.Action.2025.1080p.WEBRip.10bit.DDP.5.1.x265-KONTRAST"
        toks, site_tag = tokenize(release, _KB)
        stream = annotate(toks, _KB)
        assert stream is not None

        parsed = assemble(stream, site_tag, release, _KB)
        assert parsed["title"] == "Back.in.Action"
        assert parsed["bit_depth"] == "10bit"
        assert parsed["audio_codec"] == "DDP"
        assert parsed["audio_channels"] == "5.1"

    def test_hdr_sequence(self) -> None:
        """Multi-token HDR, audio and edition sequences are each recognised."""
        # One release carrying a DV.HDR10 sequence, a TrueHD.Atmos sequence,
        # 7.1 channels and a DIRECTORS.CUT edition.
        release = (
            "Some.Movie.2024.DIRECTORS.CUT.2160p.BluRay.DV.HDR10."
            "TrueHD.Atmos.7.1.x265-KONTRAST"
        )
        toks, site_tag = tokenize(release, _KB)
        stream = annotate(toks, _KB)
        assert stream is not None

        parsed = assemble(stream, site_tag, release, _KB)
        assert parsed["edition"] == "DIRECTORS.CUT"
        assert parsed["hdr_format"] == "DV.HDR10"
        assert parsed["audio_codec"] == "TrueHD.Atmos"
        assert parsed["audio_channels"] == "7.1"

    def test_multiple_languages(self) -> None:
        """Two language tokens are both kept; DTS.HD.MA maps to DTS-HD.MA."""
        release = "Movie.2020.FRENCH.MULTI.1080p.WEBRip.DTS.HD.MA.5.1.x265-KONTRAST"
        toks, site_tag = tokenize(release, _KB)
        stream = annotate(toks, _KB)
        assert stream is not None

        parsed = assemble(stream, site_tag, release, _KB)
        assert parsed["languages"] == ["FRENCH", "MULTI"]
        assert parsed["audio_codec"] == "DTS-HD.MA"
        assert parsed["audio_channels"] == "5.1"

    def test_tv_with_language(self) -> None:
        """A TV episode with a single language token keeps its TV fields."""
        release = "Show.S01E05.FRENCH.1080p.WEBRip.x265-KONTRAST"
        toks, site_tag = tokenize(release, _KB)
        stream = annotate(toks, _KB)
        assert stream is not None

        parsed = assemble(stream, site_tag, release, _KB)
        assert parsed["title"] == "Show"
        assert parsed["season"] == 1
        assert parsed["episode"] == 5
        assert parsed["languages"] == ["FRENCH"]
        assert parsed["media_type"] == "tv_show"