From 7dc7f0c241577a288800129fb345d70b282b6efd Mon Sep 17 00:00:00 2001
From: Francwa <francois.hodiaumont@gmail.com>
Date: Wed, 20 May 2026 00:26:05 +0200
Subject: [PATCH] feat(release): v2 enricher pass for
 audio/video-meta/edition/language
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EASY pipeline now extracts the full ParsedRelease surface from
known-group releases, not just the structural backbone. Behavior is
unchanged for releases that don't carry these tokens.

Pipeline (parser/pipeline.py):
- Structural walk (renamed _annotate_structural): no longer requires
  body to be fully consumed. Tokens passed over between schema chunks
  remain UNKNOWN so the enricher pass can claim them.
- _find_chunk(): scans forward in the body for the next token matching
  a given role, skipping already-annotated tokens. Lets optional and
  mandatory chunks both tolerate intercalated enricher tokens.
- _annotate_enrichers(): new non-positional pass. Walks UNKNOWN tokens
  and tags AUDIO_CODEC / AUDIO_CHANNELS / BIT_DEPTH / HDR / EDITION /
  LANGUAGE. Multi-token sequences from kb.audio / kb.video_meta /
  kb.editions are matched first (longest-first ordering preserved from
  the YAML), single tokens after.
- _apply_sequences(): mutates the token list, tagging the first token
  of a matched sequence with extra['sequence']=<canonical value> and
  trailing members with extra['sequence_member']='True' so assemble
  skips them.
- _detect_channel_pairs(): handles the '5.1' / '7.1' case where the
  '.' separator splits the layout into two tokens. Strips a trailing
  '-GROUP' suffix on the second before joining.

Assemble:
- New fields populated: languages (list), audio_codec, audio_channels,
  bit_depth, hdr_format, edition. Each role-handler skips
  sequence_member tokens.
- media_type heuristic extended: edition in {COMPLETE, INTEGRALE,
  COLLECTION} + no season → tv_complete (mirrors legacy).

Tests:
- 4 new TestEnrichers cases covering bit_depth+audio_codec+channels,
  HDR sequence + edition sequence + TrueHD.Atmos + 7.1, multi-language
  with DTS-HD.MA sequence, TV episode with single language.
- All 14 v2 tests + 30 fixture tests still green. Suite: 1011 passed,
  8 skipped.

Refs: project_release_parser_v2_specs (memory)
---
 CHANGELOG.md                                |  13 +
 alfred/domain/release/parser/pipeline.py    | 543 +++++++++++++-------
 tests/domain/release/test_parser_v2_easy.py |  62 +++
 3 files changed, 446 insertions(+), 172 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3420c02..4bb9f04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -43,6 +43,19 @@ callers).
     annotation (movie, TV episode, season pack with optional source),
     and field assembly.
 
+- **Release parser v2 — enricher pass** completes the EASY pipeline.
+  The structural schema walk now tolerates non-positional tokens
+  between chunks (instead of aborting on leftover tokens), and a second
+  pass tags them with audio / video-meta / edition / language roles.
+  Multi-token sequences from `audio.yaml`, `video.yaml`, `editions.yaml`
+  (e.g. `DTS.HD.MA`, `DV.HDR10`, `TrueHD.Atmos`, `DIRECTORS.CUT`) are
+  matched before single tokens. Channel layouts like `5.1` and `7.1`
+  (split into two tokens by the `.` separator) are detected as
+  consecutive pairs. Sequence members carry an `extra["sequence_member"]`
+  marker so `assemble` extracts the canonical value only from the
+  primary token. KONTRAST releases with audio / HDR / edition / language
+  metadata now produce a fully populated `ParsedRelease`.
+
 - **Real-world release fixtures** under `tests/fixtures/releases/{easy,shitty,path_of_pain}/`,
   each documenting an expected `ParsedRelease` plus the future `routing`
   (library / torrents / seed_hardlinks) for the upcoming `organize_media`
diff --git a/alfred/domain/release/parser/pipeline.py b/alfred/domain/release/parser/pipeline.py
index 2b63a25..f2c0812 100644
--- a/alfred/domain/release/parser/pipeline.py
+++ b/alfred/domain/release/parser/pipeline.py
@@ -6,13 +6,21 @@ Three stages:
    a separately-returned site tag (e.g. ``[YTS.MX]``) that is never
    tokenized.
 2. :func:`annotate` — promote each token's :class:`TokenRole` using the
-   injected knowledge base. Group detection is right-to-left; if the
-   group has a registered :class:`GroupSchema` we run :func:`_annotate_easy`
-   (schema-driven, lockstep walk); otherwise we return the tokens with
-   only the group annotated and the caller falls back to SHITTY in
-   :func:`_legacy_assemble` (see :mod:`..services`).
+   injected knowledge base. Two sub-passes:
+
+     a. **Structural** (schema-driven, EASY only). Detects the group at
+        the right end, looks up its :class:`GroupSchema`, then matches
+        the schema's chunk sequence against the token stream. Between
+        two structural chunks, any number of unmatched tokens may
+        remain — they are left UNKNOWN for the enricher pass to handle.
+     b. **Enrichers** (non-positional). Walks UNKNOWN tokens and tags
+        audio / video-meta / edition / language roles. Multi-token
+        sequences (``DTS.HD.MA``, ``DV.HDR10``, ``DIRECTORS.CUT``) are
+        matched first, single tokens after.
+
 3. :func:`assemble` — fold annotated tokens into a
-   :class:`~alfred.domain.release.value_objects.ParsedRelease`.
+   :class:`~alfred.domain.release.value_objects.ParsedRelease`-compatible
+   dict.
 
 The pipeline is **pure**: no I/O, no TMDB, no probe. All knowledge
 arrives through ``kb: ReleaseKnowledge``.
@@ -78,7 +86,7 @@ def tokenize(name: str, kb: ReleaseKnowledge) -> tuple[list[Token], str | None]:
 
 
 # ---------------------------------------------------------------------------
-# Stage 2 — annotate
+# Helpers shared across passes
 # ---------------------------------------------------------------------------
 
 
@@ -138,157 +146,8 @@ def _split_codec_group(text: str, kb: ReleaseKnowledge) -> tuple[str, str] | Non
     return None
 
 
-def _detect_group(tokens: list[Token], kb: ReleaseKnowledge) -> tuple[str, int | None]:
-    """Identify the release group by walking tokens right-to-left.
-
-    Returns ``(group_name, token_index_carrying_group)`` — the index is
-    ``None`` when the group is missing entirely (no trailing ``-`` token
-    in the stream).
-
-    Priority:
-      1. Rightmost token of shape ``codec-GROUP`` (clearest signal).
-      2. Rightmost token containing ``-`` whose head is *not* a known
-         source token (Web-DL etc. shouldn't be confused with a group).
-    """
-    # Priority 1: codec-GROUP
-    for tok in reversed(tokens):
-        split = _split_codec_group(tok.text, kb)
-        if split is not None:
-            _, group = split
-            return (group or "UNKNOWN"), tok.index
-
-    # Priority 2: rightmost dash, excluding known dashed sources
-    for tok in reversed(tokens):
-        if "-" not in tok.text:
-            continue
-        head, _, tail = tok.text.rpartition("-")
-        # Skip dashed-source tokens like "Web-DL"
-        if (
-            head.lower() in kb.sources
-            or tok.text.lower().replace("-", "") in kb.sources
-        ):
-            continue
-        if tail:
-            return tail, tok.index
-
-    return "UNKNOWN", None
-
-
-def _annotate_easy(
-    tokens: list[Token],
-    kb: ReleaseKnowledge,
-    schema: GroupSchema,
-    group_token_index: int,
-) -> list[Token] | None:
-    """Annotate tokens following a known group schema (EASY path).
-
-    Returns the new token list on success, or ``None`` if the schema
-    walk fails — a mandatory chunk that doesn't match aborts EASY and
-    lets the caller fall back to SHITTY without crashing.
-    """
-    result = list(tokens)
-
-    # The codec-GROUP token is special: it carries TWO roles (CODEC +
-    # GROUP). We split it conceptually and tag it as CODEC here; the
-    # group itself is propagated via ``extra["group"]`` so the assemble
-    # step can recover both pieces from one token. When we do this,
-    # ``codec_pre_consumed`` is True so the schema walk knows to skip
-    # the CODEC chunk (it has nothing left to match in the body).
-    group_token = result[group_token_index]
-    cg_split = _split_codec_group(group_token.text, kb)
-    codec_pre_consumed = False
-    if cg_split is not None:
-        codec, group = cg_split
-        result[group_token_index] = group_token.with_role(
-            TokenRole.CODEC, codec=codec, group=group or "UNKNOWN"
-        )
-        codec_pre_consumed = True
-    else:
-        # Group on a non-codec token (e.g. release without codec).
-        head, _, tail = group_token.text.rpartition("-")
-        result[group_token_index] = group_token.with_role(
-            TokenRole.GROUP, group=tail or "UNKNOWN", prefix=head
-        )
-
-    # Walk the schema left-to-right against tokens [0 .. group_token_index].
-    # The codec-GROUP token at `group_token_index` already consumed CODEC
-    # + GROUP, so we walk up to (not including) it.
-    body = result[:group_token_index]
-    chunk_idx = 0
-    tok_idx = 0
-
-    # 1) TITLE — special: consume contiguous UNKNOWN tokens until we hit
-    #    a token whose text matches a non-title role.
-    while chunk_idx < len(schema.chunks) and schema.chunks[chunk_idx].role is TokenRole.TITLE:
-        title_end = _find_title_end(body, kb)
-        # All body tokens up to title_end are title parts.
-        for i in range(tok_idx, title_end):
-            result[i] = body[i].with_role(TokenRole.TITLE)
-        tok_idx = title_end
-        chunk_idx += 1
-
-    # 2) Remaining chunks. CODEC and GROUP that were pre-consumed by the
-    #    codec-GROUP token at the end of the stream are skipped here.
-    for chunk in schema.chunks[chunk_idx:]:
-        if chunk.role is TokenRole.GROUP:
-            # Handled above via the trailing token.
-            continue
-        if chunk.role is TokenRole.CODEC and codec_pre_consumed:
-            # Already attached to the trailing token's extras.
-            continue
-
-        if tok_idx >= len(body):
-            if chunk.optional:
-                continue
-            return None
-
-        tok = body[tok_idx]
-        matched_role = _match_role(tok.text, chunk.role, kb)
-
-        if matched_role is None:
-            if chunk.optional:
-                continue
-            return None
-
-        result[tok_idx] = tok.with_role(matched_role)
-        tok_idx += 1
-
-    # Body must be fully consumed for EASY to succeed. Leftover tokens
-    # would mean we missed a chunk (e.g. extra audio/HDR tokens not in
-    # the schema yet) — fall back to SHITTY rather than silently dropping.
-    if tok_idx < len(body):
-        return None
-
-    return result
-
-
-def _find_title_end(body: list[Token], kb: ReleaseKnowledge) -> int:
-    """Return the exclusive index where the title ends.
-
-    The title is the leftmost run of tokens that don't match any known
-    structural/technical role. Stops at the first token that does.
-    """
-    for i, tok in enumerate(body):
-        if _parse_season_episode(tok.text) is not None:
-            return i
-        if _is_year(tok.text):
-            return i
-        if tok.text.lower() in kb.resolutions:
-            return i
-        if tok.text.lower() in kb.sources:
-            return i
-        if tok.text.lower() in kb.codecs:
-            return i
-    return len(body)
-
-
 def _match_role(text: str, role: TokenRole, kb: ReleaseKnowledge) -> TokenRole | None:
-    """Return ``role`` if ``text`` matches it under ``kb``, else ``None``.
-
-    Used by the schema walk: each chunk requests a specific role, and
-    this checks whether the current token can play it. Optional chunks
-    that don't match are silently skipped.
-    """
+    """Return ``role`` if ``text`` matches it under ``kb``, else ``None``."""
     lower = text.lower()
 
     if role is TokenRole.YEAR:
@@ -313,12 +172,314 @@ def _match_role(text: str, role: TokenRole, kb: ReleaseKnowledge) -> TokenRole |
     return None
 
 
+# ---------------------------------------------------------------------------
+# Stage 2a — group detection
+# ---------------------------------------------------------------------------
+
+
+def _detect_group(tokens: list[Token], kb: ReleaseKnowledge) -> tuple[str, int | None]:
+    """Identify the release group by walking tokens right-to-left.
+
+    Returns ``(group_name, token_index_carrying_group)``, or
+    ``("UNKNOWN", None)`` when no token yields a group.
+    """
+    # Priority 1: codec-GROUP shape (clearest signal).
+    for tok in reversed(tokens):
+        split = _split_codec_group(tok.text, kb)
+        if split is not None:
+            _, group = split
+            return (group or "UNKNOWN"), tok.index
+
+    # Priority 2: rightmost dash, excluding dashed sources (Web-DL, etc.).
+    for tok in reversed(tokens):
+        if "-" not in tok.text:
+            continue
+        head, _, tail = tok.text.rpartition("-")
+        if (
+            head.lower() in kb.sources
+            or tok.text.lower().replace("-", "") in kb.sources
+        ):
+            continue
+        if tail:
+            return tail, tok.index
+
+    return "UNKNOWN", None
+
+
+# ---------------------------------------------------------------------------
+# Stage 2b — structural annotation (schema-driven)
+# ---------------------------------------------------------------------------
+
+
+def _annotate_structural(
+    tokens: list[Token],
+    kb: ReleaseKnowledge,
+    schema: GroupSchema,
+    group_token_index: int,
+) -> list[Token] | None:
+    """Annotate structural tokens following a known group schema.
+
+    Walks the schema's chunks against the body (tokens up to the group
+    token). For each chunk, scans forward in the body for a matching
+    token — tokens passed over without match are left UNKNOWN (the
+    enricher pass will handle them).
+
+    Returns ``None`` if any mandatory chunk fails to find a match.
+    """
+    result = list(tokens)
+
+    # The codec-GROUP token carries CODEC + GROUP. Split it now so the
+    # schema walk knows the codec is "pre-consumed" at the end.
+    group_token = result[group_token_index]
+    cg_split = _split_codec_group(group_token.text, kb)
+    codec_pre_consumed = False
+    if cg_split is not None:
+        codec, group = cg_split
+        result[group_token_index] = group_token.with_role(
+            TokenRole.CODEC, codec=codec, group=group or "UNKNOWN"
+        )
+        codec_pre_consumed = True
+    else:
+        head, _, tail = group_token.text.rpartition("-")
+        result[group_token_index] = group_token.with_role(
+            TokenRole.GROUP, group=tail or "UNKNOWN", prefix=head
+        )
+
+    body_end = group_token_index  # exclusive
+    tok_idx = 0
+    chunk_idx = 0
+
+    # 1) TITLE — leftmost contiguous tokens up to the first structural
+    #    boundary. Title is special because it can be multi-token.
+    while (
+        chunk_idx < len(schema.chunks)
+        and schema.chunks[chunk_idx].role is TokenRole.TITLE
+    ):
+        title_end = _find_title_end(result, body_end, kb)
+        for i in range(tok_idx, title_end):
+            result[i] = result[i].with_role(TokenRole.TITLE)
+        tok_idx = title_end
+        chunk_idx += 1
+
+    # 2) Remaining structural chunks. For each, scan forward in the body
+    #    for a matching token; tokens passed over remain UNKNOWN.
+    for chunk in schema.chunks[chunk_idx:]:
+        if chunk.role is TokenRole.GROUP:
+            continue
+        if chunk.role is TokenRole.CODEC and codec_pre_consumed:
+            continue
+
+        match_idx = _find_chunk(result, tok_idx, body_end, chunk.role, kb)
+        if match_idx is None:
+            if chunk.optional:
+                continue
+            return None
+
+        result[match_idx] = result[match_idx].with_role(chunk.role)
+        tok_idx = match_idx + 1
+
+    return result
+
+
+def _find_title_end(
+    tokens: list[Token], body_end: int, kb: ReleaseKnowledge
+) -> int:
+    """Return the exclusive index where the title ends.
+
+    The title is the leftmost run of tokens whose text does not match
+    any structural role (year, season/episode, resolution, source,
+    codec). Enricher tokens (audio, HDR, language) are *not* boundaries
+    because they can appear in the middle of the structural sequence;
+    however, in canonical scene names they don't appear inside the title
+    itself, so this heuristic holds in practice.
+    """
+    for i in range(body_end):
+        text = tokens[i].text
+        if _parse_season_episode(text) is not None:
+            return i
+        if _is_year(text):
+            return i
+        lower = text.lower()
+        if lower in kb.resolutions:
+            return i
+        if lower in kb.sources:
+            return i
+        if lower in kb.codecs:
+            return i
+    return body_end
+
+
+def _find_chunk(
+    tokens: list[Token],
+    start: int,
+    end: int,
+    role: TokenRole,
+    kb: ReleaseKnowledge,
+) -> int | None:
+    """Return the first index in ``[start, end)`` whose token matches ``role``.
+
+    Returns ``None`` if no token in the range matches. Tokens already
+    annotated (non-UNKNOWN) are skipped — they belong to another chunk.
+    """
+    for i in range(start, end):
+        if tokens[i].role is not TokenRole.UNKNOWN:
+            continue
+        if _match_role(tokens[i].text, role, kb) is not None:
+            return i
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Stage 2c — enricher pass (non-positional roles)
+# ---------------------------------------------------------------------------
+
+
+def _annotate_enrichers(tokens: list[Token], kb: ReleaseKnowledge) -> list[Token]:
+    """Tag the remaining UNKNOWN tokens with non-positional roles.
+
+    Multi-token sequences are matched first (so ``DTS.HD.MA`` wins over
+    a single-token ``DTS``). For each sequence match, the first token
+    receives the role + ``extra["sequence"]`` (the canonical joined
+    value), and the trailing members are marked with the same role +
+    ``extra["sequence_member"]="True"`` (the string, not the bool) so
+    :func:`assemble` extracts the value only from the primary.
+    """
+    result = list(tokens)
+
+    # Multi-token sequences first.
+    _apply_sequences(
+        result, kb.audio.get("sequences", []), "codec", TokenRole.AUDIO_CODEC
+    )
+    _apply_sequences(
+        result, kb.video_meta.get("sequences", []), "hdr", TokenRole.HDR
+    )
+    _apply_sequences(
+        result, kb.editions.get("sequences", []), "edition", TokenRole.EDITION
+    )
+
+    # Single tokens.
+    known_audio_codecs = {c.upper() for c in kb.audio.get("codecs", [])}
+    known_audio_channels = set(kb.audio.get("channels", []))
+    known_hdr = {h.upper() for h in kb.video_meta.get("hdr", [])} | kb.hdr_extra
+    known_bit_depth = {d.lower() for d in kb.video_meta.get("bit_depth", [])}
+    known_editions = {t.upper() for t in kb.editions.get("tokens", [])}
+
+    # Channel layouts like "5.1" are tokenized as two tokens ("5", "1")
+    # because "." is a separator. Detect consecutive pairs whose joined
+    # value (without any trailing "-GROUP") is in the channel set.
+    _detect_channel_pairs(result, known_audio_channels)
+
+    for i, tok in enumerate(result):
+        if tok.role is not TokenRole.UNKNOWN:
+            continue
+        text = tok.text
+        upper = text.upper()
+        lower = text.lower()
+
+        if upper in known_audio_codecs:
+            result[i] = tok.with_role(TokenRole.AUDIO_CODEC)
+            continue
+        if text in known_audio_channels:
+            result[i] = tok.with_role(TokenRole.AUDIO_CHANNELS)
+            continue
+        if upper in known_hdr:
+            result[i] = tok.with_role(TokenRole.HDR)
+            continue
+        if lower in known_bit_depth:
+            result[i] = tok.with_role(TokenRole.BIT_DEPTH)
+            continue
+        if upper in known_editions:
+            result[i] = tok.with_role(TokenRole.EDITION)
+            continue
+        if upper in kb.language_tokens:
+            result[i] = tok.with_role(TokenRole.LANGUAGE)
+            continue
+
+    return result
+
+
+def _apply_sequences(
+    tokens: list[Token],
+    sequences: list[dict],
+    value_key: str,
+    role: TokenRole,
+) -> None:
+    """Tag every non-overlapping occurrence of each sequence, in place.
+
+    Mutates ``tokens``: the first token of a match gets ``role`` plus
+    ``extra["sequence"]``, the rest ``extra["sequence_member"]="True"``.
+    Earlier ``sequences`` entries claim tokens first, so list longest first.
+    """
+    if not sequences:
+        return
+
+    upper_texts = [t.text.upper() for t in tokens]
+    consumed: set[int] = set()
+
+    for seq in sequences:
+        seq_upper = [s.upper() for s in seq["tokens"]]
+        n = len(seq_upper)
+        for start in range(len(tokens) - n + 1):
+            if any(idx in consumed for idx in range(start, start + n)):
+                continue
+            if any(
+                tokens[start + k].role is not TokenRole.UNKNOWN for k in range(n)
+            ):
+                continue
+            if upper_texts[start : start + n] == seq_upper:
+                tokens[start] = tokens[start].with_role(
+                    role, sequence=seq[value_key]
+                )
+                for k in range(1, n):
+                    tokens[start + k] = tokens[start + k].with_role(
+                        role, sequence_member="True"
+                    )
+                consumed.update(range(start, start + n))
+
+
+def _detect_channel_pairs(
+    tokens: list[Token], known_channels: set[str]
+) -> None:
+    """Tag two consecutive tokens whose joined text is a channel layout.
+
+    Example: ``["5", "1-KTH"]`` → joined ``"5.1"`` (after stripping the
+    ``-GROUP`` suffix on the second). The first token always carries the
+    value; the second is marked as a member only if still UNKNOWN, so an
+    already-tagged token (e.g. codec-GROUP) keeps its role. Mutates in place.
+    """
+    for i in range(len(tokens) - 1):
+        first = tokens[i]
+        second = tokens[i + 1]
+        if first.role is not TokenRole.UNKNOWN:
+            continue
+        # Strip a "-GROUP" suffix on the second token before joining.
+        second_text = second.text.split("-")[0]
+        candidate = f"{first.text}.{second_text}"
+        if candidate not in known_channels:
+            continue
+        # Only tag the first token (carries the channel value). The
+        # second token may legitimately remain UNKNOWN (or be the
+        # codec-GROUP token, already tagged CODEC).
+        tokens[i] = first.with_role(
+            TokenRole.AUDIO_CHANNELS, sequence=candidate
+        )
+        if second.role is TokenRole.UNKNOWN:
+            tokens[i + 1] = second.with_role(
+                TokenRole.AUDIO_CHANNELS, sequence_member="True"
+            )
+
+
+# ---------------------------------------------------------------------------
+# Stage 2 entry point
+# ---------------------------------------------------------------------------
+
+
 def annotate(tokens: list[Token], kb: ReleaseKnowledge) -> list[Token] | None:
     """Annotate token roles. Returns ``None`` when the EASY path fails.
 
     A ``None`` return means: the group is unknown, OR the schema walk
-    aborted on a mandatory mismatch. The caller (``services.parse_release``)
-    falls back to the legacy SHITTY heuristic in that case.
+    aborted on a mandatory mismatch. The caller falls back to the legacy
+    SHITTY heuristic in that case.
     """
     group_name, group_index = _detect_group(tokens, kb)
     if group_index is None:
@@ -328,7 +489,11 @@ def annotate(tokens: list[Token], kb: ReleaseKnowledge) -> list[Token] | None:
     if schema is None:
         return None
 
-    return _annotate_easy(tokens, kb, schema, group_index)
+    structural = _annotate_structural(tokens, kb, schema, group_index)
+    if structural is None:
+        return None
+
+    return _annotate_enrichers(structural, kb)
 
 
 # ---------------------------------------------------------------------------
@@ -345,9 +510,8 @@ def assemble(
     """Fold annotated tokens into a ``ParsedRelease``-compatible dict.
 
     Returns a dict (not a ``ParsedRelease`` instance) so the caller can
-    layer in additional fields (``parse_path``, etc.) before instantiation.
-    The dict's keys mirror the :class:`ParsedRelease` constructor
-    arguments.
+    layer in additional fields (``parse_path``, ``raw``, …) before
+    instantiation.
     """
     title_parts = [t.text for t in annotated if t.role is TokenRole.TITLE]
     title = ".".join(title_parts) if title_parts else (
@@ -362,33 +526,62 @@ def assemble(
     source: str | None = None
     codec: str | None = None
     group = "UNKNOWN"
+    audio_codec: str | None = None
+    audio_channels: str | None = None
+    bit_depth: str | None = None
+    hdr_format: str | None = None
+    edition: str | None = None
+    languages: list[str] = []
 
     for tok in annotated:
-        if tok.role is TokenRole.YEAR:
+        # Skip non-primary members of a multi-token sequence.
+        if tok.extra.get("sequence_member") == "True":
+            continue
+
+        role = tok.role
+        if role is TokenRole.YEAR:
             year = int(tok.text)
-        elif tok.role is TokenRole.SEASON_EPISODE:
+        elif role is TokenRole.SEASON_EPISODE:
             parsed = _parse_season_episode(tok.text)
             if parsed is not None:
                 season, episode, episode_end = parsed
-        elif tok.role is TokenRole.RESOLUTION:
+        elif role is TokenRole.RESOLUTION:
             quality = tok.text
-        elif tok.role is TokenRole.SOURCE:
+        elif role is TokenRole.SOURCE:
             source = tok.text
-        elif tok.role is TokenRole.CODEC:
-            # CODEC token may also carry the group (codec-GROUP shape).
+        elif role is TokenRole.CODEC:
             codec = tok.extra.get("codec", tok.text)
             if "group" in tok.extra:
                 group = tok.extra["group"] or "UNKNOWN"
-        elif tok.role is TokenRole.GROUP:
+        elif role is TokenRole.GROUP:
             group = tok.extra.get("group", tok.text) or "UNKNOWN"
+        elif role is TokenRole.AUDIO_CODEC:
+            if audio_codec is None:
+                audio_codec = tok.extra.get("sequence", tok.text)
+        elif role is TokenRole.AUDIO_CHANNELS:
+            if audio_channels is None:
+                audio_channels = tok.extra.get("sequence", tok.text)
+        elif role is TokenRole.BIT_DEPTH:
+            if bit_depth is None:
+                bit_depth = tok.text.lower()
+        elif role is TokenRole.HDR:
+            if hdr_format is None:
+                hdr_format = tok.extra.get("sequence", tok.text.upper())
+        elif role is TokenRole.EDITION:
+            if edition is None:
+                edition = tok.extra.get("sequence", tok.text.upper())
+        elif role is TokenRole.LANGUAGE:
+            languages.append(tok.text.upper())
 
     tech_parts = [p for p in (quality, source, codec) if p]
     tech_string = ".".join(tech_parts)
 
-    # Media type: TV if a season was parsed, otherwise movie if we have
-    # at least one tech marker, else unknown.
+    # Media type heuristic — same rules as the legacy parser, minus the
+    # documentary/concert/integrale specials (handled by SHITTY for now).
     if season is not None:
         media_type = "tv_show"
+    elif edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}:
+        media_type = "tv_complete"
     elif any((quality, source, codec, year)):
         media_type = "movie"
     else:
@@ -408,4 +601,10 @@ def assemble(
         "tech_string": tech_string,
         "media_type": media_type,
         "site_tag": site_tag,
+        "languages": languages,
+        "audio_codec": audio_codec,
+        "audio_channels": audio_channels,
+        "bit_depth": bit_depth,
+        "hdr_format": hdr_format,
+        "edition": edition,
     }
diff --git a/tests/domain/release/test_parser_v2_easy.py b/tests/domain/release/test_parser_v2_easy.py
index 1fc23bc..2400e0b 100644
--- a/tests/domain/release/test_parser_v2_easy.py
+++ b/tests/domain/release/test_parser_v2_easy.py
@@ -140,3 +140,65 @@ class TestAssemble:
         assert fields["source"] is None  # ELiTE omits it
         assert fields["tech_string"] == "1080p.x265"
         assert fields["group"] == "ELiTE"
+
+
+class TestEnrichers:
+    """Non-positional roles populated alongside the structural walk.
+
+    These releases would have failed the v2 EASY path before the enricher
+    pass landed (leftover unknown tokens would force a fallback). They
+    now succeed in v2 with rich metadata.
+    """
+
+    def test_bit_depth_and_audio(self) -> None:
+        name = "Back.in.Action.2025.1080p.WEBRip.10bit.DDP.5.1.x265-KONTRAST"
+        tokens, tag = tokenize(name, _KB)
+        annotated = annotate(tokens, _KB)
+        assert annotated is not None
+        fields = assemble(annotated, tag, name, _KB)
+
+        assert fields["title"] == "Back.in.Action"
+        assert fields["bit_depth"] == "10bit"
+        assert fields["audio_codec"] == "DDP"
+        assert fields["audio_channels"] == "5.1"
+
+    def test_hdr_sequence(self) -> None:
+        # DV.HDR10 sequence + TrueHD.Atmos sequence + 7.1 channels +
+        # DIRECTORS.CUT edition all in one release.
+        name = (
+            "Some.Movie.2024.DIRECTORS.CUT.2160p.BluRay.DV.HDR10."
+            "TrueHD.Atmos.7.1.x265-KONTRAST"
+        )
+        tokens, tag = tokenize(name, _KB)
+        annotated = annotate(tokens, _KB)
+        assert annotated is not None
+        fields = assemble(annotated, tag, name, _KB)
+
+        assert fields["edition"] == "DIRECTORS.CUT"
+        assert fields["hdr_format"] == "DV.HDR10"
+        assert fields["audio_codec"] == "TrueHD.Atmos"
+        assert fields["audio_channels"] == "7.1"
+
+    def test_multiple_languages(self) -> None:
+        name = "Movie.2020.FRENCH.MULTI.1080p.WEBRip.DTS.HD.MA.5.1.x265-KONTRAST"
+        tokens, tag = tokenize(name, _KB)
+        annotated = annotate(tokens, _KB)
+        assert annotated is not None
+        fields = assemble(annotated, tag, name, _KB)
+
+        assert fields["languages"] == ["FRENCH", "MULTI"]
+        assert fields["audio_codec"] == "DTS-HD.MA"
+        assert fields["audio_channels"] == "5.1"
+
+    def test_tv_with_language(self) -> None:
+        name = "Show.S01E05.FRENCH.1080p.WEBRip.x265-KONTRAST"
+        tokens, tag = tokenize(name, _KB)
+        annotated = annotate(tokens, _KB)
+        assert annotated is not None
+        fields = assemble(annotated, tag, name, _KB)
+
+        assert fields["title"] == "Show"
+        assert fields["season"] == 1
+        assert fields["episode"] == 5
+        assert fields["languages"] == ["FRENCH"]
+        assert fields["media_type"] == "tv_show"