refactor(release): rename ParsePath enum to TokenizationRoute

ParsePath collided with pathlib.Path in mental models, and was one letter from the parse_path attribute that stores its value — confusion on confusion. Road (EASY/SHITTY/PATH_OF_PAIN) is the parser-confidence axis; TokenizationRoute (DIRECT/SANITIZED/AI) is the tokenization-method axis. They're orthogonal and the new name makes that obvious. Field name parse_path stays — it's the right name for the attribute that *holds* the route. String values ("direct", "sanitized", "ai") stay too, so YAML fixtures and the analyze_release tool spec are unchanged. Only the type symbol changes: - CHANGELOG: new entry under "Changed". - value_objects.py: class rename + docstring spelling out orthogonality with Road, plus the parse_path field annotation/default and the isinstance check with its error message. - services.py: import + 4 call sites. - scoring.py: docstring cross-reference updated. - tests/domain/release/test_parser_v2_scoring.py: import + 3 call sites.
2026-05-21 07:39:42 +02:00
parent 0246f85ef8
commit 5e0ed11672
5 changed files with 39 additions and 16 deletions
@@ -57,6 +57,17 @@ callers).

 ### Changed

+- **`ParsePath` enum renamed to `TokenizationRoute`.** The old name
+  collided with `pathlib.Path` in code-reading mental models, and was
+  one letter from `parse_path` (the field that holds the value) — making
+  it harder than it needed to be to tell the type from the attribute.
+  `TokenizationRoute` says what it actually captures (DIRECT /
+  SANITIZED / AI = how the name reached the tokenizer), and the class
+  docstring now spells out the orthogonality with `Road` (EASY /
+  SHITTY / PATH_OF_PAIN, which captures parser confidence on
+  `ParseReport`). The `parse_path` field name and its string values
+  stay unchanged, so YAML fixtures, the `analyze_release` tool
+  spec, and any external consumer are untouched.
 - **`enrich_from_probe` codec mappings moved to YAML.** The three
  hard-coded module dicts (`_VIDEO_CODEC_MAP`, `_AUDIO_CODEC_MAP`,
  `_CHANNEL_MAP`) translating ffprobe output to scene tokens
@@ -34,7 +34,7 @@ from .tokens import Token, TokenRole
 class Road(str, Enum):
    """How the parser handled a given release name.

-    Distinct from :class:`~alfred.domain.release.value_objects.ParsePath`,
+    Distinct from :class:`~alfred.domain.release.value_objects.TokenizationRoute`,
    which records the tokenization route (DIRECT / SANITIZED / AI). Road
    is about confidence in the *result*, not the *method*.
    """
@@ -21,7 +21,7 @@ from __future__ import annotations
 from .parser import pipeline as _v2
 from .parser import scoring as _scoring
 from .ports import ReleaseKnowledge
-from .value_objects import MediaTypeToken, ParsedRelease, ParsePath, ParseReport
+from .value_objects import MediaTypeToken, ParsedRelease, ParseReport, TokenizationRoute


 def parse_release(
@@ -44,7 +44,7 @@ def parse_release(
    3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a
       group schema is known, SHITTY otherwise) → assemble → score.
    """
-    parse_path = ParsePath.DIRECT
+    parse_path = TokenizationRoute.DIRECT

    # Apostrophes inside titles ("Don't", "L'avare") are common and should
    # not push the release through the AI fallback. Strip them up front so
@@ -53,11 +53,11 @@ def parse_release(
    working_name = name
    if "'" in working_name:
        working_name = working_name.replace("'", "")
-        parse_path = ParsePath.SANITIZED
+        parse_path = TokenizationRoute.SANITIZED

    clean, site_tag = _v2.strip_site_tag(working_name)
    if site_tag is not None:
-        parse_path = ParsePath.SANITIZED
+        parse_path = TokenizationRoute.SANITIZED

    if not _is_well_formed(clean, kb):
        parsed = ParsedRelease(
@@ -75,7 +75,7 @@ def parse_release(
            group="UNKNOWN",
            media_type=MediaTypeToken.UNKNOWN,
            site_tag=site_tag,
-            parse_path=ParsePath.AI,
+            parse_path=TokenizationRoute.AI,
        )
        report = ParseReport(
            confidence=0,
@@ -40,9 +40,21 @@ class MediaTypeToken(str, Enum):
    UNKNOWN = "unknown"


-class ParsePath(str, Enum):
-    """How a ``ParsedRelease`` was produced. ``str``-backed for the same
-    reasons as :class:`MediaTypeToken`."""
+class TokenizationRoute(str, Enum):
+    """How a ``ParsedRelease`` was produced.
+
+    Records the **tokenization route** — i.e. whether the release name
+    was tokenized as-is (``DIRECT``), after a sanitization pass like
+    site-tag stripping or apostrophe removal (``SANITIZED``), or whether
+    structural parsing failed and an LLM rebuild is needed (``AI``).
+
+    This is **orthogonal** to :class:`~alfred.domain.release.parser.scoring.Road`
+    (EASY / SHITTY / PATH_OF_PAIN), which captures parser confidence and
+    is recorded on :class:`ParseReport`. Both can vary independently —
+    a SANITIZED name can still land on the EASY road if a group schema
+    matches the tokens after stripping.
+
+    ``str``-backed for the same reasons as :class:`MediaTypeToken`."""

    DIRECT = "direct"
    SANITIZED = "sanitized"
@@ -127,7 +139,7 @@ class ParsedRelease:
    site_tag: str | None = (
        None  # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
    )
-    parse_path: ParsePath = ParsePath.DIRECT
+    parse_path: TokenizationRoute = TokenizationRoute.DIRECT
    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
@@ -168,9 +180,9 @@ class ParsedRelease:
                f"ParsedRelease.media_type must be a MediaTypeToken, "
                f"got {type(self.media_type).__name__}: {self.media_type!r}"
            )
-        if not isinstance(self.parse_path, ParsePath):
+        if not isinstance(self.parse_path, TokenizationRoute):
            raise ValidationError(
-                f"ParsedRelease.parse_path must be a ParsePath, "
+                f"ParsedRelease.parse_path must be a TokenizationRoute, "
                f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
            )

@@ -22,8 +22,8 @@ from alfred.domain.release.services import parse_release
 from alfred.domain.release.value_objects import (
    MediaTypeToken,
    ParsedRelease,
-    ParsePath,
    ParseReport,
+    TokenizationRoute,
 )
 from alfred.domain.shared.exceptions import ValidationError
 from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
@@ -79,7 +79,7 @@ def _movie(year: int = 2020, **overrides) -> ParsedRelease:
        codec="x264",
        group="GROUP",
        media_type=MediaTypeToken.MOVIE,
-        parse_path=ParsePath.DIRECT,
+        parse_path=TokenizationRoute.DIRECT,
    )
    base.update(overrides)
    return ParsedRelease(**base)
@@ -120,7 +120,7 @@ class TestComputeScore:
            codec="x265",
            group="KONTRAST",
            media_type=MediaTypeToken.TV_SHOW,
-            parse_path=ParsePath.DIRECT,
+            parse_path=TokenizationRoute.DIRECT,
        )
        tokens = [
            Token("Oz", 0, TokenRole.TITLE),
@@ -230,7 +230,7 @@ class TestCollectors:
            codec=None,
            group="UNKNOWN",
            media_type=MediaTypeToken.UNKNOWN,
-            parse_path=ParsePath.DIRECT,
+            parse_path=TokenizationRoute.DIRECT,
        )
        assert set(collect_missing_critical(empty)) == {
            "title",