refactor(release): rename ParsePath enum to TokenizationRoute

ParsePath collided with pathlib.Path in mental models, and differed
from the parse_path attribute that stores its value only by casing and
an underscore — confusion on confusion. Road (EASY/SHITTY/PATH_OF_PAIN)
is the parser-confidence
axis; TokenizationRoute (DIRECT/SANITIZED/AI) is the tokenization-method
axis. They're orthogonal and the new name makes that obvious.

Field name parse_path stays — it's the right name for the attribute
that *holds* the route. String values ("direct", "sanitized", "ai")
stay too, so YAML fixtures and the analyze_release tool spec are
unchanged. Only the type symbol changes:

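- value_objects.py: class rename + docstring spelling out orthogonality
  with Road; the parse_path annotation/default and the isinstance check
  (including its error message) follow the new name.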
- services.py: import + 4 call sites.
- scoring.py: docstring cross-reference updated.
- tests/domain/release/test_parser_v2_scoring.py: import + 3 call sites.
This commit is contained in:
2026-05-21 07:39:42 +02:00
parent 0246f85ef8
commit 5e0ed11672
5 changed files with 39 additions and 16 deletions
+11
View File
@@ -57,6 +57,17 @@ callers).
### Changed
- **`ParsePath` enum renamed to `TokenizationRoute`.** The old name
collided with `pathlib.Path` in code-reading mental models, and
differed from `parse_path` (the field that holds the value) only by
casing and an underscore, which made type and attribute easy to mix up.
`TokenizationRoute` says what it actually captures (DIRECT /
SANITIZED / AI = how the name reached the tokenizer), and the class
docstring now spells out the orthogonality with `Road` (EASY /
SHITTY / PATH_OF_PAIN, which captures parser confidence on
`ParseReport`). The `parse_path` field name and the enum's string
values stay unchanged, so YAML fixtures, the `analyze_release` tool
spec, and any external consumer are untouched.
- **`enrich_from_probe` codec mappings moved to YAML.** The three
hard-coded module dicts (`_VIDEO_CODEC_MAP`, `_AUDIO_CODEC_MAP`,
`_CHANNEL_MAP`) translating ffprobe output to scene tokens
+1 -1
View File
@@ -34,7 +34,7 @@ from .tokens import Token, TokenRole
class Road(str, Enum):
"""How the parser handled a given release name.
Distinct from :class:`~alfred.domain.release.value_objects.ParsePath`,
Distinct from :class:`~alfred.domain.release.value_objects.TokenizationRoute`,
which records the tokenization route (DIRECT / SANITIZED / AI). Road
is about confidence in the *result*, not the *method*.
"""
+5 -5
View File
@@ -21,7 +21,7 @@ from __future__ import annotations
from .parser import pipeline as _v2
from .parser import scoring as _scoring
from .ports import ReleaseKnowledge
from .value_objects import MediaTypeToken, ParsedRelease, ParsePath, ParseReport
from .value_objects import MediaTypeToken, ParsedRelease, ParseReport, TokenizationRoute
def parse_release(
@@ -44,7 +44,7 @@ def parse_release(
3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a
group schema is known, SHITTY otherwise) → assemble → score.
"""
parse_path = ParsePath.DIRECT
parse_path = TokenizationRoute.DIRECT
# Apostrophes inside titles ("Don't", "L'avare") are common and should
# not push the release through the AI fallback. Strip them up front so
@@ -53,11 +53,11 @@ def parse_release(
working_name = name
if "'" in working_name:
working_name = working_name.replace("'", "")
parse_path = ParsePath.SANITIZED
parse_path = TokenizationRoute.SANITIZED
clean, site_tag = _v2.strip_site_tag(working_name)
if site_tag is not None:
parse_path = ParsePath.SANITIZED
parse_path = TokenizationRoute.SANITIZED
if not _is_well_formed(clean, kb):
parsed = ParsedRelease(
@@ -75,7 +75,7 @@ def parse_release(
group="UNKNOWN",
media_type=MediaTypeToken.UNKNOWN,
site_tag=site_tag,
parse_path=ParsePath.AI,
parse_path=TokenizationRoute.AI,
)
report = ParseReport(
confidence=0,
+18 -6
View File
@@ -40,9 +40,21 @@ class MediaTypeToken(str, Enum):
UNKNOWN = "unknown"
class ParsePath(str, Enum):
"""How a ``ParsedRelease`` was produced. ``str``-backed for the same
reasons as :class:`MediaTypeToken`."""
class TokenizationRoute(str, Enum):
"""How a ``ParsedRelease`` was produced.
Records the **tokenization route** — i.e. whether the release name
was tokenized as-is (``DIRECT``), after a sanitization pass like
site-tag stripping or apostrophe removal (``SANITIZED``), or whether
structural parsing failed and an LLM rebuild is needed (``AI``).
This is **orthogonal** to :class:`~alfred.domain.release.parser.scoring.Road`
(EASY / SHITTY / PATH_OF_PAIN), which captures parser confidence and
is recorded on :class:`ParseReport`. Both can vary independently —
a SANITIZED name can still land on the EASY road if a group schema
matches the tokens after stripping.
``str``-backed for the same reasons as :class:`MediaTypeToken`."""
DIRECT = "direct"
SANITIZED = "sanitized"
@@ -127,7 +139,7 @@ class ParsedRelease:
site_tag: str | None = (
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
)
parse_path: ParsePath = ParsePath.DIRECT
parse_path: TokenizationRoute = TokenizationRoute.DIRECT
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
@@ -168,9 +180,9 @@ class ParsedRelease:
f"ParsedRelease.media_type must be a MediaTypeToken, "
f"got {type(self.media_type).__name__}: {self.media_type!r}"
)
if not isinstance(self.parse_path, ParsePath):
if not isinstance(self.parse_path, TokenizationRoute):
raise ValidationError(
f"ParsedRelease.parse_path must be a ParsePath, "
f"ParsedRelease.parse_path must be a TokenizationRoute, "
f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
)
@@ -22,8 +22,8 @@ from alfred.domain.release.services import parse_release
from alfred.domain.release.value_objects import (
MediaTypeToken,
ParsedRelease,
ParsePath,
ParseReport,
TokenizationRoute,
)
from alfred.domain.shared.exceptions import ValidationError
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
@@ -79,7 +79,7 @@ def _movie(year: int = 2020, **overrides) -> ParsedRelease:
codec="x264",
group="GROUP",
media_type=MediaTypeToken.MOVIE,
parse_path=ParsePath.DIRECT,
parse_path=TokenizationRoute.DIRECT,
)
base.update(overrides)
return ParsedRelease(**base)
@@ -120,7 +120,7 @@ class TestComputeScore:
codec="x265",
group="KONTRAST",
media_type=MediaTypeToken.TV_SHOW,
parse_path=ParsePath.DIRECT,
parse_path=TokenizationRoute.DIRECT,
)
tokens = [
Token("Oz", 0, TokenRole.TITLE),
@@ -230,7 +230,7 @@ class TestCollectors:
codec=None,
group="UNKNOWN",
media_type=MediaTypeToken.UNKNOWN,
parse_path=ParsePath.DIRECT,
parse_path=TokenizationRoute.DIRECT,
)
assert set(collect_missing_critical(empty)) == {
"title",