feat(release): foundations for parse-confidence scoring

Add the building blocks for Phase A scoring without yet wiring them into parse_release. Nothing changes at runtime — parse_release still returns a single ParsedRelease — but the pieces needed to upgrade it in a follow-up commit are now in place.

- alfred/knowledge/release/scoring.yaml: weights / penalties / thresholds. Title and media_type are heavy (30 / 20), structural fields medium (year 15, season 10), tech fields light (5 each). Unknown-token penalty is 5 per token, capped at 30 in total. SHITTY/PoP cutoff at 60.
- load_scoring() loader with safe defaults baked in: a missing or partial YAML only de-tunes, never breaks.
- ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge populates it from load_scoring().
- New parser/scoring.py module with Road enum (EASY / SHITTY / PATH_OF_PAIN, distinct from ParsePath which records the tokenization route), and pure functions: compute_score, decide_road, collect_unknown_tokens, collect_missing_critical.
- ParseReport frozen VO in value_objects.py — exported alongside ParsedRelease.
2026-05-20 01:21:17 +02:00
parent fcd80763e2
commit 98c688f29b
7 changed files with 264 additions and 2 deletions
@@ -1,6 +1,6 @@
 """Release domain — release name parsing and naming conventions."""
 from .services import parse_release
-from .value_objects import ParsedRelease
+from .value_objects import ParsedRelease, ParseReport
-__all__ = ["ParsedRelease", "parse_release"]
+__all__ = ["ParsedRelease", "ParseReport", "parse_release"]
@@ -0,0 +1,139 @@
 """Parse-confidence scoring.
 ``parse_release`` returns a :class:`ParseReport` alongside its
 :class:`ParsedRelease`. The report carries:
 - ``confidence``: integer 0–100 derived from which structural and
  technical fields got populated, minus a penalty per UNKNOWN token
  left in the annotated stream.
 - ``road``: which of the three roads the parse took
  (:attr:`Road.EASY` / :attr:`Road.SHITTY` / :attr:`Road.PATH_OF_PAIN`).
 - ``unknown_tokens``: textual residue, useful for diagnostics.
 - ``missing_critical``: critical structural fields the score-tally found
  absent, always listed in the order title, media_type, year
  (e.g. ``("media_type", "year")``) — the caller can use this to drive
  PoP recovery (questions, LLM call).
 All weights, penalties and thresholds come from the injected knowledge
 base (``kb.scoring``), itself loaded from
 ``alfred/knowledge/release/scoring.yaml``. No magic numbers here.
 The scoring functions are pure — they consume the annotated token list
 and the resulting :class:`ParsedRelease` and return the report. They are
 called by ``services.parse_release`` after ``assemble`` has run.
 """
 from __future__ import annotations
 from enum import Enum
 from ..ports.knowledge import ReleaseKnowledge
 from ..value_objects import ParsedRelease
 from .tokens import Token, TokenRole
 class Road(str, Enum):
    """How the parser handled a given release name.

    Distinct from :class:`~alfred.domain.release.value_objects.ParsePath`,
    which records the tokenization route (DIRECT / SANITIZED / AI). Road
    is about confidence in the *result*, not the *method*.

    The ``str`` mixin makes every member compare equal to its plain string
    value (``Road.EASY == "easy"``), so a member can be stored wherever a
    bare string is expected. :func:`decide_road` is the function that
    selects a member.
    """
    # Chosen whenever a group schema matched; the score is not consulted.
    EASY = "easy"  # group schema matched — structural annotation
    # No schema matched, but the score reached the ``shitty_min`` threshold.
    SHITTY = "shitty"  # no schema, dict-driven annotation, score ≥ threshold
    # No schema matched and the score stayed under the threshold.
    PATH_OF_PAIN = "path_of_pain"  # score below threshold, needs help
 # Critical structural fields — their absence drives the
 # ``missing_critical`` list in the report.
 # NOTE(review): none of the functions visible in this module read this
 # tuple; ``collect_missing_critical`` spells out the same three names
 # inline, so the two have to be kept in sync by hand (or the function
 # should iterate this constant) — confirm which one is intended.
 _CRITICAL_FIELDS: tuple[str, ...] = ("title", "media_type", "year")
 def _is_tv_shaped(parsed: ParsedRelease) -> bool:
    """Tell whether *parsed* should be scored as a TV release.

    A parse is TV-shaped as soon as a season was extracted (``0`` counts,
    ``None`` does not). ``compute_score`` uses this to gate the
    season/episode weights.
    """
    season = parsed.season
    return season is not None
 def compute_score(
    parsed: ParsedRelease,
    annotated: list[Token],
    kb: ReleaseKnowledge,
 ) -> int:
    """Score how trustworthy a parse is, from 0 to 100.

    The score is the sum of the ``kb.scoring["weights"]`` entries for every
    field the parse managed to fill, minus a penalty for each token still
    tagged UNKNOWN in ``annotated``. The penalty is capped at
    ``penalties["max_unknown_penalty"]`` and the final value is clamped to
    ``[0, 100]``.

    What "filled" means per weight key:

    - ``title``, ``resolution`` (read from ``parsed.quality``), ``source``,
      ``codec``: the field is truthy.
    - ``media_type``: set and its ``.value`` is not ``"unknown"``.
    - ``year``: not ``None``.
    - ``season`` / ``episode``: not ``None``, and only when the parse is
      TV-shaped (see ``_is_tv_shaped``).
    - ``group``: truthy and not the ``"UNKNOWN"`` placeholder.

    A weight or penalty key that is absent from the config counts as 0;
    the ``"weights"`` and ``"penalties"`` sections themselves must exist
    (``KeyError`` otherwise).
    """
    weight_of = kb.scoring["weights"]
    penalty_cfg = kb.scoring["penalties"]

    tv_shaped = _is_tv_shaped(parsed)

    # (weight key, was the field filled?) — one row per scorable field.
    tally = (
        ("title", bool(parsed.title)),
        (
            "media_type",
            bool(parsed.media_type and parsed.media_type.value != "unknown"),
        ),
        ("year", parsed.year is not None),
        ("season", tv_shaped and parsed.season is not None),
        ("episode", tv_shaped and parsed.episode is not None),
        ("resolution", bool(parsed.quality)),
        ("source", bool(parsed.source)),
        ("codec", bool(parsed.codec)),
        ("group", bool(parsed.group and parsed.group != "UNKNOWN")),
    )
    earned = sum(weight_of.get(key, 0) for key, filled in tally if filled)

    # Residual UNKNOWN tokens each cost a fixed amount, up to the cap.
    leftovers = [tok for tok in annotated if tok.role is TokenRole.UNKNOWN]
    penalty = min(
        len(leftovers) * penalty_cfg.get("unknown_token", 0),
        penalty_cfg.get("max_unknown_penalty", 0),
    )

    return max(0, min(100, earned - penalty))
 def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
    """List the raw text of every token annotation left as UNKNOWN.

    The order of appearance in ``annotated`` is preserved; an empty tuple
    means nothing was left unclassified.
    """
    residue: list[str] = []
    for token in annotated:
        if token.role is TokenRole.UNKNOWN:
            residue.append(token.text)
    return tuple(residue)
 def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
    """Name the critical structural fields the parse failed to fill.

    The result is always ordered title, media_type, year. A field counts
    as missing when:

    - ``title`` is falsy,
    - ``media_type`` is falsy or its ``.value`` is ``"unknown"``,
    - ``year`` is ``None``.
    """
    media_type = parsed.media_type
    # Insertion order of this mapping is the order of the returned names.
    absent = {
        "title": not parsed.title,
        "media_type": not media_type or media_type.value == "unknown",
        "year": parsed.year is None,
    }
    return tuple(name for name, is_absent in absent.items() if is_absent)
 def decide_road(
    score: int,
    has_schema: bool,
    kb: ReleaseKnowledge,
 ) -> Road:
    """Classify a parse into one of the three roads.

    A matched group schema (``has_schema``) settles the question on its
    own: the result is EASY and neither the score nor the knowledge base
    is consulted. Without a schema, the score is compared against
    ``kb.scoring["thresholds"]["shitty_min"]`` (60 when the key is absent):
    at or above it the parse is SHITTY, below it PATH_OF_PAIN.
    """
    if not has_schema:
        cutoff = kb.scoring["thresholds"].get("shitty_min", 60)
        return Road.SHITTY if score >= cutoff else Road.PATH_OF_PAIN
    return Road.EASY
@@ -40,6 +40,18 @@ class ReleaseKnowledge(Protocol):
    separators: list[str]
    # --- Parse scoring (Phase A) ---
    #
    # ``scoring`` is a dict with three keys:
    #   - ``weights``:     dict[field_name, int]   field weight contribution
    #   - ``penalties``:   {"unknown_token": int, "max_unknown_penalty": int}
    #   - ``thresholds``:  {"shitty_min": int}     SHITTY vs PATH_OF_PAIN cutoff
    #
    # Concrete values come from ``alfred/knowledge/release/scoring.yaml``.
    # The loader fills in safe defaults so this dict is always populated.
    scoring: dict
    # --- File-extension sets (used by application/infra modules that work
    #     directly with filesystem paths, e.g. media-type detection, video
    #     lookup). Domain parsing itself doesn't touch these. ---
@@ -72,6 +72,40 @@ def _strip_episode_from_normalized(normalized: str) -> str:
    return ".".join(result)
@dataclass(frozen=True)
class ParseReport:
    """Immutable quality report that accompanies a :class:`ParsedRelease`.

    It is designed to be handed back by ``parse_release`` as the second
    element of a ``(ParsedRelease, ParseReport)`` pair. Keeping it apart
    from ``ParsedRelease`` lets the structural value object stay silent
    about its own reliability.

    Attributes:

    - ``confidence``: integer in ``[0, 100]``; anything outside that range
      is rejected at construction time
      (see :func:`parser.scoring.compute_score` for how it is derived).
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"`` — the
      confidence tier, not to be confused with
      ``ParsedRelease.parse_path`` (the tokenization route). The value is
      not validated here.
    - ``unknown_tokens``: text of the tokens that ended annotation with
      role UNKNOWN, in order of appearance.
    - ``missing_critical``: critical structural fields the parser could
      not fill (subset of ``{"title", "media_type", "year"}``).
    """

    confidence: int
    road: str  # expected to be one of the parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        # Guard clause: the only invariant enforced is the 0–100 range.
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
@dataclass
 class ParsedRelease:
    """Structured representation of a parsed release name.
@@ -160,6 +160,37 @@ def load_group_schemas() -> dict:
    return result
 def load_scoring() -> dict:
    """Load the parse-scoring config, layered over built-in defaults.

    Returns a fresh dict with exactly three top-level keys — ``weights``,
    ``penalties`` and ``thresholds`` — each a flat dict. Values found in
    ``scoring.yaml`` override the defaults key by key; whatever the file
    omits keeps its default, so a partial YAML only de-tunes the parser.

    An empty document, a null section, or a section that is not a mapping
    is ignored instead of raising, so the three keys are always present.
    Behaviour when the file itself is missing depends on ``_load``.
    """
    config = {
        "weights": {
            "title": 30,
            "media_type": 20,
            "year": 15,
            "season": 10,
            "episode": 5,
            "resolution": 5,
            "source": 5,
            "codec": 5,
            "group": 5,
        },
        "penalties": {"unknown_token": 5, "max_unknown_penalty": 30},
        "thresholds": {"shitty_min": 60},
    }

    raw = _load("scoring.yaml")
    # An empty YAML document parses to None, and a scalar/list document to
    # a non-dict; either way there are no overrides to apply.
    # NOTE(review): assumes _load returns the parsed document as-is — confirm.
    if not isinstance(raw, dict):
        raw = {}

    for section, values in config.items():
        overrides = raw.get(section)
        # Skip null or malformed sections rather than letting update() raise.
        if isinstance(overrides, dict):
            values.update(overrides)

    return config
 def load_separators() -> list[str]:
    """Single-char token separators used by the release name tokenizer.
@@ -30,6 +30,7 @@ from .release import (
    load_metadata_extensions,
    load_non_video_extensions,
    load_resolutions,
    load_scoring,
    load_separators,
    load_sources,
    load_sources_extra,
@@ -85,6 +86,9 @@ class YamlReleaseKnowledge:
        self.separators: list[str] = load_separators()
        # Parse-scoring config (weights / penalties / thresholds).
        self.scoring: dict = load_scoring()
        # File-extension sets (used by application/infra modules, not by
        # the parser itself — kept here so there is a single ownership
        # point for release knowledge).
@@ -0,0 +1,42 @@
 # Release parse scoring.
 #
 # `parse_release` returns a `ParseReport` alongside the `ParsedRelease`.
 # The report carries a 0-100 confidence score computed from the annotated
 # tokens, plus the road decision (EASY / SHITTY / PATH_OF_PAIN).
 #
 # Why YAML: the weights and the SHITTY/PoP cutoff are tuning knobs we
 # expect to iterate on as fixtures grow. Keeping them in code would
 # mean a commit per tweak; here the user can adjust without touching
 # Python.
 #
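 # Weights are awarded when the corresponding ParsedRelease field is
 # populated: non-empty for title / resolution / source / codec, non-None
 # for year, anything but "unknown" for media_type, and anything but
 # "UNKNOWN" for group. Season and episode only contribute when the parse
 # looks like TV (season is not None).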
 weights:
  title:       30   # structural pivot — without it nothing else matters
  media_type:  20   # movie / tv_show / tv_complete / …
  year:        15
  season:      10   # only counted for TV-shaped releases
  episode:     5
  resolution:  5
  source:      5
  codec:       5
  group:       5    # "UNKNOWN" yields 0
 # Penalty applied per UNKNOWN token left in the annotated stream.
 # Capped at `max_unknown_penalty` to keep a long-tail of garbage from
 # pushing every release into PoP.
 penalties:
  unknown_token:        5
  max_unknown_penalty:  30
 # Decision thresholds.
 #
 # EASY is decided structurally (a known group schema matched) — it does
 # not look at the score. SHITTY vs PATH_OF_PAIN is decided here:
 #
 #   score >= shitty_min  → SHITTY (best-effort parse usable)
 #   score <  shitty_min  → PATH_OF_PAIN (needs user / LLM help)
 thresholds:
  shitty_min: 60