From 98c688f29b9324ebbf3a0d55e639c1018eaed8bc Mon Sep 17 00:00:00 2001
From: Francwa <francois.hodiaumont@gmail.com>
Date: Wed, 20 May 2026 01:21:17 +0200
Subject: [PATCH] feat(release): foundations for parse-confidence scoring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the building blocks for Phase A scoring without yet wiring them
into parse_release. Nothing changes at runtime — parse_release still
returns a single ParsedRelease — but the pieces needed to upgrade it
in a follow-up commit are now in place.

- alfred/knowledge/release/scoring.yaml: weights / penalties /
  thresholds. Title and media_type are heavy (30 / 20), structural
  fields medium (year 15, season 10), all remaining fields light
  (5 each). Unknown-token penalty is 5 per token, capped at 30 in
  total. SHITTY/PoP cutoff at 60.
- load_scoring() loader with safe defaults baked in: a missing or
  partial YAML only de-tunes, never breaks.
- ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge
  populates it from load_scoring().
- New parser/scoring.py module with Road enum (EASY / SHITTY /
  PATH_OF_PAIN, distinct from ParsePath which records the tokenization
  route), and pure functions: compute_score, decide_road,
  collect_unknown_tokens, collect_missing_critical.
- ParseReport frozen VO in value_objects.py — exported alongside
  ParsedRelease.
---
 alfred/domain/release/__init__.py             |   4 +-
 alfred/domain/release/parser/scoring.py       | 139 ++++++++++++++++++
 alfred/domain/release/ports/knowledge.py      |  12 ++
 alfred/domain/release/value_objects.py        |  34 +++++
 alfred/infrastructure/knowledge/release.py    |  31 ++++
 alfred/infrastructure/knowledge/release_kb.py |   4 +
 alfred/knowledge/release/scoring.yaml         |  42 ++++++
 7 files changed, 264 insertions(+), 2 deletions(-)
 create mode 100644 alfred/domain/release/parser/scoring.py
 create mode 100644 alfred/knowledge/release/scoring.yaml

diff --git a/alfred/domain/release/__init__.py b/alfred/domain/release/__init__.py
index 2e96275..3c2b4c7 100644
--- a/alfred/domain/release/__init__.py
+++ b/alfred/domain/release/__init__.py
@@ -1,6 +1,6 @@
 """Release domain — release name parsing and naming conventions."""
 
 from .services import parse_release
-from .value_objects import ParsedRelease
+from .value_objects import ParsedRelease, ParseReport
 
-__all__ = ["ParsedRelease", "parse_release"]
+__all__ = ["ParsedRelease", "ParseReport", "parse_release"]
diff --git a/alfred/domain/release/parser/scoring.py b/alfred/domain/release/parser/scoring.py
new file mode 100644
index 0000000..4e27fc3
--- /dev/null
+++ b/alfred/domain/release/parser/scoring.py
@@ -0,0 +1,139 @@
+"""Parse-confidence scoring.
+
+``parse_release`` will return a :class:`ParseReport` alongside its
+:class:`ParsedRelease` once wired in. The report carries:
+
+- ``confidence``: integer 0–100 derived from which structural and
+  technical fields got populated, minus a penalty per UNKNOWN token
+  left in the annotated stream.
+- ``road``: which of the three roads the parse took
+  (:attr:`Road.EASY` / :attr:`Road.SHITTY` / :attr:`Road.PATH_OF_PAIN`).
+- ``unknown_tokens``: textual residue, useful for diagnostics.
+- ``missing_critical``: critical structural fields found absent, in
+  fixed order (e.g. ``("media_type", "year")``) — the caller can use
+  this to drive PoP recovery (questions, LLM call).
+
+All weights, penalties and thresholds come from the injected knowledge
+base (``kb.scoring``), itself loaded from
+``alfred/knowledge/release/scoring.yaml``. No magic numbers here.
+
+The scoring functions are pure — they consume the annotated token list
+and the resulting :class:`ParsedRelease` and return the report's parts.
+``services.parse_release`` is meant to call them after ``assemble`` runs.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+from ..ports.knowledge import ReleaseKnowledge
+from ..value_objects import ParsedRelease
+from .tokens import Token, TokenRole
+
+
+class Road(str, Enum):
+    """How the parser handled a given release name.
+
+    Distinct from :class:`~alfred.domain.release.value_objects.ParsePath`,
+    which records the tokenization route (DIRECT / SANITIZED / AI). Road
+    is about confidence in the *result*, not the *method*.
+    """
+
+    EASY = "easy"  # group schema matched — structural annotation
+    SHITTY = "shitty"  # no schema, dict-driven annotation, score ≥ threshold
+    PATH_OF_PAIN = "path_of_pain"  # score below threshold, needs help
+
+
+# Critical structural fields — their absence drives the
+# ``missing_critical`` list in the report.
+_CRITICAL_FIELDS: tuple[str, ...] = ("title", "media_type", "year")
+
+
+def _is_tv_shaped(parsed: ParsedRelease) -> bool:
+    """Season/episode weights only count for releases that *look* like TV."""
+    return parsed.season is not None
+
+
+def compute_score(
+    parsed: ParsedRelease,
+    annotated: list[Token],
+    kb: ReleaseKnowledge,
+) -> int:
+    """Compute a 0–100 confidence score for the parse.
+
+    Each populated field contributes its weight from
+    ``kb.scoring["weights"]``. Season/episode only count when the parse
+    looks like TV. ``group == "UNKNOWN"`` is treated as absent.
+
+    Then a penalty is subtracted per residual UNKNOWN token in
+    ``annotated``, capped at ``penalties["max_unknown_penalty"]``.
+
+    Result is clamped to ``[0, 100]``.
+    """
+    weights = kb.scoring["weights"]
+    penalties = kb.scoring["penalties"]
+
+    score = 0
+    if parsed.title:
+        score += weights.get("title", 0)
+    if parsed.media_type and parsed.media_type.value != "unknown":
+        score += weights.get("media_type", 0)
+    if parsed.year is not None:
+        score += weights.get("year", 0)
+    if _is_tv_shaped(parsed):
+        if parsed.season is not None:
+            score += weights.get("season", 0)
+        if parsed.episode is not None:
+            score += weights.get("episode", 0)
+    if parsed.quality:
+        score += weights.get("resolution", 0)
+    if parsed.source:
+        score += weights.get("source", 0)
+    if parsed.codec:
+        score += weights.get("codec", 0)
+    if parsed.group and parsed.group != "UNKNOWN":
+        score += weights.get("group", 0)
+
+    unknown_count = sum(1 for t in annotated if t.role is TokenRole.UNKNOWN)
+    raw_penalty = unknown_count * penalties.get("unknown_token", 0)
+    capped_penalty = min(raw_penalty, penalties.get("max_unknown_penalty", 0))
+    score -= capped_penalty
+
+    return max(0, min(100, score))
+
+
+def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
+    """Return the text of every token still tagged UNKNOWN."""
+    return tuple(t.text for t in annotated if t.role is TokenRole.UNKNOWN)
+
+
+def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
+    """Return the names of critical structural fields that are absent."""
+    missing: list[str] = []
+    if not parsed.title:
+        missing.append("title")
+    if not parsed.media_type or parsed.media_type.value == "unknown":
+        missing.append("media_type")
+    if parsed.year is None:
+        missing.append("year")
+    return tuple(missing)
+
+
+def decide_road(
+    score: int,
+    has_schema: bool,
+    kb: ReleaseKnowledge,
+) -> Road:
+    """Pick the road the parse took.
+
+    EASY is decided structurally: if a known group schema matched, the
+    annotation walked the schema, and that's enough — the score does not
+    veto EASY. Otherwise the score decides between SHITTY and
+    PATH_OF_PAIN using ``kb.scoring["thresholds"]["shitty_min"]``.
+    """
+    if has_schema:
+        return Road.EASY
+    threshold = kb.scoring["thresholds"].get("shitty_min", 60)
+    if score >= threshold:
+        return Road.SHITTY
+    return Road.PATH_OF_PAIN
diff --git a/alfred/domain/release/ports/knowledge.py b/alfred/domain/release/ports/knowledge.py
index ff6982e..183c3a0 100644
--- a/alfred/domain/release/ports/knowledge.py
+++ b/alfred/domain/release/ports/knowledge.py
@@ -40,6 +40,18 @@ class ReleaseKnowledge(Protocol):
 
     separators: list[str]
 
+    # --- Parse scoring (Phase A) ---
+    #
+    # ``scoring`` is a dict with three keys:
+    #   - ``weights``:     dict[field_name, int]   field weight contribution
+    #   - ``penalties``:   {"unknown_token": int, "max_unknown_penalty": int}
+    #   - ``thresholds``:  {"shitty_min": int}     SHITTY vs PATH_OF_PAIN cutoff
+    #
+    # Concrete values come from ``alfred/knowledge/release/scoring.yaml``.
+    # The loader fills in safe defaults so this dict is always populated.
+
+    scoring: dict
+
     # --- File-extension sets (used by application/infra modules that work
     #     directly with filesystem paths, e.g. media-type detection, video
     #     lookup). Domain parsing itself doesn't touch these. ---
diff --git a/alfred/domain/release/value_objects.py b/alfred/domain/release/value_objects.py
index b3fa431..fde9879 100644
--- a/alfred/domain/release/value_objects.py
+++ b/alfred/domain/release/value_objects.py
@@ -72,6 +72,40 @@ def _strip_episode_from_normalized(normalized: str) -> str:
     return ".".join(result)
 
 
+@dataclass(frozen=True)
+class ParseReport:
+    """Diagnostic report attached to a :class:`ParsedRelease`.
+
+    ``parse_release`` will return ``(ParsedRelease, ParseReport)``. The
+    report describes *how confident* the parser is in the result and
+    *which road* produced it. It is intentionally separate from
+    ``ParsedRelease`` so the structural VO stays free of meta-concerns
+    about its own quality.
+
+    Fields:
+
+    - ``confidence``: integer 0–100 (see :func:`parser.scoring.compute_score`).
+    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"`` — distinct
+      from ``ParsedRelease.parse_path`` (which describes the
+      tokenization route, not the confidence tier).
+    - ``unknown_tokens``: tokens that finished annotation with role
+      UNKNOWN, in order of appearance.
+    - ``missing_critical``: names of critical structural fields the
+      parser couldn't fill (subset of ``{"title", "media_type", "year"}``).
+    """
+
+    confidence: int
+    road: str  # one of parser.scoring.Road values
+    unknown_tokens: tuple[str, ...] = ()
+    missing_critical: tuple[str, ...] = ()
+
+    def __post_init__(self) -> None:
+        if not (0 <= self.confidence <= 100):
+            raise ValidationError(
+                f"ParseReport.confidence out of range: {self.confidence}"
+            )
+
+
 @dataclass
 class ParsedRelease:
     """Structured representation of a parsed release name.
diff --git a/alfred/infrastructure/knowledge/release.py b/alfred/infrastructure/knowledge/release.py
index 60623e4..05eb08b 100644
--- a/alfred/infrastructure/knowledge/release.py
+++ b/alfred/infrastructure/knowledge/release.py
@@ -160,6 +160,37 @@ def load_group_schemas() -> dict:
     return result
 
 
+def load_scoring() -> dict:
+    """Load the parse-scoring config.
+
+    Returns a dict with three top-level keys: ``weights``, ``penalties``,
+    ``thresholds``. Defaults are baked in so a missing or partial YAML
+    never breaks the parser — only de-tunes it.
+    """
+    raw = _load("scoring.yaml")
+    weights = {
+        "title": 30,
+        "media_type": 20,
+        "year": 15,
+        "season": 10,
+        "episode": 5,
+        "resolution": 5,
+        "source": 5,
+        "codec": 5,
+        "group": 5,
+    }
+    weights.update(raw.get("weights", {}) or {})
+    penalties = {"unknown_token": 5, "max_unknown_penalty": 30}
+    penalties.update(raw.get("penalties", {}) or {})
+    thresholds = {"shitty_min": 60}
+    thresholds.update(raw.get("thresholds", {}) or {})
+    return {
+        "weights": weights,
+        "penalties": penalties,
+        "thresholds": thresholds,
+    }
+
+
 def load_separators() -> list[str]:
     """Single-char token separators used by the release name tokenizer.
 
diff --git a/alfred/infrastructure/knowledge/release_kb.py b/alfred/infrastructure/knowledge/release_kb.py
index c84df71..5ecb6ba 100644
--- a/alfred/infrastructure/knowledge/release_kb.py
+++ b/alfred/infrastructure/knowledge/release_kb.py
@@ -30,6 +30,7 @@ from .release import (
     load_metadata_extensions,
     load_non_video_extensions,
     load_resolutions,
+    load_scoring,
     load_separators,
     load_sources,
     load_sources_extra,
@@ -85,6 +86,9 @@ class YamlReleaseKnowledge:
 
         self.separators: list[str] = load_separators()
 
+        # Parse-scoring config (weights / penalties / thresholds).
+        self.scoring: dict = load_scoring()
+
         # File-extension sets (used by application/infra modules, not by
         # the parser itself — kept here so there is a single ownership
         # point for release knowledge).
diff --git a/alfred/knowledge/release/scoring.yaml b/alfred/knowledge/release/scoring.yaml
new file mode 100644
index 0000000..8d64f33
--- /dev/null
+++ b/alfred/knowledge/release/scoring.yaml
@@ -0,0 +1,42 @@
+# Release parse scoring.
+#
+# `parse_release` will return a `ParseReport` with the `ParsedRelease`.
+# The report carries a 0-100 confidence score computed from the annotated
+# tokens, plus the road decision (EASY / SHITTY / PATH_OF_PAIN).
+#
+# Why YAML: the weights and the SHITTY/PoP cutoff are tuning knobs we
+# expect to iterate on as fixtures grow. Keeping them in code would
+# mean a commit per tweak; here the user can adjust without touching
+# Python.
+#
+# Weights are awarded when the corresponding ParsedRelease field is
+# populated (media_type "unknown" and group "UNKNOWN" count as absent).
+# Season and episode only contribute when season is not None.
+
+weights:
+  title:       30   # structural pivot — without it nothing else matters
+  media_type:  20   # movie / tv_show / tv_complete / …
+  year:        15
+  season:      10   # only counted for TV-shaped releases
+  episode:     5
+  resolution:  5
+  source:      5
+  codec:       5
+  group:       5    # "UNKNOWN" yields 0
+
+# Penalty applied per UNKNOWN token left in the annotated stream.
+# Capped at `max_unknown_penalty` to keep a long tail of garbage from
+# pushing every release into PATH_OF_PAIN (PoP).
+penalties:
+  unknown_token:        5
+  max_unknown_penalty:  30
+
+# Decision thresholds.
+#
+# EASY is decided structurally (a known group schema matched) — it does
+# not look at the score. SHITTY vs PATH_OF_PAIN is decided here:
+#
+#   score >= shitty_min  → SHITTY (best-effort parse usable)
+#   score <  shitty_min  → PATH_OF_PAIN (needs user / LLM help)
+thresholds:
+  shitty_min: 60