From 98c688f29b9324ebbf3a0d55e639c1018eaed8bc Mon Sep 17 00:00:00 2001 From: Francwa Date: Wed, 20 May 2026 01:21:17 +0200 Subject: [PATCH] feat(release): foundations for parse-confidence scoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the building blocks for Phase A scoring without yet wiring them into parse_release. Nothing changes at runtime — parse_release still returns a single ParsedRelease — but the pieces needed to upgrade it in a follow-up commit are now in place. - alfred/knowledge/release/scoring.yaml: weights / penalties / thresholds. Title and media_type are heavy (30 / 20), structural fields medium (year 15, season 10), tech fields light (5 each). Unknown-token penalty 5 capped at -30. SHITTY/PoP cutoff at 60. - load_scoring() loader with safe defaults baked in: a missing or partial YAML only de-tunes, never breaks. - ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge populates it from load_scoring(). - New parser/scoring.py module with Road enum (EASY / SHITTY / PATH_OF_PAIN, distinct from ParsePath which records the tokenization route), and pure functions: compute_score, decide_road, collect_unknown_tokens, collect_missing_critical. - ParseReport frozen VO in value_objects.py — exported alongside ParsedRelease. 
--- alfred/domain/release/__init__.py | 4 +- alfred/domain/release/parser/scoring.py | 139 ++++++++++++++++++ alfred/domain/release/ports/knowledge.py | 12 ++ alfred/domain/release/value_objects.py | 34 +++++ alfred/infrastructure/knowledge/release.py | 31 ++++ alfred/infrastructure/knowledge/release_kb.py | 4 + alfred/knowledge/release/scoring.yaml | 42 ++++++ 7 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 alfred/domain/release/parser/scoring.py create mode 100644 alfred/knowledge/release/scoring.yaml diff --git a/alfred/domain/release/__init__.py b/alfred/domain/release/__init__.py index 2e96275..3c2b4c7 100644 --- a/alfred/domain/release/__init__.py +++ b/alfred/domain/release/__init__.py @@ -1,6 +1,6 @@ """Release domain — release name parsing and naming conventions.""" from .services import parse_release -from .value_objects import ParsedRelease +from .value_objects import ParsedRelease, ParseReport -__all__ = ["ParsedRelease", "parse_release"] +__all__ = ["ParsedRelease", "ParseReport", "parse_release"] diff --git a/alfred/domain/release/parser/scoring.py b/alfred/domain/release/parser/scoring.py new file mode 100644 index 0000000..4e27fc3 --- /dev/null +++ b/alfred/domain/release/parser/scoring.py @@ -0,0 +1,139 @@ +"""Parse-confidence scoring. + +``parse_release`` returns a :class:`ParseReport` alongside its +:class:`ParsedRelease`. The report carries: + +- ``confidence``: integer 0–100 derived from which structural and + technical fields got populated, minus a penalty per UNKNOWN token + left in the annotated stream. +- ``road``: which of the three roads the parse took + (:class:`Road.EASY` / :class:`Road.SHITTY` / :class:`Road.PATH_OF_PAIN`). +- ``unknown_tokens``: textual residue, useful for diagnostics. +- ``missing_critical``: structural fields the score-tally found absent + (e.g. ``("year", "media_type")``) — the caller can use this to drive + PoP recovery (questions, LLM call). 
class Road(str, Enum):
    """Confidence tier assigned to the parse of a release name.

    Distinct from :class:`~alfred.domain.release.value_objects.ParsePath`,
    which records the tokenization route (DIRECT / SANITIZED / AI). Road
    is about confidence in the *result*, not the *method*.
    """

    EASY = "easy"  # group schema matched — structural annotation
    SHITTY = "shitty"  # no schema, dict-driven annotation, score ≥ threshold
    PATH_OF_PAIN = "path_of_pain"  # score below threshold, needs help


# Critical structural fields — their absence drives the
# ``missing_critical`` list in the report. Every name here must also be
# a key of ``_PRESENCE_CHECKS`` below.
_CRITICAL_FIELDS: tuple[str, ...] = ("title", "media_type", "year")

# Fallback SHITTY / PATH_OF_PAIN cutoff, used only when the injected
# knowledge base carries no ``thresholds["shitty_min"]`` entry.
_DEFAULT_SHITTY_MIN = 60


def _is_tv_shaped(parsed: ParsedRelease) -> bool:
    """Season/episode weights only count for releases that *look* like TV."""
    return parsed.season is not None


def _is_unknown(token: Token) -> bool:
    """Tell whether *token* finished annotation with the UNKNOWN role."""
    return token.role is TokenRole.UNKNOWN


# Weight key -> predicate telling whether the matching ParsedRelease
# field counts as populated. This is the single source of truth shared
# by ``compute_score`` and ``collect_missing_critical`` so the two can
# never disagree about what "absent" means.
_PRESENCE_CHECKS = {
    "title": lambda p: bool(p.title),
    "media_type": lambda p: bool(p.media_type)
    and p.media_type.value != "unknown",
    "year": lambda p: p.year is not None,
    "season": lambda p: _is_tv_shaped(p) and p.season is not None,
    "episode": lambda p: _is_tv_shaped(p) and p.episode is not None,
    # The ``quality`` attribute is scored under the "resolution" weight.
    "resolution": lambda p: bool(p.quality),
    "source": lambda p: bool(p.source),
    "codec": lambda p: bool(p.codec),
    # A group of "UNKNOWN" is treated as absent.
    "group": lambda p: bool(p.group) and p.group != "UNKNOWN",
}


def compute_score(
    parsed: ParsedRelease,
    annotated: list[Token],
    kb: ReleaseKnowledge,
) -> int:
    """Compute a 0–100 confidence score for the parse.

    Each populated field contributes its weight from
    ``kb.scoring["weights"]`` (a field without a configured weight
    contributes 0). Season/episode only count when the parse looks like
    TV. ``group == "UNKNOWN"`` is treated as absent.

    Then a penalty of ``penalties["unknown_token"]`` is subtracted per
    residual UNKNOWN token in ``annotated``, capped at
    ``penalties["max_unknown_penalty"]``. Both keys fall back to 0 when
    missing, which means a missing cap disables the penalty entirely.

    Args:
        parsed: The assembled release whose fields are tallied.
        annotated: The annotated token stream; only UNKNOWN tokens matter.
        kb: Knowledge base exposing the ``scoring`` configuration dict.

    Returns:
        The score clamped to ``[0, 100]`` and coerced to ``int``.

    Raises:
        KeyError: If ``kb.scoring`` lacks the ``weights`` or
            ``penalties`` section.
    """
    weights = kb.scoring["weights"]
    penalties = kb.scoring["penalties"]

    earned = sum(
        weights.get(name, 0)
        for name, is_present in _PRESENCE_CHECKS.items()
        if is_present(parsed)
    )

    unknown_count = sum(1 for token in annotated if _is_unknown(token))
    penalty = min(
        unknown_count * penalties.get("unknown_token", 0),
        penalties.get("max_unknown_penalty", 0),
    )

    # The weights are user-tunable configuration; coerce so a fractional
    # weight cannot leak a float through the ``-> int`` contract.
    return int(max(0, min(100, earned - penalty)))


def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
    """Return the text of every token still tagged UNKNOWN, in order."""
    return tuple(token.text for token in annotated if _is_unknown(token))


def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
    """Return the names of critical structural fields that are absent.

    The result follows the order of ``_CRITICAL_FIELDS`` and uses the
    same presence rules as :func:`compute_score`.
    """
    return tuple(
        name for name in _CRITICAL_FIELDS if not _PRESENCE_CHECKS[name](parsed)
    )


def decide_road(
    score: int,
    has_schema: bool,
    kb: ReleaseKnowledge,
) -> Road:
    """Pick the road the parse took.

    EASY is decided structurally: if a known group schema matched, the
    annotation walked the schema, and that's enough — the score does not
    veto EASY. Otherwise the score decides between SHITTY and
    PATH_OF_PAIN using ``kb.scoring["thresholds"]["shitty_min"]``
    (``_DEFAULT_SHITTY_MIN`` when that key is missing).

    Args:
        score: Confidence score of the parse.
        has_schema: Whether a known group schema matched.
        kb: Knowledge base exposing the ``scoring`` configuration dict.

    Returns:
        The :class:`Road` for this parse.
    """
    if has_schema:
        return Road.EASY
    cutoff = kb.scoring["thresholds"].get("shitty_min", _DEFAULT_SHITTY_MIN)
    return Road.SHITTY if score >= cutoff else Road.PATH_OF_PAIN
def load_scoring() -> dict:
    """Load the parse-scoring config.

    Returns a dict with three top-level keys: ``weights``, ``penalties``,
    ``thresholds``. Defaults are baked in so an empty or partial YAML
    never breaks the parser — only de-tunes it: any section (or the whole
    document) that is absent, ``None`` or not a mapping is ignored and
    the defaults for it are kept.

    NOTE(review): a *missing* ``scoring.yaml`` is only covered if
    ``_load`` itself tolerates it — confirm against ``_load``.

    Returns:
        A freshly built ``{"weights": ..., "penalties": ..., "thresholds": ...}``
        dict; callers may mutate it without affecting later calls.
    """
    defaults = {
        "weights": {
            "title": 30,
            "media_type": 20,
            "year": 15,
            "season": 10,
            "episode": 5,
            "resolution": 5,
            "source": 5,
            "codec": 5,
            "group": 5,
        },
        "penalties": {"unknown_token": 5, "max_unknown_penalty": 30},
        "thresholds": {"shitty_min": 60},
    }

    raw = _load("scoring.yaml")
    if not isinstance(raw, dict):
        # An empty or comment-only YAML document typically parses to
        # None; treat anything that is not a mapping as "no overrides".
        raw = {}

    scoring: dict = {}
    for section, section_defaults in defaults.items():
        merged = dict(section_defaults)
        overrides = raw.get(section)
        if isinstance(overrides, dict):
            merged.update(overrides)
        scoring[section] = merged
    return scoring
Keeping them in code would +# mean a commit per tweak; here the user can adjust without touching +# Python. +# +# Weights are awarded when the corresponding ParsedRelease field is +# populated (non-None, non-"UNKNOWN" for group). Season and episode +# only contribute when the parse looks like TV (season is not None). + +weights: + title: 30 # structural pivot — without it nothing else matters + media_type: 20 # movie / tv_show / tv_complete / … + year: 15 + season: 10 # only counted for TV-shaped releases + episode: 5 + resolution: 5 + source: 5 + codec: 5 + group: 5 # "UNKNOWN" yields 0 + +# Penalty applied per UNKNOWN token left in the annotated stream. +# Capped at `max_unknown_penalty` to keep a long-tail of garbage from +# pushing every release into PoP. +penalties: + unknown_token: 5 + max_unknown_penalty: 30 + +# Decision thresholds. +# +# EASY is decided structurally (a known group schema matched) — it does +# not look at the score. SHITTY vs PATH_OF_PAIN is decided here: +# +# score >= shitty_min → SHITTY (best-effort parse usable) +# score < shitty_min → PATH_OF_PAIN (needs user / LLM help) +thresholds: + shitty_min: 60