feat(release): foundations for parse-confidence scoring

Add the building blocks for Phase A scoring without yet wiring them
into parse_release. Nothing changes at runtime — parse_release still
returns a single ParsedRelease — but the pieces needed to upgrade it
in a follow-up commit are now in place.

- alfred/knowledge/release/scoring.yaml: weights / penalties /
  thresholds. Title and media_type are heavy (30 / 20), structural
  fields medium (year 15, season 10), tech fields light (5 each).
  Unknown-token penalty 5 capped at -30. SHITTY/PoP cutoff at 60.
- load_scoring() loader with safe defaults baked in: a missing or
  partial YAML only de-tunes, never breaks.
- ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge
  populates it from load_scoring().
- New parser/scoring.py module with Road enum (EASY / SHITTY /
  PATH_OF_PAIN, distinct from ParsePath which records the tokenization
  route), and pure functions: compute_score, decide_road,
  collect_unknown_tokens, collect_missing_critical.
- ParseReport frozen VO in value_objects.py — exported alongside
  ParsedRelease.
This commit is contained in:
2026-05-20 01:21:17 +02:00
parent fcd80763e2
commit 98c688f29b
7 changed files with 264 additions and 2 deletions
+2 -2
View File
@@ -1,6 +1,6 @@
"""Release domain — release name parsing and naming conventions.""" """Release domain — release name parsing and naming conventions."""
from .services import parse_release from .services import parse_release
from .value_objects import ParsedRelease from .value_objects import ParsedRelease, ParseReport
__all__ = ["ParsedRelease", "parse_release"] __all__ = ["ParsedRelease", "ParseReport", "parse_release"]
+139
View File
@@ -0,0 +1,139 @@
"""Parse-confidence scoring.
``parse_release`` returns a :class:`ParseReport` alongside its
:class:`ParsedRelease`. The report carries:
- ``confidence``: integer 0–100 derived from which structural and
technical fields got populated, minus a penalty per UNKNOWN token
left in the annotated stream.
- ``road``: which of the three roads the parse took
(:class:`Road.EASY` / :class:`Road.SHITTY` / :class:`Road.PATH_OF_PAIN`).
- ``unknown_tokens``: textual residue, useful for diagnostics.
- ``missing_critical``: structural fields the score-tally found absent
(e.g. ``("year", "media_type")``) — the caller can use this to drive
PoP recovery (questions, LLM call).
All weights, penalties and thresholds come from the injected knowledge
base (``kb.scoring``), itself loaded from
``alfred/knowledge/release/scoring.yaml``. No magic numbers here.
The scoring functions are pure — they consume the annotated token list
and the resulting :class:`ParsedRelease` and return the report. They are
called by ``services.parse_release`` after ``assemble`` has run.
"""
from __future__ import annotations
from enum import Enum
from ..ports.knowledge import ReleaseKnowledge
from ..value_objects import ParsedRelease
from .tokens import Token, TokenRole
class Road(str, Enum):
    """Confidence tier assigned to the outcome of parsing a release name.

    Not to be confused with
    :class:`~alfred.domain.release.value_objects.ParsePath`: that enum
    records the tokenization route (DIRECT / SANITIZED / AI), whereas
    ``Road`` qualifies how far the *result* can be trusted, regardless of
    the *method* that produced it.
    """

    # A known group schema matched, so annotation was structural.
    EASY = "easy"
    # No schema matched; dictionary-driven annotation whose score reached
    # the threshold.
    SHITTY = "shitty"
    # Score fell below the threshold: the parse needs outside help.
    PATH_OF_PAIN = "path_of_pain"
# Structural fields considered critical: when one of them is absent it is
# reported in the ``missing_critical`` tuple of the parse report.
# NOTE(review): ``collect_missing_critical`` spells out these three checks
# by hand instead of iterating this tuple — keep the two in sync.
_CRITICAL_FIELDS: tuple[str, ...] = ("title", "media_type", "year")
def _is_tv_shaped(parsed: ParsedRelease) -> bool:
"""Season/episode weights only count for releases that *look* like TV."""
return parsed.season is not None
def compute_score(
    parsed: ParsedRelease,
    annotated: list[Token],
    kb: ReleaseKnowledge,
) -> int:
    """Return the 0–100 confidence score of a parse.

    Every populated field of *parsed* earns the weight registered under
    its key in ``kb.scoring["weights"]`` (a key absent from the mapping
    earns nothing). Season and episode are only tallied when the parse is
    TV-shaped, and a group equal to ``"UNKNOWN"`` counts as absent, as
    does a media type whose value is ``"unknown"``.

    Each token of *annotated* still tagged UNKNOWN then costs
    ``penalties["unknown_token"]``, the total being capped at
    ``penalties["max_unknown_penalty"]``. The final figure is clamped to
    ``[0, 100]``.
    """
    weights = kb.scoring["weights"]
    penalties = kb.scoring["penalties"]
    tv_shaped = _is_tv_shaped(parsed)

    # (weight key, "is the matching field populated?"), in tally order.
    # Note that the ``resolution`` weight is driven by ``parsed.quality``.
    tally = (
        ("title", bool(parsed.title)),
        (
            "media_type",
            bool(parsed.media_type and parsed.media_type.value != "unknown"),
        ),
        ("year", parsed.year is not None),
        ("season", tv_shaped and parsed.season is not None),
        ("episode", tv_shaped and parsed.episode is not None),
        ("resolution", bool(parsed.quality)),
        ("source", bool(parsed.source)),
        ("codec", bool(parsed.codec)),
        ("group", bool(parsed.group and parsed.group != "UNKNOWN")),
    )

    score = 0
    for key, populated in tally:
        if populated:
            score += weights.get(key, 0)

    # Residual UNKNOWN tokens cost a fixed amount each, up to a ceiling.
    residue = [tok for tok in annotated if tok.role is TokenRole.UNKNOWN]
    penalty = len(residue) * penalties.get("unknown_token", 0)
    ceiling = penalties.get("max_unknown_penalty", 0)
    if penalty > ceiling:
        penalty = ceiling

    return max(0, min(100, score - penalty))
def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
    """Gather the text of each token whose role is still UNKNOWN.

    The texts are returned in the order the tokens appear in *annotated*.
    """
    leftovers: list[str] = []
    for token in annotated:
        if token.role is TokenRole.UNKNOWN:
            leftovers.append(token.text)
    return tuple(leftovers)
def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
    """List the critical structural fields that *parsed* failed to fill.

    The result is a subset of ``("title", "media_type", "year")``, always
    in that order. A falsy title, a media type that is falsy or whose
    value is ``"unknown"``, and a ``None`` year each count as missing.
    """
    media_type = parsed.media_type
    # Insertion order of this mapping fixes the order of the result.
    absent = {
        "title": not parsed.title,
        "media_type": not media_type or media_type.value == "unknown",
        "year": parsed.year is None,
    }
    return tuple(name for name, is_absent in absent.items() if is_absent)
def decide_road(
    score: int,
    has_schema: bool,
    kb: ReleaseKnowledge,
) -> Road:
    """Choose the road that describes a parse.

    A matched group schema settles the question structurally: the result
    is ``Road.EASY`` whatever the score, and the knowledge base is not
    consulted. Without a schema, the score is compared against
    ``kb.scoring["thresholds"]["shitty_min"]`` (60 when the key is
    absent): at or above the cutoff the parse is ``Road.SHITTY``, below
    it ``Road.PATH_OF_PAIN``.
    """
    if not has_schema:
        cutoff = kb.scoring["thresholds"].get("shitty_min", 60)
        return Road.SHITTY if score >= cutoff else Road.PATH_OF_PAIN
    return Road.EASY
+12
View File
@@ -40,6 +40,18 @@ class ReleaseKnowledge(Protocol):
separators: list[str] separators: list[str]
# --- Parse scoring (Phase A) ---
#
# ``scoring`` is a dict with three keys:
# - ``weights``: dict[field_name, int] field weight contribution
# - ``penalties``: {"unknown_token": int, "max_unknown_penalty": int}
# - ``thresholds``: {"shitty_min": int} SHITTY vs PATH_OF_PAIN cutoff
#
# Concrete values come from ``alfred/knowledge/release/scoring.yaml``.
# The loader fills in safe defaults so this dict is always populated.
scoring: dict
# --- File-extension sets (used by application/infra modules that work # --- File-extension sets (used by application/infra modules that work
# directly with filesystem paths, e.g. media-type detection, video # directly with filesystem paths, e.g. media-type detection, video
# lookup). Domain parsing itself doesn't touch these. --- # lookup). Domain parsing itself doesn't touch these. ---
+34
View File
@@ -72,6 +72,40 @@ def _strip_episode_from_normalized(normalized: str) -> str:
return ".".join(result) return ".".join(result)
@dataclass(frozen=True)
class ParseReport:
    """Immutable diagnostic companion of a :class:`ParsedRelease`.

    The report is designed to be handed back by ``parse_release`` next to
    the ``ParsedRelease`` itself, as ``(ParsedRelease, ParseReport)``. It
    says *how confident* the parser is and *which road* led to the
    result. Keeping it apart from ``ParsedRelease`` leaves the structural
    VO free of meta-concerns about its own quality.

    Fields:

    - ``confidence``: integer 0–100 (see
      :func:`parser.scoring.compute_score`).
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"``. This is
      the confidence tier, not the tokenization route held by
      ``ParsedRelease.parse_path``.
    - ``unknown_tokens``: tokens whose role was still UNKNOWN once
      annotation finished, in order of appearance.
    - ``missing_critical``: names of the critical structural fields the
      parser could not fill (subset of
      ``{"title", "media_type", "year"}``).

    Raises:
        ValidationError: on construction, when ``confidence`` lies
            outside the inclusive range 0–100.
    """

    confidence: int
    road: str  # one of the parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Reject a confidence outside the inclusive 0–100 range."""
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
@dataclass @dataclass
class ParsedRelease: class ParsedRelease:
"""Structured representation of a parsed release name. """Structured representation of a parsed release name.
@@ -160,6 +160,37 @@ def load_group_schemas() -> dict:
return result return result
def load_scoring() -> dict:
    """Load the parse-scoring config, merged over built-in defaults.

    Returns:
        A dict with exactly three top-level keys — ``weights``,
        ``penalties`` and ``thresholds`` — each mapping to a fresh dict.
        Values found in ``scoring.yaml`` override the defaults key by
        key; anything the file leaves out keeps its default.

    A partial, empty or malformed document only de-tunes the parser: if
    the loaded document is not a mapping (e.g. ``None``), or one of its
    sections is not a mapping, the defaults are used for that part rather
    than raising.

    NOTE(review): whether a *missing* file is tolerated depends on
    ``_load``, which is not visible from this function — confirm there.
    """
    defaults = {
        "weights": {
            "title": 30,
            "media_type": 20,
            "year": 15,
            "season": 10,
            "episode": 5,
            "resolution": 5,
            "source": 5,
            "codec": 5,
            "group": 5,
        },
        "penalties": {"unknown_token": 5, "max_unknown_penalty": 30},
        "thresholds": {"shitty_min": 60},
    }

    raw = _load("scoring.yaml")
    # Presumably an empty YAML document comes back as ``None``; a scalar
    # or list at top level is equally unusable. Fall back to defaults
    # instead of crashing on ``raw.get``.
    if not isinstance(raw, dict):
        raw = {}

    scoring: dict = {}
    for section, baseline in defaults.items():
        overrides = raw.get(section)
        # A section that is absent, null or not a mapping is ignored.
        if isinstance(overrides, dict):
            scoring[section] = {**baseline, **overrides}
        else:
            scoring[section] = baseline
    return scoring
def load_separators() -> list[str]: def load_separators() -> list[str]:
"""Single-char token separators used by the release name tokenizer. """Single-char token separators used by the release name tokenizer.
@@ -30,6 +30,7 @@ from .release import (
load_metadata_extensions, load_metadata_extensions,
load_non_video_extensions, load_non_video_extensions,
load_resolutions, load_resolutions,
load_scoring,
load_separators, load_separators,
load_sources, load_sources,
load_sources_extra, load_sources_extra,
@@ -85,6 +86,9 @@ class YamlReleaseKnowledge:
self.separators: list[str] = load_separators() self.separators: list[str] = load_separators()
# Parse-scoring config (weights / penalties / thresholds).
self.scoring: dict = load_scoring()
# File-extension sets (used by application/infra modules, not by # File-extension sets (used by application/infra modules, not by
# the parser itself — kept here so there is a single ownership # the parser itself — kept here so there is a single ownership
# point for release knowledge). # point for release knowledge).
+42
View File
@@ -0,0 +1,42 @@
# Release parse scoring.
#
# `parse_release` returns a `ParseReport` alongside the `ParsedRelease`.
# The report carries a 0-100 confidence score computed from the annotated
# tokens, plus the road decision (EASY / SHITTY / PATH_OF_PAIN).
#
# Why YAML: the weights and the SHITTY/PoP cutoff are tuning knobs we
# expect to iterate on as fixtures grow. Keeping them in code would
# mean a commit per tweak; here the user can adjust without touching
# Python.
#
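# Weights are awarded when the corresponding ParsedRelease field is
# populated (truthy / non-None; "UNKNOWN" does not count for group, nor
# "unknown" for media_type). The `resolution` weight is keyed off the
# `quality` field. Season and episode only contribute when the parse
# looks like TV (season is not None).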
weights:
title: 30 # structural pivot — without it nothing else matters
media_type: 20 # movie / tv_show / tv_complete / …
year: 15
season: 10 # only counted for TV-shaped releases
episode: 5
resolution: 5
source: 5
codec: 5
group: 5 # "UNKNOWN" yields 0
# Penalty applied per UNKNOWN token left in the annotated stream.
# Capped at `max_unknown_penalty` to keep a long-tail of garbage from
# pushing every release into PoP.
penalties:
unknown_token: 5
max_unknown_penalty: 30
# Decision thresholds.
#
# EASY is decided structurally (a known group schema matched) — it does
# not look at the score. SHITTY vs PATH_OF_PAIN is decided here:
#
# score >= shitty_min → SHITTY (best-effort parse usable)
# score < shitty_min → PATH_OF_PAIN (needs user / LLM help)
thresholds:
shitty_min: 60