feat(release): foundations for parse-confidence scoring
Add the building blocks for Phase A scoring without yet wiring them into parse_release. Nothing changes at runtime — parse_release still returns a single ParsedRelease — but the pieces needed to upgrade it in a follow-up commit are now in place.

- alfred/knowledge/release/scoring.yaml: weights / penalties / thresholds. Title and media_type are heavy (30 / 20), structural fields medium (year 15, season 10), tech fields light (5 each). The unknown-token penalty is 5 per token, capped at 30 in total. SHITTY/PoP cutoff at 60.
- load_scoring() loader with safe defaults baked in: a missing or partial YAML only de-tunes, never breaks.
- ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge populates it from load_scoring().
- New parser/scoring.py module with a Road enum (EASY / SHITTY / PATH_OF_PAIN, distinct from ParsePath, which records the tokenization route) and pure functions: compute_score, decide_road, collect_unknown_tokens, collect_missing_critical.
- ParseReport frozen VO in value_objects.py — exported alongside ParsedRelease.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
"""Release domain — release name parsing and naming conventions."""
|
||||
|
||||
from .services import parse_release
|
||||
from .value_objects import ParsedRelease
|
||||
from .value_objects import ParsedRelease, ParseReport
|
||||
|
||||
__all__ = ["ParsedRelease", "parse_release"]
|
||||
__all__ = ["ParsedRelease", "ParseReport", "parse_release"]
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
"""Parse-confidence scoring.
|
||||
|
||||
Once wired in, ``parse_release`` will return a :class:`ParseReport` alongside its
|
||||
:class:`ParsedRelease`. The report carries:
|
||||
|
||||
- ``confidence``: integer 0–100 derived from which structural and
|
||||
technical fields got populated, minus a penalty per UNKNOWN token
|
||||
left in the annotated stream.
|
||||
- ``road``: which of the three roads the parse took
|
||||
(:class:`Road.EASY` / :class:`Road.SHITTY` / :class:`Road.PATH_OF_PAIN`).
|
||||
- ``unknown_tokens``: textual residue, useful for diagnostics.
|
||||
- ``missing_critical``: structural fields the score-tally found absent
|
||||
(e.g. ``("year", "media_type")``) — the caller can use this to drive
|
||||
PoP recovery (questions, LLM call).
|
||||
|
||||
All weights, penalties and thresholds come from the injected knowledge
|
||||
base (``kb.scoring``), itself loaded from
|
||||
``alfred/knowledge/release/scoring.yaml``. No magic numbers here.
|
||||
|
||||
The scoring functions are pure — they consume the annotated token list
|
||||
and the resulting :class:`ParsedRelease` and return the report. They are
|
||||
meant to be called by ``services.parse_release`` after ``assemble`` has run.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from ..ports.knowledge import ReleaseKnowledge
|
||||
from ..value_objects import ParsedRelease
|
||||
from .tokens import Token, TokenRole
|
||||
|
||||
|
||||
class Road(str, Enum):
    """Confidence tier assigned to a parsed release name.

    Not to be confused with
    :class:`~alfred.domain.release.value_objects.ParsePath`: that one
    names the tokenization route (DIRECT / SANITIZED / AI), i.e. the
    *method*, whereas ``Road`` qualifies the *result*.
    """

    # A known group schema matched; annotation was structural.
    EASY = "easy"
    # No schema: dictionary-driven annotation whose score reached the threshold.
    SHITTY = "shitty"
    # Score fell below the threshold; the parse needs outside help.
    PATH_OF_PAIN = "path_of_pain"
|
||||
|
||||
|
||||
# Structural fields a usable parse cannot do without. The report's
# ``missing_critical`` tuple names whichever of these are absent.
_CRITICAL_FIELDS: tuple[str, ...] = (
    "title",
    "media_type",
    "year",
)
|
||||
|
||||
|
||||
def _is_tv_shaped(parsed: ParsedRelease) -> bool:
    """Tell whether *parsed* looks like a TV release.

    A release counts as TV-shaped as soon as it carries a season
    (``0`` included — only ``None`` means "no season"). Season and
    episode weights are only awarded to such releases.
    """
    season = parsed.season
    return season is not None
|
||||
|
||||
|
||||
def compute_score(
    parsed: ParsedRelease,
    annotated: list[Token],
    kb: ReleaseKnowledge,
) -> int:
    """Return the confidence score of a parse, clamped to ``[0, 100]``.

    The score is built in two steps:

    1. Every populated field earns its weight from
       ``kb.scoring["weights"]`` (a field with no configured weight
       earns 0). ``media_type`` must differ from ``"unknown"`` and
       ``group`` from ``"UNKNOWN"`` to count. Season and episode are
       only considered when the release is TV-shaped. The
       ``resolution`` weight is keyed on ``parsed.quality``.
    2. Each token of ``annotated`` still tagged ``TokenRole.UNKNOWN``
       costs ``penalties["unknown_token"]``; the total deduction is
       capped at ``penalties["max_unknown_penalty"]``.
    """
    weights = kb.scoring["weights"]
    penalties = kb.scoring["penalties"]

    media_type = parsed.media_type
    tv_shaped = _is_tv_shaped(parsed)
    group = parsed.group

    # Weight name -> "did the parse earn it?". Dict order fixes the
    # summation order.
    earned = {
        "title": bool(parsed.title),
        "media_type": bool(media_type) and media_type.value != "unknown",
        "year": parsed.year is not None,
        "season": tv_shaped and parsed.season is not None,
        "episode": tv_shaped and parsed.episode is not None,
        "resolution": bool(parsed.quality),
        "source": bool(parsed.source),
        "codec": bool(parsed.codec),
        "group": bool(group) and group != "UNKNOWN",
    }
    gained = sum(weights.get(name, 0) for name, ok in earned.items() if ok)

    leftovers = [tok for tok in annotated if tok.role is TokenRole.UNKNOWN]
    deduction = min(
        len(leftovers) * penalties.get("unknown_token", 0),
        penalties.get("max_unknown_penalty", 0),
    )

    return max(0, min(100, gained - deduction))
|
||||
|
||||
|
||||
def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
    """Gather the text of the tokens whose role is still UNKNOWN.

    The input order is preserved, so the result lists the residue in
    order of appearance. An empty input yields an empty tuple.
    """
    residue: list[str] = []
    for token in annotated:
        if token.role is TokenRole.UNKNOWN:
            residue.append(token.text)
    return tuple(residue)
|
||||
|
||||
|
||||
def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
    """List the critical structural fields that *parsed* lacks.

    The result is a subset of ``("title", "media_type", "year")``, in
    that order. A field is reported when:

    - ``title`` is empty or ``None``;
    - ``media_type`` is falsy or its ``.value`` is ``"unknown"``;
    - ``year`` is ``None``.
    """
    media_type = parsed.media_type
    # Field name -> "is it absent?". Dict order fixes the output order.
    absent = {
        "title": not parsed.title,
        "media_type": not media_type or media_type.value == "unknown",
        "year": parsed.year is None,
    }
    return tuple(name for name, is_absent in absent.items() if is_absent)
|
||||
|
||||
|
||||
def decide_road(
    score: int,
    has_schema: bool,
    kb: ReleaseKnowledge,
) -> Road:
    """Classify a parse into one of the three roads.

    A matched group schema (``has_schema``) settles the question on its
    own: the result is ``Road.EASY`` whatever the score. Without a
    schema, the score is compared with
    ``kb.scoring["thresholds"]["shitty_min"]`` (60 when the key is
    absent): at or above the cutoff the parse is ``Road.SHITTY``,
    below it ``Road.PATH_OF_PAIN``.
    """
    if not has_schema:
        cutoff = kb.scoring["thresholds"].get("shitty_min", 60)
        return Road.SHITTY if score >= cutoff else Road.PATH_OF_PAIN
    # Structural match: the score has no veto.
    return Road.EASY
|
||||
@@ -40,6 +40,18 @@ class ReleaseKnowledge(Protocol):
|
||||
|
||||
separators: list[str]
|
||||
|
||||
# --- Parse scoring (Phase A) ---
|
||||
#
|
||||
# ``scoring`` is a dict with three keys:
|
||||
# - ``weights``: dict[field_name, int] field weight contribution
|
||||
# - ``penalties``: {"unknown_token": int, "max_unknown_penalty": int}
|
||||
# - ``thresholds``: {"shitty_min": int} SHITTY vs PATH_OF_PAIN cutoff
|
||||
#
|
||||
# Concrete values come from ``alfred/knowledge/release/scoring.yaml``.
|
||||
# The loader fills in safe defaults so this dict is always populated.
|
||||
|
||||
scoring: dict
|
||||
|
||||
# --- File-extension sets (used by application/infra modules that work
|
||||
# directly with filesystem paths, e.g. media-type detection, video
|
||||
# lookup). Domain parsing itself doesn't touch these. ---
|
||||
|
||||
@@ -72,6 +72,40 @@ def _strip_episode_from_normalized(normalized: str) -> str:
|
||||
return ".".join(result)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ParseReport:
    """Immutable quality report that accompanies a :class:`ParsedRelease`.

    The report is designed to be handed back by ``parse_release``
    together with the ``ParsedRelease``. It is kept as a separate
    object so that the structural VO carries no meta-information about
    how trustworthy it is.

    Fields:

    - ``confidence``: integer in ``[0, 100]``
      (see :func:`parser.scoring.compute_score`).
    - ``road``: confidence tier — ``"easy"``, ``"shitty"`` or
      ``"path_of_pain"``. This is not ``ParsedRelease.parse_path``,
      which describes the tokenization route.
    - ``unknown_tokens``: text of the tokens left with role UNKNOWN
      after annotation, in order of appearance.
    - ``missing_critical``: critical structural fields the parser could
      not fill; a subset of ``{"title", "media_type", "year"}``.

    Raises:
        ValidationError: if ``confidence`` lies outside ``[0, 100]``.
    """

    confidence: int
    # One of the ``parser.scoring.Road`` values, kept as a plain string.
    road: str
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        # Only the confidence range is checked here.
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedRelease:
|
||||
"""Structured representation of a parsed release name.
|
||||
|
||||
@@ -160,6 +160,37 @@ def load_group_schemas() -> dict:
|
||||
return result
|
||||
|
||||
|
||||
def load_scoring() -> dict:
    """Load the parse-scoring config from ``scoring.yaml``.

    Returns a dict with three top-level keys — ``weights``,
    ``penalties`` and ``thresholds`` — each mapping a name to a number.
    Built-in defaults form the base and are overlaid with whatever the
    YAML provides, so a missing or partial file only de-tunes the
    parser instead of breaking it.

    To honour that promise the overlay is defensive: a document that is
    not a mapping (e.g. an empty file) and a section that is not a
    mapping (e.g. ``weights: 3``) are both ignored in favour of the
    defaults.
    """
    raw = _load("scoring.yaml")
    # NOTE(review): ``_load`` is defined elsewhere. An empty YAML
    # document usually parses to ``None``, which would make ``raw.get``
    # raise — confirm whether ``_load`` already normalises that.
    if not isinstance(raw, dict):
        raw = {}

    def _merged(section: str, defaults: dict) -> dict:
        """Overlay the YAML *section* on *defaults* when it is a mapping."""
        override = raw.get(section)
        if isinstance(override, dict):
            defaults.update(override)
        return defaults

    return {
        "weights": _merged(
            "weights",
            {
                "title": 30,
                "media_type": 20,
                "year": 15,
                "season": 10,
                "episode": 5,
                "resolution": 5,
                "source": 5,
                "codec": 5,
                "group": 5,
            },
        ),
        "penalties": _merged(
            "penalties",
            {"unknown_token": 5, "max_unknown_penalty": 30},
        ),
        "thresholds": _merged("thresholds", {"shitty_min": 60}),
    }
|
||||
|
||||
|
||||
def load_separators() -> list[str]:
|
||||
"""Single-char token separators used by the release name tokenizer.
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ from .release import (
|
||||
load_metadata_extensions,
|
||||
load_non_video_extensions,
|
||||
load_resolutions,
|
||||
load_scoring,
|
||||
load_separators,
|
||||
load_sources,
|
||||
load_sources_extra,
|
||||
@@ -85,6 +86,9 @@ class YamlReleaseKnowledge:
|
||||
|
||||
self.separators: list[str] = load_separators()
|
||||
|
||||
# Parse-scoring config (weights / penalties / thresholds).
|
||||
self.scoring: dict = load_scoring()
|
||||
|
||||
# File-extension sets (used by application/infra modules, not by
|
||||
# the parser itself — kept here so there is a single ownership
|
||||
# point for release knowledge).
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
# Release parse scoring.
|
||||
#
|
||||
# `parse_release` is designed to return a `ParseReport` alongside the `ParsedRelease`.
|
||||
# The report carries a 0-100 confidence score computed from the annotated
|
||||
# tokens, plus the road decision (EASY / SHITTY / PATH_OF_PAIN).
|
||||
#
|
||||
# Why YAML: the weights and the SHITTY/PoP cutoff are tuning knobs we
|
||||
# expect to iterate on as fixtures grow. Keeping them in code would
|
||||
# mean a commit per tweak; here the user can adjust without touching
|
||||
# Python.
|
||||
#
|
||||
# Weights are awarded when the corresponding ParsedRelease field is
|
||||
# populated (non-None, non-"UNKNOWN" for group). Season and episode
|
||||
# only contribute when the parse looks like TV (season is not None).
|
||||
|
||||
weights:
|
||||
title: 30 # structural pivot — without it nothing else matters
|
||||
media_type: 20 # movie / tv_show / tv_complete / …
|
||||
year: 15
|
||||
season: 10 # only counted for TV-shaped releases
|
||||
episode: 5
|
||||
resolution: 5
|
||||
source: 5
|
||||
codec: 5
|
||||
group: 5 # "UNKNOWN" yields 0
|
||||
|
||||
# Penalty applied per UNKNOWN token left in the annotated stream.
|
||||
# Capped at `max_unknown_penalty` to keep a long-tail of garbage from
|
||||
# pushing every release into PoP.
|
||||
penalties:
|
||||
unknown_token: 5
|
||||
max_unknown_penalty: 30
|
||||
|
||||
# Decision thresholds.
|
||||
#
|
||||
# EASY is decided structurally (a known group schema matched) — it does
|
||||
# not look at the score. SHITTY vs PATH_OF_PAIN is decided here:
|
||||
#
|
||||
# score >= shitty_min → SHITTY (best-effort parse usable)
|
||||
# score < shitty_min → PATH_OF_PAIN (needs user / LLM help)
|
||||
thresholds:
|
||||
shitty_min: 60
|
||||
Reference in New Issue
Block a user