feat(release): foundations for parse-confidence scoring
Add the building blocks for Phase A scoring without yet wiring them into parse_release. Nothing changes at runtime — parse_release still returns a single ParsedRelease — but the pieces needed to upgrade it in a follow-up commit are now in place.

- alfred/knowledge/release/scoring.yaml: weights / penalties / thresholds. Title and media_type are heavy (30 / 20), structural fields medium (year 15, season 10), tech fields light (5 each). Unknown-token penalty 5 capped at -30. SHITTY/PoP cutoff at 60.
- load_scoring() loader with safe defaults baked in: a missing or partial YAML only de-tunes, never breaks.
- ReleaseKnowledge port grows a 'scoring: dict' field. YamlReleaseKnowledge populates it from load_scoring().
- New parser/scoring.py module with Road enum (EASY / SHITTY / PATH_OF_PAIN, distinct from ParsePath which records the tokenization route), and pure functions: compute_score, decide_road, collect_unknown_tokens, collect_missing_critical.
- ParseReport frozen VO in value_objects.py — exported alongside ParsedRelease.
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
"""Release domain — release name parsing and naming conventions."""
|
"""Release domain — release name parsing and naming conventions."""
|
||||||
|
|
||||||
from .services import parse_release
|
from .services import parse_release
|
||||||
from .value_objects import ParsedRelease
|
from .value_objects import ParsedRelease, ParseReport
|
||||||
|
|
||||||
__all__ = ["ParsedRelease", "parse_release"]
|
__all__ = ["ParsedRelease", "ParseReport", "parse_release"]
|
||||||
|
|||||||
@@ -0,0 +1,139 @@
|
|||||||
|
"""Parse-confidence scoring.
|
||||||
|
|
||||||
|
Once wired in (follow-up change), ``parse_release`` will return a
:class:`ParseReport` alongside its :class:`ParsedRelease`. The report
carries:
|
||||||
|
|
||||||
|
- ``confidence``: integer 0–100 derived from which structural and
|
||||||
|
technical fields got populated, minus a penalty per UNKNOWN token
|
||||||
|
left in the annotated stream.
|
||||||
|
- ``road``: which of the three roads the parse took
|
||||||
|
(:class:`Road.EASY` / :class:`Road.SHITTY` / :class:`Road.PATH_OF_PAIN`).
|
||||||
|
- ``unknown_tokens``: textual residue, useful for diagnostics.
|
||||||
|
- ``missing_critical``: structural fields the score-tally found absent
|
||||||
|
  (e.g. ``("media_type", "year")``) — the caller can use this to drive
|
||||||
|
PoP recovery (questions, LLM call).
|
||||||
|
|
||||||
|
All weights, penalties and thresholds come from the injected knowledge
|
||||||
|
base (``kb.scoring``), itself loaded from
|
||||||
|
``alfred/knowledge/release/scoring.yaml``. No magic numbers here.
|
||||||
|
|
||||||
|
The scoring functions are pure — they consume the annotated token list
and the resulting :class:`ParsedRelease` and return the individual pieces
of the report (score, road, unknown tokens, missing fields). They are
meant to be called by ``services.parse_release`` after ``assemble`` has
run; that wiring is not in place yet.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
from ..ports.knowledge import ReleaseKnowledge
|
||||||
|
from ..value_objects import ParsedRelease
|
||||||
|
from .tokens import Token, TokenRole
|
||||||
|
|
||||||
|
|
||||||
|
class Road(str, Enum):
    """Confidence tier assigned to a parsed release name.

    Not to be confused with
    :class:`~alfred.domain.release.value_objects.ParsePath`: that one
    records the tokenization route (DIRECT / SANITIZED / AI), i.e. the
    *method*. ``Road`` grades the *result*.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``Road.EASY == "easy"``).
    """

    # A known group schema matched; the annotation was structural.
    EASY = "easy"
    # No schema; dictionary-driven annotation, score at or above the threshold.
    SHITTY = "shitty"
    # Score below the threshold; the parse needs outside help.
    PATH_OF_PAIN = "path_of_pain"
|
||||||
|
|
||||||
|
|
||||||
|
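# Critical structural fields — their absence drives the
# ``missing_critical`` list in the report.
# NOTE(review): ``collect_missing_critical`` spells out these same three
# names by hand instead of reading this tuple; keep the two in sync.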
|
||||||
|
_CRITICAL_FIELDS: tuple[str, ...] = ("title", "media_type", "year")
|
||||||
|
|
||||||
|
|
||||||
|
def _is_tv_shaped(parsed: ParsedRelease) -> bool:
    """Tell whether *parsed* looks like a TV release.

    The only signal used is the presence of a season number; season and
    episode weights are counted only when this returns ``True``.
    """
    season = parsed.season
    return season is not None
|
||||||
|
|
||||||
|
|
||||||
|
def compute_score(
    parsed: ParsedRelease,
    annotated: list[Token],
    kb: ReleaseKnowledge,
) -> int:
    """Score how much of the release name the parser managed to fill in.

    Every populated field earns the weight registered under its name in
    ``kb.scoring["weights"]`` (a name without a weight earns 0):

    - ``title``, ``year``, ``source``, ``codec`` — counted when set.
    - ``media_type`` — counted unless its ``value`` is ``"unknown"``.
    - ``season`` / ``episode`` — counted only for TV-shaped parses.
    - ``resolution`` — earned by ``parsed.quality``.
    - ``group`` — the placeholder ``"UNKNOWN"`` counts as absent.

    Each token in ``annotated`` whose role is still UNKNOWN then costs
    ``penalties["unknown_token"]``; the total deduction never exceeds
    ``penalties["max_unknown_penalty"]``.

    Returns:
        The score, clamped to ``[0, 100]``.
    """
    weights = kb.scoring["weights"]
    penalties = kb.scoring["penalties"]

    # (weight name, populated?) pairs, listed in the order the fields are scored.
    checks = [
        ("title", bool(parsed.title)),
        (
            "media_type",
            bool(parsed.media_type) and parsed.media_type.value != "unknown",
        ),
        ("year", parsed.year is not None),
    ]
    if _is_tv_shaped(parsed):
        checks += [
            ("season", parsed.season is not None),
            ("episode", parsed.episode is not None),
        ]
    checks += [
        # The weight is called "resolution" but the field is ``quality``.
        ("resolution", bool(parsed.quality)),
        ("source", bool(parsed.source)),
        ("codec", bool(parsed.codec)),
        ("group", bool(parsed.group) and parsed.group != "UNKNOWN"),
    ]
    earned = sum(weights.get(name, 0) for name, populated in checks if populated)

    leftovers = len([tok for tok in annotated if tok.role is TokenRole.UNKNOWN])
    deduction = min(
        leftovers * penalties.get("unknown_token", 0),
        penalties.get("max_unknown_penalty", 0),
    )

    return max(0, min(100, earned - deduction))
|
||||||
|
|
||||||
|
|
||||||
|
def collect_unknown_tokens(annotated: list[Token]) -> tuple[str, ...]:
    """Gather the text of the tokens whose role is still UNKNOWN.

    Order of appearance in ``annotated`` is preserved.
    """
    leftovers: list[str] = []
    for token in annotated:
        if token.role is TokenRole.UNKNOWN:
            leftovers.append(token.text)
    return tuple(leftovers)
|
||||||
|
|
||||||
|
|
||||||
|
def collect_missing_critical(parsed: ParsedRelease) -> tuple[str, ...]:
    """List the critical structural fields the parse left empty.

    A field is reported when:

    - ``title`` is falsy,
    - ``media_type`` is falsy or its ``value`` is ``"unknown"``,
    - ``year`` is ``None``.

    Returns:
        The absent field names, always in the order
        ``title``, ``media_type``, ``year``.
    """
    # Insertion order of this mapping fixes the order of the result.
    absent = {
        "title": not parsed.title,
        "media_type": not parsed.media_type
        or parsed.media_type.value == "unknown",
        "year": parsed.year is None,
    }
    return tuple(name for name, is_absent in absent.items() if is_absent)
|
||||||
|
|
||||||
|
|
||||||
|
def decide_road(
    score: int,
    has_schema: bool,
    kb: ReleaseKnowledge,
) -> Road:
    """Classify a parse into one of the three roads.

    A matched group schema settles the question on its own: the result is
    EASY whatever the score says, and ``kb`` is not consulted. Without a
    schema, the score is compared against
    ``kb.scoring["thresholds"]["shitty_min"]`` (60 when the key is absent):
    at or above the cutoff is SHITTY, below it is PATH_OF_PAIN.
    """
    if not has_schema:
        cutoff = kb.scoring["thresholds"].get("shitty_min", 60)
        return Road.SHITTY if score >= cutoff else Road.PATH_OF_PAIN
    return Road.EASY
|
||||||
@@ -40,6 +40,18 @@ class ReleaseKnowledge(Protocol):
|
|||||||
|
|
||||||
separators: list[str]
|
separators: list[str]
|
||||||
|
|
||||||
|
# --- Parse scoring (Phase A) ---
|
||||||
|
#
|
||||||
|
# ``scoring`` is a dict with three keys:
|
||||||
|
# - ``weights``: dict[field_name, int] field weight contribution
|
||||||
|
# - ``penalties``: {"unknown_token": int, "max_unknown_penalty": int}
|
||||||
|
# - ``thresholds``: {"shitty_min": int} SHITTY vs PATH_OF_PAIN cutoff
|
||||||
|
#
|
||||||
|
# Concrete values come from ``alfred/knowledge/release/scoring.yaml``.
|
||||||
|
# The loader fills in safe defaults so this dict is always populated.
|
||||||
|
|
||||||
|
scoring: dict
|
||||||
|
|
||||||
# --- File-extension sets (used by application/infra modules that work
|
# --- File-extension sets (used by application/infra modules that work
|
||||||
# directly with filesystem paths, e.g. media-type detection, video
|
# directly with filesystem paths, e.g. media-type detection, video
|
||||||
# lookup). Domain parsing itself doesn't touch these. ---
|
# lookup). Domain parsing itself doesn't touch these. ---
|
||||||
|
|||||||
@@ -72,6 +72,40 @@ def _strip_episode_from_normalized(normalized: str) -> str:
|
|||||||
return ".".join(result)
|
return ".".join(result)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ParseReport:
    """Immutable quality report that accompanies a :class:`ParsedRelease`.

    Designed to be returned by ``parse_release`` as the second element of
    ``(ParsedRelease, ParseReport)``. It says *how confident* the parser is
    and *which road* produced the result. Keeping it out of
    ``ParsedRelease`` leaves the structural VO free of meta-concerns about
    its own quality.

    Fields:

    - ``confidence``: integer 0–100 (see :func:`parser.scoring.compute_score`).
      Values outside that range are rejected at construction time.
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"``. This is the
      confidence tier, not the tokenization route described by
      ``ParsedRelease.parse_path``.
    - ``unknown_tokens``: tokens that finished annotation with role
      UNKNOWN, in order of appearance.
    - ``missing_critical``: names of the critical structural fields the
      parser couldn't fill (subset of ``{"title", "media_type", "year"}``).

    Raises:
        ValidationError: if ``confidence`` is not within ``[0, 100]``.
    """

    confidence: int
    road: str  # one of parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        # Guard clause: an in-range confidence needs no further checks.
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParsedRelease:
|
class ParsedRelease:
|
||||||
"""Structured representation of a parsed release name.
|
"""Structured representation of a parsed release name.
|
||||||
|
|||||||
@@ -160,6 +160,37 @@ def load_group_schemas() -> dict:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def load_scoring() -> dict:
    """Load the parse-scoring config from ``scoring.yaml``.

    Built-in defaults are overlaid with whatever the YAML provides, so a
    partial file only de-tunes the parser instead of breaking it. Anything
    that is not a mapping — a null/empty document, or a section set to a
    scalar or a list — is ignored and the defaults for it stay in force.

    NOTE(review): a *missing* file is left to ``_load``; this assumes it
    copes with that case on its own — TODO confirm.

    Returns:
        A fresh dict with exactly three keys, each mapping to a dict:
        ``weights`` (per-field score contribution), ``penalties``
        (``unknown_token``, ``max_unknown_penalty``) and ``thresholds``
        (``shitty_min``).
    """
    config: dict = {
        "weights": {
            "title": 30,
            "media_type": 20,
            "year": 15,
            "season": 10,
            "episode": 5,
            "resolution": 5,
            "source": 5,
            "codec": 5,
            "group": 5,
        },
        "penalties": {"unknown_token": 5, "max_unknown_penalty": 30},
        "thresholds": {"shitty_min": 60},
    }

    raw = _load("scoring.yaml")
    if not isinstance(raw, dict):
        # An empty YAML document parses to None; treat it as "no overrides".
        raw = {}

    for section, defaults in config.items():
        overrides = raw.get(section)
        # Skip null or malformed sections rather than crash in dict.update().
        if isinstance(overrides, dict):
            defaults.update(overrides)

    return config
|
||||||
|
|
||||||
|
|
||||||
def load_separators() -> list[str]:
|
def load_separators() -> list[str]:
|
||||||
"""Single-char token separators used by the release name tokenizer.
|
"""Single-char token separators used by the release name tokenizer.
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ from .release import (
|
|||||||
load_metadata_extensions,
|
load_metadata_extensions,
|
||||||
load_non_video_extensions,
|
load_non_video_extensions,
|
||||||
load_resolutions,
|
load_resolutions,
|
||||||
|
load_scoring,
|
||||||
load_separators,
|
load_separators,
|
||||||
load_sources,
|
load_sources,
|
||||||
load_sources_extra,
|
load_sources_extra,
|
||||||
@@ -85,6 +86,9 @@ class YamlReleaseKnowledge:
|
|||||||
|
|
||||||
self.separators: list[str] = load_separators()
|
self.separators: list[str] = load_separators()
|
||||||
|
|
||||||
|
# Parse-scoring config (weights / penalties / thresholds).
|
||||||
|
self.scoring: dict = load_scoring()
|
||||||
|
|
||||||
# File-extension sets (used by application/infra modules, not by
|
# File-extension sets (used by application/infra modules, not by
|
||||||
# the parser itself — kept here so there is a single ownership
|
# the parser itself — kept here so there is a single ownership
|
||||||
# point for release knowledge).
|
# point for release knowledge).
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
# Release parse scoring.
|
||||||
|
#
|
||||||
|
# Once wired in, `parse_release` will return a `ParseReport` alongside the
# `ParsedRelease`. The report carries a 0-100 confidence score computed from
# the populated fields minus a penalty per leftover UNKNOWN token, plus the
# road decision (EASY / SHITTY / PATH_OF_PAIN).
|
||||||
|
#
|
||||||
|
# Why YAML: the weights and the SHITTY/PoP cutoff are tuning knobs we
|
||||||
|
# expect to iterate on as fixtures grow. Keeping them in code would
|
||||||
|
# mean a commit per tweak; here the user can adjust without touching
|
||||||
|
# Python.
|
||||||
|
#
|
||||||
|
# Weights are awarded when the corresponding ParsedRelease field is
|
||||||
|
# populated (non-None, non-"UNKNOWN" for group). Season and episode
|
||||||
|
# only contribute when the parse looks like TV (season is not None).
|
||||||
|
|
||||||
|
weights:
|
||||||
|
title: 30 # structural pivot — without it nothing else matters
|
||||||
|
media_type: 20 # movie / tv_show / tv_complete / …
|
||||||
|
year: 15
|
||||||
|
season: 10 # only counted for TV-shaped releases
|
||||||
|
episode: 5
|
||||||
|
resolution: 5
|
||||||
|
source: 5
|
||||||
|
codec: 5
|
||||||
|
group: 5 # "UNKNOWN" yields 0
|
||||||
|
|
||||||
|
# Penalty applied per UNKNOWN token left in the annotated stream.
|
||||||
|
# Capped at `max_unknown_penalty` to keep a long-tail of garbage from
|
||||||
|
# pushing every release into PoP.
|
||||||
|
penalties:
|
||||||
|
unknown_token: 5
|
||||||
|
max_unknown_penalty: 30
|
||||||
|
|
||||||
|
# Decision thresholds.
|
||||||
|
#
|
||||||
|
# EASY is decided structurally (a known group schema matched) — it does
|
||||||
|
# not look at the score. SHITTY vs PATH_OF_PAIN is decided here:
|
||||||
|
#
|
||||||
|
# score >= shitty_min → SHITTY (best-effort parse usable)
|
||||||
|
# score < shitty_min → PATH_OF_PAIN (needs user / LLM help)
|
||||||
|
thresholds:
|
||||||
|
shitty_min: 60
|
||||||
Reference in New Issue
Block a user