refactor(release): ParsedRelease.media_type & parse_path are strict enums

The fields were already typed as MediaTypeToken / ParsePath, but a
tolerant __post_init__ coerced raw strings into their enum form. With
MediaTypeToken(str, Enum) (and likewise ParsePath), the coercion served
no purpose — callers that passed '.value' got back the enum anyway, and
callers that passed an unknown string got a ValidationError, just as
they do now.

Strict mode: the constructor now rejects non-enum values directly,
including raw strings that match a valid member value. The two in-tree
builders (parse_release() and the parser pipeline) now produce enum
values; all .value sites have been removed. Drops the now-unused
_VALID_MEDIA_TYPES / _VALID_PARSE_PATHS lookup tables.
This commit is contained in:
2026-05-20 23:52:30 +02:00
parent c3767aacb6
commit 757e4045ee
4 changed files with 28 additions and 38 deletions
+8 -7
View File
@@ -29,6 +29,7 @@ arrives through ``kb: ReleaseKnowledge``.
from __future__ import annotations from __future__ import annotations
from ..ports.knowledge import ReleaseKnowledge from ..ports.knowledge import ReleaseKnowledge
from ..value_objects import MediaTypeToken
from .schema import GroupSchema from .schema import GroupSchema
from .tokens import Token, TokenRole from .tokens import Token, TokenRole
@@ -725,22 +726,22 @@ def assemble(
integrale_tokens = {t.upper() for t in kb.media_type_tokens.get("integrale", [])} integrale_tokens = {t.upper() for t in kb.media_type_tokens.get("integrale", [])}
if upper_tokens & doc_tokens: if upper_tokens & doc_tokens:
media_type = "documentary" media_type = MediaTypeToken.DOCUMENTARY
elif upper_tokens & concert_tokens: elif upper_tokens & concert_tokens:
media_type = "concert" media_type = MediaTypeToken.CONCERT
elif is_season_range: elif is_season_range:
media_type = "tv_complete" media_type = MediaTypeToken.TV_COMPLETE
elif ( elif (
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"} edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
or upper_tokens & integrale_tokens or upper_tokens & integrale_tokens
) and season is None: ) and season is None:
media_type = "tv_complete" media_type = MediaTypeToken.TV_COMPLETE
elif season is not None: elif season is not None:
media_type = "tv_show" media_type = MediaTypeToken.TV_SHOW
elif any((quality, source, codec, year)): elif any((quality, source, codec, year)):
media_type = "movie" media_type = MediaTypeToken.MOVIE
else: else:
media_type = "unknown" media_type = MediaTypeToken.UNKNOWN
return { return {
"title": title, "title": title,
+5 -5
View File
@@ -44,7 +44,7 @@ def parse_release(
3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a 3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a
group schema is known, SHITTY otherwise) → assemble → score. group schema is known, SHITTY otherwise) → assemble → score.
""" """
parse_path = ParsePath.DIRECT.value parse_path = ParsePath.DIRECT
# Apostrophes inside titles ("Don't", "L'avare") are common and should # Apostrophes inside titles ("Don't", "L'avare") are common and should
# not push the release through the AI fallback. Strip them up front so # not push the release through the AI fallback. Strip them up front so
@@ -53,11 +53,11 @@ def parse_release(
working_name = name working_name = name
if "'" in working_name: if "'" in working_name:
working_name = working_name.replace("'", "") working_name = working_name.replace("'", "")
parse_path = ParsePath.SANITIZED.value parse_path = ParsePath.SANITIZED
clean, site_tag = _v2.strip_site_tag(working_name) clean, site_tag = _v2.strip_site_tag(working_name)
if site_tag is not None: if site_tag is not None:
parse_path = ParsePath.SANITIZED.value parse_path = ParsePath.SANITIZED
if not _is_well_formed(clean, kb): if not _is_well_formed(clean, kb):
parsed = ParsedRelease( parsed = ParsedRelease(
@@ -74,9 +74,9 @@ def parse_release(
codec=None, codec=None,
group="UNKNOWN", group="UNKNOWN",
tech_string="", tech_string="",
media_type=MediaTypeToken.UNKNOWN.value, media_type=MediaTypeToken.UNKNOWN,
site_tag=site_tag, site_tag=site_tag,
parse_path=ParsePath.AI.value, parse_path=ParsePath.AI,
) )
report = ParseReport( report = ParseReport(
confidence=0, confidence=0,
+6 -17
View File
@@ -49,10 +49,6 @@ class ParsePath(str, Enum):
AI = "ai" AI = "ai"
_VALID_MEDIA_TYPES: frozenset[str] = frozenset(m.value for m in MediaTypeToken)
_VALID_PARSE_PATHS: frozenset[str] = frozenset(p.value for p in ParsePath)
def _strip_episode_from_normalized(normalized: str) -> str: def _strip_episode_from_normalized(normalized: str) -> str:
""" """
Remove all episode parts (Exx) from a normalized release name, keeping Sxx. Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
@@ -168,23 +164,16 @@ class ParsedRelease:
f"ParsedRelease.episode_end ({self.episode_end}) < " f"ParsedRelease.episode_end ({self.episode_end}) < "
f"episode ({self.episode})" f"episode ({self.episode})"
) )
# Coerce raw strings into their enum form (tolerant constructor).
if not isinstance(self.media_type, MediaTypeToken): if not isinstance(self.media_type, MediaTypeToken):
try:
self.media_type = MediaTypeToken(self.media_type)
except ValueError:
raise ValidationError( raise ValidationError(
f"ParsedRelease.media_type invalid: {self.media_type!r} " f"ParsedRelease.media_type must be a MediaTypeToken, "
f"(expected one of {sorted(_VALID_MEDIA_TYPES)})" f"got {type(self.media_type).__name__}: {self.media_type!r}"
) from None )
if not isinstance(self.parse_path, ParsePath): if not isinstance(self.parse_path, ParsePath):
try:
self.parse_path = ParsePath(self.parse_path)
except ValueError:
raise ValidationError( raise ValidationError(
f"ParsedRelease.parse_path invalid: {self.parse_path!r} " f"ParsedRelease.parse_path must be a ParsePath, "
f"(expected one of {sorted(_VALID_PARSE_PATHS)})" f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
) from None )
@property @property
def is_season_pack(self) -> bool: def is_season_pack(self) -> bool:
@@ -79,8 +79,8 @@ def _movie(year: int = 2020, **overrides) -> ParsedRelease:
codec="x264", codec="x264",
group="GROUP", group="GROUP",
tech_string="1080p.BluRay.x264", tech_string="1080p.BluRay.x264",
media_type=MediaTypeToken.MOVIE.value, media_type=MediaTypeToken.MOVIE,
parse_path=ParsePath.DIRECT.value, parse_path=ParsePath.DIRECT,
) )
base.update(overrides) base.update(overrides)
return ParsedRelease(**base) return ParsedRelease(**base)
@@ -121,8 +121,8 @@ class TestComputeScore:
codec="x265", codec="x265",
group="KONTRAST", group="KONTRAST",
tech_string="1080p.WEBRip.x265", tech_string="1080p.WEBRip.x265",
media_type=MediaTypeToken.TV_SHOW.value, media_type=MediaTypeToken.TV_SHOW,
parse_path=ParsePath.DIRECT.value, parse_path=ParsePath.DIRECT,
) )
tokens = [ tokens = [
Token("Oz", 0, TokenRole.TITLE), Token("Oz", 0, TokenRole.TITLE),
@@ -166,7 +166,7 @@ class TestComputeScore:
assert 0 <= score <= 100 assert 0 <= score <= 100
def test_unknown_media_type_does_not_count(self) -> None: def test_unknown_media_type_does_not_count(self) -> None:
parsed = _movie(media_type=MediaTypeToken.UNKNOWN.value) parsed = _movie(media_type=MediaTypeToken.UNKNOWN)
score = compute_score(parsed, _all_annotated(), _KB) score = compute_score(parsed, _all_annotated(), _KB)
# Loses the 20 of media_type vs baseline # Loses the 20 of media_type vs baseline
assert score == 85 - 20 assert score == 85 - 20
@@ -232,8 +232,8 @@ class TestCollectors:
codec=None, codec=None,
group="UNKNOWN", group="UNKNOWN",
tech_string="", tech_string="",
media_type=MediaTypeToken.UNKNOWN.value, media_type=MediaTypeToken.UNKNOWN,
parse_path=ParsePath.DIRECT.value, parse_path=ParsePath.DIRECT,
) )
assert set(collect_missing_critical(empty)) == { assert set(collect_missing_critical(empty)) == {
"title", "title",