refactor(release): typed enums + __post_init__ validation on ParsedRelease

ParsedRelease accepted any string for media_type/parse_path and had no
validation on numeric ranges (season=-5 was silently accepted). Tighten
both ends:

- New str-backed Enums MediaTypeToken and ParsePath. Inherit from str so
  every existing comparison ('== "movie"'), JSON serialization, and TMDB
  DTO interop keeps working unchanged.
- ParsedRelease.__post_init__ now validates: raw/group non-empty, year in
  1888-2100, season 0-100, episode 0-9999, episode_end >= episode,
  media_type/parse_path against the enum allowlist.
- services.py uses the enum members' .value everywhere instead of bare
  string literals — kills the typo risk.
This commit is contained in:
2026-05-19 14:17:56 +02:00
parent 481eeb5afd
commit da484d7474
2 changed files with 86 additions and 14 deletions
+12 -10
View File
@@ -16,7 +16,9 @@ from .value_objects import (
_RESOLUTIONS, _RESOLUTIONS,
_SOURCES, _SOURCES,
_VIDEO_META, _VIDEO_META,
MediaTypeToken,
ParsedRelease, ParsedRelease,
ParsePath,
) )
@@ -39,12 +41,12 @@ def parse_release(name: str) -> ParsedRelease:
and run token-level matchers (season/episode, tech, languages, audio, and run token-level matchers (season/episode, tech, languages, audio,
video, edition, title, year). video, edition, title, year).
""" """
parse_path = "direct" parse_path = ParsePath.DIRECT.value
# Always try to extract a bracket-enclosed site tag first. # Always try to extract a bracket-enclosed site tag first.
clean, site_tag = _strip_site_tag(name) clean, site_tag = _strip_site_tag(name)
if site_tag is not None: if site_tag is not None:
parse_path = "sanitized" parse_path = ParsePath.SANITIZED.value
if not _is_well_formed(clean): if not _is_well_formed(clean):
return ParsedRelease( return ParsedRelease(
@@ -60,9 +62,9 @@ def parse_release(name: str) -> ParsedRelease:
codec=None, codec=None,
group="UNKNOWN", group="UNKNOWN",
tech_string="", tech_string="",
media_type="unknown", media_type=MediaTypeToken.UNKNOWN.value,
site_tag=site_tag, site_tag=site_tag,
parse_path="ai", parse_path=ParsePath.AI.value,
) )
name = clean name = clean
@@ -137,19 +139,19 @@ def _infer_media_type(
integrale_tokens = {t.upper() for t in _MEDIA_TYPE_TOKENS.get("integrale", [])} integrale_tokens = {t.upper() for t in _MEDIA_TYPE_TOKENS.get("integrale", [])}
if upper_tokens & doc_tokens: if upper_tokens & doc_tokens:
return "documentary" return MediaTypeToken.DOCUMENTARY.value
if upper_tokens & concert_tokens: if upper_tokens & concert_tokens:
return "concert" return MediaTypeToken.CONCERT.value
if ( if (
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"} edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
or upper_tokens & integrale_tokens or upper_tokens & integrale_tokens
) and season is None: ) and season is None:
return "tv_complete" return MediaTypeToken.TV_COMPLETE.value
if season is not None: if season is not None:
return "tv_show" return MediaTypeToken.TV_SHOW.value
if any([quality, source, codec, year]): if any([quality, source, codec, year]):
return "movie" return MediaTypeToken.MOVIE.value
return "unknown" return MediaTypeToken.UNKNOWN.value
def _is_well_formed(name: str) -> bool: def _is_well_formed(name: str) -> bool:
+74 -4
View File
@@ -3,7 +3,9 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum
from ..shared.exceptions import ValidationError
from .knowledge import ( from .knowledge import (
load_audio, load_audio,
load_codecs, load_codecs,
@@ -50,6 +52,38 @@ def _sanitize_for_fs(text: str) -> str:
return text.translate(_WIN_FORBIDDEN_TABLE) return text.translate(_WIN_FORBIDDEN_TABLE)
class MediaTypeToken(str, Enum):
    """
    Canonical values for ``ParsedRelease.media_type``.

    Inherits from ``str`` so existing string-based comparisons (``== "movie"``,
    JSON serialization, TMDB DTO interop) keep working unchanged. The enum
    serves both as documentation and as the set of valid values for
    ``__post_init__`` validation.

    ``__str__`` is overridden to return the raw value: without it, ``str()``
    and f-string interpolation of a ``(str, Enum)`` member render as
    ``"MediaTypeToken.MOVIE"`` instead of ``"movie"``, which would break the
    drop-in-string promise as soon as a member (rather than ``.value``) is
    interpolated into text.
    """

    MOVIE = "movie"
    TV_SHOW = "tv_show"
    TV_COMPLETE = "tv_complete"
    DOCUMENTARY = "documentary"
    CONCERT = "concert"
    OTHER = "other"
    UNKNOWN = "unknown"

    def __str__(self) -> str:
        """Return the plain string value (same semantics as ``enum.StrEnum``)."""
        return self.value
class ParsePath(str, Enum):
    """
    How a ``ParsedRelease`` was produced.

    ``str``-backed so members compare equal to their plain string value
    (``ParsePath.AI == "ai"``). ``__str__`` returns the raw value so that
    ``str()`` and f-string interpolation yield ``"ai"`` rather than
    ``"ParsePath.AI"``.
    """

    DIRECT = "direct"
    SANITIZED = "sanitized"
    AI = "ai"

    def __str__(self) -> str:
        """Return the plain string value (same semantics as ``enum.StrEnum``)."""
        return self.value
# Allowlists of plain string values, derived from the enums so the two can
# never drift apart. Membership tests against these sets are what
# ``__post_init__`` uses to accept or reject ``media_type`` / ``parse_path``.
_VALID_MEDIA_TYPES: frozenset[str] = frozenset(
    token.value for token in MediaTypeToken
)
_VALID_PARSE_PATHS: frozenset[str] = frozenset(
    path.value for path in ParsePath
)
def _strip_episode_from_normalized(normalized: str) -> str: def _strip_episode_from_normalized(normalized: str) -> str:
""" """
Remove all episode parts (Exx) from a normalized release name, keeping Sxx. Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
@@ -85,13 +119,11 @@ class ParsedRelease:
codec: str | None # x265, HEVC, … codec: str | None # x265, HEVC, …
group: str # release group, "UNKNOWN" if missing group: str # release group, "UNKNOWN" if missing
tech_string: str # quality.source.codec joined with dots tech_string: str # quality.source.codec joined with dots
media_type: str = ( media_type: str = MediaTypeToken.UNKNOWN.value # one of MediaTypeToken values
"unknown" # "movie" | "tv_show" | "tv_complete" | "other" | "unknown"
)
site_tag: str | None = ( site_tag: str | None = (
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc" None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
) )
parse_path: str = "direct" # "direct" | "sanitized" | "ai" parse_path: str = ParsePath.DIRECT.value # one of ParsePath values
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], … languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", … audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
audio_channels: str | None = None # "5.1", "7.1", "2.0", … audio_channels: str | None = None # "5.1", "7.1", "2.0", …
@@ -99,6 +131,44 @@ class ParsedRelease:
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", … hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", … edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
def __post_init__(self) -> None:
    """
    Validate the instance right after dataclass initialisation.

    Checks run in a fixed order and stop at the first violation:
    ``raw`` and ``group`` must be truthy; ``year``, ``season``, ``episode``
    and ``episode_end`` (when not ``None``) must fall inside their inclusive
    ranges; ``episode_end`` must not precede ``episode`` when both are set;
    ``media_type`` and ``parse_path`` must belong to their allowlists.

    Raises:
        ValidationError: on the first check that fails.
    """
    if not self.raw:
        raise ValidationError("ParsedRelease.raw cannot be empty")
    if not self.group:
        raise ValidationError("ParsedRelease.group cannot be empty")

    # (message prefix, value, inclusive low, inclusive high) — None means
    # "not provided" and is skipped.
    bounded = (
        ("ParsedRelease.year out of range: ", self.year, 1888, 2100),
        ("ParsedRelease.season out of range: ", self.season, 0, 100),
        ("ParsedRelease.episode out of range: ", self.episode, 0, 9999),
        ("ParsedRelease.episode_end out of range: ", self.episode_end, 0, 9999),
    )
    for prefix, number, low, high in bounded:
        if number is not None and not (low <= number <= high):
            raise ValidationError(f"{prefix}{number}")

    # An episode range must not run backwards.
    first, last = self.episode, self.episode_end
    if last is not None and first is not None and last < first:
        raise ValidationError(
            f"ParsedRelease.episode_end ({last}) < "
            f"episode ({first})"
        )

    # (message prefix, value, allowlist) for the enum-backed string fields.
    constrained = (
        ("ParsedRelease.media_type invalid: ", self.media_type, _VALID_MEDIA_TYPES),
        ("ParsedRelease.parse_path invalid: ", self.parse_path, _VALID_PARSE_PATHS),
    )
    for prefix, token, allowed in constrained:
        if token not in allowed:
            raise ValidationError(
                f"{prefix}{token!r} "
                f"(expected one of {sorted(allowed)})"
            )
@property @property
def is_season_pack(self) -> bool: def is_season_pack(self) -> bool:
return self.season is not None and self.episode is None return self.season is not None and self.episode is None