Files
alfred/alfred/domain/release/value_objects.py
T
francwa 757e4045ee refactor(release): ParsedRelease.media_type & parse_path are strict enums
The fields were already typed as MediaTypeToken / ParsePath, but a
tolerant __post_init__ coerced raw strings into their enum form. With
MediaTypeToken(str, Enum) (and ParsePath idem), the coercion served no
purpose — callers that pass '.value' got back the enum anyway, and
callers that pass an unknown string got a ValidationError just like
they would now.

Strict mode: constructor rejects non-enum values directly. The two
in-tree builders (parse_release() and the parser pipeline) already
produce enum values; all .value sites have been removed. Drops the
unused _VALID_MEDIA_TYPES / _VALID_PARSE_PATHS lookup tables.
2026-05-20 23:52:30 +02:00

248 lines
9.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Release domain — value objects.
This module is **pure**: no I/O, no YAML loading, no knowledge-base
imports. All knowledge that the parser consumes is injected at runtime
via the ``ReleaseKnowledge`` port (see ``ports/knowledge.py``).
``ParsedRelease`` follows Option B of the snapshot-VO design: filesystem
sanitization is performed once at parse time and stored in
``title_sanitized``. The builder methods (``show_folder_name``,
``episode_filename``, etc.) are therefore pure string-formatting and do
**not** need access to any knowledge base — but they require the caller
to pass already-sanitized TMDB strings. The use case is responsible for
calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from enum import Enum
from ..shared.exceptions import ValidationError
class MediaTypeToken(str, Enum):
    """Closed set of values allowed in ``ParsedRelease.media_type``.

    The ``str`` mixin makes every member compare equal to its plain string
    value (``MediaTypeToken.MOVIE == "movie"``), so existing string-based
    comparisons, JSON serialization and TMDB DTO interop keep working
    unchanged. The enum doubles as documentation and as the set of valid
    values checked by ``ParsedRelease.__post_init__``.
    """

    MOVIE = "movie"
    TV_SHOW = "tv_show"
    TV_COMPLETE = "tv_complete"
    DOCUMENTARY = "documentary"
    CONCERT = "concert"
    OTHER = "other"
    # Default used by ``ParsedRelease`` when no media type is supplied.
    UNKNOWN = "unknown"
class ParsePath(str, Enum):
    """Route by which a ``ParsedRelease`` was produced.

    ``str``-backed for the same reasons as :class:`MediaTypeToken`: members
    compare equal to their plain string value.
    """

    # Default used by ``ParsedRelease`` when no parse path is supplied.
    DIRECT = "direct"
    SANITIZED = "sanitized"
    AI = "ai"
def _strip_episode_from_normalized(normalized: str) -> str:
    """
    Remove all episode parts (Exx) from a normalized release name, keeping Sxx.

    The name is split on ``"."``. Every token that starts with ``S``
    (case-insensitive) followed by at least two digits is reduced to ``S``
    plus its *entire* leading run of digits; whatever follows the season
    number inside that token is dropped. All other tokens are kept as-is.

    Oz.S03E01.1080p... → Oz.S03.1080p...
    Archer.S14E09E10E11.1080p... → Archer.S14.1080p...
    Show.S100E05.720p... → Show.S100.720p...

    Args:
        normalized: dot-separated release name.

    Returns:
        The same name with season/episode tokens collapsed to the season.
    """
    kept: list[str] = []
    for tok in normalized.split("."):
        upper = tok.upper()
        if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
            # Extend past the first two digits so seasons with three or
            # more digits (e.g. S100) are not truncated to "S10".
            end = 3
            while end < len(tok) and tok[end].isdigit():
                end += 1
            kept.append(tok[:end])
        else:
            kept.append(tok)
    return ".".join(kept)
@dataclass(frozen=True)
class ParseReport:
    """Immutable diagnostics that accompany a :class:`ParsedRelease`.

    ``parse_release`` returns a ``(ParsedRelease, ParseReport)`` pair. The
    report says *how confident* the parser is in its result and *which
    road* produced it. It lives apart from ``ParsedRelease`` so that the
    structural VO carries no meta-information about its own quality.

    Fields:
    - ``confidence``: integer 0–100 (see :func:`parser.scoring.compute_score`).
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"`` — not to be
      confused with ``ParsedRelease.parse_path`` (which describes the
      tokenization route, not the confidence tier).
    - ``unknown_tokens``: tokens that finished annotation with role
      UNKNOWN, in order of appearance.
    - ``missing_critical``: names of critical structural fields the
      parser couldn't fill (subset of ``{"title", "media_type", "year"}``).
    """

    confidence: int
    road: str  # one of parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Raise ``ValidationError`` unless ``confidence`` is within 0–100 inclusive."""
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
@dataclass
class ParsedRelease:
    """Structured representation of a parsed release name.

    ``title_sanitized`` carries the filesystem-safe form of ``title`` (computed
    by the parser at construction time using the injected knowledge base).
    Builder methods rely on it being already-sanitized — see module docstring.

    Construction is validated in ``__post_init__``; invalid field values
    raise ``ValidationError``.
    """

    raw: str  # original release name (untouched)
    clean: str  # raw minus site_tag and apostrophes — used by season_folder_name()
    title: str  # show/movie title (dots, no year/season/tech)
    title_sanitized: str  # title with filesystem-forbidden chars stripped
    year: int | None  # movie year or show start year (from TMDB)
    season: int | None  # season number (None for movies)
    episode: int | None  # first episode number (None if season-pack)
    episode_end: int | None  # last episode for multi-ep (None otherwise)
    quality: str | None  # 1080p, 2160p, …
    source: str | None  # WEBRip, BluRay, …
    codec: str | None  # x265, HEVC, …
    group: str  # release group, "UNKNOWN" if missing
    tech_string: str  # quality.source.codec joined with dots
    media_type: MediaTypeToken = MediaTypeToken.UNKNOWN
    site_tag: str | None = (
        None  # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
    )
    parse_path: ParsePath = ParsePath.DIRECT
    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
    bit_depth: str | None = None  # "10bit", "8bit", …
    hdr_format: str | None = None  # "DV", "HDR10", "DV.HDR10", …
    edition: str | None = None  # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
    distributor: str | None = None  # "NF", "AMZN", "DSNP", … (streaming origin)

    def __post_init__(self) -> None:
        """Validate field values.

        Raises:
            ValidationError: if ``raw`` or ``group`` is empty; if ``year``
                is outside 1888–2100, ``season`` outside 0–100, or
                ``episode`` / ``episode_end`` outside 0–9999; if
                ``episode_end`` is lower than ``episode``; or if
                ``media_type`` / ``parse_path`` is not a member of its enum.
        """
        if not self.raw:
            raise ValidationError("ParsedRelease.raw cannot be empty")
        if not self.group:
            raise ValidationError("ParsedRelease.group cannot be empty")
        if self.year is not None and not (1888 <= self.year <= 2100):
            raise ValidationError(
                f"ParsedRelease.year out of range: {self.year}"
            )
        if self.season is not None and not (0 <= self.season <= 100):
            raise ValidationError(
                f"ParsedRelease.season out of range: {self.season}"
            )
        if self.episode is not None and not (0 <= self.episode <= 9999):
            raise ValidationError(
                f"ParsedRelease.episode out of range: {self.episode}"
            )
        if self.episode_end is not None:
            if not (0 <= self.episode_end <= 9999):
                raise ValidationError(
                    f"ParsedRelease.episode_end out of range: {self.episode_end}"
                )
            if self.episode is not None and self.episode_end < self.episode:
                raise ValidationError(
                    f"ParsedRelease.episode_end ({self.episode_end}) < "
                    f"episode ({self.episode})"
                )
        # Strict mode: raw strings are rejected, only enum members pass.
        if not isinstance(self.media_type, MediaTypeToken):
            raise ValidationError(
                f"ParsedRelease.media_type must be a MediaTypeToken, "
                f"got {type(self.media_type).__name__}: {self.media_type!r}"
            )
        if not isinstance(self.parse_path, ParsePath):
            raise ValidationError(
                f"ParsedRelease.parse_path must be a ParsePath, "
                f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
            )

    @property
    def is_season_pack(self) -> bool:
        """True when a season is set but no episode number is."""
        return self.season is not None and self.episode is None

    @staticmethod
    def _with_extension(stem: str, ext: str) -> str:
        """Append ``ext`` (leading dots ignored) to ``stem``; an empty ``ext`` adds nothing."""
        ext_clean = ext.lstrip(".")
        # Avoid a dangling "." at the end of the name when there is no extension.
        return f"{stem}.{ext_clean}" if ext_clean else stem

    def show_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
        """
        Build the series root folder name.

        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Oz.1997.1080p.WEBRip.x265-KONTRAST

        ``tmdb_title_safe`` must already be filesystem-safe (the caller is
        expected to have run it through ``kb.sanitize_for_fs``). Spaces in it
        are replaced with dots; an empty ``tech_string`` becomes "Unknown".
        """
        title_part = tmdb_title_safe.replace(" ", ".")
        tech = self.tech_string or "Unknown"
        return f"{title_part}.{tmdb_year}.{tech}-{self.group}"

    def season_folder_name(self) -> str:
        """
        Build the season subfolder name = normalized release name (no episode).

        Example: Oz.S03.1080p.WEBRip.x265-KONTRAST

        For a single-episode release we still strip the episode token so the
        folder can hold the whole season.
        """
        return _strip_episode_from_normalized(self.clean)

    def episode_filename(self, tmdb_episode_title_safe: str | None, ext: str) -> str:
        """
        Build the episode filename.

        Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
        Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv

        ``tmdb_episode_title_safe`` must already be filesystem-safe; pass
        ``None`` to omit the episode title segment. The ``SxxExx`` segment is
        left out entirely when neither ``season`` nor ``episode`` is set, and
        no trailing dot is emitted when ``ext`` is empty.
        """
        season_tag = f"S{self.season:02d}" if self.season is not None else ""
        episode_tag = f"E{self.episode:02d}" if self.episode is not None else ""
        marker = season_tag + episode_tag

        segments = [self.title_sanitized]
        # Only add the optional segments when present so that a missing
        # marker cannot leave an empty ".." segment in the filename.
        if marker:
            segments.append(marker)
        if tmdb_episode_title_safe:
            segments.append(tmdb_episode_title_safe.replace(" ", "."))
        tech = self.tech_string or "Unknown"
        segments.append(f"{tech}-{self.group}")

        return self._with_extension(".".join(segments), ext)

    def movie_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
        """
        Build the movie folder name.

        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Inception.2010.1080p.BluRay.x265-GROUP
        """
        return self.show_folder_name(tmdb_title_safe, tmdb_year)

    def movie_filename(
        self, tmdb_title_safe: str, tmdb_year: int, ext: str
    ) -> str:
        """
        Build the movie filename (same as folder name + extension).

        Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv

        Leading dots in ``ext`` are ignored; an empty ``ext`` yields the
        folder name without a trailing dot.
        """
        return self._with_extension(
            self.movie_folder_name(tmdb_title_safe, tmdb_year), ext
        )