757e4045ee
The fields were already typed as MediaTypeToken / ParsePath, but a tolerant __post_init__ coerced raw strings into their enum form. With MediaTypeToken(str, Enum) (and ParsePath idem), the coercion served no purpose — callers that pass '.value' got back the enum anyway, and callers that pass an unknown string got a ValidationError just like they would now. Strict mode: constructor rejects non-enum values directly. The two in-tree builders (parse_release() and the parser pipeline) already produce enum values; all .value sites have been removed. Drops the unused _VALID_MEDIA_TYPES / _VALID_PARSE_PATHS lookup tables.
248 lines
9.6 KiB
Python
248 lines
9.6 KiB
Python
"""Release domain — value objects.
|
||
|
||
This module is **pure**: no I/O, no YAML loading, no knowledge-base
|
||
imports. All knowledge that the parser consumes is injected at runtime
|
||
via the ``ReleaseKnowledge`` port (see ``ports/knowledge.py``).
|
||
|
||
``ParsedRelease`` follows Option B of the snapshot-VO design: filesystem
|
||
sanitization is performed once at parse time and stored in
|
||
``title_sanitized``. The builder methods (``show_folder_name``,
|
||
``episode_filename``, etc.) are therefore pure string-formatting and do
|
||
**not** need access to any knowledge base — but they require the caller
|
||
to pass already-sanitized TMDB strings. The use case is responsible for
|
||
calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass, field
|
||
from enum import Enum
|
||
|
||
from ..shared.exceptions import ValidationError
|
||
|
||
|
||
class MediaTypeToken(str, Enum):
    """Closed vocabulary for ``ParsedRelease.media_type``.

    The ``str`` mix-in makes every member a real string, so comparisons
    such as ``token == "movie"``, JSON serialization and TMDB DTO interop
    work without conversion. The members double as documentation and as
    the set of values accepted by ``__post_init__`` validation.
    """

    MOVIE = "movie"
    TV_SHOW = "tv_show"
    TV_COMPLETE = "tv_complete"
    DOCUMENTARY = "documentary"
    CONCERT = "concert"
    OTHER = "other"
    UNKNOWN = "unknown"
|
||
|
||
|
||
class ParsePath(str, Enum):
    """Route by which a ``ParsedRelease`` was produced.

    Backed by ``str`` for the same reasons as :class:`MediaTypeToken`:
    members compare equal to their plain string values.
    """

    DIRECT = "direct"
    SANITIZED = "sanitized"
    AI = "ai"
|
||
|
||
|
||
def _strip_episode_from_normalized(normalized: str) -> str:
|
||
"""
|
||
Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
|
||
|
||
Oz.S03E01.1080p... → Oz.S03.1080p...
|
||
Archer.S14E09E10E11.1080p... → Archer.S14.1080p...
|
||
"""
|
||
tokens = normalized.split(".")
|
||
result = []
|
||
for tok in tokens:
|
||
upper = tok.upper()
|
||
# Token is SxxExx... — keep only the Sxx part
|
||
if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
|
||
result.append(tok[:3]) # "S" + two digits
|
||
else:
|
||
result.append(tok)
|
||
return ".".join(result)
|
||
|
||
|
||
@dataclass(frozen=True)
class ParseReport:
    """Diagnostics that accompany a :class:`ParsedRelease`.

    ``parse_release`` returns a ``(ParsedRelease, ParseReport)`` pair. The
    report says *how confident* the parser is and *which road* led to the
    result. It is kept apart from ``ParsedRelease`` on purpose, so the
    structural value object carries no meta-information about its own
    quality.

    Fields:

    - ``confidence``: integer in 0–100 (see
      :func:`parser.scoring.compute_score`).
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"``. This is the
      confidence tier, not to be confused with ``ParsedRelease.parse_path``,
      which names the tokenization route.
    - ``unknown_tokens``: tokens whose role was still UNKNOWN once
      annotation finished, in order of appearance.
    - ``missing_critical``: names of the critical structural fields the
      parser could not fill (a subset of
      ``{"title", "media_type", "year"}``).
    """

    confidence: int
    road: str  # one of parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Reject a confidence score outside the inclusive range 0–100.

        Raises:
            ValidationError: if ``confidence`` is not within 0–100.
        """
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )
|
||
|
||
|
||
@dataclass
class ParsedRelease:
    """Parsed, structured view of a release name.

    ``title_sanitized`` is the filesystem-safe variant of ``title``; the
    parser computes it at construction time using the injected knowledge
    base. The builder methods assume it is already sanitized (see the
    module docstring).
    """

    # --- required fields -------------------------------------------------
    raw: str  # release name exactly as received
    clean: str  # raw without site_tag and apostrophes; input of season_folder_name()
    title: str  # show/movie title (dotted, without year/season/tech)
    title_sanitized: str  # title with filesystem-forbidden characters stripped
    year: int | None  # movie year or show start year (from TMDB)
    season: int | None  # season number; None for movies
    episode: int | None  # first episode number; None for a season pack
    episode_end: int | None  # last episode of a multi-episode release, else None
    quality: str | None  # 1080p, 2160p, …
    source: str | None  # WEBRip, BluRay, …
    codec: str | None  # x265, HEVC, …
    group: str  # release group; "UNKNOWN" when missing
    tech_string: str  # quality.source.codec joined with dots

    # --- optional fields -------------------------------------------------
    media_type: MediaTypeToken = MediaTypeToken.UNKNOWN
    site_tag: str | None = None  # site watermark stripped from the name, e.g. "TGx", "OxTorrent.vc"
    parse_path: ParsePath = ParsePath.DIRECT
    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
    bit_depth: str | None = None  # "10bit", "8bit", …
    hdr_format: str | None = None  # "DV", "HDR10", "DV.HDR10", …
    edition: str | None = None  # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
    distributor: str | None = None  # "NF", "AMZN", "DSNP", … (streaming origin)
|
||
|
||
def __post_init__(self) -> None:
    """Validate field invariants after the dataclass ``__init__`` has run.

    Checks run in a fixed order and the first violation is the one reported.

    Raises:
        ValidationError: if ``raw`` or ``group`` is empty; if ``year``
            (1888–2100), ``season`` (0–100), ``episode`` (0–9999) or
            ``episode_end`` (0–9999) is set but out of range; if
            ``episode_end`` is lower than ``episode``; or if
            ``media_type`` / ``parse_path`` is not a member of its enum.
    """
    # Mandatory, non-empty strings.
    if not self.raw:
        raise ValidationError("ParsedRelease.raw cannot be empty")
    if not self.group:
        raise ValidationError("ParsedRelease.group cannot be empty")

    # Optional numeric fields: None means "not set" and is always accepted.
    if self.year is not None:
        if not (1888 <= self.year <= 2100):
            raise ValidationError(
                f"ParsedRelease.year out of range: {self.year}"
            )
    if self.season is not None:
        if not (0 <= self.season <= 100):
            raise ValidationError(
                f"ParsedRelease.season out of range: {self.season}"
            )
    if self.episode is not None:
        if not (0 <= self.episode <= 9999):
            raise ValidationError(
                f"ParsedRelease.episode out of range: {self.episode}"
            )
    if self.episode_end is not None and not (0 <= self.episode_end <= 9999):
        raise ValidationError(
            f"ParsedRelease.episode_end out of range: {self.episode_end}"
        )

    # A multi-episode range must not run backwards.
    if (
        self.episode_end is not None
        and self.episode is not None
        and self.episode_end < self.episode
    ):
        raise ValidationError(
            f"ParsedRelease.episode_end ({self.episode_end}) < "
            f"episode ({self.episode})"
        )

    # Strict typing: enum members only, raw strings are rejected.
    if not isinstance(self.media_type, MediaTypeToken):
        raise ValidationError(
            f"ParsedRelease.media_type must be a MediaTypeToken, "
            f"got {type(self.media_type).__name__}: {self.media_type!r}"
        )
    if not isinstance(self.parse_path, ParsePath):
        raise ValidationError(
            f"ParsedRelease.parse_path must be a ParsePath, "
            f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
        )
|
||
|
||
@property
|
||
def is_season_pack(self) -> bool:
|
||
return self.season is not None and self.episode is None
|
||
|
||
def show_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
|
||
"""
|
||
Build the series root folder name.
|
||
|
||
Format: {Title}.{Year}.{Tech}-{Group}
|
||
Example: Oz.1997.1080p.WEBRip.x265-KONTRAST
|
||
|
||
``tmdb_title_safe`` must already be filesystem-safe (the caller is
|
||
expected to have run it through ``kb.sanitize_for_fs``).
|
||
"""
|
||
title_part = tmdb_title_safe.replace(" ", ".")
|
||
tech = self.tech_string or "Unknown"
|
||
return f"{title_part}.{tmdb_year}.{tech}-{self.group}"
|
||
|
||
def season_folder_name(self) -> str:
    """Return the season subfolder name.

    The name is the normalized release name (``clean``) with its episode
    part removed, e.g. ``Oz.S03.1080p.WEBRip.x265-KONTRAST``. The episode
    token is stripped even for a single-episode release, so that the
    folder can hold the whole season.
    """
    normalized_name = self.clean
    return _strip_episode_from_normalized(normalized_name)
|
||
|
||
def episode_filename(self, tmdb_episode_title_safe: str | None, ext: str) -> str:
|
||
"""
|
||
Build the episode filename.
|
||
|
||
Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
|
||
Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv
|
||
|
||
``tmdb_episode_title_safe`` must already be filesystem-safe; pass
|
||
``None`` to omit the episode title segment.
|
||
"""
|
||
title_part = self.title_sanitized
|
||
s = f"S{self.season:02d}" if self.season is not None else ""
|
||
e = f"E{self.episode:02d}" if self.episode is not None else ""
|
||
se = s + e
|
||
|
||
ep_title = ""
|
||
if tmdb_episode_title_safe:
|
||
ep_title = "." + tmdb_episode_title_safe.replace(" ", ".")
|
||
|
||
tech = self.tech_string or "Unknown"
|
||
ext_clean = ext.lstrip(".")
|
||
return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}"
|
||
|
||
def movie_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
|
||
"""
|
||
Build the movie folder name.
|
||
|
||
Format: {Title}.{Year}.{Tech}-{Group}
|
||
Example: Inception.2010.1080p.BluRay.x265-GROUP
|
||
"""
|
||
return self.show_folder_name(tmdb_title_safe, tmdb_year)
|
||
|
||
def movie_filename(
|
||
self, tmdb_title_safe: str, tmdb_year: int, ext: str
|
||
) -> str:
|
||
"""
|
||
Build the movie filename (same as folder name + extension).
|
||
|
||
Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv
|
||
"""
|
||
ext_clean = ext.lstrip(".")
|
||
return f"{self.movie_folder_name(tmdb_title_safe, tmdb_year)}.{ext_clean}"
|