# alfred/alfred/domain/release/value_objects.py

"""Release domain — value objects.

This module is **pure**: no I/O, no YAML loading, no knowledge-base
imports. All knowledge that the parser consumes is injected at runtime
via the ``ReleaseKnowledge`` port (see ``ports/knowledge.py``).

``ParsedRelease`` follows Option B of the snapshot-VO design: filesystem
sanitization is performed once at parse time and stored in
``title_sanitized``. The builder methods (``show_folder_name``,
``episode_filename``, etc.) are therefore pure string-formatting and do
**not** need access to any knowledge base — but they require the caller
to pass already-sanitized TMDB strings. The use case is responsible for
calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum

from ..shared.exceptions import ValidationError


class MediaTypeToken(str, Enum):
    """
    Canonical values for ``ParsedRelease.media_type``.

    Inherits from ``str`` so existing string-based comparisons (``== "movie"``,
    JSON serialization, TMDB DTO interop) keep working unchanged. The enum
    serves both as documentation and as the set of valid values for
    ``ParsedRelease.__post_init__`` validation.

    Note that the validation is an ``isinstance`` check: a bare ``"movie"``
    string compares equal to ``MOVIE`` but is still refused when passed as
    ``media_type`` to ``ParsedRelease``.
    """

    MOVIE = "movie"
    TV_SHOW = "tv_show"
    TV_COMPLETE = "tv_complete"
    DOCUMENTARY = "documentary"
    CONCERT = "concert"
    OTHER = "other"
    UNKNOWN = "unknown"  # default value of ``ParsedRelease.media_type``


class ParsePath(str, Enum):
    """How a ``ParsedRelease`` was produced.

    ``str``-backed for the same reasons as :class:`MediaTypeToken`.
    ``ParsedRelease.__post_init__`` requires ``parse_path`` to be an actual
    instance of this enum, not merely an equal string.
    """

    DIRECT = "direct"  # default value of ``ParsedRelease.parse_path``
    SANITIZED = "sanitized"
    AI = "ai"


def _strip_episode_from_normalized(normalized: str) -> str:
    """
    Reduce every ``Sxx…`` token of a dot-separated release name to ``Sxx``.

    The name is split on ``"."``; any token whose upper-cased form starts
    with ``S`` followed by two digits is cut down to its first three
    characters (original casing kept). All other tokens pass through
    untouched, and the tokens are re-joined with ``"."``.

    Oz.S03E01.1080p...             → Oz.S03.1080p...
    Archer.S14E09E10E11.1080p...   → Archer.S14.1080p...

    NOTE(review): everything after the third character of a matching token
    is dropped, not only ``Exx`` parts — confirm that is intended for tokens
    such as ``S03E01-GROUP``.
    """

    def _season_only(piece: str) -> str:
        # The test runs on the upper-cased form; the slice is taken from the
        # original token so its casing survives.
        marker = piece.upper()
        has_season_prefix = (
            len(marker) >= 3 and marker[0] == "S" and marker[1:3].isdigit()
        )
        return piece[:3] if has_season_prefix else piece

    return ".".join(_season_only(piece) for piece in normalized.split("."))


@dataclass(frozen=True)
class ParseReport:
    """Immutable diagnostic companion of a :class:`ParsedRelease`.

    ``parse_release`` returns ``(ParsedRelease, ParseReport)``. The report
    says *how confident* the parser is and *which road* produced the
    result; it lives apart from ``ParsedRelease`` so the structural VO
    carries no meta-information about its own quality.

    Fields:

    - ``confidence``: integer 0–100 (see :func:`parser.scoring.compute_score`).
      Values outside that range are rejected at construction time.
    - ``road``: ``"easy"`` / ``"shitty"`` / ``"path_of_pain"`` — not to be
      confused with ``ParsedRelease.parse_path``, which describes the
      tokenization route rather than the confidence tier. The value is not
      validated here.
    - ``unknown_tokens``: tokens that finished annotation with role
      UNKNOWN, in order of appearance.
    - ``missing_critical``: names of critical structural fields the
      parser couldn't fill (subset of ``{"title", "media_type", "year"}``).

    Raises:
        ValidationError: if ``confidence`` is not within ``0..100``.
    """

    confidence: int
    road: str  # one of parser.scoring.Road values
    unknown_tokens: tuple[str, ...] = ()
    missing_critical: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Reject a ``confidence`` that falls outside the 0–100 scale."""
        if 0 <= self.confidence <= 100:
            return
        raise ValidationError(
            f"ParseReport.confidence out of range: {self.confidence}"
        )


@dataclass
class ParsedRelease:
    """Structured representation of a parsed release name.

    ``title_sanitized`` carries the filesystem-safe form of ``title`` (computed
    by the parser at construction time using the injected knowledge base).
    Builder methods rely on it being already-sanitized. Likewise, every TMDB
    string handed to a builder must already be filesystem-safe: the builders
    only format strings and never sanitize.

    Raises:
        ValidationError: from ``__post_init__`` when ``raw`` or ``group`` is
            empty, a numeric field is out of range, ``episode_end`` precedes
            ``episode``, or ``media_type`` / ``parse_path`` is not an
            instance of its enum.
    """

    raw: str  # original release name (untouched)
    clean: str  # raw minus site_tag and apostrophes — used by season_folder_name()
    title: str  # show/movie title (dots, no year/season/tech)
    title_sanitized: str  # title with filesystem-forbidden chars stripped
    year: int | None  # movie year or show start year (from TMDB)
    season: int | None  # season number (None for movies)
    episode: int | None  # first episode number (None if season-pack)
    episode_end: int | None  # last episode for multi-ep (None otherwise)
    quality: str | None  # 1080p, 2160p, …
    source: str | None  # WEBRip, BluRay, …
    codec: str | None  # x265, HEVC, …
    group: str  # release group, "UNKNOWN" if missing
    tech_string: str  # quality.source.codec joined with dots
    media_type: MediaTypeToken = MediaTypeToken.UNKNOWN
    site_tag: str | None = (
        None  # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
    )
    parse_path: ParsePath = ParsePath.DIRECT
    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
    bit_depth: str | None = None  # "10bit", "8bit", …
    hdr_format: str | None = None  # "DV", "HDR10", "DV.HDR10", …
    edition: str | None = None  # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
    distributor: str | None = None  # "NF", "AMZN", "DSNP", … (streaming origin)

    def __post_init__(self) -> None:
        """Validate the freshly built instance.

        Checks, in order: ``raw`` and ``group`` are non-empty; ``year`` is
        within 1888–2100; ``season`` within 0–100; ``episode`` and
        ``episode_end`` within 0–9999; ``episode_end`` is not smaller than
        ``episode`` when both are set; ``media_type`` and ``parse_path`` are
        instances of their enums. ``None`` skips the numeric checks.

        Raises:
            ValidationError: on the first check that fails.
        """
        if not self.raw:
            raise ValidationError("ParsedRelease.raw cannot be empty")
        if not self.group:
            raise ValidationError("ParsedRelease.group cannot be empty")
        if self.year is not None and not (1888 <= self.year <= 2100):
            raise ValidationError(
                f"ParsedRelease.year out of range: {self.year}"
            )
        if self.season is not None and not (0 <= self.season <= 100):
            raise ValidationError(
                f"ParsedRelease.season out of range: {self.season}"
            )
        if self.episode is not None and not (0 <= self.episode <= 9999):
            raise ValidationError(
                f"ParsedRelease.episode out of range: {self.episode}"
            )
        if self.episode_end is not None:
            if not (0 <= self.episode_end <= 9999):
                raise ValidationError(
                    f"ParsedRelease.episode_end out of range: {self.episode_end}"
                )
            # Ordering is only checkable when a first episode is known.
            if self.episode is not None and self.episode_end < self.episode:
                raise ValidationError(
                    f"ParsedRelease.episode_end ({self.episode_end}) < "
                    f"episode ({self.episode})"
                )
        # isinstance (not equality): a plain "movie" string equals the enum
        # member but must still be rejected.
        if not isinstance(self.media_type, MediaTypeToken):
            raise ValidationError(
                f"ParsedRelease.media_type must be a MediaTypeToken, "
                f"got {type(self.media_type).__name__}: {self.media_type!r}"
            )
        if not isinstance(self.parse_path, ParsePath):
            raise ValidationError(
                f"ParsedRelease.parse_path must be a ParsePath, "
                f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
            )

    @property
    def is_season_pack(self) -> bool:
        """True when a season is set but no episode number is."""
        return self.season is not None and self.episode is None

    def show_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
        """
        Build the series root folder name.

        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Oz.1997.1080p.WEBRip.x265-KONTRAST

        ``tmdb_title_safe`` must already be filesystem-safe (the caller is
        expected to have run it through ``kb.sanitize_for_fs``). Spaces in
        it are turned into dots; an empty ``tech_string`` is rendered as
        ``Unknown``.
        """
        title_part = tmdb_title_safe.replace(" ", ".")
        tech = self.tech_string or "Unknown"
        return f"{title_part}.{tmdb_year}.{tech}-{self.group}"

    def season_folder_name(self) -> str:
        """
        Build the season subfolder name = normalized release name (no episode).

        Example: Oz.S03.1080p.WEBRip.x265-KONTRAST
        For a single-episode release we still strip the episode token so the
        folder can hold the whole season. The name is derived from ``clean``.
        """
        return _strip_episode_from_normalized(self.clean)

    def episode_filename(self, tmdb_episode_title_safe: str | None, ext: str) -> str:
        """
        Build the episode filename.

        Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
        Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv

        ``tmdb_episode_title_safe`` must already be filesystem-safe; pass
        ``None`` (or an empty string) to omit the episode title segment.
        Leading dots on ``ext`` are ignored, so ``"mkv"`` and ``".mkv"``
        give the same result.

        Only the first episode number is rendered; ``episode_end`` is not
        part of the filename. When neither ``season`` nor ``episode`` is set,
        the ``SxxExx`` segment is left out entirely instead of producing an
        empty segment (``Title..Tech``).
        """
        season_tag = f"S{self.season:02d}" if self.season is not None else ""
        episode_tag = f"E{self.episode:02d}" if self.episode is not None else ""

        # Assemble dot-separated segments; optional ones are added only when
        # non-empty so the result never contains a doubled separator.
        segments = [self.title_sanitized]
        if season_tag or episode_tag:
            segments.append(season_tag + episode_tag)
        if tmdb_episode_title_safe:
            segments.append(tmdb_episode_title_safe.replace(" ", "."))
        segments.append(self.tech_string or "Unknown")

        stem = ".".join(segments)
        ext_clean = ext.lstrip(".")
        return f"{stem}-{self.group}.{ext_clean}"

    def movie_folder_name(self, tmdb_title_safe: str, tmdb_year: int) -> str:
        """
        Build the movie folder name.

        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Inception.2010.1080p.BluRay.x265-GROUP

        Same layout as ``show_folder_name``, to which this delegates.
        """
        return self.show_folder_name(tmdb_title_safe, tmdb_year)

    def movie_filename(
        self, tmdb_title_safe: str, tmdb_year: int, ext: str
    ) -> str:
        """
        Build the movie filename (same as folder name + extension).

        Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv

        Leading dots on ``ext`` are ignored.
        """
        ext_clean = ext.lstrip(".")
        return f"{self.movie_folder_name(tmdb_title_safe, tmdb_year)}.{ext_clean}"