# alfred/alfred/domain/shared_TO_CHECK/value_objects.py

"""Shared value objects used across multiple domains."""

from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path

from .exceptions import ValidationError


@dataclass(frozen=True)
class ImdbId:
    """
    Value object representing an IMDb ID.

    IMDb IDs follow the format: tt followed by 7-8 digits (e.g., tt1375666)

    Raises:
        ValidationError: if ``value`` is empty, is not a ``str``, or does not
            match the expected format exactly.
    """

    value: str

    def __post_init__(self) -> None:
        """Validate IMDb ID format."""
        if not self.value:
            raise ValidationError("IMDb ID cannot be empty")

        if not isinstance(self.value, str):
            raise ValidationError(f"IMDb ID must be a string, got {type(self.value)}")

        # IMDb ID format: tt + 7-8 digits.
        # ``fullmatch`` is used rather than ``match`` with ``^...$`` because
        # ``$`` also matches just before a trailing newline, which would let
        # a value ending in a line break through. ``[0-9]`` is used rather
        # than ``\d`` because ``\d`` accepts any Unicode decimal digit.
        if not re.fullmatch(r"tt[0-9]{7,8}", self.value):
            raise ValidationError(
                f"Invalid IMDb ID format: {self.value}. "
                "Expected format: tt followed by 7-8 digits (e.g., tt1375666)"
            )

    def __str__(self) -> str:
        """Return the raw IMDb ID string."""
        return self.value

    def __repr__(self) -> str:
        """Return a constructor-like representation."""
        return f"ImdbId('{self.value}')"


@dataclass(frozen=True)
class TmdbId:
    """
    Value object wrapping a TMDB identifier.

    A TMDB id is a strictly positive integer. One id designates one work
    (movie or TV show) throughout the TMDB API; whether it is a ``movie``
    or a ``tv`` id is decided by the caller and is not carried by this
    object.

    The id is kept as an ``int`` rather than a zero-padded string, since
    TMDB API responses expose ids as integers.
    """

    value: int

    def __post_init__(self) -> None:
        """Reject anything that is not a genuine, strictly positive ``int``."""
        candidate = self.value

        # ``bool`` subclasses ``int``; without the explicit exclusion,
        # ``TmdbId(True)`` would quietly be accepted as ``TmdbId(1)``.
        is_plain_int = isinstance(candidate, int) and not isinstance(candidate, bool)
        if not is_plain_int:
            raise ValidationError(
                f"TMDB ID must be an integer, got {type(candidate)}"
            )

        if candidate <= 0:
            raise ValidationError(f"TMDB ID must be positive, got {candidate}")

    def __int__(self) -> int:
        """Return the underlying integer id."""
        return self.value

    def __str__(self) -> str:
        """Return the id as a decimal string."""
        return str(self.value)

    def __repr__(self) -> str:
        """Return a constructor-like representation."""
        return f"TmdbId({self.value})"

# GOOD
@dataclass(frozen=True)
class FilePath:
    """
    Value object wrapping a filesystem path.

    The constructor takes a ``str`` or a :class:`pathlib.Path`; whichever
    is given, ``value`` holds a ``Path`` once ``__post_init__`` has run.
    Any other type is rejected with a ``ValidationError``.
    """

    value: Path

    def __post_init__(self) -> None:
        """Coerce a ``str`` input to ``Path`` and reject every other type."""
        raw = self.value
        if not isinstance(raw, Path):
            if not isinstance(raw, str):
                raise ValidationError(f"Path must be str or Path, got {type(raw)}")
            # The dataclass is frozen, so plain attribute assignment is
            # blocked; go through ``object.__setattr__`` to store the Path.
            object.__setattr__(self, "value", Path(raw))

    def __repr__(self) -> str:
        """Return a constructor-like representation."""
        return f"FilePath('{self.value}')"

    def __str__(self) -> str:
        """Return the path rendered as a string."""
        return str(self.value)


@dataclass(frozen=True)
class FileSize:
    """
    Value object representing a file size in bytes.

    Provides human-readable formatting using binary multiples
    (1 KB = 1024 B), up to TB.

    Raises:
        ValidationError: if ``bytes`` is not a genuine ``int`` (``bool`` is
            rejected) or is negative.
    """

    bytes: int

    def __post_init__(self) -> None:
        """Validate file size."""
        # bool is a subclass of int in Python — reject explicitly so that
        # ``FileSize(True)`` does not silently become a 1-byte size.
        if isinstance(self.bytes, bool) or not isinstance(self.bytes, int):
            raise ValidationError(
                f"File size must be an integer, got {type(self.bytes)}"
            )

        if self.bytes < 0:
            raise ValidationError(f"File size cannot be negative: {self.bytes}")

    def to_human_readable(self) -> str:
        """
        Convert bytes to human-readable format.

        Sizes below 1024 bytes are rendered as a whole number of bytes;
        larger sizes are rendered with two decimals in the largest unit
        that keeps the displayed number below 1024 (TB is the ceiling).

        Returns:
            String like "1.50 GB", "500.00 MB", "512 B".
        """
        units = ("B", "KB", "MB", "GB", "TB")
        last_index = len(units) - 1
        size = float(self.bytes)
        unit_index = 0

        # Compare the value *as it will be displayed* (two decimals): a size
        # just under a unit boundary would otherwise round up on output and
        # print as "1024.00 KB" instead of "1.00 MB".
        while unit_index < last_index and round(size, 2) >= 1024:
            size /= 1024
            unit_index += 1

        if unit_index == 0:
            return f"{int(size)} {units[unit_index]}"
        return f"{size:.2f} {units[unit_index]}"

    def __str__(self) -> str:
        """Return the human-readable form."""
        return self.to_human_readable()

    def __repr__(self) -> str:
        """Return a constructor-like representation with the raw byte count."""
        return f"FileSize({self.bytes})"


@dataclass(frozen=True)
class Language:
    """
    Canonical language value object.

    The primary identifier is the ISO 639-2/B code (3 letters, bibliographic form,
    e.g. "fre", "eng", "ger"). This is what ffprobe emits and the project-wide
    canonical form. All other representations (ISO 639-1 code, ISO 639-2/T
    variant, english/native names, common spellings) live in ``aliases`` and are
    used by ``matches()`` for case-insensitive lookup.

    Equality and hashing are based solely on ``iso`` so two Language objects with
    the same canonical code are interchangeable regardless of aliases.

    The direct constructor is strict: ``iso`` must already be three lowercase
    ASCII letters and every alias must already be lowercase, trimmed and
    non-empty. Use :meth:`from_raw` for un-normalized input.
    """

    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """
        Validate that ``iso`` and ``aliases`` are already in normalized form.

        Raises:
            ValidationError: if ``iso`` is not a 3-character lowercase
                ASCII-letter string, or if any alias is not a lowercase,
                trimmed, non-empty string.
        """
        if not isinstance(self.iso, str) or not self.iso:
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(self.iso) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )
        if self.iso != self.iso.lower():
            raise ValidationError(
                f"Language.iso must be lowercase, got {self.iso!r} — "
                f"use Language.from_raw() to construct from arbitrary input"
            )
        # Length alone does not make a code: without this check values such
        # as "   " or "12!" would be accepted as a "3-letter" code.
        if not (self.iso.isascii() and self.iso.isalpha()):
            raise ValidationError(
                f"Language.iso must consist of ASCII letters only, got {self.iso!r}"
            )
        for alias in self.aliases:
            if not isinstance(alias, str) or alias != alias.lower().strip() or not alias:
                raise ValidationError(
                    f"Language.aliases must be lowercase non-empty strings, "
                    f"got {alias!r} — use Language.from_raw() to normalize"
                )

    @classmethod
    def from_raw(
        cls,
        iso: str,
        english_name: str,
        native_name: str,
        aliases: tuple[str, ...] | list[str] = (),
    ) -> Language:
        """
        Construct a Language from arbitrary (possibly un-normalized) input.

        Use this factory when loading from external sources (YAML, user input,
        third-party APIs) — it trims and lowercases the iso code and
        normalizes/dedups the alias tuple (non-string aliases are skipped,
        first occurrence wins). The direct constructor is strict and rejects
        un-normalized input.

        Raises:
            ValidationError: if, after normalization, ``iso`` is still not a
                valid code (including when ``iso`` is not a string at all).
        """
        seen: set[str] = set()
        normalized: list[str] = []
        for alias in aliases:
            if not isinstance(alias, str):
                continue
            a = alias.lower().strip()
            if a and a not in seen:
                seen.add(a)
                normalized.append(a)
        # Trim as well as lowercase, mirroring the alias normalization above.
        # A non-string iso is passed through untouched so that the strict
        # constructor reports it as a ValidationError.
        normalized_iso = iso.strip().lower() if isinstance(iso, str) else iso
        return cls(
            iso=normalized_iso,
            english_name=english_name,
            native_name=native_name,
            aliases=tuple(normalized),
        )

    def matches(self, raw: str) -> bool:
        """
        True if ``raw`` is any known representation of this language.

        Comparison is case-insensitive and whitespace-trimmed. The match space is
        the union of the canonical ``iso`` code, the english/native names, and
        every alias. Non-string or blank input never matches.
        """
        if not isinstance(raw, str):
            return False
        needle = raw.lower().strip()
        if not needle:
            return False
        if needle == self.iso:
            return True
        if needle == self.english_name.lower():
            return True
        if needle == self.native_name.lower():
            return True
        return needle in self.aliases

    def __eq__(self, other: object) -> bool:
        """Compare by canonical ``iso`` code only."""
        if not isinstance(other, Language):
            return NotImplemented
        return self.iso == other.iso

    def __hash__(self) -> int:
        """Hash by canonical ``iso`` code only, consistent with ``__eq__``."""
        return hash(self.iso)

    def __str__(self) -> str:
        """Return the canonical ``iso`` code."""
        return self.iso

    def __repr__(self) -> str:
        """Return a short representation with the code and english name."""
        return f"Language({self.iso!r}, {self.english_name!r})"


# Characters allowed in dot-separated folder/filename forms:
# alphanumerics, underscores, whitespace (about to be replaced with dots),
# literal dots, and hyphens. Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")

# One or more consecutive whitespace characters (spaces, tabs, newlines, ...).
_WHITESPACE_RUN = re.compile(r"\s+")


# USELESS - TO REMOVE
def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and convert whitespace to dots.

    Characters outside the allowed set are removed first; then surrounding
    whitespace is trimmed and every run of whitespace becomes a single dot.
    Collapsing runs matters because stripping a character that sat between
    two spaces (e.g. ``&``) would otherwise leave a double dot, and tabs or
    newlines would otherwise survive into the resulting name.

    Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
    ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.

    Args:
        title: Free-form title text.

    Returns:
        The sanitized, dot-separated form (empty if nothing usable remains).
    """
    cleaned = _FS_SAFE_CHARS.sub("", title).strip()
    return _WHITESPACE_RUN.sub(".", cleaned)