# alfred/alfred/domain/shared/value_objects.py

"""Shared value objects used across multiple domains."""

import re
from dataclasses import dataclass
from pathlib import Path

from .exceptions import ValidationError


@dataclass(frozen=True)
class ImdbId:
    """
    Value object representing an IMDb ID.

    IMDb IDs follow the format: tt followed by 7-8 digits (e.g., tt1375666).
    The whole string must match and only ASCII digits are accepted, so values
    with a trailing newline or with non-ASCII Unicode digits are rejected.

    Attributes:
        value: The identifier string, e.g. "tt1375666".

    Raises:
        ValidationError: If ``value`` is empty, is not a string, or does not
            match the expected format.
    """

    value: str

    def __post_init__(self):
        """Validate IMDb ID format."""
        if not self.value:
            raise ValidationError("IMDb ID cannot be empty")

        if not isinstance(self.value, str):
            raise ValidationError(f"IMDb ID must be a string, got {type(self.value)}")

        # IMDb ID format: tt + 7-8 digits.
        # fullmatch is used instead of match + "$" because "$" also matches
        # just before a trailing newline; [0-9] is used instead of \d because
        # \d matches any Unicode decimal digit in str patterns.
        if not re.fullmatch(r"tt[0-9]{7,8}", self.value):
            raise ValidationError(
                f"Invalid IMDb ID format: {self.value}. "
                "Expected format: tt followed by 7-8 digits (e.g., tt1375666)"
            )

    def __str__(self) -> str:
        """Return the raw identifier string."""
        return self.value

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ImdbId('tt1375666')."""
        return f"ImdbId('{self.value}')"

@dataclass(frozen=True)
class FilePath:
    """
    Value object representing a file path with validation.

    Ensures the path is valid and optionally checks existence.
    """

    value: Path

    def __init__(self, path: str | Path):
        """
        Initialize FilePath.

        Args:
            path: String or Path object representing the file path
        """
        if isinstance(path, str):
            path_obj = Path(path)
        elif isinstance(path, Path):
            path_obj = path
        else:
            raise ValidationError(f"Path must be str or Path, got {type(path)}")

        # Use object.__setattr__ because dataclass is frozen
        object.__setattr__(self, "value", path_obj)

    def __str__(self) -> str:
        return str(self.value)

    def __repr__(self) -> str:
        return f"FilePath('{self.value}')"


@dataclass(frozen=True)
class FileSize:
    """
    Value object representing a file size in bytes.

    Provides human-readable formatting using 1024-based units (B through TB).

    Attributes:
        bytes: Non-negative size in bytes.

    Raises:
        ValidationError: If ``bytes`` is not an integer or is negative.
    """

    bytes: int

    def __post_init__(self):
        """Validate file size."""
        if not isinstance(self.bytes, int):
            raise ValidationError(
                f"File size must be an integer, got {type(self.bytes)}"
            )

        if self.bytes < 0:
            raise ValidationError(f"File size cannot be negative: {self.bytes}")

    def to_human_readable(self) -> str:
        """
        Convert bytes to human-readable format.

        Byte counts below 1024 are shown as whole numbers; larger units are
        shown with two decimals. Sizes beyond the TB range stay in TB.

        Returns:
            String like "512 B", "1.50 GB", "500.00 MB", etc.
        """
        units = ["B", "KB", "MB", "GB", "TB"]
        size = float(self.bytes)
        unit_index = 0

        # Decide on the unit using the value as it will be displayed (two
        # decimals). Comparing the unrounded value would leave e.g. 1048575
        # bytes at 1023.999 KB, which then prints as "1024.00 KB" instead of
        # being promoted to "1.00 MB".
        while unit_index < len(units) - 1 and round(size, 2) >= 1024:
            size /= 1024
            unit_index += 1

        if unit_index == 0:
            return f"{int(size)} {units[unit_index]}"
        return f"{size:.2f} {units[unit_index]}"

    def __str__(self) -> str:
        """Return the human-readable form of the size."""
        return self.to_human_readable()

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. FileSize(1024)."""
        return f"FileSize({self.bytes})"

@dataclass(frozen=True)
class Language:
    """
    Canonical language value object.

    ``iso`` holds the 3-letter ISO 639-2/B (bibliographic) code such as "fre",
    "eng" or "ger" and is the identity of the object. Any other spelling of
    the language (ISO 639-1 code, ISO 639-2/T variant, alternative names) is
    kept in ``aliases`` and is only consulted by ``matches()``.

    Two Language objects compare equal, and hash identically, whenever their
    ``iso`` codes are equal; names and aliases play no part in equality.
    """

    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()

    def __post_init__(self):
        """Validate ``iso`` and normalize ``iso`` and ``aliases`` in place."""
        code = self.iso
        if not (isinstance(code, str) and code):
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(code) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )

        # Lowercase and trim every string alias; non-string entries are
        # skipped. dict.fromkeys drops duplicates while keeping the order of
        # first appearance.
        cleaned = (
            entry.lower().strip() for entry in self.aliases if isinstance(entry, str)
        )
        unique_aliases = tuple(dict.fromkeys(item for item in cleaned if item))

        # The dataclass is frozen, so fields are written via object.__setattr__.
        object.__setattr__(self, "iso", code.lower())
        object.__setattr__(self, "aliases", unique_aliases)

    def matches(self, raw: str) -> bool:
        """
        Tell whether ``raw`` names this language.

        ``raw`` is lowercased and stripped of surrounding whitespace, then
        compared against the canonical ``iso`` code, the lowercased english
        and native names, and the normalized aliases. Non-string or blank
        input never matches.
        """
        if not isinstance(raw, str):
            return False
        candidate = raw.lower().strip()
        if not candidate:
            return False
        # Short-circuit chain: later comparisons only run if earlier ones fail.
        return (
            candidate == self.iso
            or candidate == self.english_name.lower()
            or candidate == self.native_name.lower()
            or candidate in self.aliases
        )

    def __eq__(self, other: object) -> bool:
        """Compare by ``iso`` only; defer to the other operand for non-Languages."""
        if isinstance(other, Language):
            return self.iso == other.iso
        return NotImplemented

    def __hash__(self) -> int:
        """Hash on ``iso`` so hashing agrees with equality."""
        return hash(self.iso)

    def __str__(self) -> str:
        """Return the canonical ``iso`` code."""
        return self.iso

    def __repr__(self) -> str:
        """Return a short representation showing the code and english name."""
        return f"Language({self.iso!r}, {self.english_name!r})"

# Characters allowed in dot-separated folder/filename forms:
# alphanumerics, underscores, whitespace (about to be replaced with dots),
# literal dots, and hyphens. Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")

# A single whitespace character of any kind (space, tab, newline, ...).
_FS_WHITESPACE = re.compile(r"\s")


def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and convert whitespace to dots.

    Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
    ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.

    Every whitespace character becomes one dot, so tabs and newlines cannot
    leak into the result. Runs of whitespace are not collapsed: two adjacent
    spaces yield two adjacent dots.

    Args:
        title: The human-readable title to convert.

    Returns:
        The title with disallowed characters removed and each whitespace
        character replaced by a dot.
    """
    stripped = _FS_SAFE_CHARS.sub("", title)
    # The whitelist above keeps all of \s, so all of \s must be mapped to
    # dots here; replacing only " " would leave tabs/newlines in the name.
    return _FS_WHITESPACE.sub(".", stripped)