276 lines
8.2 KiB
Python
276 lines
8.2 KiB
Python
"""Shared value objects used across multiple domains."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from .exceptions import ValidationError
|
|
|
|
|
|
@dataclass(frozen=True)
class ImdbId:
    """
    Value object representing an IMDb ID.

    IMDb IDs follow the format: tt followed by 7-8 digits (e.g., tt1375666)

    Raises:
        ValidationError: If the value is empty, is not a string, or does not
            match the expected format exactly.
    """

    value: str

    def __post_init__(self):
        """Validate IMDb ID format."""
        if not self.value:
            raise ValidationError("IMDb ID cannot be empty")

        if not isinstance(self.value, str):
            raise ValidationError(f"IMDb ID must be a string, got {type(self.value)}")

        # IMDb ID format: tt + 7-8 digits.
        # ``fullmatch`` is used rather than ``match`` with a ``$`` anchor
        # because ``$`` also matches just before a trailing newline, which
        # would let "tt1375666\n" through. ``[0-9]`` is used rather than
        # ``\d`` because ``\d`` also matches non-ASCII Unicode digits.
        if not re.fullmatch(r"tt[0-9]{7,8}", self.value):
            raise ValidationError(
                f"Invalid IMDb ID format: {self.value}. "
                "Expected format: tt followed by 7-8 digits (e.g., tt1375666)"
            )

    def __str__(self) -> str:
        """Return the raw IMDb ID string."""
        return self.value

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``ImdbId('tt1375666')``."""
        return f"ImdbId('{self.value}')"
|
|
|
|
|
|
@dataclass(frozen=True)
class TmdbId:
    """
    Value object wrapping a TMDB identifier.

    A TMDB id is a strictly positive integer. The id alone does not say
    whether the work is a movie or a TV show: that qualifier (``movie`` /
    ``tv``) is supplied at the call site and is not stored here.

    The id is kept as a plain ``int`` (never a zero-padded string), matching
    how TMDB exposes ids in its API responses.

    Raises:
        ValidationError: If the value is a ``bool``, is not an ``int``, or is
            not strictly positive.
    """

    value: int

    def __post_init__(self) -> None:
        candidate = self.value

        # ``bool`` subclasses ``int``, so it has to be excluded by hand;
        # otherwise ``TmdbId(True)`` would quietly behave like ``TmdbId(1)``.
        is_plain_int = isinstance(candidate, int) and not isinstance(candidate, bool)
        if not is_plain_int:
            raise ValidationError(
                f"TMDB ID must be an integer, got {type(self.value)}"
            )

        if candidate <= 0:
            raise ValidationError(f"TMDB ID must be positive, got {self.value}")

    def __str__(self) -> str:
        """Return the id rendered as a decimal string."""
        return str(self.value)

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``TmdbId(27205)``."""
        return f"TmdbId({self.value})"

    def __int__(self) -> int:
        """Allow ``int(tmdb_id)`` to yield the wrapped integer."""
        return self.value
|
|
|
|
# GOOD
|
|
@dataclass(frozen=True)
class FilePath:
    """
    Value object wrapping a filesystem path.

    The constructor takes a ``str`` or a :class:`pathlib.Path`; whichever is
    given, ``value`` is a ``Path`` once ``__post_init__`` has run.

    Raises:
        ValidationError: If the value is neither ``str`` nor ``Path``.
    """

    value: Path

    def __post_init__(self) -> None:
        given = self.value

        if isinstance(given, str):
            # The dataclass is frozen, so regular attribute assignment is
            # blocked; go through ``object.__setattr__`` to normalize.
            object.__setattr__(self, "value", Path(given))
        elif not isinstance(given, Path):
            raise ValidationError(f"Path must be str or Path, got {type(self.value)}")

    def __str__(self) -> str:
        """Return the path rendered as a string."""
        return str(self.value)

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``FilePath('a/b')``."""
        return f"FilePath('{self.value}')"
|
|
|
|
|
|
@dataclass(frozen=True)
class FileSize:
    """
    Value object representing a file size in bytes.

    Provides human-readable formatting.

    Raises:
        ValidationError: If the size is not an integer or is negative.
    """

    bytes: int

    def __post_init__(self):
        """Validate file size."""
        if not isinstance(self.bytes, int):
            raise ValidationError(
                f"File size must be an integer, got {type(self.bytes)}"
            )

        if self.bytes < 0:
            raise ValidationError(f"File size cannot be negative: {self.bytes}")

    def to_human_readable(self) -> str:
        """
        Convert bytes to human-readable format.

        Units step by a factor of 1024 and are capped at TB. Plain byte
        counts are shown as integers; every larger unit is shown with two
        decimals.

        Returns:
            String like "1.50 GB", "500.00 MB" or "512 B".
        """
        units = ["B", "KB", "MB", "GB", "TB"]
        size = float(self.bytes)
        unit_index = 0

        # Compare the value *as it will be displayed* (two decimals): a size
        # such as 1048575 bytes is 1023.999 KB, which would otherwise be
        # printed as "1024.00 KB" instead of being promoted to "1.00 MB".
        while round(size, 2) >= 1024 and unit_index < len(units) - 1:
            size /= 1024
            unit_index += 1

        if unit_index == 0:
            return f"{int(size)} {units[unit_index]}"
        return f"{size:.2f} {units[unit_index]}"

    def __str__(self) -> str:
        """Return the human-readable size."""
        return self.to_human_readable()

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``FileSize(1024)``."""
        return f"FileSize({self.bytes})"
|
|
|
|
|
|
@dataclass(frozen=True)
class Language:
    """
    Canonical language value object.

    The primary identifier is the ISO 639-2/B code (3 letters, bibliographic form,
    e.g. "fre", "eng", "ger"). This is what ffprobe emits and the project-wide
    canonical form. All other representations (ISO 639-1 code, ISO 639-2/T
    variant, english/native names, common spellings) live in ``aliases`` and are
    used by ``matches()`` for case-insensitive lookup.

    Equality and hashing are based solely on ``iso`` so two Language objects with
    the same canonical code are interchangeable regardless of aliases.

    The direct constructor is strict: ``iso`` must be a lowercase 3-character
    string and every alias must be a lowercase, trimmed, non-empty string.
    Use :meth:`from_raw` to build an instance from un-normalized input.

    Raises:
        ValidationError: If ``iso`` or any alias violates the rules above.
    """

    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()

    def __post_init__(self):
        """Validate ``iso`` and ``aliases``; names are not validated here."""
        if not isinstance(self.iso, str) or not self.iso:
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(self.iso) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )
        if self.iso != self.iso.lower():
            raise ValidationError(
                f"Language.iso must be lowercase, got {self.iso!r} — "
                f"use Language.from_raw() to construct from arbitrary input"
            )
        for alias in self.aliases:
            if not isinstance(alias, str) or alias != alias.lower().strip() or not alias:
                raise ValidationError(
                    f"Language.aliases must be lowercase non-empty strings, "
                    f"got {alias!r} — use Language.from_raw() to normalize"
                )

    @classmethod
    def from_raw(
        cls,
        iso: str,
        english_name: str,
        native_name: str,
        aliases: tuple[str, ...] | list[str] = (),
    ) -> Language:
        """
        Construct a Language from arbitrary (possibly un-normalized) input.

        Use this factory when loading from external sources (YAML, user input,
        third-party APIs) — it trims and lowercases the iso code and
        normalizes/dedups the alias tuple. The direct constructor is strict
        and rejects un-normalized input.

        Aliases are lowercased and trimmed; non-string and empty entries are
        dropped, and duplicates are removed keeping first-seen order.

        Raises:
            ValidationError: If the normalized ``iso`` is still invalid
                (not a string, empty, or not exactly 3 characters).
        """
        cleaned = (
            alias.lower().strip() for alias in aliases if isinstance(alias, str)
        )
        # dict keys keep insertion order, which gives an order-preserving dedup.
        normalized = tuple(dict.fromkeys(a for a in cleaned if a))

        # Trim as well as lowercase, so " ENG " is handled like the aliases
        # are. A non-string iso is passed through untouched so the strict
        # constructor reports it as a ValidationError rather than this method
        # failing with AttributeError.
        canonical_iso = iso.strip().lower() if isinstance(iso, str) else iso

        return cls(
            iso=canonical_iso,
            english_name=english_name,
            native_name=native_name,
            aliases=normalized,
        )

    def matches(self, raw: str) -> bool:
        """
        True if ``raw`` is any known representation of this language.

        Comparison is case-insensitive and whitespace-trimmed. The match space is
        the union of the canonical ``iso`` code, the english/native names, and
        every alias. Non-string and blank input never matches.
        """
        if not isinstance(raw, str):
            return False
        needle = raw.lower().strip()
        if not needle:
            return False
        return (
            needle == self.iso
            or needle == self.english_name.lower()
            or needle == self.native_name.lower()
            or needle in self.aliases
        )

    def __eq__(self, other: object) -> bool:
        """Compare by ``iso`` only; defer to the other operand for foreign types."""
        if not isinstance(other, Language):
            return NotImplemented
        return self.iso == other.iso

    def __hash__(self) -> int:
        """Hash by ``iso`` only, consistent with ``__eq__``."""
        return hash(self.iso)

    def __str__(self) -> str:
        """Return the canonical ``iso`` code."""
        return self.iso

    def __repr__(self) -> str:
        """Return a short representation showing ``iso`` and the english name."""
        return f"Language({self.iso!r}, {self.english_name!r})"
|
|
|
|
|
|
# Characters allowed in dot-separated folder/filename forms:
# alphanumerics, underscores, whitespace (about to be replaced with dots),
# literal dots, and hyphens. Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")


# USELESS - TO REMOVE
def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and convert whitespace to dots.

    Characters other than word characters, whitespace, dots and hyphens are
    removed first. Each remaining run of whitespace (spaces, tabs, newlines)
    then becomes a single dot, and leading/trailing whitespace is dropped, so
    the result never contains whitespace and whitespace never yields doubled,
    leading or trailing dots.

    Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
    ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.

    Args:
        title: The human-readable title to convert.

    Returns:
        The sanitized, dot-separated name (empty if nothing usable remains).
    """
    cleaned = _FS_SAFE_CHARS.sub("", title)
    # ``str.split()`` with no argument splits on any whitespace run and
    # ignores leading/trailing whitespace, which is exactly the tokenization
    # needed before joining with dots.
    return ".".join(cleaned.split())
|