Files
alfred/alfred/domain/shared_TO_CHECK/value_objects.py
T
2026-05-26 21:45:11 +02:00

276 lines
8.2 KiB
Python

"""Shared value objects used across multiple domains."""
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from .exceptions import ValidationError
@dataclass(frozen=True)
class ImdbId:
    """
    Value object representing an IMDb ID.

    IMDb IDs follow the format: ``tt`` followed by 7-8 ASCII digits
    (e.g., ``tt1375666``).

    Raises:
        ValidationError: if the value is empty, is not a string, or does
            not match the expected format exactly.
    """

    value: str

    def __post_init__(self) -> None:
        """Validate IMDb ID format."""
        if not self.value:
            raise ValidationError("IMDb ID cannot be empty")
        if not isinstance(self.value, str):
            raise ValidationError(f"IMDb ID must be a string, got {type(self.value)}")
        # ``fullmatch`` instead of ``match`` + ``$``: ``$`` also matches just
        # before a trailing newline, which would let "tt1375666\n" through.
        # ``[0-9]`` instead of ``\d``: on ``str`` patterns ``\d`` matches any
        # Unicode decimal digit, not only ASCII 0-9.
        if not re.fullmatch(r"tt[0-9]{7,8}", self.value):
            raise ValidationError(
                f"Invalid IMDb ID format: {self.value}. "
                "Expected format: tt followed by 7-8 digits (e.g., tt1375666)"
            )

    def __str__(self) -> str:
        """Return the raw IMDb ID string."""
        return self.value

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``ImdbId('tt1375666')``."""
        return f"ImdbId('{self.value}')"
@dataclass(frozen=True)
class TmdbId:
    """
    Value object wrapping a TMDB identifier.

    A TMDB id is a strictly positive integer. The id alone does not say
    whether it designates a movie or a TV show; that qualifier (``movie`` /
    ``tv``) is supplied at the call site rather than stored here.

    The id is kept as a plain ``int`` (never a zero-padded string), which is
    how TMDB exposes ids in its API responses.

    Raises:
        ValidationError: if the value is not an ``int`` (``bool`` included)
            or is not strictly positive.
    """

    value: int

    def __post_init__(self) -> None:
        """Reject non-integers, booleans and non-positive values."""
        candidate = self.value
        # ``bool`` passes ``isinstance(x, int)``; exclude it explicitly so
        # ``TmdbId(True)`` is an error instead of quietly meaning ``TmdbId(1)``.
        is_plain_int = isinstance(candidate, int) and not isinstance(candidate, bool)
        if not is_plain_int:
            raise ValidationError(
                f"TMDB ID must be an integer, got {type(self.value)}"
            )
        if candidate < 1:
            raise ValidationError(f"TMDB ID must be positive, got {self.value}")

    def __int__(self) -> int:
        """Allow ``int(tmdb_id)`` to recover the underlying integer."""
        return self.value

    def __str__(self) -> str:
        """Return the id as a decimal string."""
        return str(self.value)

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``TmdbId(603)``."""
        return f"TmdbId({self.value})"
# GOOD
@dataclass(frozen=True)
class FilePath:
    """
    Value object wrapping a filesystem path.

    The constructor takes a ``str`` or a :class:`pathlib.Path`; a string is
    converted so that ``value`` is always a ``Path`` once construction
    completes.

    Raises:
        ValidationError: if the value is neither ``str`` nor ``Path``.
    """

    value: Path

    def __post_init__(self) -> None:
        """Coerce a ``str`` value to ``Path`` and reject any other type."""
        supplied = self.value
        if isinstance(supplied, str):
            # The dataclass is frozen, so normal attribute assignment is
            # blocked; go through ``object.__setattr__`` to store the Path.
            object.__setattr__(self, "value", Path(supplied))
        elif not isinstance(supplied, Path):
            raise ValidationError(f"Path must be str or Path, got {type(self.value)}")

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``FilePath('a/b')``."""
        return f"FilePath('{self.value}')"

    def __str__(self) -> str:
        """Return the path rendered as a string."""
        return str(self.value)
@dataclass(frozen=True)
class FileSize:
    """
    Value object representing a file size in bytes.

    Provides human-readable formatting through :meth:`to_human_readable`
    (also used by ``str()``).

    Raises:
        ValidationError: if the size is not an ``int`` (``bool`` included)
            or is negative.
    """

    bytes: int

    def __post_init__(self) -> None:
        """Validate file size."""
        # ``bool`` is a subclass of ``int``; reject it explicitly so that
        # ``FileSize(True)`` does not silently become a 1-byte size.
        if isinstance(self.bytes, bool) or not isinstance(self.bytes, int):
            raise ValidationError(
                f"File size must be an integer, got {type(self.bytes)}"
            )
        if self.bytes < 0:
            raise ValidationError(f"File size cannot be negative: {self.bytes}")

    def to_human_readable(self) -> str:
        """
        Convert bytes to a human-readable string using 1024-based units.

        Returns:
            A whole number for plain bytes (``"500 B"``), otherwise two
            decimals with the largest fitting unit up to TB
            (``"1.50 GB"``, ``"500.00 MB"``).
        """
        units = ("B", "KB", "MB", "GB", "TB")
        last_index = len(units) - 1
        size = float(self.bytes)
        unit_index = 0
        while size >= 1024 and unit_index < last_index:
            size /= 1024
            unit_index += 1
        # A value just under the next unit (e.g. 1023.999 KB) would be
        # rendered as "1024.00 KB" once rounded to two decimals; promote it
        # to the next unit so the output reads "1.00 MB" instead.
        if unit_index < last_index and round(size, 2) >= 1024:
            size /= 1024
            unit_index += 1
        if unit_index == 0:
            return f"{int(size)} {units[unit_index]}"
        return f"{size:.2f} {units[unit_index]}"

    def __str__(self) -> str:
        """Return the human-readable form."""
        return self.to_human_readable()

    def __repr__(self) -> str:
        """Return a constructor-like representation, e.g. ``FileSize(1024)``."""
        return f"FileSize({self.bytes})"
@dataclass(frozen=True)
class Language:
    """
    Canonical language value object.

    The primary identifier is the ISO 639-2/B code (3 letters, bibliographic form,
    e.g. "fre", "eng", "ger"). This is what ffprobe emits and the project-wide
    canonical form. All other representations (ISO 639-1 code, ISO 639-2/T
    variant, english/native names, common spellings) live in ``aliases`` and are
    used by ``matches()`` for case-insensitive lookup.

    Equality and hashing are based solely on ``iso`` so two Language objects with
    the same canonical code are interchangeable regardless of aliases.

    The direct constructor is strict and raises ``ValidationError`` on
    un-normalized input; use :meth:`from_raw` for arbitrary input.
    """

    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Validate that ``iso`` and every alias are already normalized."""
        if not isinstance(self.iso, str) or not self.iso:
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(self.iso) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )
        if self.iso != self.iso.lower():
            # The separator before "use" keeps the hint from running into
            # the repr of the offending value.
            raise ValidationError(
                f"Language.iso must be lowercase, got {self.iso!r} — "
                "use Language.from_raw() to construct from arbitrary input"
            )
        for alias in self.aliases:
            if not isinstance(alias, str) or alias != alias.lower().strip() or not alias:
                raise ValidationError(
                    f"Language.aliases must be lowercase non-empty strings, "
                    f"got {alias!r} — use Language.from_raw() to normalize"
                )

    @classmethod
    def from_raw(
        cls,
        iso: str,
        english_name: str,
        native_name: str,
        aliases: tuple[str, ...] | list[str] = (),
    ) -> Language:
        """
        Construct a Language from arbitrary (possibly un-normalized) input.

        Use this factory when loading from external sources (YAML, user input,
        third-party APIs). It trims and lowercases the iso code, and
        lowercases, trims and de-duplicates the aliases, preserving first-seen
        order. Non-string and empty aliases are dropped.

        Raises:
            ValidationError: from the constructor, if ``iso`` is still
                invalid after normalization (wrong length, not a string).
        """
        # Leave a non-string iso untouched so the constructor reports it
        # with its own ValidationError instead of an AttributeError here.
        normalized_iso = iso.strip().lower() if isinstance(iso, str) else iso
        cleaned = (alias.lower().strip() for alias in aliases if isinstance(alias, str))
        # dict.fromkeys de-duplicates while keeping insertion order.
        normalized_aliases = tuple(dict.fromkeys(a for a in cleaned if a))
        return cls(
            iso=normalized_iso,
            english_name=english_name,
            native_name=native_name,
            aliases=normalized_aliases,
        )

    def matches(self, raw: str) -> bool:
        """
        True if ``raw`` is any known representation of this language.

        Comparison is case-insensitive and whitespace-trimmed. The match space is
        the union of the canonical ``iso`` code, the english/native names, and
        every alias. Non-string or blank input never matches.
        """
        if not isinstance(raw, str):
            return False
        needle = raw.lower().strip()
        if not needle:
            return False
        return (
            needle == self.iso
            or needle == self.english_name.lower()
            or needle == self.native_name.lower()
            or needle in self.aliases
        )

    def __eq__(self, other: object) -> bool:
        """Compare on ``iso`` only; defer to the other operand for foreign types."""
        if not isinstance(other, Language):
            return NotImplemented
        return self.iso == other.iso

    def __hash__(self) -> int:
        """Hash on ``iso`` only, consistent with ``__eq__``."""
        return hash(self.iso)

    def __str__(self) -> str:
        """Return the canonical ISO code."""
        return self.iso

    def __repr__(self) -> str:
        """Return a short representation showing the iso code and english name."""
        return f"Language({self.iso!r}, {self.english_name!r})"
# Characters allowed in dot-separated folder/filename forms:
# alphanumerics, underscores, whitespace (about to be replaced with dots),
# literal dots, and hyphens. Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")


# USELESS - TO REMOVE
def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and join its words with dots.

    Characters outside the safe set are dropped, then every run of
    whitespace (spaces, tabs, newlines) becomes a single dot. Leading and
    trailing whitespace is discarded, so the result never starts or ends
    with a dot that came from a space.

    Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"``,
    ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"`` and
    ``Tom.Jerry`` from ``"Tom & Jerry"``.

    Dots already present in the title are kept as-is, so ``"Mr. Robot"``
    still yields ``Mr..Robot``.
    """
    # split() with no argument collapses whitespace runs and trims the ends,
    # which avoids the "Tom..Jerry" double dot left behind when a stripped
    # character sat between two spaces.
    return ".".join(_FS_SAFE_CHARS.sub("", title).split())