diff --git a/CHANGELOG.md b/CHANGELOG.md index c756dfa..e5f1608 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,31 @@ callers). ### Added +- **`.alfred` sidecar serializer + (`alfred/infrastructure/persistence/dot_alfred/`).** Implements step 2 + of the `specs/dot_alfred.md` plan. Pure-dict in/out + (`serialize(sidecar) -> dict`, `deserialize(data) -> ShowSidecar`) — + YAML I/O lives in the repository layer (step 3) and is kept out for + trivial testability. Ships the DTOs that mirror the YAML schema + field-for-field (`ShowSidecar`, `SeasonSidecar`, `EpisodeSidecar`, + `SubtitleEntry`). The sidecar acts as a **scan cache**: it stores + only what is genuinely costly to recompute — folder/file paths + (skipping the FS walk) and probed track metadata (skipping ffprobe). + Release identifiers (group, source, quality, codec) live in folder + and file names and are derived on demand by the parser — they are + deliberately absent from the schema and rejected on deserialize. The + serializer is **strict on schema**: unknown keys at any level raise + `SidecarSchemaError`, missing required fields raise clearly, and + `bool` cannot sneak in as a season/episode number. Optional fields + (`tmdb_id`, empty `audio`/`subtitles`/`episodes`) are omitted from + the output rather than emitted as `null` / `[]`. Tests cover + round-trip equivalence (DTO → dict → DTO and DTO → YAML text → DTO), + the Foundation S01 PACK case (real-world fixture with mixed sub + types — superset captured at season scope), and a Breaking Bad S05 + EPISODIC case. An on-disk `tmp_path` fixture recreates the Foundation + folder structure with placeholder files, ready to be reused by the + upcoming repository walk tests in step 3. + - **`TVShowBuilder` / `SeasonBuilder` — sole construction surface for the TVShow aggregate** (`alfred/domain/tv_shows/builders.py`). 
"""`.alfred` sidecar persistence layer.

Implements the per-show YAML sidecar described in
``specs/dot_alfred.md``. The sidecar is a single file named ``.alfred``
placed at the root of a show's directory, containing the full aggregate
in a factual-only schema.

Public surface:

* :mod:`.sidecar` — DTOs (``ShowSidecar``, ``SeasonSidecar``,
  ``EpisodeSidecar``, ``SubtitleEntry``) that mirror the YAML schema.
* :mod:`.serializer` — ``serialize`` / ``deserialize`` functions
  converting between DTOs and plain dicts (YAML-ready), and
  ``SidecarSchemaError``, the exception ``deserialize`` raises on schema
  violations.
"""

# SidecarSchemaError is re-exported so callers of ``deserialize`` can catch
# its error without importing from the ``serializer`` submodule directly.
from .serializer import SidecarSchemaError, deserialize, serialize
from .sidecar import (
    EpisodeSidecar,
    SeasonSidecar,
    ShowSidecar,
    SubtitleEntry,
)

__all__ = [
    "deserialize",
    "serialize",
    "SidecarSchemaError",
    "EpisodeSidecar",
    "SeasonSidecar",
    "ShowSidecar",
    "SubtitleEntry",
]
def serialize(sidecar: ShowSidecar) -> dict[str, Any]:
    """Convert a :class:`ShowSidecar` into a plain, YAML-dumpable dict.

    Keys are inserted in the order ``schema_version``, ``imdb_id``,
    ``tmdb_id`` (only when it is not ``None``), ``seasons``. ``seasons``
    is always present, as a list with one dict per season.
    """
    identity: dict[str, Any] = {
        "schema_version": sidecar.schema_version,
        "imdb_id": str(sidecar.imdb_id),
    }
    # An unset tmdb_id contributes no key at all rather than a null entry.
    optional: dict[str, Any] = (
        {} if sidecar.tmdb_id is None else {"tmdb_id": sidecar.tmdb_id}
    )
    seasons = [_serialize_season(season) for season in sidecar.seasons]
    return {**identity, **optional, "seasons": seasons}
def deserialize(data: dict[str, Any]) -> ShowSidecar:
    """Parse a sidecar dict into a :class:`ShowSidecar`.

    Root-level contract:

    * ``schema_version`` — required; must be the int ``SCHEMA_VERSION``.
    * ``imdb_id`` — required string, wrapped in :class:`ImdbId`.
    * ``tmdb_id`` — optional int; absent or ``None`` yields ``None``.
    * ``seasons`` — optional list; a missing key is read as an empty list,
      while an explicit non-list value (including ``None``) is rejected.

    Args:
        data: The mapping loaded from a ``.alfred`` file.

    Returns:
        The populated :class:`ShowSidecar`.

    Raises:
        SidecarSchemaError: On unknown keys, missing required fields,
            type mismatches, or an unsupported ``schema_version``.
            Anything raised by the ``ImdbId`` constructor itself is not
            caught here and propagates unchanged.
    """
    _require_dict(data, "root")
    _reject_unknown(data, _ALLOWED_ROOT, "root")

    version = data.get("schema_version")
    # Check the type as well as the value: ``True == 1`` and ``1.0 == 1``
    # in Python, so equality alone would accept ``schema_version: true``
    # or ``schema_version: 1.0``.
    if (
        isinstance(version, bool)
        or not isinstance(version, int)
        or version != SCHEMA_VERSION
    ):
        raise SidecarSchemaError(
            f"Unsupported schema_version: {version!r} (expected {SCHEMA_VERSION})"
        )

    imdb_id_raw = data.get("imdb_id")
    if not isinstance(imdb_id_raw, str):
        raise SidecarSchemaError(
            f"imdb_id must be a string, got {type(imdb_id_raw).__name__}"
        )

    tmdb_id_raw = data.get("tmdb_id")
    # bool is a subclass of int; reject it explicitly, matching the rule
    # _require_int applies to season/episode numbers.
    if tmdb_id_raw is not None and (
        isinstance(tmdb_id_raw, bool) or not isinstance(tmdb_id_raw, int)
    ):
        raise SidecarSchemaError(
            f"tmdb_id must be an int or absent, got {type(tmdb_id_raw).__name__}"
        )

    seasons_raw = data.get("seasons", [])
    if not isinstance(seasons_raw, list):
        raise SidecarSchemaError(
            f"seasons must be a list, got {type(seasons_raw).__name__}"
        )

    seasons = tuple(_deserialize_season(s) for s in seasons_raw)

    return ShowSidecar(
        imdb_id=ImdbId(imdb_id_raw),
        tmdb_id=tmdb_id_raw,
        seasons=seasons,
        schema_version=version,
    )
def _read_tracks(data: dict[str, Any], where: str) -> dict[str, Any]:
    """Collect the optional ``audio`` / ``subtitles`` lists of a season or episode.

    The result is meant to be splatted into a sidecar DTO constructor: it
    holds ``audio_languages`` and/or ``subtitles``, each as a tuple. A key
    that is absent from ``data`` (or set to ``None``) is left out of the
    result. ``where`` is the label used as the error-message prefix.

    Raises :class:`SidecarSchemaError` when a present value is not a list;
    entry-level validation is delegated to the per-entry parsers.
    """

    def _parse(key: str, entry: Any) -> Any:
        # Only audio entries carry the location label in their messages.
        if key == "audio":
            return _deserialize_audio(entry, where)
        return _deserialize_subtitle(entry)

    tracks: dict[str, Any] = {}
    # (YAML key, DTO field name) — audio is fully parsed before subtitles.
    for key, field_name in (("audio", "audio_languages"), ("subtitles", "subtitles")):
        raw = data.get(key)
        if raw is None:
            continue
        if not isinstance(raw, list):
            raise SidecarSchemaError(
                f"{where}.{key} must be a list, got {type(raw).__name__}"
            )
        tracks[field_name] = tuple(_parse(key, entry) for entry in raw)
    return tracks
def _reject_unknown(data: dict[str, Any], allowed: set[str], where: str) -> None:
    """Raise if ``data`` carries any key outside ``allowed``.

    Args:
        data: Mapping to check. Every call site in this module runs
            ``_require_dict`` on it first.
        allowed: Keys the schema permits at this level.
        where: Label of the schema level, used as the error-message prefix.

    Raises:
        SidecarSchemaError: If at least one key of ``data`` is not in
            ``allowed``. The message lists the offending keys in a
            deterministic order.
    """
    extra = set(data) - allowed
    if extra:
        # YAML mappings may use non-string keys (``1:``, ``true:``,
        # ``null:``). A plain ``sorted`` raises TypeError on a str/int mix,
        # which would escape as the wrong exception type. Ordering by
        # ``str`` is total, and leaves the message unchanged when every
        # key is a string.
        raise SidecarSchemaError(
            f"{where} has unknown keys: {sorted(extra, key=str)}"
        )
@dataclass(frozen=True)
class SeasonSidecar:
    """One season block in the sidecar.

    Two storage modes are encoded structurally:

    * **PACK** — ``episodes`` is empty; ``audio_languages`` /
      ``subtitles`` describe the season as a whole (VO-only policy means
      all episodes share the same audio set).
    * **EPISODIC** — ``episodes`` is populated; per-episode track data
      lives on each :class:`EpisodeSidecar`.

    Release identifiers (group/source/quality/codec) come from parsing
    the season folder name and are not stored.

    The two modes are a convention only: this class does not validate
    that season-level tracks and ``episodes`` are mutually exclusive, and
    the serializer writes whichever of them are non-empty.
    """

    # Season number; written to the ``number`` key as ``number.value``.
    number: SeasonNumber
    # Season folder path, stored verbatim under ``path``.
    # NOTE(review): presumably relative to the show directory — confirm
    # against specs/dot_alfred.md.
    path: str
    # Audio language codes; written under ``audio`` as a list of
    # ``{"language": ...}`` mappings, omitted entirely when empty.
    audio_languages: tuple[str, ...] = ()
    # Season-level subtitle rows; omitted from the output when empty.
    subtitles: tuple[SubtitleEntry, ...] = ()
    # Per-episode entries; omitted from the output when empty.
    episodes: tuple[EpisodeSidecar, ...] = ()
The show's + display title is the parent directory name on disk, not stored + here. + """ + + imdb_id: ImdbId + tmdb_id: int | None = None + seasons: tuple[SeasonSidecar, ...] = field(default_factory=tuple) + schema_version: int = SCHEMA_VERSION diff --git a/tests/infrastructure/persistence/__init__.py b/tests/infrastructure/persistence/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/infrastructure/persistence/dot_alfred/__init__.py b/tests/infrastructure/persistence/dot_alfred/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/infrastructure/persistence/dot_alfred/test_serializer.py b/tests/infrastructure/persistence/dot_alfred/test_serializer.py new file mode 100644 index 0000000..27ab23e --- /dev/null +++ b/tests/infrastructure/persistence/dot_alfred/test_serializer.py @@ -0,0 +1,425 @@ +"""Tests for the ``.alfred`` sidecar serializer. + +Covers: + +* Round-trip equivalence (``serialize`` → ``deserialize`` → equal DTO). +* Field omission rules (``None`` / empty tuples never make it to dict). +* Strict schema (unknown keys rejected, missing keys raise clearly). +* The Foundation fixture (real-world PACK season with mixed subtitles) + to exercise the full surface on a realistic case. + +The serializer is pure-dict in/out; YAML I/O lives in the repository +layer and is tested separately. + +Note: release identifiers (group/source/quality/codec) live in folder +and file names — the parser derives them on demand. They are +deliberately absent from the sidecar schema. 
+""" + +from __future__ import annotations + +import pytest +import yaml + +from alfred.domain.shared.value_objects import ImdbId +from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber +from alfred.infrastructure.persistence.dot_alfred import ( + EpisodeSidecar, + SeasonSidecar, + ShowSidecar, + SubtitleEntry, + deserialize, + serialize, +) +from alfred.infrastructure.persistence.dot_alfred.serializer import ( + SidecarSchemaError, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _foundation_sidecar() -> ShowSidecar: + """The Foundation S01 PACK season — real-world fixture data. + + Mirrors the layout seen in + ``/mnt/testipool/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG/`` — + superset audio/subs at season level (some episodes have a forced + English sub, captured at season scope). + """ + return ShowSidecar( + imdb_id=ImdbId("tt0804484"), + tmdb_id=84958, + seasons=( + SeasonSidecar( + number=SeasonNumber(1), + path="Foundation.2021.S01.1080p.WEBRip.x265-RARBG", + audio_languages=("eng",), + subtitles=( + SubtitleEntry(language="eng", source="adjacent", type="standard"), + SubtitleEntry(language="eng", source="adjacent", type="sdh"), + SubtitleEntry(language="eng", source="adjacent", type="forced"), + SubtitleEntry(language="fra", source="adjacent", type="standard"), + SubtitleEntry(language="fra", source="adjacent", type="sdh"), + ), + ), + ), + ) + + +def _minimal_sidecar() -> ShowSidecar: + """Identity-only sidecar — no seasons, no track data.""" + return ShowSidecar(imdb_id=ImdbId("tt0903747")) + + +def _episodic_sidecar() -> ShowSidecar: + """A season in EPISODIC mode (per-episode track metadata).""" + return ShowSidecar( + imdb_id=ImdbId("tt0903747"), + tmdb_id=1396, + seasons=( + SeasonSidecar( + number=SeasonNumber(5), + path="Breaking.Bad.S05", + episodes=( + EpisodeSidecar( + 
number=EpisodeNumber(1), + path="Breaking.Bad.S05E01.Live.Free.or.Die-MeGusta/Breaking.Bad.S05E01.mkv", + audio_languages=("eng",), + subtitles=( + SubtitleEntry( + language="eng", source="embedded", type="standard" + ), + ), + ), + EpisodeSidecar( + number=EpisodeNumber(2), + path="Breaking.Bad.S05E02.Madrigal-CtrlHD/Breaking.Bad.S05E02.mkv", + audio_languages=("eng",), + ), + ), + ), + ), + ) + + +# --------------------------------------------------------------------------- +# Round-trip +# --------------------------------------------------------------------------- + + +class TestRoundTrip: + def test_minimal(self): + original = _minimal_sidecar() + assert deserialize(serialize(original)) == original + + def test_foundation_pack_season(self): + original = _foundation_sidecar() + assert deserialize(serialize(original)) == original + + def test_episodic_breaking_bad(self): + original = _episodic_sidecar() + assert deserialize(serialize(original)) == original + + def test_round_trip_through_yaml(self): + """Full pipeline: DTO → dict → YAML text → dict → DTO.""" + original = _foundation_sidecar() + text = yaml.safe_dump(serialize(original), sort_keys=False) + recovered = deserialize(yaml.safe_load(text)) + assert recovered == original + + +# --------------------------------------------------------------------------- +# Serialize — field omission +# --------------------------------------------------------------------------- + + +class TestSerializeOmission: + def test_tmdb_id_omitted_when_none(self): + out = serialize(_minimal_sidecar()) + assert "tmdb_id" not in out + + def test_empty_seasons_is_empty_list_not_omitted(self): + # We always emit `seasons:` even if empty — the key documents the + # show "has no season recorded yet" vs being entirely missing. 
+ out = serialize(_minimal_sidecar()) + assert out["seasons"] == [] + + def test_no_audio_when_empty(self): + sidecar = ShowSidecar( + imdb_id=ImdbId("tt0903747"), + seasons=(SeasonSidecar(number=SeasonNumber(1), path="X.S01"),), + ) + out = serialize(sidecar) + assert "audio" not in out["seasons"][0] + + def test_no_subtitles_when_empty(self): + sidecar = ShowSidecar( + imdb_id=ImdbId("tt0903747"), + seasons=(SeasonSidecar(number=SeasonNumber(1), path="X.S01"),), + ) + out = serialize(sidecar) + assert "subtitles" not in out["seasons"][0] + + def test_no_episodes_when_pack(self): + sidecar = ShowSidecar( + imdb_id=ImdbId("tt0903747"), + seasons=(SeasonSidecar(number=SeasonNumber(1), path="X.S01"),), + ) + out = serialize(sidecar) + assert "episodes" not in out["seasons"][0] + + def test_parser_derivable_fields_never_emitted(self): + """group/source/quality/codec must never appear in the YAML.""" + out = serialize(_foundation_sidecar()) + season = out["seasons"][0] + for forbidden in ("group", "source", "quality", "codec"): + assert forbidden not in season + + +# --------------------------------------------------------------------------- +# Serialize — shape +# --------------------------------------------------------------------------- + + +class TestSerializeShape: + def test_root_keys(self): + out = serialize(_foundation_sidecar()) + assert out["schema_version"] == 1 + assert out["imdb_id"] == "tt0804484" + assert out["tmdb_id"] == 84958 + assert isinstance(out["seasons"], list) + + def test_season_number_is_int(self): + out = serialize(_foundation_sidecar()) + assert out["seasons"][0]["number"] == 1 + assert isinstance(out["seasons"][0]["number"], int) + + def test_audio_as_list_of_dicts(self): + out = serialize(_foundation_sidecar()) + assert out["seasons"][0]["audio"] == [{"language": "eng"}] + + def test_subtitle_structure(self): + out = serialize(_foundation_sidecar()) + subs = out["seasons"][0]["subtitles"] + assert subs[0] == { + "language": "eng", + 
"source": "adjacent", + "type": "standard", + } + + +# --------------------------------------------------------------------------- +# Deserialize — strict schema +# --------------------------------------------------------------------------- + + +class TestDeserializeStrict: + def _valid_minimal(self) -> dict: + return { + "schema_version": 1, + "imdb_id": "tt0903747", + "seasons": [], + } + + def test_unknown_root_key_raises(self): + data = self._valid_minimal() + data["bogus"] = "x" + with pytest.raises(SidecarSchemaError, match="root has unknown keys"): + deserialize(data) + + def test_unknown_season_key_raises(self): + data = self._valid_minimal() + data["seasons"] = [{"number": 1, "path": "X", "weird": True}] + with pytest.raises(SidecarSchemaError, match="season has unknown keys"): + deserialize(data) + + def test_parser_derivable_season_key_raises(self): + """A stray group/source/quality/codec key must be rejected.""" + data = self._valid_minimal() + data["seasons"] = [{"number": 1, "path": "X", "group": "RARBG"}] + with pytest.raises(SidecarSchemaError, match="season has unknown keys"): + deserialize(data) + + def test_unknown_episode_key_raises(self): + data = self._valid_minimal() + data["seasons"] = [ + { + "number": 1, + "path": "X", + "episodes": [{"number": 1, "path": "p", "huh": 1}], + } + ] + with pytest.raises(SidecarSchemaError, match="episode has unknown keys"): + deserialize(data) + + def test_unknown_subtitle_key_raises(self): + data = self._valid_minimal() + data["seasons"] = [ + { + "number": 1, + "path": "X", + "subtitles": [ + {"language": "eng", "source": "adjacent", "type": "sdh", "x": 1} + ], + } + ] + with pytest.raises(SidecarSchemaError, match="subtitle has unknown keys"): + deserialize(data) + + def test_unknown_audio_key_raises(self): + data = self._valid_minimal() + data["seasons"] = [ + { + "number": 1, + "path": "X", + "audio": [{"language": "eng", "channels": 6}], + } + ] + with pytest.raises(SidecarSchemaError, match=r"audio\[\] 
has unknown keys"): + deserialize(data) + + def test_wrong_schema_version_raises(self): + data = self._valid_minimal() + data["schema_version"] = 2 + with pytest.raises(SidecarSchemaError, match="schema_version"): + deserialize(data) + + def test_missing_schema_version_raises(self): + data = self._valid_minimal() + del data["schema_version"] + with pytest.raises(SidecarSchemaError, match="schema_version"): + deserialize(data) + + def test_imdb_id_must_be_string(self): + data = self._valid_minimal() + data["imdb_id"] = 12345 + with pytest.raises(SidecarSchemaError, match="imdb_id must be a string"): + deserialize(data) + + def test_tmdb_id_must_be_int_when_present(self): + data = self._valid_minimal() + data["tmdb_id"] = "1396" + with pytest.raises(SidecarSchemaError, match="tmdb_id"): + deserialize(data) + + def test_seasons_must_be_list(self): + data = self._valid_minimal() + data["seasons"] = {"1": {}} + with pytest.raises(SidecarSchemaError, match="seasons must be a list"): + deserialize(data) + + def test_season_number_must_be_int(self): + data = self._valid_minimal() + data["seasons"] = [{"number": "1", "path": "X"}] + with pytest.raises(SidecarSchemaError, match="season.number must be an int"): + deserialize(data) + + def test_season_number_bool_rejected(self): + # bool is a subclass of int but should not pass — guards against + # YAML quirks where `True` could sneak in as a season number. 
+ data = self._valid_minimal() + data["seasons"] = [{"number": True, "path": "X"}] + with pytest.raises(SidecarSchemaError, match="season.number must be an int"): + deserialize(data) + + def test_season_path_must_be_string(self): + data = self._valid_minimal() + data["seasons"] = [{"number": 1, "path": 1}] + with pytest.raises(SidecarSchemaError, match="season.path"): + deserialize(data) + + def test_subtitle_missing_field_raises(self): + data = self._valid_minimal() + data["seasons"] = [ + { + "number": 1, + "path": "X", + "subtitles": [{"language": "eng", "source": "adjacent"}], + } + ] + with pytest.raises(SidecarSchemaError, match="subtitle.type"): + deserialize(data) + + +# --------------------------------------------------------------------------- +# Foundation fixture — golden YAML +# --------------------------------------------------------------------------- + + +class TestFoundationGolden: + """Use the Foundation case to validate the produced YAML reads well.""" + + def test_yaml_dump_shape(self): + text = yaml.safe_dump(serialize(_foundation_sidecar()), sort_keys=False) + # Sanity-check that the human-readable layout matches the spec. + assert "schema_version: 1" in text + assert "imdb_id: tt0804484" in text + assert "tmdb_id: 84958" in text + assert "- number: 1" in text + assert "path: Foundation.2021.S01.1080p.WEBRip.x265-RARBG" in text + # No episodes block (PACK mode). + assert "episodes:" not in text + # No release identifiers at season scope — those live in folder + # names. (We can't check ``source:`` here because the subtitle + # entries legitimately carry their own ``source`` key.) 
class TestFoundationOnDisk:
    """Smoke tests for the ``foundation_tree`` on-disk fixture.

    The fixture mostly exists for future tests (repository walk). For now
    only its basic shape is exercised — a placeholder for richer
    walk-and-build tests landing in step 3 (repository).
    """

    def test_fixture_has_expected_episode_count(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        # The fixture's placeholder episodes are ``.mp4`` files.
        videos = list(season.glob("*.mp4"))
        assert len(videos) == 10

    def test_fixture_has_forced_subs_only_on_some_episodes(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        forced = list(season.glob("*.eng.forced.srt"))
        assert len(forced) == 6  # E04 through E09

    def test_serialize_yaml_can_be_written_alongside(self, foundation_tree):
        """Write the sidecar at the root of the show folder and read it back."""
        sidecar_path = foundation_tree / ".alfred"
        # Explicit encoding keeps the round trip independent of the
        # platform's default text encoding.
        sidecar_path.write_text(
            yaml.safe_dump(serialize(_foundation_sidecar()), sort_keys=False),
            encoding="utf-8",
        )
        recovered = deserialize(
            yaml.safe_load(sidecar_path.read_text(encoding="utf-8"))
        )
        assert recovered == _foundation_sidecar()