Files
alfred/tests/infrastructure/persistence/dot_alfred/test_serializer.py
T
francwa b0e275bd11 feat(persistence): add .alfred sidecar serializer (DTO ↔ dict)
Step 2 of the specs/dot_alfred.md plan. Pure-dict in/out
(serialize(sidecar) -> dict, deserialize(data) -> ShowSidecar);
YAML I/O lives in the repository layer (step 3) and is kept out
for trivial testability.

DTOs mirror the YAML schema field-for-field:
- ShowSidecar (root: imdb_id, tmdb_id, schema_version, seasons)
- SeasonSidecar (number, path, optional audio/subtitles, optional episodes)
- EpisodeSidecar (number, path, optional audio/subtitles)
- SubtitleEntry (language, source, type)

The sidecar acts as a scan cache: it stores only what is genuinely
costly to recompute — folder/file paths (skipping the FS walk) and
probed track metadata (skipping ffprobe). Release identifiers
(group, source, quality, codec) live in folder/file names and are
derived on demand by the parser; they are deliberately absent from
the schema and rejected as unknown keys on deserialize.

The serializer is strict on schema: unknown keys at any level raise
SidecarSchemaError, missing required fields raise clearly, and bool
cannot sneak in as a season/episode number. Optional fields
(tmdb_id, empty audio/subtitles/episodes) are omitted from the
output rather than emitted as null / [].

Tests cover round-trip equivalence (DTO → dict → DTO and DTO → YAML
text → DTO), the Foundation S01 PACK case (real-world fixture with
mixed sub types — superset captured at season scope), and a
Breaking Bad S05 EPISODIC case. An on-disk tmp_path fixture
recreates the Foundation folder structure with placeholder files,
ready to be reused by the upcoming repository walk tests in step 3.
2026-05-22 16:56:56 +02:00

426 lines
16 KiB
Python

"""Tests for the ``.alfred`` sidecar serializer.
Covers:
* Round-trip equivalence (``serialize`` → ``deserialize`` → equal DTO).
* Field omission rules (``None`` / empty optional tuples never make it to
the dict; ``seasons`` is the exception and is always emitted, even empty).
* Strict schema (unknown keys rejected, missing keys raise clearly).
* The Foundation fixture (real-world PACK season with mixed subtitles)
to exercise the full surface on a realistic case.
The serializer is pure-dict in/out; YAML I/O lives in the repository
layer and is tested separately.
Note: release identifiers (group/source/quality/codec) live in folder
and file names — the parser derives them on demand. They are
deliberately absent from the sidecar schema.
"""
from __future__ import annotations
import pytest
import yaml
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber
from alfred.infrastructure.persistence.dot_alfred import (
EpisodeSidecar,
SeasonSidecar,
ShowSidecar,
SubtitleEntry,
deserialize,
serialize,
)
from alfred.infrastructure.persistence.dot_alfred.serializer import (
SidecarSchemaError,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _foundation_sidecar() -> ShowSidecar:
    """Build the Foundation S01 sidecar used as the PACK-season fixture.

    Real-world data modelled on
    ``/mnt/testipool/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG/``.
    Audio and subtitles are recorded once, at season level, as the superset
    across episodes (only some episodes have a forced English sub, yet it
    is captured at season scope).
    """
    # (language, type) pairs, in the order they appear in the sidecar.
    track_specs = (
        ("eng", "standard"),
        ("eng", "sdh"),
        ("eng", "forced"),
        ("fra", "standard"),
        ("fra", "sdh"),
    )
    season_subtitles = tuple(
        SubtitleEntry(language=lang, source="adjacent", type=kind)
        for lang, kind in track_specs
    )
    first_season = SeasonSidecar(
        number=SeasonNumber(1),
        path="Foundation.2021.S01.1080p.WEBRip.x265-RARBG",
        audio_languages=("eng",),
        subtitles=season_subtitles,
    )
    return ShowSidecar(
        imdb_id=ImdbId("tt0804484"),
        tmdb_id=84958,
        seasons=(first_season,),
    )
def _minimal_sidecar() -> ShowSidecar:
    """Build an identity-only sidecar: an IMDb id, no seasons, no tracks."""
    show_id = ImdbId("tt0903747")
    return ShowSidecar(imdb_id=show_id)
def _episodic_sidecar() -> ShowSidecar:
    """Build a Breaking Bad S05 sidecar whose season is in EPISODIC mode.

    Track metadata hangs off each episode instead of the season: episode 1
    has audio plus one embedded subtitle, episode 2 has audio only.
    """
    premiere = EpisodeSidecar(
        number=EpisodeNumber(1),
        path="Breaking.Bad.S05E01.Live.Free.or.Die-MeGusta/Breaking.Bad.S05E01.mkv",
        audio_languages=("eng",),
        subtitles=(
            SubtitleEntry(language="eng", source="embedded", type="standard"),
        ),
    )
    second = EpisodeSidecar(
        number=EpisodeNumber(2),
        path="Breaking.Bad.S05E02.Madrigal-CtrlHD/Breaking.Bad.S05E02.mkv",
        audio_languages=("eng",),
    )
    fifth_season = SeasonSidecar(
        number=SeasonNumber(5),
        path="Breaking.Bad.S05",
        episodes=(premiere, second),
    )
    return ShowSidecar(
        imdb_id=ImdbId("tt0903747"),
        tmdb_id=1396,
        seasons=(fifth_season,),
    )
# ---------------------------------------------------------------------------
# Round-trip
# ---------------------------------------------------------------------------
class TestRoundTrip:
    """``serialize`` followed by ``deserialize`` must give back an equal DTO."""

    def test_minimal(self):
        sidecar = _minimal_sidecar()
        as_dict = serialize(sidecar)
        assert deserialize(as_dict) == sidecar

    def test_foundation_pack_season(self):
        sidecar = _foundation_sidecar()
        as_dict = serialize(sidecar)
        assert deserialize(as_dict) == sidecar

    def test_episodic_breaking_bad(self):
        sidecar = _episodic_sidecar()
        as_dict = serialize(sidecar)
        assert deserialize(as_dict) == sidecar

    def test_round_trip_through_yaml(self):
        """Full pipeline: DTO → dict → YAML text → dict → DTO."""
        sidecar = _foundation_sidecar()
        # sort_keys=False keeps the key order produced by the serializer.
        yaml_text = yaml.safe_dump(serialize(sidecar), sort_keys=False)
        reloaded = yaml.safe_load(yaml_text)
        assert deserialize(reloaded) == sidecar
# ---------------------------------------------------------------------------
# Serialize — field omission
# ---------------------------------------------------------------------------
class TestSerializeOmission:
    """Absent optional data must not show up as keys in the serialized dict."""

    @staticmethod
    def _bare_season_out() -> dict:
        """Serialize a show whose single season only has a number and a path.

        Returns:
            The serialized mapping of that one season.
        """
        sidecar = ShowSidecar(
            imdb_id=ImdbId("tt0903747"),
            seasons=(SeasonSidecar(number=SeasonNumber(1), path="X.S01"),),
        )
        return serialize(sidecar)["seasons"][0]

    def test_tmdb_id_omitted_when_none(self):
        out = serialize(_minimal_sidecar())
        assert "tmdb_id" not in out

    def test_empty_seasons_is_empty_list_not_omitted(self):
        # We always emit `seasons:` even if empty — the key documents the
        # show "has no season recorded yet" vs being entirely missing.
        out = serialize(_minimal_sidecar())
        assert out["seasons"] == []

    def test_no_audio_when_empty(self):
        assert "audio" not in self._bare_season_out()

    def test_no_subtitles_when_empty(self):
        assert "subtitles" not in self._bare_season_out()

    def test_no_episodes_when_pack(self):
        assert "episodes" not in self._bare_season_out()

    def test_parser_derivable_fields_never_emitted(self):
        """group/source/quality/codec must never appear in the YAML."""
        out = serialize(_foundation_sidecar())
        # Inspect the root, every season and every episode mapping.
        # Subtitle entries are deliberately left out: they legitimately
        # carry their own ``source`` key.
        scopes = [out]
        for season in out["seasons"]:
            scopes.append(season)
            scopes.extend(season.get("episodes", ()))
        for scope in scopes:
            for forbidden in ("group", "source", "quality", "codec"):
                assert forbidden not in scope
# ---------------------------------------------------------------------------
# Serialize — shape
# ---------------------------------------------------------------------------
class TestSerializeShape:
    """Pin key names and value types of the serialized Foundation sidecar."""

    def test_root_keys(self):
        root = serialize(_foundation_sidecar())
        assert root["schema_version"] == 1
        assert root["imdb_id"] == "tt0804484"
        assert root["tmdb_id"] == 84958
        assert isinstance(root["seasons"], list)

    def test_season_number_is_int(self):
        first_season = serialize(_foundation_sidecar())["seasons"][0]
        assert first_season["number"] == 1
        assert isinstance(first_season["number"], int)

    def test_audio_as_list_of_dicts(self):
        first_season = serialize(_foundation_sidecar())["seasons"][0]
        assert first_season["audio"] == [{"language": "eng"}]

    def test_subtitle_structure(self):
        first_season = serialize(_foundation_sidecar())["seasons"][0]
        first_sub = first_season["subtitles"][0]
        expected = {"language": "eng", "source": "adjacent", "type": "standard"}
        assert first_sub == expected
# ---------------------------------------------------------------------------
# Deserialize — strict schema
# ---------------------------------------------------------------------------
class TestDeserializeStrict:
    """``deserialize`` must reject malformed input with SidecarSchemaError.

    Each test starts from a valid minimal mapping, breaks exactly one thing
    and checks the error message. ``match`` patterns are regular
    expressions, so metacharacters that are meant literally (``.``, ``[``,
    ``]``) are escaped.
    """

    def _valid_minimal(self) -> dict:
        """Return a fresh, valid root mapping that each test then corrupts."""
        return {
            "schema_version": 1,
            "imdb_id": "tt0903747",
            "seasons": [],
        }

    def test_unknown_root_key_raises(self):
        data = self._valid_minimal()
        data["bogus"] = "x"
        with pytest.raises(SidecarSchemaError, match="root has unknown keys"):
            deserialize(data)

    def test_unknown_season_key_raises(self):
        data = self._valid_minimal()
        data["seasons"] = [{"number": 1, "path": "X", "weird": True}]
        with pytest.raises(SidecarSchemaError, match="season has unknown keys"):
            deserialize(data)

    def test_parser_derivable_season_key_raises(self):
        """A stray group/source/quality/codec key must be rejected."""
        data = self._valid_minimal()
        data["seasons"] = [{"number": 1, "path": "X", "group": "RARBG"}]
        with pytest.raises(SidecarSchemaError, match="season has unknown keys"):
            deserialize(data)

    def test_unknown_episode_key_raises(self):
        data = self._valid_minimal()
        data["seasons"] = [
            {
                "number": 1,
                "path": "X",
                "episodes": [{"number": 1, "path": "p", "huh": 1}],
            }
        ]
        with pytest.raises(SidecarSchemaError, match="episode has unknown keys"):
            deserialize(data)

    def test_unknown_subtitle_key_raises(self):
        data = self._valid_minimal()
        data["seasons"] = [
            {
                "number": 1,
                "path": "X",
                "subtitles": [
                    {"language": "eng", "source": "adjacent", "type": "sdh", "x": 1}
                ],
            }
        ]
        with pytest.raises(SidecarSchemaError, match="subtitle has unknown keys"):
            deserialize(data)

    def test_unknown_audio_key_raises(self):
        data = self._valid_minimal()
        data["seasons"] = [
            {
                "number": 1,
                "path": "X",
                "audio": [{"language": "eng", "channels": 6}],
            }
        ]
        with pytest.raises(SidecarSchemaError, match=r"audio\[\] has unknown keys"):
            deserialize(data)

    def test_wrong_schema_version_raises(self):
        data = self._valid_minimal()
        data["schema_version"] = 2
        with pytest.raises(SidecarSchemaError, match="schema_version"):
            deserialize(data)

    def test_missing_schema_version_raises(self):
        data = self._valid_minimal()
        del data["schema_version"]
        with pytest.raises(SidecarSchemaError, match="schema_version"):
            deserialize(data)

    def test_imdb_id_must_be_string(self):
        data = self._valid_minimal()
        data["imdb_id"] = 12345
        with pytest.raises(SidecarSchemaError, match="imdb_id must be a string"):
            deserialize(data)

    def test_tmdb_id_must_be_int_when_present(self):
        data = self._valid_minimal()
        data["tmdb_id"] = "1396"
        with pytest.raises(SidecarSchemaError, match="tmdb_id"):
            deserialize(data)

    def test_seasons_must_be_list(self):
        data = self._valid_minimal()
        data["seasons"] = {"1": {}}
        with pytest.raises(SidecarSchemaError, match="seasons must be a list"):
            deserialize(data)

    def test_season_number_must_be_int(self):
        data = self._valid_minimal()
        data["seasons"] = [{"number": "1", "path": "X"}]
        # Escaped dot: an unescaped ``.`` would match any character.
        with pytest.raises(
            SidecarSchemaError, match=r"season\.number must be an int"
        ):
            deserialize(data)

    def test_season_number_bool_rejected(self):
        # bool is a subclass of int but should not pass — guards against
        # YAML quirks where `True` could sneak in as a season number.
        data = self._valid_minimal()
        data["seasons"] = [{"number": True, "path": "X"}]
        with pytest.raises(
            SidecarSchemaError, match=r"season\.number must be an int"
        ):
            deserialize(data)

    def test_season_path_must_be_string(self):
        data = self._valid_minimal()
        data["seasons"] = [{"number": 1, "path": 1}]
        with pytest.raises(SidecarSchemaError, match=r"season\.path"):
            deserialize(data)

    def test_subtitle_missing_field_raises(self):
        data = self._valid_minimal()
        data["seasons"] = [
            {
                "number": 1,
                "path": "X",
                # ``type`` is intentionally left out.
                "subtitles": [{"language": "eng", "source": "adjacent"}],
            }
        ]
        with pytest.raises(SidecarSchemaError, match=r"subtitle\.type"):
            deserialize(data)
# ---------------------------------------------------------------------------
# Foundation fixture — golden YAML
# ---------------------------------------------------------------------------
class TestFoundationGolden:
    """Check that the YAML dumped for the Foundation case reads well."""

    def test_yaml_dump_shape(self):
        dumped = yaml.safe_dump(serialize(_foundation_sidecar()), sort_keys=False)
        # Fragments the human-readable layout must contain, per the spec.
        required = (
            "schema_version: 1",
            "imdb_id: tt0804484",
            "tmdb_id: 84958",
            "- number: 1",
            "path: Foundation.2021.S01.1080p.WEBRip.x265-RARBG",
        )
        for fragment in required:
            assert fragment in dumped
        # PACK mode, so no episodes block. Release identifiers live in
        # folder names and must be absent too. ``source:`` cannot be
        # checked here because subtitle entries legitimately carry their
        # own ``source`` key.
        banned = ("episodes:", "group:", "quality:", "codec:")
        for fragment in banned:
            assert fragment not in dumped
# ---------------------------------------------------------------------------
# Foundation on-disk fixture (real folder structure, no real .mkv)
# ---------------------------------------------------------------------------
@pytest.fixture
def foundation_tree(tmp_path):
    """Create a Foundation S01 folder layout under ``tmp_path``.

    Modelled on
    ``/mnt/testipool/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG/``.
    Every file is an empty placeholder, which is enough for tests that need
    a realistic show folder without real media.

    Returns:
        The path of the show folder; the season folder sits directly in it.
    """
    show_dir = tmp_path / "Foundation.2021.1080p.WEBRip.x265-RARBG"
    season_dir = show_dir / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
    season_dir.mkdir(parents=True)
    stem_template = "Foundation.2021.S01E{n:02d}.1080p.WEBRip.x265-RARBG"
    # Created for every episode: the video plus four subtitle files.
    common_suffixes = (
        ".mp4",
        ".eng.srt",
        ".eng.sdh.srt",
        ".fra.srt",
        ".fra.sdh.srt",
    )
    # Only episodes 4 through 9 get a forced English subtitle.
    episodes_with_forced = range(4, 10)
    for number in range(1, 11):
        stem = stem_template.format(n=number)
        suffixes = list(common_suffixes)
        if number in episodes_with_forced:
            suffixes.append(".eng.forced.srt")
        for suffix in suffixes:
            (season_dir / f"{stem}{suffix}").touch()
    return show_dir
class TestFoundationOnDisk:
    """Smoke tests for the ``foundation_tree`` on-disk fixture.

    The fixture is mostly for future tests (repository walk). For now only
    its basic shape is exercised — a placeholder for richer walk-and-build
    tests landing in step 3 (repository).
    """

    def test_fixture_has_expected_episode_count(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        # The placeholder videos are ``.mp4`` files, one per episode.
        videos = sorted(season.glob("*.mp4"))
        assert len(videos) == 10

    def test_fixture_has_forced_subs_only_on_some_episodes(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        forced = sorted(season.glob("*.eng.forced.srt"))
        assert len(forced) == 6  # E04 through E09

    def test_serialize_yaml_can_be_written_alongside(self, foundation_tree):
        """Write the sidecar inside the show folder and read it back."""
        sidecar_path = foundation_tree / ".alfred"
        # Explicit encoding on both sides: the default used by
        # write_text/read_text depends on the platform locale.
        sidecar_path.write_text(
            yaml.safe_dump(serialize(_foundation_sidecar()), sort_keys=False),
            encoding="utf-8",
        )
        recovered = deserialize(
            yaml.safe_load(sidecar_path.read_text(encoding="utf-8"))
        )
        assert recovered == _foundation_sidecar()