feat(persistence): add .alfred sidecar serializer (DTO ↔ dict)
Step 2 of the specs/dot_alfred.md plan. Pure-dict in/out (serialize(sidecar) -> dict, deserialize(data) -> ShowSidecar); YAML I/O lives in the repository layer (step 3) and is kept out of the serializer so that it stays trivially testable.

DTOs mirror the YAML schema field-for-field:
- ShowSidecar (root: imdb_id, tmdb_id, schema_version, seasons)
- SeasonSidecar (number, path, optional audio/subtitles, optional episodes)
- EpisodeSidecar (number, path, optional audio/subtitles)
- SubtitleEntry (language, source, type)

The sidecar acts as a scan cache: it stores only what is genuinely costly to recompute — folder/file paths (skipping the FS walk) and probed track metadata (skipping ffprobe). Release identifiers (group, source, quality, codec) live in folder/file names and are derived on demand by the parser; they are deliberately absent from the schema and rejected as unknown keys on deserialize.

The serializer is strict on schema: unknown keys at any level raise SidecarSchemaError, missing required fields raise a clear error, and a bool cannot sneak in as a season/episode number. Optional fields (tmdb_id, empty audio/subtitles/episodes) are omitted from the output rather than emitted as null / [].

Tests cover round-trip equivalence (DTO → dict → DTO and DTO → YAML text → DTO), the Foundation S01 PACK case (real-world fixture with mixed sub types — superset captured at season scope), and a Breaking Bad S05 EPISODIC case. An on-disk tmp_path fixture recreates the Foundation folder structure with placeholder files, ready to be reused by the upcoming repository walk tests in step 3.
This commit is contained in:
@@ -0,0 +1,425 @@
|
||||
"""Tests for the ``.alfred`` sidecar serializer.
|
||||
|
||||
Covers:
|
||||
|
||||
* Round-trip equivalence (``serialize`` → ``deserialize`` → equal DTO).
|
||||
* Field omission rules (``None`` / empty tuples never make it to dict).
|
||||
* Strict schema (unknown keys rejected, missing keys raise clearly).
|
||||
* The Foundation fixture (real-world PACK season with mixed subtitles)
|
||||
to exercise the full surface on a realistic case.
|
||||
|
||||
The serializer is pure-dict in/out; YAML I/O lives in the repository
|
||||
layer and is tested separately.
|
||||
|
||||
Note: release identifiers (group/source/quality/codec) live in folder
|
||||
and file names — the parser derives them on demand. They are
|
||||
deliberately absent from the sidecar schema.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from alfred.domain.shared.value_objects import ImdbId
|
||||
from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber
|
||||
from alfred.infrastructure.persistence.dot_alfred import (
|
||||
EpisodeSidecar,
|
||||
SeasonSidecar,
|
||||
ShowSidecar,
|
||||
SubtitleEntry,
|
||||
deserialize,
|
||||
serialize,
|
||||
)
|
||||
from alfred.infrastructure.persistence.dot_alfred.serializer import (
|
||||
SidecarSchemaError,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _foundation_sidecar() -> ShowSidecar:
    """Build the Foundation S01 sidecar — a PACK season from real-world data.

    Modelled on the layout of
    ``/mnt/testipool/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG/``:
    audio and subtitles are recorded once, at season level, as a superset
    (only some episodes have a forced English sub, yet it is listed at
    season scope).
    """
    # (language, type) pairs in the order they appear in the sidecar; every
    # entry below is built with ``source="adjacent"``.
    adjacent_tracks = (
        ("eng", "standard"),
        ("eng", "sdh"),
        ("eng", "forced"),
        ("fra", "standard"),
        ("fra", "sdh"),
    )
    season_one = SeasonSidecar(
        number=SeasonNumber(1),
        path="Foundation.2021.S01.1080p.WEBRip.x265-RARBG",
        audio_languages=("eng",),
        subtitles=tuple(
            SubtitleEntry(language=lang, source="adjacent", type=kind)
            for lang, kind in adjacent_tracks
        ),
    )
    return ShowSidecar(
        imdb_id=ImdbId("tt0804484"),
        tmdb_id=84958,
        seasons=(season_one,),
    )
|
||||
|
||||
|
||||
def _minimal_sidecar() -> ShowSidecar:
    """Build a sidecar that carries only an IMDb id (no seasons, no tracks)."""
    show_id = ImdbId("tt0903747")
    return ShowSidecar(imdb_id=show_id)
|
||||
|
||||
|
||||
def _episodic_sidecar() -> ShowSidecar:
    """Build a sidecar whose season is in EPISODIC mode.

    Track metadata is attached to each episode rather than to the season:
    the first episode has audio plus one embedded subtitle, the second has
    audio only.
    """
    first_episode = EpisodeSidecar(
        number=EpisodeNumber(1),
        path="Breaking.Bad.S05E01.Live.Free.or.Die-MeGusta/Breaking.Bad.S05E01.mkv",
        audio_languages=("eng",),
        subtitles=(
            SubtitleEntry(language="eng", source="embedded", type="standard"),
        ),
    )
    second_episode = EpisodeSidecar(
        number=EpisodeNumber(2),
        path="Breaking.Bad.S05E02.Madrigal-CtrlHD/Breaking.Bad.S05E02.mkv",
        audio_languages=("eng",),
    )
    season_five = SeasonSidecar(
        number=SeasonNumber(5),
        path="Breaking.Bad.S05",
        episodes=(first_episode, second_episode),
    )
    return ShowSidecar(
        imdb_id=ImdbId("tt0903747"),
        tmdb_id=1396,
        seasons=(season_five,),
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Round-trip
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRoundTrip:
    """``serialize`` followed by ``deserialize`` must give back an equal DTO."""

    @staticmethod
    def _through_dict(sidecar):
        """Push *sidecar* through ``serialize`` and back through ``deserialize``."""
        return deserialize(serialize(sidecar))

    def test_minimal(self):
        sidecar = _minimal_sidecar()
        assert self._through_dict(sidecar) == sidecar

    def test_foundation_pack_season(self):
        sidecar = _foundation_sidecar()
        assert self._through_dict(sidecar) == sidecar

    def test_episodic_breaking_bad(self):
        sidecar = _episodic_sidecar()
        assert self._through_dict(sidecar) == sidecar

    def test_round_trip_through_yaml(self):
        """Full pipeline: DTO → dict → YAML text → dict → DTO."""
        sidecar = _foundation_sidecar()
        as_dict = serialize(sidecar)
        yaml_text = yaml.safe_dump(as_dict, sort_keys=False)
        reloaded = yaml.safe_load(yaml_text)
        assert deserialize(reloaded) == sidecar
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialize — field omission
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSerializeOmission:
    """Unset or empty optional fields must be left out of the serialized dict."""

    @staticmethod
    def _bare_season_dict():
        """Serialize a show whose single season has only a number and a path.

        Returns the serialized entry for that season.
        """
        bare_season = SeasonSidecar(number=SeasonNumber(1), path="X.S01")
        show = ShowSidecar(
            imdb_id=ImdbId("tt0903747"),
            seasons=(bare_season,),
        )
        return serialize(show)["seasons"][0]

    def test_tmdb_id_omitted_when_none(self):
        serialized = serialize(_minimal_sidecar())
        assert "tmdb_id" not in serialized

    def test_empty_seasons_is_empty_list_not_omitted(self):
        # `seasons:` is always emitted, even when empty: an empty list says
        # "no season recorded yet", which is different from the key being
        # entirely missing.
        serialized = serialize(_minimal_sidecar())
        assert serialized["seasons"] == []

    def test_no_audio_when_empty(self):
        assert "audio" not in self._bare_season_dict()

    def test_no_subtitles_when_empty(self):
        assert "subtitles" not in self._bare_season_dict()

    def test_no_episodes_when_pack(self):
        assert "episodes" not in self._bare_season_dict()

    def test_parser_derivable_fields_never_emitted(self):
        """group/source/quality/codec must never appear in the YAML."""
        season = serialize(_foundation_sidecar())["seasons"][0]
        leaked = [
            key
            for key in ("group", "source", "quality", "codec")
            if key in season
        ]
        assert not leaked
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialize — shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSerializeShape:
    """Shape and value types of the dict produced for the Foundation sidecar."""

    def test_root_keys(self):
        root = serialize(_foundation_sidecar())
        assert root["schema_version"] == 1
        assert root["imdb_id"] == "tt0804484"
        assert root["tmdb_id"] == 84958
        assert isinstance(root["seasons"], list)

    def test_season_number_is_int(self):
        number = serialize(_foundation_sidecar())["seasons"][0]["number"]
        assert isinstance(number, int)
        assert number == 1

    def test_audio_as_list_of_dicts(self):
        first_season = serialize(_foundation_sidecar())["seasons"][0]
        assert first_season["audio"] == [{"language": "eng"}]

    def test_subtitle_structure(self):
        first_season = serialize(_foundation_sidecar())["seasons"][0]
        expected_first = {
            "language": "eng",
            "source": "adjacent",
            "type": "standard",
        }
        assert first_season["subtitles"][0] == expected_first
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deserialize — strict schema
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDeserializeStrict:
    """``deserialize`` must reject payloads that stray from the schema.

    Every test starts from the baseline payload of ``_valid_minimal``, breaks
    it in exactly one way and expects a ``SidecarSchemaError`` whose message
    matches the given pattern.
    """

    # NOTE: ``pytest.raises(match=...)`` interprets its argument as a regular
    # expression, so a bare ``.`` in the patterns below matches any character.

    def _valid_minimal(self) -> dict:
        """Return a fresh baseline payload for a test to break."""
        return {"schema_version": 1, "imdb_id": "tt0903747", "seasons": []}

    def _with_season(self, season: dict) -> dict:
        """Return the baseline payload with *season* as its only season."""
        payload = self._valid_minimal()
        payload["seasons"] = [season]
        return payload

    @staticmethod
    def _expect_rejected(payload, pattern: str) -> None:
        """Assert that deserializing *payload* raises an error matching *pattern*."""
        with pytest.raises(SidecarSchemaError, match=pattern):
            deserialize(payload)

    def test_unknown_root_key_raises(self):
        payload = {**self._valid_minimal(), "bogus": "x"}
        self._expect_rejected(payload, "root has unknown keys")

    def test_unknown_season_key_raises(self):
        payload = self._with_season({"number": 1, "path": "X", "weird": True})
        self._expect_rejected(payload, "season has unknown keys")

    def test_parser_derivable_season_key_raises(self):
        """A stray group/source/quality/codec key must be rejected."""
        payload = self._with_season({"number": 1, "path": "X", "group": "RARBG"})
        self._expect_rejected(payload, "season has unknown keys")

    def test_unknown_episode_key_raises(self):
        payload = self._with_season(
            {
                "number": 1,
                "path": "X",
                "episodes": [{"number": 1, "path": "p", "huh": 1}],
            }
        )
        self._expect_rejected(payload, "episode has unknown keys")

    def test_unknown_subtitle_key_raises(self):
        payload = self._with_season(
            {
                "number": 1,
                "path": "X",
                "subtitles": [
                    {"language": "eng", "source": "adjacent", "type": "sdh", "x": 1}
                ],
            }
        )
        self._expect_rejected(payload, "subtitle has unknown keys")

    def test_unknown_audio_key_raises(self):
        payload = self._with_season(
            {
                "number": 1,
                "path": "X",
                "audio": [{"language": "eng", "channels": 6}],
            }
        )
        self._expect_rejected(payload, r"audio\[\] has unknown keys")

    def test_wrong_schema_version_raises(self):
        payload = {**self._valid_minimal(), "schema_version": 2}
        self._expect_rejected(payload, "schema_version")

    def test_missing_schema_version_raises(self):
        payload = self._valid_minimal()
        payload.pop("schema_version")
        self._expect_rejected(payload, "schema_version")

    def test_imdb_id_must_be_string(self):
        payload = {**self._valid_minimal(), "imdb_id": 12345}
        self._expect_rejected(payload, "imdb_id must be a string")

    def test_tmdb_id_must_be_int_when_present(self):
        payload = {**self._valid_minimal(), "tmdb_id": "1396"}
        self._expect_rejected(payload, "tmdb_id")

    def test_seasons_must_be_list(self):
        payload = {**self._valid_minimal(), "seasons": {"1": {}}}
        self._expect_rejected(payload, "seasons must be a list")

    def test_season_number_must_be_int(self):
        payload = self._with_season({"number": "1", "path": "X"})
        self._expect_rejected(payload, "season.number must be an int")

    def test_season_number_bool_rejected(self):
        # bool is a subclass of int, yet it must not be accepted here — this
        # guards against YAML quirks where `True` could sneak in as a season
        # number.
        payload = self._with_season({"number": True, "path": "X"})
        self._expect_rejected(payload, "season.number must be an int")

    def test_season_path_must_be_string(self):
        payload = self._with_season({"number": 1, "path": 1})
        self._expect_rejected(payload, "season.path")

    def test_subtitle_missing_field_raises(self):
        payload = self._with_season(
            {
                "number": 1,
                "path": "X",
                "subtitles": [{"language": "eng", "source": "adjacent"}],
            }
        )
        self._expect_rejected(payload, "subtitle.type")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Foundation fixture — golden YAML
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFoundationGolden:
    """Use the Foundation case to validate the produced YAML reads well."""

    def test_yaml_dump_shape(self):
        dumped = yaml.safe_dump(serialize(_foundation_sidecar()), sort_keys=False)

        # Sanity-check that the human-readable layout matches the spec.
        expected_fragments = (
            "schema_version: 1",
            "imdb_id: tt0804484",
            "tmdb_id: 84958",
            "- number: 1",
            "path: Foundation.2021.S01.1080p.WEBRip.x265-RARBG",
        )
        for fragment in expected_fragments:
            assert fragment in dumped

        # PACK mode: there must be no episodes block.
        assert "episodes:" not in dumped

        # Release identifiers live in folder names, never at season scope.
        # ``source:`` cannot be checked here because subtitle entries
        # legitimately carry their own ``source`` key.
        release_keys = ("group:", "quality:", "codec:")
        assert not any(key in dumped for key in release_keys)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Foundation on-disk fixture (real folder structure, no real .mkv)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def foundation_tree(tmp_path):
    """Recreate the Foundation S01 layout under ``tmp_path``.

    Mirrors the on-disk structure of
    ``/mnt/testipool/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG/``
    with empty placeholder files, which is enough for tests that need a
    realistic show folder without dragging in real media.

    Returns the path of the show folder; the season folder sits inside it.
    """
    show_dir = tmp_path / "Foundation.2021.1080p.WEBRip.x265-RARBG"
    season_dir = show_dir / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
    season_dir.mkdir(parents=True)

    stem_template = "Foundation.2021.S01E{n:02d}.1080p.WEBRip.x265-RARBG"
    # One video file plus four subtitle files for every episode.
    common_suffixes = (
        ".mp4",
        ".eng.srt",
        ".eng.sdh.srt",
        ".fra.srt",
        ".fra.sdh.srt",
    )
    for number in range(1, 11):
        stem = stem_template.format(n=number)
        for suffix in common_suffixes:
            (season_dir / (stem + suffix)).touch()
        # Only episodes 4 through 9 get a forced English subtitle.
        if 4 <= number <= 9:
            (season_dir / (stem + ".eng.forced.srt")).touch()
    return show_dir
|
||||
|
||||
|
||||
class TestFoundationOnDisk:
    """The on-disk fixture is mostly for future tests (repository walk).

    For now we exercise the basic shape — a placeholder for richer
    walk-and-build tests landing in step 3 (repository).
    """

    def test_fixture_has_expected_episode_count(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        # The fixture's placeholder video files use the ``.mp4`` extension.
        videos = sorted(season.glob("*.mp4"))
        assert len(videos) == 10

    def test_fixture_has_forced_subs_only_on_some_episodes(self, foundation_tree):
        season = foundation_tree / "Foundation.2021.S01.1080p.WEBRip.x265-RARBG"
        forced = sorted(season.glob("*.eng.forced.srt"))
        assert len(forced) == 6  # E04 through E09

    def test_serialize_yaml_can_be_written_alongside(self, foundation_tree):
        """Write the sidecar inside the show folder and read it back."""
        expected = _foundation_sidecar()
        sidecar_path = foundation_tree / ".alfred"
        # Pin the encoding so the round trip does not depend on the
        # platform's locale default.
        sidecar_path.write_text(
            yaml.safe_dump(serialize(expected), sort_keys=False),
            encoding="utf-8",
        )
        recovered = deserialize(
            yaml.safe_load(sidecar_path.read_text(encoding="utf-8"))
        )
        assert recovered == expected
|
||||
Reference in New Issue
Block a user