From e65c1df2291cd7e085f4b77eb9fae0f00f56de7c Mon Sep 17 00:00:00 2001 From: Francwa Date: Mon, 25 May 2026 16:01:39 +0200 Subject: [PATCH] =?UTF-8?q?feat(.alfred=20v2=20=E2=80=94=20Phase=202):=20P?= =?UTF-8?q?ydantic=20sidecars,=20atomic=20repos,=20auto-heal=20index?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec: specs/dot_alfred_v2.md (Phase 2). New package alfred/infrastructure/persistence/dot_alfred/v2/: * sidecar_release.py / sidecar_root.py — Pydantic DTOs (extra="forbid", frozen=True) for per-item sidecars and the library-root index. schema_version enforced via model_validator. * serializer.py — read_yaml / atomic_write_yaml (.tmp + os.replace). SidecarSchemaError wraps YAML + Pydantic errors uniformly. * bridge.py — lossless domain <-> sidecar for SeriesRelease / MovieRelease; projection-only show_index_entry_from / movie_index_entry_from with multi-episode-file flattening. * repository.py — DotAlfredSeriesReleaseRepository / DotAlfredMovieReleaseRepository (log+skip on corruption), DotAlfredTVShowLibraryIndex / DotAlfredMovieLibraryIndex with silent auto-heal on missing/corrupt index reads. Writes never auto-heal (read paths handle that). TMDB client extensions: * TmdbSeasonInfo / TmdbShowInfo DTOs + pure parse_tv_show_info. * TMDBClient.get_tv_show_info aggregates /tv/{id} + /tv/{id}/external_ids. Domain change: * SubtitleTrack gains is_sdh: bool = False, populated from ffprobe's hearing_impaired disposition. Required for v2 sidecar parity (spec replaces v1's type: "sdh" with explicit flag). Default keeps every existing caller unchanged. Tests: 37 new v2 integration tests on tmp_path (round-trips, atomic writes, schema mismatch handling, anchor warnings, auto-heal paths) plus 16 TMDB DTO tests. Full suite: 1240 -> 1277 passed. Implementation notes filed in .claude/specs/dot_alfred_v2_notes.md (strict=True trade-off, upsert signature deviation from spec, etc.). 
Phases 3-5 (TVShow/Movie refactor to TMDB-only, rescan_show rewrite, v1 deletion + wiring) are next. --- CHANGELOG.md | 53 ++ alfred/domain/shared/media.py | 12 +- alfred/infrastructure/api/tmdb/client.py | 25 +- alfred/infrastructure/api/tmdb/dto.py | 132 +++- .../persistence/dot_alfred/v2/__init__.py | 59 ++ .../persistence/dot_alfred/v2/bridge.py | 318 +++++++++ .../persistence/dot_alfred/v2/repository.py | 658 ++++++++++++++++++ .../persistence/dot_alfred/v2/serializer.py | 76 ++ .../dot_alfred/v2/sidecar_release.py | 167 +++++ .../persistence/dot_alfred/v2/sidecar_root.py | 148 ++++ alfred/infrastructure/probe/ffprobe_prober.py | 1 + tests/infrastructure/api/test_tmdb_client.py | 57 ++ tests/infrastructure/api/test_tmdb_dto.py | 168 +++++ .../persistence/dot_alfred/v2/__init__.py | 0 .../persistence/dot_alfred/v2/conftest.py | 200 ++++++ .../dot_alfred/v2/test_library_index.py | 266 +++++++ .../dot_alfred/v2/test_release_repository.py | 137 ++++ .../dot_alfred/v2/test_round_trip.py | 91 +++ 18 files changed, 2565 insertions(+), 3 deletions(-) create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/__init__.py create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/bridge.py create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/repository.py create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/serializer.py create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/sidecar_release.py create mode 100644 alfred/infrastructure/persistence/dot_alfred/v2/sidecar_root.py create mode 100644 tests/infrastructure/api/test_tmdb_dto.py create mode 100644 tests/infrastructure/persistence/dot_alfred/v2/__init__.py create mode 100644 tests/infrastructure/persistence/dot_alfred/v2/conftest.py create mode 100644 tests/infrastructure/persistence/dot_alfred/v2/test_library_index.py create mode 100644 tests/infrastructure/persistence/dot_alfred/v2/test_release_repository.py create mode 100644 
tests/infrastructure/persistence/dot_alfred/v2/test_round_trip.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 75d48f2..ad771a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,59 @@ callers). ### Added +- **`.alfred` v2 — Phase 2: new persistence package + TMDB client + extensions.** Second phase of `specs/dot_alfred_v2.md` on branch + `refactor/dot-alfred-v2`. The new + `alfred/infrastructure/persistence/dot_alfred/v2/` package ships + the full v2 sidecar stack while leaving v1 (and the existing + `TVShow` aggregate) untouched — Phase 3 is the cutover. + - **Pydantic DTOs** — `SeriesReleaseSidecar` / + `MovieReleaseSidecar` (per-item), `TVShowLibraryIndexSidecar` / + `MovieLibraryIndexSidecar` (library-root index). All built on a + common `_Strict` base (`extra="forbid"`, `frozen=True`) with a + `@model_validator` enforcing `schema_version == 1`. + - **Track entries** — `AudioTrackEntry` / `SubtitleEntry` (sidecar + cache shape, slimmed from the domain track types). `SubtitleEntry` + carries `is_forced` + `is_sdh` as explicit booleans (v1's + `type: "sdh"` overload is gone). + - **Serializer** — `read_yaml` / `atomic_write_yaml` helpers + centralize YAML I/O and atomic writes (`.tmp + os.replace`). + `SidecarSchemaError` wraps both YAML parse errors and Pydantic + validation errors for uniform catch-and-skip semantics. + - **Bridge** — lossless `domain ↔ sidecar` conversion for + `SeriesRelease` / `MovieRelease` (round-trippable, including + multi-episode ranges and `is_sdh` subtitles); one-way projection + for library-index entries (`show_index_entry_from`, + `movie_index_entry_from`) that flattens multi-episode files into + per-TMDB-slot maps in `seasons[*].episodes`. 
+ - **Repositories** — + `DotAlfredSeriesReleaseRepository` / + `DotAlfredMovieReleaseRepository` walk `library_root/*/` with + log+skip on corruption; **`DotAlfredTVShowLibraryIndex`** / + **`DotAlfredMovieLibraryIndex`** auto-heal silently on missing or + corrupt index files by rebuilding from the per-item sidecars + (healed entries keep TMDB-cached fields as placeholders until the + next sync repopulates them). Writes are atomic and never auto-heal + (read paths handle that). + - **TMDB client extensions** — `TmdbSeasonInfo` / `TmdbShowInfo` + DTOs + `TMDBClient.get_tv_show_info(tmdb_id)` aggregating + `/tv/{id}` + `/tv/{id}/external_ids`. The parsing logic is a pure + function (`parse_tv_show_info`) testable without HTTP, with an + injectable reference date for deterministic `aired` flag tests. +- **`is_sdh` flag on `SubtitleTrack`.** Added to + `alfred/domain/shared/media.py::SubtitleTrack` to mirror ffprobe's + `hearing_impaired` disposition. Wired through the ffprobe layer + (`ffprobe_prober.py`) and the v2 sidecar bridge so SDH information + round-trips end-to-end. Defaults to `False` — backwards-compatible + for every existing caller. +- **37 v2 integration tests** on `tmp_path` covering round-trips + (domain ↔ sidecar ↔ YAML ↔ domain), atomic writes (no `.tmp` + leftovers), per-item log+skip on corruption / schema mismatch, + movie anchor-mismatch warning, full upsert / find / delete on both + library indexes, and the auto-heal path on missing / corrupt / + schema-mismatched index files. **16 TMDB DTO tests** for the new + `parse_tv_show_info` pure function. + - **`.alfred` v2 — Phase 1: new `releases/` domain.** First step of `specs/dot_alfred_v2.md` on branch `refactor/dot-alfred-v2`. 
The new `alfred/domain/releases/` package introduces a filesystem-only diff --git a/alfred/domain/shared/media.py b/alfred/domain/shared/media.py index cd142c2..1497da7 100644 --- a/alfred/domain/shared/media.py +++ b/alfred/domain/shared/media.py @@ -44,13 +44,23 @@ class AudioTrack: @dataclass(frozen=True) class SubtitleTrack: - """A single embedded subtitle track as reported by ffprobe.""" + """A single embedded subtitle track as reported by ffprobe. + + ffprobe reports ``forced`` / ``default`` / ``hearing_impaired`` as + independent disposition flags — we mirror that shape directly. + ``is_sdh`` flags hearing-impaired tracks (called "SDH" in the + Anglo-Saxon distribution world: subtitles for the deaf and hard of + hearing, with non-speech audio cues). v2 ``.alfred`` sidecars + persist this flag explicitly; v1's ``type: "sdh"`` string overload + is gone. + """ index: int codec: str | None # subrip, ass, hdmv_pgs_subtitle, … language: str | None # ISO 639-2: fre, eng, und, … is_default: bool = False is_forced: bool = False + is_sdh: bool = False @dataclass(frozen=True) diff --git a/alfred/infrastructure/api/tmdb/client.py b/alfred/infrastructure/api/tmdb/client.py index 32b8d36..18e19c7 100644 --- a/alfred/infrastructure/api/tmdb/client.py +++ b/alfred/infrastructure/api/tmdb/client.py @@ -8,7 +8,7 @@ from requests.exceptions import HTTPError, RequestException, Timeout from alfred.settings import Settings, settings -from .dto import MediaResult +from .dto import MediaResult, TmdbShowInfo, parse_tv_show_info from .exceptions import ( TMDBAPIError, TMDBConfigurationError, @@ -279,6 +279,29 @@ class TMDBClient: """ return self._make_request(f"/tv/{tv_id}") + def get_tv_show_info(self, tmdb_id: int) -> TmdbShowInfo: + """ + Aggregate ``/tv/{id}`` + ``/tv/{id}/external_ids`` into a + :class:`TmdbShowInfo` — the shape consumed by the v2 library-root + index cache. + + Args: + tmdb_id: TMDB TV show ID. 
+ + Returns: + :class:`TmdbShowInfo` with ``imdb_id`` (when available), + ``status``, and one :class:`TmdbSeasonInfo` per season. + + Raises: + TMDBAPIError: if either HTTP call fails. + TMDBNotFoundError: if the show id is unknown. + ValueError: if the TMDB payload is missing required fields + (``id``, ``name``, ``status``). + """ + details = self.get_tv_details(tmdb_id) + external = self.get_external_ids("tv", tmdb_id) + return parse_tv_show_info(details, external) + def is_configured(self) -> bool: """ Check if TMDB client is properly configured. diff --git a/alfred/infrastructure/api/tmdb/dto.py b/alfred/infrastructure/api/tmdb/dto.py index 2c70eb9..211545e 100644 --- a/alfred/infrastructure/api/tmdb/dto.py +++ b/alfred/infrastructure/api/tmdb/dto.py @@ -1,6 +1,10 @@ """TMDB Data Transfer Objects.""" -from dataclasses import dataclass +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import date +from typing import Any @dataclass @@ -26,3 +30,129 @@ class ExternalIds: facebook_id: str | None = None instagram_id: str | None = None twitter_id: str | None = None + + +# ──────────────────────────────────────────────────────────────────────────── +# TV show details — used by the v2 library-root index cache +# ──────────────────────────────────────────────────────────────────────────── + + +@dataclass(frozen=True) +class TmdbSeasonInfo: + """One season block extracted from the ``/tv/{id}`` payload. + + ``aired`` is derived from ``air_date`` vs the current date (``today``), + not stored directly by TMDB. A season with no ``air_date`` is treated + as not yet aired. + """ + + number: int + episode_count: int + aired: bool + + +@dataclass(frozen=True) +class TmdbShowInfo: + """TMDB-cached identity for one TV show. + + Populated by :meth:`TMDBClient.get_tv_show_info`. Carries only the + fields the v2 library index needs to cache; richer details remain + on-demand via the raw client methods. 
+ """ + + tmdb_id: int + imdb_id: str | None + name: str + status: str + seasons: tuple[TmdbSeasonInfo, ...] = field(default_factory=tuple) + + +def parse_tv_show_info( + details: dict[str, Any], + external_ids: dict[str, Any], + *, + today: date | None = None, +) -> TmdbShowInfo: + """Build a :class:`TmdbShowInfo` from raw TMDB payloads. + + Pure function — no HTTP, no I/O. The HTTP layer + (:meth:`TMDBClient.get_tv_show_info`) calls this with the JSON it + received from TMDB. + + Args: + details: payload from ``/tv/{tmdb_id}`` (``name``, ``status``, + ``seasons`` …). + external_ids: payload from ``/tv/{tmdb_id}/external_ids`` + (``imdb_id`` mostly). + today: optional reference date for the ``aired`` flag — defaults + to :meth:`date.today`. Injectable for deterministic tests. + + Raises: + ValueError: if a required field (``id``, ``name``, ``status``) + is missing from ``details``. + """ + ref = today or date.today() + + tmdb_id = details.get("id") + if not isinstance(tmdb_id, int): + raise ValueError( + f"TMDB show payload missing/invalid 'id': {tmdb_id!r}" + ) + name = details.get("name") + if not isinstance(name, str) or not name: + raise ValueError( + f"TMDB show payload missing/invalid 'name': {name!r}" + ) + status = details.get("status") + if not isinstance(status, str) or not status: + raise ValueError( + f"TMDB show payload missing/invalid 'status': {status!r}" + ) + + imdb_id_raw = external_ids.get("imdb_id") + imdb_id = imdb_id_raw if isinstance(imdb_id_raw, str) and imdb_id_raw else None + + seasons_raw = details.get("seasons", []) or [] + seasons = tuple(_parse_season(s, ref) for s in seasons_raw) + + return TmdbShowInfo( + tmdb_id=tmdb_id, + imdb_id=imdb_id, + name=name, + status=status, + seasons=seasons, + ) + + +def _parse_season(raw: dict[str, Any], today: date) -> TmdbSeasonInfo: + number = raw.get("season_number") + if not isinstance(number, int): + raise ValueError( + f"TMDB season missing/invalid 'season_number': {number!r}" + ) + 
episode_count = raw.get("episode_count") + if not isinstance(episode_count, int): + raise ValueError( + f"TMDB season missing/invalid 'episode_count': {episode_count!r}" + ) + air_date_raw = raw.get("air_date") + aired = _is_aired(air_date_raw, today) + return TmdbSeasonInfo( + number=number, + episode_count=episode_count, + aired=aired, + ) + + +def _is_aired(air_date_raw: Any, today: date) -> bool: + """True if ``air_date_raw`` (YYYY-MM-DD string) is on or before ``today``. + + An empty / missing / unparseable air_date counts as not yet aired — + consistent with TMDB's behavior for announced-but-unscheduled seasons. + """ + if not isinstance(air_date_raw, str) or not air_date_raw: + return False + try: + return date.fromisoformat(air_date_raw) <= today + except ValueError: + return False diff --git a/alfred/infrastructure/persistence/dot_alfred/v2/__init__.py b/alfred/infrastructure/persistence/dot_alfred/v2/__init__.py new file mode 100644 index 0000000..b5e590d --- /dev/null +++ b/alfred/infrastructure/persistence/dot_alfred/v2/__init__.py @@ -0,0 +1,59 @@ +"""`.alfred` v2 — strict Pydantic sidecars + library-root index. + +v2 separates two kinds of files: + +* **Per-item sidecars** (``/.alfred``, ``/.alfred``) — + pure release data: what's physically on disk. +* **Library-root index** (``tv_shows/.alfred.index``, + ``movies/.alfred.index``) — TMDB-cached identity + grep-friendly + folder/path projection. + +All DTOs are Pydantic v2 models with strict validation +(``extra="forbid"`` + explicit ``schema_version`` check). The +serializer handles YAML I/O and atomic writes; the bridge translates +between DTOs and domain aggregates. + +See ``.claude/specs/dot_alfred_v2.md`` for the full schema. 
+""" + +from .serializer import ( + SCHEMA_VERSION, + SidecarSchemaError, + atomic_write_yaml, + read_yaml, +) +from .sidecar_release import ( + AudioTrackEntry, + EpisodeReleaseEntry, + MovieReleaseSidecar, + SeasonReleaseEntry, + SeriesReleaseSidecar, + SubtitleEntry, +) +from .sidecar_root import ( + MovieIndexEntry, + MovieLibraryIndexSidecar, + SeasonIndexEntry, + ShowIndexEntry, + ShowIndexMetadata, + TVShowLibraryIndexSidecar, +) + +__all__ = [ + "SCHEMA_VERSION", + "AudioTrackEntry", + "EpisodeReleaseEntry", + "MovieIndexEntry", + "MovieLibraryIndexSidecar", + "MovieReleaseSidecar", + "SeasonIndexEntry", + "SeasonReleaseEntry", + "SeriesReleaseSidecar", + "ShowIndexEntry", + "ShowIndexMetadata", + "SidecarSchemaError", + "SubtitleEntry", + "TVShowLibraryIndexSidecar", + "atomic_write_yaml", + "read_yaml", +] diff --git a/alfred/infrastructure/persistence/dot_alfred/v2/bridge.py b/alfred/infrastructure/persistence/dot_alfred/v2/bridge.py new file mode 100644 index 0000000..029897c --- /dev/null +++ b/alfred/infrastructure/persistence/dot_alfred/v2/bridge.py @@ -0,0 +1,318 @@ +"""Translate between v2 sidecar DTOs and domain aggregates. + +Four conversion families, each lossless in both directions where it +makes sense: + +* :class:`SeriesRelease` ↔ :class:`SeriesReleaseSidecar` — per-show + release file (round-trippable). +* :class:`MovieRelease` ↔ :class:`MovieReleaseSidecar` — per-movie + release file (round-trippable). +* :class:`TmdbShowInfo` + :class:`SeriesRelease` → :class:`ShowIndexEntry` + — projection only (the index entry combines TMDB cache + flattened + release episodes map; the reverse direction is not needed because the + per-show sidecar is the source of truth for releases and TMDB calls + populate identity). +* (movie identity, release_year) + :class:`MovieRelease` → + :class:`MovieIndexEntry` — same shape. 
+ +The bridge owns: + +* Pydantic DTO ↔ domain VO unwrapping (``TmdbId`` ↔ ``int``, + ``ImdbId`` ↔ ``str | None``, ``EpisodeNumber`` ↔ ``int`` …). +* The track-shape diff between :class:`AudioTrack` / + :class:`SubtitleTrack` (domain, ffprobe-shaped with ``index`` and + ``is_default``) and :class:`AudioTrackEntry` / :class:`SubtitleEntry` + (sidecar cache, slimmed). ``index`` and ``is_default`` are not + persisted; ``is_sdh`` is persisted on subtitles. +* The episode-slot flattening — one physical file with + ``EpisodeRange(start=2, end=3)`` becomes two entries in the + ``ShowIndexEntry.seasons[*].episodes`` map (``"E02"`` and ``"E03"``, + both pointing at the same path). +""" + +from __future__ import annotations + +from datetime import datetime + +from ....api.tmdb.dto import TmdbShowInfo +from .....domain.releases.entities import ( + EpisodeRelease, + MovieRelease, + SeasonRelease, + SeriesRelease, + TrackProfile, +) +from .....domain.releases.value_objects import EpisodeRange, ReleaseMode +from .....domain.shared.media import AudioTrack, SubtitleTrack +from .....domain.shared.value_objects import FilePath, ImdbId, TmdbId +from .....domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber +from .sidecar_release import ( + SCHEMA_VERSION, + AudioTrackEntry, + EpisodeReleaseEntry, + MovieReleaseSidecar, + SeasonReleaseEntry, + SeriesReleaseSidecar, + SubtitleEntry, +) +from .sidecar_root import ( + MovieIndexEntry, + SeasonIndexEntry, + ShowIndexEntry, + ShowIndexMetadata, +) + +__all__ = [ + "movie_index_entry_from", + "movie_release_from_sidecar", + "movie_release_to_sidecar", + "series_release_from_sidecar", + "series_release_to_sidecar", + "show_index_entry_from", +] + + +# ════════════════════════════════════════════════════════════════════════════ +# Series release — domain ↔ sidecar (round-trippable) +# ════════════════════════════════════════════════════════════════════════════ + + +def series_release_to_sidecar(release: SeriesRelease) -> 
SeriesReleaseSidecar: + """Project a :class:`SeriesRelease` to its per-show sidecar DTO.""" + return SeriesReleaseSidecar( + schema_version=SCHEMA_VERSION, + tmdb_id=release.tmdb_id.value, + imdb_id=str(release.imdb_id) if release.imdb_id is not None else None, + releases=tuple(_season_to_entry(s) for s in release.seasons), + ) + + +def series_release_from_sidecar(sidecar: SeriesReleaseSidecar) -> SeriesRelease: + """Rebuild a :class:`SeriesRelease` from its per-show sidecar DTO.""" + return SeriesRelease( + tmdb_id=TmdbId(sidecar.tmdb_id), + imdb_id=ImdbId(sidecar.imdb_id) if sidecar.imdb_id else None, + seasons=tuple(_season_from_entry(s) for s in sidecar.releases), + ) + + +def _season_to_entry(season: SeasonRelease) -> SeasonReleaseEntry: + return SeasonReleaseEntry( + season=season.season_number.value, + mode=season.mode, + folder=season.folder, + episodes=tuple(_episode_to_entry(ep) for ep in season.episodes), + ) + + +def _season_from_entry(entry: SeasonReleaseEntry) -> SeasonRelease: + return SeasonRelease( + season_number=SeasonNumber(entry.season), + folder=entry.folder, + mode=ReleaseMode(entry.mode), + episodes=tuple(_episode_from_entry(ep) for ep in entry.episodes), + ) + + +def _episode_to_entry(episode: EpisodeRelease) -> EpisodeReleaseEntry: + return EpisodeReleaseEntry( + start=episode.episodes.start.value, + end=episode.episodes.end.value, + file=str(episode.file_path), + audio=tuple(_audio_to_entry(t) for t in episode.tracks.audio_tracks), + subtitles=tuple(_sub_to_entry(t) for t in episode.tracks.subtitle_tracks), + ) + + +def _episode_from_entry(entry: EpisodeReleaseEntry) -> EpisodeRelease: + return EpisodeRelease( + episodes=EpisodeRange( + start=EpisodeNumber(entry.start), + end=EpisodeNumber(entry.end), + ), + file_path=FilePath(entry.file), + tracks=TrackProfile( + audio_tracks=tuple(_audio_from_entry(a, i) for i, a in enumerate(entry.audio)), + subtitle_tracks=tuple( + _sub_from_entry(s, i) for i, s in enumerate(entry.subtitles) + ), + ), 
+ ) + + +# ════════════════════════════════════════════════════════════════════════════ +# Movie release — domain ↔ sidecar (round-trippable) +# ════════════════════════════════════════════════════════════════════════════ + + +def movie_release_to_sidecar(release: MovieRelease) -> MovieReleaseSidecar: + """Project a :class:`MovieRelease` to its per-movie sidecar DTO.""" + return MovieReleaseSidecar( + schema_version=SCHEMA_VERSION, + tmdb_id=release.tmdb_id.value, + imdb_id=str(release.imdb_id) if release.imdb_id is not None else None, + folder=release.folder, + file=str(release.file_path), + audio=tuple(_audio_to_entry(t) for t in release.tracks.audio_tracks), + subtitles=tuple(_sub_to_entry(t) for t in release.tracks.subtitle_tracks), + ) + + +def movie_release_from_sidecar(sidecar: MovieReleaseSidecar) -> MovieRelease: + """Rebuild a :class:`MovieRelease` from its per-movie sidecar DTO.""" + return MovieRelease( + tmdb_id=TmdbId(sidecar.tmdb_id), + imdb_id=ImdbId(sidecar.imdb_id) if sidecar.imdb_id else None, + folder=sidecar.folder, + file_path=FilePath(sidecar.file), + tracks=TrackProfile( + audio_tracks=tuple( + _audio_from_entry(a, i) for i, a in enumerate(sidecar.audio) + ), + subtitle_tracks=tuple( + _sub_from_entry(s, i) for i, s in enumerate(sidecar.subtitles) + ), + ), + ) + + +# ════════════════════════════════════════════════════════════════════════════ +# Library-root index — projection only +# ════════════════════════════════════════════════════════════════════════════ + + +def show_index_entry_from( + info: TmdbShowInfo, + release: SeriesRelease | None, + *, + path: str, + fetched_at: datetime, +) -> ShowIndexEntry: + """Combine a TMDB cache snapshot + (optional) on-disk release into one + library-index entry. + + The TMDB DTO supplies identity and per-season ``episode_count`` + + ``aired`` flags. The release (if present) supplies the + ``episodes:`` slot map per season — flattened so a multi-episode + file appears under every TMDB slot it covers. 
+ + A season exists in the index entry only if TMDB knows about it. + A release season unknown to TMDB (e.g. a manually-added "Season 0" + specials folder) is silently ignored at the index level — it's + still recorded faithfully in the per-show release sidecar, which + is the source of truth for what's on disk. + """ + release_by_season = ( + {s.season_number.value: s for s in release.seasons} if release else {} + ) + seasons = tuple( + SeasonIndexEntry( + number=s.number, + episode_count=s.episode_count, + aired=s.aired, + episodes=_flatten_season_episodes(release_by_season.get(s.number)), + ) + for s in info.seasons + ) + return ShowIndexEntry( + tmdb_id=info.tmdb_id, + imdb_id=info.imdb_id, + name=info.name, + status=info.status, + metadata=ShowIndexMetadata(path=path, fetched_at=fetched_at), + seasons=seasons, + ) + + +def movie_index_entry_from( + release: MovieRelease, + *, + name: str, + release_year: int | None, + path: str, + fetched_at: datetime, +) -> MovieIndexEntry: + """Project a movie release + identity facts into one library-index entry. + + Movies don't have a ``TmdbMovieInfo`` DTO yet (no per-movie TMDB + cache surface defined in Phase 2), so identity facts are passed + explicitly by the caller. The release supplies ``tmdb_id`` / + ``imdb_id``; ``name`` and ``release_year`` come from the caller's + TMDB lookup (or a future ``TmdbMovieInfo`` DTO when one ships). + """ + return MovieIndexEntry( + tmdb_id=release.tmdb_id.value, + imdb_id=str(release.imdb_id) if release.imdb_id is not None else None, + name=name, + release_year=release_year, + metadata=ShowIndexMetadata(path=path, fetched_at=fetched_at), + ) + + +def _flatten_season_episodes(season: SeasonRelease | None) -> dict[str, str]: + """Build the per-TMDB-slot ``{"E01": path, "E02": path …}`` map. + + A multi-episode file (``EpisodeRange(start=2, end=3)``) appears + twice — once under ``"E02"`` and once under ``"E03"`` — with the + same path. 
This duplication is intentional (see the spec's + *Trade-offs* section): symmetric reads, grep-friendly lookups. + """ + if season is None: + return {} + out: dict[str, str] = {} + for ep in season.episodes: + path = str(ep.file_path) + for n in ep.episodes.numbers(): + out[f"E{n.value:02d}"] = path + return out + + +# ════════════════════════════════════════════════════════════════════════════ +# Track conversions — sidecar entry ↔ domain track +# ════════════════════════════════════════════════════════════════════════════ +# +# The domain tracks (``AudioTrack`` / ``SubtitleTrack``) carry an +# ``index`` (ffprobe stream index) and ``is_default`` flag that are +# **not** persisted to the sidecar — the sidecar is a cache, not a +# full ffprobe dump. On read-back we synthesize ``index`` from the +# list position and default ``is_default`` to False; callers that +# need the original ffprobe view should re-probe. + + +def _audio_to_entry(track: AudioTrack) -> AudioTrackEntry: + return AudioTrackEntry( + codec=track.codec, + channels=track.channels, + channel_layout=track.channel_layout, + language=track.language, + ) + + +def _audio_from_entry(entry: AudioTrackEntry, index: int) -> AudioTrack: + return AudioTrack( + index=index, + codec=entry.codec, + channels=entry.channels, + channel_layout=entry.channel_layout, + language=entry.language, + ) + + +def _sub_to_entry(track: SubtitleTrack) -> SubtitleEntry: + return SubtitleEntry( + codec=track.codec, + language=track.language, + is_forced=track.is_forced, + is_sdh=track.is_sdh, + ) + + +def _sub_from_entry(entry: SubtitleEntry, index: int) -> SubtitleTrack: + return SubtitleTrack( + index=index, + codec=entry.codec, + language=entry.language, + is_default=False, + is_forced=entry.is_forced, + is_sdh=entry.is_sdh, + ) diff --git a/alfred/infrastructure/persistence/dot_alfred/v2/repository.py b/alfred/infrastructure/persistence/dot_alfred/v2/repository.py new file mode 100644 index 0000000..8d64e63 --- /dev/null +++ 
b/alfred/infrastructure/persistence/dot_alfred/v2/repository.py @@ -0,0 +1,658 @@ +"""Filesystem-backed implementations of the v2 ``.alfred`` repositories. + +Four concrete classes — two per media type — back the abstract ports +defined in the domain (``SeriesReleaseRepository`` etc., wired in +Phase 5): + +* :class:`DotAlfredSeriesReleaseRepository` — per-show ``.alfred``. +* :class:`DotAlfredMovieReleaseRepository` — per-movie ``.alfred``. +* :class:`DotAlfredTVShowLibraryIndex` — ``tv_shows/.alfred.index`` + (with auto-heal on missing/corrupt reads). +* :class:`DotAlfredMovieLibraryIndex` — ``movies/.alfred.index`` + (same auto-heal behavior). + +Validation is delegated to the Pydantic DTOs (strict, ``extra=forbid``). +The repository layer only handles: + +* YAML I/O (via :mod:`.serializer`). +* Atomic writes (``.tmp + os.replace``). +* Per-item log+skip on corruption (a single bad per-show sidecar must + not break the rest of the library walk). +* Auto-heal on the library index: when the index file is absent or + fails schema validation on load, the index is silently rebuilt by + walking ``library_root/*/`` and reading each per-item sidecar (the + source of truth for ``tmdb_id`` / ``imdb_id``). TMDB-cached fields + (``name``, ``status``, ``episode_count``, ``aired``) are left empty + until the next sync repopulates them. +* Anchor-mismatch warning: a per-show sidecar that lives at folder + ``X`` but whose index entry says ``path: Y`` triggers a warning log + (and the heal path on the next mismatch is the caller's call). 
class ShowFolderUnknown(LookupError):
    """Signal that ``save()`` was pointed at a folder missing from disk.

    The release repositories only drop a sidecar next to media that is
    already in place; creating the folder is the job of the upstream
    ``MediaOrganizer``, so a missing folder is reported rather than
    created.
    """


class DotAlfredSeriesReleaseRepository:
    """Reader/writer for the per-show ``.alfred`` sidecars.

    Args:
        library_root: the ``tv_shows/`` directory; each direct
            sub-folder is one show.

    Nothing is cached: every read re-walks ``library_root/*/`` in
    sorted folder order. Folders without a sidecar are passed over
    silently, and a sidecar that ``_load_series_release`` cannot load
    (it returns ``None``) is skipped so the rest of the walk still
    completes.
    """

    def __init__(self, library_root: Path) -> None:
        self._library_root = Path(library_root)

    # ── Reads ───────────────────────────────────────────────────────────────

    def find_by_tmdb_id(self, tmdb_id: TmdbId) -> SeriesRelease | None:
        """Return the first release whose ``tmdb_id`` equals ``tmdb_id``, else ``None``."""
        matching = (
            candidate
            for _name, candidate in self._iter_library()
            if candidate.tmdb_id == tmdb_id
        )
        return next(matching, None)

    def find_all(self) -> list[SeriesRelease]:
        """Return every loadable release below ``library_root``.

        The list follows sorted folder-name order, so repeated calls on
        an unchanged library give the same sequence.
        """
        return [found for _name, found in self._iter_library()]

    # ── Writes ──────────────────────────────────────────────────────────────

    def save(self, release: SeriesRelease, *, show_folder: str) -> None:
        """Write ``release`` to ``library_root/show_folder/.alfred``.

        Args:
            release: the aggregate to persist.
            show_folder: folder name relative to ``library_root``
                (e.g. ``"Foundation"``); it must already exist.

        Raises:
            ShowFolderUnknown: if ``library_root/show_folder`` is not
                an existing directory.
        """
        show_dir = self._library_root / show_folder
        if not show_dir.is_dir():
            raise ShowFolderUnknown(
                f"show folder does not exist on disk: {show_dir}"
            )
        payload = _dump_model(series_release_to_sidecar(release))
        atomic_write_yaml(show_dir / SIDECAR_FILENAME, payload)

    def delete(self, tmdb_id: TmdbId) -> bool:
        """Remove the sidecar of the first show carrying ``tmdb_id``.

        Returns ``True`` when a sidecar was unlinked and ``False`` when
        no loadable sidecar matched. Only the sidecar file is removed;
        the show folder stays.
        """
        for folder_name, candidate in self._iter_library():
            if candidate.tmdb_id != tmdb_id:
                continue
            (self._library_root / folder_name / SIDECAR_FILENAME).unlink()
            return True
        return False

    # ── Internals ───────────────────────────────────────────────────────────

    def _iter_library(self):
        """Yield ``(folder_name, SeriesRelease)`` per loadable sidecar, sorted by folder."""
        root = self._library_root
        if not root.is_dir():
            return
        for child in sorted(root.iterdir()):
            # Only directories that actually hold a sidecar file qualify.
            if not child.is_dir():
                continue
            candidate_path = child / SIDECAR_FILENAME
            if not candidate_path.is_file():
                continue
            loaded = _load_series_release(candidate_path, expected_folder=child.name)
            if loaded is None:
                continue
            yield child.name, loaded
class DotAlfredMovieReleaseRepository:
    """Reader/writer for the per-movie ``.alfred`` sidecars.

    Movie-library counterpart of
    :class:`DotAlfredSeriesReleaseRepository`: the same sorted walk over
    ``library_root/*/`` that skips what cannot be loaded, and the same
    write path through ``atomic_write_yaml``.
    """

    def __init__(self, library_root: Path) -> None:
        self._library_root = Path(library_root)

    def find_by_tmdb_id(self, tmdb_id: TmdbId) -> MovieRelease | None:
        """Return the first release whose ``tmdb_id`` equals ``tmdb_id``, else ``None``."""
        matching = (
            candidate
            for _name, candidate in self._iter_library()
            if candidate.tmdb_id == tmdb_id
        )
        return next(matching, None)

    def find_all(self) -> list[MovieRelease]:
        """Return every loadable release, in sorted folder-name order."""
        return [found for _name, found in self._iter_library()]

    def save(self, release: MovieRelease) -> None:
        """Write ``release`` to ``library_root/<release.folder>/.alfred``.

        The target folder comes from ``release.folder``, so — unlike the
        series repository's ``save`` — no folder argument is taken.

        Raises:
            ShowFolderUnknown: if ``library_root/release.folder`` is not
                an existing directory.
        """
        movie_dir = self._library_root / release.folder
        if not movie_dir.is_dir():
            raise ShowFolderUnknown(
                f"movie folder does not exist on disk: {movie_dir}"
            )
        payload = _dump_model(movie_release_to_sidecar(release))
        atomic_write_yaml(movie_dir / SIDECAR_FILENAME, payload)

    def delete(self, tmdb_id: TmdbId) -> bool:
        """Unlink the sidecar of the first movie carrying ``tmdb_id``.

        Returns ``True`` when a sidecar was removed, ``False`` when no
        loadable sidecar matched. The movie folder itself is kept.
        """
        for folder_name, candidate in self._iter_library():
            if candidate.tmdb_id != tmdb_id:
                continue
            (self._library_root / folder_name / SIDECAR_FILENAME).unlink()
            return True
        return False

    def _iter_library(self):
        """Yield ``(folder_name, MovieRelease)`` per loadable sidecar, sorted by folder."""
        root = self._library_root
        if not root.is_dir():
            return
        for child in sorted(root.iterdir()):
            # Only directories that actually hold a sidecar file qualify.
            if not child.is_dir():
                continue
            candidate_path = child / SIDECAR_FILENAME
            if not candidate_path.is_file():
                continue
            loaded = _load_movie_release(candidate_path, expected_folder=child.name)
            if loaded is None:
                continue
            yield child.name, loaded
Defaults to one over the + # same library_root so the index is self-sufficient. + self._release_repo = release_repo or DotAlfredSeriesReleaseRepository( + self._library_root + ) + + # ── Reads (auto-heal on missing/corrupt) ──────────────────────────────── + + def find_by_tmdb_id(self, tmdb_id: TmdbId) -> ShowIndexEntry | None: + for entry in self._load_or_heal().shows: + if entry.tmdb_id == tmdb_id.value: + return entry + return None + + def find_by_imdb_id(self, imdb_id: ImdbId) -> ShowIndexEntry | None: + needle = str(imdb_id) + for entry in self._load_or_heal().shows: + if entry.imdb_id == needle: + return entry + return None + + def find_by_path(self, path: str) -> ShowIndexEntry | None: + for entry in self._load_or_heal().shows: + if entry.metadata.path == path: + return entry + return None + + def find_all(self) -> tuple[ShowIndexEntry, ...]: + return self._load_or_heal().shows + + # ── Writes ────────────────────────────────────────────────────────────── + + def upsert( + self, + info: TmdbShowInfo, + release: SeriesRelease | None, + *, + path: str, + fetched_at: datetime, + ) -> None: + """Insert or replace the index entry for ``info.tmdb_id``. + + Args: + info: TMDB cache snapshot (identity + per-season episode + counts + aired flags). + release: optional on-disk release for the same show. When + present, its files are flattened into the per-season + ``episodes:`` slot map (multi-episode files appear + under each covered slot). When absent, the slot map is + empty (the show is "known to TMDB but nothing on disk + yet" — a legitimate state). + path: folder name relative to ``library_root``. + fetched_at: timestamp of the TMDB sync that produced + ``info``. Used by the TTL policy to decide future + refreshes. + + The write is atomic. If the index doesn't exist yet, it's + created. Concurrent readers see either the old version or the + new one, never a torn file. 
+ """ + new_entry = show_index_entry_from( + info, release, path=path, fetched_at=fetched_at + ) + current = self._load_or_heal() + shows = tuple( + new_entry if e.tmdb_id == new_entry.tmdb_id else e + for e in current.shows + ) + if not any(e.tmdb_id == new_entry.tmdb_id for e in shows): + shows = (*shows, new_entry) + self._write(TVShowLibraryIndexSidecar( + schema_version=SCHEMA_VERSION, + shows=shows, + )) + + def delete(self, tmdb_id: TmdbId) -> bool: + """Remove the entry for ``tmdb_id`` if present.""" + current = self._load_or_heal() + kept = tuple(e for e in current.shows if e.tmdb_id != tmdb_id.value) + if len(kept) == len(current.shows): + return False + self._write(TVShowLibraryIndexSidecar( + schema_version=SCHEMA_VERSION, + shows=kept, + )) + return True + + def heal(self) -> TVShowLibraryIndexSidecar: + """Rebuild the index by walking the per-show sidecars. + + Public entry point so callers can force a heal after detecting + drift (e.g. ``index.find_by_path`` returned an entry whose + ``path`` no longer exists on disk). TMDB-cached fields on + healed entries are left empty (``name=""`` etc.) — the next + sync repopulates them. + + Returns the fresh in-memory index after writing it atomically. 
+ """ + sidecar = self._build_from_releases() + self._write(sidecar) + return sidecar + + # ── Internals ─────────────────────────────────────────────────────────── + + @property + def _index_path(self) -> Path: + return self._library_root / INDEX_FILENAME + + def _load_or_heal(self) -> TVShowLibraryIndexSidecar: + """Return the parsed index, healing silently on missing/corrupt.""" + path = self._index_path + if not path.is_file(): + logger.info( + "library index missing at %s — healing from per-show sidecars", + path, + ) + return self.heal() + try: + raw = read_yaml(path) + return TVShowLibraryIndexSidecar.model_validate(raw) + except (SidecarSchemaError, ValidationError) as exc: + logger.warning( + "library index at %s is corrupt (%s) — healing", path, exc + ) + return self.heal() + + def _build_from_releases(self) -> TVShowLibraryIndexSidecar: + """Walk the per-show sidecars and synthesize an index. + + TMDB-cached fields (``name``, ``status``, seasons) are left + empty placeholders because we cannot fabricate them without a + TMDB round-trip — the next sync fills them in. The ``path`` + is recovered from the folder name, and ``fetched_at`` is set + to "now" so the TTL policy treats the entry as fresh-but-empty + until a sync runs. 
+ """ + now = datetime.now(UTC) + shows: list[ShowIndexEntry] = [] + for folder, release in self._release_repo._iter_library(): + shows.append( + ShowIndexEntry( + tmdb_id=release.tmdb_id.value, + imdb_id=str(release.imdb_id) if release.imdb_id else None, + name=folder, # placeholder until TMDB sync supplies the real name + status="unknown", # placeholder until TMDB sync supplies status + metadata={ + "path": folder, + "fetched_at": now, + }, + seasons=(), + ) + ) + return TVShowLibraryIndexSidecar( + schema_version=SCHEMA_VERSION, + shows=tuple(shows), + ) + + def _write(self, sidecar: TVShowLibraryIndexSidecar) -> None: + self._library_root.mkdir(parents=True, exist_ok=True) + atomic_write_yaml(self._index_path, _dump_model(sidecar)) + + +# ════════════════════════════════════════════════════════════════════════════ +# Movie library-root index — ``movies/.alfred.index`` +# ════════════════════════════════════════════════════════════════════════════ + + +class DotAlfredMovieLibraryIndex: + """Library-root index for movies — same shape as the TV index. + + Auto-heals on missing/corrupt loads by walking the per-movie + sidecars. Identity fields (``name``, ``release_year``) are left + empty placeholders on healed entries; the next sync fills them in. 
+ """ + + def __init__( + self, + library_root: Path, + *, + release_repo: DotAlfredMovieReleaseRepository | None = None, + ) -> None: + self._library_root = Path(library_root) + self._release_repo = release_repo or DotAlfredMovieReleaseRepository( + self._library_root + ) + + # ── Reads (auto-heal on missing/corrupt) ──────────────────────────────── + + def find_by_tmdb_id(self, tmdb_id: TmdbId) -> MovieIndexEntry | None: + for entry in self._load_or_heal().movies: + if entry.tmdb_id == tmdb_id.value: + return entry + return None + + def find_by_imdb_id(self, imdb_id: ImdbId) -> MovieIndexEntry | None: + needle = str(imdb_id) + for entry in self._load_or_heal().movies: + if entry.imdb_id == needle: + return entry + return None + + def find_by_path(self, path: str) -> MovieIndexEntry | None: + for entry in self._load_or_heal().movies: + if entry.metadata.path == path: + return entry + return None + + def find_all(self) -> tuple[MovieIndexEntry, ...]: + return self._load_or_heal().movies + + # ── Writes ────────────────────────────────────────────────────────────── + + def upsert( + self, + release: MovieRelease, + *, + name: str, + release_year: int | None, + path: str, + fetched_at: datetime, + ) -> None: + """Insert or replace the index entry for ``release.tmdb_id``. + + Args: + release: the on-disk movie release (carries ``tmdb_id`` / + ``imdb_id``). + name: TMDB title (no per-movie TMDB DTO yet — see the + bridge module's note). + release_year: TMDB ``release_date`` year, or ``None``. + path: folder name relative to ``library_root``. + fetched_at: TMDB sync timestamp. 
+ """ + new_entry = movie_index_entry_from( + release, + name=name, + release_year=release_year, + path=path, + fetched_at=fetched_at, + ) + current = self._load_or_heal() + movies = tuple( + new_entry if e.tmdb_id == new_entry.tmdb_id else e + for e in current.movies + ) + if not any(e.tmdb_id == new_entry.tmdb_id for e in movies): + movies = (*movies, new_entry) + self._write(MovieLibraryIndexSidecar( + schema_version=SCHEMA_VERSION, + movies=movies, + )) + + def delete(self, tmdb_id: TmdbId) -> bool: + current = self._load_or_heal() + kept = tuple(e for e in current.movies if e.tmdb_id != tmdb_id.value) + if len(kept) == len(current.movies): + return False + self._write(MovieLibraryIndexSidecar( + schema_version=SCHEMA_VERSION, + movies=kept, + )) + return True + + def heal(self) -> MovieLibraryIndexSidecar: + sidecar = self._build_from_releases() + self._write(sidecar) + return sidecar + + # ── Internals ─────────────────────────────────────────────────────────── + + @property + def _index_path(self) -> Path: + return self._library_root / INDEX_FILENAME + + def _load_or_heal(self) -> MovieLibraryIndexSidecar: + path = self._index_path + if not path.is_file(): + logger.info( + "library index missing at %s — healing from per-movie sidecars", + path, + ) + return self.heal() + try: + raw = read_yaml(path) + return MovieLibraryIndexSidecar.model_validate(raw) + except (SidecarSchemaError, ValidationError) as exc: + logger.warning( + "library index at %s is corrupt (%s) — healing", path, exc + ) + return self.heal() + + def _build_from_releases(self) -> MovieLibraryIndexSidecar: + now = datetime.now(UTC) + movies: list[MovieIndexEntry] = [] + for folder, release in self._release_repo._iter_library(): + movies.append( + MovieIndexEntry( + tmdb_id=release.tmdb_id.value, + imdb_id=str(release.imdb_id) if release.imdb_id else None, + name=folder, # placeholder until TMDB sync supplies the real name + release_year=None, + metadata={ + "path": folder, + "fetched_at": now, 
def _dump_model(model) -> dict:
    """Turn a Pydantic model into a plain dict ready for YAML dumping.

    ``mode="json"`` is passed so values are rendered in their
    JSON-compatible form (e.g. ``datetime`` as an ISO 8601 string, an
    enum as its underlying value). No fields are excluded, so defaults
    are written out too.
    """
    serialized = model.model_dump(mode="json")
    return serialized


def _load_series_release(
    sidecar_path: Path, *, expected_folder: str
) -> SeriesRelease | None:
    """Read and validate one per-show sidecar; ``None`` when it is invalid.

    A file that ``read_yaml`` rejects, or that fails
    ``SeriesReleaseSidecar`` validation, is logged at warning level and
    reported as ``None`` so the caller can keep walking the library.
    ``expected_folder`` is only used in that log message.
    """
    try:
        parsed = SeriesReleaseSidecar.model_validate(read_yaml(sidecar_path))
    except (SidecarSchemaError, ValidationError) as exc:
        logger.warning(
            "skipping %s (in %s) — invalid sidecar: %s",
            sidecar_path,
            expected_folder,
            exc,
        )
        return None
    # NOTE(review): the bridge call sits outside the try block, so an
    # exception raised while building the domain aggregate would abort
    # the caller's walk instead of being skipped — confirm the bridge
    # cannot fail on a sidecar that passed DTO validation.
    return series_release_from_sidecar(parsed)


def _load_movie_release(
    sidecar_path: Path, *, expected_folder: str
) -> MovieRelease | None:
    """Read and validate one per-movie sidecar; ``None`` when it is invalid.

    Same log-and-skip handling as the series loader. In addition, a
    sidecar whose ``folder`` differs from ``expected_folder`` triggers
    an "anchor mismatch" warning; the release is still returned in that
    case.
    """
    try:
        parsed = MovieReleaseSidecar.model_validate(read_yaml(sidecar_path))
    except (SidecarSchemaError, ValidationError) as exc:
        logger.warning(
            "skipping %s (in %s) — invalid sidecar: %s",
            sidecar_path,
            expected_folder,
            exc,
        )
        return None
    folder_matches = parsed.folder == expected_folder
    if not folder_matches:
        logger.warning(
            "anchor mismatch at %s: sidecar.folder=%r != actual folder=%r",
            sidecar_path,
            parsed.folder,
            expected_folder,
        )
    # NOTE(review): as in the series loader, bridge failures are not
    # caught here — confirm that is intended.
    return movie_release_from_sidecar(parsed)
class SidecarSchemaError(ValueError):
    """Raised when a sidecar file cannot be read, decoded, or parsed.

    Gives callers one exception type to catch for "this file is
    unusable" so they can log and skip it.
    """


def read_yaml(path: Path) -> dict[str, Any]:
    """Load a YAML file and return its top-level mapping.

    The file is always decoded as UTF-8, independent of the platform's
    default encoding.

    Raises:
        SidecarSchemaError: if the file cannot be read, is not valid
            UTF-8, contains invalid YAML, or its top-level value is not
            a mapping.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: without
        # catching it here a sidecar containing stray bytes would escape
        # the single-exception contract and crash callers that only
        # expect SidecarSchemaError.
        raise SidecarSchemaError(f"cannot read {path}: {exc}") from exc

    try:
        data = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise SidecarSchemaError(f"invalid YAML in {path}: {exc}") from exc

    if not isinstance(data, dict):
        raise SidecarSchemaError(
            f"{path}: top-level must be a mapping, got {type(data).__name__}"
        )
    return data


def atomic_write_yaml(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` as YAML to ``path`` via a temp file + ``os.replace``.

    The document is rendered first, written to ``<name>.tmp`` next to
    the target, then moved over ``path`` with ``os.replace`` so a reader
    sees either the previous file or the complete new one.

    Raises:
        OSError: if writing or replacing fails; the temporary file is
            removed before the error propagates.
    """
    # Render before touching the filesystem so a serialization failure
    # leaves nothing behind.
    text = yaml.safe_dump(data, sort_keys=False)
    tmp = path.with_name(path.name + ".tmp")
    try:
        tmp.write_text(text, encoding="utf-8")
        os.replace(tmp, path)
    except OSError:
        # Don't leave a stale temp file next to the sidecar.
        tmp.unlink(missing_ok=True)
        raise
SCHEMA_VERSION = 1


class _Strict(BaseModel):
    """Base for every v2 DTO: unknown keys are rejected, instances are frozen.

    Type coercion stays lenient, so a YAML ``list`` is accepted where a
    DTO declares a ``tuple`` field.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)


class _VersionedSidecar(_Strict):
    """Base for the root DTOs: owns ``schema_version`` and its check.

    Keeps the version validation in one place instead of repeating it
    on every root DTO.
    """

    schema_version: int

    @model_validator(mode="after")
    def _check_schema_version(self) -> _VersionedSidecar:
        """Reject any ``schema_version`` other than ``SCHEMA_VERSION``."""
        if self.schema_version != SCHEMA_VERSION:
            raise ValueError(
                f"unsupported schema_version: {self.schema_version} "
                f"(expected {SCHEMA_VERSION})"
            )
        return self


# ────────────────────────────────────────────────────────────────────────────
# Track entries (audio + subtitle), per physical file
# ────────────────────────────────────────────────────────────────────────────


class AudioTrackEntry(_Strict):
    """One audio stream as cached in the sidecar.

    Every field is optional; the sidecar is a cache, not a full probe
    dump.
    """

    codec: str | None = None
    channels: int | None = None
    channel_layout: str | None = None
    language: str | None = None


class SubtitleEntry(_Strict):
    """One embedded subtitle track as cached in the sidecar.

    ``is_forced`` and ``is_sdh`` are explicit boolean flags, both
    defaulting to ``False``.
    """

    codec: str | None = None
    language: str | None = None
    is_forced: bool = False
    is_sdh: bool = False


# ────────────────────────────────────────────────────────────────────────────
# Per-season release entries
# ────────────────────────────────────────────────────────────────────────────


class EpisodeReleaseEntry(_Strict):
    """One physical episode file (single or multi-episode).

    ``start`` / ``end`` are inclusive episode numbers, both >= 1. A
    single-episode file has ``start == end``; a multi-episode file has
    ``end > start``. ``file`` is the non-empty path of the file,
    relative to the show root.
    """

    start: int = Field(ge=1)
    end: int = Field(ge=1)
    file: str = Field(min_length=1)
    audio: tuple[AudioTrackEntry, ...] = ()
    subtitles: tuple[SubtitleEntry, ...] = ()

    @model_validator(mode="after")
    def _end_ge_start(self) -> EpisodeReleaseEntry:
        """Reject ranges that run backwards (``end < start``)."""
        if self.end < self.start:
            raise ValueError(
                f"episode entry end ({self.end}) must be >= start ({self.start})"
            )
        return self


class SeasonReleaseEntry(_Strict):
    """One season block in the per-show sidecar's ``releases[]``.

    Every mode shares the same shape (a list of physical files); the
    mode is stored explicitly rather than re-derived from the
    filesystem layout. ``season`` may be ``0``.
    """

    season: int = Field(ge=0)
    mode: ReleaseMode
    folder: str = Field(min_length=1)
    episodes: tuple[EpisodeReleaseEntry, ...] = ()


# ────────────────────────────────────────────────────────────────────────────
# Per-show / per-movie root DTOs
# ────────────────────────────────────────────────────────────────────────────


class SeriesReleaseSidecar(_VersionedSidecar):
    """Root DTO — one per-show ``.alfred`` file maps to one of these.

    ``tmdb_id`` (strictly positive) is the primary anchor; ``imdb_id``
    is an optional secondary one. ``schema_version`` and its check are
    inherited from :class:`_VersionedSidecar`.
    """

    tmdb_id: int = Field(gt=0)
    imdb_id: str | None = None
    releases: tuple[SeasonReleaseEntry, ...] = ()


class MovieReleaseSidecar(_VersionedSidecar):
    """Root DTO — one per-movie ``.alfred`` file maps to one of these.

    A movie sidecar describes a single release: one ``folder``, one
    ``file`` and that file's tracks. ``schema_version`` and its check
    are inherited from :class:`_VersionedSidecar`.
    """

    tmdb_id: int = Field(gt=0)
    imdb_id: str | None = None
    folder: str = Field(min_length=1)
    file: str = Field(min_length=1)
    audio: tuple[AudioTrackEntry, ...] = ()
    subtitles: tuple[SubtitleEntry, ...] = ()
class _Strict(BaseModel):
    """Base for every v2 DTO: unknown keys are rejected, instances are frozen.

    Type coercion stays lenient, so a YAML ``list`` is accepted where a
    DTO declares a ``tuple`` field.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)


class _VersionedIndex(_Strict):
    """Base for the library-index root DTOs: owns ``schema_version`` and its check.

    Keeps the version validation in one place instead of repeating it
    on both index DTOs.
    """

    schema_version: int

    @model_validator(mode="after")
    def _check_schema_version(self) -> _VersionedIndex:
        """Reject any ``schema_version`` other than ``SCHEMA_VERSION``."""
        if self.schema_version != SCHEMA_VERSION:
            raise ValueError(
                f"unsupported schema_version: {self.schema_version} "
                f"(expected {SCHEMA_VERSION})"
            )
        return self


# ────────────────────────────────────────────────────────────────────────────
# Per-item metadata block (shared by show + movie index entries)
# ────────────────────────────────────────────────────────────────────────────


class ShowIndexMetadata(_Strict):
    """Per-entry bookkeeping for one item in a library index.

    ``path`` is the non-empty folder name relative to the library
    root. ``fetched_at`` records when the entry's data was obtained;
    nothing here enforces a timezone.
    """

    path: str = Field(min_length=1)
    fetched_at: datetime


# ────────────────────────────────────────────────────────────────────────────
# TV — per-season + per-show index entries
# ────────────────────────────────────────────────────────────────────────────


class SeasonIndexEntry(_Strict):
    """One season block inside a :class:`ShowIndexEntry`.

    ``episodes`` maps an episode token (e.g. ``"E01"``) to a file
    path; a multi-episode file appears under each slot it covers.
    """

    number: int = Field(ge=0)
    episode_count: int = Field(ge=0)
    aired: bool = False
    episodes: dict[str, str] = Field(default_factory=dict)


class ShowIndexEntry(_Strict):
    """One TV show entry in the library-root index.

    ``tmdb_id`` (strictly positive) is the primary key; ``imdb_id`` is
    optional. ``name`` and ``status`` must be non-empty strings, so
    code that builds an entry without TMDB data has to supply
    placeholders rather than empty values.
    """

    tmdb_id: int = Field(gt=0)
    imdb_id: str | None = None
    name: str = Field(min_length=1)
    status: str = Field(min_length=1)
    metadata: ShowIndexMetadata
    seasons: tuple[SeasonIndexEntry, ...] = ()


# ────────────────────────────────────────────────────────────────────────────
# Movies — per-item index entry
# ────────────────────────────────────────────────────────────────────────────


class MovieIndexEntry(_Strict):
    """One movie entry in the library-root index.

    No seasons and no ``status``. ``name`` must be non-empty;
    ``release_year`` may be ``None`` when unknown.
    """

    tmdb_id: int = Field(gt=0)
    imdb_id: str | None = None
    name: str = Field(min_length=1)
    release_year: int | None = None
    metadata: ShowIndexMetadata


# ────────────────────────────────────────────────────────────────────────────
# Library-root root DTOs (one per media type)
# ────────────────────────────────────────────────────────────────────────────


class TVShowLibraryIndexSidecar(_VersionedIndex):
    """Root DTO — one ``tv_shows/.alfred.index`` maps to one of these.

    ``schema_version`` and its check are inherited from
    :class:`_VersionedIndex`.
    """

    shows: tuple[ShowIndexEntry, ...] = ()


class MovieLibraryIndexSidecar(_VersionedIndex):
    """Root DTO — one ``movies/.alfred.index`` maps to one of these.

    ``schema_version`` and its check are inherited from
    :class:`_VersionedIndex`.
    """

    movies: tuple[MovieIndexEntry, ...] = ()
class TestGetTvShowInfo:
    """``get_tv_show_info`` aggregates ``/tv/{id}`` + external_ids."""

    @patch("alfred.infrastructure.api.tmdb.client.requests.get")
    def test_happy_path(self, mock_get, client):
        """Details and external ids are merged into one info object."""
        season_one = {
            "season_number": 1,
            "episode_count": 10,
            "air_date": "2021-09-24",
        }
        season_two = {
            "season_number": 2,
            "episode_count": 10,
            "air_date": "2023-07-14",
        }
        details = {
            "id": 84958,
            "name": "Foundation",
            "status": "Returning Series",
            "seasons": [season_one, season_two],
        }
        external = {"imdb_id": "tt0804484"}
        # Responses are consumed in call order: details first, then
        # external ids.
        mock_get.side_effect = [
            _ok_response(details),
            _ok_response(external),
        ]

        info = client.get_tv_show_info(84958)

        assert info.tmdb_id == 84958
        assert info.imdb_id == "tt0804484"
        assert info.name == "Foundation"
        assert info.status == "Returning Series"
        assert len(info.seasons) == 2
        first_season = info.seasons[0]
        assert first_season.number == 1
        assert first_season.episode_count == 10
        assert first_season.aired is True

    @patch("alfred.infrastructure.api.tmdb.client.requests.get")
    def test_missing_imdb_id_becomes_none(self, mock_get, client):
        """An external_ids payload without ``imdb_id`` yields ``None``."""
        details = {
            "id": 1,
            "name": "X",
            "status": "Ended",
            "seasons": [],
        }
        mock_get.side_effect = [
            _ok_response(details),
            _ok_response({}),  # external_ids without imdb_id
        ]

        info = client.get_tv_show_info(1)

        assert info.imdb_id is None
        assert info.seasons == ()
+""" + +from __future__ import annotations + +from datetime import date + +import pytest + +from alfred.infrastructure.api.tmdb.dto import ( + TmdbSeasonInfo, + TmdbShowInfo, + parse_tv_show_info, +) + +REF_DATE = date(2026, 5, 25) + + +def _details(**overrides): + base = { + "id": 84958, + "name": "Foundation", + "status": "Returning Series", + "seasons": [], + } + base.update(overrides) + return base + + +class TestParseTvShowInfoHappyPath: + def test_minimal(self): + info = parse_tv_show_info( + _details(), + {"imdb_id": "tt0804484"}, + today=REF_DATE, + ) + assert info == TmdbShowInfo( + tmdb_id=84958, + imdb_id="tt0804484", + name="Foundation", + status="Returning Series", + seasons=(), + ) + + def test_with_seasons(self): + info = parse_tv_show_info( + _details( + seasons=[ + {"season_number": 1, "episode_count": 10, "air_date": "2021-09-24"}, + {"season_number": 2, "episode_count": 10, "air_date": "2023-07-14"}, + {"season_number": 3, "episode_count": 10, "air_date": "2027-01-01"}, + ], + ), + {"imdb_id": "tt0804484"}, + today=REF_DATE, + ) + assert info.seasons == ( + TmdbSeasonInfo(number=1, episode_count=10, aired=True), + TmdbSeasonInfo(number=2, episode_count=10, aired=True), + TmdbSeasonInfo(number=3, episode_count=10, aired=False), + ) + + +class TestParseTvShowInfoImdb: + def test_missing_imdb_id_becomes_none(self): + info = parse_tv_show_info(_details(), {}, today=REF_DATE) + assert info.imdb_id is None + + def test_null_imdb_id_becomes_none(self): + info = parse_tv_show_info(_details(), {"imdb_id": None}, today=REF_DATE) + assert info.imdb_id is None + + def test_empty_string_imdb_id_becomes_none(self): + info = parse_tv_show_info(_details(), {"imdb_id": ""}, today=REF_DATE) + assert info.imdb_id is None + + +class TestParseTvShowInfoAired: + def test_air_date_today_counts_as_aired(self): + info = parse_tv_show_info( + _details( + seasons=[{"season_number": 1, "episode_count": 1, "air_date": "2026-05-25"}], + ), + {}, + today=REF_DATE, + ) + assert 
info.seasons[0].aired is True + + def test_air_date_tomorrow_not_aired(self): + info = parse_tv_show_info( + _details( + seasons=[{"season_number": 1, "episode_count": 1, "air_date": "2026-05-26"}], + ), + {}, + today=REF_DATE, + ) + assert info.seasons[0].aired is False + + def test_no_air_date_not_aired(self): + info = parse_tv_show_info( + _details( + seasons=[{"season_number": 1, "episode_count": 1}], + ), + {}, + today=REF_DATE, + ) + assert info.seasons[0].aired is False + + def test_empty_air_date_not_aired(self): + info = parse_tv_show_info( + _details( + seasons=[{"season_number": 1, "episode_count": 1, "air_date": ""}], + ), + {}, + today=REF_DATE, + ) + assert info.seasons[0].aired is False + + def test_malformed_air_date_not_aired(self): + info = parse_tv_show_info( + _details( + seasons=[{"season_number": 1, "episode_count": 1, "air_date": "soon"}], + ), + {}, + today=REF_DATE, + ) + assert info.seasons[0].aired is False + + +class TestParseTvShowInfoErrors: + def test_missing_id_raises(self): + with pytest.raises(ValueError, match="'id'"): + parse_tv_show_info({"name": "X", "status": "Ended"}, {}, today=REF_DATE) + + def test_missing_name_raises(self): + with pytest.raises(ValueError, match="'name'"): + parse_tv_show_info({"id": 1, "status": "Ended"}, {}, today=REF_DATE) + + def test_empty_name_raises(self): + with pytest.raises(ValueError, match="'name'"): + parse_tv_show_info( + {"id": 1, "name": "", "status": "Ended"}, {}, today=REF_DATE + ) + + def test_missing_status_raises(self): + with pytest.raises(ValueError, match="'status'"): + parse_tv_show_info({"id": 1, "name": "X"}, {}, today=REF_DATE) + + def test_season_missing_number_raises(self): + with pytest.raises(ValueError, match="season_number"): + parse_tv_show_info( + _details(seasons=[{"episode_count": 5}]), + {}, + today=REF_DATE, + ) + + def test_season_missing_episode_count_raises(self): + with pytest.raises(ValueError, match="episode_count"): + parse_tv_show_info( + 
+ _details(seasons=[{"season_number": 1}]), + {}, + today=REF_DATE, + ) diff --git a/tests/infrastructure/persistence/dot_alfred/v2/__init__.py b/tests/infrastructure/persistence/dot_alfred/v2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/infrastructure/persistence/dot_alfred/v2/conftest.py b/tests/infrastructure/persistence/dot_alfred/v2/conftest.py new file mode 100644 index 0000000..4c552f2 --- /dev/null +++ b/tests/infrastructure/persistence/dot_alfred/v2/conftest.py @@ -0,0 +1,200 @@ +"""Shared fixtures for v2 ``.alfred`` integration tests. + +The fixtures here build realistic ``SeriesRelease`` / ``MovieRelease`` +aggregates — populated tracks, multi-episode files, both PACK and +EPISODIC modes — so every test starts from a known-rich state. +The point is to make the round-trip tests genuinely check losslessness +(if a field is unused in the fixture, the round-trip can't prove +much about it). +""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest + +from alfred.domain.releases.entities import ( + EpisodeRelease, + MovieRelease, + SeasonRelease, + SeriesRelease, + TrackProfile, +) +from alfred.domain.releases.value_objects import EpisodeRange, ReleaseMode +from alfred.domain.shared.media import AudioTrack, SubtitleTrack +from alfred.domain.shared.value_objects import FilePath, ImdbId, TmdbId +from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber +from alfred.infrastructure.api.tmdb.dto import TmdbSeasonInfo, TmdbShowInfo + + +def _audio(lang: str = "eng", *, index: int = 0) -> AudioTrack: + # ``index`` defaults to 0 to match what the bridge reconstructs on + # read (sidecars don't persist ffprobe stream indices — see the + # bridge module's track-conversion notes). Pass explicit indices + # only when a fixture has multiple tracks of the same kind.
+ return AudioTrack( + index=index, + codec="eac3", + channels=6, + channel_layout="5.1", + language=lang, + ) + + +def _sub( + lang: str = "eng", + *, + index: int = 0, + forced: bool = False, + sdh: bool = False, +) -> SubtitleTrack: + return SubtitleTrack( + index=index, + codec="subrip", + language=lang, + is_default=False, + is_forced=forced, + is_sdh=sdh, + ) + + +@pytest.fixture +def foundation_release() -> SeriesRelease: + """Foundation S01 (PACK, 3 files) + S02 (EPISODIC, one multi-episode file).""" + s01 = SeasonRelease( + season_number=SeasonNumber(1), + folder="Foundation.S01.1080p.WEBRip.x265-RARBG", + mode=ReleaseMode.PACK, + episodes=( + EpisodeRelease( + episodes=EpisodeRange(EpisodeNumber(1), EpisodeNumber(1)), + file_path=FilePath( + "Foundation.S01.1080p.WEBRip.x265-RARBG/" + "Foundation.S01E01.1080p.WEBRip.x265-RARBG.mkv" + ), + tracks=TrackProfile( + audio_tracks=(_audio("eng"),), + subtitle_tracks=( + _sub("eng", index=0), + _sub("eng", index=1, sdh=True), + ), + ), + ), + EpisodeRelease( + episodes=EpisodeRange(EpisodeNumber(2), EpisodeNumber(2)), + file_path=FilePath( + "Foundation.S01.1080p.WEBRip.x265-RARBG/" + "Foundation.S01E02.1080p.WEBRip.x265-RARBG.mkv" + ), + tracks=TrackProfile(audio_tracks=(_audio("eng"),)), + ), + EpisodeRelease( + episodes=EpisodeRange(EpisodeNumber(3), EpisodeNumber(3)), + file_path=FilePath( + "Foundation.S01.1080p.WEBRip.x265-RARBG/" + "Foundation.S01E03.1080p.WEBRip.x265-RARBG.mkv" + ), + tracks=TrackProfile(audio_tracks=(_audio("eng"),)), + ), + ), + ) + s02 = SeasonRelease( + season_number=SeasonNumber(2), + folder="Foundation.S02", + mode=ReleaseMode.EPISODIC, + episodes=( + EpisodeRelease( + episodes=EpisodeRange(EpisodeNumber(1), EpisodeNumber(1)), + file_path=FilePath( + "Foundation.S02/Foundation.S02E01.1080p.x265-ELiTE/" + "Foundation.S02E01.1080p.x265-ELiTE.mkv" + ), + tracks=TrackProfile(audio_tracks=(_audio("eng"),)), + ), + # Multi-episode file (E02 + E03 in one .mkv). 
+ EpisodeRelease( + episodes=EpisodeRange(EpisodeNumber(2), EpisodeNumber(3)), + file_path=FilePath( + "Foundation.S02/Foundation.S02E02-E03.2160p.WEB.x265-CtrlHD/" + "Foundation.S02E02-E03.2160p.WEB.x265-CtrlHD.mkv" + ), + tracks=TrackProfile( + audio_tracks=(_audio("eng"),), + subtitle_tracks=(_sub("eng", forced=True),), + ), + ), + ), + ) + return SeriesRelease( + tmdb_id=TmdbId(84958), + imdb_id=ImdbId("tt0804484"), + seasons=(s01, s02), + ) + + +@pytest.fixture +def inception_release() -> MovieRelease: + """Inception (2010) — single-file movie with rich tracks.""" + return MovieRelease( + tmdb_id=TmdbId(27205), + imdb_id=ImdbId("tt1375666"), + folder="Inception.2010.1080p.BluRay.x264-GROUP", + file_path=FilePath("Inception.2010.1080p.BluRay.x264-GROUP.mkv"), + tracks=TrackProfile( + audio_tracks=( + AudioTrack( + index=0, + codec="dts", + channels=8, + channel_layout="7.1", + language="eng", + ), + ), + subtitle_tracks=( + _sub("eng", index=0), + _sub("fre", index=1, forced=True), + ), + ), + ) + + +@pytest.fixture +def foundation_tmdb_info() -> TmdbShowInfo: + """Foundation TMDB cache snapshot — 3 seasons, S03 not yet aired.""" + return TmdbShowInfo( + tmdb_id=84958, + imdb_id="tt0804484", + name="Foundation", + status="Returning Series", + seasons=( + TmdbSeasonInfo(number=1, episode_count=10, aired=True), + TmdbSeasonInfo(number=2, episode_count=10, aired=True), + TmdbSeasonInfo(number=3, episode_count=10, aired=False), + ), + ) + + +@pytest.fixture +def now_utc() -> datetime: + """Stable UTC reference for deterministic fetched_at fields.""" + return datetime(2026, 5, 25, 8, 30, 0, tzinfo=UTC) + + +@pytest.fixture +def tv_library(tmp_path): + """``tv_shows/`` library root pre-populated with two empty show folders.""" + root = tmp_path / "tv_shows" + root.mkdir() + (root / "Foundation").mkdir() + (root / "Fallout").mkdir() + return root + + +@pytest.fixture +def movie_library(tmp_path): + """``movies/`` library root pre-populated with one empty movie folder.""" + root
= tmp_path / "movies" + root.mkdir() + (root / "Inception.2010.1080p.BluRay.x264-GROUP").mkdir() + return root diff --git a/tests/infrastructure/persistence/dot_alfred/v2/test_library_index.py b/tests/infrastructure/persistence/dot_alfred/v2/test_library_index.py new file mode 100644 index 0000000..23a2fec --- /dev/null +++ b/tests/infrastructure/persistence/dot_alfred/v2/test_library_index.py @@ -0,0 +1,266 @@ +"""Integration tests for the library-root index repositories. + +Cover upsert / delete / find_by_* and the auto-heal behavior on +missing / corrupt index files. Auto-heal must produce a valid +sidecar with TMDB-cached fields left as documented placeholders +(``status="unknown"``, ``seasons=()``). +""" + +from __future__ import annotations + +import logging + +from alfred.domain.shared.value_objects import ImdbId, TmdbId +from alfred.infrastructure.persistence.dot_alfred.v2.repository import ( + DotAlfredMovieLibraryIndex, + DotAlfredMovieReleaseRepository, + DotAlfredSeriesReleaseRepository, + DotAlfredTVShowLibraryIndex, +) + + +# ════════════════════════════════════════════════════════════════════════════ +# TV — upsert / find / delete +# ════════════════════════════════════════════════════════════════════════════ + + +class TestTVShowLibraryIndexUpsert: + def test_upsert_creates_index_file( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + assert (tv_library / ".alfred.index").is_file() + + def test_upsert_then_find_by_tmdb_id_returns_entry( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + entry = index.find_by_tmdb_id(TmdbId(84958)) + assert entry is not None + assert entry.name == 
"Foundation" + assert entry.status == "Returning Series" + assert entry.metadata.path == "Foundation" + assert entry.metadata.fetched_at == now_utc + + def test_upsert_flattens_multi_episode_file_across_slots( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + entry = index.find_by_tmdb_id(TmdbId(84958)) + s02 = next(s for s in entry.seasons if s.number == 2) + # E02 and E03 must point to the SAME multi-episode file. + assert s02.episodes["E02"] == s02.episodes["E03"] + assert "E02-E03" in s02.episodes["E02"] + + def test_upsert_twice_replaces_entry_does_not_duplicate( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + all_entries = index.find_all() + assert len(all_entries) == 1 + + def test_find_by_imdb_id_returns_entry( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + entry = index.find_by_imdb_id(ImdbId("tt0804484")) + assert entry is not None + assert entry.tmdb_id == 84958 + + def test_find_by_path_returns_entry( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + entry = index.find_by_path("Foundation") + assert entry is not None + + def test_delete_removes_entry( + self, tv_library, foundation_release, foundation_tmdb_info, 
now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + assert index.delete(TmdbId(84958)) is True + assert index.find_by_tmdb_id(TmdbId(84958)) is None + + def test_delete_unknown_id_returns_false(self, tv_library): + index = DotAlfredTVShowLibraryIndex(tv_library) + assert index.delete(TmdbId(999)) is False + + +# ════════════════════════════════════════════════════════════════════════════ +# TV — auto-heal +# ════════════════════════════════════════════════════════════════════════════ + + +class TestTVShowLibraryIndexAutoHeal: + def test_missing_index_is_silently_healed_from_per_show_sidecars( + self, tv_library, foundation_release, caplog + ): + # Write a per-show sidecar but no index. + release_repo = DotAlfredSeriesReleaseRepository(tv_library) + release_repo.save(foundation_release, show_folder="Foundation") + assert not (tv_library / ".alfred.index").exists() + + index = DotAlfredTVShowLibraryIndex(tv_library, release_repo=release_repo) + with caplog.at_level(logging.INFO): + entry = index.find_by_tmdb_id(TmdbId(84958)) + + assert entry is not None + assert entry.tmdb_id == 84958 + # Healed entries carry placeholders (no TMDB sync yet). + assert entry.status == "unknown" + assert entry.seasons == () + assert (tv_library / ".alfred.index").is_file() + assert any("healing" in r.message for r in caplog.records) + + def test_corrupt_index_is_healed( + self, tv_library, foundation_release, caplog + ): + release_repo = DotAlfredSeriesReleaseRepository(tv_library) + release_repo.save(foundation_release, show_folder="Foundation") + # Plant a corrupt index. 
+ (tv_library / ".alfred.index").write_text("not: [valid yaml") + + index = DotAlfredTVShowLibraryIndex(tv_library, release_repo=release_repo) + with caplog.at_level(logging.WARNING): + entries = index.find_all() + + assert len(entries) == 1 + assert any("corrupt" in r.message for r in caplog.records) + + def test_schema_version_mismatch_in_index_is_healed( + self, tv_library, foundation_release + ): + release_repo = DotAlfredSeriesReleaseRepository(tv_library) + release_repo.save(foundation_release, show_folder="Foundation") + (tv_library / ".alfred.index").write_text( + "schema_version: 999\nshows: []\n" + ) + index = DotAlfredTVShowLibraryIndex(tv_library, release_repo=release_repo) + entries = index.find_all() + # After heal, only Foundation (the only valid per-show sidecar) appears. + assert len(entries) == 1 + assert entries[0].tmdb_id == 84958 + + def test_heal_is_idempotent( + self, tv_library, foundation_release + ): + release_repo = DotAlfredSeriesReleaseRepository(tv_library) + release_repo.save(foundation_release, show_folder="Foundation") + index = DotAlfredTVShowLibraryIndex(tv_library, release_repo=release_repo) + first = index.heal() + second = index.heal() + # Compare model state minus the ``fetched_at`` (timestamps differ). 
+ assert len(first.shows) == len(second.shows) == 1 + assert first.shows[0].tmdb_id == second.shows[0].tmdb_id + + def test_heal_with_empty_library_writes_empty_index(self, tv_library): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.heal() + assert (tv_library / ".alfred.index").is_file() + assert index.find_all() == () + + +# ════════════════════════════════════════════════════════════════════════════ +# TV — atomicity +# ════════════════════════════════════════════════════════════════════════════ + + +class TestTVShowLibraryIndexAtomicity: + def test_upsert_leaves_no_tmp_file( + self, tv_library, foundation_release, foundation_tmdb_info, now_utc + ): + index = DotAlfredTVShowLibraryIndex(tv_library) + index.upsert( + foundation_tmdb_info, + foundation_release, + path="Foundation", + fetched_at=now_utc, + ) + tmps = list(tv_library.glob("*.tmp")) + assert tmps == [] + + +# ════════════════════════════════════════════════════════════════════════════ +# Movies +# ════════════════════════════════════════════════════════════════════════════ + + +class TestMovieLibraryIndex: + def test_upsert_and_find( + self, movie_library, inception_release, now_utc + ): + index = DotAlfredMovieLibraryIndex(movie_library) + index.upsert( + inception_release, + name="Inception", + release_year=2010, + path=inception_release.folder, + fetched_at=now_utc, + ) + entry = index.find_by_tmdb_id(TmdbId(27205)) + assert entry is not None + assert entry.name == "Inception" + assert entry.release_year == 2010 + + def test_missing_index_heals_from_movie_sidecars( + self, movie_library, inception_release, caplog + ): + release_repo = DotAlfredMovieReleaseRepository(movie_library) + release_repo.save(inception_release) + + index = DotAlfredMovieLibraryIndex(movie_library, release_repo=release_repo) + with caplog.at_level(logging.INFO): + entry = index.find_by_tmdb_id(TmdbId(27205)) + assert entry is not None + assert entry.tmdb_id == 27205 + # Placeholder until TMDB sync. 
+ assert entry.release_year is None + assert any("healing" in r.message for r in caplog.records) diff --git a/tests/infrastructure/persistence/dot_alfred/v2/test_release_repository.py b/tests/infrastructure/persistence/dot_alfred/v2/test_release_repository.py new file mode 100644 index 0000000..79ab606 --- /dev/null +++ b/tests/infrastructure/persistence/dot_alfred/v2/test_release_repository.py @@ -0,0 +1,137 @@ +"""Integration tests for the per-item release repositories. + +Cover the atomic-write contract, the log+skip-on-corruption behavior, +the strict schema-version check, and the movie-anchor warning. +""" + +from __future__ import annotations + +import logging + +import pytest + +from alfred.domain.shared.value_objects import TmdbId +from alfred.infrastructure.persistence.dot_alfred.v2.repository import ( + DotAlfredMovieReleaseRepository, + DotAlfredSeriesReleaseRepository, + ShowFolderUnknown, +) + + +# ════════════════════════════════════════════════════════════════════════════ +# Series — save / read / delete +# ════════════════════════════════════════════════════════════════════════════ + + +class TestSeriesReleaseRepositorySave: + def test_save_writes_alfred_in_show_folder( + self, tv_library, foundation_release + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + assert (tv_library / "Foundation" / ".alfred").is_file() + + def test_save_unknown_folder_raises(self, tv_library, foundation_release): + repo = DotAlfredSeriesReleaseRepository(tv_library) + with pytest.raises(ShowFolderUnknown): + repo.save(foundation_release, show_folder="Nope") + + def test_save_then_find_by_tmdb_id_returns_equal( + self, tv_library, foundation_release + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + restored = repo.find_by_tmdb_id(TmdbId(84958)) + assert restored == foundation_release + + def test_save_is_atomic_no_tmp_left_behind( + 
self, tv_library, foundation_release + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + tmps = list((tv_library / "Foundation").glob("*.tmp")) + assert tmps == [] + + +class TestSeriesReleaseRepositoryReads: + def test_find_all_skips_folders_without_sidecar( + self, tv_library, foundation_release + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + # Fallout/ exists in the fixture but has no .alfred — must be skipped. + results = repo.find_all() + assert len(results) == 1 + assert results[0].tmdb_id == TmdbId(84958) + + def test_find_all_logs_and_skips_corrupt_sidecar( + self, tv_library, foundation_release, caplog + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + # Corrupt Fallout's sidecar. + (tv_library / "Fallout" / ".alfred").write_text("not: [valid") + with caplog.at_level(logging.WARNING): + results = repo.find_all() + assert len(results) == 1 + assert any("Fallout" in r.message for r in caplog.records) + + def test_unknown_schema_version_is_skipped( + self, tv_library, foundation_release, caplog + ): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + # Hand-roll a future-version sidecar. 
+ (tv_library / "Fallout" / ".alfred").write_text( + "schema_version: 999\ntmdb_id: 12345\nreleases: []\n" + ) + with caplog.at_level(logging.WARNING): + results = repo.find_all() + assert len(results) == 1 + + +class TestSeriesReleaseRepositoryDelete: + def test_delete_removes_sidecar(self, tv_library, foundation_release): + repo = DotAlfredSeriesReleaseRepository(tv_library) + repo.save(foundation_release, show_folder="Foundation") + assert repo.delete(TmdbId(84958)) is True + assert not (tv_library / "Foundation" / ".alfred").exists() + assert (tv_library / "Foundation").is_dir() # folder preserved + + def test_delete_unknown_id_returns_false(self, tv_library): + repo = DotAlfredSeriesReleaseRepository(tv_library) + assert repo.delete(TmdbId(999)) is False + + +# ════════════════════════════════════════════════════════════════════════════ +# Movies +# ════════════════════════════════════════════════════════════════════════════ + + +class TestMovieReleaseRepository: + def test_save_writes_alfred_in_movie_folder( + self, movie_library, inception_release + ): + repo = DotAlfredMovieReleaseRepository(movie_library) + repo.save(inception_release) + sidecar = movie_library / inception_release.folder / ".alfred" + assert sidecar.is_file() + + def test_save_round_trip(self, movie_library, inception_release): + repo = DotAlfredMovieReleaseRepository(movie_library) + repo.save(inception_release) + restored = repo.find_by_tmdb_id(TmdbId(27205)) + assert restored == inception_release + + def test_anchor_mismatch_logs_warning( + self, movie_library, inception_release, caplog + ): + repo = DotAlfredMovieReleaseRepository(movie_library) + repo.save(inception_release) + # Rename folder so the sidecar.folder anchor no longer matches. 
+ original = movie_library / inception_release.folder + renamed = movie_library / "Renamed.Manually" + original.rename(renamed) + with caplog.at_level(logging.WARNING): + list(repo.find_all()) + assert any("anchor mismatch" in r.message for r in caplog.records) diff --git a/tests/infrastructure/persistence/dot_alfred/v2/test_round_trip.py b/tests/infrastructure/persistence/dot_alfred/v2/test_round_trip.py new file mode 100644 index 0000000..705590f --- /dev/null +++ b/tests/infrastructure/persistence/dot_alfred/v2/test_round_trip.py @@ -0,0 +1,91 @@ +"""Round-trip tests — domain → sidecar → YAML → sidecar → domain. + +These tests are the contract guarantee that the v2 sidecar is a +lossless cache for everything the spec claims it stores. Any field +introduced in the future must come with a round-trip test that +covers it; otherwise we can silently drop it on read. +""" + +from __future__ import annotations + +import yaml + +from alfred.infrastructure.persistence.dot_alfred.v2.bridge import ( + movie_release_from_sidecar, + movie_release_to_sidecar, + series_release_from_sidecar, + series_release_to_sidecar, +) +from alfred.infrastructure.persistence.dot_alfred.v2.sidecar_release import ( + MovieReleaseSidecar, + SeriesReleaseSidecar, +) + + +class TestSeriesReleaseRoundTrip: + def test_domain_to_sidecar_preserves_top_level(self, foundation_release): + sidecar = series_release_to_sidecar(foundation_release) + assert sidecar.schema_version == 1 + assert sidecar.tmdb_id == 84958 + assert sidecar.imdb_id == "tt0804484" + assert len(sidecar.releases) == 2 + + def test_full_loop_domain_to_domain_is_equal(self, foundation_release): + sidecar = series_release_to_sidecar(foundation_release) + restored = series_release_from_sidecar(sidecar) + assert restored == foundation_release + + def test_full_loop_through_yaml_is_equal(self, foundation_release): + sidecar = series_release_to_sidecar(foundation_release) + text = yaml.safe_dump(sidecar.model_dump(mode="json")) + reloaded 
= SeriesReleaseSidecar.model_validate(yaml.safe_load(text)) + restored = series_release_from_sidecar(reloaded) + assert restored == foundation_release + + def test_multi_episode_file_round_trips(self, foundation_release): + sidecar = series_release_to_sidecar(foundation_release) + s02 = sidecar.releases[1] + multi = s02.episodes[1] + assert multi.start == 2 and multi.end == 3 + restored = series_release_from_sidecar(sidecar) + restored_multi = restored.seasons[1].episodes[1] + assert restored_multi.episodes.start.value == 2 + assert restored_multi.episodes.end.value == 3 + + def test_sdh_flag_round_trips(self, foundation_release): + sidecar = series_release_to_sidecar(foundation_release) + restored = series_release_from_sidecar(sidecar) + sdh_track = restored.seasons[0].episodes[0].tracks.subtitle_tracks[1] + assert sdh_track.is_sdh is True + + def test_no_imdb_id_round_trips_as_none(self, foundation_release): + # Replace the imdb_id with None and verify it survives the loop. + from dataclasses import replace + no_imdb = replace(foundation_release, imdb_id=None) + sidecar = series_release_to_sidecar(no_imdb) + assert sidecar.imdb_id is None + restored = series_release_from_sidecar(sidecar) + assert restored.imdb_id is None + + +class TestMovieReleaseRoundTrip: + def test_domain_to_sidecar_preserves_top_level(self, inception_release): + sidecar = movie_release_to_sidecar(inception_release) + assert sidecar.schema_version == 1 + assert sidecar.tmdb_id == 27205 + assert sidecar.imdb_id == "tt1375666" + assert sidecar.folder == "Inception.2010.1080p.BluRay.x264-GROUP" + + def test_full_loop_through_yaml_is_equal(self, inception_release): + sidecar = movie_release_to_sidecar(inception_release) + text = yaml.safe_dump(sidecar.model_dump(mode="json")) + reloaded = MovieReleaseSidecar.model_validate(yaml.safe_load(text)) + restored = movie_release_from_sidecar(reloaded) + assert restored == inception_release + + def test_forced_subtitle_flag_round_trips(self, 
inception_release): + sidecar = movie_release_to_sidecar(inception_release) + restored = movie_release_from_sidecar(sidecar) + forced = restored.tracks.subtitle_tracks[1] + assert forced.is_forced is True + assert forced.language == "fre"