Files
alfred/alfred/infrastructure/persistence/dot_alfred/repository.py
T
francwa c7c11180d9 feat(persistence): add DotAlfredTVShowRepository (filesystem-backed)
Step 3 of specs/dot_alfred.md. Concrete TVShowRepository
implementation reading and writing per-show .alfred YAML files under
a configurable library_root. Writes are atomic (.alfred.tmp +
os.replace), reads tolerate corrupted/wrong-schema sidecars (log +
skip), and the repo never invents a folder name — save(show)
requires the target folder to exist beforehand (raises
ShowFolderUnknown otherwise), matching the spec's
MediaOrganizer-then-sidecar split.

Cold folders without a sidecar are skipped by find_all and yield
None from find_by_imdb_id — the upcoming rescan_show tool (step 4)
will own the opt-in rebuild path.

A small bridge module translates between the rich domain TVShow
(AudioTrack/SubtitleTrack with full ffprobe minutiae) and the
compact sidecar shape (language-only audio, embedded-only subs with
type derived from is_forced). The bridge is intentionally lossy on
probe details the sidecar does not store, per the spec's
factual-only philosophy.

20 integration tests on tmp_path: round-trip save/find,
cold-folder/unknown-id returns, find_all skipping
(corrupted/schema-violating sidecars), delete/exists, atomic write
(no .alfred.tmp leftover), overwrite, and folder-name fallbacks
(get_folder_name guess + full-scan rescue when renamed).
2026-05-22 17:16:41 +02:00

199 lines
7.4 KiB
Python

"""Filesystem-backed implementation of :class:`TVShowRepository`.

The repository keeps no in-memory cache of aggregates: every read goes
back to the filesystem. It does keep a tiny mapping ``imdb_id →
folder_name`` populated as folders are discovered, so subsequent saves
can find the right destination without re-walking ``library_root/``.

Atomic writes: the YAML is dumped to ``.alfred.tmp`` and then renamed
to ``.alfred`` via ``os.replace`` — atomic on POSIX and NTFS. No
half-written file ever becomes visible.

Cold scan: a show folder without a ``.alfred`` returns ``None`` from
``find_by_imdb_id`` and is skipped by ``find_all``. The opt-in
``rescan_show`` tool (step 4) will be responsible for rebuilding a
missing sidecar by walking the filesystem.

The repository never invents a folder name. ``save(show)`` assumes the
target folder already exists (the upstream ``MediaOrganizer`` is in
charge of placing files); the repository writes the ``.alfred`` next
to them.
"""
from __future__ import annotations

import logging
import os
from collections.abc import Iterator
from pathlib import Path

import yaml

from ....domain.shared.value_objects import ImdbId
from ....domain.tv_shows.entities import TVShow
from ....domain.tv_shows.repositories import TVShowRepository
from .bridge import from_sidecar, to_sidecar
from .serializer import SidecarSchemaError, deserialize, serialize
# Per-show sidecar file, and the scratch file that is written next to it
# before being renamed over it.
SIDECAR_FILENAME = ".alfred"
SIDECAR_TMP_FILENAME = ".alfred.tmp"

# Module-level logger; skipped sidecars are reported through it as warnings.
logger = logging.getLogger(__name__)
class ShowFolderUnknown(LookupError):
    """Signal that no on-disk folder could be located for a show.

    Raised by :meth:`DotAlfredTVShowRepository.save`. Folder names are
    never invented by the repository: the caller must already have put
    the files in place (typically via the ``MediaOrganizer``).
    """
class DotAlfredTVShowRepository(TVShowRepository):
    """A :class:`TVShowRepository` backed by per-show ``.alfred`` files.

    Every direct sub-folder of ``library_root`` that holds a readable
    ``.alfred`` sidecar is one show. Aggregates are re-read from disk on
    each query; only an ``imdb_id → folder name`` index is kept in memory
    so that ``save`` and ``delete`` can usually skip a library walk.

    Args:
        library_root: directory containing one folder per show.
    """

    def __init__(self, library_root: Path) -> None:
        self._library_root = Path(library_root)
        # Lazy cache: imdb_id → folder name (relative to library_root).
        # Populated on every successful read or save; rebuilt on demand.
        self._folder_index: dict[str, str] = {}

    # ── TVShowRepository surface ────────────────────────────────────────────

    def save(self, show: TVShow) -> None:
        """Write ``show``'s sidecar into its already-existing folder.

        Raises:
            ShowFolderUnknown: no folder could be resolved for the show,
                or the resolved path is not a directory.
        """
        folder_name = self._resolve_folder_name(show)
        show_dir = self._library_root / folder_name
        if not show_dir.is_dir():
            raise ShowFolderUnknown(
                f"show folder does not exist on disk: {show_dir}"
            )
        # Season number → season folder name, handed to the bridge.
        folder_paths = {
            s.season_number.value: s.get_folder_name() for s in show.seasons
        }
        sidecar = to_sidecar(show, folder_paths=folder_paths)
        text = yaml.safe_dump(serialize(sidecar), sort_keys=False)
        self._atomic_write(show_dir, text)
        self._folder_index[str(show.imdb_id)] = folder_name

    def find_by_imdb_id(self, imdb_id: ImdbId) -> TVShow | None:
        """Return the show whose sidecar carries ``imdb_id``, else ``None``.

        Walks the library in sorted folder order and stops at the first
        match, recording the matching folder in the index.
        """
        for folder_name, show in self._iter_library():
            if show.imdb_id == imdb_id:
                self._folder_index[str(imdb_id)] = folder_name
                return show
        return None

    def find_all(self) -> list[TVShow]:
        """Return every show with a readable sidecar, indexing each folder."""
        result: list[TVShow] = []
        for folder_name, show in self._iter_library():
            self._folder_index[str(show.imdb_id)] = folder_name
            result.append(show)
        return result

    def delete(self, imdb_id: ImdbId) -> bool:
        """Remove the sidecar of the show identified by ``imdb_id``.

        Only the ``.alfred`` file is unlinked; the show folder itself is
        left untouched.

        Returns:
            ``True`` if a sidecar was deleted, ``False`` if no folder was
            found for the id or the folder holds no sidecar file.
        """
        folder_name = self._lookup_folder(imdb_id)
        if folder_name is None:
            return False
        sidecar_path = self._library_root / folder_name / SIDECAR_FILENAME
        if not sidecar_path.is_file():
            return False
        sidecar_path.unlink()
        self._folder_index.pop(str(imdb_id), None)
        return True

    def exists(self, imdb_id: ImdbId) -> bool:
        """Return whether a readable sidecar with ``imdb_id`` is present."""
        return self.find_by_imdb_id(imdb_id) is not None

    # ── Internals ───────────────────────────────────────────────────────────

    def _iter_library(self) -> Iterator[tuple[str, TVShow]]:
        """Yield ``(folder_name, TVShow)`` for every readable sidecar.

        Folders without a sidecar, or with an unreadable / invalid one,
        are skipped (with a warning logged). The repository never
        cold-scans here — that is the job of the upcoming
        ``rescan_show`` tool.
        """
        if not self._library_root.is_dir():
            return
        # Sorted so that iteration order (and thus "first match") is stable.
        for entry in sorted(self._library_root.iterdir()):
            if not entry.is_dir():
                continue
            sidecar_path = entry / SIDECAR_FILENAME
            if not sidecar_path.is_file():
                continue
            show = self._read_sidecar(entry, sidecar_path)
            if show is not None:
                yield entry.name, show

    def _read_sidecar(self, show_dir: Path, sidecar_path: Path) -> TVShow | None:
        """Load one sidecar, returning ``None`` (and logging) if unusable.

        The show title passed to the bridge is the folder name.
        """
        try:
            # Decode explicitly as UTF-8 rather than the locale default.
            # Undecodable bytes raise UnicodeDecodeError, which is a
            # ValueError and not an OSError, so it is caught here too:
            # such a file is skipped like any other corrupted sidecar.
            raw = yaml.safe_load(sidecar_path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
            logger.warning(
                "skipping %s — sidecar unreadable: %s", sidecar_path, exc
            )
            return None
        try:
            sidecar = deserialize(raw)
        except SidecarSchemaError as exc:
            logger.warning(
                "skipping %s — invalid sidecar schema: %s", sidecar_path, exc
            )
            return None
        return from_sidecar(sidecar, title=show_dir.name)

    def _cached_folder(self, imdb_id: ImdbId) -> str | None:
        """Return the indexed folder for ``imdb_id`` if it still exists."""
        cached = self._folder_index.get(str(imdb_id))
        if cached is not None and (self._library_root / cached).is_dir():
            return cached
        return None

    def _scan_for(self, imdb_id: ImdbId) -> str | None:
        """Walk the library for ``imdb_id``, refreshing the index on the way.

        Every sidecar read before the match is indexed as well; the walk
        stops at the first folder whose show carries ``imdb_id``.
        """
        for folder_name, found in self._iter_library():
            self._folder_index[str(found.imdb_id)] = folder_name
            if found.imdb_id == imdb_id:
                return folder_name
        return None

    def _resolve_folder_name(self, show: TVShow) -> str:
        """Return the folder name to write ``show``'s sidecar into.

        Order of resolution:

        1. Cache hit on ``imdb_id``.
        2. Folder ``show.get_folder_name()`` exists on disk.
        3. Full library scan as a last resort to refresh the index.

        Raises:
            ShowFolderUnknown: none of the three steps found a folder.
        """
        cached = self._cached_folder(show.imdb_id)
        if cached is not None:
            return cached
        guess = show.get_folder_name()
        if (self._library_root / guess).is_dir():
            return guess
        # Last resort — refresh the index in case the folder was renamed.
        found = self._scan_for(show.imdb_id)
        if found is None:
            raise ShowFolderUnknown(
                f"no folder found for show {show.imdb_id} under {self._library_root}"
            )
        return found

    def _lookup_folder(self, imdb_id: ImdbId) -> str | None:
        """Return the folder holding ``imdb_id``: cache first, then a scan."""
        cached = self._cached_folder(imdb_id)
        if cached is not None:
            return cached
        return self._scan_for(imdb_id)

    @staticmethod
    def _atomic_write(show_dir: Path, text: str) -> None:
        """Write ``text`` as ``show_dir``'s sidecar via a temp file + rename.

        The text goes to ``.alfred.tmp`` first and is then moved onto
        ``.alfred`` with ``os.replace``. If either step fails, the temp
        file is removed (best effort) and the original error propagates.
        """
        tmp = show_dir / SIDECAR_TMP_FILENAME
        final = show_dir / SIDECAR_FILENAME
        try:
            # Explicit UTF-8 so the bytes on disk do not depend on the
            # platform's locale encoding; matches ``_read_sidecar``.
            tmp.write_text(text, encoding="utf-8")
            os.replace(tmp, final)
        except BaseException:
            # Do not leave a stray ``.alfred.tmp`` behind on failure. The
            # cleanup itself must not mask the error being propagated.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
# Public API of this module.
__all__ = [
    "DotAlfredTVShowRepository",
    "ShowFolderUnknown",
]