Files
alfred/alfred/application/tv_shows/rescan.py
T
francwa 97dc799a26 fix(tv_shows): correct PACK vs EPISODIC classification model
The Phase 4 walker + rescan logic classified seasons by parser
output (does the filename carry Exx?), but PACK vs EPISODIC is a
structural distinction:

* PACK = season folder with N flat SxxEyy videos directly inside
* EPISODIC = season folder with N subfolders, each holding one video

Changes:
* walker.py: descends two levels under show_root and classifies
  each season folder by FS structure. SeasonFolder now carries
  mode: ReleaseMode | None. Mixed layouts (flat + subfolders) and
  EPISODIC subfolders with >1 video log a warning and report
  mode=None.
* rescan.py: trusts walker.mode; drops the bogus 'single un-
  numbered video → PACK with empty episodes' branch. A season
  with no parseable episodes is now skipped with a warning.
* Tests rewritten against the real model: PACK with flat numbered
  files, EPISODIC with one-video-per-subfolder, malformed mixed
  layout skipped, single-un-numbered-file skipped.

Suite: 1237 → 1245 passing.
2026-05-25 21:37:34 +02:00

205 lines
6.8 KiB
Python

"""``rescan_show`` — rebuild a SeriesRelease from disk and persist it.
The orchestrator walks the show folder, runs the existing release
pipeline (``inspect_release``) on every video file, and assembles the
result into a frozen :class:`SeriesRelease` written to the per-show
v2 ``.alfred`` sidecar.
Why reuse ``inspect_release``?
-------------------------------
The "fresh download" flow already parses release names, picks a main
video, runs ffprobe and refines media type. We want exactly the same
intelligence applied to library content — running it again here keeps
a single source of truth for parsing / probing rules. The orchestrator
just translates per-file :class:`InspectedResult` into release
aggregate construction.
PACK vs EPISODIC
----------------
Classification is done by the walker, by inspecting the season
folder's filesystem structure (flat videos → PACK, subfolders →
EPISODIC). See :mod:`alfred.application.tv_shows.walker`. The
orchestrator trusts ``season_folder.mode`` and never re-derives.
Files whose parser yields ``season is None`` or ``episode is None``
are logged and skipped — a real PACK or EPISODIC file always carries
both. Mixed-season folders (two different ``Sxx`` numbers in the
same directory) are skipped with a warning.
TMDB
----
``rescan_show`` does **not** call TMDB. It writes the release
sidecar; the library index is updated transparently by its auto-heal
path on the next read. A subsequent TMDB sync (Phase 5) layers
identity / season cache facts on top of the on-disk truth.
Out of scope (tracked as tech debt):
* Adjacent ``.srt`` files — only embedded subtitle tracks are
captured.
* Multi-episode files — ``ParsedRelease`` has no ``episode_end``
field yet.
"""
from __future__ import annotations
import logging
from pathlib import Path
from alfred.application.release.inspect import inspect_release
from alfred.application.tv_shows.walker import SeasonFolder, walk_show
from alfred.domain.release.ports import ReleaseKnowledge
from alfred.domain.releases.entities import (
EpisodeRelease,
SeasonRelease,
SeriesRelease,
TrackProfile,
)
from alfred.domain.releases.value_objects import EpisodeRange, ReleaseMode
from alfred.domain.shared.media import MediaInfo
from alfred.domain.shared.ports import FilesystemScanner, MediaProber
from alfred.domain.shared.value_objects import FilePath, ImdbId, TmdbId
from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber
from alfred.infrastructure.persistence.dot_alfred.v2.repository import (
DotAlfredSeriesReleaseRepository,
)
# Module-level logger; every "skipping" warning emitted by this module goes through it.
_LOG = logging.getLogger(__name__)
def rescan_show(
    show_root: Path,
    *,
    tmdb_id: TmdbId,
    imdb_id: ImdbId | None = None,
    series_repo: DotAlfredSeriesReleaseRepository,
    scanner: FilesystemScanner,
    prober: MediaProber,
    kb: ReleaseKnowledge,
) -> SeriesRelease:
    """Walk ``show_root``, assemble its :class:`SeriesRelease` and save it.

    The show tree is obtained from ``walk_show``; each season folder it
    reports is handed to ``_ingest_season`` and the ones that come back
    as ``None`` are dropped. ``tmdb_id`` / ``imdb_id`` are taken verbatim
    from the caller — no TMDB lookup happens here.

    The aggregate is persisted through ``series_repo.save`` with
    ``show_root.name`` as the show folder, then returned to the caller.
    """
    show_tree = walk_show(show_root, scanner=scanner, kb=kb)

    # Ingest folders in walker order; tuple() below drains the generator
    # before the aggregate is constructed.
    ingested = (
        _ingest_season(folder, show_root, kb, prober)
        for folder in show_tree.season_folders
    )
    kept_seasons = tuple(
        candidate for candidate in ingested if candidate is not None
    )

    rebuilt = SeriesRelease(
        tmdb_id=tmdb_id,
        imdb_id=imdb_id,
        seasons=kept_seasons,
    )
    series_repo.save(rebuilt, show_folder=show_root.name)
    return rebuilt
# --------------------------------------------------------------------------- #
# Per-season ingestion #
# --------------------------------------------------------------------------- #
def _ingest_season(
    season_folder: SeasonFolder,
    show_root: Path,
    kb: ReleaseKnowledge,
    prober: MediaProber,
) -> SeasonRelease | None:
    """Turn one walked season folder into a :class:`SeasonRelease`.

    Every path in ``season_folder.video_files`` is run through
    ``inspect_release``. The folder's season number is the single
    distinct season parsed across those files. ``season_folder.mode``
    is copied onto the result unchanged.

    Returns ``None`` when the folder cannot be ingested:

    * ``season_folder.mode`` is ``None`` (no log emitted here);
    * the folder lists no video file;
    * no file yields a season number;
    * the files disagree on the season number;
    * no file yields both a season and an episode number.

    Individual files missing either the season or the episode number
    are logged and left out of the resulting season.
    """
    if season_folder.mode is None:
        # Walker already logged the reason (empty / malformed mix /
        # multi-video subfolder). Just skip.
        return None
    if not season_folder.video_files:
        _LOG.warning(
            "rescan_show: season folder %s contains no video file — skipping",
            season_folder.season_dir,
        )
        return None

    # Inspect every video to extract season + episode numbers.
    inspected = [
        (video_path, inspect_release(video_path.name, video_path, kb, prober))
        for video_path in season_folder.video_files
    ]

    season_numbers = {
        result.parsed.season
        for _, result in inspected
        if result.parsed.season is not None
    }
    if not season_numbers:
        _LOG.warning(
            "rescan_show: no season number parsed in %s — skipping",
            season_folder.season_dir,
        )
        return None
    if len(season_numbers) > 1:
        _LOG.warning(
            "rescan_show: mixed season numbers %s in %s — skipping",
            sorted(season_numbers),
            season_folder.season_dir,
        )
        return None

    # Exactly one distinct value is left at this point.
    (raw_season,) = season_numbers
    season_number = SeasonNumber(raw_season)

    episodes: list[EpisodeRelease] = []
    for video_path, result in inspected:
        parsed = result.parsed
        if parsed.season is None:
            # A file without its own season number must not silently
            # inherit the one parsed from its neighbours.
            _LOG.warning(
                "rescan_show: no season number parsed for %s — skipping",
                video_path,
            )
            continue
        if parsed.episode is None:
            _LOG.warning(
                "rescan_show: no episode number parsed for %s — skipping",
                video_path,
            )
            continue
        episodes.append(
            _make_episode_release(
                episode_number=EpisodeNumber(parsed.episode),
                video_path=video_path,
                show_root=show_root,
                media_info=result.media_info,
            )
        )

    if not episodes:
        _LOG.warning(
            "rescan_show: no parseable episodes in %s — skipping",
            season_folder.season_dir,
        )
        return None

    return SeasonRelease(
        season_number=season_number,
        folder=season_folder.season_dir.name,
        mode=season_folder.mode,
        episodes=tuple(episodes),
    )
def _make_episode_release(
    *,
    episode_number: EpisodeNumber,
    video_path: Path,
    show_root: Path,
    media_info: MediaInfo | None,
) -> EpisodeRelease:
    """Build the :class:`EpisodeRelease` for a single video file.

    The stored file path is ``video_path`` expressed relative to
    ``show_root``. The episode range starts and ends on
    ``episode_number``. Audio and subtitle tracks are read from
    ``media_info`` when it is truthy; otherwise both are empty tuples.
    """
    relative = video_path.relative_to(show_root)

    if media_info:
        audio = media_info.audio_tracks
        subtitles = media_info.subtitle_tracks
    else:
        # No probe data available: record an empty track profile.
        audio = ()
        subtitles = ()

    single_episode = EpisodeRange(start=episode_number, end=episode_number)
    profile = TrackProfile(audio_tracks=audio, subtitle_tracks=subtitles)
    return EpisodeRelease(
        episodes=single_episode,
        file_path=FilePath(str(relative)),
        tracks=profile,
    )