refactor(domain): flatten shared/media package into single module

Six small modules (audio, video, subtitle, info, matching, tracks_mixin) plus the package __init__ — seven files in total — collapsed into one ~250 LoC media.py module. The import path 'alfred.domain.shared.media' resolves the same way whether it is backed by media.py or by media/__init__.py, so the 12 import sites that read 'from alfred.domain.shared.media import ...' continue to work without changes. Reasoning: the whole bounded context fits on one screen; splitting into sub-modules added more navigation friction than it saved. Tests stay green (1077 passed).
2026-05-20 23:35:49 +02:00
parent 0c9489e16b
commit 83dbed887b
9 changed files with 275 additions and 313 deletions
@@ -57,6 +57,14 @@ callers).
 ### Internal
 - **Flattened `alfred.domain.shared.media/` package into a single
  `media.py` module.** The package's six modules (audio, video, subtitle,
  info, matching, tracks_mixin) plus its `__init__` collapsed into one
  ~250 LoC module. All 12 import sites continue to resolve unchanged
  (`from alfred.domain.shared.media import AudioTrack, MediaInfo, …`)
  since the import path `alfred.domain.shared.media` resolves the same
  way whether it is backed by `media.py` or by `media/__init__.py`.
  Easier to scan when the whole bounded context fits on one screen.
 - **`SubtitleKnowledgeBase` types `language_registry` against the
  `LanguageRepository` port** instead of the concrete `LanguageRegistry`
  class. The default constructor still instantiates the concrete adapter
@@ -0,0 +1,267 @@
 """Media — file-level track types (video/audio/subtitle) and MediaInfo container.
 These are the **container-view** dataclasses, populated from ffprobe output and
 used across the project to describe the content of a media file.
 Not to be confused with ``alfred.domain.subtitles.entities.SubtitleCandidate``
 which models a subtitle being **scanned/matched** (with confidence, raw tokens,
 file path, etc.). The two coexist by design — they describe the same real-world
 concept seen from two different bounded contexts.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from .value_objects import Language
 __all__ = [
    "AudioTrack",
    "MediaInfo",
    "MediaWithTracks",
    "SubtitleTrack",
    "VideoTrack",
    "track_lang_matches",
 ]
 # ─────────────────────────────────────────────────────────────────────────────
 # Track types — one frozen dataclass per stream kind
 # ─────────────────────────────────────────────────────────────────────────────
@dataclass(frozen=True)
class AudioTrack:
    """One audio stream of a media file, described from ffprobe output.

    Frozen dataclass: instances are immutable and compare/hash by value.
    """

    # Track index.
    index: int
    # Codec name (aac, ac3, eac3, dts, truehd, flac, …), or None.
    codec: str | None
    # Channel count: 2, 6 (5.1), 8 (7.1), …, or None.
    channels: int | None
    # Layout label (stereo, 5.1, 7.1, …), or None.
    channel_layout: str | None
    # ISO 639-2 language code (fre, eng, und, …), or None.
    language: str | None
    # Default-track flag; False unless stated otherwise.
    is_default: bool = False
@dataclass(frozen=True)
class SubtitleTrack:
    """One embedded subtitle stream of a media file, described from ffprobe output.

    Frozen dataclass: instances are immutable and compare/hash by value.
    """

    # Track index.
    index: int
    # Codec name (subrip, ass, hdmv_pgs_subtitle, …), or None.
    codec: str | None
    # ISO 639-2 language code (fre, eng, und, …), or None.
    language: str | None
    # Default-track flag; False unless stated otherwise.
    is_default: bool = False
    # Forced-subtitle flag; False unless stated otherwise.
    is_forced: bool = False
@dataclass(frozen=True)
class VideoTrack:
    """A single video track as reported by ffprobe.

    A media file typically has one video track but can have several (alt
    camera angles, attached thumbnail images reported as still-image streams,
    etc.), hence the tuple of ``VideoTrack`` objects on ``MediaInfo``.
    """

    index: int
    codec: str | None  # h264, hevc, av1, …
    width: int | None
    height: int | None
    is_default: bool = False

    @property
    def resolution(self) -> str | None:
        """
        Best-effort resolution string: 2160p, 1080p, 720p, …

        Width takes priority over height to handle widescreen/cinema crops
        (e.g. 1920×960 scope → 1080p, not 720p). Falls back to height when
        width is unavailable.

        Returns:
            ``None`` when both dimensions are missing; otherwise the label of
            the highest tier the measured dimension reaches. Below the lowest
            tier the result is ``"<height>p"``, or ``"<width>w"`` when only a
            width is usable.
        """
        width, height = self.width, self.height
        if width is None and height is None:
            return None

        # (minimum dimension, label) pairs, highest tier first, so the first
        # match is the best one.
        if width is not None:
            measured = width
            tiers = (
                (3840, "2160p"),
                (1920, "1080p"),
                (1280, "720p"),
                (720, "576p"),
                (640, "480p"),
            )
        else:
            measured = height
            tiers = (
                (2160, "2160p"),
                (1080, "1080p"),
                (720, "720p"),
                (576, "576p"),
                (480, "480p"),
            )

        for minimum, label in tiers:
            if measured >= minimum:
                return label

        # Below every tier: report the raw height, or the raw width (with a
        # "w" suffix) when the width is known but the height is missing/zero.
        if width is not None and not height:
            return f"{width}w"
        return f"{height}p"
 # ─────────────────────────────────────────────────────────────────────────────
 # MediaInfo — assembles video/audio/subtitle tracks for a media file
 # ─────────────────────────────────────────────────────────────────────────────
@dataclass(frozen=True)
class MediaInfo:
    """
    File-level media metadata extracted by ffprobe — immutable snapshot.

    Symmetric design: every stream type is a tuple of typed track objects
    (immutable on purpose — a MediaInfo is a frozen view of one ffprobe run,
    not a mutable collection to append to).

    Backwards-compatible flat accessors (``resolution``, ``width``, …) read
    from the first video track when present and return ``None`` otherwise.
    """

    video_tracks: tuple[VideoTrack, ...] = field(default_factory=tuple)
    audio_tracks: tuple[AudioTrack, ...] = field(default_factory=tuple)
    subtitle_tracks: tuple[SubtitleTrack, ...] = field(default_factory=tuple)

    # File-level (from ffprobe ``format`` block, not from any single stream)
    duration_seconds: float | None = None
    bitrate_kbps: int | None = None

    # ──────────────────────────────────────────────────────────────────────
    # Video conveniences — read the first video track
    # ──────────────────────────────────────────────────────────────────────

    @property
    def primary_video(self) -> VideoTrack | None:
        """First entry of ``video_tracks``, or ``None`` when there is none."""
        return self.video_tracks[0] if self.video_tracks else None

    @property
    def width(self) -> int | None:
        """Width of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.width

    @property
    def height(self) -> int | None:
        """Height of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.height

    @property
    def video_codec(self) -> str | None:
        """Codec of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.codec

    @property
    def resolution(self) -> str | None:
        """Resolution label of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.resolution

    # ──────────────────────────────────────────────────────────────────────
    # Audio conveniences
    # ──────────────────────────────────────────────────────────────────────

    @property
    def audio_languages(self) -> list[str]:
        """Unique audio languages across all tracks (ISO 639-2), in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(
            dict.fromkeys(
                track.language for track in self.audio_tracks if track.language
            )
        )

    @property
    def is_multi_audio(self) -> bool:
        """True if more than one audio language is present."""
        return len(self.audio_languages) > 1
 # ─────────────────────────────────────────────────────────────────────────────
 # Language matching — shared helper + mixin
 # ─────────────────────────────────────────────────────────────────────────────
 def track_lang_matches(track_lang: str | None, query: str | Language) -> bool:
    """
    Decide whether a track's language string satisfies a query (contract "C+").

    Two query modes:

      * ``Language`` → the decision is delegated to ``Language.matches``,
        which is handed the raw track string. This is the cross-format mode.
      * ``str`` → both sides are lower-cased and stripped, then compared for
        equality. No normalization beyond that, no registry lookup.

    A track without a language (``None``) never matches, and a query that is
    neither a ``Language`` nor a ``str`` never matches.

    Callers needing cross-format resolution (``"fr"`` ↔ ``"fre"`` ↔
    ``"french"``) should resolve their string through a ``LanguageRegistry``
    once and pass the resulting ``Language``.
    """
    if track_lang is None:
        return False

    if isinstance(query, Language):
        return query.matches(track_lang)

    if not isinstance(query, str):
        return False

    normalized_track = track_lang.lower().strip()
    normalized_query = query.lower().strip()
    return normalized_track == normalized_query
 class MediaWithTracks:
    """
    Mixin providing audio/subtitle helpers for entities with track collections.

    Hosts must expose two attributes:

    * ``audio_tracks: list[AudioTrack]``
    * ``subtitle_tracks: list[SubtitleTrack]``

    The helpers follow the "C+" matching contract: pass a :class:`Language`
    for cross-format matching, or a ``str`` for case-insensitive comparison.
    """

    # These attributes are provided by the host entity (Movie, Episode, …).
    # Declared here only for type-checkers and to make the contract explicit.
    audio_tracks: list[AudioTrack]
    subtitle_tracks: list[SubtitleTrack]

    # ── Audio helpers ──────────────────────────────────────────────────────

    def has_audio_in(self, lang: str | Language) -> bool:
        """True if at least one audio track is in the given language."""
        return any(track_lang_matches(t.language, lang) for t in self.audio_tracks)

    def audio_languages(self) -> list[str]:
        """Unique audio languages across all tracks, in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(
            dict.fromkeys(t.language for t in self.audio_tracks if t.language)
        )

    # ── Subtitle helpers ───────────────────────────────────────────────────

    def has_subtitles_in(self, lang: str | Language) -> bool:
        """True if at least one subtitle track is in the given language."""
        return any(track_lang_matches(t.language, lang) for t in self.subtitle_tracks)

    def has_forced_subs(self) -> bool:
        """True if at least one subtitle track is flagged as forced."""
        return any(t.is_forced for t in self.subtitle_tracks)

    def subtitle_languages(self) -> list[str]:
        """Unique subtitle languages across all tracks, in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # Same order-preserving dedupe as ``audio_languages``.
        return list(
            dict.fromkeys(t.language for t in self.subtitle_tracks if t.language)
        )
@@ -1,21 +0,0 @@
 """Media — file-level track types (video/audio/subtitle) and MediaInfo container.
 These are the **container-view** dataclasses, populated from ffprobe output and
 used across the project to describe the content of a media file.
 """
 from .audio import AudioTrack
 from .info import MediaInfo
 from .matching import track_lang_matches
 from .subtitle import SubtitleTrack
 from .tracks_mixin import MediaWithTracks
 from .video import VideoTrack
 __all__ = [
    "AudioTrack",
    "MediaInfo",
    "MediaWithTracks",
    "SubtitleTrack",
    "VideoTrack",
    "track_lang_matches",
 ]
@@ -1,17 +0,0 @@
 """AudioTrack — a single audio stream as reported by ffprobe."""
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass(frozen=True)
class AudioTrack:
    """One audio stream of a media file, described from ffprobe output.

    Frozen dataclass: instances are immutable and compare/hash by value.
    """

    # Track index.
    index: int
    # Codec name (aac, ac3, eac3, dts, truehd, flac, …), or None.
    codec: str | None
    # Channel count: 2, 6 (5.1), 8 (7.1), …, or None.
    channels: int | None
    # Layout label (stereo, 5.1, 7.1, …), or None.
    channel_layout: str | None
    # ISO 639-2 language code (fre, eng, und, …), or None.
    language: str | None
    # Default-track flag; False unless stated otherwise.
    is_default: bool = False
@@ -1,78 +0,0 @@
 """MediaInfo — assembles video, audio and subtitle tracks for a media file."""
 from __future__ import annotations
 from dataclasses import dataclass, field
 from .audio import AudioTrack
 from .subtitle import SubtitleTrack
 from .video import VideoTrack
@dataclass(frozen=True)
class MediaInfo:
    """
    File-level media metadata extracted by ffprobe — immutable snapshot.

    Symmetric design: every stream type is a tuple of typed track objects
    (immutable on purpose — a MediaInfo is a frozen view of one ffprobe run,
    not a mutable collection to append to).

    Backwards-compatible flat accessors (``resolution``, ``width``, …) read
    from the first video track when present and return ``None`` otherwise.
    """

    video_tracks: tuple[VideoTrack, ...] = field(default_factory=tuple)
    audio_tracks: tuple[AudioTrack, ...] = field(default_factory=tuple)
    subtitle_tracks: tuple[SubtitleTrack, ...] = field(default_factory=tuple)

    # File-level (from ffprobe ``format`` block, not from any single stream)
    duration_seconds: float | None = None
    bitrate_kbps: int | None = None

    # ──────────────────────────────────────────────────────────────────────
    # Video conveniences — read the first video track
    # ──────────────────────────────────────────────────────────────────────

    @property
    def primary_video(self) -> VideoTrack | None:
        """First entry of ``video_tracks``, or ``None`` when there is none."""
        return self.video_tracks[0] if self.video_tracks else None

    @property
    def width(self) -> int | None:
        """Width of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.width

    @property
    def height(self) -> int | None:
        """Height of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.height

    @property
    def video_codec(self) -> str | None:
        """Codec of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.codec

    @property
    def resolution(self) -> str | None:
        """Resolution label of the primary video track, or ``None`` without one."""
        primary = self.primary_video
        return None if primary is None else primary.resolution

    # ──────────────────────────────────────────────────────────────────────
    # Audio conveniences
    # ──────────────────────────────────────────────────────────────────────

    @property
    def audio_languages(self) -> list[str]:
        """Unique audio languages across all tracks (ISO 639-2), in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(
            dict.fromkeys(
                track.language for track in self.audio_tracks if track.language
            )
        )

    @property
    def is_multi_audio(self) -> bool:
        """True if more than one audio language is present."""
        return len(self.audio_languages) > 1
@@ -1,33 +0,0 @@
 """Language-matching helper shared by media-bearing entities.
 Both ``Episode`` and ``Movie`` carry ``audio_tracks`` / ``subtitle_tracks`` and
 need to answer "do I have audio in language X?". The matching contract is the
 same in both cases — keep it in one place.
 """
 from __future__ import annotations
 from ..value_objects import Language
 def track_lang_matches(track_lang: str | None, query: str | Language) -> bool:
    """
    Decide whether a track's language string satisfies a query (contract "C+").

    Two query modes:

      * ``Language`` → the decision is delegated to ``Language.matches``,
        which is handed the raw track string. This is the cross-format mode.
      * ``str`` → both sides are lower-cased and stripped, then compared for
        equality. No normalization beyond that, no registry lookup.

    A track without a language (``None``) never matches, and a query that is
    neither a ``Language`` nor a ``str`` never matches.

    Callers needing cross-format resolution (``"fr"`` ↔ ``"fre"`` ↔
    ``"french"``) should resolve their string through a ``LanguageRegistry``
    once and pass the resulting ``Language``.
    """
    if track_lang is None:
        return False

    if isinstance(query, Language):
        return query.matches(track_lang)

    if not isinstance(query, str):
        return False

    normalized_track = track_lang.lower().strip()
    normalized_query = query.lower().strip()
    return normalized_track == normalized_query
@@ -1,25 +0,0 @@
 """SubtitleTrack — a single embedded subtitle stream as reported by ffprobe.
 This is the **container-view** representation (ffprobe output) used uniformly
 across the project to describe a subtitle stream embedded in a media file.
 Not to be confused with ``alfred.domain.subtitles.entities.SubtitleCandidate``
 which models a subtitle being **scanned/matched** (with confidence, raw tokens,
 file path, etc.). The two coexist by design — they describe the same real-world
 concept seen from two different bounded contexts.
 """
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass(frozen=True)
class SubtitleTrack:
    """One embedded subtitle stream of a media file, described from ffprobe output.

    Frozen dataclass: instances are immutable and compare/hash by value.
    """

    # Track index.
    index: int
    # Codec name (subrip, ass, hdmv_pgs_subtitle, …), or None.
    codec: str | None
    # ISO 639-2 language code (fre, eng, und, …), or None.
    language: str | None
    # Default-track flag; False unless stated otherwise.
    is_default: bool = False
    # Forced-subtitle flag; False unless stated otherwise.
    is_forced: bool = False
@@ -1,77 +0,0 @@
 """Mixin shared by entities that carry audio + subtitle tracks.
 Both ``Movie`` and ``Episode`` carry a ``list[AudioTrack]`` plus a
 ``list[SubtitleTrack]`` and answer the same 5 queries about them (language
 presence, unique languages, forced flag). Keep that behavior in one place so a
 fix in one is a fix in both.
 The mixin is plain Python (no dataclass machinery) so it composes cleanly with
 ``@dataclass`` entities — it only reads ``self.audio_tracks`` and
 ``self.subtitle_tracks`` which the host class provides as fields.
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from ..value_objects import Language
 from .matching import track_lang_matches
 if TYPE_CHECKING:
    from .audio import AudioTrack
    from .subtitle import SubtitleTrack
 class MediaWithTracks:
    """
    Mixin providing audio/subtitle helpers for entities with track collections.

    Hosts must expose two attributes:

    * ``audio_tracks: list[AudioTrack]``
    * ``subtitle_tracks: list[SubtitleTrack]``

    The helpers follow the "C+" matching contract: pass a :class:`Language`
    for cross-format matching, or a ``str`` for case-insensitive comparison.
    """

    # These attributes are provided by the host entity (Movie, Episode, …).
    # Declared here only for type-checkers and to make the contract explicit.
    audio_tracks: list["AudioTrack"]
    subtitle_tracks: list["SubtitleTrack"]

    # ── Audio helpers ──────────────────────────────────────────────────────

    def has_audio_in(self, lang: str | Language) -> bool:
        """True if at least one audio track is in the given language."""
        return any(track_lang_matches(t.language, lang) for t in self.audio_tracks)

    def audio_languages(self) -> list[str]:
        """Unique audio languages across all tracks, in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(
            dict.fromkeys(t.language for t in self.audio_tracks if t.language)
        )

    # ── Subtitle helpers ───────────────────────────────────────────────────

    def has_subtitles_in(self, lang: str | Language) -> bool:
        """True if at least one subtitle track is in the given language."""
        return any(track_lang_matches(t.language, lang) for t in self.subtitle_tracks)

    def has_forced_subs(self) -> bool:
        """True if at least one subtitle track is flagged as forced."""
        return any(t.is_forced for t in self.subtitle_tracks)

    def subtitle_languages(self) -> list[str]:
        """Unique subtitle languages across all tracks, in track order.

        Tracks whose language is ``None`` or empty are skipped.
        """
        # Same order-preserving dedupe as ``audio_languages``.
        return list(
            dict.fromkeys(t.language for t in self.subtitle_tracks if t.language)
        )
@@ -1,62 +0,0 @@
 """VideoTrack — a single video stream as reported by ffprobe."""
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass(frozen=True)
class VideoTrack:
    """A single video track as reported by ffprobe.

    A media file typically has one video track but can have several (alt
    camera angles, attached thumbnail images reported as still-image streams,
    etc.), hence the tuple of ``VideoTrack`` objects on ``MediaInfo``.
    """

    index: int
    codec: str | None  # h264, hevc, av1, …
    width: int | None
    height: int | None
    is_default: bool = False

    @property
    def resolution(self) -> str | None:
        """
        Best-effort resolution string: 2160p, 1080p, 720p, …

        Width takes priority over height to handle widescreen/cinema crops
        (e.g. 1920×960 scope → 1080p, not 720p). Falls back to height when
        width is unavailable.

        Returns:
            ``None`` when both dimensions are missing; otherwise the label of
            the highest tier the measured dimension reaches. Below the lowest
            tier the result is ``"<height>p"``, or ``"<width>w"`` when only a
            width is usable.
        """
        width, height = self.width, self.height
        if width is None and height is None:
            return None

        # (minimum dimension, label) pairs, highest tier first, so the first
        # match is the best one.
        if width is not None:
            measured = width
            tiers = (
                (3840, "2160p"),
                (1920, "1080p"),
                (1280, "720p"),
                (720, "576p"),
                (640, "480p"),
            )
        else:
            measured = height
            tiers = (
                (2160, "2160p"),
                (1080, "1080p"),
                (720, "720p"),
                (576, "576p"),
                (480, "480p"),
            )

        for minimum, label in tiers:
            if measured >= minimum:
                return label

        # Below every tier: report the raw height, or the raw width (with a
        # "w" suffix) when the width is known but the height is missing/zero.
        if width is not None and not height:
            return f"{width}w"
        return f"{height}p"