refactor(release): freeze ParsedRelease + enrich_from_probe returns new instance

ParsedRelease is now @dataclass(frozen=True). The enrichment passes that used to patch fields in place now produce new instances: - enrich_from_probe(parsed, info, kb) returns a new ParsedRelease via dataclasses.replace (no allocation when no field changed). - inspect_release rebinds 'parsed' after detect_media_type (wrapped in MediaTypeToken — the strict isinstance check now also runs on replace) and after enrich_from_probe. languages becomes a tuple[str, ...] so the VO is properly immutable. Parser pipeline packs languages as a tuple in the assemble dict. Callers updated: inspect_release, testing/recognize_folders_in_downloads.py. Tests updated: 22 enrich_from_probe call sites rebound, language assertions switched to tuple literals, test_release_fixtures normalizes result['languages'] back to list for YAML-fixture comparison. Suite: 1077 passed.
2026-05-21 07:51:49 +02:00
parent 9f1ce94690
commit b7979c0f8b
10 changed files with 101 additions and 63 deletions
@@ -57,6 +57,21 @@ callers).

 ### Changed

+- **`ParsedRelease` is now frozen; enrichment passes return new
+  instances.** The VO was mutable so `detect_media_type` and
+  `enrich_from_probe` could patch fields in place — a code smell in a
+  value object whose identity *is* its content. `ParsedRelease` is now
+  `@dataclass(frozen=True)`; `languages` is a `tuple[str, ...]`
+  instead of a `list[str]`. `enrich_from_probe` returns a new
+  `ParsedRelease` via `dataclasses.replace` (only allocates when at
+  least one field actually changed). `inspect_release` rebinds
+  `parsed` after both `detect_media_type` (wrapped in `MediaTypeToken`
+  to satisfy the strict isinstance check that now also runs on
+  replace) and `enrich_from_probe`. Parser pipeline now packs
+  `languages` as a tuple in the assemble dict. Callers updated:
+  `inspect_release`, `testing/recognize_folders_in_downloads.py`, and
+  the enrichment tests (22 call sites + language assertions switched
+  to tuple literals).
 - **`resolve_destination` use cases take `kb` / `prober` as required
  params; module-level singletons gone.** The four
  `resolve_{season,episode,movie,series}_destination` use cases now
@@ -2,6 +2,8 @@

 from __future__ import annotations

+from dataclasses import replace
+
 from alfred.domain.release.ports import ReleaseKnowledge
 from alfred.domain.release.value_objects import ParsedRelease
 from alfred.domain.shared.media import MediaInfo
@@ -9,12 +11,13 @@ from alfred.domain.shared.media import MediaInfo

 def enrich_from_probe(
    parsed: ParsedRelease, info: MediaInfo, kb: ReleaseKnowledge
-) -> None:
+) -> ParsedRelease:
    """
-    Fill None fields in parsed using data from ffprobe MediaInfo.
+    Return a new ParsedRelease with None fields filled from ffprobe MediaInfo.

    Only overwrites fields that are currently None — token-level values
-    from the release name always take priority. Mutates parsed in place.
+    from the release name always take priority. ``ParsedRelease`` is
+    frozen; this returns a new instance via :func:`dataclasses.replace`.

    Translation tables (ffprobe codec name → scene token, channel count
    → layout) live in ``kb.probe_mappings`` (loaded from
@@ -27,17 +30,17 @@ def enrich_from_probe(
    audio_codec_map: dict[str, str] = mappings.get("audio_codec", {})
    channel_map: dict[int, str] = mappings.get("audio_channels", {})

+    updates: dict[str, object] = {}
+
    if parsed.quality is None and info.resolution:
-        parsed.quality = info.resolution
+        updates["quality"] = info.resolution

    if parsed.codec is None and info.video_codec:
-        parsed.codec = video_codec_map.get(
+        updates["codec"] = video_codec_map.get(
            info.video_codec.lower(), info.video_codec.upper()
        )

-    if parsed.bit_depth is None and info.video_codec:
-        # ffprobe exposes bit depth via pix_fmt — not in MediaInfo yet, skip for now
-        pass
+    # bit_depth: ffprobe exposes it via pix_fmt — not in MediaInfo yet, skip.

    # Audio — use the default track, fallback to first
    default_track = next((t for t in info.audio_tracks if t.is_default), None)
@@ -45,23 +48,27 @@ def enrich_from_probe(

    if track:
        if parsed.audio_codec is None and track.codec:
-            parsed.audio_codec = audio_codec_map.get(
+            updates["audio_codec"] = audio_codec_map.get(
                track.codec.lower(), track.codec.upper()
            )

        if parsed.audio_channels is None and track.channels:
-            parsed.audio_channels = channel_map.get(
+            updates["audio_channels"] = channel_map.get(
                track.channels, f"{track.channels}ch"
            )

    # Languages — merge ffprobe languages with token-level ones
    # "und" = undetermined, not useful
    if info.audio_languages:
-        existing = set(parsed.languages)
+        existing_upper = {lang.upper() for lang in parsed.languages}
+        new_languages = list(parsed.languages)
        for lang in info.audio_languages:
-            if lang.lower() != "und" and lang.upper() not in existing:
-                parsed.languages.append(lang)
+            if lang.lower() != "und" and lang.upper() not in existing_upper:
+                new_languages.append(lang)
+                existing_upper.add(lang.upper())
+        if len(new_languages) != len(parsed.languages):
+            updates["languages"] = tuple(new_languages)

-    # tech_string is a derived property on ParsedRelease — it always
-    # reflects the current quality/source/codec, so there's nothing to
-    # refresh after enrichment.
+    if not updates:
+        return parsed
+    return replace(parsed, **updates)
@@ -45,7 +45,7 @@ Design notes:

 from __future__ import annotations

-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from pathlib import Path

 from alfred.application.release.detect_media_type import detect_media_type
@@ -53,7 +53,11 @@ from alfred.application.release.enrich_from_probe import enrich_from_probe
 from alfred.application.release.supported_media import find_main_video
 from alfred.domain.release.ports import ReleaseKnowledge
 from alfred.domain.release.services import parse_release
-from alfred.domain.release.value_objects import ParsedRelease, ParseReport
+from alfred.domain.release.value_objects import (
+    MediaTypeToken,
+    ParsedRelease,
+    ParseReport,
+)
 from alfred.domain.shared.media import MediaInfo
 from alfred.domain.shared.ports import MediaProber

@@ -115,8 +119,11 @@ def inspect_release(

    # Step 2: refine media_type from the on-disk extension mix.
    # detect_media_type tolerates non-existent paths (returns parsed.media_type
-    # untouched), so no need to guard here.
-    parsed.media_type = detect_media_type(parsed, source_path, kb)
+    # untouched), so no need to guard here. ParsedRelease is frozen — use
+    # dataclasses.replace to rebind with the refined value.
+    refined_media_type = MediaTypeToken(detect_media_type(parsed, source_path, kb))
+    if refined_media_type != parsed.media_type:
+        parsed = replace(parsed, media_type=refined_media_type)

    # Step 3: pick the canonical main video (top-level scan only).
    main_video = find_main_video(source_path, kb)
@@ -127,7 +134,7 @@ def inspect_release(
    if main_video is not None and parsed.media_type not in _NON_PROBABLE_MEDIA_TYPES:
        media_info = prober.probe(main_video)
        if media_info is not None:
-            enrich_from_probe(parsed, media_info, kb)
+            parsed = enrich_from_probe(parsed, media_info, kb)
            probe_used = True

    return InspectedResult(
@@ -753,7 +753,7 @@ def assemble(
        "group": group,
        "media_type": media_type,
        "site_tag": site_tag,
-        "languages": languages,
+        "languages": tuple(languages),
        "audio_codec": audio_codec,
        "audio_channels": audio_channels,
        "bit_depth": bit_depth,
@@ -15,7 +15,7 @@ calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.

 from __future__ import annotations

-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from enum import Enum

 from ..shared.exceptions import ValidationError
@@ -114,13 +114,17 @@ class ParseReport:
            )


-@dataclass
+@dataclass(frozen=True)
 class ParsedRelease:
    """Structured representation of a parsed release name.

    ``title_sanitized`` carries the filesystem-safe form of ``title`` (computed
    by the parser at construction time using the injected knowledge base).
    Builder methods rely on it being already-sanitized — see module docstring.
+
+    Frozen: enrichment passes (``detect_media_type``, ``enrich_from_probe``)
+    return a **new** ``ParsedRelease`` via ``dataclasses.replace`` rather
+    than mutating in place. ``languages`` is a tuple for the same reason.
    """

    raw: str  # original release name (untouched)
@@ -140,7 +144,7 @@ class ParsedRelease:
        None  # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
    )
    parse_path: TokenizationRoute = TokenizationRoute.DIRECT
-    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
+    languages: tuple[str, ...] = ()  # ("MULTI", "VFF"), ("FRENCH",), …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
    bit_depth: str | None = None  # "10bit", "8bit", …
@@ -100,9 +100,12 @@ def main() -> None:
        print(c(f"Error: {downloads} does not exist", RED), file=sys.stderr)
        sys.exit(1)

+    from dataclasses import replace
+
    from alfred.application.release.detect_media_type import detect_media_type
    from alfred.application.release.enrich_from_probe import enrich_from_probe
    from alfred.domain.release.services import parse_release
+    from alfred.domain.release.value_objects import MediaTypeToken
    from alfred.infrastructure.filesystem.find_video import find_video_file
    from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
    from alfred.infrastructure.probe import FfprobeMediaProber
@@ -126,13 +129,13 @@ def main() -> None:

        try:
            p, _report = parse_release(name, _kb)
-            p.media_type = detect_media_type(p, entry, _kb)
+            p = replace(p, media_type=MediaTypeToken(detect_media_type(p, entry, _kb)))
            if p.media_type not in ("unknown", "other"):
                video_file = find_video_file(entry)
                if video_file:
                    media_info = _prober.probe(video_file)
                    if media_info:
-                        enrich_from_probe(p, media_info, _kb)
+                        p = enrich_from_probe(p, media_info, _kb)
            warnings = _assess(p)
        except Exception as e:
            warnings = [f"parse error: {e}"]
@@ -1,8 +1,8 @@
 """Tests for ``alfred.application.release.enrich_from_probe``.

-The function mutates a ``ParsedRelease`` in place using ffprobe ``MediaInfo``.
-Token-level values from the release name always win — only ``None`` fields
-are filled.
+The function returns a new ``ParsedRelease`` with ``None`` fields filled
+from ffprobe ``MediaInfo``. Token-level values from the release name
+always win — only ``None`` fields are filled.

 Coverage:

@@ -62,17 +62,17 @@ def _bare(**overrides) -> ParsedRelease:
 class TestQuality:
    def test_fills_when_none(self):
        p = _bare()
-        enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
+        p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
        assert p.quality == "1080p"

    def test_does_not_overwrite_existing(self):
        p = _bare(quality="2160p")
-        enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
+        p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
        assert p.quality == "2160p"

    def test_no_dims_leaves_none(self):
        p = _bare()
-        enrich_from_probe(p, MediaInfo(), _KB)
+        p = enrich_from_probe(p, MediaInfo(), _KB)
        assert p.quality is None


@@ -84,27 +84,27 @@ class TestQuality:
 class TestVideoCodec:
    def test_hevc_to_x265(self):
        p = _bare()
-        enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
+        p = enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
        assert p.codec == "x265"

    def test_h264_to_x264(self):
        p = _bare()
-        enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
+        p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
        assert p.codec == "x264"

    def test_unknown_codec_uppercased(self):
        p = _bare()
-        enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
+        p = enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
        assert p.codec == "WEIRD"

    def test_does_not_overwrite_existing(self):
        p = _bare(codec="HEVC")
-        enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
+        p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
        assert p.codec == "HEVC"

    def test_no_codec_leaves_none(self):
        p = _bare()
-        enrich_from_probe(p, MediaInfo(), _KB)
+        p = enrich_from_probe(p, MediaInfo(), _KB)
        assert p.codec is None


@@ -122,7 +122,7 @@ class TestAudio:
            ]
        )
        p = _bare()
-        enrich_from_probe(p, info, _KB)
+        p = enrich_from_probe(p, info, _KB)
        assert p.audio_codec == "EAC3"
        assert p.audio_channels == "5.1"

@@ -134,32 +134,32 @@ class TestAudio:
            ]
        )
        p = _bare()
-        enrich_from_probe(p, info, _KB)
+        p = enrich_from_probe(p, info, _KB)
        assert p.audio_codec == "AC3"
        assert p.audio_channels == "5.1"

    def test_channel_count_unknown_falls_back(self):
        info = MediaInfo(audio_tracks=[AudioTrack(0, "aac", 4, "quad", "eng")])
        p = _bare()
-        enrich_from_probe(p, info, _KB)
+        p = enrich_from_probe(p, info, _KB)
        assert p.audio_channels == "4ch"

    def test_unknown_audio_codec_uppercased(self):
        info = MediaInfo(audio_tracks=[AudioTrack(0, "newcodec", 2, "stereo", "eng")])
        p = _bare()
-        enrich_from_probe(p, info, _KB)
+        p = enrich_from_probe(p, info, _KB)
        assert p.audio_codec == "NEWCODEC"

    def test_no_audio_tracks(self):
        p = _bare()
-        enrich_from_probe(p, MediaInfo(), _KB)
+        p = enrich_from_probe(p, MediaInfo(), _KB)
        assert p.audio_codec is None
        assert p.audio_channels is None

    def test_does_not_overwrite_existing_audio_fields(self):
        info = MediaInfo(audio_tracks=[AudioTrack(0, "ac3", 6, "5.1", "eng")])
        p = _bare(audio_codec="DTS-HD.MA", audio_channels="7.1")
-        enrich_from_probe(p, info, _KB)
+        p = enrich_from_probe(p, info, _KB)
        assert p.audio_codec == "DTS-HD.MA"
        assert p.audio_channels == "7.1"

@@ -178,8 +178,8 @@ class TestLanguages:
            ]
        )
        p = _bare()
-        enrich_from_probe(p, info, _KB)
-        assert p.languages == ["eng", "fre"]
+        p = enrich_from_probe(p, info, _KB)
+        assert p.languages == ("eng", "fre")

    def test_skips_und(self):
        info = MediaInfo(
@@ -189,8 +189,8 @@ class TestLanguages:
            ]
        )
        p = _bare()
-        enrich_from_probe(p, info, _KB)
-        assert p.languages == ["eng"]
+        p = enrich_from_probe(p, info, _KB)
+        assert p.languages == ("eng",)

    def test_dedup_against_existing_case_insensitive(self):
        # existing token-level languages are typically upper-case ("FRENCH", "ENG")
@@ -202,16 +202,15 @@ class TestLanguages:
                AudioTrack(1, "aac", 2, "stereo", "fre"),
            ]
        )
-        p = _bare()
-        p.languages = ["ENG"]
-        enrich_from_probe(p, info, _KB)
+        p = _bare(languages=("ENG",))
+        p = enrich_from_probe(p, info, _KB)
        # "eng" → upper "ENG" already present → skipped. "fre" → "FRE" new → kept.
-        assert p.languages == ["ENG", "fre"]
+        assert p.languages == ("ENG", "fre")

    def test_no_audio_tracks_leaves_languages_empty(self):
        p = _bare()
-        enrich_from_probe(p, MediaInfo(), _KB)
-        assert p.languages == []
+        p = enrich_from_probe(p, MediaInfo(), _KB)
+        assert p.languages == ()


 # --------------------------------------------------------------------------- #
@@ -226,7 +225,7 @@ class TestTechString:

    def test_rebuilt_from_filled_quality_and_codec(self):
        p = _bare()
-        enrich_from_probe(
+        p = enrich_from_probe(
            p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
        )
        assert p.quality == "1080p"
@@ -236,7 +235,7 @@ class TestTechString:
    def test_keeps_existing_source_when_enriching(self):
        # Token-level source must stay; probe fills only None fields.
        p = _bare(source="BluRay")
-        enrich_from_probe(
+        p = enrich_from_probe(
            p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
        )
        assert p.tech_string == "1080p.BluRay.x265"
@@ -245,10 +244,10 @@ class TestTechString:
        # No video info → nothing to fill → derived tech_string stays as it was.
        p = _bare(quality="2160p", source="WEB-DL", codec="x265")
        assert p.tech_string == "2160p.WEB-DL.x265"
-        enrich_from_probe(p, MediaInfo(), _KB)
+        p = enrich_from_probe(p, MediaInfo(), _KB)
        assert p.tech_string == "2160p.WEB-DL.x265"

    def test_empty_when_nothing_known(self):
        p = _bare()
-        enrich_from_probe(p, MediaInfo(), _KB)
+        p = enrich_from_probe(p, MediaInfo(), _KB)
        assert p.tech_string == ""
@@ -198,7 +198,7 @@ class TestEnrichers:
        assert annotated is not None
        fields = assemble(annotated, tag, name, _KB)

-        assert fields["languages"] == ["FRENCH", "MULTI"]
+        assert fields["languages"] == ("FRENCH", "MULTI")
        assert fields["audio_codec"] == "DTS-HD.MA"
        assert fields["audio_channels"] == "5.1"

@@ -212,5 +212,5 @@ class TestEnrichers:
        assert fields["title"] == "Show"
        assert fields["season"] == 1
        assert fields["episode"] == 5
-        assert fields["languages"] == ["FRENCH"]
+        assert fields["languages"] == ("FRENCH",)
        assert fields["media_type"] == "tv_show"
@@ -264,10 +264,10 @@ class TestParsedReleaseInvariants:
        r = _parse(raw)
        assert r.raw == raw

-    def test_languages_defaults_to_empty_list_not_none(self):
+    def test_languages_defaults_to_empty_tuple_not_none(self):
        r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
-        # __post_init__ ensures languages is a list, never None
-        assert r.languages == []
+        # ``languages`` defaults to an empty tuple (frozen VO).
+        assert r.languages == ()

    def test_tech_string_joined(self):
        r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
@@ -48,6 +48,9 @@ def test_parse_matches_fixture(fixture: ReleaseFixture, tmp_path) -> None:
    # ``asdict()`` does not include them.
    result["is_season_pack"] = parsed.is_season_pack
    result["tech_string"] = parsed.tech_string
+    # ``languages`` is a tuple on the VO; fixtures encode it as a YAML list.
+    # Compare list-to-list so the equality is unambiguous.
+    result["languages"] = list(result.get("languages", ()))

    for field, expected in fixture.expected_parsed.items():
        assert field in result, (