diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bce98a..0405a7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,21 @@ callers). ### Changed +- **`ParsedRelease` is now frozen; enrichment passes return new + instances.** The VO was mutable so `detect_media_type` and + `enrich_from_probe` could patch fields in place — a code smell in a + value object whose identity *is* its content. `ParsedRelease` is now + `@dataclass(frozen=True)`; `languages` is a `tuple[str, ...]` + instead of a `list[str]`. `enrich_from_probe` returns a new + `ParsedRelease` via `dataclasses.replace` (only allocates when at + least one field actually changed). `inspect_release` rebinds + `parsed` after both `detect_media_type` (wrapped in `MediaTypeToken` + to satisfy the strict isinstance check that now also runs on + replace) and `enrich_from_probe`. Parser pipeline now packs + `languages` as a tuple in the assemble dict. Callers updated: + `inspect_release`, `testing/recognize_folders_in_downloads.py`, and + the enrichment tests (22 call sites + language assertions switched + to tuple literals). - **`resolve_destination` use cases take `kb` / `prober` as required params; module-level singletons gone.** The four `resolve_{season,episode,movie,series}_destination` use cases now diff --git a/alfred/application/release/enrich_from_probe.py b/alfred/application/release/enrich_from_probe.py index b26eada..15e81c2 100644 --- a/alfred/application/release/enrich_from_probe.py +++ b/alfred/application/release/enrich_from_probe.py @@ -2,6 +2,8 @@ from __future__ import annotations +from dataclasses import replace + from alfred.domain.release.ports import ReleaseKnowledge from alfred.domain.release.value_objects import ParsedRelease from alfred.domain.shared.media import MediaInfo @@ -9,12 +11,13 @@ from alfred.domain.shared.media import MediaInfo def enrich_from_probe( parsed: ParsedRelease, info: MediaInfo, kb: ReleaseKnowledge -) -> None: +) -> ParsedRelease: """ - Fill None fields in parsed using data from ffprobe MediaInfo. + Return a new ParsedRelease with None fields filled from ffprobe MediaInfo. Only overwrites fields that are currently None — token-level values - from the release name always take priority. Mutates parsed in place. + from the release name always take priority. ``ParsedRelease`` is + frozen; this returns a new instance via :func:`dataclasses.replace`. Translation tables (ffprobe codec name → scene token, channel count → layout) live in ``kb.probe_mappings`` (loaded from @@ -27,17 +30,17 @@ def enrich_from_probe( audio_codec_map: dict[str, str] = mappings.get("audio_codec", {}) channel_map: dict[int, str] = mappings.get("audio_channels", {}) + updates: dict[str, object] = {} + if parsed.quality is None and info.resolution: - parsed.quality = info.resolution + updates["quality"] = info.resolution if parsed.codec is None and info.video_codec: - parsed.codec = video_codec_map.get( + updates["codec"] = video_codec_map.get( info.video_codec.lower(), info.video_codec.upper() ) - if parsed.bit_depth is None and info.video_codec: - # ffprobe exposes bit depth via pix_fmt — not in MediaInfo yet, skip for now - pass + # bit_depth: ffprobe exposes it via pix_fmt — not in MediaInfo yet, skip. # Audio — use the default track, fallback to first default_track = next((t for t in info.audio_tracks if t.is_default), None) @@ -45,23 +48,27 @@ def enrich_from_probe( if track: if parsed.audio_codec is None and track.codec: - parsed.audio_codec = audio_codec_map.get( + updates["audio_codec"] = audio_codec_map.get( track.codec.lower(), track.codec.upper() ) if parsed.audio_channels is None and track.channels: - parsed.audio_channels = channel_map.get( + updates["audio_channels"] = channel_map.get( track.channels, f"{track.channels}ch" ) # Languages — merge ffprobe languages with token-level ones # "und" = undetermined, not useful if info.audio_languages: - existing = set(parsed.languages) + existing_upper = {lang.upper() for lang in parsed.languages} + new_languages = list(parsed.languages) for lang in info.audio_languages: - if lang.lower() != "und" and lang.upper() not in existing: - parsed.languages.append(lang) + if lang.lower() != "und" and lang.upper() not in existing_upper: + new_languages.append(lang) + existing_upper.add(lang.upper()) + if len(new_languages) != len(parsed.languages): + updates["languages"] = tuple(new_languages) - # tech_string is a derived property on ParsedRelease — it always - # reflects the current quality/source/codec, so there's nothing to - # refresh after enrichment. + if not updates: + return parsed + return replace(parsed, **updates) diff --git a/alfred/application/release/inspect.py b/alfred/application/release/inspect.py index bb758cd..ea33b4d 100644 --- a/alfred/application/release/inspect.py +++ b/alfred/application/release/inspect.py @@ -45,7 +45,7 @@ Design notes: from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, replace from pathlib import Path from alfred.application.release.detect_media_type import detect_media_type @@ -53,7 +53,11 @@ from alfred.application.release.enrich_from_probe import enrich_from_probe from alfred.application.release.supported_media import find_main_video from alfred.domain.release.ports import ReleaseKnowledge from alfred.domain.release.services import parse_release -from alfred.domain.release.value_objects import ParsedRelease, ParseReport +from alfred.domain.release.value_objects import ( + MediaTypeToken, + ParsedRelease, + ParseReport, +) from alfred.domain.shared.media import MediaInfo from alfred.domain.shared.ports import MediaProber @@ -115,8 +119,11 @@ def inspect_release( # Step 2: refine media_type from the on-disk extension mix. # detect_media_type tolerates non-existent paths (returns parsed.media_type - # untouched), so no need to guard here. - parsed.media_type = detect_media_type(parsed, source_path, kb) + # untouched), so no need to guard here. ParsedRelease is frozen — use + # dataclasses.replace to rebind with the refined value. + refined_media_type = MediaTypeToken(detect_media_type(parsed, source_path, kb)) + if refined_media_type != parsed.media_type: + parsed = replace(parsed, media_type=refined_media_type) # Step 3: pick the canonical main video (top-level scan only). main_video = find_main_video(source_path, kb) @@ -127,7 +134,7 @@ def inspect_release( if main_video is not None and parsed.media_type not in _NON_PROBABLE_MEDIA_TYPES: media_info = prober.probe(main_video) if media_info is not None: - enrich_from_probe(parsed, media_info, kb) + parsed = enrich_from_probe(parsed, media_info, kb) probe_used = True return InspectedResult( diff --git a/alfred/domain/release/parser/pipeline.py b/alfred/domain/release/parser/pipeline.py index be69729..84ebcd2 100644 --- a/alfred/domain/release/parser/pipeline.py +++ b/alfred/domain/release/parser/pipeline.py @@ -753,7 +753,7 @@ def assemble( "group": group, "media_type": media_type, "site_tag": site_tag, - "languages": languages, + "languages": tuple(languages), "audio_codec": audio_codec, "audio_channels": audio_channels, "bit_depth": bit_depth, diff --git a/alfred/domain/release/value_objects.py b/alfred/domain/release/value_objects.py index 243ecdc..8692bc6 100644 --- a/alfred/domain/release/value_objects.py +++ b/alfred/domain/release/value_objects.py @@ -15,7 +15,7 @@ calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders. from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from enum import Enum from ..shared.exceptions import ValidationError @@ -114,13 +114,17 @@ class ParseReport: ) -@dataclass +@dataclass(frozen=True) class ParsedRelease: """Structured representation of a parsed release name. ``title_sanitized`` carries the filesystem-safe form of ``title`` (computed by the parser at construction time using the injected knowledge base). Builder methods rely on it being already-sanitized — see module docstring. + + Frozen: enrichment passes (``detect_media_type``, ``enrich_from_probe``) + return a **new** ``ParsedRelease`` via ``dataclasses.replace`` rather + than mutating in place. ``languages`` is a tuple for the same reason. """ raw: str # original release name (untouched) @@ -140,7 +144,7 @@ class ParsedRelease: None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc" ) parse_path: TokenizationRoute = TokenizationRoute.DIRECT - languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], … + languages: tuple[str, ...] = () # ("MULTI", "VFF"), ("FRENCH",), … audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", … audio_channels: str | None = None # "5.1", "7.1", "2.0", … bit_depth: str | None = None # "10bit", "8bit", … diff --git a/testing/recognize_folders_in_downloads.py b/testing/recognize_folders_in_downloads.py index 459cf29..3069e05 100644 --- a/testing/recognize_folders_in_downloads.py +++ b/testing/recognize_folders_in_downloads.py @@ -100,9 +100,12 @@ def main() -> None: print(c(f"Error: {downloads} does not exist", RED), file=sys.stderr) sys.exit(1) + from dataclasses import replace + from alfred.application.release.detect_media_type import detect_media_type from alfred.application.release.enrich_from_probe import enrich_from_probe from alfred.domain.release.services import parse_release + from alfred.domain.release.value_objects import MediaTypeToken from alfred.infrastructure.filesystem.find_video import find_video_file from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge from alfred.infrastructure.probe import FfprobeMediaProber @@ -126,13 +129,13 @@ def main() -> None: try: p, _report = parse_release(name, _kb) - p.media_type = detect_media_type(p, entry, _kb) + p = replace(p, media_type=MediaTypeToken(detect_media_type(p, entry, _kb))) if p.media_type not in ("unknown", "other"): video_file = find_video_file(entry) if video_file: media_info = _prober.probe(video_file) if media_info: - enrich_from_probe(p, media_info, _kb) + p = enrich_from_probe(p, media_info, _kb) warnings = _assess(p) except Exception as e: warnings = [f"parse error: {e}"] diff --git a/tests/application/test_enrich_from_probe.py b/tests/application/test_enrich_from_probe.py index 349d0e8..88a5376 100644 --- a/tests/application/test_enrich_from_probe.py +++ b/tests/application/test_enrich_from_probe.py @@ -1,8 +1,8 @@ """Tests for ``alfred.application.release.enrich_from_probe``. -The function mutates a ``ParsedRelease`` in place using ffprobe ``MediaInfo``. -Token-level values from the release name always win — only ``None`` fields -are filled. +The function returns a new ``ParsedRelease`` with ``None`` fields filled +from ffprobe ``MediaInfo``. Token-level values from the release name +always win — only ``None`` fields are filled. Coverage: @@ -62,17 +62,17 @@ def _bare(**overrides) -> ParsedRelease: class TestQuality: def test_fills_when_none(self): p = _bare() - enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB) + p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB) assert p.quality == "1080p" def test_does_not_overwrite_existing(self): p = _bare(quality="2160p") - enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB) + p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB) assert p.quality == "2160p" def test_no_dims_leaves_none(self): p = _bare() - enrich_from_probe(p, MediaInfo(), _KB) + p = enrich_from_probe(p, MediaInfo(), _KB) assert p.quality is None @@ -84,27 +84,27 @@ class TestQuality: class TestVideoCodec: def test_hevc_to_x265(self): p = _bare() - enrich_from_probe(p, _info_with_video(codec="hevc"), _KB) + p = enrich_from_probe(p, _info_with_video(codec="hevc"), _KB) assert p.codec == "x265" def test_h264_to_x264(self): p = _bare() - enrich_from_probe(p, _info_with_video(codec="h264"), _KB) + p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB) assert p.codec == "x264" def test_unknown_codec_uppercased(self): p = _bare() - enrich_from_probe(p, _info_with_video(codec="weird"), _KB) + p = enrich_from_probe(p, _info_with_video(codec="weird"), _KB) assert p.codec == "WEIRD" def test_does_not_overwrite_existing(self): p = _bare(codec="HEVC") - enrich_from_probe(p, _info_with_video(codec="h264"), _KB) + p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB) assert p.codec == "HEVC" def test_no_codec_leaves_none(self): p = _bare() - enrich_from_probe(p, MediaInfo(), _KB) + p = enrich_from_probe(p, MediaInfo(), _KB) assert p.codec is None @@ -122,7 +122,7 @@ class TestAudio: ] ) p = _bare() - enrich_from_probe(p, info, _KB) + p = enrich_from_probe(p, info, _KB) assert p.audio_codec == "EAC3" assert p.audio_channels == "5.1" @@ -134,32 +134,32 @@ class TestAudio: ] ) p = _bare() - enrich_from_probe(p, info, _KB) + p = enrich_from_probe(p, info, _KB) assert p.audio_codec == "AC3" assert p.audio_channels == "5.1" def test_channel_count_unknown_falls_back(self): info = MediaInfo(audio_tracks=[AudioTrack(0, "aac", 4, "quad", "eng")]) p = _bare() - enrich_from_probe(p, info, _KB) + p = enrich_from_probe(p, info, _KB) assert p.audio_channels == "4ch" def test_unknown_audio_codec_uppercased(self): info = MediaInfo(audio_tracks=[AudioTrack(0, "newcodec", 2, "stereo", "eng")]) p = _bare() - enrich_from_probe(p, info, _KB) + p = enrich_from_probe(p, info, _KB) assert p.audio_codec == "NEWCODEC" def test_no_audio_tracks(self): p = _bare() - enrich_from_probe(p, MediaInfo(), _KB) + p = enrich_from_probe(p, MediaInfo(), _KB) assert p.audio_codec is None assert p.audio_channels is None def test_does_not_overwrite_existing_audio_fields(self): info = MediaInfo(audio_tracks=[AudioTrack(0, "ac3", 6, "5.1", "eng")]) p = _bare(audio_codec="DTS-HD.MA", audio_channels="7.1") - enrich_from_probe(p, info, _KB) + p = enrich_from_probe(p, info, _KB) assert p.audio_codec == "DTS-HD.MA" assert p.audio_channels == "7.1" @@ -178,8 +178,8 @@ class TestLanguages: ] ) p = _bare() - enrich_from_probe(p, info, _KB) - assert p.languages == ["eng", "fre"] + p = enrich_from_probe(p, info, _KB) + assert p.languages == ("eng", "fre") def test_skips_und(self): info = MediaInfo( @@ -189,8 +189,8 @@ class TestLanguages: ] ) p = _bare() - enrich_from_probe(p, info, _KB) - assert p.languages == ["eng"] + p = enrich_from_probe(p, info, _KB) + assert p.languages == ("eng",) def test_dedup_against_existing_case_insensitive(self): # existing token-level languages are typically upper-case ("FRENCH", "ENG") @@ -202,16 +202,15 @@ class TestLanguages: AudioTrack(1, "aac", 2, "stereo", "fre"), ] ) - p = _bare() - p.languages = ["ENG"] - enrich_from_probe(p, info, _KB) + p = _bare(languages=("ENG",)) + p = enrich_from_probe(p, info, _KB) # "eng" → upper "ENG" already present → skipped. "fre" → "FRE" new → kept. - assert p.languages == ["ENG", "fre"] + assert p.languages == ("ENG", "fre") def test_no_audio_tracks_leaves_languages_empty(self): p = _bare() - enrich_from_probe(p, MediaInfo(), _KB) - assert p.languages == [] + p = enrich_from_probe(p, MediaInfo(), _KB) + assert p.languages == () # --------------------------------------------------------------------------- # @@ -226,7 +225,7 @@ class TestTechString: def test_rebuilt_from_filled_quality_and_codec(self): p = _bare() - enrich_from_probe( + p = enrich_from_probe( p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB ) assert p.quality == "1080p" @@ -236,7 +235,7 @@ class TestTechString: def test_keeps_existing_source_when_enriching(self): # Token-level source must stay; probe fills only None fields. p = _bare(source="BluRay") - enrich_from_probe( + p = enrich_from_probe( p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB ) assert p.tech_string == "1080p.BluRay.x265" @@ -245,10 +244,10 @@ class TestTechString: # No video info → nothing to fill → derived tech_string stays as it was. p = _bare(quality="2160p", source="WEB-DL", codec="x265") assert p.tech_string == "2160p.WEB-DL.x265" - enrich_from_probe(p, MediaInfo(), _KB) + p = enrich_from_probe(p, MediaInfo(), _KB) assert p.tech_string == "2160p.WEB-DL.x265" def test_empty_when_nothing_known(self): p = _bare() - enrich_from_probe(p, MediaInfo(), _KB) + p = enrich_from_probe(p, MediaInfo(), _KB) assert p.tech_string == "" diff --git a/tests/domain/release/test_parser_v2_easy.py b/tests/domain/release/test_parser_v2_easy.py index 027f276..9721c5e 100644 --- a/tests/domain/release/test_parser_v2_easy.py +++ b/tests/domain/release/test_parser_v2_easy.py @@ -198,7 +198,7 @@ class TestEnrichers: assert annotated is not None fields = assemble(annotated, tag, name, _KB) - assert fields["languages"] == ["FRENCH", "MULTI"] + assert fields["languages"] == ("FRENCH", "MULTI") assert fields["audio_codec"] == "DTS-HD.MA" assert fields["audio_channels"] == "5.1" @@ -212,5 +212,5 @@ class TestEnrichers: assert fields["title"] == "Show" assert fields["season"] == 1 assert fields["episode"] == 5 - assert fields["languages"] == ["FRENCH"] + assert fields["languages"] == ("FRENCH",) assert fields["media_type"] == "tv_show" diff --git a/tests/domain/test_release.py b/tests/domain/test_release.py index 733c960..b09e235 100644 --- a/tests/domain/test_release.py +++ b/tests/domain/test_release.py @@ -264,10 +264,10 @@ class TestParsedReleaseInvariants: r = _parse(raw) assert r.raw == raw - def test_languages_defaults_to_empty_list_not_none(self): + def test_languages_defaults_to_empty_tuple_not_none(self): r = _parse("Movie.2020.1080p.BluRay.x264-GRP") - # __post_init__ ensures languages is a list, never None - assert r.languages == [] + # ``languages`` defaults to an empty tuple (frozen VO). + assert r.languages == () def test_tech_string_joined(self): r = _parse("Movie.2020.1080p.BluRay.x264-GRP") diff --git a/tests/domain/test_release_fixtures.py b/tests/domain/test_release_fixtures.py index 424850a..9c410c2 100644 --- a/tests/domain/test_release_fixtures.py +++ b/tests/domain/test_release_fixtures.py @@ -48,6 +48,9 @@ def test_parse_matches_fixture(fixture: ReleaseFixture, tmp_path) -> None: # ``asdict()`` does not include them. result["is_season_pack"] = parsed.is_season_pack result["tech_string"] = parsed.tech_string + # ``languages`` is a tuple on the VO; fixtures encode it as a YAML list. + # Compare list-to-list so the equality is unambiguous. + result["languages"] = list(result.get("languages", ())) for field, expected in fixture.expected_parsed.items(): assert field in result, (