refactor(release): freeze ParsedRelease + enrich_from_probe returns new instance

ParsedRelease is now @dataclass(frozen=True). The enrichment passes that
used to patch fields in place now produce new instances:

- enrich_from_probe(parsed, info, kb) returns a new ParsedRelease via
  dataclasses.replace (the original instance is returned unchanged when
  no field needs filling).
- inspect_release rebinds 'parsed' after detect_media_type (its result is
  wrapped in MediaTypeToken, because the strict isinstance check now also
  runs on replace) and after enrich_from_probe.

languages becomes a tuple[str, ...] so the VO is properly immutable.
Parser pipeline packs languages as a tuple in the assemble dict.

Callers updated: inspect_release, testing/recognize_folders_in_downloads.py.
Tests updated: 22 enrich_from_probe call sites rebound, language
assertions switched to tuple literals, test_release_fixtures normalizes
result['languages'] back to list for YAML-fixture comparison.

Suite: 1077 passed.
This commit is contained in:
2026-05-21 07:51:49 +02:00
parent 9f1ce94690
commit b7979c0f8b
10 changed files with 101 additions and 63 deletions
+15
View File
@@ -57,6 +57,21 @@ callers).
### Changed
- **`ParsedRelease` is now frozen; enrichment passes return new
instances.** The VO was mutable so that callers could assign the result
of `detect_media_type` onto it and `enrich_from_probe` could patch
fields in place — a code smell in a value object whose identity *is*
its content. `ParsedRelease` is now `@dataclass(frozen=True)`;
`languages` is a `tuple[str, ...]` instead of a `list[str]`.
`enrich_from_probe` returns a new `ParsedRelease` via
`dataclasses.replace` (the original instance is returned as-is when no
field changed). `inspect_release` rebinds `parsed` after both
`detect_media_type` (whose result is wrapped in `MediaTypeToken` to
satisfy the strict isinstance check that now also runs on replace) and
`enrich_from_probe`. Parser pipeline now packs `languages` as a tuple
in the assemble dict. Callers updated: `inspect_release`,
`testing/recognize_folders_in_downloads.py`, and the enrichment tests
(22 call sites + language assertions switched to tuple literals).
- **`resolve_destination` use cases take `kb` / `prober` as required
params; module-level singletons gone.** The four
`resolve_{season,episode,movie,series}_destination` use cases now
+23 -16
View File
@@ -2,6 +2,8 @@
from __future__ import annotations
from dataclasses import replace
from alfred.domain.release.ports import ReleaseKnowledge
from alfred.domain.release.value_objects import ParsedRelease
from alfred.domain.shared.media import MediaInfo
@@ -9,12 +11,13 @@ from alfred.domain.shared.media import MediaInfo
def enrich_from_probe(
parsed: ParsedRelease, info: MediaInfo, kb: ReleaseKnowledge
) -> None:
) -> ParsedRelease:
"""
Fill None fields in parsed using data from ffprobe MediaInfo.
Return a new ParsedRelease with None fields filled from ffprobe MediaInfo.
Only overwrites fields that are currently None — token-level values
from the release name always take priority. Mutates parsed in place.
from the release name always take priority. ``ParsedRelease`` is
frozen; this returns a new instance via :func:`dataclasses.replace`.
Translation tables (ffprobe codec name → scene token, channel count
→ layout) live in ``kb.probe_mappings`` (loaded from
@@ -27,17 +30,17 @@ def enrich_from_probe(
audio_codec_map: dict[str, str] = mappings.get("audio_codec", {})
channel_map: dict[int, str] = mappings.get("audio_channels", {})
updates: dict[str, object] = {}
if parsed.quality is None and info.resolution:
parsed.quality = info.resolution
updates["quality"] = info.resolution
if parsed.codec is None and info.video_codec:
parsed.codec = video_codec_map.get(
updates["codec"] = video_codec_map.get(
info.video_codec.lower(), info.video_codec.upper()
)
if parsed.bit_depth is None and info.video_codec:
# ffprobe exposes bit depth via pix_fmt — not in MediaInfo yet, skip for now
pass
# bit_depth: ffprobe exposes it via pix_fmt — not in MediaInfo yet, skip.
# Audio — use the default track, fallback to first
default_track = next((t for t in info.audio_tracks if t.is_default), None)
@@ -45,23 +48,27 @@ def enrich_from_probe(
if track:
if parsed.audio_codec is None and track.codec:
parsed.audio_codec = audio_codec_map.get(
updates["audio_codec"] = audio_codec_map.get(
track.codec.lower(), track.codec.upper()
)
if parsed.audio_channels is None and track.channels:
parsed.audio_channels = channel_map.get(
updates["audio_channels"] = channel_map.get(
track.channels, f"{track.channels}ch"
)
# Languages — merge ffprobe languages with token-level ones
# "und" = undetermined, not useful
if info.audio_languages:
existing = set(parsed.languages)
existing_upper = {lang.upper() for lang in parsed.languages}
new_languages = list(parsed.languages)
for lang in info.audio_languages:
if lang.lower() != "und" and lang.upper() not in existing:
parsed.languages.append(lang)
if lang.lower() != "und" and lang.upper() not in existing_upper:
new_languages.append(lang)
existing_upper.add(lang.upper())
if len(new_languages) != len(parsed.languages):
updates["languages"] = tuple(new_languages)
# tech_string is a derived property on ParsedRelease — it always
# reflects the current quality/source/codec, so there's nothing to
# refresh after enrichment.
if not updates:
return parsed
return replace(parsed, **updates)
+12 -5
View File
@@ -45,7 +45,7 @@ Design notes:
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import dataclass, replace
from pathlib import Path
from alfred.application.release.detect_media_type import detect_media_type
@@ -53,7 +53,11 @@ from alfred.application.release.enrich_from_probe import enrich_from_probe
from alfred.application.release.supported_media import find_main_video
from alfred.domain.release.ports import ReleaseKnowledge
from alfred.domain.release.services import parse_release
from alfred.domain.release.value_objects import ParsedRelease, ParseReport
from alfred.domain.release.value_objects import (
MediaTypeToken,
ParsedRelease,
ParseReport,
)
from alfred.domain.shared.media import MediaInfo
from alfred.domain.shared.ports import MediaProber
@@ -115,8 +119,11 @@ def inspect_release(
# Step 2: refine media_type from the on-disk extension mix.
# detect_media_type tolerates non-existent paths (returns parsed.media_type
# untouched), so no need to guard here.
parsed.media_type = detect_media_type(parsed, source_path, kb)
# untouched), so no need to guard here. ParsedRelease is frozen — use
# dataclasses.replace to rebind with the refined value.
refined_media_type = MediaTypeToken(detect_media_type(parsed, source_path, kb))
if refined_media_type != parsed.media_type:
parsed = replace(parsed, media_type=refined_media_type)
# Step 3: pick the canonical main video (top-level scan only).
main_video = find_main_video(source_path, kb)
@@ -127,7 +134,7 @@ def inspect_release(
if main_video is not None and parsed.media_type not in _NON_PROBABLE_MEDIA_TYPES:
media_info = prober.probe(main_video)
if media_info is not None:
enrich_from_probe(parsed, media_info, kb)
parsed = enrich_from_probe(parsed, media_info, kb)
probe_used = True
return InspectedResult(
+1 -1
View File
@@ -753,7 +753,7 @@ def assemble(
"group": group,
"media_type": media_type,
"site_tag": site_tag,
"languages": languages,
"languages": tuple(languages),
"audio_codec": audio_codec,
"audio_channels": audio_channels,
"bit_depth": bit_depth,
+7 -3
View File
@@ -15,7 +15,7 @@ calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.
from __future__ import annotations
from dataclasses import dataclass, field
from dataclasses import dataclass
from enum import Enum
from ..shared.exceptions import ValidationError
@@ -114,13 +114,17 @@ class ParseReport:
)
@dataclass
@dataclass(frozen=True)
class ParsedRelease:
"""Structured representation of a parsed release name.
``title_sanitized`` carries the filesystem-safe form of ``title`` (computed
by the parser at construction time using the injected knowledge base).
Builder methods rely on it being already-sanitized — see module docstring.
Frozen: enrichment passes (``detect_media_type``, ``enrich_from_probe``)
return a **new** ``ParsedRelease`` via ``dataclasses.replace`` rather
than mutating in place. ``languages`` is a tuple for the same reason.
"""
raw: str # original release name (untouched)
@@ -140,7 +144,7 @@ class ParsedRelease:
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
)
parse_path: TokenizationRoute = TokenizationRoute.DIRECT
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
languages: tuple[str, ...] = () # ("MULTI", "VFF"), ("FRENCH",), …
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
bit_depth: str | None = None # "10bit", "8bit", …
+5 -2
View File
@@ -100,9 +100,12 @@ def main() -> None:
print(c(f"Error: {downloads} does not exist", RED), file=sys.stderr)
sys.exit(1)
from dataclasses import replace
from alfred.application.release.detect_media_type import detect_media_type
from alfred.application.release.enrich_from_probe import enrich_from_probe
from alfred.domain.release.services import parse_release
from alfred.domain.release.value_objects import MediaTypeToken
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
from alfred.infrastructure.probe import FfprobeMediaProber
@@ -126,13 +129,13 @@ def main() -> None:
try:
p, _report = parse_release(name, _kb)
p.media_type = detect_media_type(p, entry, _kb)
p = replace(p, media_type=MediaTypeToken(detect_media_type(p, entry, _kb)))
if p.media_type not in ("unknown", "other"):
video_file = find_video_file(entry)
if video_file:
media_info = _prober.probe(video_file)
if media_info:
enrich_from_probe(p, media_info, _kb)
p = enrich_from_probe(p, media_info, _kb)
warnings = _assess(p)
except Exception as e:
warnings = [f"parse error: {e}"]
+30 -31
View File
@@ -1,8 +1,8 @@
"""Tests for ``alfred.application.release.enrich_from_probe``.
The function mutates a ``ParsedRelease`` in place using ffprobe ``MediaInfo``.
Token-level values from the release name always win — only ``None`` fields
are filled.
The function returns a new ``ParsedRelease`` with ``None`` fields filled
from ffprobe ``MediaInfo``. Token-level values from the release name
always win — only ``None`` fields are filled.
Coverage:
@@ -62,17 +62,17 @@ def _bare(**overrides) -> ParsedRelease:
class TestQuality:
def test_fills_when_none(self):
p = _bare()
enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
assert p.quality == "1080p"
def test_does_not_overwrite_existing(self):
p = _bare(quality="2160p")
enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
assert p.quality == "2160p"
def test_no_dims_leaves_none(self):
p = _bare()
enrich_from_probe(p, MediaInfo(), _KB)
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.quality is None
@@ -84,27 +84,27 @@ class TestQuality:
class TestVideoCodec:
def test_hevc_to_x265(self):
p = _bare()
enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
p = enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
assert p.codec == "x265"
def test_h264_to_x264(self):
p = _bare()
enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
assert p.codec == "x264"
def test_unknown_codec_uppercased(self):
p = _bare()
enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
p = enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
assert p.codec == "WEIRD"
def test_does_not_overwrite_existing(self):
p = _bare(codec="HEVC")
enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
assert p.codec == "HEVC"
def test_no_codec_leaves_none(self):
p = _bare()
enrich_from_probe(p, MediaInfo(), _KB)
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.codec is None
@@ -122,7 +122,7 @@ class TestAudio:
]
)
p = _bare()
enrich_from_probe(p, info, _KB)
p = enrich_from_probe(p, info, _KB)
assert p.audio_codec == "EAC3"
assert p.audio_channels == "5.1"
@@ -134,32 +134,32 @@ class TestAudio:
]
)
p = _bare()
enrich_from_probe(p, info, _KB)
p = enrich_from_probe(p, info, _KB)
assert p.audio_codec == "AC3"
assert p.audio_channels == "5.1"
def test_channel_count_unknown_falls_back(self):
info = MediaInfo(audio_tracks=[AudioTrack(0, "aac", 4, "quad", "eng")])
p = _bare()
enrich_from_probe(p, info, _KB)
p = enrich_from_probe(p, info, _KB)
assert p.audio_channels == "4ch"
def test_unknown_audio_codec_uppercased(self):
info = MediaInfo(audio_tracks=[AudioTrack(0, "newcodec", 2, "stereo", "eng")])
p = _bare()
enrich_from_probe(p, info, _KB)
p = enrich_from_probe(p, info, _KB)
assert p.audio_codec == "NEWCODEC"
def test_no_audio_tracks(self):
p = _bare()
enrich_from_probe(p, MediaInfo(), _KB)
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.audio_codec is None
assert p.audio_channels is None
def test_does_not_overwrite_existing_audio_fields(self):
info = MediaInfo(audio_tracks=[AudioTrack(0, "ac3", 6, "5.1", "eng")])
p = _bare(audio_codec="DTS-HD.MA", audio_channels="7.1")
enrich_from_probe(p, info, _KB)
p = enrich_from_probe(p, info, _KB)
assert p.audio_codec == "DTS-HD.MA"
assert p.audio_channels == "7.1"
@@ -178,8 +178,8 @@ class TestLanguages:
]
)
p = _bare()
enrich_from_probe(p, info, _KB)
assert p.languages == ["eng", "fre"]
p = enrich_from_probe(p, info, _KB)
assert p.languages == ("eng", "fre")
def test_skips_und(self):
info = MediaInfo(
@@ -189,8 +189,8 @@ class TestLanguages:
]
)
p = _bare()
enrich_from_probe(p, info, _KB)
assert p.languages == ["eng"]
p = enrich_from_probe(p, info, _KB)
assert p.languages == ("eng",)
def test_dedup_against_existing_case_insensitive(self):
# existing token-level languages are typically upper-case ("FRENCH", "ENG")
@@ -202,16 +202,15 @@ class TestLanguages:
AudioTrack(1, "aac", 2, "stereo", "fre"),
]
)
p = _bare()
p.languages = ["ENG"]
enrich_from_probe(p, info, _KB)
p = _bare(languages=("ENG",))
p = enrich_from_probe(p, info, _KB)
# "eng" → upper "ENG" already present → skipped. "fre" → "FRE" new → kept.
assert p.languages == ["ENG", "fre"]
assert p.languages == ("ENG", "fre")
def test_no_audio_tracks_leaves_languages_empty(self):
p = _bare()
enrich_from_probe(p, MediaInfo(), _KB)
assert p.languages == []
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.languages == ()
# --------------------------------------------------------------------------- #
@@ -226,7 +225,7 @@ class TestTechString:
def test_rebuilt_from_filled_quality_and_codec(self):
p = _bare()
enrich_from_probe(
p = enrich_from_probe(
p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
)
assert p.quality == "1080p"
@@ -236,7 +235,7 @@ class TestTechString:
def test_keeps_existing_source_when_enriching(self):
# Token-level source must stay; probe fills only None fields.
p = _bare(source="BluRay")
enrich_from_probe(
p = enrich_from_probe(
p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
)
assert p.tech_string == "1080p.BluRay.x265"
@@ -245,10 +244,10 @@ class TestTechString:
# No video info → nothing to fill → derived tech_string stays as it was.
p = _bare(quality="2160p", source="WEB-DL", codec="x265")
assert p.tech_string == "2160p.WEB-DL.x265"
enrich_from_probe(p, MediaInfo(), _KB)
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.tech_string == "2160p.WEB-DL.x265"
def test_empty_when_nothing_known(self):
p = _bare()
enrich_from_probe(p, MediaInfo(), _KB)
p = enrich_from_probe(p, MediaInfo(), _KB)
assert p.tech_string == ""
+2 -2
View File
@@ -198,7 +198,7 @@ class TestEnrichers:
assert annotated is not None
fields = assemble(annotated, tag, name, _KB)
assert fields["languages"] == ["FRENCH", "MULTI"]
assert fields["languages"] == ("FRENCH", "MULTI")
assert fields["audio_codec"] == "DTS-HD.MA"
assert fields["audio_channels"] == "5.1"
@@ -212,5 +212,5 @@ class TestEnrichers:
assert fields["title"] == "Show"
assert fields["season"] == 1
assert fields["episode"] == 5
assert fields["languages"] == ["FRENCH"]
assert fields["languages"] == ("FRENCH",)
assert fields["media_type"] == "tv_show"
+3 -3
View File
@@ -264,10 +264,10 @@ class TestParsedReleaseInvariants:
r = _parse(raw)
assert r.raw == raw
def test_languages_defaults_to_empty_list_not_none(self):
def test_languages_defaults_to_empty_tuple_not_none(self):
r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
# __post_init__ ensures languages is a list, never None
assert r.languages == []
# ``languages`` defaults to an empty tuple (frozen VO).
assert r.languages == ()
def test_tech_string_joined(self):
r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
+3
View File
@@ -48,6 +48,9 @@ def test_parse_matches_fixture(fixture: ReleaseFixture, tmp_path) -> None:
# ``asdict()`` does not include them.
result["is_season_pack"] = parsed.is_season_pack
result["tech_string"] = parsed.tech_string
# ``languages`` is a tuple on the VO; fixtures encode it as a YAML list.
# Compare list-to-list so the equality is unambiguous.
result["languages"] = list(result.get("languages", ()))
for field, expected in fixture.expected_parsed.items():
assert field in result, (