refactor(release): freeze ParsedRelease + enrich_from_probe returns new instance
ParsedRelease is now @dataclass(frozen=True). The enrichment passes that used to patch fields in place now produce new instances:

- enrich_from_probe(parsed, info, kb) returns a new ParsedRelease via dataclasses.replace (when no field changed, the original instance is returned as-is, so nothing is allocated).
- inspect_release rebinds 'parsed' after detect_media_type (the result is wrapped in MediaTypeToken, because the strict isinstance check now also runs on replace) and after enrich_from_probe.

languages becomes a tuple[str, ...] so the VO is properly immutable. The parser pipeline packs languages as a tuple in the assemble dict.

Callers updated: inspect_release, testing/recognize_folders_in_downloads.py.

Tests updated: 22 enrich_from_probe call sites rebound, language assertions switched to tuple literals, test_release_fixtures normalizes result['languages'] back to a list for YAML-fixture comparison.

Suite: 1077 passed.
This commit is contained in:
@@ -57,6 +57,21 @@ callers).
|
||||
|
||||
### Changed
|
||||
|
||||
- **`ParsedRelease` is now frozen; enrichment passes return new
|
||||
instances.** The VO was mutable so `detect_media_type` and
|
||||
`enrich_from_probe` could patch fields in place — a code smell in a
|
||||
value object whose identity *is* its content. `ParsedRelease` is now
|
||||
`@dataclass(frozen=True)`; `languages` is a `tuple[str, ...]`
|
||||
instead of a `list[str]`. `enrich_from_probe` returns a new
|
||||
`ParsedRelease` via `dataclasses.replace` (only allocates when at
|
||||
least one field actually changed). `inspect_release` rebinds
|
||||
`parsed` after both `detect_media_type` (wrapped in `MediaTypeToken`
|
||||
to satisfy the strict isinstance check that now also runs on
|
||||
replace) and `enrich_from_probe`. Parser pipeline now packs
|
||||
`languages` as a tuple in the assemble dict. Callers updated:
|
||||
`inspect_release`, `testing/recognize_folders_in_downloads.py`, and
|
||||
the enrichment tests (22 call sites + language assertions switched
|
||||
to tuple literals).
|
||||
- **`resolve_destination` use cases take `kb` / `prober` as required
|
||||
params; module-level singletons gone.** The four
|
||||
`resolve_{season,episode,movie,series}_destination` use cases now
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import replace
|
||||
|
||||
from alfred.domain.release.ports import ReleaseKnowledge
|
||||
from alfred.domain.release.value_objects import ParsedRelease
|
||||
from alfred.domain.shared.media import MediaInfo
|
||||
@@ -9,12 +11,13 @@ from alfred.domain.shared.media import MediaInfo
|
||||
|
||||
def enrich_from_probe(
|
||||
parsed: ParsedRelease, info: MediaInfo, kb: ReleaseKnowledge
|
||||
) -> None:
|
||||
) -> ParsedRelease:
|
||||
"""
|
||||
Fill None fields in parsed using data from ffprobe MediaInfo.
|
||||
Return a new ParsedRelease with None fields filled from ffprobe MediaInfo.
|
||||
|
||||
Only overwrites fields that are currently None — token-level values
|
||||
from the release name always take priority. Mutates parsed in place.
|
||||
from the release name always take priority. ``ParsedRelease`` is
|
||||
frozen; a new instance is built via :func:`dataclasses.replace` (``parsed`` itself is returned when nothing changed).
|
||||
|
||||
Translation tables (ffprobe codec name → scene token, channel count
|
||||
→ layout) live in ``kb.probe_mappings`` (loaded from
|
||||
@@ -27,17 +30,17 @@ def enrich_from_probe(
|
||||
audio_codec_map: dict[str, str] = mappings.get("audio_codec", {})
|
||||
channel_map: dict[int, str] = mappings.get("audio_channels", {})
|
||||
|
||||
updates: dict[str, object] = {}
|
||||
|
||||
if parsed.quality is None and info.resolution:
|
||||
parsed.quality = info.resolution
|
||||
updates["quality"] = info.resolution
|
||||
|
||||
if parsed.codec is None and info.video_codec:
|
||||
parsed.codec = video_codec_map.get(
|
||||
updates["codec"] = video_codec_map.get(
|
||||
info.video_codec.lower(), info.video_codec.upper()
|
||||
)
|
||||
|
||||
if parsed.bit_depth is None and info.video_codec:
|
||||
# ffprobe exposes bit depth via pix_fmt — not in MediaInfo yet, skip for now
|
||||
pass
|
||||
# bit_depth: ffprobe exposes it via pix_fmt — not in MediaInfo yet, skip.
|
||||
|
||||
# Audio — use the default track, fallback to first
|
||||
default_track = next((t for t in info.audio_tracks if t.is_default), None)
|
||||
@@ -45,23 +48,27 @@ def enrich_from_probe(
|
||||
|
||||
if track:
|
||||
if parsed.audio_codec is None and track.codec:
|
||||
parsed.audio_codec = audio_codec_map.get(
|
||||
updates["audio_codec"] = audio_codec_map.get(
|
||||
track.codec.lower(), track.codec.upper()
|
||||
)
|
||||
|
||||
if parsed.audio_channels is None and track.channels:
|
||||
parsed.audio_channels = channel_map.get(
|
||||
updates["audio_channels"] = channel_map.get(
|
||||
track.channels, f"{track.channels}ch"
|
||||
)
|
||||
|
||||
# Languages — merge ffprobe languages with token-level ones
|
||||
# "und" = undetermined, not useful
|
||||
if info.audio_languages:
|
||||
existing = set(parsed.languages)
|
||||
existing_upper = {lang.upper() for lang in parsed.languages}
|
||||
new_languages = list(parsed.languages)
|
||||
for lang in info.audio_languages:
|
||||
if lang.lower() != "und" and lang.upper() not in existing:
|
||||
parsed.languages.append(lang)
|
||||
if lang.lower() != "und" and lang.upper() not in existing_upper:
|
||||
new_languages.append(lang)
|
||||
existing_upper.add(lang.upper())
|
||||
if len(new_languages) != len(parsed.languages):
|
||||
updates["languages"] = tuple(new_languages)
|
||||
|
||||
# tech_string is a derived property on ParsedRelease — it always
|
||||
# reflects the current quality/source/codec, so there's nothing to
|
||||
# refresh after enrichment.
|
||||
if not updates:
|
||||
return parsed
|
||||
return replace(parsed, **updates)
|
||||
|
||||
@@ -45,7 +45,7 @@ Design notes:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, replace
|
||||
from pathlib import Path
|
||||
|
||||
from alfred.application.release.detect_media_type import detect_media_type
|
||||
@@ -53,7 +53,11 @@ from alfred.application.release.enrich_from_probe import enrich_from_probe
|
||||
from alfred.application.release.supported_media import find_main_video
|
||||
from alfred.domain.release.ports import ReleaseKnowledge
|
||||
from alfred.domain.release.services import parse_release
|
||||
from alfred.domain.release.value_objects import ParsedRelease, ParseReport
|
||||
from alfred.domain.release.value_objects import (
|
||||
MediaTypeToken,
|
||||
ParsedRelease,
|
||||
ParseReport,
|
||||
)
|
||||
from alfred.domain.shared.media import MediaInfo
|
||||
from alfred.domain.shared.ports import MediaProber
|
||||
|
||||
@@ -115,8 +119,11 @@ def inspect_release(
|
||||
|
||||
# Step 2: refine media_type from the on-disk extension mix.
|
||||
# detect_media_type tolerates non-existent paths (returns parsed.media_type
|
||||
# untouched), so no need to guard here.
|
||||
parsed.media_type = detect_media_type(parsed, source_path, kb)
|
||||
# untouched), so no need to guard here. ParsedRelease is frozen — use
|
||||
# dataclasses.replace to rebind with the refined value.
|
||||
refined_media_type = MediaTypeToken(detect_media_type(parsed, source_path, kb))
|
||||
if refined_media_type != parsed.media_type:
|
||||
parsed = replace(parsed, media_type=refined_media_type)
|
||||
|
||||
# Step 3: pick the canonical main video (top-level scan only).
|
||||
main_video = find_main_video(source_path, kb)
|
||||
@@ -127,7 +134,7 @@ def inspect_release(
|
||||
if main_video is not None and parsed.media_type not in _NON_PROBABLE_MEDIA_TYPES:
|
||||
media_info = prober.probe(main_video)
|
||||
if media_info is not None:
|
||||
enrich_from_probe(parsed, media_info, kb)
|
||||
parsed = enrich_from_probe(parsed, media_info, kb)
|
||||
probe_used = True
|
||||
|
||||
return InspectedResult(
|
||||
|
||||
@@ -753,7 +753,7 @@ def assemble(
|
||||
"group": group,
|
||||
"media_type": media_type,
|
||||
"site_tag": site_tag,
|
||||
"languages": languages,
|
||||
"languages": tuple(languages),
|
||||
"audio_codec": audio_codec,
|
||||
"audio_channels": audio_channels,
|
||||
"bit_depth": bit_depth,
|
||||
|
||||
@@ -15,7 +15,7 @@ calling ``kb.sanitize_for_fs(tmdb_title)`` before invoking the builders.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from ..shared.exceptions import ValidationError
|
||||
@@ -114,13 +114,17 @@ class ParseReport:
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(frozen=True)
|
||||
class ParsedRelease:
|
||||
"""Structured representation of a parsed release name.
|
||||
|
||||
``title_sanitized`` carries the filesystem-safe form of ``title`` (computed
|
||||
by the parser at construction time using the injected knowledge base).
|
||||
Builder methods rely on it being already-sanitized — see module docstring.
|
||||
|
||||
Frozen: nothing mutates a ``ParsedRelease`` in place. ``enrich_from_probe``
|
||||
returns a copy built with ``dataclasses.replace``, and callers apply the
|
||||
``detect_media_type`` result the same way. ``languages`` is a tuple for the same reason.
|
||||
"""
|
||||
|
||||
raw: str # original release name (untouched)
|
||||
@@ -140,7 +144,7 @@ class ParsedRelease:
|
||||
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
|
||||
)
|
||||
parse_path: TokenizationRoute = TokenizationRoute.DIRECT
|
||||
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
|
||||
languages: tuple[str, ...] = () # ("MULTI", "VFF"), ("FRENCH",), …
|
||||
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
|
||||
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
|
||||
bit_depth: str | None = None # "10bit", "8bit", …
|
||||
|
||||
@@ -100,9 +100,12 @@ def main() -> None:
|
||||
print(c(f"Error: {downloads} does not exist", RED), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
from dataclasses import replace
|
||||
|
||||
from alfred.application.release.detect_media_type import detect_media_type
|
||||
from alfred.application.release.enrich_from_probe import enrich_from_probe
|
||||
from alfred.domain.release.services import parse_release
|
||||
from alfred.domain.release.value_objects import MediaTypeToken
|
||||
from alfred.infrastructure.filesystem.find_video import find_video_file
|
||||
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
||||
from alfred.infrastructure.probe import FfprobeMediaProber
|
||||
@@ -126,13 +129,13 @@ def main() -> None:
|
||||
|
||||
try:
|
||||
p, _report = parse_release(name, _kb)
|
||||
p.media_type = detect_media_type(p, entry, _kb)
|
||||
p = replace(p, media_type=MediaTypeToken(detect_media_type(p, entry, _kb)))
|
||||
if p.media_type not in ("unknown", "other"):
|
||||
video_file = find_video_file(entry)
|
||||
if video_file:
|
||||
media_info = _prober.probe(video_file)
|
||||
if media_info:
|
||||
enrich_from_probe(p, media_info, _kb)
|
||||
p = enrich_from_probe(p, media_info, _kb)
|
||||
warnings = _assess(p)
|
||||
except Exception as e:
|
||||
warnings = [f"parse error: {e}"]
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Tests for ``alfred.application.release.enrich_from_probe``.
|
||||
|
||||
The function mutates a ``ParsedRelease`` in place using ffprobe ``MediaInfo``.
|
||||
Token-level values from the release name always win — only ``None`` fields
|
||||
are filled.
|
||||
The function returns a new ``ParsedRelease`` with ``None`` fields filled
|
||||
from ffprobe ``MediaInfo``. Token-level values from the release name
|
||||
always win — only ``None`` fields are filled.
|
||||
|
||||
Coverage:
|
||||
|
||||
@@ -62,17 +62,17 @@ def _bare(**overrides) -> ParsedRelease:
|
||||
class TestQuality:
|
||||
def test_fills_when_none(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
|
||||
assert p.quality == "1080p"
|
||||
|
||||
def test_does_not_overwrite_existing(self):
|
||||
p = _bare(quality="2160p")
|
||||
enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(width=1920, height=1080), _KB)
|
||||
assert p.quality == "2160p"
|
||||
|
||||
def test_no_dims_leaves_none(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.quality is None
|
||||
|
||||
|
||||
@@ -84,27 +84,27 @@ class TestQuality:
|
||||
class TestVideoCodec:
|
||||
def test_hevc_to_x265(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(codec="hevc"), _KB)
|
||||
assert p.codec == "x265"
|
||||
|
||||
def test_h264_to_x264(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
|
||||
assert p.codec == "x264"
|
||||
|
||||
def test_unknown_codec_uppercased(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(codec="weird"), _KB)
|
||||
assert p.codec == "WEIRD"
|
||||
|
||||
def test_does_not_overwrite_existing(self):
|
||||
p = _bare(codec="HEVC")
|
||||
enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
|
||||
p = enrich_from_probe(p, _info_with_video(codec="h264"), _KB)
|
||||
assert p.codec == "HEVC"
|
||||
|
||||
def test_no_codec_leaves_none(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.codec is None
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ class TestAudio:
|
||||
]
|
||||
)
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.audio_codec == "EAC3"
|
||||
assert p.audio_channels == "5.1"
|
||||
|
||||
@@ -134,32 +134,32 @@ class TestAudio:
|
||||
]
|
||||
)
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.audio_codec == "AC3"
|
||||
assert p.audio_channels == "5.1"
|
||||
|
||||
def test_channel_count_unknown_falls_back(self):
|
||||
info = MediaInfo(audio_tracks=[AudioTrack(0, "aac", 4, "quad", "eng")])
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.audio_channels == "4ch"
|
||||
|
||||
def test_unknown_audio_codec_uppercased(self):
|
||||
info = MediaInfo(audio_tracks=[AudioTrack(0, "newcodec", 2, "stereo", "eng")])
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.audio_codec == "NEWCODEC"
|
||||
|
||||
def test_no_audio_tracks(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.audio_codec is None
|
||||
assert p.audio_channels is None
|
||||
|
||||
def test_does_not_overwrite_existing_audio_fields(self):
|
||||
info = MediaInfo(audio_tracks=[AudioTrack(0, "ac3", 6, "5.1", "eng")])
|
||||
p = _bare(audio_codec="DTS-HD.MA", audio_channels="7.1")
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.audio_codec == "DTS-HD.MA"
|
||||
assert p.audio_channels == "7.1"
|
||||
|
||||
@@ -178,8 +178,8 @@ class TestLanguages:
|
||||
]
|
||||
)
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
assert p.languages == ["eng", "fre"]
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.languages == ("eng", "fre")
|
||||
|
||||
def test_skips_und(self):
|
||||
info = MediaInfo(
|
||||
@@ -189,8 +189,8 @@ class TestLanguages:
|
||||
]
|
||||
)
|
||||
p = _bare()
|
||||
enrich_from_probe(p, info, _KB)
|
||||
assert p.languages == ["eng"]
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
assert p.languages == ("eng",)
|
||||
|
||||
def test_dedup_against_existing_case_insensitive(self):
|
||||
# existing token-level languages are typically upper-case ("FRENCH", "ENG")
|
||||
@@ -202,16 +202,15 @@ class TestLanguages:
|
||||
AudioTrack(1, "aac", 2, "stereo", "fre"),
|
||||
]
|
||||
)
|
||||
p = _bare()
|
||||
p.languages = ["ENG"]
|
||||
enrich_from_probe(p, info, _KB)
|
||||
p = _bare(languages=("ENG",))
|
||||
p = enrich_from_probe(p, info, _KB)
|
||||
# "eng" → upper "ENG" already present → skipped. "fre" → "FRE" new → kept.
|
||||
assert p.languages == ["ENG", "fre"]
|
||||
assert p.languages == ("ENG", "fre")
|
||||
|
||||
def test_no_audio_tracks_leaves_languages_empty(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.languages == []
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.languages == ()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
@@ -226,7 +225,7 @@ class TestTechString:
|
||||
|
||||
def test_rebuilt_from_filled_quality_and_codec(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(
|
||||
p = enrich_from_probe(
|
||||
p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
|
||||
)
|
||||
assert p.quality == "1080p"
|
||||
@@ -236,7 +235,7 @@ class TestTechString:
|
||||
def test_keeps_existing_source_when_enriching(self):
|
||||
# Token-level source must stay; probe fills only None fields.
|
||||
p = _bare(source="BluRay")
|
||||
enrich_from_probe(
|
||||
p = enrich_from_probe(
|
||||
p, _info_with_video(width=1920, height=1080, codec="hevc"), _KB
|
||||
)
|
||||
assert p.tech_string == "1080p.BluRay.x265"
|
||||
@@ -245,10 +244,10 @@ class TestTechString:
|
||||
# No video info → nothing to fill → derived tech_string stays as it was.
|
||||
p = _bare(quality="2160p", source="WEB-DL", codec="x265")
|
||||
assert p.tech_string == "2160p.WEB-DL.x265"
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.tech_string == "2160p.WEB-DL.x265"
|
||||
|
||||
def test_empty_when_nothing_known(self):
|
||||
p = _bare()
|
||||
enrich_from_probe(p, MediaInfo(), _KB)
|
||||
p = enrich_from_probe(p, MediaInfo(), _KB)
|
||||
assert p.tech_string == ""
|
||||
|
||||
@@ -198,7 +198,7 @@ class TestEnrichers:
|
||||
assert annotated is not None
|
||||
fields = assemble(annotated, tag, name, _KB)
|
||||
|
||||
assert fields["languages"] == ["FRENCH", "MULTI"]
|
||||
assert fields["languages"] == ("FRENCH", "MULTI")
|
||||
assert fields["audio_codec"] == "DTS-HD.MA"
|
||||
assert fields["audio_channels"] == "5.1"
|
||||
|
||||
@@ -212,5 +212,5 @@ class TestEnrichers:
|
||||
assert fields["title"] == "Show"
|
||||
assert fields["season"] == 1
|
||||
assert fields["episode"] == 5
|
||||
assert fields["languages"] == ["FRENCH"]
|
||||
assert fields["languages"] == ("FRENCH",)
|
||||
assert fields["media_type"] == "tv_show"
|
||||
|
||||
@@ -264,10 +264,10 @@ class TestParsedReleaseInvariants:
|
||||
r = _parse(raw)
|
||||
assert r.raw == raw
|
||||
|
||||
def test_languages_defaults_to_empty_list_not_none(self):
|
||||
def test_languages_defaults_to_empty_tuple_not_none(self):
|
||||
r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
|
||||
# __post_init__ ensures languages is a list, never None
|
||||
assert r.languages == []
|
||||
# ``languages`` defaults to an empty tuple (frozen VO).
|
||||
assert r.languages == ()
|
||||
|
||||
def test_tech_string_joined(self):
|
||||
r = _parse("Movie.2020.1080p.BluRay.x264-GRP")
|
||||
|
||||
@@ -48,6 +48,9 @@ def test_parse_matches_fixture(fixture: ReleaseFixture, tmp_path) -> None:
|
||||
# ``asdict()`` does not include them.
|
||||
result["is_season_pack"] = parsed.is_season_pack
|
||||
result["tech_string"] = parsed.tech_string
|
||||
# ``languages`` is a tuple on the VO; fixtures encode it as a YAML list.
|
||||
# Compare list-to-list so the equality is unambiguous.
|
||||
result["languages"] = list(result.get("languages", ()))
|
||||
|
||||
for field, expected in fixture.expected_parsed.items():
|
||||
assert field in result, (
|
||||
|
||||
Reference in New Issue
Block a user