b7979c0f8b
ParsedRelease is now @dataclass(frozen=True). The enrichment passes that used to patch fields in place now produce new instances: - enrich_from_probe(parsed, info, kb) returns a new ParsedRelease via dataclasses.replace (no allocation when no field changed). - inspect_release rebinds 'parsed' after detect_media_type (wrapped in MediaTypeToken — the strict isinstance check now also runs on replace) and after enrich_from_probe. languages becomes a tuple[str, ...] so the VO is properly immutable. Parser pipeline packs languages as a tuple in the assemble dict. Callers updated: inspect_release, testing/recognize_folders_in_downloads.py. Tests updated: 22 enrich_from_probe call sites rebound, language assertions switched to tuple literals, test_release_fixtures normalizes result['languages'] back to list for YAML-fixture comparison. Suite: 1077 passed.
294 lines
10 KiB
Python
294 lines
10 KiB
Python
"""Tests for ``alfred.domain.release`` — release-name parser.
|
|
|
|
Covers the public surface used by the resolver / move pipeline:
|
|
|
|
- ``parse_release`` — well-formed scene names (TV episodes, season packs,
|
|
movies), site-tagged names, malformed names recovered via sanitization,
|
|
and irrecoverable names that fall back to ``media_type="unknown"``.
|
|
- ``ParsedRelease`` — derived properties (``is_season_pack``,
|
|
``show_folder_name``, ``season_folder_name``, ``episode_filename``,
|
|
``movie_folder_name``, ``movie_filename``) including the Windows-forbidden
|
|
character sanitizer and the episode-stripping helper for season folders.
|
|
|
|
These tests exercise the parser end-to-end through real YAML knowledge
|
|
files; no monkeypatching of the knowledge layer is performed.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from alfred.domain.release.services import parse_release
|
|
from alfred.domain.release.value_objects import ParsedRelease
|
|
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
|
|
|
# Knowledge base instance shared by the whole module: ``_parse`` hands it to
# ``parse_release`` and the folder-name tests call ``sanitize_for_fs`` on it.
_KB = YamlReleaseKnowledge()
|
|
|
|
|
|
def _parse(name: str) -> ParsedRelease:
    """Run ``parse_release`` on *name* with the module-level knowledge base.

    ``parse_release`` returns a two-element result; only the first element
    (the parsed release) is returned, the second one is dropped.
    """
    outcome = parse_release(name, _KB)
    release, _ignored_report = outcome
    return release
|
|
|
|
|
|
class TestParseTVEpisode:
    """TV releases: single episodes, multi-episode names and season packs."""

    def test_basic_tv_episode(self):
        """A canonical ``SxxEyy`` scene name fills in every expected field."""
        release = _parse("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

        # Title and episode numbering.
        assert release.title == "Oz"
        assert release.season == 3
        assert release.episode == 1
        assert release.episode_end is None

        # Technical tokens and release group.
        assert release.quality == "1080p"
        assert release.source == "WEBRip"
        assert release.codec == "x265"
        assert release.group == "KONTRAST"

        # Classification.
        assert release.media_type == "tv_show"
        assert release.parse_path == "direct"
        assert release.is_season_pack is False

    def test_multi_episode(self):
        """``S14E09E10`` gives a first episode and an end episode."""
        release = _parse("Archer.S14E09E10.1080p.WEB.x265-GRP")

        numbering = (release.season, release.episode, release.episode_end)
        assert numbering == (14, 9, 10)

    def test_nxnn_alt_form(self):
        """The alternate ``1x05`` notation is read like ``S01E05``."""
        release = _parse("Some.Show.1x05.720p.HDTV.x264-GRP")

        assert (release.season, release.episode) == (1, 5)
        assert release.episode_end is None
        assert release.media_type == "tv_show"

    def test_nxnnxnn_multi_episode_alt_form(self):
        """The alternate notation also supports an end episode (``2x07x08``)."""
        release = _parse("Some.Show.2x07x08.1080p.WEB.x265-GRP")

        numbering = (release.season, release.episode, release.episode_end)
        assert numbering == (2, 7, 8)

    def test_season_pack(self):
        """A season token without an episode token is a season pack."""
        release = _parse("Oz.S03.1080p.WEBRip.x265-KONTRAST")

        assert release.season == 3
        assert release.episode is None
        assert release.is_season_pack is True
        assert release.media_type == "tv_show"
|
|
|
|
|
|
class TestParseMovie:
    """Releases that should be classified as movies."""

    def test_basic_movie(self):
        """A ``Title.Year.<tech>-GROUP`` name is parsed as a movie."""
        release = _parse("Inception.2010.1080p.BluRay.x264-GROUP")

        # Title / year, and no TV numbering at all.
        assert release.title == "Inception"
        assert release.year == 2010
        assert release.season is None
        assert release.episode is None

        # Technical tokens and release group.
        assert release.quality == "1080p"
        assert release.source == "BluRay"
        assert release.codec == "x264"
        assert release.group == "GROUP"

        assert release.media_type == "movie"

    def test_movie_multi_word_title(self):
        """A multi-word title keeps its dot separators."""
        release = _parse("The.Dark.Knight.2008.2160p.UHD.BluRay.x265-TERMINAL")

        assert release.title == "The.Dark.Knight"
        assert release.year == 2008
        assert release.quality == "2160p"

    def test_movie_without_year_still_movie_if_tech_present(self):
        """Tech markers alone (no season, no year) still classify as movie."""
        release = _parse("UntitledFilm.1080p.WEBRip.x264-GRP")

        assert release.media_type == "movie"
        assert release.year is None
|
|
|
|
|
|
class TestParseEdgeCases:
    """Site tags, unusual separators, malformed names and unknown types."""

    def test_site_tag_prefix_stripped(self):
        """A leading ``[ site ]`` tag is captured and the rest still parses."""
        release = _parse("[ OxTorrent.vc ] The.Title.S01E01.1080p.WEB.x265-GRP")

        assert release.site_tag == "OxTorrent.vc"
        assert release.parse_path == "sanitized"
        assert (release.season, release.episode) == (1, 1)

    def test_site_tag_suffix_stripped(self):
        """A trailing ``[site]`` tag is captured as well."""
        release = _parse("The.Title.S01E01.1080p.WEB.x265-NTb[TGx]")

        assert release.site_tag == "TGx"
        # With a suffix tag the only brackets are in the tag itself, so the
        # name is clean once the tag has been stripped.
        assert release.season == 1

    def test_irrecoverably_malformed(self):
        """A name that stays malformed falls back to the unknown defaults."""
        # "@" is a forbidden character that _sanitize does not strip, so the
        # name remains malformed.
        release = _parse("foo@bar@baz")

        assert release.media_type == "unknown"
        assert release.parse_path == "ai"
        assert release.group == "UNKNOWN"

    def test_empty_unknown_when_no_evidence(self):
        """No season, no year and no tech markers means ``unknown``."""
        release = _parse("Some.Random.Title")

        assert release.media_type == "unknown"

    def test_missing_group_defaults_to_unknown(self):
        """Without a ``-GROUP`` suffix the group is ``"UNKNOWN"``."""
        release = _parse("Movie.2020.1080p.WEBRip.x265")

        assert release.group == "UNKNOWN"

    def test_yts_bracket_release(self):
        """YTS-style names: spaces, a parenthesised year, bracketed tokens."""
        # The tokenizer has to treat ' ', '(', ')', '[' and ']' transparently.
        release = _parse("The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]")

        assert release.title == "The.Father"
        assert release.year == 2020
        assert release.quality == "1080p"
        assert release.source == "WEBRip"
        assert release.audio_channels == "5.1"
        assert release.media_type == "movie"

    def test_human_friendly_spaces(self):
        """Plain spaces (no brackets) work as token separators."""
        release = _parse("Inception 2010 1080p BluRay x264-GROUP")

        assert release.title == "Inception"
        assert release.year == 2010
        assert release.quality == "1080p"
        assert release.codec == "x264"
        assert release.group == "GROUP"
        assert release.media_type == "movie"

    def test_underscore_separators(self):
        """Old usenet style: underscores between tokens."""
        release = _parse("Some_Show_S01E01_1080p_WEB_x265-GRP")

        assert (release.season, release.episode) == (1, 1)
        assert release.quality == "1080p"
        assert release.group == "GRP"
|
|
|
|
|
|
class TestParseAudioVideoEdition:
    """Extraction of audio, language and edition tokens."""

    def test_audio_codec_and_channels(self):
        """The ``5.1`` channel layout is picked up next to the audio codec."""
        release = _parse("Movie.2020.1080p.BluRay.DTS.5.1.x264-GRP")

        assert release.audio_channels == "5.1"

    def test_language_token(self):
        """A ``MULTI`` token ends up in ``languages``."""
        release = _parse("Movie.2020.MULTI.1080p.WEBRip.x265-GRP")

        found_languages = release.languages
        assert "MULTI" in found_languages

    def test_edition_token(self):
        """An ``UNRATED`` token is reported as the edition."""
        release = _parse("Movie.2020.UNRATED.1080p.BluRay.x264-GRP")

        assert release.edition == "UNRATED"
|
|
|
|
|
|
class TestParsedReleaseFolderNames:
    """Folder-name and filename builders exposed by ``ParsedRelease``."""

    def _parsed_tv(self) -> ParsedRelease:
        """Return the parsed single-episode TV release used as a fixture."""
        return _parse("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

    def _parsed_movie(self) -> ParsedRelease:
        """Return the parsed movie release used as a fixture."""
        return _parse("Inception.2010.1080p.BluRay.x264-GROUP")

    def test_show_folder_name(self):
        """Show folder = title, year, tech string and group."""
        release = self._parsed_tv()

        folder = release.show_folder_name("Oz", 1997)
        assert folder == "Oz.1997.1080p.WEBRip.x265-KONTRAST"

    def test_show_folder_name_uses_already_safe_title(self):
        """A title sanitized by the caller yields a folder free of ':' / '?'."""
        # Option B: sanitizing happens at the use-case boundary, i.e. callers
        # run kb.sanitize_for_fs(...) themselves before passing the title in.
        release = self._parsed_tv()
        safe_title = _KB.sanitize_for_fs("Oz: The Series?")

        folder = release.show_folder_name(safe_title, 1997)

        for forbidden in (":", "?"):
            assert forbidden not in folder

    def test_season_folder_name_strips_episode(self):
        """The season folder keeps ``Sxx`` but drops the ``Exx`` token."""
        release = self._parsed_tv()

        folder = release.season_folder_name()

        assert "S03" in folder
        assert "E01" not in folder

    def test_season_folder_name_multi_episode(self):
        """Every episode token of a multi-episode name is dropped."""
        release = _parse("Archer.S14E09E10E11.1080p.WEB.x265-GRP")

        folder = release.season_folder_name()

        assert "S14" in folder
        for episode_token in ("E09", "E10", "E11"):
            assert episode_token not in folder

    def test_episode_filename_with_title(self):
        """With an episode title the filename carries code, title and group."""
        release = self._parsed_tv()

        filename = release.episode_filename("The Routine", "mkv")

        assert filename.endswith(".mkv")
        for fragment in ("S03E01", "The.Routine", "KONTRAST"):
            assert fragment in filename

    def test_episode_filename_without_title(self):
        """Passing ``None`` as the title still yields code and extension."""
        release = self._parsed_tv()

        filename = release.episode_filename(None, "mkv")

        assert filename.endswith(".mkv")
        assert "S03E01" in filename

    def test_episode_filename_strips_ext_dot(self):
        """``"mkv"`` and ``".mkv"`` produce the same single-dot filename."""
        release = self._parsed_tv()

        from_bare_ext = release.episode_filename(None, "mkv")
        from_dotted_ext = release.episode_filename(None, ".mkv")

        assert from_bare_ext == from_dotted_ext
        assert "..mkv" not in from_bare_ext

    def test_movie_folder_name(self):
        """Movie folder = title, year, tech string and group."""
        release = self._parsed_movie()

        folder = release.movie_folder_name("Inception", 2010)
        assert folder == "Inception.2010.1080p.BluRay.x264-GROUP"

    def test_movie_filename(self):
        """Movie filename = movie folder name plus the extension."""
        release = self._parsed_movie()

        filename = release.movie_filename("Inception", 2010, "mkv")
        assert filename == "Inception.2010.1080p.BluRay.x264-GROUP.mkv"
|
|
|
|
|
|
class TestParsedReleaseInvariants:
    """Properties of ``ParsedRelease`` that hold for any parsed name."""

    def test_raw_is_preserved(self):
        """``raw`` is the exact input string."""
        original_name = "Oz.S03E01.1080p.WEBRip.x265-KONTRAST"

        assert _parse(original_name).raw == original_name

    def test_languages_defaults_to_empty_tuple_not_none(self):
        """Without a language token ``languages`` is ``()`` (frozen VO)."""
        release = _parse("Movie.2020.1080p.BluRay.x264-GRP")

        assert release.languages == ()

    def test_tech_string_joined(self):
        """Quality, source and codec are dot-joined in ``tech_string``."""
        release = _parse("Movie.2020.1080p.BluRay.x264-GRP")

        assert release.tech_string == "1080p.BluRay.x264"

    def test_tech_string_partial(self):
        """A codec-only release (no quality/source) has tech_string == codec."""
        release = _parse("Show.S01E01.x265-GRP")

        assert release.tech_string == "x265"
        assert release.codec == "x265"
        assert release.quality is None
        assert release.source is None

    @pytest.mark.parametrize(
        "name,expected_type",
        [
            ("Show.S01E01.1080p.WEB.x265-GRP", "tv_show"),
            ("Movie.2020.1080p.BluRay.x264-GRP", "movie"),
            ("Random.Title.With.Nothing", "unknown"),
        ],
    )
    def test_media_type_inference(self, name, expected_type):
        """Each sample name is classified as the expected media type."""
        release = _parse(name)

        assert release.media_type == expected_type
|