chore: sprint cleanup — language unification, parser unification, fossils removal
Several weeks of work accumulated without being committed. Grouped here for clarity; see CHANGELOG.md [Unreleased] for the user-facing summary. Highlights ---------- P1 #2 — ISO 639-2/B canonical migration - New Language VO + LanguageRegistry (alfred/domain/shared/knowledge/). - iso_languages.yaml as single source of truth for language codes. - SubtitleKnowledgeBase now delegates lookup to LanguageRegistry; subtitles.yaml only declares subtitle-specific tokens (vostfr, vf, vff, …). - SubtitlePreferences default → ["fre", "eng"]; subtitle filenames written as {iso639_2b}.srt (legacy fr.srt still read via alias). - Scanner: dropped _LANG_KEYWORDS / _SDH_TOKENS / _FORCED_TOKENS / SUBTITLE_EXTENSIONS hardcoded dicts. - Fixed: 'hi' token no longer marks SDH (conflicted with Hindi alias). - Added settings.min_movie_size_bytes (was a module constant). P1 #3 — Release parser unification + data-driven tokenizer - parse_release() is now the single source of truth for release-name parsing. - alfred/knowledge/release/separators.yaml declares the token separators used by the tokenizer (., space, [, ], (, ), _). New conventions can be added without code changes. - Tokenizer now splits on any configured separator instead of name.split('.'). Releases like 'The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]' parse via the direct path without sanitization fallback. - Site-tag extraction always runs first; well-formedness only rejects truly forbidden chars. - _parse_season_episode() extended with NxNN / NxNNxNN alt forms. - Removed dead helpers: _sanitize, _normalize. Domain cleanup - Deleted fossil services with zero production callers: alfred/domain/movies/services.py alfred/domain/tv_shows/services.py alfred/domain/subtitles/services.py (replaced by subtitles/services/ package) alfred/domain/subtitles/repositories.py - Split monolithic subtitle services into a package (identifier, matcher, placer, pattern_detector, utils) + dedicated knowledge/ package. 
- MediaInfo split into dedicated package (alfred/domain/shared/media/: audio, video, subtitle, info, matching). Persistence cleanup - Removed dead JSON repositories (movie/subtitle/tvshow_repository.py). Tests - Major expansion of the test suite organized to mirror the source tree. - Removed obsolete *_edge_cases test files superseded by structured tests. - Suite: 990 passed, 8 skipped. Misc - .gitignore: exclude env_backup/ and *.bak. - Adjustments across agent/llm, app.py, application/filesystem, and infrastructure/filesystem to align with the new domain layout.
This commit is contained in:
@@ -0,0 +1,283 @@
|
||||
"""Tests for ``alfred.domain.release`` — release-name parser.
|
||||
|
||||
Covers the public surface used by the resolver / move pipeline:

- ``parse_release`` — well-formed scene names (TV episodes, season packs,
  movies), site-tagged names, malformed names recovered via sanitization,
  and irrecoverable names that fall back to ``media_type="unknown"``.
- ``ParsedRelease`` — derived properties (``is_season_pack``,
  ``show_folder_name``, ``season_folder_name``, ``episode_filename``,
  ``movie_folder_name``, ``movie_filename``) including the Windows-forbidden
  character sanitizer and the episode-stripping helper for season folders.

These tests exercise the parser end-to-end through real YAML knowledge
files; no monkeypatching of the knowledge layer is performed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.domain.release.services import parse_release
|
||||
from alfred.domain.release.value_objects import ParsedRelease
|
||||
|
||||
|
||||
class TestParseTVEpisode:
    """TV releases: single episodes, multi-episode ranges and season packs."""

    def test_basic_tv_episode(self):
        """A canonical ``SxxEyy`` scene name fills in every expected field."""
        parsed = parse_release("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

        # Episode identity.
        assert parsed.title == "Oz"
        assert (parsed.season, parsed.episode) == (3, 1)
        assert parsed.episode_end is None

        # Technical tokens and release group.
        assert parsed.quality == "1080p"
        assert parsed.source == "WEBRip"
        assert parsed.codec == "x265"
        assert parsed.group == "KONTRAST"

        # Classification and the route the parser took.
        assert parsed.media_type == "tv_show"
        assert parsed.parse_path == "direct"
        assert parsed.is_season_pack is False

    def test_multi_episode(self):
        """``SxxEyyEzz`` yields both the first and the last episode number."""
        parsed = parse_release("Archer.S14E09E10.1080p.WEB.x265-GRP")

        assert parsed.season == 14
        assert (parsed.episode, parsed.episode_end) == (9, 10)

    def test_nxnn_alt_form(self):
        """The alternative ``1x05`` notation is read like ``S01E05``."""
        parsed = parse_release("Some.Show.1x05.720p.HDTV.x264-GRP")

        assert (parsed.season, parsed.episode) == (1, 5)
        assert parsed.episode_end is None
        assert parsed.media_type == "tv_show"

    def test_nxnnxnn_multi_episode_alt_form(self):
        """The alternative ``2x07x08`` notation carries an episode range."""
        parsed = parse_release("Some.Show.2x07x08.1080p.WEB.x265-GRP")

        assert parsed.season == 2
        assert (parsed.episode, parsed.episode_end) == (7, 8)

    def test_season_pack(self):
        """A season token without an episode is reported as a season pack."""
        parsed = parse_release("Oz.S03.1080p.WEBRip.x265-KONTRAST")

        assert parsed.season == 3
        assert parsed.episode is None
        assert parsed.is_season_pack is True
        assert parsed.media_type == "tv_show"
|
||||
|
||||
|
||||
class TestParseMovie:
    """Movie releases, with and without a year token."""

    def test_basic_movie(self):
        """A standard movie scene name fills in title, year and tech fields."""
        parsed = parse_release("Inception.2010.1080p.BluRay.x264-GROUP")

        assert parsed.title == "Inception"
        assert parsed.year == 2010

        # A movie carries no season/episode information.
        assert parsed.season is None
        assert parsed.episode is None

        assert parsed.quality == "1080p"
        assert parsed.source == "BluRay"
        assert parsed.codec == "x264"
        assert parsed.group == "GROUP"
        assert parsed.media_type == "movie"

    def test_movie_multi_word_title(self):
        """Multi-word titles stay dot-joined in ``title``."""
        parsed = parse_release(
            "The.Dark.Knight.2008.2160p.UHD.BluRay.x265-TERMINAL"
        )

        assert parsed.title == "The.Dark.Knight"
        assert parsed.year == 2008
        assert parsed.quality == "2160p"

    def test_movie_without_year_still_movie_if_tech_present(self):
        """Tech markers alone (no season, no year) still classify as a movie."""
        parsed = parse_release("UntitledFilm.1080p.WEBRip.x264-GRP")

        assert parsed.media_type == "movie"
        assert parsed.year is None
|
||||
|
||||
|
||||
class TestParseEdgeCases:
    """Site tags, unusual separators, malformed names and unknown media types."""

    def test_site_tag_prefix_stripped(self):
        """A leading ``[ site ]`` tag is extracted and the rest still parses."""
        parsed = parse_release(
            "[ OxTorrent.vc ] The.Title.S01E01.1080p.WEB.x265-GRP"
        )

        assert parsed.site_tag == "OxTorrent.vc"
        assert parsed.parse_path == "sanitized"
        assert (parsed.season, parsed.episode) == (1, 1)

    def test_site_tag_suffix_stripped(self):
        """A trailing ``[site]`` tag is extracted from the end of the name."""
        parsed = parse_release("The.Title.S01E01.1080p.WEB.x265-NTb[TGx]")

        assert parsed.site_tag == "TGx"
        # Once the bracketed tag is removed the remainder is a clean name,
        # so the season is still picked up.
        assert parsed.season == 1

    def test_irrecoverably_malformed(self):
        """A name containing forbidden characters falls through to ``ai``."""
        # '@' is a forbidden character, so the name stays malformed.
        parsed = parse_release("foo@bar@baz")

        assert parsed.media_type == "unknown"
        assert parsed.parse_path == "ai"
        assert parsed.group == "UNKNOWN"

    def test_empty_unknown_when_no_evidence(self):
        """No season, no year and no tech markers means ``unknown``."""
        parsed = parse_release("Some.Random.Title")

        assert parsed.media_type == "unknown"

    def test_missing_group_defaults_to_unknown(self):
        """Without a ``-GROUP`` suffix the group is the ``UNKNOWN`` placeholder."""
        parsed = parse_release("Movie.2020.1080p.WEBRip.x265")

        assert parsed.group == "UNKNOWN"

    def test_yts_bracket_release(self):
        """YTS-style names (spaces, parens, bracketed tokens) parse fully."""
        # The tokenizer has to treat ' ', '(', ')', '[' and ']' as separators.
        parsed = parse_release(
            "The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]"
        )

        assert parsed.title == "The.Father"
        assert parsed.year == 2020
        assert parsed.quality == "1080p"
        assert parsed.source == "WEBRip"
        assert parsed.audio_channels == "5.1"
        assert parsed.media_type == "movie"

    def test_human_friendly_spaces(self):
        """Plain spaces (no brackets) work as token separators."""
        parsed = parse_release("Inception 2010 1080p BluRay x264-GROUP")

        assert parsed.title == "Inception"
        assert parsed.year == 2010
        assert parsed.quality == "1080p"
        assert parsed.codec == "x264"
        assert parsed.group == "GROUP"
        assert parsed.media_type == "movie"

    def test_underscore_separators(self):
        """Old usenet style: underscores between tokens."""
        parsed = parse_release("Some_Show_S01E01_1080p_WEB_x265-GRP")

        assert (parsed.season, parsed.episode) == (1, 1)
        assert parsed.quality == "1080p"
        assert parsed.group == "GRP"
|
||||
|
||||
|
||||
class TestParseAudioVideoEdition:
    """Extraction of audio metadata, language tokens and edition tokens."""

    def test_audio_codec_and_channels(self):
        """A ``5.1`` channel layout following an audio codec is captured."""
        # NOTE(review): only the channel layout is asserted here; the audio
        # codec named in the test title is not checked.
        parsed = parse_release("Movie.2020.1080p.BluRay.DTS.5.1.x264-GRP")

        assert parsed.audio_channels == "5.1"

    def test_language_token(self):
        """A ``MULTI`` token ends up in ``languages``."""
        parsed = parse_release("Movie.2020.MULTI.1080p.WEBRip.x265-GRP")

        assert "MULTI" in parsed.languages

    def test_edition_token(self):
        """An ``UNRATED`` token is reported as the edition."""
        parsed = parse_release("Movie.2020.UNRATED.1080p.BluRay.x264-GRP")

        assert parsed.edition == "UNRATED"
|
||||
|
||||
|
||||
class TestParsedReleaseFolderNames:
    """``ParsedRelease`` helpers that build folder names and filenames."""

    def _parsed_tv(self) -> ParsedRelease:
        """Return the parsed single-episode TV fixture shared by the tests."""
        return parse_release("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

    def _parsed_movie(self) -> ParsedRelease:
        """Return the parsed movie fixture shared by the tests."""
        return parse_release("Inception.2010.1080p.BluRay.x264-GROUP")

    def test_show_folder_name(self):
        """Show folder = title, year, tech string and group."""
        folder = self._parsed_tv().show_folder_name("Oz", 1997)

        assert folder == "Oz.1997.1080p.WEBRip.x265-KONTRAST"

    def test_show_folder_name_strips_windows_chars(self):
        """Colons and question marks are Windows-forbidden and must go."""
        folder = self._parsed_tv().show_folder_name("Oz: The Series?", 1997)

        assert ":" not in folder
        assert "?" not in folder

    def test_season_folder_name_strips_episode(self):
        """The ``Exx`` token is dropped from the season folder; ``Sxx`` stays."""
        folder = self._parsed_tv().season_folder_name()

        assert "S03" in folder
        assert "E01" not in folder

    def test_season_folder_name_multi_episode(self):
        """Every episode token of a multi-episode release is dropped."""
        parsed = parse_release("Archer.S14E09E10E11.1080p.WEB.x265-GRP")
        folder = parsed.season_folder_name()

        assert "S14" in folder
        for episode_token in ("E09", "E10", "E11"):
            assert episode_token not in folder

    def test_episode_filename_with_title(self):
        """The episode title is embedded dot-separated in the filename."""
        filename = self._parsed_tv().episode_filename("The Routine", "mkv")

        assert filename.endswith(".mkv")
        assert "S03E01" in filename
        assert "The.Routine" in filename
        assert "KONTRAST" in filename

    def test_episode_filename_without_title(self):
        """Passing ``None`` as the title still yields a valid filename."""
        filename = self._parsed_tv().episode_filename(None, "mkv")

        assert filename.endswith(".mkv")
        assert "S03E01" in filename

    def test_episode_filename_strips_ext_dot(self):
        """``"mkv"`` and ``".mkv"`` give the same name, with a single dot."""
        parsed = self._parsed_tv()
        bare_ext = parsed.episode_filename(None, "mkv")
        dotted_ext = parsed.episode_filename(None, ".mkv")

        assert bare_ext == dotted_ext
        assert "..mkv" not in bare_ext

    def test_movie_folder_name(self):
        """Movie folder = title, year, tech string and group."""
        folder = self._parsed_movie().movie_folder_name("Inception", 2010)

        assert folder == "Inception.2010.1080p.BluRay.x264-GROUP"

    def test_movie_filename(self):
        """Movie filename = movie folder name plus the extension."""
        filename = self._parsed_movie().movie_filename("Inception", 2010, "mkv")

        assert filename == "Inception.2010.1080p.BluRay.x264-GROUP.mkv"
|
||||
|
||||
|
||||
class TestParsedReleaseInvariants:
    """Structural invariants that every ``ParsedRelease`` must satisfy."""

    def test_raw_is_preserved(self):
        """The original input string is kept verbatim in ``raw``."""
        original = "Oz.S03E01.1080p.WEBRip.x265-KONTRAST"

        assert parse_release(original).raw == original

    def test_languages_defaults_to_empty_list_not_none(self):
        """With no language token, ``languages`` is an empty list, not None."""
        parsed = parse_release("Movie.2020.1080p.BluRay.x264-GRP")

        assert parsed.languages == []

    def test_tech_string_joined(self):
        """Quality, source and codec are dot-joined into ``tech_string``."""
        parsed = parse_release("Movie.2020.1080p.BluRay.x264-GRP")

        assert parsed.tech_string == "1080p.BluRay.x264"

    def test_tech_string_partial(self):
        """Codec-only release (no quality/source): ``tech_string`` is the codec."""
        parsed = parse_release("Show.S01E01.x265-GRP")

        assert parsed.tech_string == "x265"
        assert parsed.codec == "x265"
        assert parsed.quality is None
        assert parsed.source is None

    @pytest.mark.parametrize(
        "name,expected_type",
        [
            ("Show.S01E01.1080p.WEB.x265-GRP", "tv_show"),
            ("Movie.2020.1080p.BluRay.x264-GRP", "movie"),
            ("Random.Title.With.Nothing", "unknown"),
        ],
    )
    def test_media_type_inference(self, name, expected_type):
        """``media_type`` is inferred from the evidence present in the name."""
        inferred = parse_release(name).media_type

        assert inferred == expected_type
|
||||
Reference in New Issue
Block a user