Files
alfred/tests/domain/test_release_parser.py
T
francwa 1723b9fa53 feat: release parser, media type detection, ffprobe integration
Replace the old domain/media release parser with a full rewrite under
domain/release/:
- ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" |
  "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path,
  languages, audio_codec, audio_channels, bit_depth, hdr_format, edition
- Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize,
  _strip_site_tag) before token-level parsing
- Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…),
  HDR (DV.HDR10…) and editions (DIRECTORS.CUT…)
- Knowledge YAML: file_extensions, release_format, languages, audio,
  video, editions, sites/c411

New infrastructure:
- ffprobe.py — single-pass probe returning MediaInfo (video, audio
  tracks, subtitle tracks)
- find_video.py — locate first video file in a release folder

New application helpers:
- detect_media_type — filesystem-based type refinement
- enrich_from_probe — fill missing ParsedRelease fields from MediaInfo

New agent tools:
- analyze_release — parse + detect type + ffprobe in one call
- probe_media — standalone ffprobe for a specific file

New domain value object:
- MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py)

Testing CLIs:
- recognize_folders_in_downloads.py — full pipeline with colored output
- probe_video.py — display MediaInfo for a video file
2026-05-12 16:14:20 +02:00

462 lines
18 KiB
Python

"""
Tests for alfred.domain.release.release_parser
Real-data cases sourced from /mnt/testipool/downloads/.
Covers: parsing, normalisation, naming methods, edge cases.
"""
import pytest
from alfred.domain.release import ParsedRelease, parse_release
from alfred.domain.release.services import _normalise
from alfred.domain.release.value_objects import _sanitise_for_fs, _strip_episode_from_normalised
# ---------------------------------------------------------------------------
# _normalise
# ---------------------------------------------------------------------------
class TestNormalise:
    """Separator handling of ``_normalise`` on raw release names."""

    def test_dots_unchanged(self):
        """A name that is already dot-separated comes back as-is."""
        normalised = _normalise("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_spaces_become_dots(self):
        """Every space separator is turned into a dot."""
        normalised = _normalise("Oz S01 1080p WEBRip x265-KONTRAST")
        assert normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_double_dots_collapsed(self):
        """Runs of two dots are reduced to a single dot."""
        normalised = _normalise("Oz..S01..1080p")
        assert normalised == "Oz.S01.1080p"

    def test_leading_trailing_dots_stripped(self):
        """Dots at either end of the name are dropped."""
        normalised = _normalise(".Oz.S01.")
        assert normalised == "Oz.S01"

    def test_mixed_spaces_and_dots(self):
        """A space-separated multi-episode name leaves no space or '..' behind."""
        normalised = _normalise(
            "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        )
        # Only the absence of raw separators is checked, not the exact output.
        assert " " not in normalised
        assert ".." not in normalised
# ---------------------------------------------------------------------------
# _sanitise_for_fs
# ---------------------------------------------------------------------------
class TestSanitiseForFs:
    """Character filtering performed by ``_sanitise_for_fs``."""

    def test_clean_string_unchanged(self):
        """Input without forbidden characters is returned untouched."""
        cleaned = _sanitise_for_fs("Oz.S01.1080p-KONTRAST")
        assert cleaned == "Oz.S01.1080p-KONTRAST"

    def test_removes_question_mark(self):
        """A trailing '?' is removed."""
        cleaned = _sanitise_for_fs("What's Up?")
        assert cleaned == "What's Up"

    def test_removes_colon(self):
        """A ':' is removed while the surrounding spaces stay."""
        cleaned = _sanitise_for_fs("He Said: She Said")
        assert cleaned == "He Said She Said"

    def test_removes_all_forbidden(self):
        """Each of ? : * " < > | and backslash is removed in one pass."""
        cleaned = _sanitise_for_fs('a?b:c*d"e<f>g|h\\i')
        assert cleaned == "abcdefghi"

    def test_apostrophe_kept(self):
        """The apostrophe is not a forbidden character and survives."""
        cleaned = _sanitise_for_fs("What's Up")
        assert cleaned == "What's Up"

    def test_ellipsis_kept(self):
        """Trailing dots are left in place."""
        cleaned = _sanitise_for_fs("What If...")
        assert cleaned == "What If..."
# ---------------------------------------------------------------------------
# _strip_episode_from_normalised
# ---------------------------------------------------------------------------
class TestStripEpisode:
    """Episode-marker removal done by ``_strip_episode_from_normalised``."""

    def test_strips_single_episode(self):
        """'S01E01' is reduced to 'S01'."""
        stripped = _strip_episode_from_normalised(
            "Oz.S01E01.1080p.WEBRip.x265-KONTRAST"
        )
        assert stripped == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_strips_multi_episode(self):
        """A chained marker such as 'S14E09E10E11' is reduced to 'S14'."""
        stripped = _strip_episode_from_normalised(
            "Archer.S14E09E10E11.1080p.HULU.WEB-DL-NTb"
        )
        assert stripped == "Archer.S14.1080p.HULU.WEB-DL-NTb"

    def test_season_pack_unchanged(self):
        """A name with no episode marker is returned as-is."""
        stripped = _strip_episode_from_normalised(
            "Oz.S01.1080p.WEBRip.x265-KONTRAST"
        )
        assert stripped == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_case_insensitive(self):
        """Lower-case 's01e01' is handled and the original casing is kept."""
        stripped = _strip_episode_from_normalised("oz.s01e01.1080p-KONTRAST")
        assert stripped == "oz.s01.1080p-KONTRAST"
# ---------------------------------------------------------------------------
# parse_release — Season packs (dots)
# ---------------------------------------------------------------------------
class TestSeasonPackDots:
    """Dot-separated season packs taken from real downloads.

    Examples include the Oz S01-S06 KONTRAST packs and Archer S03 EDGE2020.
    """

    def test_oz_s01_kontrast(self):
        """Plain season pack: every core field plus the type flags."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Oz"
        assert release.season == 1
        assert release.episode is None
        assert release.quality == "1080p"
        assert release.source == "WEBRip"
        assert release.codec == "x265"
        assert release.group == "KONTRAST"
        assert release.is_season_pack
        assert not release.is_movie

    def test_fallout_s02_kontrast(self):
        """A year token between title and season is parsed as the year."""
        release = parse_release("Fallout.2024.S02.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Fallout"
        assert release.year == 2024
        assert release.season == 2
        assert release.episode is None
        assert release.group == "KONTRAST"

    def test_archer_s03_edge2020(self):
        """Group name containing digits, audio tokens before the codec."""
        release = parse_release(
            "Archer.2009.S03.1080p.BluRay.DDP.5.1.x265-EDGE2020"
        )
        assert release.title == "Archer"
        assert release.year == 2009
        assert release.season == 3
        assert release.quality == "1080p"
        assert release.source == "BluRay"
        assert release.codec == "x265"
        assert release.group == "EDGE2020"

    def test_fargo_s05_hulu_webdl(self):
        """Streaming-service token and a bit-depth token right before the group."""
        release = parse_release(
            "Fargo.S05.1080p.HULU.WEB-DL.x265.10bit-Protozoan"
        )
        assert release.title == "Fargo"
        assert release.season == 5
        assert release.quality == "1080p"
        assert release.group == "Protozoan"

    def test_xfiles_s01_bluray_rarbg(self):
        """A hyphen inside the title must not be mistaken for the group dash."""
        release = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG")
        assert release.title == "The.X-Files"
        assert release.season == 1
        assert release.source == "BluRay"
        assert release.group == "RARBG"

    def test_gilmore_girls_s01_s07_repack(self):
        """Season range 'S01-S07': the first season number is the one reported."""
        release = parse_release(
            "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH"
        )
        assert release.season == 1
        assert release.group == "MONOLITH"

    def test_plot_against_america_4k(self):
        """2160p pack with HDR/audio tokens and a repeated codec token."""
        release = parse_release(
            "The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1.x265-SH3LBY"
        )
        assert release.title == "The.Plot.Against.America"
        assert release.season == 1
        assert release.quality == "2160p"
        assert release.group == "SH3LBY"

    def test_foundation_with_year_in_title(self):
        """The year following the title is split off into ``year``."""
        release = parse_release("Foundation.2021.S01.1080p.WEBRip.x265-RARBG")
        assert release.title == "Foundation"
        assert release.year == 2021
        assert release.season == 1
        assert release.group == "RARBG"

    def test_gen_v_s02(self):
        """A single-letter title word is kept as part of the title."""
        release = parse_release("Gen.V.S02.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Gen.V"
        assert release.season == 2
        assert release.group == "KONTRAST"
# ---------------------------------------------------------------------------
# parse_release — Single episodes (dots)
# ---------------------------------------------------------------------------
class TestSingleEpisodeDots:
    """Dot-separated releases from real downloads, mostly single episodes.

    Examples include the Fallout S02Exx ELiTE episodes and Mare of Easttown PSA.
    """

    def test_fallout_s02e01_elite(self):
        """Single episode: episode set, no episode_end, not a season pack."""
        release = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        assert release.title == "Fallout"
        assert release.year == 2024
        assert release.season == 2
        assert release.episode == 1
        assert release.episode_end is None
        assert release.group == "ELiTE"
        assert not release.is_season_pack

    def test_mare_of_easttown_with_episode_title_in_filename(self):
        """The release folder name is parsed, not the episode filenames.

        Episode filenames often embed the episode title; the folder name
        used here does not.
        """
        release = parse_release(
            "Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA"
        )
        assert release.title == "Mare.of.Easttown"
        assert release.season == 1
        assert release.group == "PSA"

    def test_it_welcome_to_derry_s01e01(self):
        """Multi-word title starting with an upper-case word."""
        release = parse_release("IT.Welcome.to.Derry.S01E01.1080p.x265-ELiTE")
        assert release.title == "IT.Welcome.to.Derry"
        assert release.season == 1
        assert release.episode == 1
        assert release.group == "ELiTE"

    def test_landman_s02e01(self):
        """Minimal single-episode name without a year."""
        release = parse_release("Landman.S02E01.1080p.x265-ELiTE")
        assert release.title == "Landman"
        assert release.season == 2
        assert release.episode == 1

    def test_prodiges_episode_with_number_in_title(self):
        """Two-digit season; group written in mixed case.

        The real file looked like "Prodiges.S12E01.1ere.demi-finale..." with
        accented characters in the episode title.
        """
        release = parse_release("Prodiges.S12E01.1080p.WEB.H264-THESYNDiCATE")
        assert release.title == "Prodiges"
        assert release.season == 12
        assert release.episode == 1
        assert release.group == "THESYNDiCATE"
# ---------------------------------------------------------------------------
# parse_release — Multi-episode
# ---------------------------------------------------------------------------
class TestMultiEpisode:
    """Releases whose marker chains several episodes (SxxEyyEzz...)."""

    def test_archer_triple_episode(self):
        """Dotted form of the real triple-episode Archer release.

        Space-separated original:
        "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        """
        release = parse_release(
            "Archer.2009.S14E09E10E11.Into.the.Cold.1080p.HULU.WEB-DL.DDP5.1.H.264-NTb"
        )
        assert release.season == 14
        assert release.episode == 9
        # Only the first E-pair (E09, E10) is expected; E11 is not reflected.
        assert release.episode_end == 10
# ---------------------------------------------------------------------------
# parse_release — Movies
# ---------------------------------------------------------------------------
class TestMovies:
    """Movie releases: names carrying a year but no season/episode marker."""

    def test_another_round_yts(self):
        """Normalised form of "Another Round (2020) [1080p] [BluRay] [YTS.MX]"."""
        movie = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        assert movie.is_movie
        assert movie.title == "Another.Round"
        assert movie.year == 2020
        assert movie.quality == "1080p"
        assert movie.source == "BluRay"
        assert movie.group == "YTS"

    def test_godzilla_minus_one(self):
        """Three-word title with bit-depth and audio tokens before the group."""
        movie = parse_release(
            "Godzilla.Minus.One.2023.1080p.BluRay.x265.10bit.AAC5.1-YTS"
        )
        assert movie.title == "Godzilla.Minus.One"
        assert movie.year == 2023
        assert movie.is_movie
        assert movie.group == "YTS"

    def test_deadwood_movie_2019(self):
        """The word 'Movie' inside the title does not disturb year/group."""
        movie = parse_release("Deadwood.The.Movie.2019.1080p.BluRay.x265-RARBG")
        assert movie.year == 2019
        assert movie.is_movie
        assert movie.group == "RARBG"

    def test_revolver_2005_bluray(self):
        """Single-word title followed directly by the year."""
        movie = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        assert movie.title == "Revolver"
        assert movie.year == 2005
        assert movie.is_movie

    def test_the_xfiles_movie_1998(self):
        """Dotted 'X.Files' variant with a year is seen as a movie."""
        movie = parse_release("The.X.Files.1998.1080p.BluRay.x265-RARBG")
        assert movie.year == 1998
        assert movie.is_movie
        assert movie.group == "RARBG"

    def test_movie_no_group(self):
        """Without a trailing '-GROUP' the group is reported as "UNKNOWN"."""
        movie = parse_release("Jurassic.Park.1993.1080p.BluRay.x265")
        assert movie.is_movie
        assert movie.year == 1993
        assert movie.group == "UNKNOWN"

    def test_multi_language_movie(self):
        """A 'MULTi' language token sits between the year and the quality."""
        movie = parse_release(
            "Jumanji.1995.MULTi.1080p.DSNP.WEB.H265-THESYNDiCATE"
        )
        assert movie.year == 1995
        assert movie.group == "THESYNDiCATE"
# ---------------------------------------------------------------------------
# parse_release — Space-separated (no dots)
# ---------------------------------------------------------------------------
class TestSpaceSeparated:
    """Release names that use spaces instead of dots as separators."""

    def test_oz_spaces(self):
        """Space-separated season pack yields the usual fields."""
        release = parse_release("Oz S01 1080p WEBRip x265-KONTRAST")
        assert release.title == "Oz"
        assert release.season == 1
        assert release.quality == "1080p"
        assert release.group == "KONTRAST"

    def test_archer_spaces(self):
        """Space-separated multi-episode name: season, first episode, group."""
        release = parse_release(
            "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        )
        assert release.season == 14
        assert release.episode == 9
        assert release.group == "NTb"
# ---------------------------------------------------------------------------
# parse_release — tech_string
# ---------------------------------------------------------------------------
class TestTechString:
    """``tech_string`` and how it shows up in generated folder names."""

    def test_full_tech(self):
        """Quality, source and codec are joined with dots."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert release.tech_string == "1080p.WEBRip.x265"

    def test_tech_string_used_in_folder_name(self):
        """The show folder name contains the tech string."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        show_dir = release.show_folder_name("Oz", 1997)
        assert "1080p.WEBRip.x265" in show_dir

    def test_no_tech_fallback(self):
        """With no tech tokens in the name, the folder name carries "Unknown"."""
        release = parse_release("SomeShow.S01")
        show_dir = release.show_folder_name("SomeShow", 2020)
        assert "Unknown" in show_dir

    def test_4k_hdr(self):
        """2160p is picked up as quality even with HDR/audio tokens present."""
        release = parse_release(
            "The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1-SH3LBY"
        )
        assert release.quality == "2160p"
# ---------------------------------------------------------------------------
# ParsedRelease — naming methods
# ---------------------------------------------------------------------------
class TestNamingMethods:
    """Folder and file name builders exposed on ``ParsedRelease``."""

    def test_show_folder_name(self):
        """Title and year come from the arguments; tech and group from the release."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        show_dir = release.show_folder_name("Oz", 1997)
        assert show_dir == "Oz.1997.1080p.WEBRip.x265-KONTRAST"

    def test_show_folder_name_sanitises_title(self):
        """A colon in the supplied title is dropped and spaces become dots."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        show_dir = release.show_folder_name("Star Wars: Andor", 2022)
        assert ":" not in show_dir
        assert "Star.Wars.Andor" in show_dir

    def test_season_folder_name_from_season_pack(self):
        """For a season pack the season folder equals the release name."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        season_dir = release.season_folder_name()
        assert season_dir == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_season_folder_name_strips_episode(self):
        """For a single episode the 'E01' part is removed from the folder name."""
        release = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        season_dir = release.season_folder_name()
        assert season_dir == "Fallout.2024.S02.1080p.x265-ELiTE"

    def test_episode_filename_with_title(self):
        """The dotted episode title is inserted right after the season marker."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename("The Routine", ".mkv")
        assert filename == "Oz.S01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv"

    def test_episode_filename_with_episode_number(self):
        """Episode marker is kept; the expected filename carries no year."""
        release = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        filename = release.episode_filename("The Beginning", ".mkv")
        assert filename == "Fallout.S02E01.The.Beginning.1080p.x265-ELiTE.mkv"

    def test_episode_filename_without_episode_title(self):
        """Passing ``None`` as the title leaves the title segment out."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename(None, ".mp4")
        assert filename == "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4"

    def test_episode_filename_sanitises_episode_title(self):
        """'?' is removed from the episode title; the apostrophe is kept."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename("What's Up?", ".mkv")
        assert "?" not in filename
        assert "What's.Up" in filename

    def test_episode_filename_strips_leading_dot_from_ext(self):
        """The extension may be given with or without its leading dot."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        dotted_ext = release.episode_filename(None, ".mkv")
        bare_ext = release.episode_filename(None, "mkv")
        assert dotted_ext == bare_ext

    def test_movie_folder_name(self):
        """Movie folder: dotted title, year, tech string and group."""
        release = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        movie_dir = release.movie_folder_name("Another Round", 2020)
        assert movie_dir == "Another.Round.2020.1080p.BluRay.x264-YTS"

    def test_movie_filename(self):
        """Movie file: same stem as the folder name plus the extension."""
        release = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        filename = release.movie_filename("Another Round", 2020, ".mp4")
        assert filename == "Another.Round.2020.1080p.BluRay.x264-YTS.mp4"

    def test_movie_folder_same_as_show_folder(self):
        """Movie and show folder builders agree for identical arguments."""
        release = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        movie_dir = release.movie_folder_name("Revolver", 2005)
        show_dir = release.show_folder_name("Revolver", 2005)
        assert movie_dir == show_dir
# ---------------------------------------------------------------------------
# ParsedRelease — is_movie / is_season_pack
# ---------------------------------------------------------------------------
class TestMediaTypeFlags:
    """Consistency of the ``is_movie`` / ``is_season_pack`` flags."""

    def test_season_pack_is_not_movie(self):
        """Season marker without episode: season pack, not a movie."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert not release.is_movie
        assert release.is_season_pack

    def test_single_episode_is_not_season_pack(self):
        """Season + episode marker: neither a movie nor a season pack."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        assert not release.is_movie
        assert not release.is_season_pack

    def test_movie_is_not_season_pack(self):
        """Year without season marker: a movie, not a season pack."""
        release = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        assert release.is_movie
        assert not release.is_season_pack

    def test_no_season_no_year_treated_as_movie(self):
        """With no S/E marker at all, the release still counts as a movie."""
        release = parse_release("SomeContent.1080p.WEBRip.x265-GROUP")
        assert release.is_movie
# ---------------------------------------------------------------------------
# Tricky real-world releases
# ---------------------------------------------------------------------------
class TestRealWorldEdgeCases:
    """Awkward release names seen in practice."""

    def test_angel_integrale_multi(self):
        """'INTEGRALE' and 'MULTI' tokens sit between the year and the quality."""
        release = parse_release(
            "Angel.1999.INTEGRALE.MULTI.1080p.WEBRip.10bits.x265.DD-Jarod"
        )
        assert release.year == 1999
        assert release.quality == "1080p"
        assert release.source == "WEBRip"

    def test_group_unknown_when_no_dash(self):
        """No '-GROUP' suffix: the group is reported as "UNKNOWN"."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265")
        assert release.group == "UNKNOWN"

    def test_normalised_stored_on_parsed(self):
        """The dot-normalised name is exposed as ``normalised``."""
        release = parse_release("Oz S01 1080p WEBRip x265-KONTRAST")
        assert release.normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_raw_stored_as_is(self):
        """The untouched input string is exposed as ``raw``."""
        original_name = "Oz S01 1080p WEBRip x265-KONTRAST"
        release = parse_release(original_name)
        assert release.raw == original_name

    def test_hevc_codec(self):
        """With both 'x265' and 'HEVC' present, either is accepted as codec."""
        release = parse_release(
            "Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA"
        )
        assert release.codec in ("x265", "HEVC")
        assert release.group == "PSA"

    def test_xfiles_hyphen_in_title(self):
        """The hyphen of 'X-Files' stays inside the title."""
        release = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG")
        assert "X-Files" in release.title

    def test_foundation_s02_no_year(self):
        """No year token in the release name: ``year`` is None."""
        release = parse_release("Foundation.S02.1080p.x265-ELiTE")
        assert release.year is None
        assert release.season == 2
        assert release.group == "ELiTE"

    def test_slow_horses_two_groups_same_show(self):
        """Same show released by different groups in different seasons."""
        first_season = parse_release("Slow.Horses.S01.1080p.WEBRip.x265-RARBG")
        fourth_season = parse_release("Slow.Horses.S04.1080p.WEBRip.x265-KONTRAST")
        assert first_season.title == fourth_season.title == "Slow.Horses"
        assert first_season.group == "RARBG"
        assert fourth_season.group == "KONTRAST"