test(fixtures): seed PATH OF PAIN bucket with 10 worst-case fixtures

10 pathological release names mined from the real downloads folder.
Each fixture locks in the current parse_release output (including
its silent losses and false positives) so future parser improvements
are intentional, not silent drift.

Cases:
- Khruangbin yt-dlp slug (UTF-8 wide pipe '｜' U+FF5C, tail of YT ID as group)
- Deutschland 83-86-89 franchise box (group=S03 misdetection)
- Chérie Le BéBé (accented chars preserved, VFF language)
- Jimmy Carr 9-word stand-up special title
- [ OxTorrent.vc ] prefix + XviD codec (site_tag prefix)
- Prodiges S12E01 with episode title + air-date silently lost
- The Prodigy: apostrophe + Blu-ray dash + 1080i + multi-word audio
  = full AI-path degeneration (everything UNKNOWN)
- Sleaford Mods yt-dlp slug (YT ID glued to year)
- Super Mario Bros [FR-EN] (bilingual tag mistaken for group)
- Gilmore Girls Complete S01-S07 (the well-behaved exception:
  COMPLETE token correctly drives tv_complete + REPACK + 10bit)

Also adds shitty + path_of_pain to the per-bucket sanity assertion.

Suite: 1020 passed, 8 skipped.
This commit is contained in:
2026-05-18 15:57:56 +02:00
parent c1831e3f46
commit 273510dff8
12 changed files with 377 additions and 0 deletions
+2
View File
@@ -52,3 +52,5 @@ def test_at_least_one_fixture_per_bucket() -> None:
"""Each bucket should hold at least one case once populated."""
buckets = {f.name.split("/")[0] for f in FIXTURES}
assert "easy" in buckets, "EASY bucket must have at least one fixture"
assert "shitty" in buckets, "SHITTY bucket must have at least one fixture"
assert "path_of_pain" in buckets, "PATH_OF_PAIN bucket must have at least one fixture"
@@ -0,0 +1,31 @@
release_name: "Cherie.J.ai.Agrandi.Le.BéBé.1992.VFF.1080p.BluRay.AC3.x264-NOTAG.mkv"
# Accented chars ('BéBé') survive the tokenizer untouched — they're preserved
# in the title literally. VFF (Version Francophone Française, i.e. the
# France-French dub) recognized as language.
# Trailing '.mkv' is also stripped correctly (group=NOTAG, not NOTAG.mkv).
# Note: this release name *also* exists in an ASCII-stripped variant in the
# SHITTY bucket (cherie_jai_agrandi_apostrophe). Both forms are real downloads.
parsed:
title: "Cherie.J.ai.Agrandi.Le.BéBé"
year: 1992
season: null
episode: null
quality: "1080p"
source: "BluRay"
codec: "x264"
group: "NOTAG"
tech_string: "1080p.BluRay.x264"
media_type: "movie"
languages: ["VFF"]
audio_codec: "AC3"
parse_path: "direct"
is_season_pack: false
tree:
- "Cherie.J.ai.Agrandi.Le.BéBé.1992.VFF.1080p.BluRay.AC3.x264-NOTAG.mkv"
routing:
library:
- "Cherie.J.ai.Agrandi.Le.BéBé.1992.VFF.1080p.BluRay.AC3.x264-NOTAG.mkv"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,55 @@
release_name: "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)"
# Pathological franchise box-set:
# - Title contains year-suffix range "83-86-89" (3 years glued)
# - Season range expressed twice: "Season 1-3" AND "S01-S03"
# - All tech wrapped in parentheses (uncommon)
# - Group name "Kappa" is the *last word inside parens*, not after a dash
# Result: parser captures HEVC + 10bit + AAC + 5.1 + GERMAN but mistakes
# "S03" for the group, and media_type degenerates to movie (no episode).
# This is a 3-season franchise needing manual splitting into:
# Deutschland.83 / Deutschland.86 / Deutschland.89
parsed:
title: "Deutschland.83-86-89"
year: 2015
season: null
episode: null
quality: "1080p"
source: "BluRay"
codec: "HEVC"
group: "S03"
tech_string: "1080p.BluRay.HEVC"
media_type: "movie"
languages: ["GERMAN"]
audio_codec: "AAC"
audio_channels: "5.1"
bit_depth: "10bit"
parse_path: "direct"
is_season_pack: false
tree:
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 1/"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 1/Deutschland.83.S01E01.mkv"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 2/"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 2/Deutschland.86.S02E01.mkv"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 3/"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 3/Deutschland.89.S03E01.mkv"
# Routing will need manual user disambiguation — the 3 seasons go to 3
# different shows in library. Captured here as a "single library bucket"
# placeholder; the organiser will reject this and ask for input.
routing:
library:
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 1/Deutschland.83.S01E01.mkv"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 2/Deutschland.86.S02E01.mkv"
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 3/Deutschland.89.S03E01.mkv"
torrents:
- "Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/"
seed_hardlinks:
- source: "library/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 1/Deutschland.83.S01E01.mkv"
target: "torrents/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 1/Deutschland.83.S01E01.mkv"
- source: "library/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 2/Deutschland.86.S02E01.mkv"
target: "torrents/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 2/Deutschland.86.S02E01.mkv"
- source: "library/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 3/Deutschland.89.S03E01.mkv"
target: "torrents/Deutschland 83-86-89 (2015) Season 1-3 S01-S03 (1080p BluRay x265 HEVC 10bit AAC 5.1 German Kappa)/Season 3/Deutschland.89.S03E01.mkv"
@@ -0,0 +1,43 @@
release_name: "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH"
# 'Complete' token correctly drives media_type=tv_complete (no INTEGRALE
# needed). 'S01-S07' range is recognized as "complete series" even though
# the range parsing for individual seasons doesn't exist yet — the
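# Complete keyword wins here. 10bit + HEVC are captured and group is
# correctly identified as MONOLITH.
# NOTE(review): REPACK does not appear in any parsed field below — confirm
# whether it is meant to be surfaced or is silently dropped.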
# This one is the GOOD case in path_of_pain — proves the COMPLETE path
# handles range syntax gracefully *when COMPLETE is explicit*.
parsed:
title: "Gilmore.Girls"
year: null
season: null
episode: null
quality: "1080p"
source: "WEB-DL"
codec: "HEVC"
group: "MONOLITH"
tech_string: "1080p.WEB-DL.HEVC"
media_type: "tv_complete"
bit_depth: "10bit"
edition: "COMPLETE"
parse_path: "direct"
is_season_pack: false
tree:
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/"
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 01/"
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 01/Gilmore.Girls.S01E01.1080p.WEB-DL.x265-MONOLITH.mkv"
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 07/"
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 07/Gilmore.Girls.S07E22.1080p.WEB-DL.x265-MONOLITH.mkv"
routing:
library:
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 01/Gilmore.Girls.S01E01.1080p.WEB-DL.x265-MONOLITH.mkv"
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 07/Gilmore.Girls.S07E22.1080p.WEB-DL.x265-MONOLITH.mkv"
torrents:
- "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/"
seed_hardlinks:
- source: "library/Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 01/Gilmore.Girls.S01E01.1080p.WEB-DL.x265-MONOLITH.mkv"
target: "torrents/Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 01/Gilmore.Girls.S01E01.1080p.WEB-DL.x265-MONOLITH.mkv"
- source: "library/Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 07/Gilmore.Girls.S07E22.1080p.WEB-DL.x265-MONOLITH.mkv"
target: "torrents/Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH/Season 07/Gilmore.Girls.S07E22.1080p.WEB-DL.x265-MONOLITH.mkv"
@@ -0,0 +1,32 @@
release_name: "Jimmy.Carr.The.Best.of.Ultimate.Gold.Greatest.Hits.2019.VOSTFR.1080p.WEBRip.x264.AAC.5.1-RARBG.mkv"
# Stand-up comedy special: a 9-word title that LOOKS LIKE a movie + subtitle
# but is actually one show name. Parser correctly treats it as one title.
# media_type=movie is the right call (specials behave like one-shot movies),
# even though the right home is closer to a "specials/" bucket.
# VOSTFR (Version Originale Sous-Titrée en Français) recognized as language.
parsed:
title: "Jimmy.Carr.The.Best.of.Ultimate.Gold.Greatest.Hits"
year: 2019
season: null
episode: null
quality: "1080p"
source: "WEBRip"
codec: "x264"
group: "RARBG"
tech_string: "1080p.WEBRip.x264"
media_type: "movie"
languages: ["VOSTFR"]
audio_codec: "AAC"
audio_channels: "5.1"
parse_path: "direct"
is_season_pack: false
tree:
- "Jimmy.Carr.The.Best.of.Ultimate.Gold.Greatest.Hits.2019.VOSTFR.1080p.WEBRip.x264.AAC.5.1-RARBG.mkv"
routing:
library:
- "Jimmy.Carr.The.Best.of.Ultimate.Gold.Greatest.Hits.2019.VOSTFR.1080p.WEBRip.x264.AAC.5.1-RARBG.mkv"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,32 @@
release_name: "Khruangbin Austin City Limits Music Festival 2024 Full Set [V_-7WWPPeBs].webm"
# yt-dlp slug: UTF-8 wide pipe '｜' (U+FF5C, not the ASCII '|'), trailing
# YouTube video ID in brackets, .webm extension. Parser extracts the year
# (2024) correctly but mistakes '7WWPPeBs' — the part of the YouTube ID
# 'V_-7WWPPeBs' after its last dash — for a release group, and the wide
# pipe survives the tokenizer (not a separator).
# This is a concert recording — closer to "live music" than "movie", but
# media_type=movie is the current degenerate best guess.
parsed:
title: "Khruangbin..Austin.City.Limits.Music.Festival"
year: 2024
season: null
episode: null
quality: null
source: null
codec: null
group: "7WWPPeBs"
tech_string: ""
media_type: "movie"
parse_path: "direct"
is_season_pack: false
# Concerts/live downloads are inherently un-classifiable. Will likely live
# in a 'concerts/' library bucket eventually — separate from movies/tv.
tree:
- "Khruangbin Austin City Limits Music Festival 2024 Full Set [V_-7WWPPeBs].webm"
routing:
library:
- "Khruangbin Austin City Limits Music Festival 2024 Full Set [V_-7WWPPeBs].webm"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,40 @@
release_name: "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT"
# Site-tag PREFIX (rare position — usually trailing). Parser correctly
# identifies '[ OxTorrent.vc ]' as a site_tag and strips it
# (parse_path="sanitized"). XviD codec preserved. Dash *inside* title
# ("Moi-Meme") preserved untouched — good. No quality token because the
# release name carries no resolution marker (common for SD XviD encodes);
# tech_string falls back to source+codec only.
parsed:
title: "La.Meilleure.Version.De.Moi-Meme"
year: null
season: 1
episode: null
quality: null
source: "WEB-DL"
codec: "XviD"
group: "ZT"
tech_string: "WEB-DL.XviD"
media_type: "tv_show"
site_tag: "OxTorrent.vc"
languages: ["FRENCH"]
parse_path: "sanitized"
is_season_pack: true
tree:
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/"
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E01.FRENCH.WEB-DL.XviD-ZT.avi"
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E02.FRENCH.WEB-DL.XviD-ZT.avi"
routing:
library:
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E01.FRENCH.WEB-DL.XviD-ZT.avi"
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E02.FRENCH.WEB-DL.XviD-ZT.avi"
torrents:
- "[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/"
seed_hardlinks:
- source: "library/[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E01.FRENCH.WEB-DL.XviD-ZT.avi"
target: "torrents/[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E01.FRENCH.WEB-DL.XviD-ZT.avi"
- source: "library/[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E02.FRENCH.WEB-DL.XviD-ZT.avi"
target: "torrents/[ OxTorrent.vc ] La.Meilleure.Version.De.Moi-Meme.S01.FRENCH.WEB-DL.XviD-ZT/La.Meilleure.Version.De.Moi-Meme.S01E02.FRENCH.WEB-DL.XviD-ZT.avi"
@@ -0,0 +1,33 @@
release_name: "Prodiges.S12E01.1ere.demi-finale.2025-12-18.FRENCH.1080p.WEB.H264-THESYNDiCATE.mkv"
# Surprisingly the parser handles this reasonably: title=Prodiges, S12E01,
# FRENCH, group=THESYNDiCATE. The pathological parts are *silently lost*:
# - Episode title ("1ere.demi-finale") evaporates between season-episode
# detection and tech-token detection
# - Air-date ("2025-12-18") is also lost (could disambiguate vs. TMDB)
# Captured here because the future organiser may want both bits of metadata
# (episode title for the file rename, date for TMDB matching).
parsed:
title: "Prodiges"
year: null
season: 12
episode: 1
episode_end: null
quality: "1080p"
source: "WEB"
codec: "H264"
group: "THESYNDiCATE"
tech_string: "1080p.WEB.H264"
media_type: "tv_show"
languages: ["FRENCH"]
parse_path: "direct"
is_season_pack: false
tree:
- "Prodiges.S12E01.1ere.demi-finale.2025-12-18.FRENCH.1080p.WEB.H264-THESYNDiCATE.mkv"
routing:
library:
- "Prodiges.S12E01.1ere.demi-finale.2025-12-18.FRENCH.1080p.WEB.H264-THESYNDiCATE.mkv"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,31 @@
release_name: "SLEAFORD MODS Live Glastonbury June 27th 2015-niNjHn8abyY.mp4"
# yt-dlp filename: triple space between band name and event, no canonical
# tech markers, dashed YouTube video ID glued to the year, .mp4 extension
# preserved in the title. Parser:
# - Squashes the triple space (good)
# - Mistakes 'niNjHn8abyY' (the YT ID) for a release group (false positive)
# - Keeps the .mp4 in the title (no extension stripping in this path)
# - media_type=unknown (no year/season/episode/tech detected)
parsed:
title: "SLEAFORD.MODS.Live.Glastonbury.June.27th.2015-niNjHn8abyY.mp4"
year: null
season: null
episode: null
quality: null
source: null
codec: null
group: "niNjHn8abyY"
tech_string: ""
media_type: "unknown"
parse_path: "direct"
is_season_pack: false
tree:
- "SLEAFORD MODS Live Glastonbury June 27th 2015-niNjHn8abyY.mp4"
routing:
library:
- "SLEAFORD MODS Live Glastonbury June 27th 2015-niNjHn8abyY.mp4"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,32 @@
release_name: "Super Mario Bros. le film [FR-EN] (2023).mkv"
# Hybrid English/French marketing title with:
# - Trailing period after 'Bros' that is part of the title abbreviation
# (not a separator), but tokenizer treats it as one
# - Bilingual tag '[FR-EN]' that should map to MULTI but doesn't
# - Year in parentheses (rare in releases — usually bare)
# - .mkv at the end
# Result: 'EN' lifted as group (false positive from '[FR-EN]'), year correctly
# extracted from parens, 'FR-EN' kept as a non-language token glued to title.
parsed:
title: "Super.Mario.Bros.le.film.FR-EN"
year: 2023
season: null
episode: null
quality: null
source: null
codec: null
group: "EN"
tech_string: ""
media_type: "movie"
parse_path: "direct"
is_season_pack: false
tree:
- "Super Mario Bros. le film [FR-EN] (2023).mkv"
routing:
library:
- "Super Mario Bros. le film [FR-EN] (2023).mkv"
torrents: []
seed_hardlinks: []
@@ -0,0 +1,35 @@
release_name: "The Prodigy World's on Fire 2011 Blu-ray Remux 1080i AVC DTS-HD MA 5.1 - KRaLiMaRKo.mkv"
# Apocalypse case combining every horror:
# - Unescaped apostrophe ("World's") → forces parse_path="ai" fallback
# - Spaces AND dashes used as separators inconsistently
# - "Blu-ray" with a dash (vs. canonical BluRay)
# - "1080i" interlaced flag (not 1080p)
# - "DTS-HD MA 5.1" multi-word audio codec
# - " - GROUP.mkv" trailing format (space-dash-space before group)
# - Trailing .mkv extension survives in title
# Result: total degeneration — UNKNOWN across the board, title=raw input.
# Once the apostrophe + multi-word-audio + 1080i are handled this fixture
# should be revisited. For now: anti-regression of the failure shape.
parsed:
title: "The Prodigy World's on Fire 2011 Blu-ray Remux 1080i AVC DTS-HD MA 5.1 - KRaLiMaRKo.mkv"
year: null
season: null
episode: null
quality: null
source: null
codec: null
group: "UNKNOWN"
tech_string: ""
media_type: "unknown"
parse_path: "ai"
is_season_pack: false
tree:
- "The Prodigy World's on Fire 2011 Blu-ray Remux 1080i AVC DTS-HD MA 5.1 - KRaLiMaRKo.mkv"
routing:
library:
- "The Prodigy World's on Fire 2011 Blu-ray Remux 1080i AVC DTS-HD MA 5.1 - KRaLiMaRKo.mkv"
torrents: []
seed_hardlinks: []