diff --git a/CHANGELOG.md b/CHANGELOG.md index 90f2316..9d70d2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,12 @@ callers). with intermediate values implied. Fixture `shitty/archer_multi_episode/` updated from anti-regression-of-bug to anti-regression-of-fix. +- **Season-range markers (`Sxx-yy`) are now recognized as + `tv_complete`.** `Der.Tatortreiniger.S01-06.GERMAN...` previously + parsed as `media_type=movie` with `S01-06` glued onto the title. + The parser now recognizes the range, sets `season=first`, + `media_type=tv_complete`, and removes the marker from the title. + `is_season_pack` flips to `true`. - **Pure-punctuation TITLE tokens are dropped at assembly.** Releases with surrounding ` - ` separators (`Vinyl - 1x01 - FHD`) previously produced `title="Vinyl.-"`. Such tokens (a stray dash, a wide pipe diff --git a/alfred/domain/release/parser/pipeline.py b/alfred/domain/release/parser/pipeline.py index 2a96edb..25856aa 100644 --- a/alfred/domain/release/parser/pipeline.py +++ b/alfred/domain/release/parser/pipeline.py @@ -91,14 +91,17 @@ def tokenize(name: str, kb: ReleaseKnowledge) -> tuple[list[Token], str | None]: def _parse_season_episode(text: str) -> tuple[int, int | None, int | None] | None: - """Parse a single token as ``SxxExx`` / ``SxxExxExx`` / ``Sxx`` / ``NxNN``. + """Parse a single token as ``SxxExx`` / ``SxxExxExx`` / ``Sxx`` / + ``Sxx-yy`` (season range) / ``NxNN``. Returns ``(season, episode, episode_end)`` or ``None`` if the token - is not a season/episode marker. + is not a season/episode marker. For ``Sxx-yy``, returns the first + season with no episode info — the caller is expected to detect the + range form and promote ``media_type`` to ``tv_complete`` separately. """ upper = text.upper() - # SxxExx form + # SxxExx form (and Sxx, Sxx-yy) if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit(): season = int(upper[1:3]) rest = upper[3:] @@ -106,6 +109,15 @@ def _parse_season_episode(text: str) -> tuple[int, int | None, int | None] | Non if not rest: return season, None, None + # Sxx-yy season-range form: capture the first season, treat as a + # complete-series marker (no episode info). + if ( + len(rest) == 3 + and rest[0] == "-" + and rest[1:3].isdigit() + ): + return season, None, None + episodes: list[int] = [] while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit(): episodes.append(int(rest[1:3])) @@ -645,6 +657,7 @@ def assemble( edition: str | None = None distributor: str | None = None languages: list[str] = [] + is_season_range = False for tok in annotated: # Skip non-primary members of a multi-token sequence. @@ -658,6 +671,16 @@ def assemble( parsed = _parse_season_episode(tok.text) if parsed is not None: season, episode, episode_end = parsed + # Detect Sxx-yy range form to flag it as a multi-season pack. + upper = tok.text.upper() + if ( + len(upper) == 6 + and upper[0] == "S" + and upper[1:3].isdigit() + and upper[3] == "-" + and upper[4:6].isdigit() + ): + is_season_range = True elif role is TokenRole.RESOLUTION: quality = tok.text elif role is TokenRole.SOURCE: @@ -705,6 +728,8 @@ def assemble( media_type = "documentary" elif upper_tokens & concert_tokens: media_type = "concert" + elif is_season_range: + media_type = "tv_complete" elif ( edition in {"COMPLETE", "INTEGRALE", "COLLECTION"} or upper_tokens & integrale_tokens diff --git a/tests/fixtures/releases/shitty/tatortreiniger_flat_multiseason/expected.yaml b/tests/fixtures/releases/shitty/tatortreiniger_flat_multiseason/expected.yaml index 80ca66f..b6a83c0 100644 --- a/tests/fixtures/releases/shitty/tatortreiniger_flat_multiseason/expected.yaml +++ b/tests/fixtures/releases/shitty/tatortreiniger_flat_multiseason/expected.yaml @@ -1,22 +1,22 @@ release_name: "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE" -# Tech debt: range syntax 'S01-06' is not recognized as TV — falls through -# to media_type=movie with the range glued onto the title. Captured here so a -# future ranger-aware parser change is intentional. +# Range syntax 'S01-06' is now recognized as a season-range marker: +# season=1 (first of the range), media_type=tv_complete, and the token +# no longer leaks into the title. parsed: - title: "Der.Tatortreiniger.S01-06" + title: "Der.Tatortreiniger" year: null - season: null + season: 1 episode: null quality: "1080p" source: "WEB" codec: "x264" group: "WAYNE" tech_string: "1080p.WEB.x264" - media_type: "movie" + media_type: "tv_complete" languages: ["GERMAN"] parse_path: "direct" - is_season_pack: false + is_season_pack: true tree: - "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE/"