fix(release/parser): recognize Sxx-yy season range as tv_complete
`Der.Tatortreiniger.S01-06.GERMAN...` previously parsed as a movie with 'S01-06' glued to the title. The parser now matches the season-range form in _parse_season_episode (returning season=first, episode=None), and the assemble step detects the range token to promote media_type to 'tv_complete'. The first season is exposed as `season` so `is_season_pack` fires (season is not None and episode is None) — useful for routing to a series root folder.

Fixture shitty/tatortreiniger_flat_multiseason/ updated:
- title: Der.Tatortreiniger.S01-06 → Der.Tatortreiniger
- season: null → 1
- media_type: movie → tv_complete
- is_season_pack: false → true
This commit is contained in:
@@ -23,6 +23,12 @@ callers).
|
|||||||
with intermediate values implied. Fixture
|
with intermediate values implied. Fixture
|
||||||
`shitty/archer_multi_episode/` updated from anti-regression-of-bug
|
`shitty/archer_multi_episode/` updated from anti-regression-of-bug
|
||||||
to anti-regression-of-fix.
|
to anti-regression-of-fix.
|
||||||
|
- **Season-range markers (`Sxx-yy`) are now recognized as
|
||||||
|
`tv_complete`.** `Der.Tatortreiniger.S01-06.GERMAN...` previously
|
||||||
|
parsed as `media_type=movie` with `S01-06` glued onto the title.
|
||||||
|
The parser now recognizes the range, sets `season=first`,
|
||||||
|
`media_type=tv_complete`, and removes the marker from the title.
|
||||||
|
`is_season_pack` flips to `true`.
|
||||||
- **Pure-punctuation TITLE tokens are dropped at assembly.** Releases
|
- **Pure-punctuation TITLE tokens are dropped at assembly.** Releases
|
||||||
with surrounding ` - ` separators (`Vinyl - 1x01 - FHD`) previously
|
with surrounding ` - ` separators (`Vinyl - 1x01 - FHD`) previously
|
||||||
produced `title="Vinyl.-"`. Such tokens (a stray dash, a wide pipe
|
produced `title="Vinyl.-"`. Such tokens (a stray dash, a wide pipe
|
||||||
|
|||||||
@@ -91,14 +91,17 @@ def tokenize(name: str, kb: ReleaseKnowledge) -> tuple[list[Token], str | None]:
|
|||||||
|
|
||||||
|
|
||||||
def _parse_season_episode(text: str) -> tuple[int, int | None, int | None] | None:
|
def _parse_season_episode(text: str) -> tuple[int, int | None, int | None] | None:
|
||||||
"""Parse a single token as ``SxxExx`` / ``SxxExxExx`` / ``Sxx`` / ``NxNN``.
|
"""Parse a single token as ``SxxExx`` / ``SxxExxExx`` / ``Sxx`` /
|
||||||
|
``Sxx-yy`` (season range) / ``NxNN``.
|
||||||
|
|
||||||
Returns ``(season, episode, episode_end)`` or ``None`` if the token
|
Returns ``(season, episode, episode_end)`` or ``None`` if the token
|
||||||
is not a season/episode marker.
|
is not a season/episode marker. For ``Sxx-yy``, returns the first
|
||||||
|
season with no episode info — the caller is expected to detect the
|
||||||
|
range form and promote ``media_type`` to ``tv_complete`` separately.
|
||||||
"""
|
"""
|
||||||
upper = text.upper()
|
upper = text.upper()
|
||||||
|
|
||||||
# SxxExx form
|
# SxxExx form (and Sxx, Sxx-yy)
|
||||||
if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
|
if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
|
||||||
season = int(upper[1:3])
|
season = int(upper[1:3])
|
||||||
rest = upper[3:]
|
rest = upper[3:]
|
||||||
@@ -106,6 +109,15 @@ def _parse_season_episode(text: str) -> tuple[int, int | None, int | None] | Non
|
|||||||
if not rest:
|
if not rest:
|
||||||
return season, None, None
|
return season, None, None
|
||||||
|
|
||||||
|
# Sxx-yy season-range form: capture the first season, treat as a
|
||||||
|
# complete-series marker (no episode info).
|
||||||
|
if (
|
||||||
|
len(rest) == 3
|
||||||
|
and rest[0] == "-"
|
||||||
|
and rest[1:3].isdigit()
|
||||||
|
):
|
||||||
|
return season, None, None
|
||||||
|
|
||||||
episodes: list[int] = []
|
episodes: list[int] = []
|
||||||
while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit():
|
while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit():
|
||||||
episodes.append(int(rest[1:3]))
|
episodes.append(int(rest[1:3]))
|
||||||
@@ -645,6 +657,7 @@ def assemble(
|
|||||||
edition: str | None = None
|
edition: str | None = None
|
||||||
distributor: str | None = None
|
distributor: str | None = None
|
||||||
languages: list[str] = []
|
languages: list[str] = []
|
||||||
|
is_season_range = False
|
||||||
|
|
||||||
for tok in annotated:
|
for tok in annotated:
|
||||||
# Skip non-primary members of a multi-token sequence.
|
# Skip non-primary members of a multi-token sequence.
|
||||||
@@ -658,6 +671,16 @@ def assemble(
|
|||||||
parsed = _parse_season_episode(tok.text)
|
parsed = _parse_season_episode(tok.text)
|
||||||
if parsed is not None:
|
if parsed is not None:
|
||||||
season, episode, episode_end = parsed
|
season, episode, episode_end = parsed
|
||||||
|
# Detect Sxx-yy range form to flag it as a multi-season pack.
|
||||||
|
upper = tok.text.upper()
|
||||||
|
if (
|
||||||
|
len(upper) == 6
|
||||||
|
and upper[0] == "S"
|
||||||
|
and upper[1:3].isdigit()
|
||||||
|
and upper[3] == "-"
|
||||||
|
and upper[4:6].isdigit()
|
||||||
|
):
|
||||||
|
is_season_range = True
|
||||||
elif role is TokenRole.RESOLUTION:
|
elif role is TokenRole.RESOLUTION:
|
||||||
quality = tok.text
|
quality = tok.text
|
||||||
elif role is TokenRole.SOURCE:
|
elif role is TokenRole.SOURCE:
|
||||||
@@ -705,6 +728,8 @@ def assemble(
|
|||||||
media_type = "documentary"
|
media_type = "documentary"
|
||||||
elif upper_tokens & concert_tokens:
|
elif upper_tokens & concert_tokens:
|
||||||
media_type = "concert"
|
media_type = "concert"
|
||||||
|
elif is_season_range:
|
||||||
|
media_type = "tv_complete"
|
||||||
elif (
|
elif (
|
||||||
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
|
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
|
||||||
or upper_tokens & integrale_tokens
|
or upper_tokens & integrale_tokens
|
||||||
|
|||||||
+7
-7
@@ -1,22 +1,22 @@
|
|||||||
release_name: "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE"
|
release_name: "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE"
|
||||||
|
|
||||||
# Tech debt: range syntax 'S01-06' is not recognized as TV — falls through
|
# Range syntax 'S01-06' is now recognized as a season-range marker:
|
||||||
# to media_type=movie with the range glued onto the title. Captured here so a
|
# season=1 (first of the range), media_type=tv_complete, and the token
|
||||||
# future ranger-aware parser change is intentional.
|
# no longer leaks into the title.
|
||||||
parsed:
|
parsed:
|
||||||
title: "Der.Tatortreiniger.S01-06"
|
title: "Der.Tatortreiniger"
|
||||||
year: null
|
year: null
|
||||||
season: null
|
season: 1
|
||||||
episode: null
|
episode: null
|
||||||
quality: "1080p"
|
quality: "1080p"
|
||||||
source: "WEB"
|
source: "WEB"
|
||||||
codec: "x264"
|
codec: "x264"
|
||||||
group: "WAYNE"
|
group: "WAYNE"
|
||||||
tech_string: "1080p.WEB.x264"
|
tech_string: "1080p.WEB.x264"
|
||||||
media_type: "movie"
|
media_type: "tv_complete"
|
||||||
languages: ["GERMAN"]
|
languages: ["GERMAN"]
|
||||||
parse_path: "direct"
|
parse_path: "direct"
|
||||||
is_season_pack: false
|
is_season_pack: true
|
||||||
|
|
||||||
tree:
|
tree:
|
||||||
- "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE/"
|
- "Der.Tatortreiniger.S01-06.GERMAN.1080p.WEB.x264-WAYNE/"
|
||||||
|
|||||||
Reference in New Issue
Block a user