refactor(release): ParsedRelease.media_type & parse_path are strict enums
The fields were already typed as MediaTypeToken / ParsePath, but a tolerant __post_init__ coerced raw strings into their enum form. Because MediaTypeToken and ParsePath are both (str, Enum) subclasses, the coercion served no real purpose: callers that passed '.value' got the enum back anyway, and callers that passed an unknown string got a ValidationError, just as they do now. Strict mode: the constructor now rejects any non-enum value directly, including a raw string that matches a valid member, instead of coercing it. The two in-tree builders (parse_release() and the parser pipeline) already produce enum values, and all .value call sites have been removed. Also drops the now-unused _VALID_MEDIA_TYPES / _VALID_PARSE_PATHS lookup tables.
This commit is contained in:
@@ -29,6 +29,7 @@ arrives through ``kb: ReleaseKnowledge``.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from ..ports.knowledge import ReleaseKnowledge
|
from ..ports.knowledge import ReleaseKnowledge
|
||||||
|
from ..value_objects import MediaTypeToken
|
||||||
from .schema import GroupSchema
|
from .schema import GroupSchema
|
||||||
from .tokens import Token, TokenRole
|
from .tokens import Token, TokenRole
|
||||||
|
|
||||||
@@ -725,22 +726,22 @@ def assemble(
|
|||||||
integrale_tokens = {t.upper() for t in kb.media_type_tokens.get("integrale", [])}
|
integrale_tokens = {t.upper() for t in kb.media_type_tokens.get("integrale", [])}
|
||||||
|
|
||||||
if upper_tokens & doc_tokens:
|
if upper_tokens & doc_tokens:
|
||||||
media_type = "documentary"
|
media_type = MediaTypeToken.DOCUMENTARY
|
||||||
elif upper_tokens & concert_tokens:
|
elif upper_tokens & concert_tokens:
|
||||||
media_type = "concert"
|
media_type = MediaTypeToken.CONCERT
|
||||||
elif is_season_range:
|
elif is_season_range:
|
||||||
media_type = "tv_complete"
|
media_type = MediaTypeToken.TV_COMPLETE
|
||||||
elif (
|
elif (
|
||||||
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
|
edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
|
||||||
or upper_tokens & integrale_tokens
|
or upper_tokens & integrale_tokens
|
||||||
) and season is None:
|
) and season is None:
|
||||||
media_type = "tv_complete"
|
media_type = MediaTypeToken.TV_COMPLETE
|
||||||
elif season is not None:
|
elif season is not None:
|
||||||
media_type = "tv_show"
|
media_type = MediaTypeToken.TV_SHOW
|
||||||
elif any((quality, source, codec, year)):
|
elif any((quality, source, codec, year)):
|
||||||
media_type = "movie"
|
media_type = MediaTypeToken.MOVIE
|
||||||
else:
|
else:
|
||||||
media_type = "unknown"
|
media_type = MediaTypeToken.UNKNOWN
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title": title,
|
"title": title,
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ def parse_release(
|
|||||||
3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a
|
3. Otherwise run the v2 pipeline: tokenize → annotate (EASY when a
|
||||||
group schema is known, SHITTY otherwise) → assemble → score.
|
group schema is known, SHITTY otherwise) → assemble → score.
|
||||||
"""
|
"""
|
||||||
parse_path = ParsePath.DIRECT.value
|
parse_path = ParsePath.DIRECT
|
||||||
|
|
||||||
# Apostrophes inside titles ("Don't", "L'avare") are common and should
|
# Apostrophes inside titles ("Don't", "L'avare") are common and should
|
||||||
# not push the release through the AI fallback. Strip them up front so
|
# not push the release through the AI fallback. Strip them up front so
|
||||||
@@ -53,11 +53,11 @@ def parse_release(
|
|||||||
working_name = name
|
working_name = name
|
||||||
if "'" in working_name:
|
if "'" in working_name:
|
||||||
working_name = working_name.replace("'", "")
|
working_name = working_name.replace("'", "")
|
||||||
parse_path = ParsePath.SANITIZED.value
|
parse_path = ParsePath.SANITIZED
|
||||||
|
|
||||||
clean, site_tag = _v2.strip_site_tag(working_name)
|
clean, site_tag = _v2.strip_site_tag(working_name)
|
||||||
if site_tag is not None:
|
if site_tag is not None:
|
||||||
parse_path = ParsePath.SANITIZED.value
|
parse_path = ParsePath.SANITIZED
|
||||||
|
|
||||||
if not _is_well_formed(clean, kb):
|
if not _is_well_formed(clean, kb):
|
||||||
parsed = ParsedRelease(
|
parsed = ParsedRelease(
|
||||||
@@ -74,9 +74,9 @@ def parse_release(
|
|||||||
codec=None,
|
codec=None,
|
||||||
group="UNKNOWN",
|
group="UNKNOWN",
|
||||||
tech_string="",
|
tech_string="",
|
||||||
media_type=MediaTypeToken.UNKNOWN.value,
|
media_type=MediaTypeToken.UNKNOWN,
|
||||||
site_tag=site_tag,
|
site_tag=site_tag,
|
||||||
parse_path=ParsePath.AI.value,
|
parse_path=ParsePath.AI,
|
||||||
)
|
)
|
||||||
report = ParseReport(
|
report = ParseReport(
|
||||||
confidence=0,
|
confidence=0,
|
||||||
|
|||||||
@@ -49,10 +49,6 @@ class ParsePath(str, Enum):
|
|||||||
AI = "ai"
|
AI = "ai"
|
||||||
|
|
||||||
|
|
||||||
_VALID_MEDIA_TYPES: frozenset[str] = frozenset(m.value for m in MediaTypeToken)
|
|
||||||
_VALID_PARSE_PATHS: frozenset[str] = frozenset(p.value for p in ParsePath)
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_episode_from_normalized(normalized: str) -> str:
|
def _strip_episode_from_normalized(normalized: str) -> str:
|
||||||
"""
|
"""
|
||||||
Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
|
Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
|
||||||
@@ -168,23 +164,16 @@ class ParsedRelease:
|
|||||||
f"ParsedRelease.episode_end ({self.episode_end}) < "
|
f"ParsedRelease.episode_end ({self.episode_end}) < "
|
||||||
f"episode ({self.episode})"
|
f"episode ({self.episode})"
|
||||||
)
|
)
|
||||||
# Coerce raw strings into their enum form (tolerant constructor).
|
|
||||||
if not isinstance(self.media_type, MediaTypeToken):
|
if not isinstance(self.media_type, MediaTypeToken):
|
||||||
try:
|
raise ValidationError(
|
||||||
self.media_type = MediaTypeToken(self.media_type)
|
f"ParsedRelease.media_type must be a MediaTypeToken, "
|
||||||
except ValueError:
|
f"got {type(self.media_type).__name__}: {self.media_type!r}"
|
||||||
raise ValidationError(
|
)
|
||||||
f"ParsedRelease.media_type invalid: {self.media_type!r} "
|
|
||||||
f"(expected one of {sorted(_VALID_MEDIA_TYPES)})"
|
|
||||||
) from None
|
|
||||||
if not isinstance(self.parse_path, ParsePath):
|
if not isinstance(self.parse_path, ParsePath):
|
||||||
try:
|
raise ValidationError(
|
||||||
self.parse_path = ParsePath(self.parse_path)
|
f"ParsedRelease.parse_path must be a ParsePath, "
|
||||||
except ValueError:
|
f"got {type(self.parse_path).__name__}: {self.parse_path!r}"
|
||||||
raise ValidationError(
|
)
|
||||||
f"ParsedRelease.parse_path invalid: {self.parse_path!r} "
|
|
||||||
f"(expected one of {sorted(_VALID_PARSE_PATHS)})"
|
|
||||||
) from None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_season_pack(self) -> bool:
|
def is_season_pack(self) -> bool:
|
||||||
|
|||||||
@@ -79,8 +79,8 @@ def _movie(year: int = 2020, **overrides) -> ParsedRelease:
|
|||||||
codec="x264",
|
codec="x264",
|
||||||
group="GROUP",
|
group="GROUP",
|
||||||
tech_string="1080p.BluRay.x264",
|
tech_string="1080p.BluRay.x264",
|
||||||
media_type=MediaTypeToken.MOVIE.value,
|
media_type=MediaTypeToken.MOVIE,
|
||||||
parse_path=ParsePath.DIRECT.value,
|
parse_path=ParsePath.DIRECT,
|
||||||
)
|
)
|
||||||
base.update(overrides)
|
base.update(overrides)
|
||||||
return ParsedRelease(**base)
|
return ParsedRelease(**base)
|
||||||
@@ -121,8 +121,8 @@ class TestComputeScore:
|
|||||||
codec="x265",
|
codec="x265",
|
||||||
group="KONTRAST",
|
group="KONTRAST",
|
||||||
tech_string="1080p.WEBRip.x265",
|
tech_string="1080p.WEBRip.x265",
|
||||||
media_type=MediaTypeToken.TV_SHOW.value,
|
media_type=MediaTypeToken.TV_SHOW,
|
||||||
parse_path=ParsePath.DIRECT.value,
|
parse_path=ParsePath.DIRECT,
|
||||||
)
|
)
|
||||||
tokens = [
|
tokens = [
|
||||||
Token("Oz", 0, TokenRole.TITLE),
|
Token("Oz", 0, TokenRole.TITLE),
|
||||||
@@ -166,7 +166,7 @@ class TestComputeScore:
|
|||||||
assert 0 <= score <= 100
|
assert 0 <= score <= 100
|
||||||
|
|
||||||
def test_unknown_media_type_does_not_count(self) -> None:
|
def test_unknown_media_type_does_not_count(self) -> None:
|
||||||
parsed = _movie(media_type=MediaTypeToken.UNKNOWN.value)
|
parsed = _movie(media_type=MediaTypeToken.UNKNOWN)
|
||||||
score = compute_score(parsed, _all_annotated(), _KB)
|
score = compute_score(parsed, _all_annotated(), _KB)
|
||||||
# Loses the 20 of media_type vs baseline
|
# Loses the 20 of media_type vs baseline
|
||||||
assert score == 85 - 20
|
assert score == 85 - 20
|
||||||
@@ -232,8 +232,8 @@ class TestCollectors:
|
|||||||
codec=None,
|
codec=None,
|
||||||
group="UNKNOWN",
|
group="UNKNOWN",
|
||||||
tech_string="",
|
tech_string="",
|
||||||
media_type=MediaTypeToken.UNKNOWN.value,
|
media_type=MediaTypeToken.UNKNOWN,
|
||||||
parse_path=ParsePath.DIRECT.value,
|
parse_path=ParsePath.DIRECT,
|
||||||
)
|
)
|
||||||
assert set(collect_missing_critical(empty)) == {
|
assert set(collect_missing_critical(empty)) == {
|
||||||
"title",
|
"title",
|
||||||
|
|||||||
Reference in New Issue
Block a user