80 lines
3.0 KiB
Python
80 lines
3.0 KiB
Python
"""Scaffolding tests for the v2 parser package.
|
|
|
|
These tests lock the **shape** of the new pipeline (token VOs, tokenize
|
|
output, site-tag stripping) before the annotate step is wired in. They
|
|
do not check parsed-release output yet — that comes once :func:`annotate`
|
|
is implemented and the fixtures-based suite switches over.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from alfred.domain.releases.parser import Token, TokenRole
|
|
from alfred.domain.releases.parser import strip_site_tag, tokenize
|
|
from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge
|
|
|
|
# Knowledge-base instance built once at import time with default arguments;
# every tokenize() call in this module receives it as the second argument.
_KB = YamlReleaseKnowledge()
|
|
|
|
|
|
class TestToken:
    """Checks on the ``Token`` value object: default role and ``with_role``."""

    def test_default_role_is_unknown(self) -> None:
        """A token built without a role reports UNKNOWN and is not annotated."""
        token = Token(text="1080p", index=3)

        assert token.role is TokenRole.UNKNOWN
        assert not token.is_annotated

    def test_with_role_returns_new_instance(self) -> None:
        """``with_role`` hands back a distinct token and leaves the source alone."""
        original = Token(text="1080p", index=3)
        upgraded = original.with_role(TokenRole.RESOLUTION)

        assert upgraded is not original
        assert upgraded.role is TokenRole.RESOLUTION
        # The source token keeps its role (frozen value object).
        assert original.role is TokenRole.UNKNOWN

    def test_with_role_merges_extra(self) -> None:
        """Keyword arguments given to ``with_role`` end up in ``extra``."""
        upgraded = Token(text="x265-KONTRAST", index=5).with_role(
            TokenRole.CODEC,
            group="KONTRAST",
        )

        assert upgraded.role is TokenRole.CODEC
        assert upgraded.extra == {"group": "KONTRAST"}
|
|
|
|
|
|
class TestStripSiteTag:
    """Checks on ``strip_site_tag``: no tag, trailing tag, leading tag."""

    def test_no_tag(self) -> None:
        """A name without a bracketed tag comes back untouched, tag is None."""
        release_name = "The.Movie.2020.1080p-GRP"

        remainder, site_tag = strip_site_tag(release_name)

        assert site_tag is None
        assert remainder == "The.Movie.2020.1080p-GRP"

    def test_suffix_tag(self) -> None:
        """A trailing ``[...]`` tag is extracted; the dash before it stays."""
        release_name = "Sinners.2025.1080p-[YTS.MX]"

        remainder, site_tag = strip_site_tag(release_name)

        assert site_tag == "YTS.MX"
        assert remainder == "Sinners.2025.1080p-"

    def test_prefix_tag(self) -> None:
        """A leading ``[ ... ]`` tag is extracted without its inner padding."""
        release_name = "[ OxTorrent.vc ] The.Title.S01E01"

        remainder, site_tag = strip_site_tag(release_name)

        assert site_tag == "OxTorrent.vc"
        assert remainder == "The.Title.S01E01"
|
|
|
|
|
|
class TestTokenize:
    """Checks on ``tokenize``: token texts, initial roles, indexes, site tag."""

    # Release name shared by the tests that only need a plain, tag-free input.
    _SIMPLE_RELEASE = "Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST"

    def test_simple_release(self) -> None:
        """A tag-free release yields the expected token texts and no tag."""
        tokens, site_tag = tokenize(self._SIMPLE_RELEASE, _KB)

        assert site_tag is None
        # The dash does not split tokens, so "x265-KONTRAST" remains one token.
        expected_texts = [
            "Back",
            "in",
            "Action",
            "2025",
            "1080p",
            "WEBRip",
            "x265-KONTRAST",
        ]
        assert [tok.text for tok in tokens] == expected_texts

    def test_all_tokens_start_unknown(self) -> None:
        """Every token produced by ``tokenize`` carries the UNKNOWN role."""
        tokens, _ = tokenize(self._SIMPLE_RELEASE, _KB)

        already_tagged = [
            tok for tok in tokens if tok.role is not TokenRole.UNKNOWN
        ]
        assert not already_tagged

    def test_indexes_are_contiguous(self) -> None:
        """Token indexes count up from zero with no gaps."""
        tokens, _ = tokenize("A.B.C.D", _KB)

        indexes = [tok.index for tok in tokens]
        assert indexes == list(range(4))

    def test_strips_site_tag_before_tokenize(self) -> None:
        """The site tag is returned separately and absent from token texts."""
        release_name = "Sinners.2025.1080p.WEBRip.x265.10bit.AAC5.1-[YTS.MX]"

        tokens, site_tag = tokenize(release_name, _KB)

        assert site_tag == "YTS.MX"
        # No token text may contain a fragment of the site tag.
        assert all("YTS" not in tok.text for tok in tokens)
|