"""Scaffolding tests for the v2 parser package. These tests lock the **shape** of the new pipeline (token VOs, tokenize output, site-tag stripping) before the annotate step is wired in. They do not check parsed-release output yet — that comes once :func:`annotate` is implemented and the fixtures-based suite switches over. """ from __future__ import annotations from alfred.domain.release.parser import Token, TokenRole from alfred.domain.release.parser.pipeline import strip_site_tag, tokenize from alfred.infrastructure.knowledge.release_kb import YamlReleaseKnowledge _KB = YamlReleaseKnowledge() class TestToken: def test_default_role_is_unknown(self) -> None: t = Token(text="1080p", index=3) assert t.role is TokenRole.UNKNOWN assert not t.is_annotated def test_with_role_returns_new_instance(self) -> None: t = Token(text="1080p", index=3) promoted = t.with_role(TokenRole.RESOLUTION) assert promoted is not t assert promoted.role is TokenRole.RESOLUTION assert t.role is TokenRole.UNKNOWN # original unchanged (frozen) def test_with_role_merges_extra(self) -> None: t = Token(text="x265-KONTRAST", index=5) promoted = t.with_role(TokenRole.CODEC, group="KONTRAST") assert promoted.role is TokenRole.CODEC assert promoted.extra == {"group": "KONTRAST"} class TestStripSiteTag: def test_no_tag(self) -> None: clean, tag = strip_site_tag("The.Movie.2020.1080p-GRP") assert tag is None assert clean == "The.Movie.2020.1080p-GRP" def test_suffix_tag(self) -> None: clean, tag = strip_site_tag("Sinners.2025.1080p-[YTS.MX]") assert tag == "YTS.MX" assert clean == "Sinners.2025.1080p-" def test_prefix_tag(self) -> None: clean, tag = strip_site_tag("[ OxTorrent.vc ] The.Title.S01E01") assert tag == "OxTorrent.vc" assert clean == "The.Title.S01E01" class TestTokenize: def test_simple_release(self) -> None: tokens, tag = tokenize("Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB) assert tag is None texts = [t.text for t in tokens] # Dash is not a separator, so x265-KONTRAST stays glued. assert texts == [ "Back", "in", "Action", "2025", "1080p", "WEBRip", "x265-KONTRAST", ] def test_all_tokens_start_unknown(self) -> None: tokens, _ = tokenize("Back.in.Action.2025.1080p.WEBRip.x265-KONTRAST", _KB) assert all(t.role is TokenRole.UNKNOWN for t in tokens) def test_indexes_are_contiguous(self) -> None: tokens, _ = tokenize("A.B.C.D", _KB) assert [t.index for t in tokens] == [0, 1, 2, 3] def test_strips_site_tag_before_tokenize(self) -> None: tokens, tag = tokenize( "Sinners.2025.1080p.WEBRip.x265.10bit.AAC5.1-[YTS.MX]", _KB ) assert tag == "YTS.MX" # Site tag substring must not appear among tokens. assert not any("YTS" in t.text for t in tokens)