"""Tests for ``alfred.domain.subtitles.knowledge`` (loader + base). Covers: - ``TestMerge`` — the internal ``_merge`` deep-merge function: scalar override, dict merge, list extension+dedup. - ``TestLoader`` — builtin loads alone, learned overlays add tokens, learned-only pattern is picked up, missing files don't crash. - ``TestKnowledgeBase`` — typed view: formats / languages / type-token lookup, default rules, ``patterns_for_group``. Uses ``monkeypatch`` to override the module-level ``_BUILTIN_ROOT`` and ``_LEARNED_ROOT`` constants so we can drive the loader from a temp dir. """ from __future__ import annotations from pathlib import Path import pytest from alfred.domain.subtitles.knowledge import loader as loader_mod from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader, _merge from alfred.domain.subtitles.value_objects import ( ScanStrategy, SubtitleType, TypeDetectionMethod, ) # --------------------------------------------------------------------------- # # _merge — pure dict merger # # --------------------------------------------------------------------------- # class TestMerge: def test_scalar_override(self): assert _merge({"a": 1}, {"a": 2}) == {"a": 2} def test_new_key_added(self): assert _merge({"a": 1}, {"b": 2}) == {"a": 1, "b": 2} def test_nested_dict_merged(self): out = _merge({"a": {"x": 1}}, {"a": {"y": 2}}) assert out == {"a": {"x": 1, "y": 2}} def test_list_extended_and_deduped(self): out = _merge({"a": [1, 2]}, {"a": [2, 3]}) assert out == {"a": [1, 2, 3]} def test_list_preserves_order(self): out = _merge({"a": ["x", "y"]}, {"a": ["z", "x"]}) assert out == {"a": ["x", "y", "z"]} def test_type_mismatch_override_wins(self): # If shapes differ, override replaces wholesale. out = _merge({"a": [1, 2]}, {"a": {"new": True}}) assert out == {"a": {"new": True}} # --------------------------------------------------------------------------- # # Loader helpers # # --------------------------------------------------------------------------- # def _write(path: Path, content: str) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(content, encoding="utf-8") @pytest.fixture def isolated_loader(tmp_path: Path, monkeypatch): """Redirect _BUILTIN_ROOT and _LEARNED_ROOT to temp dirs.""" builtin = tmp_path / "builtin" learned = tmp_path / "learned" builtin.mkdir() learned.mkdir() monkeypatch.setattr(loader_mod, "_BUILTIN_ROOT", builtin) monkeypatch.setattr(loader_mod, "_LEARNED_ROOT", learned) return builtin, learned class TestLoader: def test_builtin_only(self, isolated_loader): builtin, _ = isolated_loader _write( builtin / "subtitles.yaml", "languages:\n fra:\n tokens: [fr, fre]\n", ) ldr = KnowledgeLoader() assert ldr.subtitles()["languages"]["fra"]["tokens"] == ["fr", "fre"] def test_learned_adds_tokens_additively(self, isolated_loader): builtin, learned = isolated_loader _write( builtin / "subtitles.yaml", "languages:\n fra:\n tokens: [fr, fre]\n", ) _write( learned / "subtitles_learned.yaml", "languages:\n fra:\n tokens: [vff, custom]\n", ) ldr = KnowledgeLoader() tokens = ldr.subtitles()["languages"]["fra"]["tokens"] assert tokens == ["fr", "fre", "vff", "custom"] def test_missing_files_dont_crash(self, isolated_loader): # No files written → loader still produces empty structures. ldr = KnowledgeLoader() assert ldr.subtitles() == {} assert ldr.patterns() == {} assert ldr.release_groups() == {} def test_builtin_pattern_loaded(self, isolated_loader): builtin, _ = isolated_loader _write( builtin / "patterns" / "adjacent.yaml", "id: adjacent\nscan_strategy: adjacent\ndescription: test\n", ) ldr = KnowledgeLoader() assert "adjacent" in ldr.patterns() assert ldr.pattern("adjacent")["scan_strategy"] == "adjacent" def test_learned_pattern_overlays_builtin(self, isolated_loader): builtin, learned = isolated_loader _write( builtin / "patterns" / "p.yaml", "id: p\nscan_strategy: flat\ndescription: old\n", ) _write( learned / "patterns" / "p.yaml", "id: p\ndescription: new\n", ) ldr = KnowledgeLoader() # learned replaces scalar 'description', keeps scan_strategy from builtin assert ldr.pattern("p")["description"] == "new" assert ldr.pattern("p")["scan_strategy"] == "flat" def test_learned_only_pattern_added(self, isolated_loader): _, learned = isolated_loader _write( learned / "patterns" / "neo.yaml", "id: neo\nscan_strategy: embedded\n", ) ldr = KnowledgeLoader() assert "neo" in ldr.patterns() def test_release_group_case_insensitive_lookup(self, isolated_loader): builtin, _ = isolated_loader _write( builtin / "release_groups" / "kontrast.yaml", "name: KONTRAST\nknown_patterns: [adjacent]\n", ) ldr = KnowledgeLoader() # Stored under "KONTRAST" but case-insensitive match must work. assert ldr.release_group("kontrast") is not None assert ldr.release_group("Kontrast")["name"] == "KONTRAST" assert ldr.release_group("unknown_group") is None def test_pattern_id_falls_back_to_filename(self, isolated_loader): # File without 'id' field — uses the stem. builtin, _ = isolated_loader _write( builtin / "patterns" / "no_id.yaml", "scan_strategy: adjacent\n", ) ldr = KnowledgeLoader() assert "no_id" in ldr.patterns() # --------------------------------------------------------------------------- # # SubtitleKnowledgeBase # # --------------------------------------------------------------------------- # class TestKnowledgeBase: @pytest.fixture def kb(self, isolated_loader): builtin, _ = isolated_loader _write( builtin / "subtitles.yaml", """ formats: srt: extensions: [".srt"] description: "SubRip" ass: extensions: [".ass", ".ssa"] language_tokens: fre: ["vostfr"] types: sdh: tokens: ["sdh", "cc"] forced: tokens: ["forced"] defaults: languages: ["fre"] formats: ["srt"] types: ["standard"] format_priority: ["srt"] min_confidence: 0.8 """, ) _write( builtin / "patterns" / "adj.yaml", "id: adj\nscan_strategy: adjacent\ndescription: d\n", ) _write( builtin / "patterns" / "bad.yaml", # invalid scan_strategy → skipped at build time "id: bad\nscan_strategy: not_a_real_strategy\n", ) _write( builtin / "release_groups" / "group_a.yaml", "name: GroupA\nknown_patterns: [adj]\n", ) return SubtitleKnowledgeBase() def test_formats_loaded(self, kb): formats = kb.formats() assert "srt" in formats and "ass" in formats assert kb.format_for_extension(".srt").id == "srt" assert kb.format_for_extension(".ssa").id == "ass" assert kb.format_for_extension(".unknown") is None def test_known_extensions_aggregates(self, kb): exts = kb.known_extensions() assert ".srt" in exts and ".ass" in exts and ".ssa" in exts def test_language_for_token(self, kb): # Canonical ISO 639-2/B codes are sourced from LanguageRegistry. assert kb.language_for_token("french").code == "fre" assert kb.language_for_token("FR").code == "fre" assert kb.language_for_token("xxx") is None assert kb.is_known_lang_token("eng") is True assert kb.is_known_lang_token("ghost") is False def test_subtitle_specific_token_recognized(self, kb): # ``vostfr`` is subtitle-specific and lives in subtitles.yaml's # ``language_tokens`` block — still resolves to canonical "fre". assert kb.language_for_token("vostfr").code == "fre" def test_type_for_token(self, kb): assert kb.type_for_token("sdh") == SubtitleType.SDH assert kb.type_for_token("FORCED") == SubtitleType.FORCED assert kb.type_for_token("nope") is None # 'hi' must NOT be a SDH token any more (it collides with Hindi). assert kb.is_known_type_token("hi") is False assert kb.is_known_type_token("cc") is True def test_default_rules(self, kb): r = kb.default_rules() assert r.preferred_languages == ["fre"] assert r.preferred_formats == ["srt"] assert r.min_confidence == 0.8 def test_patterns_valid_kept_invalid_skipped(self, kb): patterns = kb.patterns() assert "adj" in patterns # 'bad' had an invalid scan_strategy → quietly dropped. assert "bad" not in patterns def test_pattern_typed_view(self, kb): p = kb.pattern("adj") assert p.scan_strategy == ScanStrategy.ADJACENT assert p.type_detection == TypeDetectionMethod.TOKEN_IN_NAME def test_patterns_for_group(self, kb): ps = kb.patterns_for_group("GroupA") assert len(ps) == 1 and ps[0].id == "adj" assert kb.patterns_for_group("unknown") == [] def test_reload_picks_up_changes(self, kb, isolated_loader): # Add a new pattern, reload, check it's visible. builtin, _ = isolated_loader _write( builtin / "patterns" / "new.yaml", "id: new\nscan_strategy: flat\n", ) kb.reload() assert "new" in kb.patterns()