ced72547f7
The domain layer no longer reads YAML files. All knowledge loaders move
from `alfred/domain/*/knowledge/` to `alfred/infrastructure/knowledge/`:
domain/release/knowledge.py
→ infrastructure/knowledge/release.py
domain/shared/knowledge/language_registry.py
→ infrastructure/knowledge/language_registry.py
domain/subtitles/knowledge/{loader,base}.py
→ infrastructure/knowledge/subtitles/{loader,base}.py
Callers in domain/release/{services,value_objects}.py,
domain/subtitles/{aggregates,services/*}.py, and
application/filesystem/manage_subtitles.py updated to absolute imports.
Re-exports of KnowledgeLoader/SubtitleKnowledgeBase from
domain/subtitles/__init__.py dropped (no shim per project convention).
Tests follow the moved targets.
282 lines
10 KiB
Python
282 lines
10 KiB
Python
"""Tests for ``alfred.infrastructure.knowledge.subtitles`` (loader + base).
|
|
|
|
Covers:
|
|
|
|
- ``TestMerge`` — the internal ``_merge`` deep-merge function:
|
|
scalar override, dict merge, list extension+dedup.
|
|
- ``TestLoader`` — builtin loads alone, learned overlays add tokens,
|
|
learned-only pattern is picked up, missing files don't crash.
|
|
- ``TestKnowledgeBase`` — typed view: formats / languages /
|
|
type-token lookup, default rules, ``patterns_for_group``.
|
|
|
|
Uses ``monkeypatch`` to override the module-level ``_BUILTIN_ROOT`` and
|
|
``_LEARNED_ROOT`` constants so we can drive the loader from a temp dir.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from alfred.infrastructure.knowledge.subtitles import loader as loader_mod
|
|
from alfred.infrastructure.knowledge.subtitles.base import SubtitleKnowledgeBase
|
|
from alfred.infrastructure.knowledge.subtitles.loader import KnowledgeLoader, _merge
|
|
from alfred.domain.subtitles.value_objects import (
|
|
ScanStrategy,
|
|
SubtitleType,
|
|
TypeDetectionMethod,
|
|
)
|
|
|
|
# --------------------------------------------------------------------------- #
# _merge — pure dict merger                                                   #
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestMerge:
    """Pin the observable behaviour of the ``_merge(base, override)`` helper.

    Each test feeds two small dicts to ``_merge`` and compares the returned
    dict with the expected merged result.
    """

    def test_scalar_override(self):
        """A scalar present on both sides takes the override's value."""
        assert _merge({"a": 1}, {"a": 2}) == {"a": 2}

    def test_new_key_added(self):
        """A key that exists only in the override is added to the result."""
        assert _merge({"a": 1}, {"b": 2}) == {"a": 1, "b": 2}

    def test_nested_dict_merged(self):
        """Dict values under the same key are merged, not replaced."""
        out = _merge({"a": {"x": 1}}, {"a": {"y": 2}})
        assert out == {"a": {"x": 1, "y": 2}}

    def test_list_extended_and_deduped(self):
        """List values are concatenated and duplicates appear only once."""
        out = _merge({"a": [1, 2]}, {"a": [2, 3]})
        assert out == {"a": [1, 2, 3]}

    def test_list_preserves_order(self):
        """Base items keep their position; new override items are appended."""
        out = _merge({"a": ["x", "y"]}, {"a": ["z", "x"]})
        assert out == {"a": ["x", "y", "z"]}

    def test_type_mismatch_override_wins(self):
        """A list on one side and a dict on the other is not merged."""
        # If shapes differ, override replaces wholesale.
        out = _merge({"a": [1, 2]}, {"a": {"new": True}})
        assert out == {"a": {"new": True}}
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
# Loader helpers                                                              #
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
def _write(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` as UTF-8, creating parent directories.

    Args:
        path: Destination file. Missing parent directories are created;
            existing ones are left alone.
        content: Text to store. An existing file at ``path`` is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
|
|
|
|
|
|
@pytest.fixture
def isolated_loader(tmp_path: Path, monkeypatch):
    """Redirect _BUILTIN_ROOT and _LEARNED_ROOT to temp dirs.

    Creates ``builtin/`` and ``learned/`` under ``tmp_path`` and patches the
    two module-level constants on ``loader_mod`` to point at them for the
    duration of the test.

    Returns:
        The ``(builtin, learned)`` directory pair, both initially empty, so
        a test can drop YAML files into either one.
    """
    builtin = tmp_path / "builtin"
    learned = tmp_path / "learned"
    builtin.mkdir()
    learned.mkdir()
    monkeypatch.setattr(loader_mod, "_BUILTIN_ROOT", builtin)
    monkeypatch.setattr(loader_mod, "_LEARNED_ROOT", learned)
    return builtin, learned
|
|
|
|
|
|
class TestLoader:
    """Exercise ``KnowledgeLoader`` against the temp dirs from ``isolated_loader``.

    Every test writes the YAML files it needs into the builtin and/or
    learned directory, builds a fresh ``KnowledgeLoader`` and inspects the
    dicts it exposes.
    """

    def test_builtin_only(self, isolated_loader):
        """A builtin ``subtitles.yaml`` alone is returned as written."""
        builtin, _ = isolated_loader
        _write(
            builtin / "subtitles.yaml",
            # ``tokens`` must be nested under ``fra`` for the lookup below.
            "languages:\n  fra:\n    tokens: [fr, fre]\n",
        )
        ldr = KnowledgeLoader()
        assert ldr.subtitles()["languages"]["fra"]["tokens"] == ["fr", "fre"]

    def test_learned_adds_tokens_additively(self, isolated_loader):
        """Learned tokens are appended after the builtin ones."""
        builtin, learned = isolated_loader
        _write(
            builtin / "subtitles.yaml",
            "languages:\n  fra:\n    tokens: [fr, fre]\n",
        )
        _write(
            learned / "subtitles_learned.yaml",
            "languages:\n  fra:\n    tokens: [vff, custom]\n",
        )
        ldr = KnowledgeLoader()
        tokens = ldr.subtitles()["languages"]["fra"]["tokens"]
        assert tokens == ["fr", "fre", "vff", "custom"]

    def test_missing_files_dont_crash(self, isolated_loader):
        """With both roots empty, every accessor returns an empty dict."""
        # No files written → loader still produces empty structures.
        ldr = KnowledgeLoader()
        assert ldr.subtitles() == {}
        assert ldr.patterns() == {}
        assert ldr.release_groups() == {}

    def test_builtin_pattern_loaded(self, isolated_loader):
        """A pattern file under ``builtin/patterns`` is exposed by its id."""
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "adjacent.yaml",
            "id: adjacent\nscan_strategy: adjacent\ndescription: test\n",
        )
        ldr = KnowledgeLoader()
        assert "adjacent" in ldr.patterns()
        assert ldr.pattern("adjacent")["scan_strategy"] == "adjacent"

    def test_learned_pattern_overlays_builtin(self, isolated_loader):
        """A learned pattern with the same id is merged over the builtin one."""
        builtin, learned = isolated_loader
        _write(
            builtin / "patterns" / "p.yaml",
            "id: p\nscan_strategy: flat\ndescription: old\n",
        )
        _write(
            learned / "patterns" / "p.yaml",
            "id: p\ndescription: new\n",
        )
        ldr = KnowledgeLoader()
        # learned replaces scalar 'description', keeps scan_strategy from builtin
        assert ldr.pattern("p")["description"] == "new"
        assert ldr.pattern("p")["scan_strategy"] == "flat"

    def test_learned_only_pattern_added(self, isolated_loader):
        """A pattern that exists only in the learned root is still listed."""
        _, learned = isolated_loader
        _write(
            learned / "patterns" / "neo.yaml",
            "id: neo\nscan_strategy: embedded\n",
        )
        ldr = KnowledgeLoader()
        assert "neo" in ldr.patterns()

    def test_release_group_case_insensitive_lookup(self, isolated_loader):
        """``release_group`` ignores case and yields ``None`` when unknown."""
        builtin, _ = isolated_loader
        _write(
            builtin / "release_groups" / "kontrast.yaml",
            "name: KONTRAST\nknown_patterns: [adjacent]\n",
        )
        ldr = KnowledgeLoader()
        # Stored under "KONTRAST" but case-insensitive match must work.
        assert ldr.release_group("kontrast") is not None
        assert ldr.release_group("Kontrast")["name"] == "KONTRAST"
        assert ldr.release_group("unknown_group") is None

    def test_pattern_id_falls_back_to_filename(self, isolated_loader):
        """Without an ``id`` field the pattern is keyed by the file stem."""
        # File without 'id' field — uses the stem.
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "no_id.yaml",
            "scan_strategy: adjacent\n",
        )
        ldr = KnowledgeLoader()
        assert "no_id" in ldr.patterns()
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
# SubtitleKnowledgeBase                                                       #
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestKnowledgeBase:
    """Exercise the typed ``SubtitleKnowledgeBase`` view over loader data.

    All tests share the ``kb`` fixture, which seeds the isolated builtin
    directory with one ``subtitles.yaml``, two pattern files (one valid, one
    with an unknown scan strategy) and one release group.
    """

    @pytest.fixture
    def kb(self, isolated_loader):
        """Build a ``SubtitleKnowledgeBase`` over a small builtin fixture set."""
        builtin, _ = isolated_loader
        # The YAML body is uniformly indented to line up with the code; only
        # the relative nesting matters to the parser.
        _write(
            builtin / "subtitles.yaml",
            """
            formats:
              srt:
                extensions: [".srt"]
                description: "SubRip"
              ass:
                extensions: [".ass", ".ssa"]
            language_tokens:
              fre: ["vostfr"]
            types:
              sdh:
                tokens: ["sdh", "cc"]
              forced:
                tokens: ["forced"]
            defaults:
              languages: ["fre"]
              formats: ["srt"]
              types: ["standard"]
              format_priority: ["srt"]
              min_confidence: 0.8
            """,
        )
        _write(
            builtin / "patterns" / "adj.yaml",
            "id: adj\nscan_strategy: adjacent\ndescription: d\n",
        )
        _write(
            builtin / "patterns" / "bad.yaml",
            # invalid scan_strategy → skipped at build time
            "id: bad\nscan_strategy: not_a_real_strategy\n",
        )
        _write(
            builtin / "release_groups" / "group_a.yaml",
            "name: GroupA\nknown_patterns: [adj]\n",
        )
        return SubtitleKnowledgeBase()

    def test_formats_loaded(self, kb):
        """Both formats are present and extensions map back to their format."""
        formats = kb.formats()
        assert "srt" in formats and "ass" in formats
        assert kb.format_for_extension(".srt").id == "srt"
        assert kb.format_for_extension(".ssa").id == "ass"
        assert kb.format_for_extension(".unknown") is None

    def test_known_extensions_aggregates(self, kb):
        """``known_extensions`` collects the extensions of every format."""
        exts = kb.known_extensions()
        assert ".srt" in exts and ".ass" in exts and ".ssa" in exts

    def test_language_for_token(self, kb):
        """Language tokens resolve case-insensitively; unknown ones do not."""
        # Canonical ISO 639-2/B codes are sourced from LanguageRegistry.
        assert kb.language_for_token("french").code == "fre"
        assert kb.language_for_token("FR").code == "fre"
        assert kb.language_for_token("xxx") is None
        assert kb.is_known_lang_token("eng") is True
        assert kb.is_known_lang_token("ghost") is False

    def test_subtitle_specific_token_recognized(self, kb):
        """A token declared only in ``language_tokens`` still resolves."""
        # ``vostfr`` is subtitle-specific and lives in subtitles.yaml's
        # ``language_tokens`` block — still resolves to canonical "fre".
        assert kb.language_for_token("vostfr").code == "fre"

    def test_type_for_token(self, kb):
        """Type tokens map to ``SubtitleType`` members, ignoring case."""
        assert kb.type_for_token("sdh") == SubtitleType.SDH
        assert kb.type_for_token("FORCED") == SubtitleType.FORCED
        assert kb.type_for_token("nope") is None
        # 'hi' must NOT be a SDH token any more (it collides with Hindi).
        assert kb.is_known_type_token("hi") is False
        assert kb.is_known_type_token("cc") is True

    def test_default_rules(self, kb):
        """``default_rules`` mirrors the ``defaults`` block of the fixture."""
        r = kb.default_rules()
        assert r.preferred_languages == ["fre"]
        assert r.preferred_formats == ["srt"]
        assert r.min_confidence == 0.8

    def test_patterns_valid_kept_invalid_skipped(self, kb):
        """Only patterns with a recognised scan strategy are exposed."""
        patterns = kb.patterns()
        assert "adj" in patterns
        # 'bad' had an invalid scan_strategy → quietly dropped.
        assert "bad" not in patterns

    def test_pattern_typed_view(self, kb):
        """A pattern exposes enum-typed fields rather than raw strings."""
        p = kb.pattern("adj")
        assert p.scan_strategy == ScanStrategy.ADJACENT
        # ``adj.yaml`` sets no type detection, so this is the value the
        # knowledge base supplies on its own.
        assert p.type_detection == TypeDetectionMethod.TOKEN_IN_NAME

    def test_patterns_for_group(self, kb):
        """A known group yields its patterns; an unknown group yields ``[]``."""
        ps = kb.patterns_for_group("GroupA")
        assert len(ps) == 1 and ps[0].id == "adj"
        assert kb.patterns_for_group("unknown") == []

    def test_reload_picks_up_changes(self, kb, isolated_loader):
        """Files added after construction become visible after ``reload``."""
        # Add a new pattern, reload, check it's visible.
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "new.yaml",
            "id: new\nscan_strategy: flat\n",
        )
        kb.reload()
        assert "new" in kb.patterns()
|