diff --git a/CHANGELOG.md b/CHANGELOG.md index bcc0ff1..77328d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,8 +15,23 @@ callers). ## [Unreleased] +### Added + +- **`LanguageRepository` port** in `alfred.domain.shared.ports`. Structural + Protocol covering `from_iso`, `from_any`, `all`, `__contains__`, `__len__` + — the surface previously coupled to the concrete `LanguageRegistry`. + Mirrors the `MediaProber` / `FilesystemScanner` pattern: domain code + depends on the Protocol, infrastructure provides the YAML-backed + adapter. Tests in `tests/infrastructure/test_language_registry.py`. + ### Internal +- **`SubtitleKnowledgeBase` types `language_registry` against the + `LanguageRepository` port** instead of the concrete `LanguageRegistry` + class. The default constructor still instantiates the concrete adapter + when no repository is injected — behaviour is unchanged for existing + callers. Opens the door to in-memory fakes in future tests without + loading the full ISO 639 YAML. - **Moved `detect_media_type` and `enrich_from_probe` from `alfred.application.filesystem` to `alfred.application.release`**. They are inspection-pipeline helpers — their natural home is next to diff --git a/alfred/domain/shared/ports/__init__.py b/alfred/domain/shared/ports/__init__.py index ee7b70c..4cd0c44 100644 --- a/alfred/domain/shared/ports/__init__.py +++ b/alfred/domain/shared/ports/__init__.py @@ -7,11 +7,13 @@ Protocol without going through real I/O. 
""" from .filesystem_scanner import FileEntry, FilesystemScanner +from .language_repository import LanguageRepository from .media_prober import MediaProber, SubtitleStreamInfo __all__ = [ "FileEntry", "FilesystemScanner", + "LanguageRepository", "MediaProber", "SubtitleStreamInfo", ] diff --git a/alfred/domain/shared/ports/language_repository.py b/alfred/domain/shared/ports/language_repository.py new file mode 100644 index 0000000..d62c137 --- /dev/null +++ b/alfred/domain/shared/ports/language_repository.py @@ -0,0 +1,36 @@ +"""LanguageRepository port — abstracts canonical language lookup. + +The adapter (typically loading from ISO 639 YAML knowledge) maps a wide +range of raw forms (codes, English/native names, aliases) onto the +canonical :class:`Language` value object. Domain code accepts the port +via constructor injection; tests can pass a small in-memory fake. +""" + +from __future__ import annotations + +from typing import Protocol + +from alfred.domain.shared.value_objects import Language + + +class LanguageRepository(Protocol): + """Canonical language lookup.""" + + def from_iso(self, code: str) -> Language | None: + """Look up by canonical ISO 639-2/B code (case-insensitive).""" + ... + + def from_any(self, raw: str) -> Language | None: + """Look up by any known representation: ISO code, name, alias. + + Case-insensitive. Returns ``None`` when the raw form is unknown. + """ + ... + + def all(self) -> list[Language]: + """Return all known languages, in a stable order.""" + ... + + def __contains__(self, raw: str) -> bool: ... + + def __len__(self) -> int: ... 
diff --git a/alfred/infrastructure/knowledge/subtitles/base.py b/alfred/infrastructure/knowledge/subtitles/base.py index a3d42eb..e5e178b 100644 --- a/alfred/infrastructure/knowledge/subtitles/base.py +++ b/alfred/infrastructure/knowledge/subtitles/base.py @@ -2,7 +2,7 @@ import logging -from alfred.infrastructure.knowledge.language_registry import LanguageRegistry +from alfred.domain.shared.ports import LanguageRepository from alfred.domain.subtitles.value_objects import ( ScanStrategy, SubtitleFormat, @@ -12,6 +12,8 @@ from alfred.domain.subtitles.value_objects import ( SubtitleType, TypeDetectionMethod, ) +from alfred.infrastructure.knowledge.language_registry import LanguageRegistry + from .loader import KnowledgeLoader logger = logging.getLogger(__name__) @@ -28,10 +30,14 @@ class SubtitleKnowledgeBase: def __init__( self, loader: KnowledgeLoader | None = None, - language_registry: LanguageRegistry | None = None, + language_registry: LanguageRepository | None = None, ): self._loader = loader or KnowledgeLoader() - self._language_registry = language_registry or LanguageRegistry() + # Compare against None rather than using `or`: the port defines __len__, + # so an empty injected repository is falsy and would be silently replaced. + if language_registry is None: + language_registry = LanguageRegistry() + self._language_registry: LanguageRepository = language_registry self._build() def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection diff --git a/tests/infrastructure/test_language_registry.py b/tests/infrastructure/test_language_registry.py new file mode 100644 index 0000000..3e6fd7e --- /dev/null +++ b/tests/infrastructure/test_language_registry.py @@ -0,0 +1,82 @@ +"""Tests for ``LanguageRegistry`` — the YAML-backed adapter for the +:class:`alfred.domain.shared.ports.LanguageRepository` port. + +The port is structural (Protocol), so the assertion that the adapter +satisfies it is a static one — we exercise the public surface here and +let mypy / runtime polymorphism do the rest. 
+""" + +from __future__ import annotations + +from alfred.domain.shared.ports import LanguageRepository +from alfred.domain.shared.value_objects import Language +from alfred.infrastructure.knowledge.language_registry import LanguageRegistry + + +def _registry() -> LanguageRepository: + """Return a fresh registry typed as the port — proves structural fit.""" + return LanguageRegistry() + + +class TestPortSurface: + def test_satisfies_protocol(self): + # If LanguageRegistry diverged from LanguageRepository, the annotation + # below would already be wrong at type-check time; at runtime, this + # just confirms the methods exist. + reg: LanguageRepository = LanguageRegistry() + assert hasattr(reg, "from_iso") + assert hasattr(reg, "from_any") + assert hasattr(reg, "all") + + def test_len_reflects_loaded_entries(self): + reg = _registry() + # The builtin YAML ships dozens of languages — exact count drifts + # with knowledge updates, so just sanity-check it's non-empty. + assert len(reg) > 0 + + +class TestFromIso: + def test_known_iso_returns_language(self): + reg = _registry() + fre = reg.from_iso("fre") + assert isinstance(fre, Language) + assert fre.iso == "fre" + + def test_case_insensitive(self): + reg = _registry() + assert reg.from_iso("FRE") == reg.from_iso("fre") + + def test_unknown_iso_returns_none(self): + assert _registry().from_iso("zzz") is None + + def test_non_string_returns_none(self): + assert _registry().from_iso(None) is None # type: ignore[arg-type] + + +class TestFromAny: + def test_english_name(self): + reg = _registry() + lang = reg.from_any("French") + assert lang is not None + assert lang.iso == "fre" + + def test_iso_639_1_alias(self): + # "fr" is the 639-1 form, registered as an alias. 
+ reg = _registry() + lang = reg.from_any("fr") + assert lang is not None + assert lang.iso == "fre" + + def test_unknown_returns_none(self): + assert _registry().from_any("vostfr") is None + + def test_non_string_returns_none(self): + assert _registry().from_any(123) is None # type: ignore[arg-type] + + +class TestMembership: + def test_contains_known(self): + assert "english" in _registry() + + def test_does_not_contain_unknown(self): + assert "klingon" not in _registry()