Merge branch 'refactor/language-port'
This commit is contained in:
@@ -15,8 +15,23 @@ callers).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- **`LanguageRepository` port** in `alfred.domain.shared.ports`. Structural
|
||||
Protocol covering `from_iso`, `from_any`, `all`, `__contains__`, `__len__`
|
||||
— the surface previously coupled to the concrete `LanguageRegistry`.
|
||||
Mirrors the `MediaProber` / `FilesystemScanner` pattern: domain code
|
||||
depends on the Protocol, infrastructure provides the YAML-backed
|
||||
adapter. Tests in `tests/infrastructure/test_language_registry.py`.
|
||||
|
||||
### Internal
|
||||
|
||||
- **`SubtitleKnowledgeBase` types `language_registry` against the
|
||||
`LanguageRepository` port** instead of the concrete `LanguageRegistry`
|
||||
class. The default constructor still instantiates the concrete adapter
|
||||
when no repository is injected — behaviour is unchanged for existing
|
||||
callers. Opens the door to in-memory fakes in future tests without
|
||||
loading the full ISO 639 YAML.
|
||||
- **Moved `detect_media_type` and `enrich_from_probe` from
|
||||
`alfred.application.filesystem` to `alfred.application.release`**.
|
||||
They are inspection-pipeline helpers — their natural home is next to
|
||||
|
||||
@@ -7,11 +7,13 @@ Protocol without going through real I/O.
|
||||
"""
|
||||
|
||||
from .filesystem_scanner import FileEntry, FilesystemScanner
|
||||
from .language_repository import LanguageRepository
|
||||
from .media_prober import MediaProber, SubtitleStreamInfo
|
||||
|
||||
__all__ = [
|
||||
"FileEntry",
|
||||
"FilesystemScanner",
|
||||
"LanguageRepository",
|
||||
"MediaProber",
|
||||
"SubtitleStreamInfo",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
"""LanguageRepository port — abstracts canonical language lookup.
|
||||
|
||||
The adapter (typically loading from ISO 639 YAML knowledge) maps a wide
|
||||
range of raw forms (codes, English/native names, aliases) onto the
|
||||
canonical :class:`Language` value object. Domain code accepts the port
|
||||
via constructor injection; tests can pass a small in-memory fake.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
from alfred.domain.shared.value_objects import Language
|
||||
|
||||
|
||||
class LanguageRepository(Protocol):
    """Port for resolving raw language strings to canonical languages.

    This is a structural interface: any object exposing the members below
    with compatible signatures satisfies it, no inheritance required.
    """

    def from_iso(self, code: str) -> Language | None:
        """Resolve a canonical ISO 639-2/B code, ignoring case."""
        ...

    def from_any(self, raw: str) -> Language | None:
        """Resolve any known representation of a language.

        Accepted forms are ISO codes, names and aliases, matched without
        regard to case. ``None`` is returned when ``raw`` is not known.
        """
        ...

    def all(self) -> list[Language]:
        """Return every known language, in a stable order."""
        ...

    def __contains__(self, raw: str) -> bool:
        """Support ``raw in repository`` membership tests."""
        ...

    def __len__(self) -> int:
        """Support ``len(repository)``."""
        ...
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import logging
|
||||
|
||||
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
||||
from alfred.domain.shared.ports import LanguageRepository
|
||||
from alfred.domain.subtitles.value_objects import (
|
||||
ScanStrategy,
|
||||
SubtitleFormat,
|
||||
@@ -12,6 +12,8 @@ from alfred.domain.subtitles.value_objects import (
|
||||
SubtitleType,
|
||||
TypeDetectionMethod,
|
||||
)
|
||||
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
||||
|
||||
from .loader import KnowledgeLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -28,10 +30,12 @@ class SubtitleKnowledgeBase:
|
||||
def __init__(
    self,
    loader: KnowledgeLoader | None = None,
    language_registry: LanguageRepository | None = None,
):
    """Wire the collaborators, then build the knowledge base.

    Args:
        loader: Knowledge loader to use. A default ``KnowledgeLoader()``
            is created when ``None``.
        language_registry: Any ``LanguageRepository`` implementation. The
            concrete ``LanguageRegistry`` adapter is created when ``None``.
    """
    # Compare against None instead of relying on truthiness: the
    # LanguageRepository port declares __len__, so an injected repository
    # that happens to be empty (e.g. a small in-memory fake) is falsy and
    # ``x or Default()`` would silently swap it for the default adapter.
    self._loader = loader if loader is not None else KnowledgeLoader()
    self._language_registry: LanguageRepository = (
        language_registry
        if language_registry is not None
        else LanguageRegistry()
    )
    self._build()
|
||||
|
||||
def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
"""Tests for ``LanguageRegistry`` — the YAML-backed adapter for the
|
||||
:class:`alfred.domain.shared.ports.LanguageRepository` port.
|
||||
|
||||
The port is structural (Protocol), so the assertion that the adapter
|
||||
satisfies it is a static one — we exercise the public surface here and
|
||||
let mypy / runtime polymorphism do the rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from alfred.domain.shared.ports import LanguageRepository
|
||||
from alfred.domain.shared.value_objects import Language
|
||||
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
||||
|
||||
|
||||
def _registry() -> LanguageRepository:
    """Build a new ``LanguageRegistry`` and hand it back typed as the port.

    The annotation is the point: a type checker only accepts it when the
    adapter structurally matches ``LanguageRepository``.
    """
    registry: LanguageRepository = LanguageRegistry()
    return registry
|
||||
|
||||
|
||||
class TestPortSurface:
    """The adapter exposes every member declared by the port."""

    # Full surface of the LanguageRepository port, dunder methods included.
    PORT_MEMBERS = (
        "from_iso",
        "from_any",
        "all",
        "__contains__",
        "__len__",
    )

    def test_satisfies_protocol(self):
        # If LanguageRegistry diverged from LanguageRepository, the
        # annotation below would already be wrong at type-check time; at
        # runtime, this confirms every port member exists and is callable.
        reg: LanguageRepository = LanguageRegistry()
        for member in self.PORT_MEMBERS:
            assert callable(getattr(reg, member, None)), member

    def test_len_reflects_loaded_entries(self):
        reg = _registry()
        # The builtin YAML ships dozens of languages — exact count drifts
        # with knowledge updates, so just sanity-check it's non-empty.
        assert len(reg) > 0
|
||||
|
||||
|
||||
class TestFromIso:
    """``from_iso`` lookups through the port-typed registry."""

    def test_known_iso_returns_language(self):
        # A known code yields a Language carrying that same code.
        language = _registry().from_iso("fre")
        assert isinstance(language, Language)
        assert language.iso == "fre"

    def test_case_insensitive(self):
        # Upper- and lower-case spellings must resolve to equal results.
        registry = _registry()
        upper = registry.from_iso("FRE")
        lower = registry.from_iso("fre")
        assert upper == lower

    def test_unknown_iso_returns_none(self):
        registry = _registry()
        assert registry.from_iso("zzz") is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotation to exercise the guard.
        registry = _registry()
        assert registry.from_iso(None) is None  # type: ignore[arg-type]
|
||||
|
||||
|
||||
class TestFromAny:
    """``from_any`` lookups through the port-typed registry."""

    def test_english_name(self):
        # An English name resolves to the matching language.
        language = _registry().from_any("French")
        assert language is not None
        assert language.iso == "fre"

    def test_iso_639_1_alias(self):
        # "fr" is the 639-1 form, registered as an alias.
        language = _registry().from_any("fr")
        assert language is not None
        assert language.iso == "fre"

    def test_unknown_returns_none(self):
        registry = _registry()
        assert registry.from_any("vostfr") is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotation to exercise the guard.
        registry = _registry()
        assert registry.from_any(123) is None  # type: ignore[arg-type]
|
||||
|
||||
|
||||
class TestMembership:
    """``in`` / ``not in`` checks against the registry."""

    def test_contains_known(self):
        registry = _registry()
        assert "english" in registry

    def test_does_not_contain_unknown(self):
        registry = _registry()
        assert "klingon" not in registry
|
||||
Reference in New Issue
Block a user