Merge branch 'refactor/language-port'
This commit is contained in:
@@ -15,8 +15,23 @@ callers).
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **`LanguageRepository` port** in `alfred.domain.shared.ports`. Structural
|
||||||
|
Protocol covering `from_iso`, `from_any`, `all`, `__contains__`, `__len__`
|
||||||
|
— the surface previously coupled to the concrete `LanguageRegistry`.
|
||||||
|
Mirrors the `MediaProber` / `FilesystemScanner` pattern: domain code
|
||||||
|
depends on the Protocol, infrastructure provides the YAML-backed
|
||||||
|
adapter. Tests in `tests/infrastructure/test_language_registry.py`.
|
||||||
|
|
||||||
### Internal
|
### Internal
|
||||||
|
|
||||||
|
- **`SubtitleKnowledgeBase` types `language_registry` against the
|
||||||
|
`LanguageRepository` port** instead of the concrete `LanguageRegistry`
|
||||||
|
class. The default constructor still instantiates the concrete adapter
|
||||||
|
when no repository is injected — behaviour is unchanged for existing
|
||||||
|
callers. Opens the door to in-memory fakes in future tests without
|
||||||
|
loading the full ISO 639 YAML.
|
||||||
- **Moved `detect_media_type` and `enrich_from_probe` from
|
- **Moved `detect_media_type` and `enrich_from_probe` from
|
||||||
`alfred.application.filesystem` to `alfred.application.release`**.
|
`alfred.application.filesystem` to `alfred.application.release`**.
|
||||||
They are inspection-pipeline helpers — their natural home is next to
|
They are inspection-pipeline helpers — their natural home is next to
|
||||||
|
|||||||
@@ -7,11 +7,13 @@ Protocol without going through real I/O.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from .filesystem_scanner import FileEntry, FilesystemScanner
|
from .filesystem_scanner import FileEntry, FilesystemScanner
|
||||||
|
from .language_repository import LanguageRepository
|
||||||
from .media_prober import MediaProber, SubtitleStreamInfo
|
from .media_prober import MediaProber, SubtitleStreamInfo
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"FileEntry",
|
"FileEntry",
|
||||||
"FilesystemScanner",
|
"FilesystemScanner",
|
||||||
|
"LanguageRepository",
|
||||||
"MediaProber",
|
"MediaProber",
|
||||||
"SubtitleStreamInfo",
|
"SubtitleStreamInfo",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
"""LanguageRepository port — abstracts canonical language lookup.
|
||||||
|
|
||||||
|
The adapter (typically loading from ISO 639 YAML knowledge) maps a wide
|
||||||
|
range of raw forms (codes, English/native names, aliases) onto the
|
||||||
|
canonical :class:`Language` value object. Domain code accepts the port
|
||||||
|
via constructor injection; tests can pass a small in-memory fake.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
from alfred.domain.shared.value_objects import Language
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageRepository(Protocol):
    """Canonical language lookup.

    Structural port: any object that provides these five members satisfies
    it, no inheritance required.
    """

    def from_iso(self, code: str) -> Language | None:
        """Look up by canonical ISO 639-2/B code (case-insensitive)."""
        ...

    def from_any(self, raw: str) -> Language | None:
        """Look up by any known representation: ISO code, name, alias.

        Case-insensitive. Returns ``None`` when the raw form is unknown.
        """
        ...

    def all(self) -> list[Language]:
        """Return all known languages, in a stable order."""
        ...

    def __contains__(self, raw: str) -> bool:
        """Support ``raw in repository`` membership tests."""
        ...

    def __len__(self) -> int:
        """Support ``len(repository)``.

        Because ``__len__`` is defined, an empty repository is falsy.
        Callers holding an optional repository should compare against
        ``None`` instead of relying on truthiness.
        """
        ...
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
from alfred.domain.shared.ports import LanguageRepository
|
||||||
from alfred.domain.subtitles.value_objects import (
|
from alfred.domain.subtitles.value_objects import (
|
||||||
ScanStrategy,
|
ScanStrategy,
|
||||||
SubtitleFormat,
|
SubtitleFormat,
|
||||||
@@ -12,6 +12,8 @@ from alfred.domain.subtitles.value_objects import (
|
|||||||
SubtitleType,
|
SubtitleType,
|
||||||
TypeDetectionMethod,
|
TypeDetectionMethod,
|
||||||
)
|
)
|
||||||
|
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
||||||
|
|
||||||
from .loader import KnowledgeLoader
|
from .loader import KnowledgeLoader
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -28,10 +30,12 @@ class SubtitleKnowledgeBase:
|
|||||||
def __init__(
    self,
    loader: KnowledgeLoader | None = None,
    language_registry: LanguageRepository | None = None,
):
    """Store the collaborators and build the knowledge base.

    Args:
        loader: Knowledge loader to use. A default ``KnowledgeLoader()``
            is created when ``None``.
        language_registry: Any object satisfying the
            ``LanguageRepository`` port. The concrete
            ``LanguageRegistry()`` adapter is created when ``None``.
    """
    # Compare against None instead of using ``or``: the LanguageRepository
    # port defines ``__len__``, so an injected but empty repository (e.g. a
    # small in-memory fake) is falsy and ``or`` would silently replace it
    # with the default adapter.
    self._loader = loader if loader is not None else KnowledgeLoader()
    self._language_registry: LanguageRepository = (
        language_registry if language_registry is not None else LanguageRegistry()
    )
    self._build()
|
self._build()
|
||||||
|
|
||||||
def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection
|
def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection
|
||||||
|
|||||||
@@ -0,0 +1,82 @@
|
|||||||
|
"""Tests for ``LanguageRegistry`` — the YAML-backed adapter for the
|
||||||
|
:class:`alfred.domain.shared.ports.LanguageRepository` port.
|
||||||
|
|
||||||
|
The port is structural (Protocol), so the assertion that the adapter
|
||||||
|
satisfies it is a static one — we exercise the public surface here and
|
||||||
|
let mypy / runtime polymorphism do the rest.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from alfred.domain.shared.ports import LanguageRepository
|
||||||
|
from alfred.domain.shared.value_objects import Language
|
||||||
|
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
|
||||||
|
|
||||||
|
|
||||||
|
def _registry() -> LanguageRepository:
    """Return a fresh registry typed as the port — proves structural fit."""
    return LanguageRegistry()
|
||||||
|
|
||||||
|
|
||||||
|
class TestPortSurface:
    """Checks that the adapter exposes the surface declared by the port."""

    def test_satisfies_protocol(self):
        # If LanguageRegistry diverged from LanguageRepository, the annotation
        # below would already be wrong at type-check time; at runtime, this
        # just confirms the members exist.
        reg: LanguageRepository = LanguageRegistry()
        assert hasattr(reg, "from_iso")
        assert hasattr(reg, "from_any")
        assert hasattr(reg, "all")
        # The port also declares the two dunder members; check them too so
        # the runtime check covers the whole Protocol surface.
        assert hasattr(reg, "__contains__")
        assert hasattr(reg, "__len__")

    def test_len_reflects_loaded_entries(self):
        reg = _registry()
        # The builtin YAML ships dozens of languages — exact count drifts
        # with knowledge updates, so just sanity-check it's non-empty.
        assert len(reg) > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestFromIso:
    """Exercises ``from_iso`` lookups on the registry."""

    def test_known_iso_returns_language(self):
        reg = _registry()
        fre = reg.from_iso("fre")
        assert isinstance(fre, Language)
        assert fre.iso == "fre"

    def test_case_insensitive(self):
        reg = _registry()
        assert reg.from_iso("FRE") == reg.from_iso("fre")

    def test_unknown_iso_returns_none(self):
        assert _registry().from_iso("zzz") is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotated parameter type.
        assert _registry().from_iso(None) is None  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
class TestFromAny:
    """Exercises ``from_any`` lookups on the registry."""

    def test_english_name(self):
        reg = _registry()
        lang = reg.from_any("French")
        assert lang is not None
        assert lang.iso == "fre"

    def test_iso_639_1_alias(self):
        # "fr" is the 639-1 form, registered as an alias.
        reg = _registry()
        lang = reg.from_any("fr")
        assert lang is not None
        assert lang.iso == "fre"

    def test_unknown_returns_none(self):
        assert _registry().from_any("vostfr") is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotated parameter type.
        assert _registry().from_any(123) is None  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
class TestMembership:
    """Exercises the ``in`` operator on the registry."""

    def test_contains_known(self):
        assert "english" in _registry()

    def test_does_not_contain_unknown(self):
        assert "klingon" not in _registry()
|
||||||
Reference in New Issue
Block a user