Merge branch 'refactor/language-port'

This commit is contained in:
2026-05-20 23:20:18 +02:00
5 changed files with 142 additions and 3 deletions
+15
View File
@@ -15,8 +15,23 @@ callers).
## [Unreleased]
### Added
- **`LanguageRepository` port** in `alfred.domain.shared.ports`. Structural
Protocol covering `from_iso`, `from_any`, `all`, `__contains__`, `__len__`
— the surface previously coupled to the concrete `LanguageRegistry`.
Mirrors the `MediaProber` / `FilesystemScanner` pattern: domain code
depends on the Protocol, infrastructure provides the YAML-backed
adapter. Tests in `tests/infrastructure/test_language_registry.py`.
### Internal
- **`SubtitleKnowledgeBase` types `language_registry` against the
`LanguageRepository` port** instead of the concrete `LanguageRegistry`
class. The default constructor still instantiates the concrete adapter
when no repository is injected — behaviour is unchanged for existing
callers. Opens the door to in-memory fakes in future tests without
loading the full ISO 639 YAML.
- **Moved `detect_media_type` and `enrich_from_probe` from
`alfred.application.filesystem` to `alfred.application.release`**.
They are inspection-pipeline helpers — their natural home is next to
+2
View File
@@ -7,11 +7,13 @@ Protocol without going through real I/O.
"""
from .filesystem_scanner import FileEntry, FilesystemScanner
from .language_repository import LanguageRepository
from .media_prober import MediaProber, SubtitleStreamInfo
__all__ = [
"FileEntry",
"FilesystemScanner",
"LanguageRepository",
"MediaProber",
"SubtitleStreamInfo",
]
@@ -0,0 +1,36 @@
"""LanguageRepository port — abstracts canonical language lookup.
The adapter (typically loading from ISO 639 YAML knowledge) maps a wide
range of raw forms (codes, English/native names, aliases) onto the
canonical :class:`Language` value object. Domain code accepts the port
via constructor injection; tests can pass a small in-memory fake.
"""
from __future__ import annotations
from typing import Protocol
from alfred.domain.shared.value_objects import Language
class LanguageRepository(Protocol):
    """Structural port for resolving raw inputs to canonical languages.

    Any object providing the members below satisfies the port; no
    inheritance is required.
    """

    def from_iso(self, code: str) -> Language | None:
        """Resolve a canonical ISO 639-2/B code, ignoring case.

        ``None`` is the no-match result.
        """

    def from_any(self, raw: str) -> Language | None:
        """Resolve any known representation of a language.

        Accepted forms are an ISO code, a name, or an alias, matched
        without regard to case. ``None`` is returned for an unknown form.
        """

    def all(self) -> list[Language]:
        """List every known language; the ordering is stable."""

    def __contains__(self, raw: str) -> bool:
        """Support ``raw in repository`` membership tests."""

    def __len__(self) -> int:
        """Support ``len(repository)``."""
@@ -2,7 +2,7 @@
import logging
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
from alfred.domain.shared.ports import LanguageRepository
from alfred.domain.subtitles.value_objects import (
ScanStrategy,
SubtitleFormat,
@@ -12,6 +12,8 @@ from alfred.domain.subtitles.value_objects import (
SubtitleType,
TypeDetectionMethod,
)
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
from .loader import KnowledgeLoader
logger = logging.getLogger(__name__)
@@ -28,10 +30,12 @@ class SubtitleKnowledgeBase:
def __init__(
    self,
    loader: KnowledgeLoader | None = None,
    language_registry: LanguageRepository | None = None,
):
    """Wire up the collaborators, then build the knowledge base.

    Args:
        loader: Knowledge loader to use; a default ``KnowledgeLoader`` is
            created when omitted.
        language_registry: Any ``LanguageRepository`` implementation; the
            concrete ``LanguageRegistry`` adapter is created when omitted.
    """
    # Compare against ``None`` instead of relying on truthiness: the
    # ``LanguageRepository`` port declares ``__len__``, so an injected but
    # empty repository is falsy and ``x or Default()`` would silently
    # swap it for the default adapter.
    if loader is None:
        loader = KnowledgeLoader()
    if language_registry is None:
        language_registry = LanguageRegistry()
    self._loader = loader
    self._language_registry: LanguageRepository = language_registry
    self._build()
def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection
@@ -0,0 +1,82 @@
"""Tests for ``LanguageRegistry`` — the YAML-backed adapter for the
:class:`alfred.domain.shared.ports.LanguageRepository` port.
The port is structural (Protocol), so the assertion that the adapter
satisfies it is a static one — we exercise the public surface here and
let mypy / runtime polymorphism do the rest.
"""
from __future__ import annotations
from alfred.domain.shared.ports import LanguageRepository
from alfred.domain.shared.value_objects import Language
from alfred.infrastructure.knowledge.language_registry import LanguageRegistry
def _registry() -> LanguageRepository:
    """Build a new ``LanguageRegistry`` and hand it back as the port type.

    Annotating the result as ``LanguageRepository`` lets the type checker
    verify that the adapter structurally fits the port.
    """
    adapter: LanguageRepository = LanguageRegistry()
    return adapter
class TestPortSurface:
    """The adapter exposes every member the ``LanguageRepository`` port declares."""

    def test_satisfies_protocol(self):
        # Static conformance is the type checker's job: the annotation below
        # would be rejected if LanguageRegistry diverged from the port. At
        # runtime we confirm that each member the port declares is present,
        # including the two dunder methods.
        reg: LanguageRepository = LanguageRegistry()
        for member in (
            "from_iso",
            "from_any",
            "all",
            "__contains__",
            "__len__",
        ):
            assert hasattr(reg, member), member

    def test_len_reflects_loaded_entries(self):
        reg = _registry()
        # The builtin YAML ships dozens of languages — exact count drifts
        # with knowledge updates, so just sanity-check it's non-empty.
        assert len(reg) > 0
class TestFromIso:
    """Lookups through ``from_iso``."""

    def test_known_iso_returns_language(self):
        # A known code resolves to a Language carrying that same code.
        language = _registry().from_iso("fre")
        assert isinstance(language, Language)
        assert language.iso == "fre"

    def test_case_insensitive(self):
        # Upper- and lower-case spellings must resolve to equal results.
        reg = _registry()
        upper = reg.from_iso("FRE")
        lower = reg.from_iso("fre")
        assert upper == lower

    def test_unknown_iso_returns_none(self):
        result = _registry().from_iso("zzz")
        assert result is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotation to exercise the guard path.
        result = _registry().from_iso(None)  # type: ignore[arg-type]
        assert result is None
class TestFromAny:
    """Lookups through ``from_any``."""

    def test_english_name(self):
        # An English name resolves to the language with the matching code.
        language = _registry().from_any("French")
        assert language is not None
        assert language.iso == "fre"

    def test_iso_639_1_alias(self):
        # "fr" is the 639-1 form, registered as an alias.
        language = _registry().from_any("fr")
        assert language is not None
        assert language.iso == "fre"

    def test_unknown_returns_none(self):
        result = _registry().from_any("vostfr")
        assert result is None

    def test_non_string_returns_none(self):
        # Deliberately violates the annotation to exercise the guard path.
        result = _registry().from_any(123)  # type: ignore[arg-type]
        assert result is None
class TestMembership:
    """Membership checks via the ``in`` operator."""

    def test_contains_known(self):
        reg = _registry()
        assert "english" in reg

    def test_does_not_contain_unknown(self):
        reg = _registry()
        assert "klingon" not in reg