refactor(release): introduce ReleaseKnowledge Protocol port + YamlReleaseKnowledge adapter

Adds the port/adapter pair that lets the release domain consume parsing
knowledge without importing infrastructure or loading YAML at import time.

- alfred/domain/release/ports/knowledge.py declares the read-only query
  surface: token sets (resolutions, sources, codecs, language_tokens,
  forbidden_chars, hdr_extra), structured dicts (audio, video_meta,
  editions, media_type_tokens), separators list, file-extension sets,
  and sanitize_for_fs(text).
- alfred/infrastructure/knowledge/release_kb.py loads every YAML once
  at construction and exposes them as attributes, with a precomputed
  str.maketrans table backing sanitize_for_fs.

No domain code is wired to the port yet — that lands in the next commit.
This commit is contained in:
2026-05-19 22:05:01 +02:00
parent 14941d47c0
commit c3a3cb50c9
3 changed files with 145 additions and 0 deletions
+10
View File
@@ -0,0 +1,10 @@
"""Domain ports for the release domain.
Protocol-based abstractions that decouple ``parse_release`` and
``ParsedRelease`` from any concrete knowledge-base loader. The
infrastructure layer provides the adapter that satisfies this contract.
"""
from .knowledge import ReleaseKnowledge
__all__ = ["ReleaseKnowledge"]
+52
View File
@@ -0,0 +1,52 @@
"""ReleaseKnowledge port — the read-only query surface that
``parse_release`` and ``ParsedRelease`` need from the release knowledge
base, expressed as a structural Protocol so the domain never imports any
concrete loader.
The concrete YAML-backed implementation lives in
``alfred/infrastructure/knowledge/release_kb.py``. Tests can supply any
object that satisfies this shape (e.g. a simple dataclass).
"""
from __future__ import annotations
from typing import Protocol
class ReleaseKnowledge(Protocol):
    """Structural contract for release-name parsing knowledge.

    Any object that exposes the attributes below and a
    ``sanitize_for_fs`` method satisfies this port. Consumers are
    expected to treat the data as a read-only snapshot.
    """

    # Flat token collections consulted by the tokenizer and matchers.
    resolutions: set[str]
    sources: set[str]
    codecs: set[str]
    language_tokens: set[str]
    forbidden_chars: set[str]
    hdr_extra: set[str]

    # Nested knowledge, kept in the dict form it has after YAML loading.
    audio: dict
    video_meta: dict
    editions: dict
    media_type_tokens: dict

    # Separators the tokenizer uses.
    separators: list[str]

    # File-extension sets. Domain parsing does not read these; they are
    # for application/infra modules that work directly with filesystem
    # paths (e.g. media-type detection, video lookup).
    video_extensions: set[str]
    non_video_extensions: set[str]
    subtitle_extensions: set[str]
    metadata_extensions: set[str]

    # Filesystem sanitization hook (noted in the original design as
    # "Option B: pre-sanitize at parse time").
    def sanitize_for_fs(self, text: str) -> str:
        """Return ``text`` with filesystem-forbidden characters removed."""
        ...
@@ -0,0 +1,83 @@
"""YamlReleaseKnowledge — concrete adapter for the ``ReleaseKnowledge``
domain port.
Loads every release-knowledge YAML once at construction time and exposes
the parsed snapshots as plain attributes. The application layer builds a
single instance at boot and passes it down to ``parse_release`` and to
``ParsedRelease`` builder methods.
The file-extension sets (``video_extensions``, ``non_video_extensions``,
``subtitle_extensions``, ``metadata_extensions``) are declared on the
port as well, but domain parsing does not touch them — they are consumed
by application/infra modules that handle filesystem-level concerns.
"""
from __future__ import annotations
from .release import (
load_audio,
load_codecs,
load_editions,
load_forbidden_chars,
load_hdr_extra,
load_language_tokens,
load_media_type_tokens,
load_metadata_extensions,
load_non_video_extensions,
load_resolutions,
load_separators,
load_sources,
load_sources_extra,
load_subtitle_extensions,
load_video,
load_video_extensions,
load_win_forbidden_chars,
)
class YamlReleaseKnowledge:
    """YAML-backed holder for every release-parsing knowledge constant.

    Intended to be built once at application boot and then treated as a
    read-only snapshot by call sites. To pick up newly learned tokens
    without a restart, construct a new instance and swap it in.
    """

    def __init__(self) -> None:
        """Run every loader once and store the results as attributes."""
        # --- Attributes consumed by the release parser ---
        self.resolutions: set[str] = load_resolutions()

        # Sources are the base set merged with the "extra" set.
        base_sources = load_sources()
        extra_sources = load_sources_extra()
        self.sources: set[str] = base_sources | extra_sources

        self.codecs: set[str] = load_codecs()
        self.language_tokens: set[str] = load_language_tokens()
        self.forbidden_chars: set[str] = load_forbidden_chars()
        self.hdr_extra: set[str] = load_hdr_extra()

        self.audio: dict = load_audio()
        self.video_meta: dict = load_video()
        self.editions: dict = load_editions()
        self.media_type_tokens: dict = load_media_type_tokens()

        self.separators: list[str] = load_separators()

        # --- File-extension sets ---
        # The parser does not use these; application/infra modules do.
        # They live here so release knowledge has one ownership point.
        self.video_extensions: set[str] = load_video_extensions()
        self.non_video_extensions: set[str] = load_non_video_extensions()
        subtitle_exts = load_subtitle_extensions()
        self.subtitle_extensions: set[str] = subtitle_exts

        # Neither metadata nor subtitle files are a conclusive signal
        # when deciding a folder's media type (movie/tv/other), so both
        # are ignored there; the union keeps the historical name.
        metadata_exts = load_metadata_extensions()
        self.metadata_extensions: set[str] = metadata_exts | subtitle_exts

        # --- Sanitization ---
        # Deletion-only translation table: every Windows-forbidden
        # character maps to None, so ``str.translate`` drops it.
        win_forbidden = "".join(load_win_forbidden_chars())
        self._win_forbidden_table = str.maketrans("", "", win_forbidden)

    def sanitize_for_fs(self, text: str) -> str:
        """Return ``text`` with Windows-forbidden characters removed."""
        deletion_table = self._win_forbidden_table
        return text.translate(deletion_table)