refactor(release): introduce ReleaseKnowledge Protocol port + YamlReleaseKnowledge adapter
Adds the port/adapter pair that lets the release domain consume parsing knowledge without importing infrastructure or loading YAML at import time.
- alfred/domain/release/ports/knowledge.py declares the read-only query surface: token sets (resolutions, sources, codecs, language_tokens, forbidden_chars, hdr_extra), structured dicts (audio, video_meta, editions, media_type_tokens), the separators list, file-extension sets, and sanitize_for_fs(text).
- alfred/infrastructure/knowledge/release_kb.py loads every YAML once at construction and exposes the results as attributes, with a precomputed str.maketrans table backing sanitize_for_fs.
No domain code is wired to the port yet — that lands in the next commit.
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
"""Domain ports for the release domain.
|
||||
|
||||
Protocol-based abstractions that decouple ``parse_release`` and
|
||||
``ParsedRelease`` from any concrete knowledge-base loader. The
|
||||
infrastructure layer provides the adapter that satisfies this contract.
|
||||
"""
|
||||
|
||||
from .knowledge import ReleaseKnowledge
|
||||
|
||||
__all__ = ["ReleaseKnowledge"]
|
||||
@@ -0,0 +1,52 @@
|
||||
"""ReleaseKnowledge port — the read-only query surface that
|
||||
``parse_release`` and ``ParsedRelease`` need from the release knowledge
|
||||
base, expressed as a structural Protocol so the domain never imports any
|
||||
concrete loader.
|
||||
|
||||
The concrete YAML-backed implementation lives in
|
||||
``alfred/infrastructure/knowledge/release_kb.py``. Tests can supply any
|
||||
object that satisfies this shape (e.g. a simple dataclass).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
class ReleaseKnowledge(Protocol):
    """Structural contract for release-name parsing knowledge.

    Any object that exposes the attributes and the method declared here
    satisfies the port. Consumers are expected to treat the values as a
    read-only snapshot.
    """

    # Vocabularies consulted by the tokenizer and the matchers.
    resolutions: set[str]
    sources: set[str]
    codecs: set[str]
    language_tokens: set[str]
    forbidden_chars: set[str]
    hdr_extra: set[str]

    # Structured knowledge, kept in the dict form it has after YAML loading.
    audio: dict
    video_meta: dict
    editions: dict
    media_type_tokens: dict

    # Separators used by the tokenizer.
    separators: list[str]

    # File-extension sets. Domain parsing itself does not read these; they
    # serve application/infra modules that work directly with filesystem
    # paths (e.g. media-type detection, video lookup).
    video_extensions: set[str]
    non_video_extensions: set[str]
    subtitle_extensions: set[str]
    metadata_extensions: set[str]

    # Filesystem sanitization (Option B: pre-sanitize at parse time).
    def sanitize_for_fs(self, text: str) -> str:
        """Return ``text`` with filesystem-forbidden characters removed."""
        ...
|
||||
@@ -0,0 +1,83 @@
|
||||
"""YamlReleaseKnowledge — concrete adapter for the ``ReleaseKnowledge``
|
||||
domain port.
|
||||
|
||||
Loads every release-knowledge YAML once at construction time and exposes
|
||||
the parsed snapshots as plain attributes. The application layer builds a
|
||||
single instance at boot and passes it down to ``parse_release`` and to
|
||||
``ParsedRelease`` builder methods.
|
||||
|
||||
The file-extension sets (``video_extensions``, ``non_video_extensions``,
|
||||
``subtitle_extensions``, ``metadata_extensions``) are declared on the
|
||||
domain port but are not used by domain parsing itself — they are consumed
|
||||
by application/infra modules that handle filesystem-level concerns.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .release import (
|
||||
load_audio,
|
||||
load_codecs,
|
||||
load_editions,
|
||||
load_forbidden_chars,
|
||||
load_hdr_extra,
|
||||
load_language_tokens,
|
||||
load_media_type_tokens,
|
||||
load_metadata_extensions,
|
||||
load_non_video_extensions,
|
||||
load_resolutions,
|
||||
load_separators,
|
||||
load_sources,
|
||||
load_sources_extra,
|
||||
load_subtitle_extensions,
|
||||
load_video,
|
||||
load_video_extensions,
|
||||
load_win_forbidden_chars,
|
||||
)
|
||||
|
||||
|
||||
class YamlReleaseKnowledge:
    """Eagerly loaded bundle of every release-parsing knowledge constant.

    Each ``load_*`` helper is called once, inside ``__init__``, and its
    result is stored as a plain instance attribute. The object is meant to
    be built once at application boot and treated as a read-only snapshot
    by its call sites. To pick up newly learned tokens without a restart,
    construct a new instance and swap it in at the call sites.
    """

    def __init__(self) -> None:
        """Run every loader and keep its result on the instance."""
        # --- Domain-port surface: token sets ---
        self.resolutions: set[str] = load_resolutions()
        # ``sources`` merges the base list with the extra list.
        base_sources = load_sources()
        self.sources: set[str] = base_sources | load_sources_extra()
        self.codecs: set[str] = load_codecs()
        self.language_tokens: set[str] = load_language_tokens()
        self.forbidden_chars: set[str] = load_forbidden_chars()
        self.hdr_extra: set[str] = load_hdr_extra()

        # --- Domain-port surface: structured dicts ---
        self.audio: dict = load_audio()
        self.video_meta: dict = load_video()
        self.editions: dict = load_editions()
        self.media_type_tokens: dict = load_media_type_tokens()

        # --- Domain-port surface: tokenizer separators ---
        self.separators: list[str] = load_separators()

        # --- File-extension sets ---
        # Consumed by application/infra modules rather than by the parser;
        # they live here so release knowledge has a single owner.
        self.video_extensions: set[str] = load_video_extensions()
        self.non_video_extensions: set[str] = load_non_video_extensions()
        self.subtitle_extensions: set[str] = load_subtitle_extensions()

        # Neither metadata nor subtitle files are a conclusive signal for
        # movie/tv/other when classifying a folder, so both are ignored
        # there. Their union is published under the historical name
        # ``metadata_extensions``.
        metadata_only = load_metadata_extensions()
        self.metadata_extensions: set[str] = (
            metadata_only | self.subtitle_extensions
        )

        # Deletion table for ``str.translate``: every Windows-forbidden
        # character maps to ``None``.
        win_forbidden = "".join(load_win_forbidden_chars())
        self._win_forbidden_table = str.maketrans("", "", win_forbidden)

    def sanitize_for_fs(self, text: str) -> str:
        """Return ``text`` with Windows-forbidden characters removed."""
        deletion_table = self._win_forbidden_table
        return text.translate(deletion_table)
|
||||
Reference in New Issue
Block a user