diff --git a/alfred/domain/release/ports/__init__.py b/alfred/domain/release/ports/__init__.py
new file mode 100644
index 0000000..b4ae2ec
--- /dev/null
+++ b/alfred/domain/release/ports/__init__.py
@@ -0,0 +1,10 @@
+"""Domain ports for the release domain.
+
+Protocol-based abstractions that decouple ``parse_release`` and
+``ParsedRelease`` from any concrete knowledge-base loader. The
+infrastructure layer provides the adapter that satisfies this contract.
+"""
+
+from .knowledge import ReleaseKnowledge
+
+__all__ = ["ReleaseKnowledge"]
diff --git a/alfred/domain/release/ports/knowledge.py b/alfred/domain/release/ports/knowledge.py
new file mode 100644
index 0000000..272e7ef
--- /dev/null
+++ b/alfred/domain/release/ports/knowledge.py
@@ -0,0 +1,52 @@
+"""ReleaseKnowledge port — the read-only query surface that
+``parse_release`` and ``ParsedRelease`` need from the release knowledge
+base, expressed as a structural Protocol so the domain never imports any
+concrete loader.
+
+The concrete YAML-backed implementation lives in
+``alfred/infrastructure/knowledge/release_kb.py``. Tests can supply any
+object that satisfies this shape (e.g. a simple dataclass).
+"""
+
+from __future__ import annotations
+
+from typing import Protocol
+
+
+class ReleaseKnowledge(Protocol):
+    """Read-only snapshot of release-name parsing knowledge."""
+
+    # --- Token sets used by the tokenizer / matchers ---
+
+    resolutions: set[str]
+    sources: set[str]
+    codecs: set[str]
+    language_tokens: set[str]
+    forbidden_chars: set[str]
+    hdr_extra: set[str]
+
+    # --- Structured knowledge (loaded from YAML as dicts) ---
+
+    audio: dict
+    video_meta: dict
+    editions: dict
+    media_type_tokens: dict
+
+    # --- Tokenizer separators ---
+
+    separators: list[str]
+
+    # --- File-extension sets (used by application/infra modules that work
+    #     directly with filesystem paths, e.g. media-type detection, video
+    #     lookup). Domain parsing itself doesn't touch these. ---
+
+    video_extensions: set[str]
+    non_video_extensions: set[str]
+    subtitle_extensions: set[str]
+    metadata_extensions: set[str]  # YAML adapter stores metadata | subtitle here
+
+    # --- Filesystem sanitization (Option B: pre-sanitize at parse time) ---
+
+    def sanitize_for_fs(self, text: str) -> str:
+        """Strip filesystem-forbidden characters from ``text``."""
+        ...
diff --git a/alfred/infrastructure/knowledge/release_kb.py b/alfred/infrastructure/knowledge/release_kb.py
new file mode 100644
index 0000000..5d4a790
--- /dev/null
+++ b/alfred/infrastructure/knowledge/release_kb.py
@@ -0,0 +1,83 @@
+"""YamlReleaseKnowledge — concrete adapter for the ``ReleaseKnowledge``
+domain port.
+
+Loads every release-knowledge YAML once at construction time and exposes
+the parsed snapshots as plain attributes. The application layer builds a
+single instance at boot and passes it down to ``parse_release`` and to
+``ParsedRelease`` builder methods.
+
+The file-extension sets (``video_extensions``, ``non_video_extensions``,
+``subtitle_extensions``, ``metadata_extensions``) are declared on the
+port but unused by domain parsing — they are consumed by application/infra
+modules that handle filesystem-level concerns.
+"""
+
+from __future__ import annotations
+
+from .release import (
+    load_audio,
+    load_codecs,
+    load_editions,
+    load_forbidden_chars,
+    load_hdr_extra,
+    load_language_tokens,
+    load_media_type_tokens,
+    load_metadata_extensions,
+    load_non_video_extensions,
+    load_resolutions,
+    load_separators,
+    load_sources,
+    load_sources_extra,
+    load_subtitle_extensions,
+    load_video,
+    load_video_extensions,
+    load_win_forbidden_chars,
+)
+
+
+class YamlReleaseKnowledge:
+    """Single object holding every parsed-release knowledge constant.
+
+    Built once at application boot. Read-only at runtime — call sites
+    treat it as a snapshot. To pick up newly learned tokens without a
+    restart, build a fresh instance and swap it in at the call sites.
+    """
+
+    def __init__(self) -> None:
+        # Port attributes: token sets, structured dicts, separators
+        self.resolutions: set[str] = load_resolutions()
+        self.sources: set[str] = load_sources() | load_sources_extra()
+        self.codecs: set[str] = load_codecs()
+        self.language_tokens: set[str] = load_language_tokens()
+        self.forbidden_chars: set[str] = load_forbidden_chars()
+        self.hdr_extra: set[str] = load_hdr_extra()
+
+        self.audio: dict = load_audio()
+        self.video_meta: dict = load_video()
+        self.editions: dict = load_editions()
+        self.media_type_tokens: dict = load_media_type_tokens()
+
+        self.separators: list[str] = load_separators()
+
+        # File-extension sets (used by application/infra modules, not by
+        # the parser itself — kept here so there is a single ownership
+        # point for release knowledge).
+        self.video_extensions: set[str] = load_video_extensions()
+        self.non_video_extensions: set[str] = load_non_video_extensions()
+        self.subtitle_extensions: set[str] = load_subtitle_extensions()
+        # Metadata + subtitle extensions are both ignored when deciding
+        # the media type of a folder (neither is a conclusive signal for
+        # movie/tv/other), so we expose the union under the historical
+        # name.
+        self.metadata_extensions: set[str] = (
+            load_metadata_extensions() | self.subtitle_extensions
+        )
+
+        # Translation table for stripping Windows-forbidden chars.
+        self._win_forbidden_table = str.maketrans(
+            "", "", "".join(load_win_forbidden_chars())
+        )
+
+    def sanitize_for_fs(self, text: str) -> str:
+        """Strip Windows-forbidden characters from ``text``."""
+        return text.translate(self._win_forbidden_table)