Files
alfred/alfred/domain/release/knowledge.py
T
francwa f17abdbaec chore: cleanup — remove shims, fix ruff warnings, ignore noisy rules
- Removed backward-compat shims _sanitise_for_fs / _strip_episode_from_normalised
  in domain/release/value_objects.py (zero callers).
- Fixed ruff warnings across the codebase:
    * PLW1510: explicit check=False on subprocess.run calls
    * PLC0415: promoted lazy imports to module top where no cycle exists
      (manage_subtitles, placer, qbittorrent/client, file_manager)
    * E402: fixed module-level import ordering in language_registry.py and
      subtitles/knowledge/loader.py
    * F841 / B007: removed unused locals (identifier.py)
    * C416: replaced unnecessary set comprehension with set() in
      release/knowledge.py
- Ruff config: ignore PLR0911/PLR0912 globally (noisy on mappers and
  orchestrator use-cases) and PLW0603 (intentional for the memory singleton).
- Updated tech debt memory: P1 done, ShowStatus actually complete (was a
  stale note).
2026-05-18 00:02:45 +02:00

137 lines
3.8 KiB
Python

"""Release knowledge loader.
Three-layer merge (lowest → highest priority):
1. Builtin — alfred/knowledge/release/
2. Sites — alfred/knowledge/release/sites/*.yaml (all trackers)
3. Learned — data/knowledge/release/ (user additions via the learn tool)
Lists are extended additively, scalars from higher layers win.
"""
from pathlib import Path
import yaml
import alfred as _alfred_pkg
# Builtin knowledge files are located inside the ``alfred`` package directory.
_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent.joinpath(
    "knowledge", "release"
)
# Per-site YAML files sit in a ``sites`` subdirectory of the builtin root.
_SITES_ROOT = _BUILTIN_ROOT.joinpath("sites")
# Learned knowledge is stored next to the package directory, under ``data``.
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent.joinpath(
    "data", "knowledge", "release"
)
def _merge(base: dict, overlay: dict) -> dict:
"""Merge overlay into base — lists are extended, scalars from overlay win."""
result = dict(base)
for key, val in overlay.items():
if key in result and isinstance(result[key], list) and isinstance(val, list):
result[key] = result[key] + [v for v in val if v not in result[key]]
else:
result[key] = val
return result
def _read(path: Path) -> dict:
try:
with open(path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except FileNotFoundError:
return {}
def _load(filename: str) -> dict:
    """Load ``filename`` from the builtin root and overlay the learned copy.

    The builtin file is read first, then the file of the same name under the
    learned root is merged on top of it.
    """
    builtin = _read(_BUILTIN_ROOT / filename)
    learned = _read(_LEARNED_ROOT / filename)
    return _merge(builtin, learned)
def _load_sites() -> dict:
    """Combine every ``*.yaml`` file under the sites root into one dict.

    Files are merged in sorted path order, each one overlaid on the
    accumulated result of the previous ones.
    """
    combined: dict = {}
    site_paths = sorted(_SITES_ROOT.glob("*.yaml"))
    for site_path in site_paths:
        combined = _merge(combined, _read(site_path))
    return combined
def load_resolutions() -> set[str]:
    """Return the values under ``resolutions`` in ``resolutions.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("resolutions.yaml").get("resolutions") or [])
def load_sources() -> set[str]:
    """Return the values under ``sources`` in ``sources.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("sources.yaml").get("sources") or [])
def load_codecs() -> set[str]:
    """Return the values under ``codecs`` in ``codecs.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("codecs.yaml").get("codecs") or [])
def load_win_forbidden_chars() -> list[str]:
    """Return the ``win_forbidden_chars`` list from ``filesystem.yaml``.

    A missing or empty (YAML null) key yields an empty list, so the return
    value always matches the annotated type.
    """
    return _load("filesystem.yaml").get("win_forbidden_chars") or []
def load_video_extensions() -> set[str]:
    """Return the values under ``video`` in ``file_extensions.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("file_extensions.yaml").get("video") or [])
def load_non_video_extensions() -> set[str]:
    """Return the values under ``non_video`` in ``file_extensions.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("file_extensions.yaml").get("non_video") or [])
def load_metadata_extensions() -> set[str]:
    """Return the values under ``metadata`` in ``file_extensions.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("file_extensions.yaml").get("metadata") or [])
def load_forbidden_chars() -> set[str]:
    """Return the values under ``forbidden_chars`` in ``release_format.yaml``.

    A missing or empty (YAML null) key yields an empty set instead of
    raising on ``set(None)``.
    """
    return set(_load("release_format.yaml").get("forbidden_chars") or [])
def load_language_tokens() -> set[str]:
    """Return the upper-cased language tokens from the base and site layers.

    Combines ``tokens`` from ``languages.yaml`` with ``languages`` from the
    merged site files. A missing or empty (YAML null) key on either side
    contributes nothing instead of raising while iterating ``None``.
    """
    base_tokens = _load("languages.yaml").get("tokens") or []
    site_tokens = _load_sites().get("languages") or []
    return {token.upper() for token in (*base_tokens, *site_tokens)}
def load_audio() -> dict:
    """Return the merged contents of ``audio.yaml`` as a dict."""
    return _load(filename="audio.yaml")
def load_video() -> dict:
    """Return the merged contents of ``video.yaml`` as a dict."""
    return _load(filename="video.yaml")
def load_editions() -> dict:
    """Return ``editions.yaml`` with site edition tokens appended.

    Tokens found under ``editions.tokens`` in the merged site files are
    appended to the base ``tokens`` list, skipping tokens already present.
    Empty (YAML null) ``editions`` or ``tokens`` entries are treated as
    empty, so a blank key no longer raises ``AttributeError``/``TypeError``.
    """
    base = _load("editions.yaml")
    site_editions = _load_sites().get("editions") or {}
    site_tokens = site_editions.get("tokens") or []
    if site_tokens:
        existing = base.get("tokens") or []
        # Build a new list; the loaded base list is never modified in place.
        base["tokens"] = existing + [t for t in site_tokens if t not in existing]
    return base
def load_sources_extra() -> set[str]:
    """Additional source tokens from site files.

    A missing or empty (YAML null) ``sources`` key yields an empty set
    instead of raising on ``set(None)``.
    """
    return set(_load_sites().get("sources") or [])
def load_hdr_extra() -> set[str]:
    """Additional HDR tokens from site files, upper-cased.

    A missing or empty (YAML null) ``hdr`` key yields an empty set instead
    of raising while iterating ``None``.
    """
    return {token.upper() for token in _load_sites().get("hdr") or []}
def load_media_type_tokens() -> dict:
    """Site-specific media type tokens (doc, concert, collection, integrale).

    Read from ``media_type_tokens`` in the merged site files. A missing or
    empty (YAML null) key yields an empty dict, so the return value always
    matches the annotated type.
    """
    return _load_sites().get("media_type_tokens") or {}
def load_separators() -> list[str]:
    """Return the token separators used by the release name tokenizer.

    Reads ``separators`` from ``separators.yaml``. The canonical "." is
    placed first when the configured list lacks it, so a misconfigured file
    cannot leave the parser without its primary separator.
    """
    configured = _load("separators.yaml").get("separators", []) or []
    return configured if "." in configured else [".", *configured]