From f6eef59fcac87729cb12f46f0ee25f6d265c469e Mon Sep 17 00:00:00 2001 From: Francwa Date: Mon, 18 May 2026 16:24:28 +0200 Subject: [PATCH] refactor: tech debt mini-pass (items 5, 6, 7, 20) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Low-risk cleanup items, no functional change to the parser. The philosophy remains: keep the parser simple, the AI handles edge cases. - Extract duplicated 'fs-safe title → dot-folder-name' regex into to_dot_folder_name() in domain/shared/value_objects.py. Used by both MovieTitle.normalized() and TVShow.get_folder_name() (item #5). - ParsedRelease.languages now uses field(default_factory=list) instead of a manual __post_init__ assigning [] via object.__setattr__ (#6). - tv_shows/entities.py module docstring: prepend ASCII ownership tree for quicker visual scan of the aggregate hierarchy (#7). - file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa) into a dedicated 'subtitle:' category instead of lumping them under 'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains the union of both — detect_media_type behavior unchanged. New loader load_subtitle_extensions() exposes the distinct subtitle set for future callers in the subtitles domain (#20). Suite: 1020 passed, 8 skipped. --- CHANGELOG.md | 16 +++++++ alfred/domain/movies/value_objects.py | 8 +--- alfred/domain/release/knowledge.py | 4 ++ alfred/domain/release/value_objects.py | 14 +++--- alfred/domain/shared/value_objects.py | 15 +++++++ alfred/domain/tv_shows/entities.py | 43 ++++++++++++------- alfred/knowledge/release/file_extensions.yaml | 8 +++- 7 files changed, 78 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d8649..ffd66ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,6 +112,22 @@ callers). 
### Internal +- **`to_dot_folder_name(title)` helper** in + `alfred/domain/shared/value_objects.py` — extracts the + `re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was + duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`. +- **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of + a manual `__post_init__` that assigned `[]` via `object.__setattr__`. +- **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`, + `.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping + them under `metadata:`. The `_METADATA_EXTENSIONS` set used by + `detect_media_type` remains the union of both (same behavior — subtitles + are still ignored when deciding the media type of a folder), but a new + `load_subtitle_extensions()` loader is now available for the subtitles + domain. Semantic clarity, no functional change. +- **`tv_shows/entities.py` module docstring** now shows the aggregate + ownership as an ASCII tree before the rule text — quicker visual scan + of the DDD structure. - Removed backward-compat shims `_sanitise_for_fs` / `_strip_episode_from_normalised` from `domain/release/value_objects.py` (zero callers). diff --git a/alfred/domain/movies/value_objects.py b/alfred/domain/movies/value_objects.py index 8a350d3..7b74fbf 100644 --- a/alfred/domain/movies/value_objects.py +++ b/alfred/domain/movies/value_objects.py @@ -1,10 +1,10 @@ """Movie domain value objects.""" -import re from dataclasses import dataclass from enum import Enum from ..shared.exceptions import ValidationError +from ..shared.value_objects import to_dot_folder_name class Quality(Enum): @@ -67,11 +67,7 @@ class MovieTitle: Removes special characters and replaces spaces with dots. 
""" - # Remove special characters except spaces, dots, and hyphens - cleaned = re.sub(r"[^\w\s\.\-]", "", self.value) - # Replace spaces with dots - normalized = cleaned.replace(" ", ".") - return normalized + return to_dot_folder_name(self.value) def __str__(self) -> str: return self.value diff --git a/alfred/domain/release/knowledge.py b/alfred/domain/release/knowledge.py index 63d62e8..b6b61ff 100644 --- a/alfred/domain/release/knowledge.py +++ b/alfred/domain/release/knowledge.py @@ -82,6 +82,10 @@ def load_metadata_extensions() -> set[str]: return set(_load("file_extensions.yaml").get("metadata", [])) +def load_subtitle_extensions() -> set[str]: + return set(_load("file_extensions.yaml").get("subtitle", [])) + + def load_forbidden_chars() -> set[str]: return set(_load("release_format.yaml").get("forbidden_chars", [])) diff --git a/alfred/domain/release/value_objects.py b/alfred/domain/release/value_objects.py index 4b18853..ee5ee3b 100644 --- a/alfred/domain/release/value_objects.py +++ b/alfred/domain/release/value_objects.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from .knowledge import ( load_audio, @@ -17,6 +17,7 @@ from .knowledge import ( load_resolutions, load_sources, load_sources_extra, + load_subtitle_extensions, load_video, load_video_extensions, load_win_forbidden_chars, @@ -28,7 +29,10 @@ _SOURCES: set[str] = load_sources() | load_sources_extra() _CODECS: set[str] = load_codecs() _VIDEO_EXTENSIONS: set[str] = load_video_extensions() _NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions() -_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() +_SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions() +# Both metadata and subtitle extensions are ignored when deciding the media +# type of a folder — neither is a conclusive signal for movie/tv/other. 
+_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS _FORBIDDEN_CHARS: set[str] = load_forbidden_chars() _LANGUAGE_TOKENS: set[str] = load_language_tokens() _AUDIO: dict = load_audio() @@ -88,17 +92,13 @@ class ParsedRelease: None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc" ) parse_path: str = "direct" # "direct" | "sanitized" | "ai" - languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], … + languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], … audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", … audio_channels: str | None = None # "5.1", "7.1", "2.0", … bit_depth: str | None = None # "10bit", "8bit", … hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", … edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", … - def __post_init__(self): - if self.languages is None: - object.__setattr__(self, "languages", []) - @property def is_season_pack(self) -> bool: return self.season is not None and self.episode is None diff --git a/alfred/domain/shared/value_objects.py b/alfred/domain/shared/value_objects.py index cdab6b9..9aafdcf 100644 --- a/alfred/domain/shared/value_objects.py +++ b/alfred/domain/shared/value_objects.py @@ -210,3 +210,18 @@ class Language: def __repr__(self) -> str: return f"Language({self.iso!r}, {self.english_name!r})" + + +# Characters allowed in dot-separated folder/filename forms: +# alphanumerics, underscores, spaces (about to be replaced with dots), +# literal dots, and hyphens. Everything else is stripped. +_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]") + + +def to_dot_folder_name(title: str) -> str: + """Sanitize ``title`` for filesystem use and convert spaces to dots. + + Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or + ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``. 
+ """ + return _FS_SAFE_CHARS.sub("", title).replace(" ", ".") diff --git a/alfred/domain/tv_shows/entities.py b/alfred/domain/tv_shows/entities.py index dfac8d1..01ffc45 100644 --- a/alfred/domain/tv_shows/entities.py +++ b/alfred/domain/tv_shows/entities.py @@ -1,20 +1,26 @@ """TV Show domain entities. -This module implements the TVShow aggregate following DDD principles: +This module implements the TVShow aggregate following DDD principles. + +Aggregate ownership:: + + TVShow ← aggregate root (the repo returns this) + └── seasons: dict[SeasonNumber, Season] + └── Season + └── episodes: dict[EpisodeNumber, Episode] + └── Episode ← file metadata + audio/subtitle tracks + +Rules: * ``TVShow`` is the aggregate **root** — the only entity exposed by the - repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``). -* ``Season`` is owned by TVShow and owns its episodes - (``episodes: dict[EpisodeNumber, Episode]``). -* ``Episode`` is owned by Season. It carries the actual file metadata - (path, size) and the discovered tracks (audio, subtitles). - -Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode): -they are only ever reached through ``TVShow``. - -Mutation invariants are enforced through aggregate-root methods such as -``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to -mutate without going through the root, otherwise invariants are not guaranteed. + repository. +* ``Season`` is owned by TVShow. ``Episode`` is owned by Season. +* Children do not back-reference the root (no ``show_imdb_id`` on + Season/Episode): they are only ever reached *through* TVShow. +* Mutation invariants are enforced through aggregate-root methods such as + ``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` + to mutate without going through the root, otherwise invariants are not + guaranteed. 
""" from __future__ import annotations @@ -23,7 +29,13 @@ import re from dataclasses import dataclass, field from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches -from ..shared.value_objects import FilePath, FileSize, ImdbId, Language +from ..shared.value_objects import ( + FilePath, + FileSize, + ImdbId, + Language, + to_dot_folder_name, +) from .value_objects import ( CollectionStatus, EpisodeNumber, @@ -421,8 +433,7 @@ class TVShow: def get_folder_name(self) -> str: """Dot-separated folder name (e.g. ``Breaking.Bad``).""" - cleaned = re.sub(r"[^\w\s\.\-]", "", self.title) - return cleaned.replace(" ", ".") + return to_dot_folder_name(self.title) def __str__(self) -> str: return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)" diff --git a/alfred/knowledge/release/file_extensions.yaml b/alfred/knowledge/release/file_extensions.yaml index 6726434..025cf13 100644 --- a/alfred/knowledge/release/file_extensions.yaml +++ b/alfred/knowledge/release/file_extensions.yaml @@ -2,7 +2,9 @@ # # video — extensions that confirm a video media file # non_video — extensions that definitively exclude video content (no metadata here) -# metadata — extensions always present alongside releases, ignored in type decision +# metadata — release metadata (.nfo, .txt, …), ignored in type decision +# subtitle — subtitle sidecar formats, ignored in type decision but central +# to the subtitles domain (loaded as a distinct set) video: - .mkv @@ -57,6 +59,10 @@ metadata: - .md5 - .jpg - .png + +subtitle: + # Subtitle sidecar formats — also ignored in type detection, + # but kept distinct because they're central to the subtitles domain. - .srt - .sub - .idx