refactor: tech debt mini-pass (items 5, 6, 7, 20)

Low-risk cleanup items, no functional change to the parser. The
philosophy remains: keep the parser simple, the AI handles edge cases.

- Extract duplicated 'fs-safe title → dot-folder-name' regex into
  to_dot_folder_name() in domain/shared/value_objects.py. Used by both
  MovieTitle.normalized() and TVShow.get_folder_name() (item #5).
- ParsedRelease.languages now uses field(default_factory=list) instead
  of a manual __post_init__ assigning [] via object.__setattr__ (#6).
- tv_shows/entities.py module docstring: prepend ASCII ownership tree
  for quicker visual scan of the aggregate hierarchy (#7).
- file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa)
  into a dedicated 'subtitle:' category instead of lumping them under
  'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains
  the union of both — detect_media_type behavior unchanged. New loader
  load_subtitle_extensions() exposes the distinct subtitle set for future
  callers in the subtitles domain (#20).

Suite: 1020 passed, 8 skipped.
This commit is contained in:
2026-05-18 16:24:28 +02:00
parent 273510dff8
commit f6eef59fca
7 changed files with 78 additions and 30 deletions
+16
View File
@@ -112,6 +112,22 @@ callers).
### Internal ### Internal
- **`to_dot_folder_name(title)` helper** in
`alfred/domain/shared/value_objects.py` — extracts the
`re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was
duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`.
- **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of
a manual `__post_init__` that assigned `[]` via `object.__setattr__`.
- **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`,
`.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping
them under `metadata:`. The `_METADATA_EXTENSIONS` set used by
`detect_media_type` remains the union of both (same behavior — subtitles
are still ignored when deciding the media type of a folder), but a new
`load_subtitle_extensions()` loader is now available for the subtitles
domain. Semantic clarity, no functional change.
- **`tv_shows/entities.py` module docstring** now shows the aggregate
ownership as an ASCII tree before the rule text — quicker visual scan
of the DDD structure.
- Removed backward-compat shims `_sanitise_for_fs` / - Removed backward-compat shims `_sanitise_for_fs` /
`_strip_episode_from_normalised` from `domain/release/value_objects.py` `_strip_episode_from_normalised` from `domain/release/value_objects.py`
(zero callers). (zero callers).
+2 -6
View File
@@ -1,10 +1,10 @@
"""Movie domain value objects.""" """Movie domain value objects."""
import re
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from ..shared.exceptions import ValidationError from ..shared.exceptions import ValidationError
from ..shared.value_objects import to_dot_folder_name
class Quality(Enum): class Quality(Enum):
@@ -67,11 +67,7 @@ class MovieTitle:
Removes special characters and replaces spaces with dots. Removes special characters and replaces spaces with dots.
""" """
# Remove special characters except spaces, dots, and hyphens return to_dot_folder_name(self.value)
cleaned = re.sub(r"[^\w\s\.\-]", "", self.value)
# Replace spaces with dots
normalized = cleaned.replace(" ", ".")
return normalized
def __str__(self) -> str: def __str__(self) -> str:
return self.value return self.value
+4
View File
@@ -82,6 +82,10 @@ def load_metadata_extensions() -> set[str]:
return set(_load("file_extensions.yaml").get("metadata", [])) return set(_load("file_extensions.yaml").get("metadata", []))
def load_subtitle_extensions() -> set[str]:
return set(_load("file_extensions.yaml").get("subtitle", []))
def load_forbidden_chars() -> set[str]: def load_forbidden_chars() -> set[str]:
return set(_load("release_format.yaml").get("forbidden_chars", [])) return set(_load("release_format.yaml").get("forbidden_chars", []))
+7 -7
View File
@@ -2,7 +2,7 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass, field
from .knowledge import ( from .knowledge import (
load_audio, load_audio,
@@ -17,6 +17,7 @@ from .knowledge import (
load_resolutions, load_resolutions,
load_sources, load_sources,
load_sources_extra, load_sources_extra,
load_subtitle_extensions,
load_video, load_video,
load_video_extensions, load_video_extensions,
load_win_forbidden_chars, load_win_forbidden_chars,
@@ -28,7 +29,10 @@ _SOURCES: set[str] = load_sources() | load_sources_extra()
_CODECS: set[str] = load_codecs() _CODECS: set[str] = load_codecs()
_VIDEO_EXTENSIONS: set[str] = load_video_extensions() _VIDEO_EXTENSIONS: set[str] = load_video_extensions()
_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions() _NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() _SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions()
# Both metadata and subtitle extensions are ignored when deciding the media
# type of a folder — neither is a conclusive signal for movie/tv/other.
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS
_FORBIDDEN_CHARS: set[str] = load_forbidden_chars() _FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
_LANGUAGE_TOKENS: set[str] = load_language_tokens() _LANGUAGE_TOKENS: set[str] = load_language_tokens()
_AUDIO: dict = load_audio() _AUDIO: dict = load_audio()
@@ -88,17 +92,13 @@ class ParsedRelease:
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc" None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
) )
parse_path: str = "direct" # "direct" | "sanitized" | "ai" parse_path: str = "direct" # "direct" | "sanitized" | "ai"
languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], … languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", … audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
audio_channels: str | None = None # "5.1", "7.1", "2.0", … audio_channels: str | None = None # "5.1", "7.1", "2.0", …
bit_depth: str | None = None # "10bit", "8bit", … bit_depth: str | None = None # "10bit", "8bit", …
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", … hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", … edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
def __post_init__(self):
if self.languages is None:
object.__setattr__(self, "languages", [])
@property @property
def is_season_pack(self) -> bool: def is_season_pack(self) -> bool:
return self.season is not None and self.episode is None return self.season is not None and self.episode is None
+15
View File
@@ -210,3 +210,18 @@ class Language:
def __repr__(self) -> str: def __repr__(self) -> str:
return f"Language({self.iso!r}, {self.english_name!r})" return f"Language({self.iso!r}, {self.english_name!r})"
# Characters allowed in dot-separated folder/filename forms:
# word characters (letters, digits, underscores), whitespace (plain spaces
# are replaced with dots afterwards), literal dots, and hyphens.
# Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")
def to_dot_folder_name(title: str) -> str:
"""Sanitize ``title`` for filesystem use and convert spaces to dots.
Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.
"""
return _FS_SAFE_CHARS.sub("", title).replace(" ", ".")
+27 -16
View File
@@ -1,20 +1,26 @@
"""TV Show domain entities. """TV Show domain entities.
This module implements the TVShow aggregate following DDD principles: This module implements the TVShow aggregate following DDD principles.
Aggregate ownership::
TVShow ← aggregate root (the repo returns this)
└── seasons: dict[SeasonNumber, Season]
└── Season
└── episodes: dict[EpisodeNumber, Episode]
└── Episode ← file metadata + audio/subtitle tracks
Rules:
* ``TVShow`` is the aggregate **root** — the only entity exposed by the * ``TVShow`` is the aggregate **root** — the only entity exposed by the
repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``). repository.
* ``Season`` is owned by TVShow and owns its episodes * ``Season`` is owned by TVShow. ``Episode`` is owned by Season.
(``episodes: dict[EpisodeNumber, Episode]``). * Children do not back-reference the root (no ``show_imdb_id`` on
* ``Episode`` is owned by Season. It carries the actual file metadata Season/Episode): they are only ever reached *through* TVShow.
(path, size) and the discovered tracks (audio, subtitles). * Mutation invariants are enforced through aggregate-root methods such as
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes``
Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode): to mutate without going through the root, otherwise invariants are not
they are only ever reached through ``TVShow``. guaranteed.
Mutation invariants are enforced through aggregate-root methods such as
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to
mutate without going through the root, otherwise invariants are not guaranteed.
""" """
from __future__ import annotations from __future__ import annotations
@@ -23,7 +29,13 @@ import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language from ..shared.value_objects import (
FilePath,
FileSize,
ImdbId,
Language,
to_dot_folder_name,
)
from .value_objects import ( from .value_objects import (
CollectionStatus, CollectionStatus,
EpisodeNumber, EpisodeNumber,
@@ -421,8 +433,7 @@ class TVShow:
def get_folder_name(self) -> str: def get_folder_name(self) -> str:
"""Dot-separated folder name (e.g. ``Breaking.Bad``).""" """Dot-separated folder name (e.g. ``Breaking.Bad``)."""
cleaned = re.sub(r"[^\w\s\.\-]", "", self.title) return to_dot_folder_name(self.title)
return cleaned.replace(" ", ".")
def __str__(self) -> str: def __str__(self) -> str:
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)" return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
@@ -2,7 +2,9 @@
# #
# video — extensions that confirm a video media file # video — extensions that confirm a video media file
# non_video — extensions that definitively exclude video content (no metadata here) # non_video — extensions that definitively exclude video content (no metadata here)
# metadata — extensions always present alongside releases, ignored in type decision # metadata — release metadata (.nfo, .txt, …), ignored in type decision
# subtitle — subtitle sidecar formats, ignored in type decision but central
# to the subtitles domain (loaded as a distinct set)
video: video:
- .mkv - .mkv
@@ -57,6 +59,10 @@ metadata:
- .md5 - .md5
- .jpg - .jpg
- .png - .png
subtitle:
# Subtitle sidecar formats — also ignored in type detection,
# but kept distinct because they're central to the subtitles domain.
- .srt - .srt
- .sub - .sub
- .idx - .idx