refactor: tech debt mini-pass (items 5, 6, 7, 20)

Low-risk cleanup items, no functional change to the parser. The
philosophy remains: keep the parser simple, the AI handles edge cases.

- Extract duplicated 'fs-safe title → dot-folder-name' regex into
  to_dot_folder_name() in domain/shared/value_objects.py. Used by both
  MovieTitle.normalized() and TVShow.get_folder_name() (item #5).
- ParsedRelease.languages now uses field(default_factory=list) instead
  of a manual __post_init__ assigning [] via object.__setattr__ (#6).
- tv_shows/entities.py module docstring: prepend ASCII ownership tree
  for quicker visual scan of the aggregate hierarchy (#7).
- file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa)
  into a dedicated 'subtitle:' category instead of lumping them under
  'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains
  the union of both — detect_media_type behavior unchanged. New loader
  load_subtitle_extensions() exposes the distinct subtitle set for future
  callers in the subtitles domain (#20).

Suite: 1020 passed, 8 skipped.
This commit is contained in:
2026-05-18 16:24:28 +02:00
parent 273510dff8
commit f6eef59fca
7 changed files with 78 additions and 30 deletions
+16
View File
@@ -112,6 +112,22 @@ callers).
### Internal
- **`to_dot_folder_name(title)` helper** in
`alfred/domain/shared/value_objects.py` — extracts the
`re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was
duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`.
- **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of
a manual `__post_init__` that assigned `[]` via `object.__setattr__`.
- **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`,
`.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping
them under `metadata:`. The `_METADATA_EXTENSIONS` set used by
`detect_media_type` remains the union of both (same behavior — subtitles
are still ignored when deciding the media type of a folder), but a new
`load_subtitle_extensions()` loader is now available for the subtitles
domain. Semantic clarity, no functional change.
- **`tv_shows/entities.py` module docstring** now shows the aggregate
ownership as an ASCII tree before the rule text — quicker visual scan
of the DDD structure.
- Removed backward-compat shims `_sanitise_for_fs` /
`_strip_episode_from_normalised` from `domain/release/value_objects.py`
(zero callers).
+2 -6
View File
@@ -1,10 +1,10 @@
"""Movie domain value objects."""
import re
from dataclasses import dataclass
from enum import Enum
from ..shared.exceptions import ValidationError
from ..shared.value_objects import to_dot_folder_name
class Quality(Enum):
@@ -67,11 +67,7 @@ class MovieTitle:
Removes special characters and replaces spaces with dots.
"""
# Remove special characters except spaces, dots, and hyphens
cleaned = re.sub(r"[^\w\s\.\-]", "", self.value)
# Replace spaces with dots
normalized = cleaned.replace(" ", ".")
return normalized
return to_dot_folder_name(self.value)
def __str__(self) -> str:
return self.value
+4
View File
@@ -82,6 +82,10 @@ def load_metadata_extensions() -> set[str]:
return set(_load("file_extensions.yaml").get("metadata", []))
def load_subtitle_extensions() -> set[str]:
    """Return the entries under the ``subtitle`` key of ``file_extensions.yaml``.

    A missing ``subtitle`` key yields an empty set.
    """
    entries = _load("file_extensions.yaml").get("subtitle", [])
    return set(entries)
def load_forbidden_chars() -> set[str]:
    """Return the entries under ``forbidden_chars`` in ``release_format.yaml``.

    A missing ``forbidden_chars`` key yields an empty set.
    """
    entries = _load("release_format.yaml").get("forbidden_chars", [])
    return set(entries)
+7 -7
View File
@@ -2,7 +2,7 @@
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import dataclass, field
from .knowledge import (
load_audio,
@@ -17,6 +17,7 @@ from .knowledge import (
load_resolutions,
load_sources,
load_sources_extra,
load_subtitle_extensions,
load_video,
load_video_extensions,
load_win_forbidden_chars,
@@ -28,7 +29,10 @@ _SOURCES: set[str] = load_sources() | load_sources_extra()
_CODECS: set[str] = load_codecs()
_VIDEO_EXTENSIONS: set[str] = load_video_extensions()
_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions()
_SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions()
# Both metadata and subtitle extensions are ignored when deciding the media
# type of a folder — neither is a conclusive signal for movie/tv/other.
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS
_FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
_LANGUAGE_TOKENS: set[str] = load_language_tokens()
_AUDIO: dict = load_audio()
@@ -88,17 +92,13 @@ class ParsedRelease:
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
)
parse_path: str = "direct" # "direct" | "sanitized" | "ai"
languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], …
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
bit_depth: str | None = None # "10bit", "8bit", …
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
def __post_init__(self):
if self.languages is None:
object.__setattr__(self, "languages", [])
@property
def is_season_pack(self) -> bool:
return self.season is not None and self.episode is None
+15
View File
@@ -210,3 +210,18 @@ class Language:
def __repr__(self) -> str:
return f"Language({self.iso!r}, {self.english_name!r})"
# Matches every character that is NOT allowed in a dot-separated
# folder/filename form. Left untouched are word characters (``\w``:
# alphanumerics and underscore), whitespace (``\s``), literal dots, and
# hyphens. Everything else is stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")


def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and convert spaces to dots.

    Characters other than word characters, whitespace, dots and hyphens
    are removed first; every remaining space character is then replaced
    by a dot. Hyphens and existing dots are preserved as-is, so a space
    following a dot produces two consecutive dots.

    NOTE: only the literal space character is converted. Other whitespace
    (tabs, newlines) survives the strip step and is returned unchanged.

    Args:
        title: Human-readable title, e.g. a movie or TV show name.

    Returns:
        The dot-separated form of ``title``.

    Examples:
        >>> to_dot_folder_name("Breaking Bad")
        'Breaking.Bad'
        >>> to_dot_folder_name("Spider-Man: No Way Home")
        'Spider-Man.No.Way.Home'
    """
    return _FS_SAFE_CHARS.sub("", title).replace(" ", ".")
+27 -16
View File
@@ -1,20 +1,26 @@
"""TV Show domain entities.
This module implements the TVShow aggregate following DDD principles:
This module implements the TVShow aggregate following DDD principles.
Aggregate ownership::
TVShow ← aggregate root (the repo returns this)
└── seasons: dict[SeasonNumber, Season]
└── Season
└── episodes: dict[EpisodeNumber, Episode]
└── Episode ← file metadata + audio/subtitle tracks
Rules:
* ``TVShow`` is the aggregate **root** — the only entity exposed by the
repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``).
* ``Season`` is owned by TVShow and owns its episodes
(``episodes: dict[EpisodeNumber, Episode]``).
* ``Episode`` is owned by Season. It carries the actual file metadata
(path, size) and the discovered tracks (audio, subtitles).
Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode):
they are only ever reached through ``TVShow``.
Mutation invariants are enforced through aggregate-root methods such as
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to
mutate without going through the root, otherwise invariants are not guaranteed.
repository.
* ``Season`` is owned by TVShow. ``Episode`` is owned by Season.
* Children do not back-reference the root (no ``show_imdb_id`` on
Season/Episode): they are only ever reached *through* TVShow.
* Mutation invariants are enforced through aggregate-root methods such as
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes``
to mutate without going through the root, otherwise invariants are not
guaranteed.
"""
from __future__ import annotations
@@ -23,7 +29,13 @@ import re
from dataclasses import dataclass, field
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
from ..shared.value_objects import (
FilePath,
FileSize,
ImdbId,
Language,
to_dot_folder_name,
)
from .value_objects import (
CollectionStatus,
EpisodeNumber,
@@ -421,8 +433,7 @@ class TVShow:
def get_folder_name(self) -> str:
"""Dot-separated folder name (e.g. ``Breaking.Bad``)."""
cleaned = re.sub(r"[^\w\s\.\-]", "", self.title)
return cleaned.replace(" ", ".")
return to_dot_folder_name(self.title)
def __str__(self) -> str:
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
@@ -2,7 +2,9 @@
#
# video — extensions that confirm a video media file
# non_video — extensions that definitively exclude video content (no metadata here)
# metadata — extensions always present alongside releases, ignored in type decision
# metadata — release metadata (.nfo, .txt, …), ignored in type decision
# subtitle — subtitle sidecar formats, ignored in type decision but central
# to the subtitles domain (loaded as a distinct set)
video:
- .mkv
@@ -57,6 +59,10 @@ metadata:
- .md5
- .jpg
- .png
subtitle:
# Subtitle sidecar formats — also ignored in type detection,
# but kept distinct because they're central to the subtitles domain.
- .srt
- .sub
- .idx