refactor: tech debt mini-pass (items 5, 6, 7, 20)
Low-risk cleanup items, no functional change to the parser. The philosophy remains: keep the parser simple, the AI handles edge cases. - Extract duplicated 'fs-safe title → dot-folder-name' regex into to_dot_folder_name() in domain/shared/value_objects.py. Used by both MovieTitle.normalized() and TVShow.get_folder_name() (item #5). - ParsedRelease.languages now uses field(default_factory=list) instead of a manual __post_init__ assigning [] via object.__setattr__ (#6). - tv_shows/entities.py module docstring: prepend ASCII ownership tree for quicker visual scan of the aggregate hierarchy (#7). - file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa) into a dedicated 'subtitle:' category instead of lumping them under 'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains the union of both — detect_media_type behavior unchanged. New loader load_subtitle_extensions() exposes the distinct subtitle set for future callers in the subtitles domain (#20). Suite: 1020 passed, 8 skipped.
This commit is contained in:
@@ -112,6 +112,22 @@ callers).
|
||||
|
||||
### Internal
|
||||
|
||||
- **`to_dot_folder_name(title)` helper** in
|
||||
`alfred/domain/shared/value_objects.py` — extracts the
|
||||
`re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was
|
||||
duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`.
|
||||
- **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of
|
||||
a manual `__post_init__` that assigned `[]` via `object.__setattr__`.
|
||||
- **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`,
|
||||
`.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping
|
||||
them under `metadata:`. The `_METADATA_EXTENSIONS` set used by
|
||||
`detect_media_type` remains the union of both (same behavior — subtitles
|
||||
are still ignored when deciding the media type of a folder), but a new
|
||||
`load_subtitle_extensions()` loader is now available for the subtitles
|
||||
domain. Semantic clarity, no functional change.
|
||||
- **`tv_shows/entities.py` module docstring** now shows the aggregate
|
||||
ownership as an ASCII tree before the rule text — quicker visual scan
|
||||
of the DDD structure.
|
||||
- Removed backward-compat shims `_sanitise_for_fs` /
|
||||
`_strip_episode_from_normalised` from `domain/release/value_objects.py`
|
||||
(zero callers).
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""Movie domain value objects."""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from ..shared.exceptions import ValidationError
|
||||
from ..shared.value_objects import to_dot_folder_name
|
||||
|
||||
|
||||
class Quality(Enum):
|
||||
@@ -67,11 +67,7 @@ class MovieTitle:
|
||||
|
||||
Removes special characters and replaces spaces with dots.
|
||||
"""
|
||||
# Remove special characters except spaces, dots, and hyphens
|
||||
cleaned = re.sub(r"[^\w\s\.\-]", "", self.value)
|
||||
# Replace spaces with dots
|
||||
normalized = cleaned.replace(" ", ".")
|
||||
return normalized
|
||||
return to_dot_folder_name(self.value)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
|
||||
@@ -82,6 +82,10 @@ def load_metadata_extensions() -> set[str]:
|
||||
return set(_load("file_extensions.yaml").get("metadata", []))
|
||||
|
||||
|
||||
def load_subtitle_extensions() -> set[str]:
    """Return the entries listed under ``subtitle`` in ``file_extensions.yaml``.

    A missing ``subtitle`` key yields an empty set.
    """
    extensions = _load("file_extensions.yaml")
    return set(extensions.get("subtitle", []))
|
||||
|
||||
|
||||
def load_forbidden_chars() -> set[str]:
    """Return the entries listed under ``forbidden_chars`` in ``release_format.yaml``.

    A missing ``forbidden_chars`` key yields an empty set.
    """
    release_format = _load("release_format.yaml")
    return set(release_format.get("forbidden_chars", []))
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .knowledge import (
|
||||
load_audio,
|
||||
@@ -17,6 +17,7 @@ from .knowledge import (
|
||||
load_resolutions,
|
||||
load_sources,
|
||||
load_sources_extra,
|
||||
load_subtitle_extensions,
|
||||
load_video,
|
||||
load_video_extensions,
|
||||
load_win_forbidden_chars,
|
||||
@@ -28,7 +29,10 @@ _SOURCES: set[str] = load_sources() | load_sources_extra()
|
||||
_CODECS: set[str] = load_codecs()
|
||||
_VIDEO_EXTENSIONS: set[str] = load_video_extensions()
|
||||
_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
|
||||
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions()
|
||||
_SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions()
|
||||
# Both metadata and subtitle extensions are ignored when deciding the media
|
||||
# type of a folder — neither is a conclusive signal for movie/tv/other.
|
||||
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS
|
||||
_FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
|
||||
_LANGUAGE_TOKENS: set[str] = load_language_tokens()
|
||||
_AUDIO: dict = load_audio()
|
||||
@@ -88,17 +92,13 @@ class ParsedRelease:
|
||||
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
|
||||
)
|
||||
parse_path: str = "direct" # "direct" | "sanitized" | "ai"
|
||||
languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], …
|
||||
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
|
||||
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
|
||||
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
|
||||
bit_depth: str | None = None # "10bit", "8bit", …
|
||||
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
|
||||
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
|
||||
|
||||
def __post_init__(self):
|
||||
if self.languages is None:
|
||||
object.__setattr__(self, "languages", [])
|
||||
|
||||
@property
|
||||
def is_season_pack(self) -> bool:
|
||||
return self.season is not None and self.episode is None
|
||||
|
||||
@@ -210,3 +210,18 @@ class Language:
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Language({self.iso!r}, {self.english_name!r})"
|
||||
|
||||
|
||||
# Characters allowed in dot-separated folder/filename forms:
# word characters (``\w``: alphanumerics and underscores), whitespace
# (spaces are subsequently replaced with dots), literal dots, and hyphens.
# The compiled pattern is the *negation* of that set: it matches every
# character that must be stripped.
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")


def to_dot_folder_name(title: str) -> str:
    """Sanitize ``title`` for filesystem use and convert spaces to dots.

    Every character that is not a word character, whitespace, a dot or a
    hyphen is removed first; each remaining space is then replaced by a dot.
    Hyphens are preserved. Runs of spaces are not collapsed, so two adjacent
    spaces (e.g. left behind by a stripped symbol) become two dots.

    Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
    ``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.
    """
    return _FS_SAFE_CHARS.sub("", title).replace(" ", ".")
|
||||
|
||||
@@ -1,20 +1,26 @@
|
||||
"""TV Show domain entities.
|
||||
|
||||
This module implements the TVShow aggregate following DDD principles:
|
||||
This module implements the TVShow aggregate following DDD principles.
|
||||
|
||||
Aggregate ownership::
|
||||
|
||||
TVShow ← aggregate root (the repo returns this)
|
||||
└── seasons: dict[SeasonNumber, Season]
|
||||
└── Season
|
||||
└── episodes: dict[EpisodeNumber, Episode]
|
||||
└── Episode ← file metadata + audio/subtitle tracks
|
||||
|
||||
Rules:
|
||||
|
||||
* ``TVShow`` is the aggregate **root** — the only entity exposed by the
|
||||
repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``).
|
||||
* ``Season`` is owned by TVShow and owns its episodes
|
||||
(``episodes: dict[EpisodeNumber, Episode]``).
|
||||
* ``Episode`` is owned by Season. It carries the actual file metadata
|
||||
(path, size) and the discovered tracks (audio, subtitles).
|
||||
|
||||
Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode):
|
||||
they are only ever reached through ``TVShow``.
|
||||
|
||||
Mutation invariants are enforced through aggregate-root methods such as
|
||||
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to
|
||||
mutate without going through the root, otherwise invariants are not guaranteed.
|
||||
repository.
|
||||
* ``Season`` is owned by TVShow. ``Episode`` is owned by Season.
|
||||
* Children do not back-reference the root (no ``show_imdb_id`` on
|
||||
Season/Episode): they are only ever reached *through* TVShow.
|
||||
* Mutation invariants are enforced through aggregate-root methods such as
|
||||
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes``
|
||||
to mutate without going through the root, otherwise invariants are not
|
||||
guaranteed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -23,7 +29,13 @@ import re
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
|
||||
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
|
||||
from ..shared.value_objects import (
|
||||
FilePath,
|
||||
FileSize,
|
||||
ImdbId,
|
||||
Language,
|
||||
to_dot_folder_name,
|
||||
)
|
||||
from .value_objects import (
|
||||
CollectionStatus,
|
||||
EpisodeNumber,
|
||||
@@ -421,8 +433,7 @@ class TVShow:
|
||||
|
||||
def get_folder_name(self) -> str:
|
||||
"""Dot-separated folder name (e.g. ``Breaking.Bad``)."""
|
||||
cleaned = re.sub(r"[^\w\s\.\-]", "", self.title)
|
||||
return cleaned.replace(" ", ".")
|
||||
return to_dot_folder_name(self.title)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
#
|
||||
# video — extensions that confirm a video media file
|
||||
# non_video — extensions that definitively exclude video content (no metadata here)
|
||||
# metadata — extensions always present alongside releases, ignored in type decision
|
||||
# metadata — release metadata (.nfo, .txt, …), ignored in type decision
|
||||
# subtitle — subtitle sidecar formats, ignored in type decision but central
|
||||
# to the subtitles domain (loaded as a distinct set)
|
||||
|
||||
video:
|
||||
- .mkv
|
||||
@@ -57,6 +59,10 @@ metadata:
|
||||
- .md5
|
||||
- .jpg
|
||||
- .png
|
||||
|
||||
subtitle:
|
||||
# Subtitle sidecar formats — also ignored in type detection,
|
||||
# but kept distinct because they're central to the subtitles domain.
|
||||
- .srt
|
||||
- .sub
|
||||
- .idx
|
||||
|
||||
Reference in New Issue
Block a user