refactor: tech debt mini-pass (items 5, 6, 7, 20)
Low-risk cleanup items, no functional change to the parser. The philosophy remains: keep the parser simple, the AI handles edge cases. - Extract duplicated 'fs-safe title → dot-folder-name' regex into to_dot_folder_name() in domain/shared/value_objects.py. Used by both MovieTitle.normalized() and TVShow.get_folder_name() (item #5). - ParsedRelease.languages now uses field(default_factory=list) instead of a manual __post_init__ assigning [] via object.__setattr__ (#6). - tv_shows/entities.py module docstring: prepend ASCII ownership tree for quicker visual scan of the aggregate hierarchy (#7). - file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa) into a dedicated 'subtitle:' category instead of lumping them under 'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains the union of both — detect_media_type behavior unchanged. New loader load_subtitle_extensions() exposes the distinct subtitle set for future callers in the subtitles domain (#20). Suite: 1020 passed, 8 skipped.
This commit is contained in:
@@ -112,6 +112,22 @@ callers).
|
|||||||
|
|
||||||
### Internal
|
### Internal
|
||||||
|
|
||||||
|
- **`to_dot_folder_name(title)` helper** in
|
||||||
|
`alfred/domain/shared/value_objects.py` — extracts the
|
||||||
|
`re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was
|
||||||
|
duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`.
|
||||||
|
- **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of
|
||||||
|
a manual `__post_init__` that assigned `[]` via `object.__setattr__`.
|
||||||
|
- **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`,
|
||||||
|
`.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping
|
||||||
|
them under `metadata:`. The `_METADATA_EXTENSIONS` set used by
|
||||||
|
`detect_media_type` remains the union of both (same behavior — subtitles
|
||||||
|
are still ignored when deciding the media type of a folder), but a new
|
||||||
|
`load_subtitle_extensions()` loader is now available for the subtitles
|
||||||
|
domain. Semantic clarity, no functional change.
|
||||||
|
- **`tv_shows/entities.py` module docstring** now shows the aggregate
|
||||||
|
ownership as an ASCII tree before the rule text — quicker visual scan
|
||||||
|
of the DDD structure.
|
||||||
- Removed backward-compat shims `_sanitise_for_fs` /
|
- Removed backward-compat shims `_sanitise_for_fs` /
|
||||||
`_strip_episode_from_normalised` from `domain/release/value_objects.py`
|
`_strip_episode_from_normalised` from `domain/release/value_objects.py`
|
||||||
(zero callers).
|
(zero callers).
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
"""Movie domain value objects."""
|
"""Movie domain value objects."""
|
||||||
|
|
||||||
import re
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from ..shared.exceptions import ValidationError
|
from ..shared.exceptions import ValidationError
|
||||||
|
from ..shared.value_objects import to_dot_folder_name
|
||||||
|
|
||||||
|
|
||||||
class Quality(Enum):
|
class Quality(Enum):
|
||||||
@@ -67,11 +67,7 @@ class MovieTitle:
|
|||||||
|
|
||||||
Removes special characters and replaces spaces with dots.
|
Removes special characters and replaces spaces with dots.
|
||||||
"""
|
"""
|
||||||
# Remove special characters except spaces, dots, and hyphens
|
return to_dot_folder_name(self.value)
|
||||||
cleaned = re.sub(r"[^\w\s\.\-]", "", self.value)
|
|
||||||
# Replace spaces with dots
|
|
||||||
normalized = cleaned.replace(" ", ".")
|
|
||||||
return normalized
|
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return self.value
|
return self.value
|
||||||
|
|||||||
@@ -82,6 +82,10 @@ def load_metadata_extensions() -> set[str]:
|
|||||||
return set(_load("file_extensions.yaml").get("metadata", []))
|
return set(_load("file_extensions.yaml").get("metadata", []))
|
||||||
|
|
||||||
|
|
||||||
|
def load_subtitle_extensions() -> set[str]:
|
||||||
|
return set(_load("file_extensions.yaml").get("subtitle", []))
|
||||||
|
|
||||||
|
|
||||||
def load_forbidden_chars() -> set[str]:
|
def load_forbidden_chars() -> set[str]:
|
||||||
return set(_load("release_format.yaml").get("forbidden_chars", []))
|
return set(_load("release_format.yaml").get("forbidden_chars", []))
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from .knowledge import (
|
from .knowledge import (
|
||||||
load_audio,
|
load_audio,
|
||||||
@@ -17,6 +17,7 @@ from .knowledge import (
|
|||||||
load_resolutions,
|
load_resolutions,
|
||||||
load_sources,
|
load_sources,
|
||||||
load_sources_extra,
|
load_sources_extra,
|
||||||
|
load_subtitle_extensions,
|
||||||
load_video,
|
load_video,
|
||||||
load_video_extensions,
|
load_video_extensions,
|
||||||
load_win_forbidden_chars,
|
load_win_forbidden_chars,
|
||||||
@@ -28,7 +29,10 @@ _SOURCES: set[str] = load_sources() | load_sources_extra()
|
|||||||
_CODECS: set[str] = load_codecs()
|
_CODECS: set[str] = load_codecs()
|
||||||
_VIDEO_EXTENSIONS: set[str] = load_video_extensions()
|
_VIDEO_EXTENSIONS: set[str] = load_video_extensions()
|
||||||
_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
|
_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
|
||||||
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions()
|
_SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions()
|
||||||
|
# Both metadata and subtitle extensions are ignored when deciding the media
|
||||||
|
# type of a folder — neither is a conclusive signal for movie/tv/other.
|
||||||
|
_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS
|
||||||
_FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
|
_FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
|
||||||
_LANGUAGE_TOKENS: set[str] = load_language_tokens()
|
_LANGUAGE_TOKENS: set[str] = load_language_tokens()
|
||||||
_AUDIO: dict = load_audio()
|
_AUDIO: dict = load_audio()
|
||||||
@@ -88,17 +92,13 @@ class ParsedRelease:
|
|||||||
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
|
None # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
|
||||||
)
|
)
|
||||||
parse_path: str = "direct" # "direct" | "sanitized" | "ai"
|
parse_path: str = "direct" # "direct" | "sanitized" | "ai"
|
||||||
languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], …
|
languages: list[str] = field(default_factory=list) # ["MULTI", "VFF"], ["FRENCH"], …
|
||||||
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
|
audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", …
|
||||||
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
|
audio_channels: str | None = None # "5.1", "7.1", "2.0", …
|
||||||
bit_depth: str | None = None # "10bit", "8bit", …
|
bit_depth: str | None = None # "10bit", "8bit", …
|
||||||
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
|
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
|
||||||
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
|
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
|
||||||
|
|
||||||
def __post_init__(self):
|
|
||||||
if self.languages is None:
|
|
||||||
object.__setattr__(self, "languages", [])
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_season_pack(self) -> bool:
|
def is_season_pack(self) -> bool:
|
||||||
return self.season is not None and self.episode is None
|
return self.season is not None and self.episode is None
|
||||||
|
|||||||
@@ -210,3 +210,18 @@ class Language:
|
|||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"Language({self.iso!r}, {self.english_name!r})"
|
return f"Language({self.iso!r}, {self.english_name!r})"
|
||||||
|
|
||||||
|
|
||||||
|
# Characters allowed in dot-separated folder/filename forms:
|
||||||
|
# alphanumerics, underscores, spaces (about to be replaced with dots),
|
||||||
|
# literal dots, and hyphens. Everything else is stripped.
|
||||||
|
_FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")
|
||||||
|
|
||||||
|
|
||||||
|
def to_dot_folder_name(title: str) -> str:
|
||||||
|
"""Sanitize ``title`` for filesystem use and convert spaces to dots.
|
||||||
|
|
||||||
|
Produces e.g. ``Breaking.Bad`` from ``"Breaking Bad"`` or
|
||||||
|
``Spider-Man.No.Way.Home`` from ``"Spider-Man: No Way Home"``.
|
||||||
|
"""
|
||||||
|
return _FS_SAFE_CHARS.sub("", title).replace(" ", ".")
|
||||||
|
|||||||
@@ -1,20 +1,26 @@
|
|||||||
"""TV Show domain entities.
|
"""TV Show domain entities.
|
||||||
|
|
||||||
This module implements the TVShow aggregate following DDD principles:
|
This module implements the TVShow aggregate following DDD principles.
|
||||||
|
|
||||||
|
Aggregate ownership::
|
||||||
|
|
||||||
|
TVShow ← aggregate root (the repo returns this)
|
||||||
|
└── seasons: dict[SeasonNumber, Season]
|
||||||
|
└── Season
|
||||||
|
└── episodes: dict[EpisodeNumber, Episode]
|
||||||
|
└── Episode ← file metadata + audio/subtitle tracks
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
|
||||||
* ``TVShow`` is the aggregate **root** — the only entity exposed by the
|
* ``TVShow`` is the aggregate **root** — the only entity exposed by the
|
||||||
repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``).
|
repository.
|
||||||
* ``Season`` is owned by TVShow and owns its episodes
|
* ``Season`` is owned by TVShow. ``Episode`` is owned by Season.
|
||||||
(``episodes: dict[EpisodeNumber, Episode]``).
|
* Children do not back-reference the root (no ``show_imdb_id`` on
|
||||||
* ``Episode`` is owned by Season. It carries the actual file metadata
|
Season/Episode): they are only ever reached *through* TVShow.
|
||||||
(path, size) and the discovered tracks (audio, subtitles).
|
* Mutation invariants are enforced through aggregate-root methods such as
|
||||||
|
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes``
|
||||||
Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode):
|
to mutate without going through the root, otherwise invariants are not
|
||||||
they are only ever reached through ``TVShow``.
|
guaranteed.
|
||||||
|
|
||||||
Mutation invariants are enforced through aggregate-root methods such as
|
|
||||||
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to
|
|
||||||
mutate without going through the root, otherwise invariants are not guaranteed.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -23,7 +29,13 @@ import re
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
|
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
|
||||||
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
|
from ..shared.value_objects import (
|
||||||
|
FilePath,
|
||||||
|
FileSize,
|
||||||
|
ImdbId,
|
||||||
|
Language,
|
||||||
|
to_dot_folder_name,
|
||||||
|
)
|
||||||
from .value_objects import (
|
from .value_objects import (
|
||||||
CollectionStatus,
|
CollectionStatus,
|
||||||
EpisodeNumber,
|
EpisodeNumber,
|
||||||
@@ -421,8 +433,7 @@ class TVShow:
|
|||||||
|
|
||||||
def get_folder_name(self) -> str:
|
def get_folder_name(self) -> str:
|
||||||
"""Dot-separated folder name (e.g. ``Breaking.Bad``)."""
|
"""Dot-separated folder name (e.g. ``Breaking.Bad``)."""
|
||||||
cleaned = re.sub(r"[^\w\s\.\-]", "", self.title)
|
return to_dot_folder_name(self.title)
|
||||||
return cleaned.replace(" ", ".")
|
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
|
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
#
|
#
|
||||||
# video — extensions that confirm a video media file
|
# video — extensions that confirm a video media file
|
||||||
# non_video — extensions that definitively exclude video content (no metadata here)
|
# non_video — extensions that definitively exclude video content (no metadata here)
|
||||||
# metadata — extensions always present alongside releases, ignored in type decision
|
# metadata — release metadata (.nfo, .txt, …), ignored in type decision
|
||||||
|
# subtitle — subtitle sidecar formats, ignored in type decision but central
|
||||||
|
# to the subtitles domain (loaded as a distinct set)
|
||||||
|
|
||||||
video:
|
video:
|
||||||
- .mkv
|
- .mkv
|
||||||
@@ -57,6 +59,10 @@ metadata:
|
|||||||
- .md5
|
- .md5
|
||||||
- .jpg
|
- .jpg
|
||||||
- .png
|
- .png
|
||||||
|
|
||||||
|
subtitle:
|
||||||
|
# Subtitle sidecar formats — also ignored in type detection,
|
||||||
|
# but kept distinct because they're central to the subtitles domain.
|
||||||
- .srt
|
- .srt
|
||||||
- .sub
|
- .sub
|
||||||
- .idx
|
- .idx
|
||||||
|
|||||||
Reference in New Issue
Block a user