Merge branch 'dev/sprint-cleanup'

Multi-week sprint: ISO 639-2/B language unification, release parser
unification + data-driven tokenizer, removal of fossil services
(movies/tv_shows/subtitles), subtitle services split into a package,
MediaInfo split, test suite expansion (990 passing).

See CHANGELOG.md [Unreleased] for the user-facing summary.
This commit is contained in:
2026-05-17 23:42:05 +02:00
104 changed files with 8930 additions and 6560 deletions
+2
View File
@@ -59,6 +59,8 @@ Thumbs.db
# Backup files
*.backup
*.bak
env_backup/
# Application data dir
data/*
+1 -3
View File
@@ -192,9 +192,7 @@ class Agent:
if cache_key_value is not None:
cached = memory.stm.tool_results.get(tool_name, cache_key_value)
if cached is not None:
logger.info(
f"Tool cache HIT: {tool_name}[{cache_key_value}]"
)
logger.info(f"Tool cache HIT: {tool_name}[{cache_key_value}]")
self._post_tool_side_effects(tool_name, args, cached, from_cache=True)
return {**cached, "_from_cache": True}
+4 -2
View File
@@ -6,7 +6,8 @@ from typing import Any
import requests
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.settings import Settings, settings
from alfred.settings import Settings
from alfred.settings import settings as default_settings
from .exceptions import LLMAPIError, LLMConfigurationError
@@ -36,6 +37,7 @@ class DeepSeekClient:
Raises:
LLMConfigurationError: If API key is missing
"""
self.settings = settings or default_settings
self.api_key = api_key or self.settings.deepseek_api_key
self.base_url = base_url or self.settings.deepseek_base_url
self.model = model or self.settings.deepseek_model
@@ -96,7 +98,7 @@ class DeepSeekClient:
payload = {
"model": self.model,
"messages": messages,
"temperature": settings.llm_temperature,
"temperature": self.settings.llm_temperature,
}
# Add tools if provided
+6 -4
View File
@@ -7,6 +7,7 @@ import requests
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.settings import Settings
from alfred.settings import settings as default_settings
from .exceptions import LLMAPIError, LLMConfigurationError
@@ -46,11 +47,12 @@ class OllamaClient:
Raises:
LLMConfigurationError: If configuration is invalid
"""
self.base_url = base_url or settings.ollama_base_url
self.model = model or settings.ollama_model
self.timeout = timeout or settings.request_timeout
self.settings = settings or default_settings
self.base_url = base_url or self.settings.ollama_base_url
self.model = model or self.settings.ollama_model
self.timeout = timeout or self.settings.request_timeout
self.temperature = (
temperature if temperature is not None else settings.llm_temperature
temperature if temperature is not None else self.settings.llm_temperature
)
if not self.base_url:
+3 -1
View File
@@ -165,7 +165,9 @@ EXPRESSIONS À UTILISER (une par situation, naturellement intégrées dans ta r
lines.append(" Steps:")
for step in steps:
step_id = step.get("id", "?")
step_tool = step.get("tool") or ("ask_user" if step.get("ask_user") else "")
step_tool = step.get("tool") or (
"ask_user" if step.get("ask_user") else ""
)
lines.append(f" - {step_id} ({step_tool})")
lines.append(" Call end_workflow(reason) when done, cancelled, or off-topic.")
return "\n".join(lines)
+15 -10
View File
@@ -27,9 +27,9 @@ class ToolSpecError(ValueError):
class ParameterSpec:
"""Semantic description of a single tool parameter."""
description: str # Short: what the value represents.
why_needed: str # Why the tool needs this — drives LLM reasoning.
example: str | None = None # Concrete example value, shown to the LLM.
description: str # Short: what the value represents.
why_needed: str # Why the tool needs this — drives LLM reasoning.
example: str | None = None # Concrete example value, shown to the LLM.
@classmethod
def from_dict(cls, name: str, data: dict) -> ParameterSpec:
@@ -38,7 +38,9 @@ class ParameterSpec:
return cls(
description=str(data["description"]).strip(),
why_needed=str(data["why_needed"]).strip(),
example=str(data["example"]).strip() if data.get("example") is not None else None,
example=str(data["example"]).strip()
if data.get("example") is not None
else None,
)
@@ -54,7 +56,9 @@ class ReturnsSpec:
_require(data, "description", f"returns.{key}")
fields = data.get("fields") or {}
if not isinstance(fields, dict):
raise ToolSpecError(f"returns.{key}.fields must be a dict, got {type(fields).__name__}")
raise ToolSpecError(
f"returns.{key}.fields must be a dict, got {type(fields).__name__}"
)
return cls(
description=str(data["description"]).strip(),
fields={str(k): str(v).strip() for k, v in fields.items()},
@@ -78,14 +82,14 @@ class ToolSpec:
"""Full semantic spec for one tool."""
name: str
summary: str # One-liner — becomes Tool.description.
description: str # Longer paragraph.
summary: str # One-liner — becomes Tool.description.
description: str # Longer paragraph.
when_to_use: str
when_not_to_use: str | None
next_steps: str | None
parameters: dict[str, ParameterSpec] # name -> ParameterSpec
returns: dict[str, ReturnsSpec] # status_key -> ReturnsSpec
cache: CacheSpec | None = None # If present, tool is cached.
parameters: dict[str, ParameterSpec] # name -> ParameterSpec
returns: dict[str, ReturnsSpec] # status_key -> ReturnsSpec
cache: CacheSpec | None = None # If present, tool is cached.
@classmethod
def from_yaml_path(cls, path: Path) -> ToolSpec:
@@ -200,6 +204,7 @@ class ToolSpec:
# Helpers
# ---------------------------------------------------------------------------
def _require(data: dict, key: str, where: str) -> None:
if data.get(key) is None or (isinstance(data[key], str) and not data[key].strip()):
raise ToolSpecError(f"{where}: missing required field '{key}'")
+19 -1
View File
@@ -37,6 +37,21 @@ logger.info(f"Memory context initialized (path: {memory_path})")
llm_provider = settings.default_llm_provider.lower()
class _UnconfiguredLLM:
"""Placeholder LLM used when no provider could be configured at import time.
Importing the FastAPI app must not fail just because credentials are
absent (e.g. during test collection). Any actual call surfaces a clear
503 error at request time via the handlers below.
"""
def __init__(self, reason: str):
self.reason = reason
def complete(self, *args, **kwargs):
raise LLMAPIError(f"LLM is not configured: {self.reason}")
try:
if llm_provider == "local":
logger.info("Using local Ollama LLM")
@@ -49,8 +64,11 @@ try:
else:
raise ValueError(f"Unknown LLM provider: {llm_provider}")
except LLMConfigurationError as e:
# Degrade gracefully: keep the app importable so tests can patch agent.step
# and so missing credentials surface as a 503 at the endpoint, not as an
# import error.
logger.error(f"Failed to initialize LLM: {e}")
raise
llm = _UnconfiguredLLM(str(e))
# Initialize agent
agent = Agent(
@@ -3,7 +3,7 @@
from __future__ import annotations
from alfred.domain.release.value_objects import ParsedRelease
from alfred.domain.shared.media_info import MediaInfo
from alfred.domain.shared.media import MediaInfo
# Map ffprobe codec names to scene-style codec tokens
_VIDEO_CODEC_MAP = {
@@ -4,7 +4,7 @@ import logging
from pathlib import Path
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
@@ -264,7 +264,7 @@ class ManageSubtitlesUseCase:
def _to_unresolved_dto(
track: SubtitleTrack, min_confidence: float = 0.7
track: SubtitleCandidate, min_confidence: float = 0.7
) -> UnresolvedTrack:
reason = "unknown_language" if track.language is None else "low_confidence"
return UnresolvedTrack(
@@ -277,10 +277,10 @@ def _to_unresolved_dto(
def _pair_placed_with_tracks(
placed: list[PlacedTrack],
tracks: list[SubtitleTrack],
) -> list[tuple[PlacedTrack, SubtitleTrack]]:
tracks: list[SubtitleCandidate],
) -> list[tuple[PlacedTrack, SubtitleCandidate]]:
"""
Pair each PlacedTrack with its originating SubtitleTrack by source path.
Pair each PlacedTrack with its originating SubtitleCandidate by source path.
Falls back to positional matching if paths don't align.
"""
track_by_path = {t.file_path: t for t in tracks if t.file_path}
@@ -29,7 +29,9 @@ def _sanitize(text: str) -> str:
return _WIN_FORBIDDEN.sub("", text)
def _find_existing_tvshow_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]:
def _find_existing_tvshow_folders(
tv_root: Path, tmdb_title: str, tmdb_year: int
) -> list[str]:
"""Return folder names in tv_root that match title + year prefix."""
if not tv_root.exists():
return []
@@ -52,9 +54,11 @@ def _get_tv_root() -> Path | None:
# Internal sentinel + series-folder resolver (shared by the 3 TV use cases)
# ---------------------------------------------------------------------------
@dataclass
class _Clarification:
"""Module-private sentinel signalling that user input is needed."""
question: str
options: list[str]
@@ -99,6 +103,7 @@ def _resolve_series_folder(
# DTOs
# ---------------------------------------------------------------------------
@dataclass
class _ResolvedDestinationBase:
"""
@@ -109,7 +114,7 @@ class _ResolvedDestinationBase:
and a to_dict() that delegates the non-ok cases via _base_dict().
"""
status: str # "ok" | "needs_clarification" | "error"
status: str # "ok" | "needs_clarification" | "error"
# needs_clarification
question: str | None = None
@@ -124,7 +129,11 @@ class _ResolvedDestinationBase:
if self.status == "error":
return {"status": self.status, "error": self.error, "message": self.message}
if self.status == "needs_clarification":
return {"status": self.status, "question": self.question, "options": self.options or []}
return {
"status": self.status,
"question": self.question,
"options": self.options or [],
}
return None
@@ -155,7 +164,7 @@ class ResolvedEpisodeDestination(_ResolvedDestinationBase):
series_folder: str | None = None
season_folder: str | None = None
library_file: str | None = None # full path to destination .mkv
library_file: str | None = None # full path to destination .mkv
series_folder_name: str | None = None
season_folder_name: str | None = None
filename: str | None = None
@@ -216,6 +225,7 @@ class ResolvedSeriesDestination(_ResolvedDestinationBase):
# Use cases
# ---------------------------------------------------------------------------
def resolve_season_destination(
release_name: str,
tmdb_title: str,
@@ -231,14 +241,17 @@ def resolve_season_destination(
tv_root = _get_tv_root()
if not tv_root:
return ResolvedSeasonDestination(
status="error", error="library_not_set",
status="error",
error="library_not_set",
message="TV show library path is not configured.",
)
parsed = parse_release(release_name)
computed_name = _sanitize(parsed.show_folder_name(tmdb_title, tmdb_year))
resolved = _resolve_series_folder(tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder)
resolved = _resolve_series_folder(
tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder
)
if isinstance(resolved, _Clarification):
return ResolvedSeasonDestination(
status="needs_clarification",
@@ -277,7 +290,8 @@ def resolve_episode_destination(
tv_root = _get_tv_root()
if not tv_root:
return ResolvedEpisodeDestination(
status="error", error="library_not_set",
status="error",
error="library_not_set",
message="TV show library path is not configured.",
)
@@ -285,7 +299,9 @@ def resolve_episode_destination(
ext = Path(source_file).suffix
computed_name = _sanitize(parsed.show_folder_name(tmdb_title, tmdb_year))
resolved = _resolve_series_folder(tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder)
resolved = _resolve_series_folder(
tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder
)
if isinstance(resolved, _Clarification):
return ResolvedEpisodeDestination(
status="needs_clarification",
@@ -328,7 +344,8 @@ def resolve_movie_destination(
movies_root = memory.ltm.library_paths.get("movie")
if not movies_root:
return ResolvedMovieDestination(
status="error", error="library_not_set",
status="error",
error="library_not_set",
message="Movie library path is not configured.",
)
@@ -365,14 +382,17 @@ def resolve_series_destination(
tv_root = _get_tv_root()
if not tv_root:
return ResolvedSeriesDestination(
status="error", error="library_not_set",
status="error",
error="library_not_set",
message="TV show library path is not configured.",
)
parsed = parse_release(release_name)
computed_name = _sanitize(parsed.show_folder_name(tmdb_title, tmdb_year))
resolved = _resolve_series_folder(tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder)
resolved = _resolve_series_folder(
tv_root, tmdb_title, tmdb_year, computed_name, confirmed_folder
)
if isinstance(resolved, _Clarification):
return ResolvedSeriesDestination(
status="needs_clarification",
-2
View File
@@ -2,7 +2,6 @@
from .entities import Movie
from .exceptions import InvalidMovieData, MovieNotFound
from .services import MovieService
from .value_objects import MovieTitle, Quality, ReleaseYear
__all__ = [
@@ -12,5 +11,4 @@ __all__ = [
"Quality",
"MovieNotFound",
"InvalidMovieData",
"MovieService",
]
+48 -3
View File
@@ -3,16 +3,23 @@
from dataclasses import dataclass, field
from datetime import datetime
from ..shared.value_objects import FilePath, FileSize, ImdbId
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
from .value_objects import MovieTitle, Quality, ReleaseYear
@dataclass
class Movie:
"""
Movie entity representing a movie in the media library.
Movie aggregate root for the movies domain.
This is the main aggregate root for the movies domain.
Carries file metadata (path, size) and the tracks discovered by the
ffprobe + subtitle scan pipeline. The track lists may be empty when the
movie is known but not yet scanned, or when no file is downloaded.
Track helpers follow the same "C+" contract as ``Episode``: pass a
``Language`` for cross-format matching, or a ``str`` for case-insensitive
direct comparison.
"""
imdb_id: ImdbId
@@ -23,6 +30,8 @@ class Movie:
file_size: FileSize | None = None
tmdb_id: int | None = None
added_at: datetime = field(default_factory=datetime.now)
audio_tracks: list[AudioTrack] = field(default_factory=list)
subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
def __post_init__(self):
"""Validate movie entity."""
@@ -52,6 +61,42 @@ class Movie:
"""Check if the movie is downloaded (has a file)."""
return self.has_file()
# ── Audio helpers ──────────────────────────────────────────────────────
def has_audio_in(self, lang: str | Language) -> bool:
    """Report whether any audio track matches ``lang``.

    Each track's ``language`` is compared with ``track_lang_matches``, so
    the matching rules are whatever that helper implements.
    """
    for track in self.audio_tracks:
        if track_lang_matches(track.language, lang):
            return True
    return False
def audio_languages(self) -> list[str]:
    """Return each distinct audio language once, in first-seen track order.

    Tracks whose ``language`` is falsy (``None`` or empty) are skipped.
    """
    # dict keys keep insertion order, which gives an ordered de-duplication.
    ordered = dict.fromkeys(
        track.language for track in self.audio_tracks if track.language
    )
    return list(ordered)
# ── Subtitle helpers ───────────────────────────────────────────────────
def has_subtitles_in(self, lang: str | Language) -> bool:
    """Report whether any subtitle track matches ``lang``.

    Each track's ``language`` is compared with ``track_lang_matches``, so
    the matching rules are whatever that helper implements.
    """
    for track in self.subtitle_tracks:
        if track_lang_matches(track.language, lang):
            return True
    return False
def has_forced_subs(self) -> bool:
    """Report whether any subtitle track has a truthy ``is_forced`` flag."""
    for track in self.subtitle_tracks:
        if track.is_forced:
            return True
    return False
def subtitle_languages(self) -> list[str]:
    """Return each distinct subtitle language once, in first-seen track order.

    Tracks whose ``language`` is falsy (``None`` or empty) are skipped.
    """
    # dict keys keep insertion order, which gives an ordered de-duplication.
    ordered = dict.fromkeys(
        track.language for track in self.subtitle_tracks if track.language
    )
    return list(ordered)
def get_folder_name(self) -> str:
"""
Get the folder name for this movie.
-192
View File
@@ -1,192 +0,0 @@
"""Movie domain services - Business logic."""
import logging
import re
from ..shared.value_objects import FilePath, ImdbId
from .entities import Movie
from .exceptions import MovieAlreadyExists, MovieNotFound
from .repositories import MovieRepository
from .value_objects import Quality
logger = logging.getLogger(__name__)
class MovieService:
    """
    Domain service for movie-related business logic.

    This service contains business rules that don't naturally fit
    within a single entity: repository-backed CRUD with existence checks,
    plus filename/file heuristics (quality, year, file validity).
    """

    # Year candidates, tried in priority order. The trailing delimiter is a
    # lookahead so that back-to-back candidates ("x.1080.2010.y") can share it.
    _YEAR_PATTERNS = (
        re.compile(r"\((\d{4})\)"),  # (2010)
        re.compile(r"\.(\d{4})(?=\.)"),  # .2010.
        re.compile(r"\s(\d{4})(?=\s)"),  # " 2010 "
    )

    # Extensions accepted by validate_movie_file (compared lower-cased).
    _VALID_EXTENSIONS = frozenset(
        {".mkv", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm"}
    )

    # Anything smaller than this is assumed not to be a full movie.
    _MIN_MOVIE_SIZE = 100 * 1024 * 1024  # 100 MB

    def __init__(self, repository: MovieRepository):
        """
        Initialize movie service.

        Args:
            repository: Movie repository for persistence
        """
        self.repository = repository

    def add_movie(self, movie: Movie) -> None:
        """
        Add a new movie to the library.

        Args:
            movie: Movie entity to add

        Raises:
            MovieAlreadyExists: If movie with same IMDb ID already exists
        """
        if self.repository.exists(movie.imdb_id):
            raise MovieAlreadyExists(
                f"Movie with IMDb ID {movie.imdb_id} already exists"
            )

        self.repository.save(movie)
        logger.info(f"Added movie: {movie.title.value} ({movie.imdb_id})")

    def get_movie(self, imdb_id: ImdbId) -> Movie:
        """
        Get a movie by IMDb ID.

        Args:
            imdb_id: IMDb ID of the movie

        Returns:
            Movie entity

        Raises:
            MovieNotFound: If movie not found
        """
        movie = self.repository.find_by_imdb_id(imdb_id)
        if not movie:
            raise MovieNotFound(f"Movie with IMDb ID {imdb_id} not found")
        return movie

    def get_all_movies(self) -> list[Movie]:
        """
        Get all movies in the library.

        Returns:
            List of all movies
        """
        return self.repository.find_all()

    def update_movie(self, movie: Movie) -> None:
        """
        Update an existing movie.

        Args:
            movie: Movie entity with updated data

        Raises:
            MovieNotFound: If movie doesn't exist
        """
        if not self.repository.exists(movie.imdb_id):
            raise MovieNotFound(f"Movie with IMDb ID {movie.imdb_id} not found")

        self.repository.save(movie)
        logger.info(f"Updated movie: {movie.title.value} ({movie.imdb_id})")

    def remove_movie(self, imdb_id: ImdbId) -> None:
        """
        Remove a movie from the library.

        Args:
            imdb_id: IMDb ID of the movie to remove

        Raises:
            MovieNotFound: If movie not found
        """
        # The repository's delete() result is used as the "did it exist" signal.
        if not self.repository.delete(imdb_id):
            raise MovieNotFound(f"Movie with IMDb ID {imdb_id} not found")

        logger.info(f"Removed movie with IMDb ID: {imdb_id}")

    def detect_quality_from_filename(self, filename: str) -> Quality:
        """
        Detect video quality from filename.

        Matching is a case-insensitive substring test, checked from the
        highest resolution down.

        Args:
            filename: Filename to analyze

        Returns:
            Detected quality or UNKNOWN
        """
        filename_lower = filename.lower()

        # Check for quality indicators
        if "2160p" in filename_lower or "4k" in filename_lower:
            return Quality.UHD_4K
        elif "1080p" in filename_lower:
            return Quality.FULL_HD
        elif "720p" in filename_lower:
            return Quality.HD
        elif "480p" in filename_lower:
            return Quality.SD

        return Quality.UNKNOWN

    def extract_year_from_filename(self, filename: str) -> int | None:
        """
        Extract release year from filename.

        Looks for a 4-digit number in parentheses, between dots, or between
        whitespace (e.g. "Movie (2010)", "Movie.2010.1080p"). Every candidate
        is examined, so an implausible number that appears first (such as a
        bare "1080") no longer hides a real year that follows it.

        Args:
            filename: Filename to analyze

        Returns:
            First candidate within 1888..2100, or None if there is none
        """
        for pattern in self._YEAR_PATTERNS:
            for match in pattern.finditer(filename):
                year = int(match.group(1))
                # Validate year is reasonable
                if 1888 <= year <= 2100:
                    return year

        return None

    def validate_movie_file(self, file_path: FilePath) -> bool:
        """
        Validate that a file is a valid movie file.

        Checks, in order: the path exists, it is a regular file, its
        extension is a known video extension, and it is at least 100 MB.
        Each failed check is logged as a warning.

        Args:
            file_path: Path to the file

        Returns:
            True if valid movie file, False otherwise
        """
        if not file_path.exists():
            logger.warning(f"File does not exist: {file_path}")
            return False

        if not file_path.is_file():
            logger.warning(f"Path is not a file: {file_path}")
            return False

        # Check file extension
        if file_path.value.suffix.lower() not in self._VALID_EXTENSIONS:
            logger.warning(f"Invalid file extension: {file_path.value.suffix}")
            return False

        # Check file size (should be at least 100 MB for a movie).
        # Stat once so the comparison and the log line agree on the size.
        size = file_path.value.stat().st_size
        if size < self._MIN_MOVIE_SIZE:
            logger.warning(f"File too small to be a movie: {size} bytes")
            return False

        return True
+12
View File
@@ -122,3 +122,15 @@ def load_hdr_extra() -> set[str]:
def load_media_type_tokens() -> dict:
"""Site-specific media type tokens (doc, concert, collection, integrale)."""
return _load_sites().get("media_type_tokens", {})
def load_separators() -> list[str]:
    """Return the token separators used by the release name tokenizer.

    The list is read from the ``separators`` key of ``separators.yaml``.
    The canonical "." is guaranteed to be present: when the YAML omits it,
    it is placed first, so a misconfigured file cannot break the parser
    entirely.
    """
    configured = _load("separators.yaml").get("separators", []) or []
    if "." in configured:
        return configured
    return [".", *configured]
+75 -67
View File
@@ -2,6 +2,9 @@
from __future__ import annotations
import re
from .knowledge import load_separators
from .value_objects import (
_AUDIO,
_CODECS,
@@ -17,42 +20,53 @@ from .value_objects import (
)
def _tokenize(name: str) -> list[str]:
    """Break a release name into tokens at the configured separators.

    Consecutive separators count as one, and empty tokens (from leading or
    trailing separators) are discarded.
    """
    separator_chars = "".join(load_separators())
    # Escaped so that characters like "[" or "]" stay literal in the class.
    splitter = "[" + re.escape(separator_chars) + "]+"
    return list(filter(None, re.split(splitter, name)))
def parse_release(name: str) -> ParsedRelease:
"""
Parse a release name and return a ParsedRelease.
Well-formed names (no forbidden chars) go through full token-level parsing.
Malformed names go through _sanitize() — strip site tags, replace spaces —
then re-checked. Still malformed after sanitization → media_type="unknown", AI handles it.
Flow:
1. Strip a leading/trailing [site.tag] if present (sets parse_path="sanitized").
2. Check the remainder for truly forbidden chars (anything not in the
configured separators list). If any remain → media_type="unknown",
parse_path="ai", and the LLM handles it.
3. Tokenize using the configured separators (".", " ", "[", "]", "(", ")", "_", ...)
and run token-level matchers (season/episode, tech, languages, audio,
video, edition, title, year).
"""
site_tag = None
parse_path = "direct"
if not _is_well_formed(name):
clean, site_tag = _sanitize(name)
if not _is_well_formed(clean):
return ParsedRelease(
raw=name,
normalised=clean,
title=clean,
year=None,
season=None,
episode=None,
episode_end=None,
quality=None,
source=None,
codec=None,
group="UNKNOWN",
tech_string="",
media_type="unknown",
site_tag=site_tag,
parse_path="ai",
)
name = clean
# Always try to extract a bracket-enclosed site tag first.
clean, site_tag = _strip_site_tag(name)
if site_tag is not None:
parse_path = "sanitized"
tokens = name.split(".")
if not _is_well_formed(clean):
return ParsedRelease(
raw=name,
normalised=clean,
title=clean,
year=None,
season=None,
episode=None,
episode_end=None,
quality=None,
source=None,
codec=None,
group="UNKNOWN",
tech_string="",
media_type="unknown",
site_tag=site_tag,
parse_path="ai",
)
name = clean
tokens = _tokenize(name)
season, episode, episode_end = _extract_season_episode(tokens)
quality, source, codec, group, tech_tokens = _extract_tech(tokens)
@@ -139,23 +153,14 @@ def _infer_media_type(
def _is_well_formed(name: str) -> bool:
"""Return True if name contains no forbidden characters per scene naming rules."""
return not any(c in name for c in _FORBIDDEN_CHARS)
"""Return True if name contains no forbidden characters per scene naming rules.
def _sanitize(name: str) -> tuple[str, str | None]:
Characters listed as token separators (spaces, brackets, parens, …) are NOT
considered malforming — the tokenizer handles them. Only truly broken chars
like '@', '#', '!', '%' make a name malformed.
"""
Attempt to recover a malformed release name.
Steps (in order):
1. Strip site tag prefix/suffix [...]
2. Replace spaces with dots
Returns (clean_name, site_tag).
"""
s, site_tag = _strip_site_tag(name)
s = s.replace(" ", ".")
return s, site_tag
tokenizable = set(load_separators())
return not any(c in name for c in _FORBIDDEN_CHARS if c not in tokenizable)
def _strip_site_tag(name: str) -> tuple[str, str | None]:
@@ -190,43 +195,46 @@ def _strip_site_tag(name: str) -> tuple[str, str | None]:
return s, None
def _normalize(name: str) -> str:
"""Replace spaces with dots, collapse multiple dots."""
s = name.replace(" ", ".")
while ".." in s:
s = s.replace("..", ".")
return s.strip(".")
def _parse_season_episode(tok: str) -> tuple[int, int | None, int | None] | None:
"""
Parse a single token as a season/episode marker.
Handles: S03, S03E01, S03E01E02
Handles:
- SxxExx / SxxExxExx / Sxx (canonical scene form)
- NxNN / NxNNxNN (alt form: 1x05, 12x07x08)
Returns (season, episode, episode_end) or None if not a season token.
"""
upper = tok.upper()
if not (len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit()):
return None
season = int(upper[1:3])
rest = upper[3:] # everything after Sxx
# SxxExx form
if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
season = int(upper[1:3])
rest = upper[3:]
if not rest:
return season, None, None
if not rest:
return season, None, None
# Parse one or two Exx segments
episodes: list[int] = []
while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit():
episodes.append(int(rest[1:3]))
rest = rest[3:]
episodes: list[int] = []
while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit():
episodes.append(int(rest[1:3]))
rest = rest[3:]
if not episodes:
return None # malformed token like "S03XYZ"
if not episodes:
return None # malformed token like "S03XYZ"
episode = episodes[0]
episode_end = episodes[1] if len(episodes) >= 2 else None
return season, episode, episode_end
return season, episodes[0], episodes[1] if len(episodes) >= 2 else None
# NxNN form — split on "X" (uppercased), all parts must be digits
if "X" in upper:
parts = upper.split("X")
if len(parts) >= 2 and all(p.isdigit() and p for p in parts):
season = int(parts[0])
episode = int(parts[1])
episode_end = int(parts[2]) if len(parts) >= 3 else None
return season, episode, episode_end
return None
def _extract_season_episode(
+2 -1
View File
@@ -1,7 +1,7 @@
"""Shared kernel - Common domain concepts used across subdomains."""
from .exceptions import DomainException, ValidationError
from .value_objects import FilePath, FileSize, ImdbId
from .value_objects import FilePath, FileSize, ImdbId, Language
__all__ = [
"DomainException",
@@ -9,4 +9,5 @@ __all__ = [
"ImdbId",
"FilePath",
"FileSize",
"Language",
]
@@ -0,0 +1,5 @@
"""Shared knowledge loaders (cross-domain)."""
from .language_registry import LanguageRegistry
__all__ = ["LanguageRegistry"]
@@ -0,0 +1,129 @@
"""LanguageRegistry — loads and queries the canonical language table from YAML.
Builtin entries live in ``alfred/knowledge/iso_languages.yaml`` (versioned).
Learned entries can be added to ``data/knowledge/iso_languages_learned.yaml``
(gitignored, instance-local) and are merged additively — they extend builtin
languages or add new ones, never remove builtin entries.
"""
import logging
from pathlib import Path
import yaml
from ..value_objects import Language
logger = logging.getLogger(__name__)
import alfred as _alfred_pkg
_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge"
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
def _load_yaml(path: Path) -> dict:
try:
with open(path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except FileNotFoundError:
return {}
except Exception as e:
logger.warning(f"LanguageRegistry: could not load {path}: {e}")
return {}
def _merge_language_entries(base: dict, override: dict) -> dict:
"""
Merge learned language entries into builtin entries.
For each language iso, aliases lists are extended (deduped, order preserved);
scalar fields in override win over base.
"""
result = dict(base)
for iso, override_entry in override.items():
if iso not in result:
result[iso] = override_entry
continue
merged = dict(result[iso])
for key, val in override_entry.items():
if key == "aliases" and isinstance(val, list):
existing = merged.get("aliases", []) or []
merged["aliases"] = existing + [v for v in val if v not in existing]
else:
merged[key] = val
result[iso] = merged
return result
class LanguageRegistry:
    """
    Loads the canonical language table and provides lookup methods.

    Builtin entries are read from ``iso_languages.yaml`` and learned entries
    from ``iso_languages_learned.yaml``; the two are merged before indexing.
    Every lookup key (iso code, names, aliases) is stored lower-cased and
    stripped, which is the same normalization applied to queries, so lookups
    are case-insensitive for all of them.

    Usage::

        registry = LanguageRegistry()
        fr = registry.from_iso("fra")
        fr2 = registry.from_any("French")  # → same Language as `fr`
        fr3 = registry.from_any("fr")      # → same Language
        fr4 = registry.from_any("vostfr")  # → None (vostfr is subtitle-specific,
                                           #   lives in subtitles knowledge)
    """

    def __init__(self) -> None:
        self._by_iso: dict[str, Language] = {}
        self._lookup: dict[str, Language] = {}  # any-form → Language
        self._load()

    @staticmethod
    def _key(raw: str) -> str:
        """Normalize a lookup key exactly the way queries are normalized."""
        return raw.lower().strip()

    def _register(self, language: Language) -> None:
        """Index one language under its iso code, names and aliases.

        Later registrations win when two languages share a key. Non-string
        forms are skipped because a string query could never match them.
        """
        if isinstance(language.iso, str):
            self._by_iso[self._key(language.iso)] = language
        else:
            self._by_iso[language.iso] = language

        # Build the flat lookup table for from_any
        forms = (
            language.iso,
            language.english_name,
            language.native_name,
            *language.aliases,
        )
        for form in forms:
            if isinstance(form, str):
                self._lookup[self._key(form)] = language

    def _load(self) -> None:
        """Read builtin + learned YAML, merge them, and index every entry."""
        builtin = (
            _load_yaml(_BUILTIN_ROOT / "iso_languages.yaml").get("languages", {}) or {}
        )
        learned = (
            _load_yaml(_LEARNED_ROOT / "iso_languages_learned.yaml").get(
                "languages", {}
            )
            or {}
        )
        merged = _merge_language_entries(builtin, learned)

        for iso, entry in merged.items():
            if not isinstance(entry, dict):
                # One malformed entry must not take the whole registry down.
                logger.warning(
                    f"LanguageRegistry: skipping malformed entry for {iso!r}"
                )
                continue
            self._register(
                Language(
                    iso=iso,
                    english_name=entry.get("english_name", iso),
                    native_name=entry.get("native_name", iso),
                    aliases=tuple(entry.get("aliases", []) or []),
                )
            )

        logger.info(f"LanguageRegistry: {len(self._by_iso)} languages loaded")

    def from_iso(self, code: str) -> Language | None:
        """Look up by canonical 639-2/T code (case-insensitive).

        Returns None for unknown codes and for non-string input.
        """
        if not isinstance(code, str):
            return None
        return self._by_iso.get(self._key(code))

    def from_any(self, raw: str) -> Language | None:
        """
        Look up by any known representation: iso code, 639-1, 639-2/B variant,
        english name, native name, or any registered alias. Case-insensitive.

        Returns None for unknown values and for non-string input.
        """
        if not isinstance(raw, str):
            return None
        return self._lookup.get(self._key(raw))

    def all(self) -> list[Language]:
        """Return all known languages, in load order."""
        return list(self._by_iso.values())

    def __contains__(self, raw: str) -> bool:
        return self.from_any(raw) is not None

    def __len__(self) -> int:
        return len(self._by_iso)
+19
View File
@@ -0,0 +1,19 @@
"""Media — file-level track types (video/audio/subtitle) and MediaInfo container.
These are the **container-view** dataclasses, populated from ffprobe output and
used across the project to describe the content of a media file.
"""
from .audio import AudioTrack
from .info import MediaInfo
from .matching import track_lang_matches
from .subtitle import SubtitleTrack
from .video import VideoTrack
__all__ = [
"AudioTrack",
"MediaInfo",
"SubtitleTrack",
"VideoTrack",
"track_lang_matches",
]
+17
View File
@@ -0,0 +1,17 @@
"""AudioTrack — a single audio stream as reported by ffprobe."""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class AudioTrack:
    """A single audio track as reported by ffprobe.

    Plain data holder: the class defines no validation or normalization.
    ``index`` is the only field not typed as optional; every descriptive
    field may be ``None``.
    """

    index: int
    codec: str | None  # aac, ac3, eac3, dts, truehd, flac, …
    channels: int | None  # 2, 6 (5.1), 8 (7.1), …
    channel_layout: str | None  # stereo, 5.1, 7.1, …
    language: str | None  # ISO 639-2: fre, eng, und, …
    is_default: bool = False  # defaults to False when not supplied
+76
View File
@@ -0,0 +1,76 @@
"""MediaInfo — assembles video, audio and subtitle tracks for a media file."""
from __future__ import annotations
from dataclasses import dataclass, field
from .audio import AudioTrack
from .subtitle import SubtitleTrack
from .video import VideoTrack
@dataclass
class MediaInfo:
    """
    File-level media metadata extracted by ffprobe.

    Each stream type is held as a list of typed track objects. The flat
    accessors (``width``, ``height``, ``video_codec``, ``resolution``) are
    kept for backwards compatibility and read from the first video track,
    returning ``None`` when there is none.
    """

    video_tracks: list[VideoTrack] = field(default_factory=list)
    audio_tracks: list[AudioTrack] = field(default_factory=list)
    subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)

    # File-level (from ffprobe ``format`` block, not from any single stream)
    duration_seconds: float | None = None
    bitrate_kbps: int | None = None

    # ──────────────────────────────────────────────────────────────────────
    # Video conveniences — read the first video track
    # ──────────────────────────────────────────────────────────────────────

    @property
    def primary_video(self) -> VideoTrack | None:
        """First video track, or ``None`` when the file has no video."""
        if not self.video_tracks:
            return None
        return self.video_tracks[0]

    def _primary_attr(self, attr_name: str):
        """Read ``attr_name`` from the primary video track, if there is one."""
        primary = self.primary_video
        if not primary:
            return None
        return getattr(primary, attr_name)

    @property
    def width(self) -> int | None:
        """Width of the primary video track."""
        return self._primary_attr("width")

    @property
    def height(self) -> int | None:
        """Height of the primary video track."""
        return self._primary_attr("height")

    @property
    def video_codec(self) -> str | None:
        """Codec of the primary video track."""
        return self._primary_attr("codec")

    @property
    def resolution(self) -> str | None:
        """Resolution label of the primary video track."""
        return self._primary_attr("resolution")

    # ──────────────────────────────────────────────────────────────────────
    # Audio conveniences
    # ──────────────────────────────────────────────────────────────────────

    @property
    def audio_languages(self) -> list[str]:
        """Unique audio languages across all tracks (ISO 639-2), in track order."""
        # dict keys keep insertion order, which gives an ordered de-duplication.
        ordered = dict.fromkeys(
            track.language for track in self.audio_tracks if track.language
        )
        return list(ordered)

    @property
    def is_multi_audio(self) -> bool:
        """True if more than one audio language is present."""
        return len(self.audio_languages) > 1
+33
View File
@@ -0,0 +1,33 @@
"""Language-matching helper shared by media-bearing entities.
Both ``Episode`` and ``Movie`` carry ``audio_tracks`` / ``subtitle_tracks`` and
need to answer "do I have audio in language X?". The matching contract is the
same in both cases — keep it in one place.
"""
from __future__ import annotations
from ..value_objects import Language
def track_lang_matches(track_lang: str | None, query: str | Language) -> bool:
    """Tell whether a track's language string satisfies ``query`` (contract "C+").

    Two query modes are supported:

    * ``Language`` — cross-format mode. The decision is delegated to
      ``Language.matches``, so any known representation of that language
      is accepted.
    * ``str`` — direct mode. Both sides are lowercased and stripped of
      surrounding whitespace, then compared for equality. No registry lookup
      takes place, so ``"fr"`` does not match ``"fre"``.

    Callers that need cross-format resolution (``"fr"`` ↔ ``"fre"`` ↔
    ``"french"``) should resolve their string through a ``LanguageRegistry``
    once and pass the resulting ``Language``.

    Returns ``False`` when the track has no language or when ``query`` is
    neither a ``Language`` nor a ``str``.
    """
    # A track without a language can never match anything.
    if track_lang is None:
        return False

    if isinstance(query, Language):
        return query.matches(track_lang)

    # Unsupported query types are treated as "no match" rather than an error.
    if not isinstance(query, str):
        return False

    wanted = query.lower().strip()
    actual = track_lang.lower().strip()
    return actual == wanted
+25
View File
@@ -0,0 +1,25 @@
"""SubtitleTrack — a single embedded subtitle stream as reported by ffprobe.
This is the **container-view** representation (ffprobe output) used uniformly
across the project to describe a subtitle stream embedded in a media file.
Not to be confused with ``alfred.domain.subtitles.entities.SubtitleCandidate``
which models a subtitle being **scanned/matched** (with confidence, raw tokens,
file path, etc.). The two coexist by design — they describe the same real-world
concept seen from two different bounded contexts.
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class SubtitleTrack:
    """Container-view description of one embedded subtitle stream (ffprobe).

    Attributes:
        index: Index of the track.
        codec: Codec name (``subrip``, ``ass``, ``hdmv_pgs_subtitle``, …), or
            ``None`` when unknown.
        language: ISO 639-2 code (``fre``, ``eng``, ``und``, …), or ``None``.
        is_default: Default-track flag; ``False`` unless explicitly set.
        is_forced: Forced-track flag; ``False`` unless explicitly set.
    """

    index: int
    codec: str | None
    language: str | None
    is_default: bool = False
    is_forced: bool = False
+62
View File
@@ -0,0 +1,62 @@
"""VideoTrack — a single video stream as reported by ffprobe."""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class VideoTrack:
    """A single video track as reported by ffprobe.

    A media file typically has one video track but can have several (alt
    camera angles, attached thumbnail images reported as still-image streams,
    etc.), hence the ``list[VideoTrack]`` on ``MediaInfo``.

    Attributes:
        index: Index of the track.
        codec: Codec name (``h264``, ``hevc``, ``av1``, …), or ``None``.
        width: Picture width, or ``None`` when unavailable.
        height: Picture height, or ``None`` when unavailable.
        is_default: Default-track flag; ``False`` unless explicitly set.
    """

    index: int
    codec: str | None
    width: int | None
    height: int | None
    is_default: bool = False

    @property
    def resolution(self) -> str | None:
        """Best-effort resolution label: ``2160p``, ``1080p``, ``720p``, …

        Width takes priority over height so that widescreen/cinema crops are
        classified by their full-frame tier (e.g. 1920×960 scope → ``1080p``,
        not ``720p``). Height is only consulted when width is unavailable.

        Returns:
            ``None`` when both dimensions are unknown; otherwise the label of
            the first tier whose minimum is reached. Below the smallest tier
            the raw height is reported as ``"<height>p"``, or — when only a
            width is known — the raw width as ``"<width>w"``.
        """
        width, height = self.width, self.height
        if width is None and height is None:
            return None

        if width is not None:
            # (minimum width, label), checked from the largest tier down.
            # NOTE(review): with these minimums 854×480 and 720×480 are
            # labelled "576p" — confirm this is the intended classification.
            width_tiers = (
                (3840, "2160p"),
                (1920, "1080p"),
                (1280, "720p"),
                (720, "576p"),
                (640, "480p"),
            )
            for minimum, label in width_tiers:
                if width >= minimum:
                    return label
            # Below every tier: fall back to the raw height, or to the raw
            # width (suffix "w") when the height is missing or zero.
            return f"{height}p" if height else f"{width}w"

        # Width unknown: classify on height alone.
        height_tiers = (
            (2160, "2160p"),
            (1080, "1080p"),
            (720, "720p"),
            (576, "576p"),
            (480, "480p"),
        )
        for minimum, label in height_tiers:
            if height >= minimum:
                return label
        return f"{height}p"
-107
View File
@@ -1,107 +0,0 @@
"""MediaInfo — pure domain dataclass for file-level media metadata."""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class AudioTrack:
"""A single audio track as reported by ffprobe."""
index: int
codec: str | None # aac, ac3, eac3, dts, truehd, flac, …
channels: int | None # 2, 6 (5.1), 8 (7.1), …
channel_layout: str | None # stereo, 5.1, 7.1, …
language: str | None # ISO 639-2: fre, eng, und, …
is_default: bool = False
@dataclass
class SubtitleTrack:
"""A single subtitle track as reported by ffprobe."""
index: int
codec: str | None # subrip, ass, hdmv_pgs_subtitle, …
language: str | None # ISO 639-2: fre, eng, und, …
is_default: bool = False
is_forced: bool = False
@dataclass
class MediaInfo:
"""
File-level media metadata extracted by ffprobe.
All fields are optional — ffprobe may not always report every value.
"""
# Video
width: int | None = None
height: int | None = None
video_codec: str | None = None # h264, hevc, av1, …
duration_seconds: float | None = None
bitrate_kbps: int | None = None
# Audio tracks (ordered by stream index)
audio_tracks: list[AudioTrack] = field(default_factory=list)
# Embedded subtitle tracks
subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
@property
def resolution(self) -> str | None:
"""
Best-effort resolution string: 2160p, 1080p, 720p, …
Width takes priority over height to handle widescreen/cinema crops
(e.g. 1920×960 scope → 1080p, not 720p).
Falls back to height when width is unavailable.
"""
match (self.width, self.height):
case (None, None):
return None
case (w, h) if w is not None:
match True:
case _ if w >= 3840:
return "2160p"
case _ if w >= 1920:
return "1080p"
case _ if w >= 1280:
return "720p"
case _ if w >= 720:
return "576p"
case _ if w >= 640:
return "480p"
case _:
return f"{h}p" if h else f"{w}w"
case (None, h):
match True:
case _ if h >= 2160:
return "2160p"
case _ if h >= 1080:
return "1080p"
case _ if h >= 720:
return "720p"
case _ if h >= 576:
return "576p"
case _ if h >= 480:
return "480p"
case _:
return f"{h}p"
@property
def audio_languages(self) -> list[str]:
"""Unique audio languages across all tracks (ISO 639-2)."""
seen: set[str] = set()
result = []
for track in self.audio_tracks:
if track.language and track.language not in seen:
seen.add(track.language)
result.append(track.language)
return result
@property
def is_multi_audio(self) -> bool:
"""True if more than one audio language is present."""
return len(self.audio_languages) > 1
+79
View File
@@ -131,3 +131,82 @@ class FileSize:
def __repr__(self) -> str:
return f"FileSize({self.bytes})"
@dataclass(frozen=True)
class Language:
    """Canonical language value object.

    ``iso`` is the primary identifier: the ISO 639-2/B code (3 letters,
    bibliographic form, e.g. "fre", "eng", "ger"). This is what ffprobe emits
    and the project-wide canonical form. Every other representation (ISO 639-1
    code, ISO 639-2/T variant, english/native names, common spellings) belongs
    in ``aliases`` and is consulted by ``matches()``.

    Equality and hashing depend on ``iso`` only, so two ``Language`` objects
    with the same canonical code are interchangeable whatever their aliases.
    """

    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        """Validate ``iso`` and store normalized ``iso`` / ``aliases``.

        Raises:
            ValidationError: if ``iso`` is not a non-empty string of length 3.
        """
        if not isinstance(self.iso, str) or not self.iso:
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(self.iso) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )

        # The dataclass is frozen, so normalized values have to be written
        # through object.__setattr__.
        object.__setattr__(self, "iso", self.iso.lower())

        # Aliases: non-string entries are dropped, the rest are lowercased and
        # trimmed, blanks are discarded, and duplicates collapse onto their
        # first occurrence (dict keys preserve insertion order).
        cleaned = (
            alias.lower().strip() for alias in self.aliases if isinstance(alias, str)
        )
        unique = tuple(dict.fromkeys(alias for alias in cleaned if alias))
        object.__setattr__(self, "aliases", unique)

    def matches(self, raw: str) -> bool:
        """Return True if ``raw`` is a known representation of this language.

        ``raw`` is lowercased and trimmed, then compared against the canonical
        ``iso`` code, the lowercased english and native names, and every
        alias. Non-string or blank input never matches.
        """
        if not isinstance(raw, str):
            return False
        needle = raw.lower().strip()
        return bool(needle) and (
            needle == self.iso
            or needle == self.english_name.lower()
            or needle == self.native_name.lower()
            or needle in self.aliases
        )

    def __eq__(self, other: object) -> bool:
        # Identity is the canonical code alone; names and aliases are ignored.
        if isinstance(other, Language):
            return self.iso == other.iso
        return NotImplemented

    def __hash__(self) -> int:
        # Kept consistent with __eq__: hash on the canonical code only.
        return hash(self.iso)

    def __str__(self) -> str:
        return self.iso

    def __repr__(self) -> str:
        return f"Language({self.iso!r}, {self.english_name!r})"
+2 -2
View File
@@ -1,7 +1,7 @@
"""Subtitles domain — subtitle identification, classification and placement."""
from .aggregates import SubtitleRuleSet
from .entities import MediaSubtitleMetadata, SubtitleTrack
from .entities import MediaSubtitleMetadata, SubtitleCandidate
from .exceptions import SubtitleNotFound
from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase
from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher
@@ -17,7 +17,7 @@ from .value_objects import (
)
__all__ = [
"SubtitleTrack",
"SubtitleCandidate",
"MediaSubtitleMetadata",
"SubtitleRuleSet",
"SubtitleKnowledgeBase",
+13 -9
View File
@@ -12,12 +12,16 @@ from .value_objects import (
@dataclass
class SubtitleTrack:
class SubtitleCandidate:
"""
A single subtitle track — either an external file or an embedded stream.
A subtitle being scanned and matched — either an external file or an embedded stream.
State can evolve: unknown → resolved after user clarification.
confidence reflects how certain we are about language + type classification.
Unlike ``alfred.domain.shared.media.SubtitleTrack`` (the pure container-view
populated from ffprobe), a SubtitleCandidate carries the **flow state** of the
subtitle matching pipeline: language/format are typed value objects that may
be ``None`` while classification is in progress, ``confidence`` reflects how
certain we are, and ``raw_tokens`` holds the filename fragments still under
analysis. State evolves: unknown → resolved after user clarification.
"""
# Classification (may be None if not yet resolved)
@@ -68,7 +72,7 @@ class SubtitleTrack:
if self.is_embedded
else str(self.file_path.name if self.file_path else "?")
)
return f"SubtitleTrack({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
return f"SubtitleCandidate({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
@dataclass
@@ -80,14 +84,14 @@ class MediaSubtitleMetadata:
media_id: ImdbId | None
media_type: str # "movie" | "tv_show"
embedded_tracks: list[SubtitleTrack] = field(default_factory=list)
external_tracks: list[SubtitleTrack] = field(default_factory=list)
embedded_tracks: list[SubtitleCandidate] = field(default_factory=list)
external_tracks: list[SubtitleCandidate] = field(default_factory=list)
release_group: str | None = None
detected_pattern_id: str | None = None # pattern id from knowledge base
pattern_confirmed: bool = False
@property
def all_tracks(self) -> list[SubtitleTrack]:
def all_tracks(self) -> list[SubtitleCandidate]:
return self.embedded_tracks + self.external_tracks
@property
@@ -95,5 +99,5 @@ class MediaSubtitleMetadata:
return len(self.embedded_tracks) + len(self.external_tracks)
@property
def unresolved_tracks(self) -> list[SubtitleTrack]:
def unresolved_tracks(self) -> list[SubtitleCandidate]:
return [t for t in self.external_tracks if t.language is None]
+45 -12
View File
@@ -2,6 +2,7 @@
import logging
from ...shared.knowledge.language_registry import LanguageRegistry
from ..value_objects import (
ScanStrategy,
SubtitleFormat,
@@ -24,11 +25,16 @@ class SubtitleKnowledgeBase:
without restarting.
"""
def __init__(self, loader: KnowledgeLoader | None = None):
def __init__(
self,
loader: KnowledgeLoader | None = None,
language_registry: LanguageRegistry | None = None,
):
self._loader = loader or KnowledgeLoader()
self._language_registry = language_registry or LanguageRegistry()
self._build()
def _build(self) -> None:
def _build(self) -> None: # noqa: PLR0912 — straight-line YAML projection
data = self._loader.subtitles()
self._formats: dict[str, SubtitleFormat] = {}
@@ -39,17 +45,44 @@ class SubtitleKnowledgeBase:
description=fdata.get("description", ""),
)
self._languages: dict[str, SubtitleLanguage] = {}
for code, ldata in data.get("languages", {}).items():
self._languages[code] = SubtitleLanguage(
code=code,
tokens=ldata.get("tokens", []),
)
# Languages are sourced primarily from the canonical LanguageRegistry
# (alfred/knowledge/iso_languages.yaml — ISO 639-2/B). Subtitle-specific
# tokens (VOSTFR, VF, VFF…) are merged on top from subtitles.yaml's
# ``language_tokens`` section.
subtitle_extras: dict[str, list[str]] = {
code: list(tokens or [])
for code, tokens in (data.get("language_tokens", {}) or {}).items()
}
# Build reverse token → language code map
self._languages: dict[str, SubtitleLanguage] = {}
self._lang_token_map: dict[str, str] = {}
for code, lang in self._languages.items():
for token in lang.tokens:
for language in self._language_registry.all():
tokens: list[str] = [language.iso, language.english_name.lower()]
if language.native_name.lower() not in tokens:
tokens.append(language.native_name.lower())
for alias in language.aliases:
if alias not in tokens:
tokens.append(alias)
for extra in subtitle_extras.get(language.iso, []):
if extra.lower() not in tokens:
tokens.append(extra.lower())
self._languages[language.iso] = SubtitleLanguage(
code=language.iso,
tokens=tokens,
)
for token in tokens:
self._lang_token_map[token.lower()] = language.iso
# Subtitle-specific tokens for languages NOT in the canonical registry
# are still honored: register them as a minimal SubtitleLanguage.
for code, extras in subtitle_extras.items():
if code in self._languages:
continue
tokens = [code] + [e.lower() for e in extras]
self._languages[code] = SubtitleLanguage(code=code, tokens=tokens)
for token in tokens:
self._lang_token_map[token.lower()] = code
# Build reverse token → type map
@@ -61,7 +94,7 @@ class SubtitleKnowledgeBase:
d = data.get("defaults", {})
self._default_rules = SubtitleMatchingRules(
preferred_languages=d.get("languages", ["fra", "eng"]),
preferred_languages=d.get("languages", ["fre", "eng"]),
preferred_formats=d.get("formats", ["srt"]),
allowed_types=d.get("types", ["standard", "forced"]),
format_priority=d.get("format_priority", ["srt", "ass"]),
-60
View File
@@ -1,60 +0,0 @@
"""Subtitle repository interfaces (abstract)."""
from abc import ABC, abstractmethod
from ..shared.value_objects import ImdbId
from .entities import Subtitle
from .value_objects import Language
class SubtitleRepository(ABC):
"""
Abstract repository for subtitle persistence.
This defines the interface that infrastructure implementations must follow.
"""
@abstractmethod
def save(self, subtitle: Subtitle) -> None:
"""
Save a subtitle to the repository.
Args:
subtitle: Subtitle entity to save
"""
pass
@abstractmethod
def find_by_media(
self,
media_imdb_id: ImdbId,
language: Language | None = None,
season: int | None = None,
episode: int | None = None,
) -> list[Subtitle]:
"""
Find subtitles for a media item.
Args:
media_imdb_id: IMDb ID of the media
language: Optional language filter
season: Optional season number (for TV shows)
episode: Optional episode number (for TV shows)
Returns:
List of matching subtitles
"""
pass
@abstractmethod
def delete(self, subtitle: Subtitle) -> bool:
"""
Delete a subtitle from the repository.
Args:
subtitle: Subtitle to delete
Returns:
True if deleted, False if not found
"""
pass
+47 -70
View File
@@ -3,8 +3,11 @@
Given a video file path, the scanner:
1. Looks for subtitle files in the same directory as the video.
2. Optionally also inspects a Subs/ subfolder adjacent to the video.
3. Classifies each file (language, SDH, forced) from its filename.
4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh, keep_forced).
3. Classifies each file (language, SDH, forced) from its filename, delegating
all token knowledge to SubtitleKnowledgeBase (which itself merges
LanguageRegistry + subtitle-specific tokens from subtitles.yaml).
4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh,
keep_forced).
5. Returns a list of SubtitleCandidate — one per file that passes the filter,
with the destination filename already computed.
@@ -12,12 +15,14 @@ Filename classification heuristics
-----------------------------------
We parse the stem of each subtitle file looking for known patterns:
fr.srt → lang=fr, sdh=False, forced=False
fr.sdh.srt → lang=fr, sdh=True
fr.hi.srt → lang=fr, sdh=True (hi = hearing-impaired, alias for sdh)
fr.forced.srt → lang=fr, forced=True
Breaking.Bad.S01E01.French.srt → lang=fr (keyword match)
Breaking.Bad.S01E01.VOSTFR.srt → lang=fr (VOSTFR = French forced/foreign subs)
fre.srt → lang=fre, sdh=False, forced=False
fre.sdh.srt → lang=fre, sdh=True
fre.forced.srt → lang=fre, forced=True
Breaking.Bad.S01E01.French.srt → lang=fre (alias match via LanguageRegistry)
Breaking.Bad.S01E01.VOSTFR.srt → lang=fre (subtitle-specific token)
ISO 639-2/B codes are used throughout (matching the project-wide canonical form
from iso_languages.yaml — what ffprobe emits).
Output naming convention (matches SubtitlePreferences docstring):
{lang}.srt
@@ -26,62 +31,16 @@ Output naming convention (matches SubtitlePreferences docstring):
"""
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from .knowledge.base import SubtitleKnowledgeBase
from .value_objects import SubtitleType
logger = logging.getLogger(__name__)
# Subtitle file extensions we handle
SUBTITLE_EXTENSIONS = {".srt", ".ass", ".ssa", ".vtt", ".sub"}
# Language keyword map: lowercase token → ISO 639-1 code
_LANG_KEYWORDS: dict[str, str] = {
# French
"fr": "fr",
"fra": "fr",
"french": "fr",
"francais": "fr",
"français": "fr",
"vf": "fr",
"vff": "fr",
"vostfr": "fr",
# English
"en": "en",
"eng": "en",
"english": "en",
# Spanish
"es": "es",
"spa": "es",
"spanish": "es",
"espanol": "es",
# German
"de": "de",
"deu": "de",
"ger": "de",
"german": "de",
# Italian
"it": "it",
"ita": "it",
"italian": "it",
# Portuguese
"pt": "pt",
"por": "pt",
"portuguese": "pt",
# Dutch
"nl": "nl",
"nld": "nl",
"dutch": "nl",
# Japanese
"ja": "ja",
"jpn": "ja",
"japanese": "ja",
}
# Tokens that indicate SDH / hearing-impaired
_SDH_TOKENS = {"sdh", "hi", "hearing", "impaired", "cc", "closedcaption"}
# Tokens that indicate forced subtitles
_FORCED_TOKENS = {"forced", "foreign"}
_TOKEN_SPLIT = re.compile(r"[\.\s_\-]+")
@dataclass
@@ -89,7 +48,7 @@ class SubtitleCandidate:
"""A subtitle file that passed the filter, ready to be placed."""
source_path: Path
language: str # ISO 639-1 code, e.g. "fr"
language: str # ISO 639-2/B code, e.g. "fre"
is_sdh: bool
is_forced: bool
extension: str # e.g. ".srt"
@@ -111,28 +70,44 @@ class SubtitleCandidate:
return ".".join(parts) + "." + ext
# Module-level KB instance — built lazily on first use to avoid loading YAML at import.
_KB: SubtitleKnowledgeBase | None = None
def _kb() -> SubtitleKnowledgeBase:
    """Return the shared ``SubtitleKnowledgeBase``, creating it on first use.

    The instance is stored in the module-level ``_KB`` so later calls reuse it
    instead of building a new knowledge base.
    """
    global _KB  # noqa: PLW0603 — intentional lazy module-level cache
    if _KB is not None:
        return _KB
    _KB = SubtitleKnowledgeBase()
    return _KB
def _classify(path: Path) -> tuple[str | None, bool, bool]:
"""
Parse a subtitle filename and return (language_code, is_sdh, is_forced).
``language_code`` is the ISO 639-2/B canonical code (e.g. ``"fre"``).
Returns (None, False, False) if the language cannot be determined.
"""
stem = path.stem.lower()
# Split on dots, spaces, underscores, hyphens
import re
tokens = re.split(r"[\.\s_\-]+", stem)
tokens = _TOKEN_SPLIT.split(stem)
kb = _kb()
language: str | None = None
is_sdh = False
is_forced = False
for token in tokens:
if token in _LANG_KEYWORDS:
language = _LANG_KEYWORDS[token]
if token in _SDH_TOKENS:
if not token:
continue
if language is None:
lang = kb.language_for_token(token)
if lang is not None:
language = lang.code
continue
stype = kb.type_for_token(token)
if stype is SubtitleType.SDH:
is_sdh = True
if token in _FORCED_TOKENS:
elif stype is SubtitleType.FORCED:
is_forced = True
return language, is_sdh, is_forced
@@ -151,10 +126,12 @@ class SubtitleScanner:
def __init__(
self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool
):
self.languages = [l.lower() for l in languages]
self.languages = [lang.lower() for lang in languages]
self.min_size_kb = min_size_kb
self.keep_sdh = keep_sdh
self.keep_forced = keep_forced
self._kb = _kb()
self._subtitle_extensions = {e.lower() for e in self._kb.known_extensions()}
def scan(self, video_path: Path) -> list[SubtitleCandidate]:
"""
@@ -176,7 +153,7 @@ class SubtitleScanner:
for path in sorted(directory.iterdir()):
if not path.is_file():
continue
if path.suffix.lower() not in SUBTITLE_EXTENSIONS:
if path.suffix.lower() not in self._subtitle_extensions:
continue
candidate = self._evaluate(path)
-149
View File
@@ -1,149 +0,0 @@
"""Subtitle domain services - Business logic."""
import logging
from ..shared.value_objects import FilePath, ImdbId
from .entities import Subtitle
from .exceptions import SubtitleNotFound
from .repositories import SubtitleRepository
from .value_objects import Language, SubtitleFormat
logger = logging.getLogger(__name__)
class SubtitleService:
"""
Domain service for subtitle-related business logic.
This service is SHARED between movies and TV shows domains.
Both can use this service to manage subtitles.
"""
def __init__(self, repository: SubtitleRepository):
"""
Initialize subtitle service.
Args:
repository: Subtitle repository for persistence
"""
self.repository = repository
def add_subtitle(self, subtitle: Subtitle) -> None:
"""
Add a subtitle to the library.
Args:
subtitle: Subtitle entity to add
"""
self.repository.save(subtitle)
logger.info(
f"Added subtitle: {subtitle.language.value} for {subtitle.media_imdb_id}"
)
def find_subtitles_for_movie(
self, imdb_id: ImdbId, languages: list[Language] | None = None
) -> list[Subtitle]:
"""
Find subtitles for a movie.
Args:
imdb_id: IMDb ID of the movie
languages: Optional list of languages to filter by
Returns:
List of matching subtitles
"""
if languages:
all_subtitles = []
for lang in languages:
subs = self.repository.find_by_media(imdb_id, language=lang)
all_subtitles.extend(subs)
return all_subtitles
else:
return self.repository.find_by_media(imdb_id)
def find_subtitles_for_episode(
self,
imdb_id: ImdbId,
season: int,
episode: int,
languages: list[Language] | None = None,
) -> list[Subtitle]:
"""
Find subtitles for a TV show episode.
Args:
imdb_id: IMDb ID of the TV show
season: Season number
episode: Episode number
languages: Optional list of languages to filter by
Returns:
List of matching subtitles
"""
if languages:
all_subtitles = []
for lang in languages:
subs = self.repository.find_by_media(
imdb_id, language=lang, season=season, episode=episode
)
all_subtitles.extend(subs)
return all_subtitles
else:
return self.repository.find_by_media(
imdb_id, season=season, episode=episode
)
def remove_subtitle(self, subtitle: Subtitle) -> None:
"""
Remove a subtitle from the library.
Args:
subtitle: Subtitle to remove
Raises:
SubtitleNotFound: If subtitle not found
"""
if not self.repository.delete(subtitle):
raise SubtitleNotFound(f"Subtitle not found: {subtitle}")
logger.info(f"Removed subtitle: {subtitle}")
def detect_format_from_file(self, file_path: FilePath) -> SubtitleFormat:
"""
Detect subtitle format from file extension.
Args:
file_path: Path to subtitle file
Returns:
Detected subtitle format
"""
extension = file_path.value.suffix
return SubtitleFormat.from_extension(extension)
def validate_subtitle_file(self, file_path: FilePath) -> bool:
"""
Validate that a file is a valid subtitle file.
Args:
file_path: Path to the file
Returns:
True if valid subtitle file, False otherwise
"""
if not file_path.exists():
logger.warning(f"File does not exist: {file_path}")
return False
if not file_path.is_file():
logger.warning(f"Path is not a file: {file_path}")
return False
# Check file extension
try:
self.detect_format_from_file(file_path)
return True
except Exception as e:
logger.warning(f"Invalid subtitle format: {e}")
return False
+12 -10
View File
@@ -7,7 +7,7 @@ import subprocess
from pathlib import Path
from ...shared.value_objects import ImdbId
from ..entities import MediaSubtitleMetadata, SubtitleTrack
from ..entities import MediaSubtitleMetadata, SubtitleCandidate
from ..knowledge.base import SubtitleKnowledgeBase
from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType
@@ -91,7 +91,7 @@ class SubtitleIdentifier:
# Embedded tracks — ffprobe
# ------------------------------------------------------------------
def _scan_embedded(self, video_path: Path) -> list[SubtitleTrack]:
def _scan_embedded(self, video_path: Path) -> list[SubtitleCandidate]:
if not video_path.exists():
return []
try:
@@ -139,7 +139,7 @@ class SubtitleIdentifier:
stype = SubtitleType.STANDARD
tracks.append(
SubtitleTrack(
SubtitleCandidate(
language=lang,
format=None,
subtitle_type=stype,
@@ -159,7 +159,7 @@ class SubtitleIdentifier:
def _scan_external(
self, video_path: Path, pattern: SubtitlePattern
) -> list[SubtitleTrack]:
) -> list[SubtitleCandidate]:
strategy = pattern.scan_strategy
episode_stem: str | None = None
@@ -238,7 +238,7 @@ class SubtitleIdentifier:
paths: list[Path],
pattern: SubtitlePattern,
episode_stem: str | None = None,
) -> list[SubtitleTrack]:
) -> list[SubtitleCandidate]:
tracks = []
for path in paths:
track = self._classify_single(path, episode_stem=episode_stem)
@@ -253,7 +253,7 @@ class SubtitleIdentifier:
def _classify_single(
self, path: Path, episode_stem: str | None = None
) -> SubtitleTrack:
) -> SubtitleCandidate:
fmt = self.kb.format_for_extension(path.suffix)
tokens = (
_tokenize_suffix(path.stem, episode_stem)
@@ -290,7 +290,7 @@ class SubtitleIdentifier:
size_kb = path.stat().st_size / 1024 if path.exists() else None
entry_count = _count_entries(path) if path.exists() else None
return SubtitleTrack(
return SubtitleCandidate(
language=language,
format=fmt,
subtitle_type=subtitle_type,
@@ -302,7 +302,9 @@ class SubtitleIdentifier:
raw_tokens=tokens,
)
def _disambiguate_by_size(self, tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
def _disambiguate_by_size(
self, tracks: list[SubtitleCandidate]
) -> list[SubtitleCandidate]:
"""
When multiple tracks share the same language and type is UNKNOWN/STANDARD,
the one with the most entries (lines) is SDH, the smallest is FORCED if
@@ -312,7 +314,7 @@ class SubtitleIdentifier:
"""
# Group by language code
lang_groups: dict[str, list[SubtitleTrack]] = {}
lang_groups: dict[str, list[SubtitleCandidate]] = {}
for track in tracks:
key = track.language.code if track.language else "__unknown__"
lang_groups.setdefault(key, []).append(track)
@@ -341,6 +343,6 @@ class SubtitleIdentifier:
return result
def _set_type(self, track: SubtitleTrack, stype: SubtitleType) -> None:
def _set_type(self, track: SubtitleCandidate, stype: SubtitleType) -> None:
"""Mutate track type in-place."""
track.subtitle_type = stype
+12 -12
View File
@@ -2,7 +2,7 @@
import logging
from ..entities import SubtitleTrack
from ..entities import SubtitleCandidate
from ..value_objects import SubtitleMatchingRules
logger = logging.getLogger(__name__)
@@ -10,7 +10,7 @@ logger = logging.getLogger(__name__)
class SubtitleMatcher:
"""
Filters a list of SubtitleTrack against effective SubtitleMatchingRules.
Filters a list of SubtitleCandidate against effective SubtitleMatchingRules.
Returns matched tracks (pass all filters, confidence >= min_confidence)
and unresolved tracks (need user clarification).
@@ -21,14 +21,14 @@ class SubtitleMatcher:
def match(
self,
tracks: list[SubtitleTrack],
tracks: list[SubtitleCandidate],
rules: SubtitleMatchingRules,
) -> tuple[list[SubtitleTrack], list[SubtitleTrack]]:
) -> tuple[list[SubtitleCandidate], list[SubtitleCandidate]]:
"""
Returns (matched, unresolved).
"""
matched: list[SubtitleTrack] = []
unresolved: list[SubtitleTrack] = []
matched: list[SubtitleCandidate] = []
unresolved: list[SubtitleCandidate] = []
for track in tracks:
if track.is_embedded:
@@ -51,7 +51,7 @@ class SubtitleMatcher:
return matched, unresolved
def _passes_filters(
self, track: SubtitleTrack, rules: SubtitleMatchingRules
self, track: SubtitleCandidate, rules: SubtitleMatchingRules
) -> bool:
# Language filter
if rules.preferred_languages:
@@ -76,14 +76,14 @@ class SubtitleMatcher:
def _resolve_conflicts(
self,
tracks: list[SubtitleTrack],
tracks: list[SubtitleCandidate],
rules: SubtitleMatchingRules,
) -> list[SubtitleTrack]:
) -> list[SubtitleCandidate]:
"""
When multiple tracks have same language + type, keep only the best one
according to format_priority. If no format_priority applies, keep the first.
"""
seen: dict[tuple, SubtitleTrack] = {}
seen: dict[tuple, SubtitleCandidate] = {}
for track in tracks:
lang = track.language.code if track.language else None
@@ -106,8 +106,8 @@ class SubtitleMatcher:
def _prefer(
self,
candidate: SubtitleTrack,
existing: SubtitleTrack,
candidate: SubtitleCandidate,
existing: SubtitleCandidate,
format_priority: list[str],
) -> bool:
"""Return True if candidate is preferable to existing."""
+6 -6
View File
@@ -5,12 +5,12 @@ import os
from dataclasses import dataclass
from pathlib import Path
from ..entities import SubtitleTrack
from ..entities import SubtitleCandidate
logger = logging.getLogger(__name__)
def _build_dest_name(track: SubtitleTrack, video_stem: str) -> str:
def _build_dest_name(track: SubtitleCandidate, video_stem: str) -> str:
"""
Build the destination filename for a subtitle track.
@@ -42,7 +42,7 @@ class PlacedTrack:
@dataclass
class PlaceResult:
placed: list[PlacedTrack]
skipped: list[tuple[SubtitleTrack, str]] # (track, reason)
skipped: list[tuple[SubtitleCandidate, str]] # (track, reason)
@property
def placed_count(self) -> int:
@@ -55,7 +55,7 @@ class PlaceResult:
class SubtitlePlacer:
"""
Hard-links matched SubtitleTrack files next to a destination video.
Hard-links matched SubtitleCandidate files next to a destination video.
Uses the same hard-link strategy as FileManager.copy_file:
instant, no data duplication, qBittorrent keeps seeding.
@@ -65,11 +65,11 @@ class SubtitlePlacer:
def place(
self,
tracks: list[SubtitleTrack],
tracks: list[SubtitleCandidate],
destination_video: Path,
) -> PlaceResult:
placed: list[PlacedTrack] = []
skipped: list[tuple[SubtitleTrack, str]] = []
skipped: list[tuple[SubtitleCandidate, str]] = []
dest_dir = destination_video.parent
+3 -3
View File
@@ -1,9 +1,9 @@
"""Subtitle service utilities."""
from ..entities import SubtitleTrack
from ..entities import SubtitleCandidate
def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
def available_subtitles(tracks: list[SubtitleCandidate]) -> list[SubtitleCandidate]:
"""
Return the distinct subtitle tracks available, deduped by (language, type).
@@ -11,7 +11,7 @@ def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
preferences — e.g. eng, eng.sdh, fra all show up as separate entries.
"""
seen: set[tuple] = set()
result: list[SubtitleTrack] = []
result: list[SubtitleCandidate] = []
for track in tracks:
lang = track.language.code if track.language else None
key = (lang, track.subtitle_type)
+7 -3
View File
@@ -2,18 +2,22 @@
from .entities import Episode, Season, TVShow
from .exceptions import InvalidEpisode, SeasonNotFound, TVShowNotFound
from .services import TVShowService
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
from .value_objects import (
CollectionStatus,
EpisodeNumber,
SeasonNumber,
ShowStatus,
)
__all__ = [
"TVShow",
"Season",
"Episode",
"ShowStatus",
"CollectionStatus",
"SeasonNumber",
"EpisodeNumber",
"TVShowNotFound",
"InvalidEpisode",
"SeasonNotFound",
"TVShowService",
]
+356 -118
View File
@@ -1,120 +1,258 @@
"""TV Show domain entities."""
"""TV Show domain entities.
This module implements the TVShow aggregate following DDD principles:
* ``TVShow`` is the aggregate **root** — the only entity exposed by the
repository. It owns its seasons (``seasons: dict[SeasonNumber, Season]``).
* ``Season`` is owned by TVShow and owns its episodes
(``episodes: dict[EpisodeNumber, Episode]``).
* ``Episode`` is owned by Season. It carries the actual file metadata
(path, size) and the discovered tracks (audio, subtitles).
Children do not back-reference the root (no ``show_imdb_id`` on Season/Episode):
they are only ever reached through ``TVShow``.
Mutation invariants are enforced through aggregate-root methods such as
``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes`` to
mutate without going through the root, otherwise invariants are not guaranteed.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from dataclasses import dataclass, field
from ..shared.value_objects import FilePath, FileSize, ImdbId
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
from .value_objects import (
CollectionStatus,
EpisodeNumber,
SeasonNumber,
ShowStatus,
)
# ════════════════════════════════════════════════════════════════════════════
# Episode
# ════════════════════════════════════════════════════════════════════════════
@dataclass
class TVShow:
class Episode:
"""
TV Show entity representing a TV show in the media library.
A single episode of a TV show — leaf of the TVShow aggregate.
This is the main aggregate root for the TV shows domain.
Migrated from agent/models/tv_show.py
Carries the file metadata (path, size) and the discovered tracks
(audio + subtitle). Track lists are populated by the ffprobe + subtitle
scan pipeline; they may be empty when the episode is known but not yet
scanned, or when no file is downloaded yet.
"""
imdb_id: ImdbId
season_number: SeasonNumber
episode_number: EpisodeNumber
title: str
seasons_count: int
status: ShowStatus
tmdb_id: int | None = None
file_path: FilePath | None = None
file_size: FileSize | None = None
audio_tracks: list[AudioTrack] = field(default_factory=list)
subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
def __post_init__(self):
"""Validate TV show entity."""
# Ensure ImdbId is actually an ImdbId instance
if not isinstance(self.imdb_id, ImdbId):
if isinstance(self.imdb_id, str):
object.__setattr__(self, "imdb_id", ImdbId(self.imdb_id))
else:
raise ValueError(
f"imdb_id must be ImdbId or str, got {type(self.imdb_id)}"
)
def __post_init__(self) -> None:
# Coerce numbers if raw ints were passed
if not isinstance(self.season_number, SeasonNumber):
if isinstance(self.season_number, int):
self.season_number = SeasonNumber(self.season_number)
if not isinstance(self.episode_number, EpisodeNumber):
if isinstance(self.episode_number, int):
self.episode_number = EpisodeNumber(self.episode_number)
# Ensure ShowStatus is actually a ShowStatus instance
if not isinstance(self.status, ShowStatus):
if isinstance(self.status, str):
object.__setattr__(self, "status", ShowStatus.from_string(self.status))
else:
raise ValueError(
f"status must be ShowStatus or str, got {type(self.status)}"
)
# ── File presence ──────────────────────────────────────────────────────
# Validate seasons_count
if not isinstance(self.seasons_count, int) or self.seasons_count < 0:
raise ValueError(
f"seasons_count must be a non-negative integer, got {self.seasons_count}"
)
def has_file(self) -> bool:
"""True if a file path is set and the file actually exists on disk."""
return self.file_path is not None and self.file_path.exists()
def is_ongoing(self) -> bool:
"""Check if the show is still ongoing."""
return self.status == ShowStatus.ONGOING
def is_downloaded(self) -> bool:
"""Alias of ``has_file()`` — reads better in collection-status contexts."""
return self.has_file()
def is_ended(self) -> bool:
"""Check if the show has ended."""
return self.status == ShowStatus.ENDED
# ── Audio helpers ──────────────────────────────────────────────────────
def get_folder_name(self) -> str:
"""
Get the folder name for this TV show.
def has_audio_in(self, lang: str | Language) -> bool:
"""True if at least one audio track is in the given language."""
return any(track_lang_matches(t.language, lang) for t in self.audio_tracks)
Format: "Title"
Example: "Breaking.Bad"
"""
# Remove special characters and replace spaces with dots
cleaned = re.sub(r"[^\w\s\.\-]", "", self.title)
return cleaned.replace(" ", ".")
def audio_languages(self) -> list[str]:
    """Return each distinct audio language once, ordered by first appearance."""
    # dict keys keep insertion order, so fromkeys() dedupes without reordering.
    tagged = (track.language for track in self.audio_tracks if track.language)
    return list(dict.fromkeys(tagged))
# ── Subtitle helpers ───────────────────────────────────────────────────
def has_subtitles_in(self, lang: str | Language) -> bool:
    """Tell whether any subtitle track matches the requested language."""
    for track in self.subtitle_tracks:
        if track_lang_matches(track.language, lang):
            return True
    return False
def has_forced_subs(self) -> bool:
    """Tell whether any subtitle track carries the forced flag."""
    for track in self.subtitle_tracks:
        if track.is_forced:
            return True
    return False
def subtitle_languages(self) -> list[str]:
    """Return each distinct subtitle language once, ordered by first appearance."""
    ordered: list[str] = []
    known: set[str] = set()
    for track in self.subtitle_tracks:
        code = track.language
        # Skip untagged tracks and languages already reported.
        if not code or code in known:
            continue
        known.add(code)
        ordered.append(code)
    return ordered
# ── Naming ─────────────────────────────────────────────────────────────
def get_filename(self) -> str:
    """Build the suggested base filename, e.g. ``S01E05.Pilot``."""
    prefix = f"S{self.season_number.value:02d}E{self.episode_number.value:02d}"
    # Keep only word characters, whitespace and hyphens, then turn spaces into dots.
    dotted_title = re.sub(r"[^\w\s\-]", "", self.title).replace(" ", ".")
    return f"{prefix}.{dotted_title}"
def __str__(self) -> str:
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
return f"S{self.season_number.value:02d}E{self.episode_number.value:02d} - {self.title}"
def __repr__(self) -> str:
return f"TVShow(imdb_id={self.imdb_id}, title='{self.title}')"
return (
f"Episode(S{self.season_number.value:02d}E{self.episode_number.value:02d})"
)
# ════════════════════════════════════════════════════════════════════════════
# Season
# ════════════════════════════════════════════════════════════════════════════
@dataclass
class Season:
"""
Season entity representing a season of a TV show.
A season of a TV show — owned by ``TVShow``.
Owns its episodes via the ``episodes`` dict keyed by ``EpisodeNumber``.
Two TMDB-sourced counts shape the collection logic:
* ``expected_episodes`` — total episodes planned for the season
(``None`` if unknown).
* ``aired_episodes`` — episodes **already aired** as of the latest TMDB
refresh. ``None`` falls back to ``expected_episodes`` (best-effort).
The split matters: ``is_complete()`` checks owned against aired, so a season
in the middle of broadcasting can be "complete" today and become "partial"
later when new episodes air — that is correct behavior.
"""
show_imdb_id: ImdbId
season_number: SeasonNumber
episode_count: int
episodes: dict[EpisodeNumber, Episode] = field(default_factory=dict)
expected_episodes: int | None = None
aired_episodes: int | None = None
name: str | None = None
def __post_init__(self):
"""Validate season entity."""
# Ensure ImdbId is actually an ImdbId instance
if not isinstance(self.show_imdb_id, ImdbId):
if isinstance(self.show_imdb_id, str):
object.__setattr__(self, "show_imdb_id", ImdbId(self.show_imdb_id))
# Ensure SeasonNumber is actually a SeasonNumber instance
def __post_init__(self) -> None:
if not isinstance(self.season_number, SeasonNumber):
if isinstance(self.season_number, int):
object.__setattr__(
self, "season_number", SeasonNumber(self.season_number)
)
self.season_number = SeasonNumber(self.season_number)
# Validate episode_count
if not isinstance(self.episode_count, int) or self.episode_count < 0:
if self.expected_episodes is not None and self.expected_episodes < 0:
raise ValueError(
f"episode_count must be a non-negative integer, got {self.episode_count}"
f"expected_episodes must be >= 0, got {self.expected_episodes}"
)
if self.aired_episodes is not None and self.aired_episodes < 0:
raise ValueError(f"aired_episodes must be >= 0, got {self.aired_episodes}")
if (
self.expected_episodes is not None
and self.aired_episodes is not None
and self.aired_episodes > self.expected_episodes
):
raise ValueError(
f"aired_episodes ({self.aired_episodes}) cannot exceed "
f"expected_episodes ({self.expected_episodes})"
)
# ── Properties ─────────────────────────────────────────────────────────
@property
def episode_count(self) -> int:
    """How many episodes of this season are currently held in ``episodes``."""
    owned = self.episodes
    return len(owned)
# ── Collection state ───────────────────────────────────────────────────
def _effective_aired(self) -> int | None:
    """Aired count to reason with: ``aired_episodes``, or ``expected_episodes`` when unset."""
    if self.aired_episodes is None:
        return self.expected_episodes
    return self.aired_episodes
def is_complete(self) -> bool:
    """
    True if every aired episode (numbers ``1..aired``) is owned.

    Returns False (conservative) when the aired count is unknown — without
    knowing how many episodes have aired we cannot claim completeness.

    The check is made on episode *numbers*, not on the size of ``episodes``:
    owning episodes 1, 2 and 5 of a season with 3 aired episodes is not
    complete, because episode 3 is missing. This keeps the answer in line
    with ``missing_episodes()``, which reasons over the same 1..aired range.
    """
    aired = self._effective_aired()
    if aired is None:
        return False
    if aired <= 0:
        # No episode has aired yet → trivially "complete"
        return True
    owned_numbers = {number.value for number in self.episodes}
    return all(n in owned_numbers for n in range(1, aired + 1))
def is_fully_aired(self) -> bool:
    """Tell whether the aired count has reached the planned count (False if either is unknown)."""
    planned, aired = self.expected_episodes, self.aired_episodes
    return planned is not None and aired is not None and aired >= planned
def missing_episodes(self) -> list[EpisodeNumber]:
    """
    Episode numbers in ``1..aired`` that are absent from ``episodes``.

    Numbers above the aired count are never reported: those episodes have
    not aired yet. With an unknown (or non-positive) aired count there is
    no target to compare against, so the result is an empty list.
    """
    target = self._effective_aired()
    if target is None or target <= 0:
        return []

    owned_numbers = {number.value for number in self.episodes}
    gaps: list[EpisodeNumber] = []
    for n in range(1, target + 1):
        if n not in owned_numbers:
            gaps.append(EpisodeNumber(n))
    return gaps
# ── Mutation (called through the aggregate root) ───────────────────────
def add_episode(self, episode: Episode) -> None:
    """
    Store ``episode`` under its episode number, overwriting any previous entry.

    No conflict detection is done here; callers who care must look the
    number up first. Raises ``ValueError`` when the episode belongs to a
    different season than this one.
    """
    belongs_elsewhere = episode.season_number != self.season_number
    if belongs_elsewhere:
        raise ValueError(
            f"Episode season ({episode.season_number}) does not match season "
            f"({self.season_number})"
        )
    slot = episode.episode_number
    self.episodes[slot] = episode
# ── Naming ─────────────────────────────────────────────────────────────
def is_special(self) -> bool:
    """Tell whether this season is the specials season (delegates to the season number)."""
    number = self.season_number
    return number.is_special()
def get_folder_name(self) -> str:
"""
Get the folder name for this season.
Format: "Season 01" or "Specials" for season 0
"""
"""``Season 01`` or ``Specials`` for season 0."""
if self.is_special():
return "Specials"
return f"Season {self.season_number.value:02d}"
@@ -125,69 +263,169 @@ class Season:
return f"Season {self.season_number.value}"
def __repr__(self) -> str:
return f"Season(show={self.show_imdb_id}, number={self.season_number.value})"
return (
f"Season(number={self.season_number.value}, episodes={len(self.episodes)})"
)
# ════════════════════════════════════════════════════════════════════════════
# TVShow — aggregate root
# ════════════════════════════════════════════════════════════════════════════
@dataclass
class Episode:
class TVShow:
"""
Episode entity representing an episode of a TV show.
Aggregate root for the TV shows domain.
Owns its seasons via the ``seasons`` dict keyed by ``SeasonNumber``.
All mutations (adding episodes, creating seasons) MUST go through the
methods on this class — that is how invariants are preserved.
Two axes describe the show, kept deliberately orthogonal:
* ``status`` (``ShowStatus``) — production state (TMDB-sourced).
* ``collection_status()`` — what the user owns vs what has aired today.
A third axis (upcoming/scheduled) will be added later as a separate flag
when scheduling support is introduced; for now we make no claim about
future episodes.
"""
show_imdb_id: ImdbId
season_number: SeasonNumber
episode_number: EpisodeNumber
imdb_id: ImdbId
title: str
file_path: FilePath | None = None
file_size: FileSize | None = None
status: ShowStatus
seasons: dict[SeasonNumber, Season] = field(default_factory=dict)
expected_seasons: int | None = None
tmdb_id: int | None = None
def __post_init__(self):
"""Validate episode entity."""
# Ensure ImdbId is actually an ImdbId instance
if not isinstance(self.show_imdb_id, ImdbId):
if isinstance(self.show_imdb_id, str):
object.__setattr__(self, "show_imdb_id", ImdbId(self.show_imdb_id))
# Ensure SeasonNumber is actually a SeasonNumber instance
if not isinstance(self.season_number, SeasonNumber):
if isinstance(self.season_number, int):
object.__setattr__(
self, "season_number", SeasonNumber(self.season_number)
def __post_init__(self) -> None:
if not isinstance(self.imdb_id, ImdbId):
if isinstance(self.imdb_id, str):
self.imdb_id = ImdbId(self.imdb_id)
else:
raise ValueError(
f"imdb_id must be ImdbId or str, got {type(self.imdb_id)}"
)
# Ensure EpisodeNumber is actually an EpisodeNumber instance
if not isinstance(self.episode_number, EpisodeNumber):
if isinstance(self.episode_number, int):
object.__setattr__(
self, "episode_number", EpisodeNumber(self.episode_number)
if not isinstance(self.status, ShowStatus):
if isinstance(self.status, str):
self.status = ShowStatus.from_string(self.status)
else:
raise ValueError(
f"status must be ShowStatus or str, got {type(self.status)}"
)
def has_file(self) -> bool:
"""Check if the episode has an associated file."""
return self.file_path is not None and self.file_path.exists()
if self.expected_seasons is not None and self.expected_seasons < 0:
raise ValueError(
f"expected_seasons must be >= 0, got {self.expected_seasons}"
)
def is_downloaded(self) -> bool:
"""Check if the episode is downloaded."""
return self.has_file()
# ── Production-state queries ───────────────────────────────────────────
def get_filename(self) -> str:
def is_ongoing(self) -> bool:
    """Tell whether the production status is ``ShowStatus.ONGOING``."""
    current = self.status
    return current == ShowStatus.ONGOING
def is_ended(self) -> bool:
    """Tell whether the production status is ``ShowStatus.ENDED``."""
    current = self.status
    return current == ShowStatus.ENDED
# ── Properties ─────────────────────────────────────────────────────────
@property
def seasons_count(self) -> int:
    """How many seasons are held in ``seasons`` (empty seasons are counted too)."""
    held = self.seasons
    return len(held)
@property
def episode_count(self) -> int:
    """Sum of ``episode_count`` over every season held by the show."""
    total = 0
    for season in self.seasons.values():
        total += season.episode_count
    return total
# ── Mutation — the sole entry point for adding content ─────────────────
def add_episode(self, episode: Episode) -> None:
"""
Get the suggested filename for this episode.
Add an episode to the appropriate season, creating the season if needed.
Format: "S01E01 - Episode Title.ext"
Example: "S01E05 - Pilot.mkv"
This is the **only** sanctioned way to add content to the aggregate —
it preserves the invariant that an episode is always reachable through
``show.seasons[s].episodes[e]``.
"""
season_str = f"S{self.season_number.value:02d}"
episode_str = f"E{self.episode_number.value:02d}"
season = self.seasons.get(episode.season_number)
if season is None:
season = Season(season_number=episode.season_number)
self.seasons[episode.season_number] = season
season.add_episode(episode)
# Clean title for filename
clean_title = re.sub(r"[^\w\s\-]", "", self.title)
clean_title = clean_title.replace(" ", ".")
def add_season(self, season: Season) -> None:
"""
Attach a (possibly already populated) Season to the show.
return f"{season_str}{episode_str}.{clean_title}"
Replaces any existing season with the same number.
"""
self.seasons[season.season_number] = season
# ── Collection state ───────────────────────────────────────────────────
def collection_status(self) -> CollectionStatus:
    """
    Summarize what is owned for this show.

    * ``EMPTY``    — ``episode_count`` is zero
    * ``PARTIAL``  — some held season reports ``is_complete()`` as False, or
      fewer seasons are held than ``expected_seasons`` announces
    * ``COMPLETE`` — neither of the above

    A season whose aired count is unknown never reports itself complete, so
    once at least one episode is owned such a season makes the show PARTIAL.
    """
    if self.episode_count == 0:
        return CollectionStatus.EMPTY

    # Every season we hold must be complete on its own.
    if not all(season.is_complete() for season in self.seasons.values()):
        return CollectionStatus.PARTIAL

    # A season we do not hold at all may still have aired episodes, so a
    # shortfall against the expected season count rules out COMPLETE.
    expected = self.expected_seasons
    seasons_missing = expected is not None and len(self.seasons) < expected
    return CollectionStatus.PARTIAL if seasons_missing else CollectionStatus.COMPLETE
def is_complete_series(self) -> bool:
    """
    Tell whether the show has ended **and** the collection is COMPLETE.

    Both conditions together are the strongest "nothing left to get" claim
    available until upcoming-episode scheduling is modelled.
    """
    if not self.is_ended():
        return False
    return self.collection_status() == CollectionStatus.COMPLETE
def missing_episodes(self) -> list[tuple[SeasonNumber, EpisodeNumber]]:
    """Collect every season's missing episodes as ``(season, episode)`` pairs, seasons ascending."""
    by_season = sorted(self.seasons.items(), key=lambda kv: kv[0].value)
    return [
        (season_number, ep_number)
        for season_number, season in by_season
        for ep_number in season.missing_episodes()
    ]
# ── Naming ─────────────────────────────────────────────────────────────
def get_folder_name(self) -> str:
    """Folder name derived from the title, spaces replaced by dots (``Breaking.Bad``)."""
    # Keep word characters, whitespace, dots and hyphens; drop everything else.
    return re.sub(r"[^\w\s\.\-]", "", self.title).replace(" ", ".")
def __str__(self) -> str:
return f"S{self.season_number.value:02d}E{self.episode_number.value:02d} - {self.title}"
return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
def __repr__(self) -> str:
return f"Episode(show={self.show_imdb_id}, S{self.season_number.value:02d}E{self.episode_number.value:02d})"
return f"TVShow(imdb_id={self.imdb_id}, title='{self.title}')"
+15 -101
View File
@@ -1,126 +1,40 @@
"""TV Show repository interfaces (abstract)."""
"""TV Show repository interface.
A single repository for the aggregate root only — Season and Episode are
**inside** the TVShow aggregate and are never persisted independently. The
aggregate is always loaded and saved as a whole.
"""
from abc import ABC, abstractmethod
from ..shared.value_objects import ImdbId
from .entities import Episode, Season, TVShow
from .value_objects import EpisodeNumber, SeasonNumber
from .entities import TVShow
class TVShowRepository(ABC):
"""
Abstract repository for TV show persistence.
Abstract repository for the TVShow aggregate.
This defines the interface that infrastructure implementations must follow.
Implementations are responsible for persisting the full aggregate graph
(TVShow + all its Seasons + all their Episodes) atomically.
"""
@abstractmethod
def save(self, show: TVShow) -> None:
"""
Save a TV show to the repository.
Args:
show: TVShow entity to save
"""
pass
"""Persist the full TVShow aggregate."""
@abstractmethod
def find_by_imdb_id(self, imdb_id: ImdbId) -> TVShow | None:
"""
Find a TV show by its IMDb ID.
Args:
imdb_id: IMDb ID to search for
Returns:
TVShow if found, None otherwise
"""
pass
"""Load the full TVShow aggregate by IMDb ID, or None if absent."""
@abstractmethod
def find_all(self) -> list[TVShow]:
"""
Get all TV shows in the repository.
Returns:
List of all TV shows
"""
pass
"""Load all TVShow aggregates."""
@abstractmethod
def delete(self, imdb_id: ImdbId) -> bool:
"""
Delete a TV show from the repository.
Args:
imdb_id: IMDb ID of the show to delete
Returns:
True if deleted, False if not found
"""
pass
"""Remove the aggregate. Returns True if it existed and was deleted."""
@abstractmethod
def exists(self, imdb_id: ImdbId) -> bool:
"""
Check if a TV show exists in the repository.
Args:
imdb_id: IMDb ID to check
Returns:
True if exists, False otherwise
"""
pass
class SeasonRepository(ABC):
"""Abstract repository for season persistence."""
@abstractmethod
def save(self, season: Season) -> None:
"""Save a season."""
pass
@abstractmethod
def find_by_show_and_number(
self, show_imdb_id: ImdbId, season_number: SeasonNumber
) -> Season | None:
"""Find a season by show and season number."""
pass
@abstractmethod
def find_all_by_show(self, show_imdb_id: ImdbId) -> list[Season]:
"""Get all seasons for a show."""
pass
class EpisodeRepository(ABC):
"""Abstract repository for episode persistence."""
@abstractmethod
def save(self, episode: Episode) -> None:
"""Save an episode."""
pass
@abstractmethod
def find_by_show_season_episode(
self,
show_imdb_id: ImdbId,
season_number: SeasonNumber,
episode_number: EpisodeNumber,
) -> Episode | None:
"""Find an episode by show, season, and episode number."""
pass
@abstractmethod
def find_all_by_season(
self, show_imdb_id: ImdbId, season_number: SeasonNumber
) -> list[Episode]:
"""Get all episodes for a season."""
pass
@abstractmethod
def find_all_by_show(self, show_imdb_id: ImdbId) -> list[Episode]:
"""Get all episodes for a show."""
pass
"""True if the aggregate exists in the store."""
-234
View File
@@ -1,234 +0,0 @@
"""TV Show domain services - Business logic."""
import logging
import re
from ..shared.value_objects import ImdbId
from .entities import TVShow
from .exceptions import (
TVShowAlreadyExists,
TVShowNotFound,
)
from .repositories import EpisodeRepository, SeasonRepository, TVShowRepository
logger = logging.getLogger(__name__)
class TVShowService:
"""
Domain service for TV show-related business logic.
This service contains business rules that don't naturally fit
within a single entity.
"""
def __init__(
    self,
    show_repository: TVShowRepository,
    season_repository: SeasonRepository | None = None,
    episode_repository: EpisodeRepository | None = None,
):
    """
    Wire the service to its repositories.

    Args:
        show_repository: Repository used to persist TV shows
        season_repository: Season repository, may be omitted
        episode_repository: Episode repository, may be omitted
    """
    self.show_repository, self.season_repository, self.episode_repository = (
        show_repository,
        season_repository,
        episode_repository,
    )
def track_show(self, show: TVShow) -> None:
    """
    Register a new TV show in the show repository.

    Args:
        show: TVShow entity to start tracking

    Raises:
        TVShowAlreadyExists: If the repository already holds this IMDb ID
    """
    already_tracked = self.show_repository.exists(show.imdb_id)
    if already_tracked:
        raise TVShowAlreadyExists(
            f"TV show with IMDb ID {show.imdb_id} is already tracked"
        )

    self.show_repository.save(show)
    logger.info(f"Started tracking TV show: {show.title} ({show.imdb_id})")
def get_show(self, imdb_id: ImdbId) -> TVShow:
    """
    Look a TV show up by IMDb ID.

    Args:
        imdb_id: IMDb ID to look up

    Returns:
        The matching TVShow entity

    Raises:
        TVShowNotFound: If the repository returns nothing for this ID
    """
    found = self.show_repository.find_by_imdb_id(imdb_id)
    if found:
        return found
    raise TVShowNotFound(f"TV show with IMDb ID {imdb_id} not found")
def get_all_shows(self) -> list[TVShow]:
    """
    Fetch every TV show the repository knows about.

    Returns:
        Whatever ``show_repository.find_all()`` returns
    """
    repository = self.show_repository
    return repository.find_all()
def get_ongoing_shows(self) -> list[TVShow]:
    """
    Fetch the tracked shows that report themselves as ongoing.

    Returns:
        Shows from the repository for which ``is_ongoing()`` is true
    """
    ongoing = []
    for show in self.show_repository.find_all():
        if show.is_ongoing():
            ongoing.append(show)
    return ongoing
def get_ended_shows(self) -> list[TVShow]:
    """
    Fetch the tracked shows that report themselves as ended.

    Returns:
        Shows from the repository for which ``is_ended()`` is true
    """
    candidates = self.show_repository.find_all()
    return list(filter(lambda show: show.is_ended(), candidates))
def update_show(self, show: TVShow) -> None:
    """
    Overwrite a TV show that is already tracked.

    Args:
        show: TVShow entity carrying the new data

    Raises:
        TVShowNotFound: If the repository does not hold this IMDb ID
    """
    if self.show_repository.exists(show.imdb_id):
        self.show_repository.save(show)
        logger.info(f"Updated TV show: {show.title} ({show.imdb_id})")
        return

    raise TVShowNotFound(f"TV show with IMDb ID {show.imdb_id} not found")
def untrack_show(self, imdb_id: ImdbId) -> None:
    """
    Remove a TV show from the repository.

    Args:
        imdb_id: IMDb ID of the show to remove

    Raises:
        TVShowNotFound: If the repository reports that nothing was deleted
    """
    deleted = self.show_repository.delete(imdb_id)
    if not deleted:
        raise TVShowNotFound(f"TV show with IMDb ID {imdb_id} not found")

    logger.info(f"Stopped tracking TV show with IMDb ID: {imdb_id}")
def parse_episode_from_filename(self, filename: str) -> tuple[int, int] | None:
    """
    Parse season and episode numbers from filename.

    Supports formats (case-insensitive, tried in this order):
    - S01E05
    - 1x05
    - Season 1 Episode 5

    The ``1x05`` form must not be glued to other digits, so a video
    resolution such as ``1920x1080`` is not mistaken for season 20,
    episode 10.

    Args:
        filename: Filename to parse

    Returns:
        Tuple of (season, episode) if found, None otherwise
    """
    patterns = (
        # S01E05
        r"s(\d{1,2})e(\d{1,2})",
        # 1x05 — lookarounds reject digit runs like 1920x1080
        r"(?<!\d)(\d{1,2})x(\d{1,2})(?!\d)",
        # Season 1 Episode 5
        r"season\s*(\d{1,2})\s*episode\s*(\d{1,2})",
    )

    filename_lower = filename.lower()
    for pattern in patterns:
        match = re.search(pattern, filename_lower)
        if match:
            return (int(match.group(1)), int(match.group(2)))

    return None
def validate_episode_file(self, filename: str) -> bool:
    """
    Decide whether a filename looks like a usable episode file.

    Two checks are applied: the extension must be a known video extension,
    and season/episode numbers must be parseable from the name.

    Args:
        filename: Filename to check

    Returns:
        True when both checks pass, False otherwise
    """
    valid_extensions = {".mkv", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm"}

    # Everything from the last dot onwards, lower-cased; empty when there is no dot.
    _, dot, suffix = filename.rpartition(".")
    extension = f"{dot}{suffix}".lower() if dot else ""
    if extension not in valid_extensions:
        logger.warning(f"Invalid file extension: {extension}")
        return False

    if self.parse_episode_from_filename(filename):
        return True

    logger.warning(f"Could not parse episode info from filename: {filename}")
    return False
def find_next_episode(
    self,
    show: TVShow,
    last_season: int,
    last_episode: int,
    *,
    max_episodes_per_season: int = 50,
) -> tuple[int, int] | None:
    """
    Find the next episode to download for a show.

    The service has no per-season episode counts, so this is a heuristic:
    the next episode is ``last_episode + 1`` in the same season until the
    assumed season length is exceeded, at which point it rolls over to
    episode 1 of the following season.

    Args:
        show: TVShow entity
        last_season: Last downloaded season number
        last_episode: Last downloaded episode number
        max_episodes_per_season: Assumed upper bound on episodes in one
            season; exceeding it triggers the rollover (default 50)

    Returns:
        Tuple of (season, episode) for next episode, or None if show is complete
    """
    # An ended show whose last known season has been reached has nothing new.
    if show.is_ended() and last_season >= show.seasons_count:
        return None

    next_season, next_episode = last_season, last_episode + 1
    if next_episode > max_episodes_per_season:
        next_season += 1
        next_episode = 1

    # Don't go beyond known seasons
    if next_season > show.seasons_count:
        return None

    return (next_season, next_episode)
+46 -7
View File
@@ -1,5 +1,7 @@
"""TV Show domain value objects."""
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
@@ -7,7 +9,12 @@ from ..shared.exceptions import ValidationError
class ShowStatus(Enum):
"""Status of a TV show - whether it's still airing or has ended."""
"""
Production status of a TV show (real-world, source of truth = TMDB).
Describes the **production** state of the show, independently of what
the user owns. Orthogonal to ``CollectionStatus``.
"""
ONGOING = "ongoing"
ENDED = "ended"
@@ -16,19 +23,34 @@ class ShowStatus(Enum):
@classmethod
def from_string(cls, status_str: str) -> ShowStatus:
"""
Parse status from string.
Parse a production status string into a ShowStatus.
Args:
status_str: Status string (e.g., "ongoing", "ended")
Accepts our internal vocabulary ("ongoing", "ended") as well as the
statuses returned by TMDB ("Returning Series", "In Production",
"Pilot", "Ended", "Canceled"). The mapping is intentionally binary:
Returns:
ShowStatus enum value
* ONGOING — any state where new episodes may still ship
* ENDED — production has stopped (naturally or cancelled)
* UNKNOWN — anything else / unrecognized
Comparison is case-insensitive and whitespace-trimmed.
"""
if not status_str:
return cls.UNKNOWN
key = status_str.strip().lower()
status_map = {
# Internal
"ongoing": cls.ONGOING,
"ended": cls.ENDED,
# TMDB
"returning series": cls.ONGOING,
"in production": cls.ONGOING,
"pilot": cls.ONGOING,
"planned": cls.ONGOING,
"canceled": cls.ENDED,
"cancelled": cls.ENDED,
}
return status_map.get(status_str.lower(), cls.UNKNOWN)
return status_map.get(key, cls.UNKNOWN)
@dataclass(frozen=True)
@@ -70,6 +92,23 @@ class SeasonNumber:
return self.value
class CollectionStatus(Enum):
    """
    How much of a TV show the user owns (independent of ``ShowStatus``).

    Ownership is measured against episodes that have **already aired**, never
    against announced or upcoming ones: a returning show whose aired episodes
    are all owned is ``COMPLETE`` even though more seasons may follow.

    Scheduling information (upcoming seasons, next air date) is meant to be
    carried by separate flags on the TVShow aggregate rather than by new
    members of this enum.
    """

    # No episodes owned at all.
    EMPTY = "empty"
    # At least one aired episode is not owned.
    PARTIAL = "partial"
    # Every episode aired so far is owned.
    COMPLETE = "complete"
@dataclass(frozen=True)
class EpisodeNumber:
"""
+12 -6
View File
@@ -7,7 +7,7 @@ import logging
import subprocess
from pathlib import Path
from alfred.domain.shared.media_info import AudioTrack, MediaInfo, SubtitleTrack
from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack
logger = logging.getLogger(__name__)
@@ -58,7 +58,7 @@ def _parse(data: dict) -> MediaInfo:
info = MediaInfo()
# Format-level
# File-level duration/bitrate (ffprobe ``format`` block — independent of streams)
if "duration" in fmt:
try:
info.duration_seconds = float(fmt["duration"])
@@ -73,10 +73,16 @@ def _parse(data: dict) -> MediaInfo:
for stream in streams:
codec_type = stream.get("codec_type")
if codec_type == "video" and info.video_codec is None:
info.video_codec = stream.get("codec_name")
info.width = stream.get("width")
info.height = stream.get("height")
if codec_type == "video":
info.video_tracks.append(
VideoTrack(
index=stream.get("index", len(info.video_tracks)),
codec=stream.get("codec_name"),
width=stream.get("width"),
height=stream.get("height"),
is_default=stream.get("disposition", {}).get("default", 0) == 1,
)
)
elif codec_type == "audio":
info.audio_tracks.append(
+2 -10
View File
@@ -75,11 +75,7 @@ class MediaOrganizer:
show_dir = self.tvshow_folder / show_folder_name
# Create season folder
season = Season(
show_imdb_id=show.imdb_id,
season_number=episode.season_number,
episode_count=0, # Not needed for folder name
)
season = Season(season_number=episode.season_number)
season_folder_name = season.get_folder_name()
season_dir = show_dir / season_folder_name
@@ -126,11 +122,7 @@ class MediaOrganizer:
show_folder_name = show.get_folder_name()
show_dir = self.tvshow_folder / show_folder_name
season = Season(
show_imdb_id=show.imdb_id,
season_number=SeasonNumber(season_number),
episode_count=0,
)
season = Season(season_number=SeasonNumber(season_number))
season_folder_name = season.get_folder_name()
season_dir = show_dir / season_folder_name
@@ -1,11 +1,8 @@
"""JSON-based repository implementations."""
"""Placeholder package — previously held JSON-based repository implementations.
from .movie_repository import JsonMovieRepository
from .subtitle_repository import JsonSubtitleRepository
from .tvshow_repository import JsonTVShowRepository
__all__ = [
"JsonMovieRepository",
"JsonTVShowRepository",
"JsonSubtitleRepository",
]
The Json{Movie,TVShow,Subtitle}Repository classes were removed during the
test-week cleanup: they had no live callers, the subtitle variant had broken
imports, and the live code paths in agent/application use the memory-backed
``LongTermMemory.library`` directly. Keep this empty package so the namespace
remains importable if anything stale references ``alfred.infrastructure.persistence.json``.
"""
@@ -1,144 +0,0 @@
"""JSON-based movie repository implementation."""
import logging
from datetime import datetime
from typing import Any
from alfred.domain.movies.entities import Movie
from alfred.domain.movies.repositories import MovieRepository
from alfred.domain.movies.value_objects import MovieTitle, Quality, ReleaseYear
from alfred.domain.shared.value_objects import FilePath, FileSize, ImdbId
from alfred.infrastructure.persistence import get_memory
logger = logging.getLogger(__name__)
class JsonMovieRepository(MovieRepository):
    """
    MovieRepository implementation backed by the LTM library.

    Movies live as plain dicts under ``memory.ltm.library["movies"]``; every
    mutation is persisted with ``memory.save()``.
    """

    def save(self, movie: Movie) -> None:
        """
        Store a movie, replacing any entry that has the same IMDb ID.

        Args:
            movie: Movie entity to save.
        """
        memory = get_memory()
        kept = [
            entry
            for entry in memory.ltm.library.get("movies", [])
            if entry.get("imdb_id") != str(movie.imdb_id)
        ]
        kept.append(self._to_dict(movie))

        memory.ltm.library["movies"] = kept
        memory.save()
        logger.debug(f"Saved movie: {movie.imdb_id}")

    def find_by_imdb_id(self, imdb_id: ImdbId) -> Movie | None:
        """
        Look a movie up by IMDb ID.

        Args:
            imdb_id: IMDb ID to search for.

        Returns:
            The first stored movie whose ID matches, or None.
        """
        stored = get_memory().ltm.library.get("movies", [])
        match = next(
            (entry for entry in stored if entry.get("imdb_id") == str(imdb_id)),
            None,
        )
        if match is None:
            return None
        return self._from_dict(match)

    def find_all(self) -> list[Movie]:
        """
        Return every stored movie.

        Returns:
            List of Movie entities, in storage order.
        """
        stored = get_memory().ltm.library.get("movies", [])
        return [self._from_dict(entry) for entry in stored]

    def delete(self, imdb_id: ImdbId) -> bool:
        """
        Remove the movie with the given IMDb ID.

        Args:
            imdb_id: IMDb ID of movie to delete.

        Returns:
            True if at least one entry was removed, False if none matched.
        """
        memory = get_memory()
        before = memory.ltm.library.get("movies", [])
        after = [entry for entry in before if entry.get("imdb_id") != str(imdb_id)]

        # Nothing was filtered out: leave storage untouched and skip the save.
        if len(after) >= len(before):
            return False

        memory.ltm.library["movies"] = after
        memory.save()
        logger.debug(f"Deleted movie: {imdb_id}")
        return True

    def exists(self, imdb_id: ImdbId) -> bool:
        """
        Tell whether a movie with this IMDb ID is stored.

        Args:
            imdb_id: IMDb ID to check.

        Returns:
            True if exists, False otherwise.
        """
        found = self.find_by_imdb_id(imdb_id)
        return found is not None

    def _to_dict(self, movie: Movie) -> dict[str, Any]:
        """Serialise a Movie entity into the dict shape kept in storage."""
        record: dict[str, Any] = {}
        record["imdb_id"] = str(movie.imdb_id)
        record["title"] = movie.title.value
        record["release_year"] = (
            movie.release_year.value if movie.release_year else None
        )
        record["quality"] = movie.quality.value
        record["file_path"] = str(movie.file_path) if movie.file_path else None
        record["file_size"] = movie.file_size.bytes if movie.file_size else None
        record["tmdb_id"] = movie.tmdb_id
        record["added_at"] = movie.added_at.isoformat()
        return record

    def _from_dict(self, data: dict[str, Any]) -> Movie:
        """Rebuild a Movie entity from its stored dict."""
        # Stored quality is a string; a missing key falls back to "unknown".
        quality = Quality.from_string(data.get("quality", "unknown"))

        imdb_id = ImdbId(data["imdb_id"])
        title = MovieTitle(data["title"])

        # Optional fields: falsy stored values (None, "", 0) map to None.
        release_year = None
        if data.get("release_year"):
            release_year = ReleaseYear(data["release_year"])

        file_path = None
        if data.get("file_path"):
            file_path = FilePath(data["file_path"])

        file_size = None
        if data.get("file_size"):
            file_size = FileSize(data["file_size"])

        tmdb_id = data.get("tmdb_id")

        # Entries without a timestamp are stamped with the current time.
        if data.get("added_at"):
            added_at = datetime.fromisoformat(data["added_at"])
        else:
            added_at = datetime.now()

        return Movie(
            imdb_id=imdb_id,
            title=title,
            release_year=release_year,
            quality=quality,
            file_path=file_path,
            file_size=file_size,
            tmdb_id=tmdb_id,
            added_at=added_at,
        )
@@ -1,136 +0,0 @@
"""JSON-based subtitle repository implementation."""
import logging
from typing import Any
from alfred.domain.shared.value_objects import FilePath, ImdbId
from alfred.domain.subtitles.entities import Subtitle
from alfred.domain.subtitles.repositories import SubtitleRepository
from alfred.domain.subtitles.value_objects import Language, SubtitleFormat, TimingOffset
from alfred.infrastructure.persistence import get_memory
logger = logging.getLogger(__name__)
class JsonSubtitleRepository(SubtitleRepository):
    """
    JSON-based implementation of SubtitleRepository.

    Subtitles are kept as plain dicts under ``memory.ltm.library["subtitles"]``
    and every mutation is persisted with ``memory.save()``.
    """

    def save(self, subtitle: Subtitle) -> None:
        """
        Append a subtitle to the repository.

        No de-duplication is performed: multiple subtitles can exist for the
        same media.

        Args:
            subtitle: Subtitle entity to save.
        """
        memory = get_memory()
        subtitles = memory.ltm.library.get("subtitles", [])
        subtitles.append(self._to_dict(subtitle))

        # One assignment both creates the key on first save and stores the
        # updated list, so no separate "initialise to []" step is needed.
        memory.ltm.library["subtitles"] = subtitles
        memory.save()
        logger.debug(f"Saved subtitle for: {subtitle.media_imdb_id}")

    def find_by_media(
        self,
        media_imdb_id: ImdbId,
        language: Language | None = None,
        season: int | None = None,
        episode: int | None = None,
    ) -> list[Subtitle]:
        """
        Find subtitles for a media item.

        Args:
            media_imdb_id: IMDb ID of the media.
            language: Optional language filter (compared via ``language.value``).
            season: Optional season number filter.
            episode: Optional episode number filter.

        Returns:
            List of matching Subtitle entities, in storage order.
        """
        memory = get_memory()
        subtitles = memory.ltm.library.get("subtitles", [])

        results = []
        for sub_dict in subtitles:
            if sub_dict.get("media_imdb_id") != str(media_imdb_id):
                continue
            if language and sub_dict.get("language") != language.value:
                continue
            # ``is not None`` so that season/episode 0 still act as filters.
            if season is not None and sub_dict.get("season_number") != season:
                continue
            if episode is not None and sub_dict.get("episode_number") != episode:
                continue

            results.append(self._from_dict(sub_dict))

        return results

    def delete(self, subtitle: Subtitle) -> bool:
        """
        Delete a subtitle from the repository.

        Entries are matched by file path; every entry sharing the path is
        removed.

        Args:
            subtitle: Subtitle entity to delete.

        Returns:
            True if deleted, False if not found.
        """
        memory = get_memory()
        subtitles = memory.ltm.library.get("subtitles", [])
        initial_count = len(subtitles)

        subtitles = [
            s for s in subtitles if s.get("file_path") != str(subtitle.file_path)
        ]

        if len(subtitles) < initial_count:
            memory.ltm.library["subtitles"] = subtitles
            memory.save()
            logger.debug(f"Deleted subtitle: {subtitle.file_path}")
            return True

        return False

    def _to_dict(self, subtitle: Subtitle) -> dict[str, Any]:
        """Convert Subtitle entity to dict for storage."""
        return {
            "media_imdb_id": str(subtitle.media_imdb_id),
            "language": subtitle.language.value,
            "format": subtitle.format.value,
            "file_path": str(subtitle.file_path),
            "season_number": subtitle.season_number,
            "episode_number": subtitle.episode_number,
            "timing_offset": subtitle.timing_offset.milliseconds,
            "hearing_impaired": subtitle.hearing_impaired,
            "forced": subtitle.forced,
        }

    def _from_dict(self, data: dict[str, Any]) -> Subtitle:
        """
        Convert dict from storage to Subtitle entity.

        Missing optional keys fall back to: no season/episode, a timing
        offset of 0, and ``False`` for the hearing-impaired and forced flags.
        """
        return Subtitle(
            media_imdb_id=ImdbId(data["media_imdb_id"]),
            language=Language.from_code(data["language"]),
            format=SubtitleFormat.from_extension(data["format"]),
            file_path=FilePath(data["file_path"]),
            season_number=data.get("season_number"),
            episode_number=data.get("episode_number"),
            timing_offset=TimingOffset(data.get("timing_offset", 0)),
            hearing_impaired=data.get("hearing_impaired", False),
            forced=data.get("forced", False),
        )
@@ -1,127 +0,0 @@
"""JSON-based TV show repository implementation."""
import logging
from typing import Any
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.tv_shows.entities import TVShow
from alfred.domain.tv_shows.repositories import TVShowRepository
from alfred.domain.tv_shows.value_objects import ShowStatus
from alfred.infrastructure.persistence import get_memory
logger = logging.getLogger(__name__)
class JsonTVShowRepository(TVShowRepository):
    """
    TVShowRepository implementation backed by the LTM library.

    Shows live as plain dicts under ``memory.ltm.library["tv_shows"]``; every
    mutation is persisted with ``memory.save()``.
    """

    def save(self, show: TVShow) -> None:
        """
        Store a TV show, replacing any entry that has the same IMDb ID.

        Args:
            show: TVShow entity to save.
        """
        memory = get_memory()
        kept = [
            entry
            for entry in memory.ltm.library.get("tv_shows", [])
            if entry.get("imdb_id") != str(show.imdb_id)
        ]
        kept.append(self._to_dict(show))

        memory.ltm.library["tv_shows"] = kept
        memory.save()
        logger.debug(f"Saved TV show: {show.imdb_id}")

    def find_by_imdb_id(self, imdb_id: ImdbId) -> TVShow | None:
        """
        Look a TV show up by IMDb ID.

        Args:
            imdb_id: IMDb ID to search for.

        Returns:
            The first stored show whose ID matches, or None.
        """
        stored = get_memory().ltm.library.get("tv_shows", [])
        match = next(
            (entry for entry in stored if entry.get("imdb_id") == str(imdb_id)),
            None,
        )
        if match is None:
            return None
        return self._from_dict(match)

    def find_all(self) -> list[TVShow]:
        """
        Return every stored TV show.

        Returns:
            List of TVShow entities, in storage order.
        """
        stored = get_memory().ltm.library.get("tv_shows", [])
        return [self._from_dict(entry) for entry in stored]

    def delete(self, imdb_id: ImdbId) -> bool:
        """
        Remove the TV show with the given IMDb ID.

        Args:
            imdb_id: IMDb ID of show to delete.

        Returns:
            True if at least one entry was removed, False if none matched.
        """
        memory = get_memory()
        before = memory.ltm.library.get("tv_shows", [])
        after = [entry for entry in before if entry.get("imdb_id") != str(imdb_id)]

        # Nothing was filtered out: leave storage untouched and skip the save.
        if len(after) >= len(before):
            return False

        memory.ltm.library["tv_shows"] = after
        memory.save()
        logger.debug(f"Deleted TV show: {imdb_id}")
        return True

    def exists(self, imdb_id: ImdbId) -> bool:
        """
        Tell whether a TV show with this IMDb ID is stored.

        Args:
            imdb_id: IMDb ID to check.

        Returns:
            True if exists, False otherwise.
        """
        found = self.find_by_imdb_id(imdb_id)
        return found is not None

    def _to_dict(self, show: TVShow) -> dict[str, Any]:
        """Serialise a TVShow entity into the dict shape kept in storage."""
        record: dict[str, Any] = {}
        record["imdb_id"] = str(show.imdb_id)
        record["title"] = show.title
        record["seasons_count"] = show.seasons_count
        record["status"] = show.status.value
        record["tmdb_id"] = show.tmdb_id
        return record

    def _from_dict(self, data: dict[str, Any]) -> TVShow:
        """Rebuild a TVShow entity from its stored dict."""
        # Only ``tmdb_id`` is optional; the other keys must be present.
        fields = {
            "imdb_id": ImdbId(data["imdb_id"]),
            "title": data["title"],
            "seasons_count": data["seasons_count"],
            "status": ShowStatus.from_string(data["status"]),
            "tmdb_id": data.get("tmdb_id"),
        }
        return TVShow(**fields)
@@ -62,7 +62,8 @@ class Memory:
return {
"workspace": self.ltm.workspace.as_dict(),
"library_paths": self.ltm.library_paths.to_dict(),
"preferences": self.ltm.preferences.to_dict(),
"media_preferences": self.ltm.media_preferences.to_dict(),
"subtitle_preferences": self.ltm.subtitle_preferences.to_dict(),
"current_workflow": self.stm.workflow.to_dict(),
"current_topic": self.stm.entities.topic,
"extracted_entities": self.stm.entities.data,
@@ -13,17 +13,17 @@ class SubtitlePreferences:
can override them via .alfred/rules.yaml.
Naming convention used when placing subtitle files alongside a video:
{lang}.srt → standard track (e.g. fr.srt, en.srt)
{lang}.srt → standard track (e.g. fre.srt, eng.srt)
{lang}.sdh.srt → SDH / hearing-impaired track
{lang}.forced.srt → forced track (foreign lines only)
Fields mirror SubtitleRuleSet.override() parameters:
- languages: ordered list of ISO 639-1 codes to keep (others ignored)
- languages: ordered list of ISO 639-2/B codes to keep (others ignored)
- formats: list of subtitle formats to keep (e.g. ["srt", "ass"])
- types: list of subtitle types to keep (e.g. ["standard", "forced", "sdh"])
"""
languages: list[str] = field(default_factory=lambda: ["fr", "en"])
languages: list[str] = field(default_factory=lambda: ["fre", "eng"])
formats: list[str] = field(default_factory=lambda: ["srt", "ass"])
types: list[str] = field(default_factory=lambda: ["standard", "forced", "sdh"])
@@ -47,8 +47,8 @@ class SubtitlePreferences:
),
"fields": {
"languages": (
"Ordered list of subtitle languages to keep (ISO 639-1). "
"Others are ignored. First = most preferred."
"Ordered list of subtitle languages to keep (ISO 639-2/B, "
"e.g. 'fre', 'eng'). Others are ignored. First = most preferred."
),
"formats": (
"List of subtitle formats to keep, e.g. ['srt', 'ass']. "
@@ -65,7 +65,7 @@ class SubtitlePreferences:
def from_dict(cls, data: dict) -> SubtitlePreferences:
# Migration: old fields (min_size_kb, keep_sdh, keep_forced, link_subs_folder) are silently dropped
prefs = cls(
languages=data.get("languages", ["fr", "en"]),
languages=data.get("languages", ["fre", "eng"]),
formats=data.get("formats", ["srt", "ass"]),
types=data.get("types", ["standard", "forced", "sdh"]),
)
@@ -64,6 +64,4 @@ class ToolResultsCache:
def to_dict(self) -> dict:
# Surface only the index (tool + keys), not the payloads — payloads
# can be large and the prompt only needs to know what's available.
return {
tool: list(bucket.keys()) for tool, bucket in self.results.items()
}
return {tool: list(bucket.keys()) for tool, bucket in self.results.items()}
@@ -13,7 +13,7 @@ from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.services.placer import PlacedTrack
from alfred.infrastructure.metadata.store import MetadataStore
@@ -25,7 +25,7 @@ class SubtitleMetadataStore:
Subtitle-pipeline view of the per-release `.alfred/metadata.yaml`.
Backed by a generic MetadataStore; this class only knows how to build
a subtitle_history entry from PlacedTrack/SubtitleTrack pairs.
a subtitle_history entry from PlacedTrack/SubtitleCandidate pairs.
"""
def __init__(self, library_root: Path):
@@ -45,7 +45,7 @@ class SubtitleMetadataStore:
def append_history(
self,
placed_pairs: list[tuple[PlacedTrack, SubtitleTrack]],
placed_pairs: list[tuple[PlacedTrack, SubtitleCandidate]],
season: int | None = None,
episode: int | None = None,
release_group: str | None = None,
@@ -83,9 +83,7 @@ class SubtitleMetadataStore:
entry["episode"] = episode
self._store.append_subtitle_history_entry(entry)
marker = (
f"S{season:02d}E{episode:02d}" if season and episode else "movie"
)
marker = f"S{season:02d}E{episode:02d}" if season and episode else "movie"
logger.info(
f"SubtitleMetadataStore: appended history "
f"({marker}) — {len(tracks_data)} track(s)"
+220
View File
@@ -0,0 +1,220 @@
name: iso_languages
version: "1.0"
description: >
Canonical language table. The primary key is the ISO 639-2/B code (3 letters,
bibliographic form), which is what ffprobe emits and is the project-wide
canonical form. Aliases include the ISO 639-1 code, the ISO 639-2/T
(terminologic) variant when it differs, English/native names, and any common
spelling encountered in release names or filesystems.
Lookups are case-insensitive and operate on the union of {iso, aliases}.
languages:
fre:
english_name: French
native_name: Français
aliases: [fr, fra, french, francais]
eng:
english_name: English
native_name: English
aliases: [en, english]
spa:
english_name: Spanish
native_name: Español
aliases: [es, spanish, espanol, español, castellano]
ger:
english_name: German
native_name: Deutsch
aliases: [de, deu, german, deutsch]
ita:
english_name: Italian
native_name: Italiano
aliases: [it, italian, italiano]
por:
english_name: Portuguese
native_name: Português
aliases: [pt, portuguese, portugues, português, brazilian, brasileiro]
dut:
english_name: Dutch
native_name: Nederlands
aliases: [nl, nld, dutch, nederlands]
nor:
english_name: Norwegian
native_name: Norsk
aliases: [no, norwegian, norsk]
swe:
english_name: Swedish
native_name: Svenska
aliases: [sv, swedish, svenska]
dan:
english_name: Danish
native_name: Dansk
aliases: [da, danish, dansk]
fin:
english_name: Finnish
native_name: Suomi
aliases: [fi, finnish, suomi]
pol:
english_name: Polish
native_name: Polski
aliases: [pl, polish, polski]
cze:
english_name: Czech
native_name: Čeština
aliases: [cs, ces, czech, cestina, čeština]
slo:
english_name: Slovak
native_name: Slovenčina
aliases: [sk, slk, slovak, slovencina, slovenčina]
hun:
english_name: Hungarian
native_name: Magyar
aliases: [hu, hungarian, magyar]
rum:
english_name: Romanian
native_name: Română
aliases: [ro, ron, romanian, romana, română]
bul:
english_name: Bulgarian
native_name: Български
aliases: [bg, bulgarian, български]
hrv:
english_name: Croatian
native_name: Hrvatski
aliases: [hr, croatian, hrvatski]
srp:
english_name: Serbian
native_name: Srpski
aliases: [sr, serbian, srpski, српски]
slv:
english_name: Slovenian
native_name: Slovenščina
aliases: [sl, slovenian, slovensko, slovenščina]
est:
english_name: Estonian
native_name: Eesti
aliases: [et, estonian, eesti]
lav:
english_name: Latvian
native_name: Latviešu
aliases: [lv, latvian, latviesu, latviešu]
lit:
english_name: Lithuanian
native_name: Lietuvių
aliases: [lt, lithuanian, lietuviu, lietuvių]
mac:
english_name: Macedonian
native_name: Македонски
aliases: [mk, mkd, macedonian, македонски]
jpn:
english_name: Japanese
native_name: 日本語
aliases: [ja, japanese, 日本語]
chi:
english_name: Chinese
native_name: 中文
aliases: [zh, zho, chinese, simplified, traditional, mandarin, 中文]
yue:
english_name: Cantonese
native_name: 粵語
aliases: [cantonese, 粵語, 粤语]
kor:
english_name: Korean
native_name: 한국어
aliases: [ko, korean, 한국어]
ara:
english_name: Arabic
native_name: العربية
aliases: [ar, arabic, العربية]
tur:
english_name: Turkish
native_name: Türkçe
aliases: [tr, turkish, turkce, türkçe]
gre:
english_name: Greek
native_name: Ελληνικά
aliases: [el, ell, greek, ελληνικά]
ind:
english_name: Indonesian
native_name: Bahasa Indonesia
aliases: [id, indonesian, bahasa]
may:
english_name: Malay
native_name: Bahasa Melayu
aliases: [ms, msa, malay, melayu]
rus:
english_name: Russian
native_name: Русский
aliases: [ru, russian, русский]
vie:
english_name: Vietnamese
native_name: Tiếng Việt
aliases: [vi, vietnamese, tiếng việt]
heb:
english_name: Hebrew
native_name: עברית
aliases: [he, hebrew, עברית]
tam:
english_name: Tamil
native_name: தமிழ்
aliases: [ta, tamil, தமிழ்]
tel:
english_name: Telugu
native_name: తెలుగు
aliases: [te, telugu, తెలుగు]
tha:
english_name: Thai
native_name: ไทย
aliases: [th, thai, ไทย]
hin:
english_name: Hindi
native_name: हिन्दी
aliases: [hi, hindi, हिन्दी]
ukr:
english_name: Ukrainian
native_name: Українська
aliases: [uk, ukrainian, українська]
und:
english_name: Undetermined
native_name: Undetermined
aliases: [unknown, unk]
+23
View File
@@ -0,0 +1,23 @@
# Token separators encountered in release names.
#
# Used by parse_release() to tokenize a release name into atomic tokens before
# applying token-level matchers (resolutions, codecs, languages, season/episode
# markers, etc.).
#
# Why a YAML file and not hardcoded values:
# - Different scene/p2p/site conventions evolve over time (brackets from YTS,
# parens from some retro packs, underscores from older releases).
# - Lets us extend without code change when a new convention shows up.
#
# Caveats:
# - "." is always present because it's the canonical scene separator. Removing
# it would break ~everything.
# - Order does not matter — they are merged into a regex character class.
separators:
- "." # canonical scene form: Show.S01E01.1080p
- " " # human-friendly form: The Father (2020) 1080p
- "[" # bracket-prefixed/embedded: [1080p] [WEBRip] [YTS.MX]
- "]"
- "(" # parenthesis-embedded (year, edition): (2020) (Director's Cut)
- ")"
- "_" # underscore-as-space (old usenet, some Asian releases)
+20 -87
View File
@@ -1,9 +1,13 @@
name: subtitles
version: "1.0"
description: "Subtitle classification rules — formats, types, languages and their tokens"
version: "1.1"
description: >
Subtitle classification rules — formats, types and subtitle-specific language
tokens (those that don't belong to the canonical iso_languages table, e.g.
VOSTFR, VF, VFF). General-purpose language lookup is delegated to
LanguageRegistry (alfred/knowledge/iso_languages.yaml).
defaults:
languages: ["fra", "eng"]
languages: ["fre", "eng"]
formats: ["srt"]
types: ["standard", "forced", "sdh"]
format_priority: ["srt", "ass"]
@@ -16,98 +20,27 @@ formats:
ass:
extensions: [".ass", ".ssa"]
description: "Advanced SubStation Alpha — with styles and positioning"
vtt:
extensions: [".vtt"]
description: "WebVTT — web video text tracks"
sub:
extensions: [".sub"]
description: "MicroDVD / SubViewer plain text"
types:
standard:
tokens: []
description: "Normal subtitle track"
sdh:
tokens: ["sdh", "hi", "cc", "hearing"]
tokens: ["sdh", "cc", "hearing"]
description: "Hearing-impaired — includes sound effects and speaker labels"
forced:
tokens: ["forced", "foreign"]
description: "Foreign lines only — e.g. alien speech in an otherwise English film"
languages:
fra:
tokens: ["fr", "fra", "fre", "french", "francais", "vf", "vff", "vostfr"]
eng:
tokens: ["en", "eng", "english"]
spa:
tokens: ["es", "spa", "spanish", "espanol", "español"]
deu:
tokens: ["de", "deu", "ger", "german", "deutsch"]
ita:
tokens: ["it", "ita", "italian", "italiano"]
por:
tokens: ["pt", "por", "portuguese", "portugues", "português"]
nld:
tokens: ["nl", "nld", "dut", "dutch", "nederlands"]
nor:
tokens: ["no", "nor", "norwegian", "norsk"]
swe:
tokens: ["sv", "swe", "swedish", "svenska"]
dan:
tokens: ["da", "dan", "danish", "dansk"]
fin:
tokens: ["fi", "fin", "finnish", "suomi"]
pol:
tokens: ["pl", "pol", "polish", "polski"]
ces:
tokens: ["cs", "ces", "cze", "czech"]
slk:
tokens: ["sk", "slk", "slo", "slovak"]
hun:
tokens: ["hu", "hun", "hungarian", "magyar"]
ron:
tokens: ["ro", "ron", "rum", "romanian", "romana", "română"]
bul:
tokens: ["bg", "bul", "bulgarian"]
hrv:
tokens: ["hr", "hrv", "croatian", "hrvatski"]
srp:
tokens: ["sr", "srp", "serbian", "srpski"]
slv:
tokens: ["sl", "slv", "slovenian", "slovensko"]
est:
tokens: ["et", "est", "estonian", "eesti"]
lav:
tokens: ["lv", "lav", "latvian", "latviesu"]
lit:
tokens: ["lt", "lit", "lithuanian", "lietuviu"]
mkd:
tokens: ["mk", "mkd", "mac", "macedonian"]
jpn:
tokens: ["ja", "jpn", "japanese"]
zho:
tokens: ["zh", "zho", "chi", "chinese", "simplified", "traditional"]
yue:
tokens: ["yue", "cantonese"]
kor:
tokens: ["ko", "kor", "korean"]
ara:
tokens: ["ar", "ara", "arabic"]
tur:
tokens: ["tr", "tur", "turkish"]
ell:
tokens: ["el", "ell", "gre", "greek"]
ind:
tokens: ["id", "ind", "indonesian"]
msa:
tokens: ["ms", "msa", "may", "malay", "malayalam"]
rus:
tokens: ["ru", "rus", "russian"]
vie:
tokens: ["vi", "vie", "vietnamese"]
heb:
tokens: ["he", "heb", "hebrew"]
tam:
tokens: ["ta", "tam", "tamil"]
tel:
tokens: ["te", "tel", "telugu"]
tha:
tokens: ["th", "tha", "thai"]
hin:
tokens: ["hi", "hin", "hindi"]
ukr:
tokens: ["uk", "ukr", "ukrainian"]
# Subtitle-specific language tokens that do NOT belong to the canonical
# iso_languages table. These are conventions found in release names that map
# to an ISO 639-2/B code but are too narrow (or too contextual) to live in the
# project-wide language registry.
language_tokens:
fre: ["vf", "vff", "vostfr"]
+15
View File
@@ -37,6 +37,12 @@ class Settings(BaseSettings):
llm_temperature: float = 0.2
data_storage_dir: str = "data"
# --- MEDIA ---
# Minimum file size to consider a video file as a real movie (in bytes).
# 100 MB is generous enough to skip sample clips / trailers without rejecting
# legitimate low-bitrate releases (e.g. older anime, certain web rips).
min_movie_size_bytes: int = 100 * 1024 * 1024
# --- BUILD ---
alfred_version: str | None = None
@@ -84,6 +90,15 @@ class Settings(BaseSettings):
)
return v
@field_validator("min_movie_size_bytes")
@classmethod
def validate_min_movie_size(cls, v: int) -> int:
    """Accept any non-negative size; raise ConfigurationError for negatives."""
    if v >= 0:
        return v
    raise ConfigurationError(f"min_movie_size_bytes must be non-negative, got {v}")
@field_validator("request_timeout")
@classmethod
def validate_timeout(cls, v: int) -> int:
+277
View File
@@ -0,0 +1,277 @@
"""Tests for ``alfred.agent.llm.deepseek.DeepSeekClient``.
Thin wrapper around DeepSeek's OpenAI-compatible ``/v1/chat/completions``
endpoint. The client validates message shape, POSTs JSON with bearer auth,
and translates ``requests`` exceptions into ``LLMAPIError``.
Coverage:
- ``TestInit`` — explicit args win over settings; missing api_key / base_url
raise ``LLMConfigurationError``.
- ``TestCompleteValidation`` — empty list, non-dict element, missing role,
unknown role, missing content all raise ``ValueError``.
- ``TestCompleteHappyPath`` — POSTs to correct URL with bearer header,
returns ``choices[0].message`` verbatim, threads ``tools`` into payload.
- ``TestCompleteErrors`` — Timeout, HTTPError (with/without JSON body),
RequestException, malformed response (missing ``choices`` / ``message``,
``TypeError`` from parsing) are all wrapped as ``LLMAPIError``.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.agent.llm.deepseek import DeepSeekClient
from alfred.agent.llm.exceptions import LLMAPIError, LLMConfigurationError
from alfred.settings import Settings
def _settings(**overrides) -> Settings:
    """Build a ``Settings`` with DeepSeek test defaults; keyword overrides win."""
    defaults = dict(
        deepseek_api_key="test-key",
        deepseek_base_url="https://api.deepseek.test",
        deepseek_model="deepseek-chat",
        request_timeout=30,
        llm_temperature=0.2,
    )
    return Settings(**{**defaults, **overrides})
# --------------------------------------------------------------------------- #
# Init #
# --------------------------------------------------------------------------- #
class TestInit:
    """Constructor: settings supply defaults, explicit arguments override them."""

    def test_defaults_from_settings(self):
        built = DeepSeekClient(settings=_settings())

        assert built.api_key == "test-key"
        assert built.base_url == "https://api.deepseek.test"
        assert built.model == "deepseek-chat"
        assert built.timeout == 30

    def test_explicit_args_override_settings(self):
        explicit = {
            "api_key": "override-key",
            "base_url": "https://other.example",
            "model": "other-model",
            "timeout": 99,
        }
        built = DeepSeekClient(settings=_settings(), **explicit)

        assert built.api_key == "override-key"
        assert built.base_url == "https://other.example"
        assert built.model == "other-model"
        assert built.timeout == 99

    def test_missing_api_key_raises(self):
        without_key = _settings(deepseek_api_key=None)
        with pytest.raises(LLMConfigurationError, match="API key"):
            DeepSeekClient(settings=without_key)

    def test_missing_base_url_raises(self):
        without_url = _settings(deepseek_base_url="")
        with pytest.raises(LLMConfigurationError, match="base URL"):
            DeepSeekClient(settings=without_url)
# --------------------------------------------------------------------------- #
# complete — message validation #
# --------------------------------------------------------------------------- #
@pytest.fixture
def client():
    """DeepSeekClient built from the default test settings."""
    test_settings = _settings()
    return DeepSeekClient(settings=test_settings)
class TestCompleteValidation:
    """``complete`` rejects malformed message lists with ``ValueError``."""

    def test_empty_messages_raises(self, client):
        no_messages = []
        with pytest.raises(ValueError, match="empty"):
            client.complete(no_messages)

    def test_non_dict_element_raises(self, client):
        with pytest.raises(ValueError, match="must be a dict"):
            client.complete(["not a dict"])  # type: ignore[list-item]

    def test_missing_role_raises(self, client):
        roleless = {"content": "hi"}
        with pytest.raises(ValueError, match="'role' key"):
            client.complete([roleless])

    def test_invalid_role_raises(self, client):
        unknown_role = {"role": "robot", "content": "beep"}
        with pytest.raises(ValueError, match="Invalid role"):
            client.complete([unknown_role])

    def test_missing_content_for_non_tool_role_raises(self, client):
        contentless = {"role": "user"}
        with pytest.raises(ValueError, match="'content' key"):
            client.complete([contentless])

    def test_tool_role_allowed_without_content(self, client):
        # A 'tool' message may omit 'content'. The HTTP call is patched out so
        # the test only proves that validation lets the message through.
        tool_message = {"role": "tool", "tool_call_id": "abc", "name": "x"}
        canned = {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}

        with patch("alfred.agent.llm.deepseek.requests.post") as mock_post:
            fake_response = MagicMock()
            fake_response.json.return_value = canned
            mock_post.return_value = fake_response

            reply = client.complete([tool_message])

        assert reply["content"] == "ok"
# --------------------------------------------------------------------------- #
# complete — happy path #
# --------------------------------------------------------------------------- #
class TestCompleteHappyPath:
    """Successful round-trips: request shape and response pass-through."""

    @staticmethod
    def _reply(message):
        """Fake ``requests`` response wrapping *message* as the only choice."""
        fake_response = MagicMock()
        fake_response.json.return_value = {"choices": [{"message": message}]}
        return fake_response

    def test_posts_to_correct_url_with_bearer(self, client):
        with patch("alfred.agent.llm.deepseek.requests.post") as mock_post:
            mock_post.return_value = self._reply(
                {"role": "assistant", "content": "hi"}
            )
            client.complete([{"role": "user", "content": "hello"}])

        call = mock_post.call_args
        assert call.args[0] == "https://api.deepseek.test/v1/chat/completions"

        headers = call.kwargs["headers"]
        assert headers["Authorization"] == "Bearer test-key"
        assert headers["Content-Type"] == "application/json"
        assert call.kwargs["timeout"] == 30

        payload = call.kwargs["json"]
        assert payload["model"] == "deepseek-chat"
        assert payload["temperature"] == 0.2
        assert payload["messages"] == [{"role": "user", "content": "hello"}]
        assert "tools" not in payload

    def test_returns_message_verbatim(self, client):
        message = {
            "role": "assistant",
            "content": "answer",
            "tool_calls": [{"id": "x", "type": "function"}],
        }
        with patch("alfred.agent.llm.deepseek.requests.post") as mock_post:
            mock_post.return_value = self._reply(message)
            returned = client.complete([{"role": "user", "content": "q"}])

        assert returned == message

    def test_tools_threaded_into_payload(self, client):
        tools = [{"type": "function", "function": {"name": "foo"}}]
        with patch("alfred.agent.llm.deepseek.requests.post") as mock_post:
            mock_post.return_value = self._reply(
                {"role": "assistant", "content": ""}
            )
            client.complete([{"role": "user", "content": "q"}], tools=tools)

        sent = mock_post.call_args.kwargs["json"]
        assert sent["tools"] == tools
# --------------------------------------------------------------------------- #
# complete — error translation #
# --------------------------------------------------------------------------- #
class TestCompleteErrors:
    """Transport failures and malformed payloads surface as ``LLMAPIError``."""

    _POST = "alfred.agent.llm.deepseek.requests.post"
    _QUESTION = [{"role": "user", "content": "q"}]

    @staticmethod
    def _http_failure(error):
        """Fake response whose ``raise_for_status()`` raises *error*."""
        return MagicMock(raise_for_status=MagicMock(side_effect=error))

    @staticmethod
    def _json_reply(body):
        """Fake successful response whose ``.json()`` yields *body*."""
        fake_response = MagicMock()
        fake_response.json.return_value = body
        return fake_response

    def test_timeout_wrapped(self, client):
        with (
            patch(self._POST, side_effect=Timeout("read timeout")),
            pytest.raises(LLMAPIError, match="timeout"),
        ):
            client.complete(list(self._QUESTION))

    def test_http_error_with_json_body_extracts_message(self, client):
        body = MagicMock()
        body.json.return_value = {"error": {"message": "rate limited"}}
        failure = HTTPError("boom")
        failure.response = body

        with (
            patch(self._POST, return_value=self._http_failure(failure)),
            pytest.raises(LLMAPIError, match="rate limited"),
        ):
            client.complete(list(self._QUESTION))

    def test_http_error_with_non_json_body_falls_back_to_str(self, client):
        body = MagicMock()
        body.json.side_effect = ValueError("not json")
        failure = HTTPError("boom 500")
        failure.response = body

        with (
            patch(self._POST, return_value=self._http_failure(failure)),
            pytest.raises(LLMAPIError, match="DeepSeek API error"),
        ):
            client.complete(list(self._QUESTION))

    def test_http_error_without_response(self, client):
        failure = HTTPError("boom")
        failure.response = None

        with (
            patch(self._POST, return_value=self._http_failure(failure)),
            pytest.raises(LLMAPIError, match="HTTP error"),
        ):
            client.complete(list(self._QUESTION))

    def test_request_exception_wrapped(self, client):
        with (
            patch(self._POST, side_effect=RequestException("conn refused")),
            pytest.raises(LLMAPIError, match="Failed to connect"),
        ):
            client.complete(list(self._QUESTION))

    def test_missing_choices_raises(self, client):
        with patch(self._POST) as mock_post:
            mock_post.return_value = self._json_reply({})
            with pytest.raises(LLMAPIError, match="choices"):
                client.complete(list(self._QUESTION))

    def test_empty_choices_raises(self, client):
        with patch(self._POST) as mock_post:
            mock_post.return_value = self._json_reply({"choices": []})
            with pytest.raises(LLMAPIError, match="choices"):
                client.complete(list(self._QUESTION))

    def test_missing_message_in_choice_raises(self, client):
        with patch(self._POST) as mock_post:
            mock_post.return_value = self._json_reply({"choices": [{}]})
            with pytest.raises(LLMAPIError, match="message"):
                client.complete(list(self._QUESTION))

    def test_malformed_response_typeerror_wrapped(self, client):
        # A choice that is not a dict cannot be indexed by key; the client is
        # expected to catch the resulting TypeError and wrap it.
        with patch(self._POST) as mock_post:
            mock_post.return_value = self._json_reply({"choices": ["not a dict"]})
            with pytest.raises(LLMAPIError, match="Invalid API response"):
                client.complete(list(self._QUESTION))
+296
View File
@@ -0,0 +1,296 @@
"""Tests for ``alfred.agent.llm.ollama.OllamaClient``.
Thin wrapper around Ollama's local ``/api/chat`` endpoint. The client
validates message shape, POSTs JSON without auth, and translates
``requests`` exceptions into ``LLMAPIError``.
Coverage:
- ``TestInit`` — explicit args win; missing base_url / model raise
``LLMConfigurationError``; temperature defaults from settings.
- ``TestCompleteValidation`` — same shape checks as DeepSeek (empty, bad
element, missing role, invalid role, missing content; tool role is
exempt).
- ``TestCompleteHappyPath`` — POSTs to ``/api/chat`` with proper payload
(no auth header), returns ``data.message`` verbatim, threads tools.
- ``TestCompleteErrors`` — Timeout, HTTPError (with/without JSON body),
RequestException, missing ``message`` field all wrapped as ``LLMAPIError``.
- ``TestListModels`` — happy path returns model names; failure returns ``[]``.
- ``TestIsAvailable`` — 200 → True; exception → False.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.agent.llm.exceptions import LLMAPIError, LLMConfigurationError
from alfred.agent.llm.ollama import OllamaClient
from alfred.settings import Settings
def _settings(**overrides) -> Settings:
    """Build a ``Settings`` with Ollama test values; keyword overrides win."""
    return Settings(
        **{
            "ollama_base_url": "http://ollama.test:11434",
            "ollama_model": "llama3.3:latest",
            "request_timeout": 30,
            "llm_temperature": 0.3,
            **overrides,
        }
    )
# --------------------------------------------------------------------------- #
# Init #
# --------------------------------------------------------------------------- #
class TestInit:
    """Constructor behaviour: settings defaults, explicit overrides, validation."""

    def test_defaults_from_settings(self):
        built = OllamaClient(settings=_settings())
        observed = (built.base_url, built.model, built.timeout, built.temperature)
        assert observed == ("http://ollama.test:11434", "llama3.3:latest", 30, 0.3)

    def test_explicit_args_override(self):
        explicit = {
            "base_url": "http://other:9999",
            "model": "mistral",
            "timeout": 120,
            "temperature": 0.0,
        }
        built = OllamaClient(settings=_settings(), **explicit)
        assert built.base_url == "http://other:9999"
        assert built.model == "mistral"
        assert built.timeout == 120
        assert built.temperature == 0.0

    def test_zero_temperature_explicit_respected(self):
        # 0.0 is falsy; the implementation guards against this with a
        # ``is not None`` check.
        built = OllamaClient(temperature=0.0, settings=_settings())
        assert built.temperature == 0.0

    def test_missing_base_url_raises(self):
        without_url = _settings(ollama_base_url="")
        with pytest.raises(LLMConfigurationError, match="base URL"):
            OllamaClient(settings=without_url)

    def test_missing_model_raises(self):
        without_model = _settings(ollama_model="")
        with pytest.raises(LLMConfigurationError, match="model"):
            OllamaClient(settings=without_model)
# --------------------------------------------------------------------------- #
# complete — message validation #
# --------------------------------------------------------------------------- #
@pytest.fixture
def client():
    """Provide an ``OllamaClient`` built from the default test settings."""
    test_settings = _settings()
    return OllamaClient(settings=test_settings)
class TestCompleteValidation:
    """Message-shape validation: malformed conversations raise ``ValueError``."""

    @staticmethod
    def _expect_rejected(client, messages, pattern):
        """Call ``complete`` with *messages* and expect a matching ``ValueError``."""
        with pytest.raises(ValueError, match=pattern):
            client.complete(messages)

    def test_empty_messages_raises(self, client):
        self._expect_rejected(client, [], "empty")

    def test_non_dict_element_raises(self, client):
        self._expect_rejected(client, ["nope"], "must be a dict")

    def test_missing_role_raises(self, client):
        self._expect_rejected(client, [{"content": "hi"}], "'role' key")

    def test_invalid_role_raises(self, client):
        self._expect_rejected(
            client, [{"role": "bogus", "content": "x"}], "Invalid role"
        )

    def test_missing_content_for_non_tool_role_raises(self, client):
        self._expect_rejected(client, [{"role": "assistant"}], "'content' key")

    def test_tool_role_allowed_without_content(self, client):
        reply = MagicMock(
            raise_for_status=MagicMock(),
            json=MagicMock(
                return_value={"message": {"role": "assistant", "content": "ok"}}
            ),
        )
        with patch("alfred.agent.llm.ollama.requests.post", return_value=reply):
            answer = client.complete([{"role": "tool", "tool_call_id": "a"}])
        assert answer["content"] == "ok"
# --------------------------------------------------------------------------- #
# complete — happy path #
# --------------------------------------------------------------------------- #
class TestCompleteHappyPath:
    """Successful ``complete`` calls: request payload and returned message."""

    _POST = "alfred.agent.llm.ollama.requests.post"

    @staticmethod
    def _reply(message):
        """Fake successful HTTP response whose JSON body wraps *message*."""
        return MagicMock(
            raise_for_status=MagicMock(),
            json=MagicMock(return_value={"message": message}),
        )

    def test_posts_to_api_chat_with_payload(self, client):
        reply = self._reply({"role": "assistant", "content": "hi"})
        with patch(self._POST, return_value=reply) as mock_post:
            client.complete([{"role": "user", "content": "hello"}])

        positional, keyword = mock_post.call_args
        assert positional[0] == "http://ollama.test:11434/api/chat"
        assert keyword["timeout"] == 30

        sent = keyword["json"]
        assert sent["model"] == "llama3.3:latest"
        assert sent["stream"] is False
        assert sent["options"] == {"temperature": 0.3}
        assert sent["messages"] == [{"role": "user", "content": "hello"}]
        assert "tools" not in sent

        # No Authorization header — Ollama is unauthenticated locally.
        assert "Authorization" not in (keyword.get("headers") or {})

    def test_returns_message_verbatim(self, client):
        message = {"role": "assistant", "content": "answer"}
        with patch(self._POST, return_value=self._reply(message)):
            answer = client.complete([{"role": "user", "content": "q"}])
        assert answer == message

    def test_tools_threaded_into_payload(self, client):
        tools = [{"type": "function", "function": {"name": "x"}}]
        reply = self._reply({"role": "assistant", "content": ""})
        with patch(self._POST, return_value=reply) as mock_post:
            client.complete([{"role": "user", "content": "q"}], tools=tools)
        sent = mock_post.call_args.kwargs["json"]
        assert sent["tools"] == tools
# --------------------------------------------------------------------------- #
# complete — errors #
# --------------------------------------------------------------------------- #
class TestCompleteErrors:
    """Failure modes of ``complete``: each one is expected as ``LLMAPIError``."""

    _POST = "alfred.agent.llm.ollama.requests.post"

    @staticmethod
    def _raising(error):
        """Fake HTTP response whose ``raise_for_status`` raises *error*."""
        return MagicMock(raise_for_status=MagicMock(side_effect=error))

    def _expect_api_error(self, client, pattern, **patch_kwargs):
        """Patch ``requests.post`` with *patch_kwargs*, then expect ``LLMAPIError``."""
        with patch(self._POST, **patch_kwargs):
            with pytest.raises(LLMAPIError, match=pattern):
                client.complete([{"role": "user", "content": "q"}])

    def test_timeout_wrapped(self, client):
        self._expect_api_error(client, "timeout", side_effect=Timeout("t"))

    def test_http_error_with_json_body(self, client):
        body = MagicMock()
        body.json.return_value = {"error": "model not found"}
        failure = HTTPError("404")
        failure.response = body
        self._expect_api_error(
            client, "model not found", return_value=self._raising(failure)
        )

    def test_http_error_with_non_json_body(self, client):
        body = MagicMock()
        body.json.side_effect = ValueError("not json")
        failure = HTTPError("boom")
        failure.response = body
        self._expect_api_error(
            client, "Ollama API error", return_value=self._raising(failure)
        )

    def test_http_error_without_response(self, client):
        failure = HTTPError("boom")
        failure.response = None
        self._expect_api_error(
            client, "HTTP error", return_value=self._raising(failure)
        )

    def test_request_exception_wrapped(self, client):
        self._expect_api_error(
            client, "Failed to connect", side_effect=RequestException("conn refused")
        )

    def test_missing_message_field_raises(self, client):
        empty_body = MagicMock(
            raise_for_status=MagicMock(),
            json=MagicMock(return_value={}),
        )
        self._expect_api_error(client, "missing 'message'", return_value=empty_body)
# --------------------------------------------------------------------------- #
# list_models #
# --------------------------------------------------------------------------- #
class TestListModels:
    """``list_models``: names on success, an empty list otherwise."""

    _GET = "alfred.agent.llm.ollama.requests.get"

    @staticmethod
    def _reply(body):
        """Fake successful HTTP response whose ``json()`` yields *body*."""
        return MagicMock(
            raise_for_status=MagicMock(),
            json=MagicMock(return_value=body),
        )

    def test_returns_model_names(self, client):
        listing = {
            "models": [
                {"name": "llama3.3:latest"},
                {"name": "mistral:7b"},
            ]
        }
        with patch(self._GET, return_value=self._reply(listing)):
            names = client.list_models()
        assert names == ["llama3.3:latest", "mistral:7b"]

    def test_no_models_returns_empty(self, client):
        with patch(self._GET, return_value=self._reply({})):
            names = client.list_models()
        assert names == []

    def test_failure_returns_empty(self, client):
        with patch(self._GET, side_effect=RequestException("offline")):
            names = client.list_models()
        assert names == []
# --------------------------------------------------------------------------- #
# is_available #
# --------------------------------------------------------------------------- #
class TestIsAvailable:
    """``is_available``: ``True`` only for an HTTP 200, ``False`` otherwise."""

    _GET = "alfred.agent.llm.ollama.requests.get"

    def test_returns_true_on_200(self, client):
        with patch(self._GET, return_value=MagicMock(status_code=200)):
            reachable = client.is_available()
        assert reachable is True

    def test_returns_false_on_non_200(self, client):
        with patch(self._GET, return_value=MagicMock(status_code=503)):
            reachable = client.is_available()
        assert reachable is False

    def test_returns_false_on_exception(self, client):
        with patch(self._GET, side_effect=RequestException("down")):
            reachable = client.is_available()
        assert reachable is False
+82 -16
View File
@@ -1,5 +1,20 @@
"""
Tests for alfred.agent.registry — tool registration and JSON schema generation.
"""Tests for ``alfred.agent.registry`` — tool registration and JSON schema gen.
Two suites:
1. **TestCreateToolFromFunction** — Unit-tests the schema extraction from a
bare Python function: name resolution, docstring → description, required
versus optional parameters, ``Optional[X]`` / ``X | None`` stripping, and
the Python-to-JSON-Schema type mapping (``str/int/float/bool/list/dict``
→ ``string/integer/number/boolean/array/object``).
2. **TestMakeTools** — Integration check on the live registry: every tool
declared in ``make_tools(settings)`` is a real ``Tool`` instance with a
callable ``func`` and a name matching its dict key, and a known core set
of tools is always present. Resolver tests target the four media-typed
resolvers (``resolve_movie_destination``, ``_season_``, ``_episode_``,
``_series_``), not the legacy unified ``resolve_destination`` which no
longer exists.
"""
from alfred.agent.registry import Tool, _create_tool_from_function, make_tools
@@ -95,12 +110,43 @@ class TestCreateToolFromFunction:
t = _create_tool_from_function(tool)
assert t.parameters["properties"]["x"]["type"] == "boolean"
def test_type_mapping_list(self):
    """A ``list`` annotation is expected to map to the ``array`` schema type."""

    def tool(x: list) -> dict:
        """T."""
        return {}

    t = _create_tool_from_function(tool)
    assert t.parameters["properties"]["x"]["type"] == "array"
def test_type_mapping_dict(self):
    """A ``dict`` annotation is expected to map to the ``object`` schema type."""

    def tool(x: dict) -> dict:
        """T."""
        return {}

    properties = _create_tool_from_function(tool).parameters["properties"]
    assert properties["x"]["type"] == "object"
def test_unknown_type_defaults_to_string(self):
    """Custom classes without a JSON-Schema mapping fall back to ``string``."""

    class CustomType:
        pass

    def tool(x: CustomType) -> dict:
        """T."""
        return {}

    properties = _create_tool_from_function(tool).parameters["properties"]
    assert properties["x"]["type"] == "string"
def test_optional_annotation_unwrapped(self):
    """``str | None`` is expected to yield the ``string`` schema type."""

    def tool(x: str | None = None) -> dict:
        """T."""
        return {}

    properties = _create_tool_from_function(tool).parameters["properties"]
    # ``str | None`` should unwrap to ``str``, not fall back to "string"
    # by accident — the mapping is intentional.
    assert properties["x"]["type"] == "string"
def test_no_annotation_defaults_to_string(self):
@@ -150,23 +196,39 @@ class TestMakeTools:
assert isinstance(tools, dict)
def test_all_expected_tools_present(self):
    """Core tool set that the agent needs to perform the end-to-end flow."""
    tools = make_tools(settings)
    expected = {
        # Folder & filesystem
        "set_path_for_folder",
        "list_folder",
        "move_media",
        "move_to_destination",
        # Resolvers (one per media type — no unified resolve_destination)
        "resolve_season_destination",
        "resolve_episode_destination",
        "resolve_movie_destination",
        "resolve_series_destination",
        # Subtitles & seeding
        "manage_subtitles",
        "create_seed_links",
        "learn",
        # API
        "find_media_imdb_id",
        "find_torrent",
        "add_torrent_by_index",
        "add_torrent_to_qbittorrent",
        "get_torrent_by_index",
        # Conversation
        "set_language",
    }
    # Report the missing names explicitly so a failure says which tool is absent.
    missing = expected - tools.keys()
    assert not missing, f"missing tools: {sorted(missing)}"
def test_no_legacy_unified_resolver(self):
    """The single ``resolve_destination`` tool was replaced by four typed resolvers."""
    registered = make_tools(settings)
    legacy_present = "resolve_destination" in registered
    assert not legacy_present
def test_each_tool_is_tool_instance(self):
tools = make_tools(settings)
@@ -183,21 +245,25 @@ class TestMakeTools:
for key, tool in tools.items():
assert tool.name == key
def test_resolve_movie_destination_schema(self):
    """Check the required arguments and ``tmdb_year`` type of the movie resolver."""
    tools = make_tools(settings)
    t = tools["resolve_movie_destination"]
    # Required args common to all movie resolutions.
    for required_arg in ("source_file", "tmdb_title", "tmdb_year"):
        assert required_arg in t.parameters["required"], (
            f"resolve_movie_destination should require {required_arg}"
        )
    # tmdb_year is typed as int.
    assert t.parameters["properties"]["tmdb_year"]["type"] == "integer"

def test_resolve_episode_destination_schema(self):
    """Check the required arguments of the episode resolver."""
    tools = make_tools(settings)
    t = tools["resolve_episode_destination"]
    required = t.parameters["required"]
    # An episode resolution needs at least the source file and the show
    # identification (title/year). Season/episode numbers also required.
    assert "source_file" in required
    assert "tmdb_title" in required
    assert "tmdb_year" in required
def test_move_media_schema(self):
tools = make_tools(settings)
+111
View File
@@ -0,0 +1,111 @@
"""Tests for ``alfred.application.torrents.add_torrent.AddTorrentUseCase``.
Wraps ``QBittorrentClient.add_torrent`` with magnet-link validation and
exception translation into an ``AddTorrentResponse`` envelope.
Coverage:
- ``TestValidation`` — empty / non-string / non-magnet rejection.
- ``TestSuccess`` — client returns True → status="ok".
- ``TestAddFailure`` — client returns False → status="error", error="add_failed".
- ``TestErrorTranslation`` — ``QBittorrentAuthError`` → authentication_failed,
``QBittorrentAPIError`` → api_error.
QBittorrentClient is fully mocked.
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from alfred.application.torrents.add_torrent import AddTorrentUseCase
from alfred.infrastructure.api.qbittorrent.exceptions import (
QBittorrentAPIError,
QBittorrentAuthError,
)
@pytest.fixture
def client():
    """Provide a ``MagicMock`` that stands in for the torrent client."""
    stub = MagicMock()
    return stub
@pytest.fixture
def use_case(client):
    """Provide an ``AddTorrentUseCase`` wired to the mocked ``client`` fixture."""
    under_test = AddTorrentUseCase(client)
    return under_test
# Magnet link used by the success, add-failure and error-translation tests.
VALID_MAGNET = "magnet:?xt=urn:btih:abc"
# --------------------------------------------------------------------------- #
# Validation #
# --------------------------------------------------------------------------- #
class TestValidation:
    """Rejected inputs: the client must never be called."""

    @pytest.mark.parametrize("bad", ["", None, 42, b"magnet:?x"])
    def test_invalid_inputs_return_validation_failed(self, use_case, client, bad):
        response = use_case.execute(bad)
        assert (response.status, response.error) == ("error", "validation_failed")
        client.add_torrent.assert_not_called()

    def test_non_magnet_scheme_rejected(self, use_case, client):
        response = use_case.execute("http://example.com/torrent")
        assert (response.status, response.error) == ("error", "validation_failed")
        lowered = response.message.lower()
        assert "magnet" in lowered
        client.add_torrent.assert_not_called()
# --------------------------------------------------------------------------- #
# Success #
# --------------------------------------------------------------------------- #
class TestSuccess:
    """The client accepts the magnet link (``add_torrent`` returns ``True``)."""

    def test_add_success(self, use_case, client):
        client.add_torrent.return_value = True
        response = use_case.execute(VALID_MAGNET)
        assert response.status == "ok"
        assert response.error is None
        lowered = response.message.lower()
        assert "success" in lowered
        client.add_torrent.assert_called_once_with(VALID_MAGNET)
# --------------------------------------------------------------------------- #
# Add failure #
# --------------------------------------------------------------------------- #
class TestAddFailure:
    """The client refuses the magnet link (``add_torrent`` returns ``False``)."""

    def test_add_returns_false(self, use_case, client):
        client.add_torrent.return_value = False
        response = use_case.execute(VALID_MAGNET)
        assert (response.status, response.error) == ("error", "add_failed")
# --------------------------------------------------------------------------- #
# Error translation #
# --------------------------------------------------------------------------- #
class TestErrorTranslation:
    """qBittorrent exceptions raised by the client become error responses."""

    def test_auth_error_translated(self, use_case, client):
        client.add_torrent.side_effect = QBittorrentAuthError("bad creds")
        response = use_case.execute(VALID_MAGNET)
        assert (response.status, response.error) == (
            "error",
            "authentication_failed",
        )
        # The message is a fixed user-facing string, not the raw exception.
        assert "authenticate" in response.message.lower()

    def test_api_error_translated(self, use_case, client):
        client.add_torrent.side_effect = QBittorrentAPIError("server down")
        response = use_case.execute(VALID_MAGNET)
        assert (response.status, response.error) == ("error", "api_error")
        assert "server down" in response.message
+148
View File
@@ -0,0 +1,148 @@
"""Tests for ``alfred.application.filesystem.detect_media_type``.
The function refines a ``ParsedRelease.media_type`` using filesystem evidence.
Coverage:
- ``TestFile`` — single-file source (.mkv / .iso / .nfo-only).
- ``TestFolder`` — first-level folder scan; mixed/video-only/non-video-only.
- ``TestMetadataIgnored`` — ``.nfo``, ``.srt``, ``.jpg`` never tip the decision.
- ``TestMissing`` — non-existent paths fall through to parsed.media_type.
No mocking — pure function over a real ``tmp_path``.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from alfred.application.filesystem.detect_media_type import detect_media_type
from alfred.domain.release.services import parse_release
def _parsed(media_type: str = "movie"):
    """Parse a canned release name chosen by *media_type* with the real parser.

    ``"tv_show"`` and ``"movie"`` each have a dedicated release name; any other
    value gets a name the parser can't classify.
    """
    release_names = {
        "tv_show": "Show.S01E01.1080p-GRP",
        "movie": "Movie.2020.1080p-GRP",
    }
    return parse_release(release_names.get(media_type, "randomthing"))
# --------------------------------------------------------------------------- #
# Single-file source #
# --------------------------------------------------------------------------- #
class TestFile:
    """Single-file sources: the path given to ``detect_media_type`` is a file."""

    @staticmethod
    def _empty(folder: Path, name: str) -> Path:
        """Create a zero-byte file called *name* inside *folder* and return it."""
        target = folder / name
        target.write_bytes(b"")
        return target

    def test_video_file_preserves_parsed_type(self, tmp_path: Path):
        source = self._empty(tmp_path, "x.mkv")
        assert detect_media_type(_parsed("movie"), source) == "movie"

    def test_video_file_preserves_tv_type(self, tmp_path: Path):
        source = self._empty(tmp_path, "ep.mp4")
        assert detect_media_type(_parsed("tv_show"), source) == "tv_show"

    def test_non_video_file_returns_other(self, tmp_path: Path):
        source = self._empty(tmp_path, "x.iso")
        assert detect_media_type(_parsed("movie"), source) == "other"

    @pytest.mark.parametrize("ext", [".rar", ".zip", ".7z", ".exe", ".dmg"])
    def test_various_non_video_extensions(self, tmp_path: Path, ext):
        source = self._empty(tmp_path, f"x{ext}")
        assert detect_media_type(_parsed("movie"), source) == "other"

    def test_metadata_only_file_keeps_parsed_type(self, tmp_path: Path):
        # Metadata extension is stripped from conclusive set — no video, no
        # non-video → falls through to parsed.media_type.
        source = self._empty(tmp_path, "x.nfo")
        assert detect_media_type(_parsed("movie"), source) == "movie"
# --------------------------------------------------------------------------- #
# Folder source #
# --------------------------------------------------------------------------- #
class TestFolder:
    """Folder sources: the path given to ``detect_media_type`` is a directory."""

    @staticmethod
    def _populate(folder: Path, *names: str) -> None:
        """Create one zero-byte file in *folder* per entry of *names*, in order."""
        for name in names:
            (folder / name).write_bytes(b"")

    def test_folder_with_video_keeps_parsed_type(self, tmp_path: Path):
        self._populate(tmp_path, "main.mkv")
        assert detect_media_type(_parsed("movie"), tmp_path) == "movie"

    def test_folder_only_non_video_returns_other(self, tmp_path: Path):
        self._populate(tmp_path, "disc.iso", "part.rar")
        assert detect_media_type(_parsed("movie"), tmp_path) == "other"

    def test_folder_mixed_returns_unknown(self, tmp_path: Path):
        self._populate(tmp_path, "main.mkv", "extras.iso")
        assert detect_media_type(_parsed("movie"), tmp_path) == "unknown"

    def test_empty_folder_keeps_parsed_type(self, tmp_path: Path):
        assert detect_media_type(_parsed("tv_show"), tmp_path) == "tv_show"

    def test_folder_only_metadata_keeps_parsed_type(self, tmp_path: Path):
        self._populate(tmp_path, "info.nfo", "cover.jpg", "subs.srt")
        # All metadata → conclusive set empty → falls through.
        assert detect_media_type(_parsed("movie"), tmp_path) == "movie"
# --------------------------------------------------------------------------- #
# Metadata-noise resilience #
# --------------------------------------------------------------------------- #
class TestMetadataIgnored:
    """Metadata files next to the real content must not change the verdict."""

    @staticmethod
    def _populate(folder: Path, *names: str) -> None:
        """Create one zero-byte file in *folder* per entry of *names*, in order."""
        for name in names:
            (folder / name).write_bytes(b"")

    def test_video_plus_metadata_still_video(self, tmp_path: Path):
        self._populate(tmp_path, "main.mkv", "info.nfo", "cover.jpg", "subs.srt")
        assert detect_media_type(_parsed("movie"), tmp_path) == "movie"

    def test_non_video_plus_metadata_still_other(self, tmp_path: Path):
        self._populate(tmp_path, "disc.iso", "info.nfo")
        assert detect_media_type(_parsed("movie"), tmp_path) == "other"

    def test_case_insensitive_extensions(self, tmp_path: Path):
        # Suffix is lowercased before classification.
        shouting = tmp_path / "X.MKV"
        shouting.write_bytes(b"")
        assert detect_media_type(_parsed("movie"), shouting) == "movie"
# --------------------------------------------------------------------------- #
# Missing / non-existent paths #
# --------------------------------------------------------------------------- #
class TestMissing:
    """Paths with nothing to inspect keep the parsed media type."""

    def test_nonexistent_path_keeps_parsed_type(self, tmp_path: Path):
        absent_file = tmp_path / "does_not_exist.mkv"
        # Doesn't exist → empty extension set → falls through.
        verdict = detect_media_type(_parsed("movie"), absent_file)
        assert verdict == "movie"

    def test_nonexistent_folder_keeps_parsed_type(self, tmp_path: Path):
        absent_dir = tmp_path / "ghost"
        verdict = detect_media_type(_parsed("tv_show"), absent_dir)
        assert verdict == "tv_show"

    def test_subfolder_not_recursed(self, tmp_path: Path):
        # _collect_extensions scans only the first level — files inside
        # subfolders must not influence the decision.
        nested = tmp_path / "sub"
        nested.mkdir()
        (nested / "deep.mkv").write_bytes(b"")
        # Top level has no files at all → empty → falls through to parsed type.
        verdict = detect_media_type(_parsed("movie"), tmp_path)
        assert verdict == "movie"
+211
View File
@@ -0,0 +1,211 @@
"""Tests for ``alfred.application.filesystem.enrich_from_probe``.
The function mutates a ``ParsedRelease`` in place using ffprobe ``MediaInfo``.
Token-level values from the release name always win — only ``None`` fields
are filled.
Coverage:
- ``TestQuality`` — resolution fill-in (and no-overwrite).
- ``TestVideoCodec`` — codec map (hevc→x265, …) + uppercase fallback.
- ``TestAudio`` — default track preferred over first; codec & channel maps
with unknown-value fallbacks.
- ``TestLanguages`` — append-only merge; ``und`` skipped; case-insensitive
duplicate suppression.
Uses real ``ParsedRelease`` / ``MediaInfo`` instances — no mocking needed.
"""
from __future__ import annotations
from alfred.application.filesystem.enrich_from_probe import enrich_from_probe
from alfred.domain.release.value_objects import ParsedRelease
from alfred.domain.shared.media import AudioTrack, MediaInfo, VideoTrack
def _info_with_video(*, width=None, height=None, codec=None, **rest) -> MediaInfo:
    """Build a ``MediaInfo`` holding one video track; *rest* is passed through."""
    only_track = VideoTrack(index=0, codec=codec, width=width, height=height)
    return MediaInfo(video_tracks=[only_track], **rest)
def _bare(**overrides) -> ParsedRelease:
    """Build a minimal ``ParsedRelease``; keyword overrides replace the defaults.

    Year, season, episode, quality, source and codec all default to ``None``.
    """
    fields = {
        "raw": "X",
        "normalised": "X",
        "title": "X",
        "year": None,
        "season": None,
        "episode": None,
        "episode_end": None,
        "quality": None,
        "source": None,
        "codec": None,
        "group": "UNKNOWN",
        "tech_string": "",
        **overrides,
    }
    return ParsedRelease(**fields)
# --------------------------------------------------------------------------- #
# Quality / resolution #
# --------------------------------------------------------------------------- #
class TestQuality:
    """``quality`` is filled from the probe only when it is still ``None``."""

    def test_fills_when_none(self):
        release = _bare()
        probe = _info_with_video(width=1920, height=1080)
        enrich_from_probe(release, probe)
        assert release.quality == "1080p"

    def test_does_not_overwrite_existing(self):
        release = _bare(quality="2160p")
        probe = _info_with_video(width=1920, height=1080)
        enrich_from_probe(release, probe)
        assert release.quality == "2160p"

    def test_no_dims_leaves_none(self):
        release = _bare()
        enrich_from_probe(release, MediaInfo())
        assert release.quality is None
# --------------------------------------------------------------------------- #
# Video codec #
# --------------------------------------------------------------------------- #
class TestVideoCodec:
    """``codec`` fill-in: known mappings, uppercase fallback, no overwrite."""

    @staticmethod
    def _enriched(probe, **release_fields):
        """Enrich a bare release with *probe* and return the release."""
        release = _bare(**release_fields)
        enrich_from_probe(release, probe)
        return release

    def test_hevc_to_x265(self):
        release = self._enriched(_info_with_video(codec="hevc"))
        assert release.codec == "x265"

    def test_h264_to_x264(self):
        release = self._enriched(_info_with_video(codec="h264"))
        assert release.codec == "x264"

    def test_unknown_codec_uppercased(self):
        release = self._enriched(_info_with_video(codec="weird"))
        assert release.codec == "WEIRD"

    def test_does_not_overwrite_existing(self):
        release = self._enriched(_info_with_video(codec="h264"), codec="HEVC")
        assert release.codec == "HEVC"

    def test_no_codec_leaves_none(self):
        release = self._enriched(MediaInfo())
        assert release.codec is None
# --------------------------------------------------------------------------- #
# Audio #
# --------------------------------------------------------------------------- #
class TestAudio:
    """``audio_codec`` / ``audio_channels`` fill-in from the probed audio tracks."""

    @staticmethod
    def _enriched(tracks, **release_fields):
        """Enrich a bare release from a ``MediaInfo`` carrying *tracks*."""
        release = _bare(**release_fields)
        enrich_from_probe(release, MediaInfo(audio_tracks=tracks))
        return release

    def test_uses_default_track(self):
        release = self._enriched(
            [
                AudioTrack(0, "aac", 2, "stereo", "eng", is_default=False),
                AudioTrack(1, "eac3", 6, "5.1", "eng", is_default=True),
            ]
        )
        assert release.audio_codec == "EAC3"
        assert release.audio_channels == "5.1"

    def test_falls_back_to_first_track_when_no_default(self):
        release = self._enriched(
            [
                AudioTrack(0, "ac3", 6, "5.1", "eng"),
                AudioTrack(1, "aac", 2, "stereo", "fre"),
            ]
        )
        assert release.audio_codec == "AC3"
        assert release.audio_channels == "5.1"

    def test_channel_count_unknown_falls_back(self):
        release = self._enriched([AudioTrack(0, "aac", 4, "quad", "eng")])
        assert release.audio_channels == "4ch"

    def test_unknown_audio_codec_uppercased(self):
        release = self._enriched([AudioTrack(0, "newcodec", 2, "stereo", "eng")])
        assert release.audio_codec == "NEWCODEC"

    def test_no_audio_tracks(self):
        release = _bare()
        enrich_from_probe(release, MediaInfo())
        assert release.audio_codec is None
        assert release.audio_channels is None

    def test_does_not_overwrite_existing_audio_fields(self):
        release = self._enriched(
            [AudioTrack(0, "ac3", 6, "5.1", "eng")],
            audio_codec="DTS-HD.MA",
            audio_channels="7.1",
        )
        assert release.audio_codec == "DTS-HD.MA"
        assert release.audio_channels == "7.1"
# --------------------------------------------------------------------------- #
# Languages #
# --------------------------------------------------------------------------- #
class TestLanguages:
    """``languages`` merge: new track languages are appended to the release."""

    @staticmethod
    def _stereo_aac(*language_codes):
        """Build a ``MediaInfo`` with one stereo AAC track per language code."""
        tracks = [
            AudioTrack(position, "aac", 2, "stereo", code)
            for position, code in enumerate(language_codes)
        ]
        return MediaInfo(audio_tracks=tracks)

    def test_appends_new(self):
        release = _bare()
        enrich_from_probe(release, self._stereo_aac("eng", "fre"))
        assert release.languages == ["eng", "fre"]

    def test_skips_und(self):
        release = _bare()
        enrich_from_probe(release, self._stereo_aac("und", "eng"))
        assert release.languages == ["eng"]

    def test_dedup_against_existing_case_insensitive(self):
        # existing token-level languages are typically upper-case ("FRENCH", "ENG")
        # The current logic compares track.lang.upper() against existing —
        # so a track with "eng" is suppressed if "ENG" is already in languages.
        release = _bare()
        release.languages = ["ENG"]
        enrich_from_probe(release, self._stereo_aac("eng", "fre"))
        # "eng" → upper "ENG" already present → skipped. "fre" → "FRE" new → kept.
        assert release.languages == ["ENG", "fre"]

    def test_no_audio_tracks_leaves_languages_empty(self):
        release = _bare()
        enrich_from_probe(release, MediaInfo())
        assert release.languages == []
+565
View File
@@ -0,0 +1,565 @@
"""Tests for ``alfred.application.filesystem.manage_subtitles``.
``ManageSubtitlesUseCase`` orchestrates the subtitle pipeline:
KB load → pattern resolution → identify → match → place → persist.
Strategy: mock the heavy collaborators (``SubtitleIdentifier``,
``PatternDetector``, ``SubtitleMatcher``, ``SubtitlePlacer``,
``RuleSetRepository``, ``SubtitleMetadataStore``, ``SubtitleKnowledgeBase``)
at the use-case module path. The use case instantiates them inline so each
patch targets a single class symbol.
Coverage:
- ``TestSourceMissing`` — source_not_found short-circuit when neither file
nor parent dir exists.
- ``TestPatternResolution`` — confirmed_pattern_id wins; falls back to
stored confirmed pattern; falls back to detector; falls back to
"adjacent"; pattern_not_found error when KB has nothing.
- ``TestNoTracks`` — empty identifier output → status=ok, empty placed list.
- ``TestEmbeddedShortCircuit`` — EMBEDDED scan_strategy yields ``available``
list and never calls the matcher/placer.
- ``TestMatcherFlow`` — unresolved → needs_clarification; no matches → ok
with skipped_count; happy path runs placer + appends history.
- ``TestDryRun`` — dry_run skips placement, returns predicted destinations.
- ``TestHelpers`` — ``_infer_library_root``, ``_to_imdb_id``,
``_to_unresolved_dto``, ``_pair_placed_with_tracks``.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from alfred.application.filesystem.manage_subtitles import (
ManageSubtitlesUseCase,
_infer_library_root,
_pair_placed_with_tracks,
_to_imdb_id,
_to_unresolved_dto,
)
from alfred.domain.subtitles.entities import MediaSubtitleMetadata, SubtitleCandidate
from alfred.domain.subtitles.services.placer import PlacedTrack, PlaceResult
from alfred.domain.subtitles.value_objects import (
ScanStrategy,
SubtitleFormat,
SubtitleLanguage,
SubtitleType,
)
# Shared value objects for this module: one subtitle format and two
# languages. ``SRT`` and ``FRA`` are the defaults of the ``_track`` factory;
# ``ENG`` is used where a test needs a second, distinct language.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
FRA = SubtitleLanguage(code="fra", tokens=["fr"])
ENG = SubtitleLanguage(code="eng", tokens=["en"])
def _track(
    *,
    lang=FRA,
    fmt=SRT,
    stype=SubtitleType.STANDARD,
    file_path: Path | None = None,
    is_embedded: bool = False,
    raw_tokens: list[str] | None = None,
    file_size_kb: float | None = None,
) -> SubtitleCandidate:
    """Build a ``SubtitleCandidate`` with test-friendly defaults.

    All arguments are keyword-only. A falsy ``raw_tokens`` (``None`` or an
    empty list) is replaced by a fresh empty list so no list is shared
    between candidates.
    """
    tokens = raw_tokens if raw_tokens else []
    candidate = SubtitleCandidate(
        language=lang,
        format=fmt,
        subtitle_type=stype,
        file_path=file_path,
        is_embedded=is_embedded,
        raw_tokens=tokens,
        file_size_kb=file_size_kb,
    )
    return candidate
def _pattern(
    pid: str = "adjacent", strategy: ScanStrategy = ScanStrategy.ADJACENT
) -> MagicMock:
    """Return a mock pattern exposing the given ``id`` and ``scan_strategy``."""
    stub = MagicMock()
    # configure_mock sets plain attributes, same as assigning them one by one.
    stub.configure_mock(id=pid, scan_strategy=strategy)
    return stub
# --------------------------------------------------------------------------- #
# Helper functions #
# --------------------------------------------------------------------------- #
class TestHelpers:
    """Unit tests for the private helper functions imported from the use-case module."""

    def test_infer_library_root_tv_show(self):
        # Episode file → season folder → show folder (expected root).
        episode = Path("/lib/tv/Show/Season.01/E01.mkv")
        expected_root = Path("/lib/tv/Show")
        assert _infer_library_root(episode, "tv_show") == expected_root

    def test_infer_library_root_movie(self):
        # Movie file → movie folder (expected root).
        movie = Path("/lib/movies/Movie.2010/Movie.2010.mkv")
        expected_root = Path("/lib/movies/Movie.2010")
        assert _infer_library_root(movie, "movie") == expected_root

    def test_to_imdb_id_none_or_empty(self):
        for blank in (None, ""):
            assert _to_imdb_id(blank) is None

    def test_to_imdb_id_valid(self):
        imdb = _to_imdb_id("tt1375666")
        assert imdb is not None
        assert str(imdb) == "tt1375666"

    def test_to_imdb_id_invalid_returns_none(self):
        result = _to_imdb_id("not-an-imdb-id")
        assert result is None

    def test_to_unresolved_dto_unknown_language(self):
        candidate = _track(lang=None, raw_tokens=["fr", "x"], file_size_kb=12.0)
        candidate.file_path = Path("/x/a.srt")
        dto = _to_unresolved_dto(candidate)
        assert dto.reason == "unknown_language"
        assert dto.raw_tokens == ["fr", "x"]
        # The DTO carries the path as a string, not a ``Path``.
        assert dto.file_path == "/x/a.srt"
        assert dto.file_size_kb == 12.0

    def test_to_unresolved_dto_low_confidence(self):
        # A language is present, so the reason is not "unknown_language".
        candidate = _track(lang=FRA, raw_tokens=["fr"])
        dto = _to_unresolved_dto(candidate)
        assert dto.reason == "low_confidence"

    def test_to_unresolved_dto_no_file_path(self):
        candidate = _track(lang=None)
        dto = _to_unresolved_dto(candidate)
        assert dto.file_path is None

    def test_pair_placed_with_tracks_by_path(self):
        first_src = Path("/in/a.srt")
        second_src = Path("/in/b.srt")
        first_track = _track(file_path=first_src, lang=FRA)
        second_track = _track(file_path=second_src, lang=ENG)
        first_placed = PlacedTrack(
            source=first_src, destination=Path("/out/a"), filename="a"
        )
        second_placed = PlacedTrack(
            source=second_src, destination=Path("/out/b"), filename="b"
        )
        pairs = _pair_placed_with_tracks(
            [first_placed, second_placed], [first_track, second_track]
        )
        assert pairs == [(first_placed, first_track), (second_placed, second_track)]

    def test_pair_placed_falls_back_to_positional(self):
        # The placed source matches no track.file_path, so the pairing is
        # expected to fall back to the track at the same position.
        known = _track(file_path=Path("/in/known.srt"))
        ghost = PlacedTrack(
            source=Path("/in/ghost.srt"), destination=Path("/x"), filename="x"
        )
        pairs = _pair_placed_with_tracks([ghost], [known])
        assert pairs == [(ghost, known)]

    def test_pair_placed_empty_inputs(self):
        pairs = _pair_placed_with_tracks([], [])
        assert pairs == []
# --------------------------------------------------------------------------- #
# Use case shared fixtures #
# --------------------------------------------------------------------------- #
# Dotted path of the module under test; the ``patches`` fixture builds every
# ``patch()`` target from it.
MOD = "alfred.application.filesystem.manage_subtitles"
@pytest.fixture
def video(tmp_path):
    """Create two empty video files under ``tmp_path`` and return their paths.

    Returns ``(source, destination)`` where the source lives in ``dl/`` and
    the destination in ``lib/Movie.2010/``.
    """
    file_name = "Movie.2010.mkv"
    download_dir = tmp_path / "dl"
    library_dir = tmp_path / "lib" / "Movie.2010"
    download_dir.mkdir()
    library_dir.mkdir(parents=True)
    source = download_dir / file_name
    destination = library_dir / file_name
    for empty_file in (source, destination):
        empty_file.write_bytes(b"")
    return source, destination
@pytest.fixture
def patches():
    """Swap every collaborator the use case builds inline for a mock.

    Yields a dict holding the mock *instances* (the ``return_value`` of each
    patched class) together with the patched ``KnowledgeLoader`` and
    ``get_memory`` symbols. The instances are pre-loaded with neutral
    defaults that individual tests override.
    """
    with (
        patch(f"{MOD}.KnowledgeLoader") as loader_mock,
        patch(f"{MOD}.SubtitleKnowledgeBase") as kb_class,
        patch(f"{MOD}.SubtitleMetadataStore") as store_class,
        patch(f"{MOD}.RuleSetRepository") as repo_class,
        patch(f"{MOD}.SubtitleIdentifier") as identifier_class,
        patch(f"{MOD}.PatternDetector") as detector_class,
        patch(f"{MOD}.SubtitleMatcher") as matcher_class,
        patch(f"{MOD}.SubtitlePlacer") as placer_class,
        patch(f"{MOD}.get_memory") as get_memory_mock,
    ):
        # Knowledge base: any pattern lookup yields the default "adjacent" pattern.
        knowledge_base = kb_class.return_value
        knowledge_base.pattern.return_value = _pattern()

        # Metadata store: no confirmed pattern recorded yet.
        metadata_store = store_class.return_value
        metadata_store.confirmed_pattern.return_value = None

        # Detector: nothing detected.
        detector = detector_class.return_value
        detector.detect.return_value = {"detected": None, "confidence": 0.0}

        # Identifier: metadata without any track.
        identifier = identifier_class.return_value
        identifier.identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
        )

        # Matcher: nothing matched, nothing unresolved.
        matcher = matcher_class.return_value
        matcher.match.return_value = ([], [])

        # Placer: nothing placed, nothing skipped.
        placer = placer_class.return_value
        placer.place.return_value = PlaceResult(placed=[], skipped=[])

        # Rules repository: resolves to an opaque mock rules object.
        rules_repo = repo_class.return_value
        rules_repo.load.return_value.resolve.return_value = MagicMock(name="Rules")

        # get_memory: succeeds and exposes mock subtitle preferences.
        get_memory_mock.return_value.ltm.subtitle_preferences = MagicMock()

        yield {
            "kb": knowledge_base,
            "store": metadata_store,
            "repo": rules_repo,
            "ident": identifier,
            "det": detector,
            "matcher": matcher,
            "placer": placer,
            "loader": loader_mock,
            "get_memory": get_memory_mock,
        }
# --------------------------------------------------------------------------- #
# Source missing #
# --------------------------------------------------------------------------- #
class TestSourceMissing:
    """Behaviour when the source video file is absent."""

    def test_source_and_parent_missing_returns_error(self, tmp_path):
        # Neither the file nor its parent directory exists.
        missing_source = tmp_path / "ghost" / "ghost.mkv"
        destination = tmp_path / "lib" / "x.mkv"
        use_case = ManageSubtitlesUseCase()
        out = use_case.execute(
            source_video=str(missing_source),
            destination_video=str(destination),
        )
        assert out.status == "error"
        assert out.error == "source_not_found"

    def test_source_missing_but_parent_exists_does_not_error_early(
        self, tmp_path, patches
    ):
        # The parent directory exists, so the use case carries on. With the
        # default mocks the identifier reports no track → status "ok".
        download_dir = tmp_path / "dl"
        library_dir = tmp_path / "lib"
        download_dir.mkdir()
        library_dir.mkdir()
        out = ManageSubtitlesUseCase().execute(
            source_video=str(download_dir / "missing.mkv"),
            destination_video=str(library_dir / "missing.mkv"),
            media_type="movie",
        )
        assert out.status == "ok"
# --------------------------------------------------------------------------- #
# Pattern resolution #
# --------------------------------------------------------------------------- #
class TestPatternResolution:
    """Order in which the use case picks the subtitle pattern.

    Explicit ``confirmed_pattern_id`` → pattern stored for the release →
    detector result → ``"adjacent"`` fallback → ``pattern_not_found`` error.
    """

    def test_confirmed_pattern_id_wins(self, video, patches):
        src, dest = video
        custom = _pattern("subs_flat")
        patches["kb"].pattern.side_effect = lambda pid: (
            custom if pid == "subs_flat" else _pattern()
        )
        ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
            confirmed_pattern_id="subs_flat",
        )
        # The identifier receives the confirmed pattern, not the default one.
        used = patches["ident"].identify.call_args.kwargs["pattern"]
        assert used.id == "subs_flat"
        # Detector should not even run when an explicit confirmation is given.
        patches["det"].detect.assert_not_called()

    def test_confirmed_pattern_id_unknown_falls_through_to_stored(self, video, patches):
        src, dest = video
        # The KB returns None for the requested override (dict.get miss);
        # the stored confirmation then supplies 'subs_flat'.
        patches["store"].confirmed_pattern.return_value = "subs_flat"
        flat = _pattern("subs_flat")
        patches["kb"].pattern.side_effect = lambda pid: {
            "subs_flat": flat,
            "adjacent": _pattern(),
        }.get(pid)
        ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
            confirmed_pattern_id="DOES_NOT_EXIST",
        )
        used = patches["ident"].identify.call_args.kwargs["pattern"]
        assert used.id == "subs_flat"

    def test_detector_used_when_no_confirmed_and_no_stored(self, video, patches):
        src, dest = video
        detected = _pattern("episode_subfolder")
        patches["det"].detect.return_value = {
            "detected": detected,
            "confidence": 0.9,
        }
        ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        used = patches["ident"].identify.call_args.kwargs["pattern"]
        assert used.id == "episode_subfolder"

    def test_detector_low_confidence_falls_back_to_adjacent(self, video, patches):
        src, dest = video
        patches["det"].detect.return_value = {
            "detected": _pattern("episode_subfolder"),
            "confidence": 0.1,
        }
        ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        # The last KB lookup is the fallback: kb.pattern('adjacent').
        last_lookup = patches["kb"].pattern.call_args_list[-1]
        assert last_lookup.args == ("adjacent",)

    def test_pattern_not_found_when_kb_returns_none(self, video, patches):
        src, dest = video
        patches["kb"].pattern.return_value = None  # nothing known
        # Same value as the fixture default; restated so the scenario is
        # readable without looking at the fixture.
        patches["det"].detect.return_value = {"detected": None, "confidence": 0.0}
        out = ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        assert out.status == "error"
        assert out.error == "pattern_not_found"
# --------------------------------------------------------------------------- #
# No tracks #
# --------------------------------------------------------------------------- #
class TestNoTracks:
    """The identifier reports no track at all (fixture default)."""

    def test_zero_tracks_returns_ok_empty(self, video, patches):
        source, destination = video
        use_case = ManageSubtitlesUseCase()
        out = use_case.execute(
            source_video=str(source),
            destination_video=str(destination),
            media_type="movie",
        )
        assert out.status == "ok"
        assert out.placed == []
        assert out.skipped_count == 0
# --------------------------------------------------------------------------- #
# Embedded short-circuit #
# --------------------------------------------------------------------------- #
class TestEmbeddedShortCircuit:
    """An EMBEDDED scan strategy lists available tracks and places nothing."""

    def test_embedded_returns_available_and_skips_matcher(self, video, patches):
        source, destination = video
        embedded_pattern = _pattern("embedded", ScanStrategy.EMBEDDED)
        patches["kb"].pattern.return_value = embedded_pattern
        french = _track(lang=FRA, is_embedded=True)
        english_sdh = _track(lang=ENG, stype=SubtitleType.SDH, is_embedded=True)
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="embedded",
            embedded_tracks=[french, english_sdh],
        )
        out = ManageSubtitlesUseCase().execute(
            source_video=str(source),
            destination_video=str(destination),
            media_type="movie",
        )
        assert out.status == "ok"
        assert out.placed == []
        assert out.available is not None
        reported = {entry.language for entry in out.available}
        assert {"fra", "eng"}.issubset(reported)
        # Neither matching nor placement may run for embedded subtitles.
        patches["matcher"].match.assert_not_called()
        patches["placer"].place.assert_not_called()
# --------------------------------------------------------------------------- #
# Matcher flow #
# --------------------------------------------------------------------------- #
class TestMatcherFlow:
    """What happens after identification, driven by the matcher's output.

    Unresolved tracks → ``needs_clarification``; nothing matched → ``ok``
    with a skipped count; matches → placement and a history entry.
    """

    def test_unresolved_returns_needs_clarification(self, video, patches):
        src, dest = video
        ext = [_track(file_path=src.parent / "a.srt")]
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=ext,
        )
        unresolved_track = _track(
            lang=None, raw_tokens=["xx"], file_path=src.parent / "?.srt"
        )
        patches["matcher"].match.return_value = ([], [unresolved_track])
        out = ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        assert out.status == "needs_clarification"
        assert out.unresolved and out.unresolved[0].reason == "unknown_language"
        # Nothing is placed while a clarification is pending.
        patches["placer"].place.assert_not_called()

    def test_no_matches_returns_ok_with_skipped(self, video, patches):
        src, dest = video
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=[_track(file_path=src.parent / "a.srt")],
            embedded_tracks=[_track(is_embedded=True)],
        )
        patches["matcher"].match.return_value = ([], [])  # no matches, no unresolved
        out = ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        assert out.status == "ok"
        assert out.placed == []
        # Every identified track counts as skipped: 1 external + 1 embedded.
        assert out.skipped_count == 2

    def test_happy_path_places_and_persists(self, video, patches):
        src, dest = video
        src_sub = src.parent / "a.srt"
        src_sub.write_text("")
        matched = [_track(file_path=src_sub, lang=FRA)]
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=matched,
        )
        patches["matcher"].match.return_value = (matched, [])
        placed = PlacedTrack(
            source=src_sub,
            destination=dest.parent / "Movie.2010.fra.srt",
            filename="Movie.2010.fra.srt",
        )
        patches["placer"].place.return_value = PlaceResult(placed=[placed], skipped=[])
        out = ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
            release_group="KONTRAST",
            season=1,
            episode=2,
        )
        assert out.status == "ok"
        assert len(out.placed) == 1
        assert out.placed[0].filename == "Movie.2010.fra.srt"
        # History was appended with season/episode/group.
        patches["store"].append_history.assert_called_once()
        # Positional order checked here: (pairs, season, episode, release_group).
        history_args = patches["store"].append_history.call_args.args
        assert history_args[1] == 1
        assert history_args[2] == 2
        assert history_args[3] == "KONTRAST"

    def test_get_memory_failure_falls_through_to_rules_repo(self, video, patches):
        # get_memory() raising must not abort the run: the scenario expects
        # the use case to carry on without subtitle preferences and still
        # reach the matcher.
        src, dest = video
        patches["get_memory"].side_effect = RuntimeError("not initialised")
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=[_track(file_path=src.parent / "a.srt")],
        )
        patches["matcher"].match.return_value = ([], [])
        out = ManageSubtitlesUseCase().execute(
            source_video=str(src),
            destination_video=str(dest),
            media_type="movie",
        )
        assert out.status == "ok"
        # Pin the "progresses past the matcher" claim instead of only
        # checking the final status.
        patches["matcher"].match.assert_called()
# --------------------------------------------------------------------------- #
# Dry run #
# --------------------------------------------------------------------------- #
class TestDryRun:
    """``dry_run=True`` predicts destinations without touching placer or history."""

    def test_dry_run_skips_placer_and_returns_predicted(self, video, patches):
        source, destination = video
        subtitle_file = source.parent / "a.srt"
        subtitle_file.write_text("")
        french_track = _track(file_path=subtitle_file, lang=FRA)
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=[french_track],
        )
        patches["matcher"].match.return_value = ([french_track], [])
        out = ManageSubtitlesUseCase().execute(
            source_video=str(source),
            destination_video=str(destination),
            media_type="movie",
            dry_run=True,
        )
        assert out.status == "ok"
        assert out.placed and out.placed[0].filename.endswith(".fra.srt")
        # Nothing is moved and nothing is recorded during a dry run.
        patches["placer"].place.assert_not_called()
        patches["store"].append_history.assert_not_called()

    def test_dry_run_skips_tracks_without_file_path(self, video, patches):
        source, destination = video
        # A track without file_path has no destination to predict → skipped.
        pathless_track = _track(file_path=None, lang=FRA)
        patches["ident"].identify.return_value = MediaSubtitleMetadata(
            media_id=None,
            media_type="movie",
            release_group=None,
            detected_pattern_id="adjacent",
            external_tracks=[pathless_track],
        )
        patches["matcher"].match.return_value = ([pathless_track], [])
        out = ManageSubtitlesUseCase().execute(
            source_video=str(source),
            destination_video=str(destination),
            media_type="movie",
            dry_run=True,
        )
        assert out.placed == []
+388 -291
View File
@@ -1,322 +1,419 @@
"""
Tests for alfred.application.filesystem.resolve_destination
"""Tests for ``alfred.application.filesystem.resolve_destination``.
Uses a real temp filesystem + a real Memory instance (via conftest fixtures).
No network calls — TMDB data is passed in directly.
Four use cases compute library paths from a release name + TMDB metadata:
- ``resolve_season_destination`` — folder move (series + season).
- ``resolve_episode_destination`` — file move (full library_file path).
- ``resolve_movie_destination`` — file move (folder + library_file).
- ``resolve_series_destination`` — folder move (whole multi-season pack).
Coverage:
- ``TestSanitize`` — Windows-forbidden chars stripped.
- ``TestFindExistingTvshowFolders`` — empty root, prefix match (case + space → dot).
- ``TestResolveSeriesFolderInternal`` — confirmed_folder, no existing, single match,
ambiguous → _Clarification.
- ``TestSeason`` — library_not_set, ok path, clarification path.
- ``TestEpisode`` — library_not_set, ok path, filename includes episode_title, ext from source.
- ``TestMovie`` — library_not_set, ok path, is_new_folder, sanitization.
- ``TestSeries`` — library_not_set, ok path.
- ``TestDTOToDict`` — each DTO's three states (ok / clarification / error).
"""
from pathlib import Path
from __future__ import annotations
import pytest
from alfred.application.filesystem.resolve_destination import (
ResolveDestinationUseCase,
_find_existing_series_folders,
ResolvedEpisodeDestination,
ResolvedMovieDestination,
ResolvedSeasonDestination,
ResolvedSeriesDestination,
_Clarification,
_find_existing_tvshow_folders,
_resolve_series_folder,
_sanitize,
resolve_episode_destination,
resolve_movie_destination,
resolve_season_destination,
resolve_series_destination,
)
from alfred.infrastructure.persistence import Memory, set_memory
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Release names fed to the resolver tests, one per kind of release:
# single episode, season pack, movie, and complete-series pack.
REL_EPISODE = "Oz.S01E01.1080p.WEBRip.x265-KONTRAST"
REL_SEASON = "Oz.S03.1080p.WEBRip.x265-KONTRAST"
REL_MOVIE = "Inception.2010.1080p.BluRay.x265-GROUP"
REL_SERIES = "Oz.Complete.Series.1080p.WEBRip.x265-KONTRAST"
def _use_case():
return ResolveDestinationUseCase()
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
# ---------------------------------------------------------------------------
# Movies
# ---------------------------------------------------------------------------
class TestSanitize:
    """``_sanitize`` keeps safe names intact and drops forbidden characters."""

    def test_passthrough_safe_chars(self):
        safe_name = "Oz.1997.1080p-GRP"
        assert _sanitize(safe_name) == safe_name

    def test_strips_windows_forbidden(self):
        # ? : * " < > | \
        dirty = 'a?b:c*d"e<f>g|h\\i'
        assert _sanitize(dirty) == "abcdefghi"
class TestResolveMovie:
def test_basic_movie(self, memory_configured):
result = _use_case().execute(
release_name="Another.Round.2020.1080p.BluRay.x264-YTS",
source_file="/downloads/Another.Round.2020.1080p.BluRay.x264-YTS/Another.Round.2020.1080p.BluRay.x264-YTS.mp4",
tmdb_title="Another Round",
tmdb_year=2020,
# --------------------------------------------------------------------------- #
# _find_existing_tvshow_folders #
# --------------------------------------------------------------------------- #
class TestFindExistingTvshowFolders:
    """Lookup of already-present show folders under the TV library root."""

    def test_missing_root_returns_empty(self, tmp_path):
        assert _find_existing_tvshow_folders(tmp_path / "ghost", "Oz", 1997) == []

    def test_no_match(self, tmp_path):
        (tmp_path / "OtherShow.1999").mkdir()
        assert _find_existing_tvshow_folders(tmp_path, "Oz", 1997) == []

    def test_matches_prefix_case_insensitive_with_space_dot(self, tmp_path):
        (tmp_path / "Oz.1997.WEBRip-KONTRAST").mkdir()
        (tmp_path / "oz.1997.bluray-OTHER").mkdir()
        (tmp_path / "OtherShow.1999").mkdir()
        out = _find_existing_tvshow_folders(tmp_path, "Oz", 1997)
        # Result order is deliberately not asserted. The former
        # ``out == [...] or set(out) == {...}`` reduced to this set check,
        # because list equality already implies set equality.
        assert set(out) == {
            "Oz.1997.WEBRip-KONTRAST",
            "oz.1997.bluray-OTHER",
        }

    def test_files_ignored(self, tmp_path):
        # A regular file with a matching name is not a show folder.
        (tmp_path / "Oz.1997.txt").write_text("not a folder")
        assert _find_existing_tvshow_folders(tmp_path, "Oz", 1997) == []

    def test_space_in_title_becomes_dot(self, tmp_path):
        (tmp_path / "The.X.Files.1993.x265-KONTRAST").mkdir()
        assert _find_existing_tvshow_folders(tmp_path, "The X Files", 1993) == [
            "The.X.Files.1993.x265-KONTRAST"
        ]
# --------------------------------------------------------------------------- #
# _resolve_series_folder #
# --------------------------------------------------------------------------- #
class TestResolveSeriesFolderInternal:
def test_confirmed_folder_when_exists(self, tmp_path):
(tmp_path / "Oz.1997.X-GRP").mkdir()
out = _resolve_series_folder(
tmp_path,
"Oz",
1997,
"Oz.1997.WEBRip-KONTRAST",
confirmed_folder="Oz.1997.X-GRP",
)
assert result.status == "ok"
assert "Another.Round.2020" in result.series_folder_name
assert "1080p.BluRay.x264-YTS" in result.series_folder_name
assert result.filename.endswith(".mp4")
assert result.season_folder is None
assert out == ("Oz.1997.X-GRP", False)
def test_movie_library_file_path_is_inside_series_folder(self, memory_configured):
result = _use_case().execute(
release_name="Revolver.2005.1080p.BluRay.x265-RARBG",
source_file="/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
tmdb_title="Revolver",
tmdb_year=2005,
def test_confirmed_folder_when_new(self, tmp_path):
out = _resolve_series_folder(
tmp_path,
"Oz",
1997,
"Oz.1997.WEBRip-KONTRAST",
confirmed_folder="Oz.1997.New-X",
)
assert result.status == "ok"
assert result.library_file.startswith(result.series_folder)
assert out == ("Oz.1997.New-X", True)
def test_movie_library_not_set(self, memory):
# memory has no library paths configured
result = _use_case().execute(
release_name="Revolver.2005.1080p.BluRay.x265-RARBG",
source_file="/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
tmdb_title="Revolver",
tmdb_year=2005,
def test_no_existing_returns_computed_as_new(self, tmp_path):
out = _resolve_series_folder(
tmp_path, "Oz", 1997, "Oz.1997.WEBRip-KONTRAST", None
)
assert result.status == "error"
assert result.error == "library_not_set"
assert out == ("Oz.1997.WEBRip-KONTRAST", True)
def test_movie_folder_marked_new(self, memory_configured):
# No existing folder → is_new_series_folder = True
result = _use_case().execute(
release_name="Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS",
source_file="/downloads/Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS.mp4",
tmdb_title="Godzilla Minus One",
tmdb_year=2023,
def test_single_existing_matching_computed_returns_existing(self, tmp_path):
(tmp_path / "Oz.1997.WEBRip-KONTRAST").mkdir()
out = _resolve_series_folder(
tmp_path, "Oz", 1997, "Oz.1997.WEBRip-KONTRAST", None
)
assert result.status == "ok"
assert result.is_new_series_folder is True
assert out == ("Oz.1997.WEBRip-KONTRAST", False)
def test_movie_sanitises_forbidden_chars_in_title(self, memory_configured):
result = _use_case().execute(
release_name="Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST",
source_file="/downloads/Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST.mkv",
tmdb_title="Alien: Earth",
tmdb_year=2024,
def test_single_existing_different_name_returns_clarification(self, tmp_path):
(tmp_path / "Oz.1997.BluRay-OTHER").mkdir()
out = _resolve_series_folder(
tmp_path, "Oz", 1997, "Oz.1997.WEBRip-KONTRAST", None
)
assert result.status == "ok"
assert ":" not in result.series_folder_name
assert isinstance(out, _Clarification)
assert "Oz" in out.question
assert "Oz.1997.BluRay-OTHER" in out.options
assert "Oz.1997.WEBRip-KONTRAST" in out.options
def test_to_dict_ok(self, memory_configured):
result = _use_case().execute(
release_name="Revolver.2005.1080p.BluRay.x265-RARBG",
source_file="/downloads/Revolver.mkv",
tmdb_title="Revolver",
tmdb_year=2005,
def test_multiple_existing_returns_clarification(self, tmp_path):
(tmp_path / "Oz.1997.A-GRP").mkdir()
(tmp_path / "Oz.1997.B-GRP").mkdir()
out = _resolve_series_folder(tmp_path, "Oz", 1997, "Oz.1997.A-GRP", None)
assert isinstance(out, _Clarification)
# Computed already in existing → not duplicated.
assert out.options.count("Oz.1997.A-GRP") == 1
# --------------------------------------------------------------------------- #
# Season #
# --------------------------------------------------------------------------- #
@pytest.fixture
def cfg_memory(tmp_path):
    """Memory whose ``tv_show`` and ``movie`` library paths are configured.

    Both root directories (``tv/`` and ``mv/``) are created here, empty,
    under ``tmp_path``; tests add show/movie folders themselves when they
    need pre-existing content.

    Returns ``(memory, tv_root, movie_root)``.
    """
    storage = tmp_path / "_mem"
    storage.mkdir()
    tv = tmp_path / "tv"
    mv = tmp_path / "mv"
    tv.mkdir()
    mv.mkdir()
    mem = Memory(storage_dir=str(storage))
    # NOTE(review): set_memory() is called without a matching teardown in this
    # fixture — confirm that isolation between tests is handled elsewhere.
    set_memory(mem)
    mem.ltm.library_paths.set("tv_show", str(tv))
    mem.ltm.library_paths.set("movie", str(mv))
    mem.save()
    return mem, tv, mv
@pytest.fixture
def empty_memory(tmp_path):
    """Install and return a Memory on which no library path has been set."""
    storage_dir = tmp_path / "_mem_empty"
    storage_dir.mkdir()
    blank = Memory(storage_dir=str(storage_dir))
    set_memory(blank)
    return blank
class TestSeason:
def test_library_not_set(self, empty_memory):
    """Without a configured library the season resolver reports an error."""
    result = resolve_season_destination(REL_SEASON, "Oz", 1997)
    assert result.status == "error"
    assert result.error == "library_not_set"
def test_ok_path_new_series(self, cfg_memory):
    """With an empty TV root the season resolves into a new series folder."""
    tv_root = cfg_memory[1]
    result = resolve_season_destination(REL_SEASON, "Oz", 1997)
    assert result.status == "ok"
    assert result.is_new_series_folder is True
    assert result.series_folder_name.startswith("Oz.1997")
    assert result.season_folder_name.startswith("Oz.S03")
    # Full paths are the TV root joined with the folder names.
    expected_series = tv_root / result.series_folder_name
    assert result.series_folder == str(expected_series)
    assert result.season_folder == str(expected_series / result.season_folder_name)
d = result.to_dict()
def test_clarification_path(self, cfg_memory):
    """An existing, differently named show folder triggers a clarification."""
    tv_root = cfg_memory[1]
    (tv_root / "Oz.1997.BluRay-OTHER").mkdir()
    result = resolve_season_destination(REL_SEASON, "Oz", 1997)
    assert result.status == "needs_clarification"
    assert result.options
    assert any("Oz" in option for option in result.options)
# --------------------------------------------------------------------------- #
# Episode #
# --------------------------------------------------------------------------- #
class TestEpisode:
    """``resolve_episode_destination``: full library path for a single episode."""

    def test_library_not_set(self, empty_memory):
        result = resolve_episode_destination(REL_EPISODE, "/in/x.mkv", "Oz", 1997)
        assert result.status == "error"
        assert result.error == "library_not_set"

    def test_ok_path_with_episode_title(self, cfg_memory):
        tv_root = cfg_memory[1]
        result = resolve_episode_destination(
            REL_EPISODE, "/dl/source.mkv", "Oz", 1997, tmdb_episode_title="The Routine"
        )
        assert result.status == "ok"
        assert result.filename.endswith(".mkv")
        assert "S01E01" in result.filename
        assert "The.Routine" in result.filename
        # library_file is <tv root>/<series>/<season>/<file>.
        expected_file = (
            tv_root
            / result.series_folder_name
            / result.season_folder_name
            / result.filename
        )
        assert result.library_file == str(expected_file)

    def test_ok_path_without_episode_title(self, cfg_memory):
        result = resolve_episode_destination(REL_EPISODE, "/dl/source.mkv", "Oz", 1997)
        assert result.status == "ok"
        # A blank episode title must not leave a doubled dot behind.
        assert ".." not in result.filename

    def test_extension_taken_from_source_file(self, cfg_memory):
        result = resolve_episode_destination(REL_EPISODE, "/dl/source.mp4", "Oz", 1997)
        assert result.filename.endswith(".mp4")

    def test_clarification_path(self, cfg_memory):
        tv_root = cfg_memory[1]
        (tv_root / "Oz.1997.BluRay-OTHER").mkdir()
        result = resolve_episode_destination(REL_EPISODE, "/dl/source.mkv", "Oz", 1997)
        assert result.status == "needs_clarification"

    def test_confirmed_folder_threaded_through(self, cfg_memory):
        tv_root = cfg_memory[1]
        chosen = "Oz.1997.BluRay-OTHER"
        (tv_root / chosen).mkdir()
        result = resolve_episode_destination(
            REL_EPISODE,
            "/dl/source.mkv",
            "Oz",
            1997,
            confirmed_folder=chosen,
        )
        assert result.status == "ok"
        assert result.series_folder_name == chosen
        assert result.is_new_series_folder is False
# --------------------------------------------------------------------------- #
# Movie #
# --------------------------------------------------------------------------- #
class TestMovie:
    """``resolve_movie_destination``: movie folder plus file path."""

    def test_library_not_set(self, empty_memory):
        result = resolve_movie_destination(REL_MOVIE, "/dl/m.mkv", "Inception", 2010)
        assert result.status == "error"
        assert result.error == "library_not_set"

    def test_ok_path(self, cfg_memory):
        movie_root = cfg_memory[2]
        result = resolve_movie_destination(REL_MOVIE, "/dl/m.mkv", "Inception", 2010)
        assert result.status == "ok"
        assert result.movie_folder_name.startswith("Inception.2010")
        assert result.filename.endswith(".mkv")
        expected_folder = movie_root / result.movie_folder_name
        assert result.movie_folder == str(expected_folder)
        assert result.library_file == str(expected_folder / result.filename)
        assert result.is_new_folder is True

    def test_is_new_folder_false_when_exists(self, cfg_memory):
        movie_root = cfg_memory[2]
        # Resolve once to learn the folder name, create it, then resolve again.
        first = resolve_movie_destination(REL_MOVIE, "/dl/m.mkv", "Inception", 2010)
        (movie_root / first.movie_folder_name).mkdir()
        second = resolve_movie_destination(REL_MOVIE, "/dl/m.mkv", "Inception", 2010)
        assert second.is_new_folder is False

    def test_title_sanitized(self, cfg_memory):
        # A forbidden character in the title must not reach the output names.
        result = resolve_movie_destination(REL_MOVIE, "/dl/m.mkv", "Foo:Bar", 2010)
        assert ":" not in result.movie_folder_name
        assert ":" not in result.filename
# --------------------------------------------------------------------------- #
# Series #
# --------------------------------------------------------------------------- #
class TestSeries:
    """``resolve_series_destination``: folder for a whole-series pack."""

    def test_library_not_set(self, empty_memory):
        result = resolve_series_destination(REL_SERIES, "Oz", 1997)
        assert result.status == "error"
        assert result.error == "library_not_set"

    def test_ok_path(self, cfg_memory):
        tv_root = cfg_memory[1]
        result = resolve_series_destination(REL_SERIES, "Oz", 1997)
        assert result.status == "ok"
        assert result.series_folder_name.startswith("Oz.1997")
        assert result.series_folder == str(tv_root / result.series_folder_name)
        assert result.is_new_series_folder is True

    def test_clarification_path(self, cfg_memory):
        tv_root = cfg_memory[1]
        (tv_root / "Oz.1997.X-GRP").mkdir()
        result = resolve_series_destination(REL_SERIES, "Oz", 1997)
        assert result.status == "needs_clarification"
# --------------------------------------------------------------------------- #
# DTO to_dict() #
# --------------------------------------------------------------------------- #
class TestDTOToDict:
def test_season_ok(self):
d = ResolvedSeasonDestination(
status="ok",
series_folder="/tv/S",
season_folder="/tv/S/Season",
series_folder_name="S",
season_folder_name="Season",
is_new_series_folder=True,
).to_dict()
assert d["status"] == "ok"
assert "library_file" in d
assert "series_folder_name" in d
assert d["series_folder"] == "/tv/S"
assert d["season_folder"] == "/tv/S/Season"
assert d["is_new_series_folder"] is True
def test_season_error(self):
    """An error DTO serialises to exactly status, error and message."""
    dto = ResolvedSeasonDestination(
        status="error", error="library_not_set", message="missing"
    )
    expected = {
        "status": "error",
        "error": "library_not_set",
        "message": "missing",
    }
    assert dto.to_dict() == expected
# ---------------------------------------------------------------------------
# TV shows — no existing folder
# ---------------------------------------------------------------------------
def test_season_clarification(self):
    """A clarification DTO serialises to exactly status, question and options."""
    dto = ResolvedSeasonDestination(
        status="needs_clarification", question="which?", options=["a", "b"]
    )
    expected = {
        "status": "needs_clarification",
        "question": "which?",
        "options": ["a", "b"],
    }
    assert dto.to_dict() == expected
def test_episode_ok(self):
    """An ok episode DTO exposes its library file and filename in the dict."""
    dto = ResolvedEpisodeDestination(
        status="ok",
        series_folder="/tv/S",
        season_folder="/tv/S/Season",
        library_file="/tv/S/Season/X.mkv",
        series_folder_name="S",
        season_folder_name="Season",
        filename="X.mkv",
        is_new_series_folder=False,
    )
    serialised = dto.to_dict()
    assert serialised["library_file"] == "/tv/S/Season/X.mkv"
    assert serialised["filename"] == "X.mkv"
class TestResolveTVShowNewFolder:
def test_oz_s01_creates_new_folder(self, memory_configured):
result = _use_case().execute(
release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
source_file="/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4",
tmdb_title="Oz",
tmdb_year=1997,
)
assert result.status == "ok"
assert result.is_new_series_folder is True
assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-KONTRAST"
assert result.season_folder_name == "Oz.S01.1080p.WEBRip.x265-KONTRAST"
def test_movie_ok(self):
    """An ``ok`` movie destination serialises folder, file and new-folder flag."""
    movie = ResolvedMovieDestination(
        status="ok",
        movie_folder="/mv/X",
        library_file="/mv/X/X.mkv",
        movie_folder_name="X",
        filename="X.mkv",
        is_new_folder=True,
    )
    payload = movie.to_dict()

    assert payload["movie_folder"] == "/mv/X"
    assert payload["library_file"] == "/mv/X/X.mkv"
    assert payload["is_new_folder"] is True
def test_tv_library_not_set(self, memory):
    """With the plain ``memory`` fixture the use case reports ``library_not_set``."""
    outcome = _use_case().execute(
        release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
        source_file="/downloads/Oz.S01E01.mp4",
        tmdb_title="Oz",
        tmdb_year=1997,
    )
    assert (outcome.status, outcome.error) == ("error", "library_not_set")
def test_series_ok(self):
    """An ``ok`` series destination round-trips its constructor fields exactly."""
    fields = {
        "status": "ok",
        "series_folder": "/tv/S",
        "series_folder_name": "S",
        "is_new_series_folder": False,
    }
    serialised = ResolvedSeriesDestination(**fields).to_dict()
    assert serialised == {
        "status": "ok",
        "series_folder": "/tv/S",
        "series_folder_name": "S",
        "is_new_series_folder": False,
    }
def test_single_episode_filename(self, memory_configured):
    """A single-episode release keeps SxxEyy, the episode title and the extension."""
    outcome = _use_case().execute(
        release_name="Fallout.2024.S02E01.1080p.x265-ELiTE",
        source_file="/downloads/Fallout.2024.S02E01.1080p.x265-ELiTE.mkv",
        tmdb_title="Fallout",
        tmdb_year=2024,
        tmdb_episode_title="The Beginning",
    )
    assert outcome.status == "ok"

    filename = outcome.filename
    for fragment in ("S02E01", "The.Beginning"):
        assert fragment in filename
    assert filename.endswith(".mkv")
def test_season_pack_filename_is_folder_name_plus_ext(self, memory_configured):
    """For a season pack the filename is the season folder name plus extension."""
    outcome = _use_case().execute(
        release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
        source_file="/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.mp4",
        tmdb_title="Oz",
        tmdb_year=1997,
    )
    assert outcome.status == "ok"

    # Season pack: filename = season_folder_name + ext
    expected_filename = outcome.season_folder_name + ".mp4"
    assert outcome.filename == expected_filename
def test_library_file_is_inside_season_folder(self, memory_configured):
    """Resolved paths nest: series folder, then season folder, then library file."""
    outcome = _use_case().execute(
        release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
        source_file="/downloads/Oz.S01E01.mp4",
        tmdb_title="Oz",
        tmdb_year=1997,
    )
    season_dir = outcome.season_folder
    assert outcome.library_file.startswith(season_dir)
    assert season_dir.startswith(outcome.series_folder)
# ---------------------------------------------------------------------------
# TV shows — existing folder matching
# ---------------------------------------------------------------------------
class TestResolveTVShowExistingFolder:
    """Resolution behaviour when the TV library already contains series folders.

    The ``memory_configured`` / ``app_temp`` / ``memory`` fixtures are requested
    for their side effects only; they are defined outside this class.
    """

    @staticmethod
    def _tv_root():
        """Return the ``tv_show`` library path from the memory singleton as a ``Path``."""
        # Imported at call time, exactly as each individual test used to do.
        from alfred.infrastructure.persistence import get_memory

        return Path(get_memory().ltm.library_paths.get("tv_show"))

    def _make_series_folder(self, tv_root, name):
        """Create a series folder in the tv library and return its path."""
        path = tv_root / name
        path.mkdir(parents=True, exist_ok=True)
        return path

    def test_uses_existing_single_folder(self, memory_configured, app_temp):
        """When exactly one folder matches title+year, use it regardless of group."""
        self._make_series_folder(self._tv_root(), "Oz.1997.1080p.WEBRip.x265-RARBG")

        result = _use_case().execute(
            release_name="Oz.S02.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S02E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        assert result.status == "ok"
        assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-RARBG"
        assert result.is_new_series_folder is False

    def test_needs_clarification_on_multiple_folders(self, memory_configured, app_temp):
        """When multiple folders match, return needs_clarification with options."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
        )

        assert result.status == "needs_clarification"
        assert result.question is not None
        assert len(result.options) == 2
        assert "Slow.Horses.2022.1080p.WEBRip.x265-RARBG" in result.options
        assert "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST" in result.options

    def test_confirmed_folder_bypasses_detection(self, memory_configured, app_temp):
        """confirmed_folder skips the folder search."""
        chosen = "Slow.Horses.2022.1080p.WEBRip.x265-RARBG"
        self._make_series_folder(self._tv_root(), chosen)

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
            confirmed_folder=chosen,
        )

        assert result.status == "ok"
        assert result.series_folder_name == chosen

    def test_to_dict_needs_clarification(self, memory_configured, app_temp):
        """``to_dict`` of a clarification result exposes a question and an options list."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Oz.S03.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S03E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        d = result.to_dict()
        assert d["status"] == "needs_clarification"
        assert "question" in d
        assert isinstance(d["options"], list)

    def test_to_dict_error(self, memory):
        """``to_dict`` of an error result exposes ``error`` and ``message`` keys."""
        result = _use_case().execute(
            release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S01E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        d = result.to_dict()
        assert d["status"] == "error"
        assert "error" in d
        assert "message" in d
# ---------------------------------------------------------------------------
# _find_existing_series_folders
# ---------------------------------------------------------------------------
class TestFindExistingSeriesFolders:
    """Direct tests of ``_find_existing_series_folders(root, title, year)``."""

    @staticmethod
    def _mkdirs(root, *names):
        """Create each of *names* as a directory directly under *root*."""
        for folder_name in names:
            (root / folder_name).mkdir()

    def test_empty_library(self, tmp_path):
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert matches == []

    def test_nonexistent_root(self, tmp_path):
        missing_root = tmp_path / "nope"
        assert _find_existing_series_folders(missing_root, "Oz", 1997) == []

    def test_single_match(self, tmp_path):
        self._mkdirs(tmp_path, "Oz.1997.1080p.WEBRip.x265-KONTRAST")
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert matches == ["Oz.1997.1080p.WEBRip.x265-KONTRAST"]

    def test_multiple_matches(self, tmp_path):
        self._mkdirs(
            tmp_path,
            "Oz.1997.1080p.WEBRip.x265-KONTRAST",
            "Oz.1997.1080p.WEBRip.x265-RARBG",
        )
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(matches) == 2
        # The returned names must already be in sorted order.
        assert sorted(matches) == matches

    def test_no_match_different_year(self, tmp_path):
        self._mkdirs(tmp_path, "Oz.1997.1080p.WEBRip.x265-KONTRAST")
        assert _find_existing_series_folders(tmp_path, "Oz", 2000) == []

    def test_no_match_different_title(self, tmp_path):
        self._mkdirs(tmp_path, "Oz.1997.1080p.WEBRip.x265-KONTRAST")
        assert _find_existing_series_folders(tmp_path, "Breaking Bad", 2008) == []

    def test_ignores_files_not_dirs(self, tmp_path):
        self._mkdirs(tmp_path, "Oz.1997.1080p.WEBRip.x265-KONTRAST")
        # A plain file sharing the prefix must not be reported.
        (tmp_path / "Oz.1997.some.file.txt").touch()
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(matches) == 1

    def test_case_insensitive_prefix(self, tmp_path):
        # Folder title stored in upper case, queried as "Oz"
        self._mkdirs(tmp_path, "OZ.1997.1080p.WEBRip.x265-KONTRAST")
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(matches) == 1

    def test_title_with_special_chars_sanitised(self, tmp_path):
        # "Star Wars: Andor" → sanitised (colon removed) + spaces→dots → "Star.Wars.Andor.2022"
        self._mkdirs(tmp_path, "Star.Wars.Andor.2022.1080p.WEBRip.x265-GROUP")
        matches = _find_existing_series_folders(tmp_path, "Star Wars: Andor", 2022)
        assert len(matches) == 1
def test_clarification_options_none_yields_empty_list(self):
    """``options=None`` is serialised as an empty list."""
    pending = ResolvedSeasonDestination(
        status="needs_clarification", question="q", options=None
    )
    serialised = pending.to_dict()
    assert serialised["options"] == []
+138
View File
@@ -0,0 +1,138 @@
"""Tests for ``alfred.application.movies.search_movie.SearchMovieUseCase``.
The use case wraps ``TMDBClient.search_media`` and converts results / errors
into a ``SearchMovieResponse`` envelope (status="ok"|"error").
Coverage:
- ``TestSuccess`` — full MediaResult with imdb_id → ok+imdb_id; missing
imdb_id → ok+no_imdb_id; TV media_type preserved.
- ``TestErrorTranslation`` — ``TMDBNotFoundError`` → not_found,
``TMDBConfigurationError`` → configuration_error,
``TMDBAPIError`` → api_error, ``ValueError`` → validation_failed.
- ``TestPassThrough`` — query is forwarded to the client unchanged.
TMDBClient is fully mocked — no real HTTP.
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from alfred.application.movies.search_movie import SearchMovieUseCase
from alfred.infrastructure.api.tmdb.dto import MediaResult
from alfred.infrastructure.api.tmdb.exceptions import (
TMDBAPIError,
TMDBConfigurationError,
TMDBNotFoundError,
)
@pytest.fixture
def client():
    """Provide a ``MagicMock`` standing in for the TMDB client."""
    tmdb_double = MagicMock()
    return tmdb_double
@pytest.fixture
def use_case(client):
    """Build the ``SearchMovieUseCase`` under test around the mocked client."""
    subject = SearchMovieUseCase(client)
    return subject
def _result(**kw) -> MediaResult:
    """Build a ``MediaResult`` with default field values; ``kw`` overrides any of them."""
    base_fields = {
        "tmdb_id": 1,
        "title": "Inception",
        "media_type": "movie",
        "imdb_id": "tt1375666",
        "overview": "o",
        "release_date": "2010-07-15",
        "poster_path": "/x.jpg",
        "vote_average": 8.4,
    }
    # Caller-supplied keywords win over the defaults.
    return MediaResult(**{**base_fields, **kw})
# --------------------------------------------------------------------------- #
# Success paths #
# --------------------------------------------------------------------------- #
class TestSuccess:
    """Paths where the client returns a ``MediaResult``."""

    def test_full_result_returns_ok_with_imdb_id(self, client, use_case):
        client.search_media.return_value = _result()

        response = use_case.execute("Inception")

        assert response.status == "ok"
        assert response.error is None
        assert response.imdb_id == "tt1375666"
        assert response.title == "Inception"
        assert response.media_type == "movie"
        assert response.tmdb_id == 1
        assert response.vote_average == 8.4

    def test_tv_result(self, client, use_case):
        tv_hit = _result(media_type="tv", title="Breaking Bad", imdb_id="tt0903747")
        client.search_media.return_value = tv_hit

        response = use_case.execute("Breaking Bad")

        assert response.status == "ok"
        assert response.media_type == "tv"
        assert response.imdb_id == "tt0903747"

    def test_missing_imdb_id_returns_ok_with_no_imdb_id_error(self, client, use_case):
        client.search_media.return_value = _result(imdb_id=None)

        response = use_case.execute("Inception")

        # Status stays "ok" while the missing id is flagged through error/message.
        assert response.status == "ok"
        assert response.error == "no_imdb_id"
        assert response.message is not None
        assert "Inception" in response.message
        assert response.imdb_id is None
        assert response.title == "Inception"
# --------------------------------------------------------------------------- #
# Error translation #
# --------------------------------------------------------------------------- #
class TestErrorTranslation:
    """Exceptions raised by the client become ``status="error"`` responses."""

    def test_not_found(self, client, use_case):
        client.search_media.side_effect = TMDBNotFoundError("no match")
        response = use_case.execute("ghost")
        assert (response.status, response.error) == ("error", "not_found")
        assert "no match" in response.message

    def test_configuration_error(self, client, use_case):
        client.search_media.side_effect = TMDBConfigurationError("missing key")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "configuration_error")

    def test_api_error(self, client, use_case):
        client.search_media.side_effect = TMDBAPIError("500 oops")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "api_error")
        assert "500" in response.message

    def test_validation_error(self, client, use_case):
        client.search_media.side_effect = ValueError("query too long")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "validation_failed")
        assert "too long" in response.message
# --------------------------------------------------------------------------- #
# Pass-through #
# --------------------------------------------------------------------------- #
class TestPassThrough:
    """The query string reaches the client unchanged."""

    def test_query_forwarded_verbatim(self, client, use_case):
        query = "Inception"
        client.search_media.return_value = _result()

        use_case.execute(query)

        client.search_media.assert_called_once_with(query)
+147
View File
@@ -0,0 +1,147 @@
"""Tests for ``alfred.application.torrents.search_torrents.SearchTorrentsUseCase``.
Wraps ``KnabenClient.search`` and converts ``TorrentResult`` objects into
plain dicts inside a ``SearchTorrentsResponse`` envelope.
Coverage:
- ``TestSuccess`` — multiple results → status="ok" + ``count`` + dict shape.
- ``TestEmptyResults`` — empty list from client → status="error",
error="not_found".
- ``TestErrorTranslation`` — ``KnabenNotFoundError`` → not_found,
``KnabenAPIError`` → api_error, ``ValueError`` → validation_failed.
- ``TestPassThrough`` — query + limit are forwarded to the client.
KnabenClient is fully mocked — no real HTTP.
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from alfred.application.torrents.search_torrents import SearchTorrentsUseCase
from alfred.infrastructure.api.knaben.dto import TorrentResult
from alfred.infrastructure.api.knaben.exceptions import (
KnabenAPIError,
KnabenNotFoundError,
)
@pytest.fixture
def client():
    """Provide a ``MagicMock`` standing in for the Knaben client."""
    knaben_double = MagicMock()
    return knaben_double
@pytest.fixture
def use_case(client):
    """Build the ``SearchTorrentsUseCase`` under test around the mocked client."""
    subject = SearchTorrentsUseCase(client)
    return subject
def _torrent(**kw) -> TorrentResult:
    """Build a ``TorrentResult`` with default field values; ``kw`` overrides any of them."""
    base_fields = {
        "title": "Inception.2010.1080p",
        "size": "10 GB",
        "seeders": 500,
        "leechers": 50,
        "magnet": "magnet:?xt=abc",
        "info_hash": "abc",
        "tracker": "rarbg",
        "upload_date": "2020-01-01",
        "category": "movie",
    }
    # Caller-supplied keywords win over the defaults.
    return TorrentResult(**{**base_fields, **kw})
# --------------------------------------------------------------------------- #
# Success #
# --------------------------------------------------------------------------- #
class TestSuccess:
    """Client results are converted to plain dicts in an ``ok`` response."""

    def test_single_result_serialized_to_dict(self, client, use_case):
        client.search.return_value = [_torrent()]

        response = use_case.execute("Inception")

        assert response.status == "ok"
        assert response.count == 1
        assert len(response.torrents) == 1

        entry = response.torrents[0]
        # The result's ``title`` is exposed under the ``name`` key.
        expected_fields = {
            "name": "Inception.2010.1080p",
            "size": "10 GB",
            "seeders": 500,
            "leechers": 50,
            "info_hash": "abc",
            "tracker": "rarbg",
            "upload_date": "2020-01-01",
            "category": "movie",
        }
        for key, value in expected_fields.items():
            assert entry[key] == value
        assert entry["magnet"].startswith("magnet:")

    def test_multiple_results(self, client, use_case):
        client.search.return_value = [_torrent(title=t) for t in ("A", "B", "C")]

        response = use_case.execute("x")

        assert response.count == 3
        names = [item["name"] for item in response.torrents]
        assert names == ["A", "B", "C"]
# --------------------------------------------------------------------------- #
# Empty #
# --------------------------------------------------------------------------- #
class TestEmptyResults:
    """An empty result list from the client is reported as ``not_found``."""

    def test_empty_list_becomes_not_found(self, client, use_case):
        client.search.return_value = []

        response = use_case.execute("ghost")

        assert (response.status, response.error) == ("error", "not_found")
        # The message must mention the query that produced nothing.
        assert "ghost" in response.message
# --------------------------------------------------------------------------- #
# Error translation #
# --------------------------------------------------------------------------- #
class TestErrorTranslation:
    """Exceptions raised by the client become ``status="error"`` responses."""

    def test_not_found(self, client, use_case):
        client.search.side_effect = KnabenNotFoundError("nope")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "not_found")
        assert "nope" in response.message

    def test_api_error(self, client, use_case):
        client.search.side_effect = KnabenAPIError("rate limited")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "api_error")
        assert "rate" in response.message

    def test_validation_error(self, client, use_case):
        client.search.side_effect = ValueError("too long")
        response = use_case.execute("x")
        assert (response.status, response.error) == ("error", "validation_failed")
# --------------------------------------------------------------------------- #
# Pass-through #
# --------------------------------------------------------------------------- #
class TestPassThrough:
    """Query and limit reach ``client.search`` as given."""

    def test_default_limit_forwarded(self, client, use_case):
        client.search.return_value = [_torrent()]

        use_case.execute("Inception")

        # With no explicit limit the client is called with limit=10.
        client.search.assert_called_once_with("Inception", limit=10)

    def test_custom_limit_forwarded(self, client, use_case):
        client.search.return_value = [_torrent()]

        use_case.execute("Inception", limit=25)

        client.search.assert_called_once_with("Inception", limit=25)
+35 -12
View File
@@ -1,8 +1,23 @@
"""Pytest configuration and shared fixtures."""
"""Shared pytest fixtures for the Alfred test suite.
Provides three categories of fixtures used across all test packages:
1. **Isolation** — ``mock_memory_storage_dir`` (autouse) and ``temp_dir``
ensure no test ever touches the real ``data/`` directory.
2. **Memory builders** — ``memory``, ``memory_with_config``,
``memory_with_history``, ``memory_with_search_results``,
``memory_with_library`` produce ``Memory`` instances in known states for
tests that consume the global singleton.
3. **Test doubles** — ``mock_llm``, ``mock_llm_with_tool_call``,
``mock_tmdb_client``, ``mock_knaben_client``, ``mock_qbittorrent_client``,
``mock_deepseek``, and the filesystem fixture ``real_folder``.
All memory fixtures use the current component-based LTM API:
``ltm.library_paths.set(collection, path)`` and
``ltm.workspace.download``/``torrent``. Legacy flat attributes
(``movie_folder``, ``tvshow_folder``, ``download_folder``) no longer exist.
"""
# TODO: The directory was moved, so this should no longer be necessary — verify before removing.
# Add the parent folder (brain) to PYTHONPATH
# sys.path.insert(0, str(Path(__file__).parent.parent))
import shutil
import sys
import tempfile
@@ -49,11 +64,15 @@ def memory(temp_dir):
@pytest.fixture
def memory_with_config(memory):
"""Memory with pre-configured folders."""
memory.ltm.download_folder = "/tmp/downloads"
memory.ltm.movie_folder = "/tmp/movies"
memory.ltm.tvshow_folder = "/tmp/tvshows"
memory.ltm.torrent_folder = "/tmp/torrents"
"""Memory with pre-configured workspace and library paths.
Uses the current component-based LTM API. The values are arbitrary
placeholders — tests that care about the actual paths should override.
"""
memory.ltm.workspace.download = "/tmp/downloads"
memory.ltm.workspace.torrent = "/tmp/torrents"
memory.ltm.library_paths.set("movies", "/tmp/movies")
memory.ltm.library_paths.set("tv_shows", "/tmp/tvshows")
return memory
@@ -105,8 +124,12 @@ def memory_with_history(memory):
@pytest.fixture
def memory_with_library(memory):
"""Memory with movies in library."""
memory.ltm.library["movies"] = [
"""Memory pre-populated with movies and TV shows.
Uses the current ``Library`` component (``library.movies`` and
``library.tv_shows`` lists of dicts).
"""
memory.ltm.library.movies = [
{
"imdb_id": "tt1375666",
"title": "Inception",
@@ -124,7 +147,7 @@ def memory_with_library(memory):
"added_at": "2024-01-16T14:20:00",
},
]
memory.ltm.library["tv_shows"] = [
memory.ltm.library.tv_shows = [
{
"imdb_id": "tt0944947",
"title": "Game of Thrones",
+152
View File
@@ -0,0 +1,152 @@
"""Tests for ``alfred.domain.shared.media`` — pure ffprobe dataclasses.
Exercises:
- ``AudioTrack`` / ``SubtitleTrack`` / ``VideoTrack`` — simple dataclass construction.
- ``VideoTrack.resolution`` — width-priority resolution detection (handles
widescreen/scope crops where width > height bucket), with height fallback
when width is missing.
- ``MediaInfo.resolution`` — delegates to the primary video track.
- ``MediaInfo.audio_languages`` — order-preserving deduplication.
- ``MediaInfo.is_multi_audio`` — multi-language detection.
"""
from __future__ import annotations
import pytest
from alfred.domain.shared.media import AudioTrack, MediaInfo, SubtitleTrack, VideoTrack
class TestTracks:
    """Default flag values of the three track dataclasses."""

    def test_audio_track_defaults(self):
        audio = AudioTrack(
            index=0, codec="aac", channels=2, channel_layout="stereo", language="eng"
        )
        assert audio.is_default is False

    def test_subtitle_track_defaults(self):
        subtitle = SubtitleTrack(index=2, codec="subrip", language="fre")
        assert subtitle.is_default is False
        assert subtitle.is_forced is False

    def test_video_track_defaults(self):
        video = VideoTrack(index=0, codec="hevc", width=1920, height=1080)
        assert video.is_default is False
class TestVideoTrackResolution:
    """``VideoTrack.resolution`` across width/height combinations."""

    @staticmethod
    def _resolution(width, height):
        """Return ``resolution`` of a codec-less track with the given dimensions."""
        track = VideoTrack(index=0, codec=None, width=width, height=height)
        return track.resolution

    def test_no_dimensions(self):
        assert self._resolution(None, None) is None

    @pytest.mark.parametrize(
        "w,expected",
        [
            (3840, "2160p"),  # UHD lower bound
            (3996, "2160p"),  # cinema 4K
            (1920, "1080p"),
            (1280, "720p"),
            (720, "576p"),
            (640, "480p"),
        ],
    )
    def test_width_priority(self, w, expected):
        # Height is pinned at 1080 so only the width can drive the result.
        assert self._resolution(w, 1080) == expected

    def test_widescreen_scope_crop(self):
        # 1920x960 (scope crop) → still 1080p because width-priority
        assert self._resolution(1920, 960) == "1080p"

    @pytest.mark.parametrize(
        "h,expected",
        [
            (2160, "2160p"),
            (1080, "1080p"),
            (720, "720p"),
            (576, "576p"),
            (480, "480p"),
        ],
    )
    def test_height_fallback_when_width_missing(self, h, expected):
        assert self._resolution(None, h) == expected

    def test_width_below_buckets_falls_to_height(self):
        # width=320 falls below every bucket; falls back to f"{h}p"
        assert self._resolution(320, 240) == "240p"

    def test_width_only_below_buckets(self):
        # width=200, no height → f"{w}w" sentinel
        assert self._resolution(200, None) == "200w"
class TestMediaInfoResolutionDelegation:
    """``MediaInfo`` video properties reflect its first video track."""

    def test_no_video_track(self):
        empty_info = MediaInfo()
        assert empty_info.resolution is None

    def test_delegates_to_primary_video(self):
        primary = VideoTrack(index=0, codec="hevc", width=1920, height=1080)
        info = MediaInfo(video_tracks=[primary])

        assert info.resolution == "1080p"
        assert (info.width, info.height) == (1920, 1080)
        assert info.video_codec == "hevc"

    def test_multiple_video_tracks_uses_first(self):
        main_video = VideoTrack(index=0, codec="hevc", width=3840, height=2160)
        cover_art = VideoTrack(index=1, codec="mjpeg", width=320, height=240)
        info = MediaInfo(video_tracks=[main_video, cover_art])

        assert info.resolution == "2160p"
class TestAudioLanguages:
    """``MediaInfo.audio_languages`` and ``MediaInfo.is_multi_audio``."""

    def test_empty(self):
        empty_info = MediaInfo()
        assert empty_info.audio_languages == []

    def test_dedup_preserves_order(self):
        tracks = [
            AudioTrack(0, "eac3", 6, "5.1", "eng"),
            AudioTrack(1, "ac3", 6, "5.1", "fre"),
            AudioTrack(2, "ac3", 2, "stereo", "eng"),  # duplicate eng
            AudioTrack(3, "aac", 2, "stereo", None),  # ignored
        ]
        info = MediaInfo(audio_tracks=tracks)
        assert info.audio_languages == ["eng", "fre"]

    def test_all_none_languages(self):
        untagged = [
            AudioTrack(0, "aac", 2, "stereo", None),
            AudioTrack(1, "aac", 2, "stereo", None),
        ]
        info = MediaInfo(audio_tracks=untagged)
        assert info.audio_languages == []

    def test_is_multi_audio_false_single_lang(self):
        only_english = [AudioTrack(0, "aac", 2, "stereo", "eng")]
        info = MediaInfo(audio_tracks=only_english)
        assert info.is_multi_audio is False

    def test_is_multi_audio_true(self):
        two_languages = [
            AudioTrack(0, "aac", 2, "stereo", "eng"),
            AudioTrack(1, "aac", 2, "stereo", "fre"),
        ]
        info = MediaInfo(audio_tracks=two_languages)
        assert info.is_multi_audio is True
+283
View File
@@ -0,0 +1,283 @@
"""Tests for ``alfred.domain.release`` — release-name parser.
Covers the public surface used by the resolver / move pipeline:
- ``parse_release`` — well-formed scene names (TV episodes, season packs,
movies), site-tagged names, malformed names recovered via sanitization,
and irrecoverable names that fall back to ``media_type="unknown"``.
- ``ParsedRelease`` — derived properties (``is_season_pack``,
``show_folder_name``, ``season_folder_name``, ``episode_filename``,
``movie_folder_name``, ``movie_filename``) including the Windows-forbidden
character sanitizer and the episode-stripping helper for season folders.
These tests exercise the parser end-to-end through real YAML knowledge
files; no monkeypatching of the knowledge layer is performed.
"""
from __future__ import annotations
import pytest
from alfred.domain.release.services import parse_release
from alfred.domain.release.value_objects import ParsedRelease
class TestParseTVEpisode:
    """TV releases: single episodes, multi-episode names and season packs."""

    def test_basic_tv_episode(self):
        parsed = parse_release("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

        assert parsed.title == "Oz"
        assert (parsed.season, parsed.episode) == (3, 1)
        assert parsed.episode_end is None
        assert parsed.quality == "1080p"
        assert parsed.source == "WEBRip"
        assert parsed.codec == "x265"
        assert parsed.group == "KONTRAST"
        assert parsed.media_type == "tv_show"
        assert parsed.parse_path == "direct"
        assert parsed.is_season_pack is False

    def test_multi_episode(self):
        parsed = parse_release("Archer.S14E09E10.1080p.WEB.x265-GRP")
        assert parsed.season == 14
        assert parsed.episode == 9
        assert parsed.episode_end == 10

    def test_nxnn_alt_form(self):
        # Alt season/episode form: 1x05 instead of S01E05.
        parsed = parse_release("Some.Show.1x05.720p.HDTV.x264-GRP")
        assert parsed.season == 1
        assert parsed.episode == 5
        assert parsed.episode_end is None
        assert parsed.media_type == "tv_show"

    def test_nxnnxnn_multi_episode_alt_form(self):
        parsed = parse_release("Some.Show.2x07x08.1080p.WEB.x265-GRP")
        assert parsed.season == 2
        assert parsed.episode == 7
        assert parsed.episode_end == 8

    def test_season_pack(self):
        parsed = parse_release("Oz.S03.1080p.WEBRip.x265-KONTRAST")
        assert parsed.season == 3
        assert parsed.episode is None
        assert parsed.is_season_pack is True
        assert parsed.media_type == "tv_show"
class TestParseMovie:
    """Movie release names."""

    def test_basic_movie(self):
        parsed = parse_release("Inception.2010.1080p.BluRay.x264-GROUP")

        assert parsed.media_type == "movie"
        assert parsed.title == "Inception"
        assert parsed.year == 2010
        assert parsed.season is None
        assert parsed.episode is None
        assert parsed.quality == "1080p"
        assert parsed.source == "BluRay"
        assert parsed.codec == "x264"
        assert parsed.group == "GROUP"

    def test_movie_multi_word_title(self):
        parsed = parse_release("The.Dark.Knight.2008.2160p.UHD.BluRay.x265-TERMINAL")
        assert parsed.title == "The.Dark.Knight"
        assert parsed.year == 2008
        assert parsed.quality == "2160p"

    def test_movie_without_year_still_movie_if_tech_present(self):
        parsed = parse_release("UntitledFilm.1080p.WEBRip.x264-GRP")
        # No season, no year, but tech markers → still movie
        assert parsed.media_type == "movie"
        assert parsed.year is None
class TestParseEdgeCases:
    """Site tags, alternative separators, malformed and evidence-free names."""

    def test_site_tag_prefix_stripped(self):
        parsed = parse_release("[ OxTorrent.vc ] The.Title.S01E01.1080p.WEB.x265-GRP")
        assert parsed.site_tag == "OxTorrent.vc"
        assert parsed.parse_path == "sanitized"
        assert (parsed.season, parsed.episode) == (1, 1)

    def test_site_tag_suffix_stripped(self):
        parsed = parse_release("The.Title.S01E01.1080p.WEB.x265-NTb[TGx]")
        assert parsed.site_tag == "TGx"
        # Suffix-tagged names are well-formed (only [] in tag → after strip clean)
        assert parsed.season == 1

    def test_irrecoverably_malformed(self):
        # @ is a forbidden char and not stripped by _sanitize → stays malformed
        parsed = parse_release("foo@bar@baz")
        assert parsed.media_type == "unknown"
        assert parsed.parse_path == "ai"
        assert parsed.group == "UNKNOWN"

    def test_empty_unknown_when_no_evidence(self):
        parsed = parse_release("Some.Random.Title")
        # No season, no year, no tech markers → unknown
        assert parsed.media_type == "unknown"

    def test_missing_group_defaults_to_unknown(self):
        parsed = parse_release("Movie.2020.1080p.WEBRip.x265")
        # No "-GROUP" suffix → group = "UNKNOWN"
        assert parsed.group == "UNKNOWN"

    def test_yts_bracket_release(self):
        # YTS-style: spaces, parens for year, multiple bracketed tech tokens.
        # The tokenizer must handle ' ', '(', ')', '[', ']' transparently.
        parsed = parse_release("The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]")
        assert parsed.title == "The.Father"
        assert parsed.year == 2020
        assert parsed.quality == "1080p"
        assert parsed.source == "WEBRip"
        assert parsed.audio_channels == "5.1"
        assert parsed.media_type == "movie"

    def test_human_friendly_spaces(self):
        # Spaces as separators (no brackets).
        parsed = parse_release("Inception 2010 1080p BluRay x264-GROUP")
        assert parsed.title == "Inception"
        assert parsed.year == 2010
        assert parsed.quality == "1080p"
        assert parsed.codec == "x264"
        assert parsed.group == "GROUP"
        assert parsed.media_type == "movie"

    def test_underscore_separators(self):
        # Old usenet style: underscores between tokens.
        parsed = parse_release("Some_Show_S01E01_1080p_WEB_x265-GRP")
        assert (parsed.season, parsed.episode) == (1, 1)
        assert parsed.quality == "1080p"
        assert parsed.group == "GRP"
class TestParseAudioVideoEdition:
    """Audio channel, language and edition tokens."""

    def test_audio_codec_and_channels(self):
        # Only the channel layout is checked here, despite the test name.
        parsed = parse_release("Movie.2020.1080p.BluRay.DTS.5.1.x264-GRP")
        assert parsed.audio_channels == "5.1"

    def test_language_token(self):
        parsed = parse_release("Movie.2020.MULTI.1080p.WEBRip.x265-GRP")
        assert "MULTI" in parsed.languages

    def test_edition_token(self):
        parsed = parse_release("Movie.2020.UNRATED.1080p.BluRay.x264-GRP")
        assert parsed.edition == "UNRATED"
class TestParsedReleaseFolderNames:
    """Folder-name and filename builders on ``ParsedRelease``."""

    def _parsed_tv(self) -> ParsedRelease:
        """Parse the reference single-episode TV release."""
        return parse_release("Oz.S03E01.1080p.WEBRip.x265-KONTRAST")

    def _parsed_movie(self) -> ParsedRelease:
        """Parse the reference movie release."""
        return parse_release("Inception.2010.1080p.BluRay.x264-GROUP")

    def test_show_folder_name(self):
        folder = self._parsed_tv().show_folder_name("Oz", 1997)
        assert folder == "Oz.1997.1080p.WEBRip.x265-KONTRAST"

    def test_show_folder_name_strips_windows_chars(self):
        # Colons and question marks are Windows-forbidden — must be stripped.
        folder = self._parsed_tv().show_folder_name("Oz: The Series?", 1997)
        for forbidden in (":", "?"):
            assert forbidden not in folder

    def test_season_folder_name_strips_episode(self):
        # Episode token Exx is stripped, Sxx stays
        folder = self._parsed_tv().season_folder_name()
        assert "S03" in folder
        assert "E01" not in folder

    def test_season_folder_name_multi_episode(self):
        parsed = parse_release("Archer.S14E09E10E11.1080p.WEB.x265-GRP")
        folder = parsed.season_folder_name()
        assert "S14" in folder
        for episode_token in ("E09", "E10", "E11"):
            assert episode_token not in folder

    def test_episode_filename_with_title(self):
        filename = self._parsed_tv().episode_filename("The Routine", "mkv")
        assert filename.endswith(".mkv")
        for fragment in ("S03E01", "The.Routine", "KONTRAST"):
            assert fragment in filename

    def test_episode_filename_without_title(self):
        filename = self._parsed_tv().episode_filename(None, "mkv")
        assert filename.endswith(".mkv")
        assert "S03E01" in filename

    def test_episode_filename_strips_ext_dot(self):
        parsed = self._parsed_tv()
        # Whether the caller passes "mkv" or ".mkv", we get a single dot.
        bare_ext = parsed.episode_filename(None, "mkv")
        dotted_ext = parsed.episode_filename(None, ".mkv")
        assert bare_ext == dotted_ext
        assert "..mkv" not in bare_ext

    def test_movie_folder_name(self):
        folder = self._parsed_movie().movie_folder_name("Inception", 2010)
        assert folder == "Inception.2010.1080p.BluRay.x264-GROUP"

    def test_movie_filename(self):
        filename = self._parsed_movie().movie_filename("Inception", 2010, "mkv")
        assert filename == "Inception.2010.1080p.BluRay.x264-GROUP.mkv"
class TestParsedReleaseInvariants:
    """Properties that hold for every ``ParsedRelease``."""

    def test_raw_is_preserved(self):
        original_name = "Oz.S03E01.1080p.WEBRip.x265-KONTRAST"
        assert parse_release(original_name).raw == original_name

    def test_languages_defaults_to_empty_list_not_none(self):
        parsed = parse_release("Movie.2020.1080p.BluRay.x264-GRP")
        # __post_init__ ensures languages is a list, never None
        assert parsed.languages == []

    def test_tech_string_joined(self):
        parsed = parse_release("Movie.2020.1080p.BluRay.x264-GRP")
        assert parsed.tech_string == "1080p.BluRay.x264"

    def test_tech_string_partial(self):
        # Codec-only release (no quality/source): tech_string == codec
        parsed = parse_release("Show.S01E01.x265-GRP")
        assert parsed.tech_string == "x265"
        assert parsed.codec == "x265"
        assert parsed.quality is None
        assert parsed.source is None

    @pytest.mark.parametrize(
        "name,expected_type",
        [
            ("Show.S01E01.1080p.WEB.x265-GRP", "tv_show"),
            ("Movie.2020.1080p.BluRay.x264-GRP", "movie"),
            ("Random.Title.With.Nothing", "unknown"),
        ],
    )
    def test_media_type_inference(self, name, expected_type):
        inferred = parse_release(name).media_type
        assert inferred == expected_type
-504
View File
@@ -1,504 +0,0 @@
"""
Tests for alfred.domain.release.release_parser
Real-data cases sourced from /mnt/testipool/downloads/.
Covers: parsing, normalisation, naming methods, edge cases.
"""
from alfred.domain.release import parse_release
from alfred.domain.release.services import _normalise
from alfred.domain.release.value_objects import (
_sanitise_for_fs,
_strip_episode_from_normalised,
)
# ---------------------------------------------------------------------------
# _normalise
# ---------------------------------------------------------------------------
class TestNormalise:
    """``_normalise`` turns separators into single dots and trims the ends."""

    def test_dots_unchanged(self):
        already_clean = "Oz.S01.1080p.WEBRip.x265-KONTRAST"
        assert _normalise(already_clean) == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_spaces_become_dots(self):
        normalised = _normalise("Oz S01 1080p WEBRip x265-KONTRAST")
        assert normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_double_dots_collapsed(self):
        normalised = _normalise("Oz..S01..1080p")
        assert normalised == "Oz.S01.1080p"

    def test_leading_trailing_dots_stripped(self):
        normalised = _normalise(".Oz.S01.")
        assert normalised == "Oz.S01"

    def test_mixed_spaces_and_dots(self):
        # "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        normalised = _normalise(
            "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        )
        for unwanted in (" ", ".."):
            assert unwanted not in normalised
# ---------------------------------------------------------------------------
# _sanitise_for_fs
# ---------------------------------------------------------------------------
class TestSanitiseForFs:
    """``_sanitise_for_fs`` drops characters that are forbidden in file names."""

    def test_clean_string_unchanged(self):
        """A string without forbidden characters is returned as is."""
        clean = "Oz.S01.1080p-KONTRAST"
        assert _sanitise_for_fs(clean) == clean

    def test_removes_question_mark(self):
        """'?' is removed."""
        cleaned = _sanitise_for_fs("What's Up?")
        assert cleaned == "What's Up"

    def test_removes_colon(self):
        """':' is removed; the surrounding text is left alone."""
        cleaned = _sanitise_for_fs("He Said: She Said")
        assert cleaned == "He Said She Said"

    def test_removes_all_forbidden(self):
        """Each of ? : * " < > | and backslash is removed."""
        cleaned = _sanitise_for_fs('a?b:c*d"e<f>g|h\\i')
        assert cleaned == "abcdefghi"

    def test_apostrophe_kept(self):
        """The apostrophe is not treated as forbidden."""
        text = "What's Up"
        assert _sanitise_for_fs(text) == text

    def test_ellipsis_kept(self):
        """Trailing dots survive sanitisation."""
        text = "What If..."
        assert _sanitise_for_fs(text) == text
# ---------------------------------------------------------------------------
# _strip_episode_from_normalised
# ---------------------------------------------------------------------------
class TestStripEpisode:
    """``_strip_episode_from_normalised`` reduces SxxEyy markers to Sxx."""

    def test_strips_single_episode(self):
        """S01E01 becomes S01."""
        stripped = _strip_episode_from_normalised(
            "Oz.S01E01.1080p.WEBRip.x265-KONTRAST"
        )
        assert stripped == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_strips_multi_episode(self):
        """A chained episode marker (E09E10E11) is removed as a whole."""
        stripped = _strip_episode_from_normalised(
            "Archer.S14E09E10E11.1080p.HULU.WEB-DL-NTb"
        )
        assert stripped == "Archer.S14.1080p.HULU.WEB-DL-NTb"

    def test_season_pack_unchanged(self):
        """A name without an episode marker is returned unchanged."""
        season_pack = "Oz.S01.1080p.WEBRip.x265-KONTRAST"
        assert _strip_episode_from_normalised(season_pack) == season_pack

    def test_case_insensitive(self):
        """Lower-case markers are handled and the original casing is kept."""
        stripped = _strip_episode_from_normalised("oz.s01e01.1080p-KONTRAST")
        assert stripped == "oz.s01.1080p-KONTRAST"
# ---------------------------------------------------------------------------
# parse_release — Season packs (dots)
# ---------------------------------------------------------------------------
class TestSeasonPackDots:
    """Dot-separated season packs taken from real releases."""

    def test_oz_s01_kontrast(self):
        """Plain season pack: every field is extracted and no episode is set."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Oz"
        assert release.season == 1
        assert release.episode is None
        assert release.quality == "1080p"
        assert release.source == "WEBRip"
        assert release.codec == "x265"
        assert release.group == "KONTRAST"
        assert release.is_season_pack
        assert not release.is_movie

    def test_fallout_s02_kontrast(self):
        """A year between title and season is parsed as the year."""
        release = parse_release("Fallout.2024.S02.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Fallout"
        assert release.year == 2024
        assert release.season == 2
        assert release.episode is None
        assert release.group == "KONTRAST"

    def test_archer_s03_edge2020(self):
        """Audio tokens (DDP.5.1) do not disturb codec or group detection."""
        release = parse_release(
            "Archer.2009.S03.1080p.BluRay.DDP.5.1.x265-EDGE2020"
        )
        assert release.title == "Archer"
        assert release.year == 2009
        assert release.season == 3
        assert release.quality == "1080p"
        assert release.source == "BluRay"
        assert release.codec == "x265"
        assert release.group == "EDGE2020"

    def test_fargo_s05_hulu_webdl(self):
        """Season pack carrying a streaming-service token (HULU)."""
        release = parse_release(
            "Fargo.S05.1080p.HULU.WEB-DL.x265.10bit-Protozoan"
        )
        assert release.title == "Fargo"
        assert release.season == 5
        assert release.quality == "1080p"
        assert release.group == "Protozoan"

    def test_xfiles_s01_bluray_rarbg(self):
        """A hyphen inside the title is kept."""
        release = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG")
        assert release.title == "The.X-Files"
        assert release.season == 1
        assert release.source == "BluRay"
        assert release.group == "RARBG"

    def test_gilmore_girls_s01_s07_repack(self):
        """For a season range (S01-S07), the first season number is reported."""
        release = parse_release(
            "Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH"
        )
        assert release.season == 1
        assert release.group == "MONOLITH"

    def test_plot_against_america_4k(self):
        """2160p release with a long technical tail."""
        release = parse_release(
            "The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1.x265-SH3LBY"
        )
        assert release.title == "The.Plot.Against.America"
        assert release.season == 1
        assert release.quality == "2160p"
        assert release.group == "SH3LBY"

    def test_foundation_with_year_in_title(self):
        """The year after the title is not absorbed into the title."""
        release = parse_release("Foundation.2021.S01.1080p.WEBRip.x265-RARBG")
        assert release.title == "Foundation"
        assert release.year == 2021
        assert release.season == 1
        assert release.group == "RARBG"

    def test_gen_v_s02(self):
        """A single-letter title word (V) stays part of the title."""
        release = parse_release("Gen.V.S02.1080p.WEBRip.x265-KONTRAST")
        assert release.title == "Gen.V"
        assert release.season == 2
        assert release.group == "KONTRAST"
# ---------------------------------------------------------------------------
# parse_release — Single episodes (dots)
# ---------------------------------------------------------------------------
class TestSingleEpisodeDots:
    """Dot-separated single-episode releases taken from real downloads."""

    def test_fallout_s02e01_elite(self):
        """Single episode: season and episode set, no range end, not a pack."""
        parsed = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        assert parsed.title == "Fallout"
        assert parsed.year == 2024
        assert parsed.season == 2
        assert parsed.episode == 1
        assert parsed.episode_end is None
        assert parsed.group == "ELiTE"
        assert not parsed.is_season_pack

    def test_mare_of_easttown_with_episode_title_in_filename(self):
        """The release folder name is what gets parsed here.

        Episode file names often embed the episode title; this case feeds the
        folder name instead.
        """
        parsed = parse_release(
            "Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA"
        )
        assert parsed.title == "Mare.of.Easttown"
        assert parsed.season == 1
        assert parsed.group == "PSA"

    def test_it_welcome_to_derry_s01e01(self):
        """A multi-word title starting with an upper-case acronym."""
        parsed = parse_release("IT.Welcome.to.Derry.S01E01.1080p.x265-ELiTE")
        assert parsed.title == "IT.Welcome.to.Derry"
        assert parsed.season == 1
        assert parsed.episode == 1
        assert parsed.group == "ELiTE"

    def test_landman_s02e01(self):
        """Minimal single-episode release."""
        parsed = parse_release("Landman.S02E01.1080p.x265-ELiTE")
        assert parsed.title == "Landman"
        assert parsed.season == 2
        assert parsed.episode == 1

    def test_prodiges_episode_with_number_in_title(self):
        """Two-digit season with a mixed-case group name.

        The real file is "Prodiges.S12E01.1ere.demi-finale..." (accented
        characters in the episode title); only the release part is parsed here.
        """
        parsed = parse_release("Prodiges.S12E01.1080p.WEB.H264-THESYNDiCATE")
        assert parsed.title == "Prodiges"
        assert parsed.season == 12
        assert parsed.episode == 1
        assert parsed.group == "THESYNDiCATE"
# ---------------------------------------------------------------------------
# parse_release — Multi-episode
# ---------------------------------------------------------------------------
class TestMultiEpisode:
    """Releases whose marker chains several episodes (S14E09E10E11)."""

    def test_archer_triple_episode(self):
        """Only the first two episode numbers of the chain are reported.

        Raw name: "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL
        DDP5 1 H 264-NTb".
        """
        parsed = parse_release(
            "Archer.2009.S14E09E10E11.Into.the.Cold.1080p.HULU.WEB-DL.DDP5.1.H.264-NTb"
        )
        assert parsed.season == 14
        assert parsed.episode == 9
        # episode_end is the second number (10), not the last one (11).
        assert parsed.episode_end == 10
# ---------------------------------------------------------------------------
# parse_release — Movies
# ---------------------------------------------------------------------------
class TestMovies:
    """Movie releases: no season marker, usually a year."""

    def test_another_round_yts(self):
        """Normalised form of "Another Round (2020) [1080p] [BluRay] [YTS.MX]"."""
        movie = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        assert movie.is_movie
        assert movie.title == "Another.Round"
        assert movie.year == 2020
        assert movie.quality == "1080p"
        assert movie.source == "BluRay"
        assert movie.group == "YTS"

    def test_godzilla_minus_one(self):
        """Three-word title with bit-depth and audio tokens before the group."""
        movie = parse_release(
            "Godzilla.Minus.One.2023.1080p.BluRay.x265.10bit.AAC5.1-YTS"
        )
        assert movie.title == "Godzilla.Minus.One"
        assert movie.year == 2023
        assert movie.is_movie
        assert movie.group == "YTS"

    def test_deadwood_movie_2019(self):
        """The word "Movie" inside the title does not interfere."""
        movie = parse_release("Deadwood.The.Movie.2019.1080p.BluRay.x265-RARBG")
        assert movie.year == 2019
        assert movie.is_movie
        assert movie.group == "RARBG"

    def test_revolver_2005_bluray(self):
        """Single-word title followed by the year."""
        movie = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        assert movie.title == "Revolver"
        assert movie.year == 2005
        assert movie.is_movie

    def test_the_xfiles_movie_1998(self):
        """A pre-2000 year is recognised."""
        movie = parse_release("The.X.Files.1998.1080p.BluRay.x265-RARBG")
        assert movie.year == 1998
        assert movie.is_movie
        assert movie.group == "RARBG"

    def test_movie_no_group(self):
        """Without a trailing "-GROUP" the group is "UNKNOWN"."""
        movie = parse_release("Jurassic.Park.1993.1080p.BluRay.x265")
        assert movie.is_movie
        assert movie.year == 1993
        assert movie.group == "UNKNOWN"

    def test_multi_language_movie(self):
        """A MULTi token leaves year and group detection intact."""
        movie = parse_release(
            "Jumanji.1995.MULTi.1080p.DSNP.WEB.H265-THESYNDiCATE"
        )
        assert movie.year == 1995
        assert movie.group == "THESYNDiCATE"
# ---------------------------------------------------------------------------
# parse_release — Space-separated (no dots)
# ---------------------------------------------------------------------------
class TestSpaceSeparated:
    """Release names that use spaces instead of dots."""

    def test_oz_spaces(self):
        """A space-separated season pack yields the usual fields."""
        parsed = parse_release("Oz S01 1080p WEBRip x265-KONTRAST")
        assert parsed.title == "Oz"
        assert parsed.season == 1
        assert parsed.quality == "1080p"
        assert parsed.group == "KONTRAST"

    def test_archer_spaces(self):
        """A long space-separated multi-episode name."""
        parsed = parse_release(
            "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb"
        )
        assert parsed.season == 14
        assert parsed.episode == 9
        assert parsed.group == "NTb"
# ---------------------------------------------------------------------------
# parse_release — tech_string
# ---------------------------------------------------------------------------
class TestTechString:
    """``tech_string`` and how it feeds the folder names."""

    def test_full_tech(self):
        """Quality, source and codec all present."""
        parsed = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert parsed.tech_string == "1080p.WEBRip.x265"

    def test_tech_string_used_in_folder_name(self):
        """The show folder name contains the tech string."""
        parsed = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        folder_name = parsed.show_folder_name("Oz", 1997)
        assert "1080p.WEBRip.x265" in folder_name

    def test_no_tech_fallback(self):
        """With no technical tokens the folder name carries "Unknown"."""
        parsed = parse_release("SomeShow.S01")
        folder_name = parsed.show_folder_name("SomeShow", 2020)
        assert "Unknown" in folder_name

    def test_4k_hdr(self):
        """2160p is detected despite the HDR and audio tokens that follow."""
        parsed = parse_release(
            "The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1-SH3LBY"
        )
        assert parsed.quality == "2160p"
# ---------------------------------------------------------------------------
# ParsedRelease — naming methods
# ---------------------------------------------------------------------------
class TestNamingMethods:
    """Folder and file name builders exposed by the parsed release."""

    def test_show_folder_name(self):
        """Title, year, tech string and group make up the show folder name."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        folder = release.show_folder_name("Oz", 1997)
        assert folder == "Oz.1997.1080p.WEBRip.x265-KONTRAST"

    def test_show_folder_name_sanitises_title(self):
        """A colon in the given title is dropped and spaces become dots."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        folder = release.show_folder_name("Star Wars: Andor", 2022)
        assert ":" not in folder
        assert "Star.Wars.Andor" in folder

    def test_season_folder_name_from_season_pack(self):
        """For a season pack, the season folder is the release name itself."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        folder = release.season_folder_name()
        assert folder == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_season_folder_name_strips_episode(self):
        """For a single episode, the episode part is removed."""
        release = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        folder = release.season_folder_name()
        assert folder == "Fallout.2024.S02.1080p.x265-ELiTE"

    def test_episode_filename_with_title(self):
        """The episode title is inserted, dotted, after the season marker."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename("The Routine", ".mkv")
        assert filename == "Oz.S01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv"

    def test_episode_filename_with_episode_number(self):
        """The SxxEyy marker is kept; the year is absent from the expected name."""
        release = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE")
        filename = release.episode_filename("The Beginning", ".mkv")
        assert filename == "Fallout.S02E01.The.Beginning.1080p.x265-ELiTE.mkv"

    def test_episode_filename_without_episode_title(self):
        """Passing ``None`` as the title adds nothing to the name."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename(None, ".mp4")
        assert filename == "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4"

    def test_episode_filename_sanitises_episode_title(self):
        """'?' is removed from the episode title; the apostrophe is kept."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        filename = release.episode_filename("What's Up?", ".mkv")
        assert "?" not in filename
        assert "What's.Up" in filename

    def test_episode_filename_strips_leading_dot_from_ext(self):
        """".mkv" and "mkv" produce the same file name."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        dotted_ext = release.episode_filename(None, ".mkv")
        bare_ext = release.episode_filename(None, "mkv")
        assert dotted_ext == bare_ext

    def test_movie_folder_name(self):
        """The movie folder name is built from title, year, tech and group."""
        release = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        folder = release.movie_folder_name("Another Round", 2020)
        assert folder == "Another.Round.2020.1080p.BluRay.x264-YTS"

    def test_movie_filename(self):
        """The movie file name is the dotted name plus the extension."""
        release = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS")
        filename = release.movie_filename("Another Round", 2020, ".mp4")
        assert filename == "Another.Round.2020.1080p.BluRay.x264-YTS.mp4"

    def test_movie_folder_same_as_show_folder(self):
        """Movie and show folder builders agree for the same title and year."""
        release = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        movie_folder = release.movie_folder_name("Revolver", 2005)
        show_folder = release.show_folder_name("Revolver", 2005)
        assert movie_folder == show_folder
# ---------------------------------------------------------------------------
# ParsedRelease — is_movie / is_season_pack
# ---------------------------------------------------------------------------
class TestMediaTypeFlags:
    """``is_movie`` and ``is_season_pack`` across the release kinds."""

    def test_season_pack_is_not_movie(self):
        """Season marker without an episode: a pack, not a movie."""
        release = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST")
        assert not release.is_movie
        assert release.is_season_pack

    def test_single_episode_is_not_season_pack(self):
        """Season plus episode: neither a movie nor a pack."""
        release = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST")
        assert not release.is_movie
        assert not release.is_season_pack

    def test_movie_is_not_season_pack(self):
        """No season marker: a movie, never a pack."""
        release = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG")
        assert release.is_movie
        assert not release.is_season_pack

    def test_no_season_no_year_treated_as_movie(self):
        """A name with neither S/E marker nor year still counts as a movie."""
        release = parse_release("SomeContent.1080p.WEBRip.x265-GROUP")
        assert release.is_movie
# ---------------------------------------------------------------------------
# Tricky real-world releases
# ---------------------------------------------------------------------------
class TestRealWorldEdgeCases:
    """Awkward release names seen in real downloads."""

    def test_angel_integrale_multi(self):
        """INTEGRALE and MULTI tokens do not hide year, quality or source."""
        info = parse_release(
            "Angel.1999.INTEGRALE.MULTI.1080p.WEBRip.10bits.x265.DD-Jarod"
        )
        assert info.year == 1999
        assert info.quality == "1080p"
        assert info.source == "WEBRip"

    def test_group_unknown_when_no_dash(self):
        """Without a "-GROUP" suffix the group is "UNKNOWN"."""
        info = parse_release("Oz.S01.1080p.WEBRip.x265")
        assert info.group == "UNKNOWN"

    def test_normalised_stored_on_parsed(self):
        """The dotted form of a spaced name is available as ``normalised``."""
        info = parse_release("Oz S01 1080p WEBRip x265-KONTRAST")
        assert info.normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_raw_stored_as_is(self):
        """``raw`` keeps the spaces of the original input."""
        original = "Oz S01 1080p WEBRip x265-KONTRAST"
        assert parse_release(original).raw == original

    def test_hevc_codec(self):
        """With both x265 and HEVC in the name, either is accepted as codec."""
        info = parse_release(
            "Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA"
        )
        assert info.codec in ("x265", "HEVC")
        assert info.group == "PSA"

    def test_xfiles_hyphen_in_title(self):
        """The hyphen in "X-Files" is preserved in the title."""
        info = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG")
        assert "X-Files" in info.title

    def test_foundation_s02_no_year(self):
        """A name without a year yields ``year is None``."""
        info = parse_release("Foundation.S02.1080p.x265-ELiTE")
        assert info.year is None
        assert info.season == 2
        assert info.group == "ELiTE"

    def test_slow_horses_two_groups_same_show(self):
        """Same show released by different groups: same title, distinct groups."""
        first_season = parse_release("Slow.Horses.S01.1080p.WEBRip.x265-RARBG")
        fourth_season = parse_release("Slow.Horses.S04.1080p.WEBRip.x265-KONTRAST")
        assert first_season.title == fourth_season.title == "Slow.Horses"
        assert first_season.group == "RARBG"
        assert fourth_season.group == "KONTRAST"
+346
View File
@@ -0,0 +1,346 @@
"""Tests for ``alfred.domain.subtitles.services.identifier``.
Coverage:
- ``TestTokenize`` — ``_tokenize`` strips parentheses and splits on
``[.\\s_-]``; ``_tokenize_suffix`` peels the episode stem prefix.
- ``TestCountEntries`` — last-cue-number heuristic for SRT files.
- ``TestEmbedded`` — ffprobe is mocked; dispositions map to SDH/FORCED
/ STANDARD; non-existent file → empty list; ffprobe error → empty.
- ``TestAdjacent`` — adjacent strategy: only known extensions, excludes
the video file itself.
- ``TestFlat`` — Subs/ folder adjacent or at release root.
- ``TestEpisodeSubfolder`` — Subs/{stem}/*.srt; tokens after prefix.
- ``TestClassify`` — language + type token detection, confidence math.
- ``TestSizeDisambiguation`` — size_and_count post-processing rules
(2-track → standard+sdh; 3+ → forced + standard + sdh).
"""
from __future__ import annotations
from unittest.mock import patch
import pytest
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.services.identifier import (
SubtitleIdentifier,
_count_entries,
_tokenize,
_tokenize_suffix,
)
from alfred.domain.subtitles.value_objects import (
ScanStrategy,
SubtitleLanguage,
SubtitlePattern,
SubtitleType,
TypeDetectionMethod,
)
@pytest.fixture(scope="module")
def kb():
    """Build one ``SubtitleKnowledgeBase`` shared by all tests of this module."""
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base
@pytest.fixture
def identifier(kb):
    """Provide a fresh ``SubtitleIdentifier`` backed by the module-level ``kb``."""
    subtitle_identifier = SubtitleIdentifier(kb)
    return subtitle_identifier
def _pattern(
    strategy: ScanStrategy,
    root_folder: str | None = None,
    detection: TypeDetectionMethod = TypeDetectionMethod.TOKEN_IN_NAME,
) -> SubtitlePattern:
    """Build a throwaway ``SubtitlePattern`` for tests.

    The id is derived from the strategy value ("test-<value>") and the
    description is left empty; the remaining fields come from the arguments.
    """
    pattern_id = f"test-{strategy.value}"
    fields = {
        "id": pattern_id,
        "description": "",
        "scan_strategy": strategy,
        "root_folder": root_folder,
        "type_detection": detection,
    }
    return SubtitlePattern(**fields)
# --------------------------------------------------------------------------- #
# _tokenize / _tokenize_suffix #
# --------------------------------------------------------------------------- #
class TestTokenize:
    """``_tokenize`` and ``_tokenize_suffix`` on subtitle file stems."""

    def test_basic_dotted(self):
        """Dots separate tokens and the result is lower-cased."""
        tokens = _tokenize("Show.S01E01.French")
        assert tokens == ["show", "s01e01", "french"]

    def test_mixed_separators(self):
        """Underscore, hyphen and space all act as separators."""
        tokens = _tokenize("Show_S01-E01 French")
        assert tokens == ["show", "s01", "e01", "french"]

    def test_strips_parenthesized(self):
        """A parenthesised chunk is dropped before splitting."""
        tokens = _tokenize("episode (Brazil).French")
        assert tokens == ["episode", "french"]

    def test_empty_string(self):
        """An empty stem yields no tokens."""
        assert _tokenize("") == []

    def test_suffix_strips_episode_prefix(self):
        """Only what follows the episode stem is tokenized."""
        tokens = _tokenize_suffix("Show.S01E01.English", "Show.S01E01")
        assert tokens == ["english"]

    def test_suffix_falls_back_when_no_prefix(self):
        """If the name does not start with the episode stem, all of it is used."""
        tokens = _tokenize_suffix("Other.srt", "Show.S01E01")
        assert "other" in tokens

    def test_suffix_falls_back_when_suffix_is_empty(self):
        """If nothing is left after the prefix, the full stem is tokenized."""
        stem = "Show.S01E01"
        tokens = _tokenize_suffix(stem, stem)
        assert tokens == ["show", "s01e01"]
# --------------------------------------------------------------------------- #
# _count_entries #
# --------------------------------------------------------------------------- #
class TestCountEntries:
    """``_count_entries`` on SRT files."""

    def test_last_cue_number(self, tmp_path):
        """The number of the last cue (42) is reported, not the cue count (3)."""
        cues = [
            "1\n00:00:01,000 --> 00:00:02,000\nHello\n\n",
            "2\n00:00:03,000 --> 00:00:04,000\nWorld\n\n",
            "42\n00:00:05,000 --> 00:00:06,000\nLast\n",
        ]
        srt_file = tmp_path / "x.srt"
        srt_file.write_text("".join(cues), encoding="utf-8")
        assert _count_entries(srt_file) == 42

    def test_missing_file_returns_zero(self, tmp_path):
        """A path that does not exist counts as zero entries."""
        absent = tmp_path / "nope.srt"
        assert _count_entries(absent) == 0

    def test_empty_file_returns_zero(self, tmp_path):
        """An empty file counts as zero entries."""
        empty_file = tmp_path / "x.srt"
        empty_file.write_text("")
        assert _count_entries(empty_file) == 0
# --------------------------------------------------------------------------- #
# Embedded scan #
# --------------------------------------------------------------------------- #
class TestEmbedded:
    """``_scan_embedded`` with ``subprocess.run`` patched out."""

    def test_missing_file_returns_empty(self, identifier, tmp_path):
        """A video path that does not exist yields no tracks."""
        absent_video = tmp_path / "missing.mkv"
        assert identifier._scan_embedded(absent_video) == []

    def test_ffprobe_failure_returns_empty(self, identifier, tmp_path):
        """If launching the probe raises, the scan returns an empty list."""
        video = tmp_path / "v.mkv"
        video.write_bytes(b"")
        run_target = "alfred.domain.subtitles.services.identifier.subprocess.run"
        with patch(run_target, side_effect=FileNotFoundError("no ffprobe")):
            assert identifier._scan_embedded(video) == []

    def test_disposition_to_subtitle_type(self, identifier, tmp_path):
        """Stream dispositions map to SDH / FORCED / STANDARD."""
        video = tmp_path / "v.mkv"
        video.write_bytes(b"")
        # Four streams: hearing-impaired, forced, plain, and one with no tags.
        streams = [
            '{"tags":{"language":"eng"},"disposition":{"hearing_impaired":1}}',
            '{"tags":{"language":"fre"},"disposition":{"forced":1}}',
            '{"tags":{"language":"spa"},"disposition":{}}',
            '{"tags":{},"disposition":{}}',
        ]
        fake_output = '{"streams":[' + ",".join(streams) + "]}"

        class FakeResult:
            # Stand-in for the object returned by subprocess.run.
            stdout = fake_output

        run_target = "alfred.domain.subtitles.services.identifier.subprocess.run"
        with patch(run_target, return_value=FakeResult()):
            tracks = identifier._scan_embedded(video)

        assert len(tracks) == 4
        sdh_track, forced_track, standard_track, untagged_track = tracks
        assert sdh_track.subtitle_type == SubtitleType.SDH
        assert sdh_track.language.code == "eng"
        assert forced_track.subtitle_type == SubtitleType.FORCED
        assert forced_track.language.code == "fre"
        assert standard_track.subtitle_type == SubtitleType.STANDARD
        # No language tag on the last stream.
        assert untagged_track.language is None
        assert all(track.is_embedded is True for track in tracks)
# --------------------------------------------------------------------------- #
# Adjacent / Flat / Episode subfolder discovery #
# --------------------------------------------------------------------------- #
class TestAdjacent:
    """``_find_adjacent``: subtitle files sitting next to the video."""

    def test_finds_only_known_subtitle_extensions(self, identifier, tmp_path):
        """Only .srt/.ass siblings are returned; .nfo and .jpg are ignored."""
        video = tmp_path / "Show.S01E01.mkv"
        video.write_bytes(b"")
        for subtitle_name in ("Show.S01E01.English.srt", "Show.S01E01.French.ass"):
            (tmp_path / subtitle_name).write_text("")
        # Files that are not subtitles.
        (tmp_path / "Show.S01E01.nfo").write_text("")
        (tmp_path / "cover.jpg").write_bytes(b"")

        found = sorted(path.name for path in identifier._find_adjacent(video))
        assert found == ["Show.S01E01.English.srt", "Show.S01E01.French.ass"]

    def test_excludes_the_video_file(self, identifier, tmp_path):
        """A subtitle whose stem equals the video's stem is not returned."""
        video = tmp_path / "Show.S01E01.mkv"
        video.write_bytes(b"")
        same_stem_subtitle = tmp_path / "Show.S01E01.srt"
        same_stem_subtitle.write_text("")
        # Only subtitles with a different stem would be reported.
        assert identifier._find_adjacent(video) == []
class TestFlat:
    """``_find_flat``: a ``Subs`` folder next to the video or one level up."""

    def test_subs_folder_adjacent(self, identifier, tmp_path):
        """A ``Subs`` folder in the video's own directory is scanned."""
        video = tmp_path / "Show.S01E01.mkv"
        video.write_bytes(b"")
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        (subs_dir / "English.srt").write_text("")

        found = identifier._find_flat(video, "Subs")
        assert len(found) == 1

    def test_subs_folder_at_release_root_fallback(self, identifier, tmp_path):
        """With the video in a season sub-directory, the parent's ``Subs`` is used."""
        season_dir = tmp_path / "Season.1"
        season_dir.mkdir()
        video = season_dir / "Show.S01E01.mkv"
        video.write_bytes(b"")
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        (subs_dir / "English.srt").write_text("")

        found = identifier._find_flat(video, "Subs")
        assert len(found) == 1

    def test_no_subs_folder_returns_empty(self, identifier, tmp_path):
        """No ``Subs`` folder anywhere yields an empty list."""
        video = tmp_path / "v.mkv"
        video.write_bytes(b"")
        assert identifier._find_flat(video, "Subs") == []
class TestEpisodeSubfolder:
    """``_find_episode_subfolder``: ``Subs/<video stem>/`` layout."""

    def test_found_and_stem_returned(self, identifier, tmp_path):
        """Files under ``Subs/<stem>/`` are returned together with the stem."""
        video = tmp_path / "Show.S01E01.mkv"
        video.write_bytes(b"")
        episode_dir = tmp_path / "Subs" / "Show.S01E01"
        episode_dir.mkdir(parents=True)
        (episode_dir / "2_English.srt").write_text("")

        found, episode_stem = identifier._find_episode_subfolder(video, "Subs")
        assert len(found) == 1
        assert episode_stem == "Show.S01E01"

    def test_not_found(self, identifier, tmp_path):
        """Without the sub-folder the file list is empty; the stem is still returned."""
        video = tmp_path / "Show.S01E01.mkv"
        video.write_bytes(b"")

        found, episode_stem = identifier._find_episode_subfolder(video, "Subs")
        assert found == []
        assert episode_stem == "Show.S01E01"
# --------------------------------------------------------------------------- #
# Classification #
# --------------------------------------------------------------------------- #
class TestClassify:
    """``_classify_single``: language, format, type and confidence."""

    def test_classifies_language_and_format(self, identifier, tmp_path):
        """An "English" token and a .srt extension are both recognised."""
        subtitle = tmp_path / "Show.S01E01.English.srt"
        subtitle.write_text("1\n00:00:01,000 --> 00:00:02,000\nHi\n")

        candidate = identifier._classify_single(subtitle)
        assert candidate.language.code == "eng"
        assert candidate.format.id == "srt"
        assert candidate.confidence > 0
        assert candidate.is_embedded is False

    def test_classifies_type_token(self, identifier, tmp_path):
        """An "sdh" token in the name sets the subtitle type to SDH."""
        subtitle = tmp_path / "Show.S01E01.English.sdh.srt"
        subtitle.write_text("")

        candidate = identifier._classify_single(subtitle)
        assert candidate.subtitle_type == SubtitleType.SDH

    def test_unknown_tokens_lower_confidence(self, identifier, tmp_path):
        """Unrecognised tokens give no language and a confidence below 0.5."""
        subtitle = tmp_path / "Show.S01E01.gibberish.srt"
        subtitle.write_text("")

        candidate = identifier._classify_single(subtitle)
        assert candidate.language is None
        assert candidate.confidence < 0.5

    def test_episode_stem_prefix_stripped(self, identifier, tmp_path):
        """With the episode stem supplied, the lone "English" token gives 1.0."""
        subtitle = tmp_path / "Show.S01E01.English.srt"
        subtitle.write_text("")

        candidate = identifier._classify_single(subtitle, episode_stem="Show.S01E01")
        assert candidate.language.code == "eng"
        assert candidate.confidence == 1.0
# --------------------------------------------------------------------------- #
# size_and_count post-processing #
# --------------------------------------------------------------------------- #
class TestSizeDisambiguation:
    """``_disambiguate_by_size``: assign subtitle types from entry counts.

    Tracks are grouped per language. Within a group of two, the smaller track
    is expected to become STANDARD and the larger SDH; within a group of
    three, the smallest becomes FORCED. A lone track is left untouched.
    """

    @pytest.fixture
    def pattern_size(self):
        """A FLAT pattern rooted at ``Subs`` that uses SIZE_AND_COUNT detection."""
        return _pattern(
            ScanStrategy.FLAT,
            root_folder="Subs",
            detection=TypeDetectionMethod.SIZE_AND_COUNT,
        )

    def _track(self, lang_code: str, entries: int) -> SubtitleCandidate:
        """Build an UNKNOWN-typed candidate with the given language and entry count."""
        return SubtitleCandidate(
            language=SubtitleLanguage(code=lang_code, tokens=[lang_code]),
            format=None,
            subtitle_type=SubtitleType.UNKNOWN,
            entry_count=entries,
        )

    def test_two_tracks_split_into_standard_and_sdh(self, identifier, pattern_size):
        """Two same-language tracks: smaller -> STANDARD, larger -> SDH."""
        # pattern_size is requested only so that the SIZE_AND_COUNT pattern is
        # constructed; _disambiguate_by_size itself does not take it.
        smaller = self._track("eng", 800)
        larger = self._track("eng", 1200)
        result = identifier._disambiguate_by_size([smaller, larger])

        assigned = [track.subtitle_type for track in result]
        assert SubtitleType.STANDARD in assigned
        assert SubtitleType.SDH in assigned
        # Pin which track gets which type, as the sibling multi-language test
        # does; a swapped assignment must not pass.
        assert smaller.subtitle_type == SubtitleType.STANDARD
        assert larger.subtitle_type == SubtitleType.SDH

    def test_three_tracks_split_into_forced_standard_sdh(self, identifier):
        """Three same-language tracks: smallest FORCED, middle STANDARD, largest SDH."""
        t_small = self._track("eng", 50)
        t_mid = self._track("eng", 600)
        t_large = self._track("eng", 1200)
        # Input order is deliberately shuffled.
        result = identifier._disambiguate_by_size([t_large, t_small, t_mid])

        by_count = sorted(result, key=lambda t: t.entry_count)
        assert by_count[0].subtitle_type == SubtitleType.FORCED
        assert by_count[1].subtitle_type == SubtitleType.STANDARD
        assert by_count[2].subtitle_type == SubtitleType.SDH

    def test_single_track_untouched(self, identifier):
        """A lone track is returned as is and stays UNKNOWN."""
        t = self._track("eng", 800)
        result = identifier._disambiguate_by_size([t])
        assert result == [t]
        assert t.subtitle_type == SubtitleType.UNKNOWN

    def test_different_languages_grouped_independently(self, identifier):
        """Each language is disambiguated on its own."""
        # Two eng + one fra: fra is alone, the eng pair gets split.
        eng_small = self._track("eng", 800)
        eng_large = self._track("eng", 1500)
        fra_solo = self._track("fra", 1000)
        identifier._disambiguate_by_size([eng_small, eng_large, fra_solo])

        # The solo French track stays UNKNOWN.
        assert fra_solo.subtitle_type == SubtitleType.UNKNOWN
        # The English pair gets STANDARD + SDH.
        assert eng_small.subtitle_type == SubtitleType.STANDARD
        assert eng_large.subtitle_type == SubtitleType.SDH
+281
View File
@@ -0,0 +1,281 @@
"""Tests for ``alfred.domain.subtitles.knowledge`` (loader + base).
Covers:
- ``TestMerge`` — the internal ``_merge`` deep-merge function:
scalar override, dict merge, list extension+dedup.
- ``TestLoader`` — builtin loads alone, learned overlays add tokens,
learned-only pattern is picked up, missing files don't crash.
- ``TestKnowledgeBase`` — typed view: formats / languages /
type-token lookup, default rules, ``patterns_for_group``.
Uses ``monkeypatch`` to override the module-level ``_BUILTIN_ROOT`` and
``_LEARNED_ROOT`` constants so we can drive the loader from a temp dir.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from alfred.domain.subtitles.knowledge import loader as loader_mod
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader, _merge
from alfred.domain.subtitles.value_objects import (
ScanStrategy,
SubtitleType,
TypeDetectionMethod,
)
# --------------------------------------------------------------------------- #
# _merge — pure dict merger #
# --------------------------------------------------------------------------- #
class TestMerge:
    """``_merge``: deep merge of an override dict onto a base dict."""

    def test_scalar_override(self):
        """A scalar in the override replaces the base value."""
        merged = _merge({"a": 1}, {"a": 2})
        assert merged == {"a": 2}

    def test_new_key_added(self):
        """Keys only present in the override are added."""
        merged = _merge({"a": 1}, {"b": 2})
        assert merged == {"a": 1, "b": 2}

    def test_nested_dict_merged(self):
        """Nested dicts are merged key by key."""
        merged = _merge({"a": {"x": 1}}, {"a": {"y": 2}})
        assert merged == {"a": {"x": 1, "y": 2}}

    def test_list_extended_and_deduped(self):
        """Lists are concatenated without repeating existing items."""
        merged = _merge({"a": [1, 2]}, {"a": [2, 3]})
        assert merged == {"a": [1, 2, 3]}

    def test_list_preserves_order(self):
        """Base items keep their order; new items are appended after them."""
        merged = _merge({"a": ["x", "y"]}, {"a": ["z", "x"]})
        assert merged == {"a": ["x", "y", "z"]}

    def test_type_mismatch_override_wins(self):
        """When the shapes differ, the override replaces the base value whole."""
        merged = _merge({"a": [1, 2]}, {"a": {"new": True}})
        assert merged == {"a": {"new": True}}
# --------------------------------------------------------------------------- #
# Loader helpers #
# --------------------------------------------------------------------------- #
def _write(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating missing parent directories."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
@pytest.fixture
def isolated_loader(tmp_path: Path, monkeypatch):
    """Point the loader's ``_BUILTIN_ROOT`` / ``_LEARNED_ROOT`` at temp dirs.

    Returns the ``(builtin, learned)`` directory pair; both exist and are empty.
    """
    roots = {
        "_BUILTIN_ROOT": tmp_path / "builtin",
        "_LEARNED_ROOT": tmp_path / "learned",
    }
    for constant_name, directory in roots.items():
        directory.mkdir()
        monkeypatch.setattr(loader_mod, constant_name, directory)
    return roots["_BUILTIN_ROOT"], roots["_LEARNED_ROOT"]
class TestLoader:
    """``KnowledgeLoader`` driven from temporary builtin / learned roots."""

    def test_builtin_only(self, isolated_loader):
        """With only a builtin file, its content is returned as is."""
        builtin, _ = isolated_loader
        # The nesting indentation matters: ``tokens`` must sit under ``fra``,
        # otherwise the two keys parse as siblings and the lookup below fails.
        _write(
            builtin / "subtitles.yaml",
            "languages:\n  fra:\n    tokens: [fr, fre]\n",
        )
        ldr = KnowledgeLoader()
        assert ldr.subtitles()["languages"]["fra"]["tokens"] == ["fr", "fre"]

    def test_learned_adds_tokens_additively(self, isolated_loader):
        """Learned tokens are appended after the builtin ones."""
        builtin, learned = isolated_loader
        _write(
            builtin / "subtitles.yaml",
            "languages:\n  fra:\n    tokens: [fr, fre]\n",
        )
        _write(
            learned / "subtitles_learned.yaml",
            "languages:\n  fra:\n    tokens: [vff, custom]\n",
        )
        ldr = KnowledgeLoader()
        tokens = ldr.subtitles()["languages"]["fra"]["tokens"]
        assert tokens == ["fr", "fre", "vff", "custom"]

    def test_missing_files_dont_crash(self, isolated_loader):
        """With no files on disk, every view is an empty dict."""
        ldr = KnowledgeLoader()
        assert ldr.subtitles() == {}
        assert ldr.patterns() == {}
        assert ldr.release_groups() == {}

    def test_builtin_pattern_loaded(self, isolated_loader):
        """A pattern file under ``patterns/`` is exposed by its id."""
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "adjacent.yaml",
            "id: adjacent\nscan_strategy: adjacent\ndescription: test\n",
        )
        ldr = KnowledgeLoader()
        assert "adjacent" in ldr.patterns()
        assert ldr.pattern("adjacent")["scan_strategy"] == "adjacent"

    def test_learned_pattern_overlays_builtin(self, isolated_loader):
        """A learned pattern with the same id overrides scalars and keeps the rest."""
        builtin, learned = isolated_loader
        _write(
            builtin / "patterns" / "p.yaml",
            "id: p\nscan_strategy: flat\ndescription: old\n",
        )
        _write(
            learned / "patterns" / "p.yaml",
            "id: p\ndescription: new\n",
        )
        ldr = KnowledgeLoader()
        # 'description' comes from learned, 'scan_strategy' from builtin.
        assert ldr.pattern("p")["description"] == "new"
        assert ldr.pattern("p")["scan_strategy"] == "flat"

    def test_learned_only_pattern_added(self, isolated_loader):
        """A pattern that exists only in the learned root is still listed."""
        _, learned = isolated_loader
        _write(
            learned / "patterns" / "neo.yaml",
            "id: neo\nscan_strategy: embedded\n",
        )
        ldr = KnowledgeLoader()
        assert "neo" in ldr.patterns()

    def test_release_group_case_insensitive_lookup(self, isolated_loader):
        """Release groups are found regardless of the casing used in the query."""
        builtin, _ = isolated_loader
        _write(
            builtin / "release_groups" / "kontrast.yaml",
            "name: KONTRAST\nknown_patterns: [adjacent]\n",
        )
        ldr = KnowledgeLoader()
        # The file declares "KONTRAST"; other casings must still match.
        assert ldr.release_group("kontrast") is not None
        assert ldr.release_group("Kontrast")["name"] == "KONTRAST"
        assert ldr.release_group("unknown_group") is None

    def test_pattern_id_falls_back_to_filename(self, isolated_loader):
        """A pattern file without an ``id`` field is keyed by its file stem."""
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "no_id.yaml",
            "scan_strategy: adjacent\n",
        )
        ldr = KnowledgeLoader()
        assert "no_id" in ldr.patterns()
# --------------------------------------------------------------------------- #
# SubtitleKnowledgeBase #
# --------------------------------------------------------------------------- #
class TestKnowledgeBase:
    """Typed-view tests for ``SubtitleKnowledgeBase`` over an isolated tree.

    Every test receives a knowledge base built from a small, hand-written
    builtin tree (one ``subtitles.yaml``, two pattern files, one release
    group) so assertions do not depend on the live knowledge files.
    """

    @pytest.fixture
    def kb(self, isolated_loader):
        """Write the minimal builtin tree and return a fresh knowledge base."""
        builtin, _ = isolated_loader

        # The YAML is assembled line by line so that its nesting is explicit
        # and cannot be lost to re-indentation of the surrounding code.
        # NOTE(review): nesting reconstructed from the assertions below
        # (formats/types are mappings of ids, defaults holds the rule
        # values); ``format_priority`` is presumed to live under
        # ``defaults`` — confirm against the real subtitles.yaml schema.
        subtitles_yaml = (
            "formats:\n"
            "  srt:\n"
            '    extensions: [".srt"]\n'
            '    description: "SubRip"\n'
            "  ass:\n"
            '    extensions: [".ass", ".ssa"]\n'
            "language_tokens:\n"
            '  fre: ["vostfr"]\n'
            "types:\n"
            "  sdh:\n"
            '    tokens: ["sdh", "cc"]\n'
            "  forced:\n"
            '    tokens: ["forced"]\n'
            "defaults:\n"
            '  languages: ["fre"]\n'
            '  formats: ["srt"]\n'
            '  types: ["standard"]\n'
            '  format_priority: ["srt"]\n'
            "  min_confidence: 0.8\n"
        )
        _write(builtin / "subtitles.yaml", subtitles_yaml)
        _write(
            builtin / "patterns" / "adj.yaml",
            "id: adj\nscan_strategy: adjacent\ndescription: d\n",
        )
        _write(
            builtin / "patterns" / "bad.yaml",
            # invalid scan_strategy → skipped at build time
            "id: bad\nscan_strategy: not_a_real_strategy\n",
        )
        _write(
            builtin / "release_groups" / "group_a.yaml",
            "name: GroupA\nknown_patterns: [adj]\n",
        )
        return SubtitleKnowledgeBase()

    def test_formats_loaded(self, kb):
        """Formats are indexed by id and resolvable from any listed extension."""
        formats = kb.formats()
        assert "srt" in formats and "ass" in formats
        assert kb.format_for_extension(".srt").id == "srt"
        assert kb.format_for_extension(".ssa").id == "ass"
        assert kb.format_for_extension(".unknown") is None

    def test_known_extensions_aggregates(self, kb):
        """``known_extensions`` is the union of every format's extensions."""
        exts = kb.known_extensions()
        assert ".srt" in exts and ".ass" in exts and ".ssa" in exts

    def test_language_for_token(self, kb):
        """Generic language tokens resolve to canonical ISO 639-2/B codes."""
        # Canonical ISO 639-2/B codes are sourced from LanguageRegistry.
        assert kb.language_for_token("french").code == "fre"
        assert kb.language_for_token("FR").code == "fre"
        assert kb.language_for_token("xxx") is None
        assert kb.is_known_lang_token("eng") is True
        assert kb.is_known_lang_token("ghost") is False

    def test_subtitle_specific_token_recognized(self, kb):
        """Tokens declared only in ``subtitles.yaml`` are recognized too."""
        # ``vostfr`` is subtitle-specific and lives in subtitles.yaml's
        # ``language_tokens`` block — still resolves to canonical "fre".
        assert kb.language_for_token("vostfr").code == "fre"

    def test_type_for_token(self, kb):
        """Type tokens map to ``SubtitleType`` members, case-insensitively."""
        assert kb.type_for_token("sdh") == SubtitleType.SDH
        assert kb.type_for_token("FORCED") == SubtitleType.FORCED
        assert kb.type_for_token("nope") is None
        # 'hi' must NOT be a SDH token any more (it collides with Hindi).
        assert kb.is_known_type_token("hi") is False
        assert kb.is_known_type_token("cc") is True

    def test_default_rules(self, kb):
        """Default matching rules mirror the ``defaults`` block."""
        r = kb.default_rules()
        assert r.preferred_languages == ["fre"]
        assert r.preferred_formats == ["srt"]
        assert r.min_confidence == 0.8

    def test_patterns_valid_kept_invalid_skipped(self, kb):
        """Patterns with an unknown scan strategy are left out of the view."""
        patterns = kb.patterns()
        assert "adj" in patterns
        # 'bad' had an invalid scan_strategy → quietly dropped.
        assert "bad" not in patterns

    def test_pattern_typed_view(self, kb):
        """A pattern is exposed with enum-typed fields."""
        p = kb.pattern("adj")
        assert p.scan_strategy == ScanStrategy.ADJACENT
        # adj.yaml sets no type detection, so this is the value the
        # knowledge base fills in on its own.
        assert p.type_detection == TypeDetectionMethod.TOKEN_IN_NAME

    def test_patterns_for_group(self, kb):
        """A release group yields its known patterns; unknown groups yield none."""
        ps = kb.patterns_for_group("GroupA")
        assert len(ps) == 1 and ps[0].id == "adj"
        assert kb.patterns_for_group("unknown") == []

    def test_reload_picks_up_changes(self, kb, isolated_loader):
        """Files added after construction become visible after ``reload``."""
        # Add a new pattern, reload, check it's visible.
        builtin, _ = isolated_loader
        _write(
            builtin / "patterns" / "new.yaml",
            "id: new\nscan_strategy: flat\n",
        )
        kb.reload()
        assert "new" in kb.patterns()
+208
View File
@@ -0,0 +1,208 @@
"""Tests for ``alfred.domain.subtitles.services.matcher.SubtitleMatcher``.
The matcher filters classified subtitle tracks against effective rules,
returning ``(matched, unresolved)``. Coverage:
- ``TestUnresolved`` — None language or low confidence → unresolved.
- ``TestLanguageFilter`` / ``TestFormatFilter`` / ``TestTypeFilter`` —
rule-based exclusion.
- ``TestEmbeddedTracks`` — embedded tracks are skipped entirely.
- ``TestFormatPriority`` — conflict between two same-(lang, type) tracks
is resolved by ``format_priority``.
- ``TestNoConflict`` — different (lang, type) keys never collide.
Uses lightweight, hand-built value objects — no KB dependency.
"""
from __future__ import annotations
import pytest
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
from alfred.domain.subtitles.value_objects import (
SubtitleFormat,
SubtitleLanguage,
SubtitleMatchingRules,
SubtitleType,
)
# Hand-built value objects shared by the tests below (no knowledge base is
# loaded in this module).
# Two formats, so format filtering and format-priority conflicts can be
# exercised.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
ASS = SubtitleFormat(id="ass", extensions=[".ass"])
# NOTE(review): "fra" is the ISO 639-2/T code; the sprint this file belongs
# to standardizes on ISO 639-2/B ("fre"). The tests below use "fra"
# consistently in both tracks and rules, so presumably only equality of
# codes matters here — confirm whether these fixtures should be migrated.
FRA = SubtitleLanguage(code="fra", tokens=["fr"])
ENG = SubtitleLanguage(code="eng", tokens=["en"])
# NOTE(review): SPA does not appear to be referenced by any test visible in
# this module — verify before relying on it.
SPA = SubtitleLanguage(code="spa", tokens=["es"])
def _track(
    lang: SubtitleLanguage | None = FRA,
    fmt: SubtitleFormat | None = SRT,
    stype: SubtitleType = SubtitleType.STANDARD,
    confidence: float = 1.0,
    is_embedded: bool = False,
) -> SubtitleCandidate:
    """Build a ``SubtitleCandidate`` for matcher tests.

    With no arguments the result is an external French SRT track of
    standard type with full confidence; each keyword overrides one field.
    """
    fields = {
        "language": lang,
        "format": fmt,
        "subtitle_type": stype,
        "is_embedded": is_embedded,
        "confidence": confidence,
    }
    return SubtitleCandidate(**fields)
@pytest.fixture
def matcher():
    """Provide a fresh ``SubtitleMatcher`` to each test."""
    instance = SubtitleMatcher()
    return instance
# --------------------------------------------------------------------------- #
# Unresolved #
# --------------------------------------------------------------------------- #
class TestUnresolved:
    """Tracks the matcher cannot judge are reported as unresolved."""

    def test_none_language_unresolved(self, matcher):
        """A track without a language goes to the unresolved list."""
        candidate = _track(lang=None)
        threshold_rules = SubtitleMatchingRules(min_confidence=0.7)

        kept, pending = matcher.match([candidate], threshold_rules)

        assert kept == []
        assert pending == [candidate]

    def test_low_confidence_unresolved(self, matcher):
        """A track below ``min_confidence`` goes to the unresolved list."""
        candidate = _track(confidence=0.3)
        threshold_rules = SubtitleMatchingRules(min_confidence=0.7)

        kept, pending = matcher.match([candidate], threshold_rules)

        assert kept == []
        assert pending == [candidate]

    def test_threshold_exact_passes(self, matcher):
        """Confidence equal to the threshold is accepted."""
        candidate = _track(confidence=0.7)
        threshold_rules = SubtitleMatchingRules(
            min_confidence=0.7, preferred_languages=["fra"]
        )

        kept, _pending = matcher.match([candidate], threshold_rules)

        assert kept == [candidate]
# --------------------------------------------------------------------------- #
# Filters #
# --------------------------------------------------------------------------- #
class TestLanguageFilter:
    """``preferred_languages`` restricts which languages may match."""

    def test_preferred_languages_filters_out(self, matcher):
        """A language outside the preferred list is not matched."""
        english = _track(lang=ENG)
        french_only = SubtitleMatchingRules(
            preferred_languages=["fra"], min_confidence=0.0
        )

        kept, _ = matcher.match([english], french_only)

        assert kept == []

    def test_preferred_language_match_passes(self, matcher):
        """A language inside the preferred list is matched."""
        french = _track(lang=FRA)
        french_only = SubtitleMatchingRules(
            preferred_languages=["fra"], min_confidence=0.0
        )

        kept, _ = matcher.match([french], french_only)

        assert kept == [french]

    def test_empty_preferred_allows_all(self, matcher):
        """Without a preferred list, every language is accepted."""
        french = _track(lang=FRA)
        english = _track(lang=ENG)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        kept, _ = matcher.match([french, english], permissive)

        # No language filter → both pass (different keys → no conflict).
        assert len(kept) == 2
class TestFormatFilter:
    """``preferred_formats`` restricts which formats may match."""

    def test_format_outside_preferred_filtered(self, matcher):
        """An ASS track is rejected when only SRT is preferred."""
        ass_track = _track(fmt=ASS)
        srt_only = SubtitleMatchingRules(
            preferred_formats=["srt"], min_confidence=0.0
        )

        kept, _ = matcher.match([ass_track], srt_only)

        assert kept == []

    def test_no_format_attribute_filtered_when_pref_set(self, matcher):
        """A track with no format is rejected when a preference is set."""
        formatless = _track(fmt=None)
        srt_only = SubtitleMatchingRules(
            preferred_formats=["srt"], min_confidence=0.0
        )

        kept, _ = matcher.match([formatless], srt_only)

        assert kept == []
class TestTypeFilter:
    """``allowed_types`` restricts which subtitle types may match."""

    def test_disallowed_type_excluded(self, matcher):
        """An SDH track is rejected when SDH is not in ``allowed_types``."""
        sdh_track = _track(stype=SubtitleType.SDH)
        no_sdh = SubtitleMatchingRules(
            allowed_types=["standard", "forced"],
            min_confidence=0.0,
        )

        kept, _ = matcher.match([sdh_track], no_sdh)

        assert kept == []

    def test_allowed_type_passes(self, matcher):
        """A standard track is matched when ``standard`` is allowed."""
        standard_track = _track(stype=SubtitleType.STANDARD)
        standard_only = SubtitleMatchingRules(
            allowed_types=["standard"], min_confidence=0.0
        )

        kept, _ = matcher.match([standard_track], standard_only)

        assert kept == [standard_track]
# --------------------------------------------------------------------------- #
# Embedded handling #
# --------------------------------------------------------------------------- #
class TestEmbeddedTracks:
    """Embedded tracks are ignored by the matcher."""

    def test_embedded_track_skipped_entirely(self, matcher):
        """An embedded track appears in neither returned list."""
        embedded = _track(is_embedded=True)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        kept, pending = matcher.match([embedded], permissive)

        # Embedded tracks are not the matcher's concern.
        assert kept == []
        assert pending == []
# --------------------------------------------------------------------------- #
# Conflict resolution #
# --------------------------------------------------------------------------- #
class TestFormatPriority:
    """Two tracks sharing (language, type) are reduced to a single winner."""

    def test_higher_priority_format_wins(self, matcher):
        """With SRT ranked first, SRT wins even when ASS is seen first."""
        # Same (lang, type) but different formats → priority decides.
        srt_track = _track(fmt=SRT)
        ass_track = _track(fmt=ASS)
        srt_first = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["srt", "ass"],
        )

        kept, _ = matcher.match([ass_track, srt_track], srt_first)

        assert len(kept) == 1
        winner = kept[0]
        assert winner.format.id == "srt"

    def test_first_seen_kept_when_no_priority(self, matcher):
        """Without a priority list, the first track encountered is kept."""
        srt_track = _track(fmt=SRT)
        ass_track = _track(fmt=ASS)
        unranked = SubtitleMatchingRules(min_confidence=0.0)

        kept, _ = matcher.match([ass_track, srt_track], unranked)

        # No priority → ass came first → kept.
        assert len(kept) == 1
        winner = kept[0]
        assert winner.format.id == "ass"

    def test_priority_order_reversed(self, matcher):
        """With ASS ranked first, ASS wins even when SRT is seen first."""
        srt_track = _track(fmt=SRT)
        ass_track = _track(fmt=ASS)
        ass_first = SubtitleMatchingRules(
            min_confidence=0.0,
            format_priority=["ass", "srt"],
        )

        kept, _ = matcher.match([srt_track, ass_track], ass_first)

        winner = kept[0]
        assert winner.format.id == "ass"
class TestNoConflict:
    """Tracks with different (language, type) keys never displace each other."""

    def test_different_languages_both_kept(self, matcher):
        """Same type, different languages: both tracks are matched."""
        french = _track(lang=FRA)
        english = _track(lang=ENG)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        kept, _ = matcher.match([french, english], permissive)

        assert len(kept) == 2

    def test_different_types_both_kept(self, matcher):
        """Same language, different types: both tracks are matched."""
        standard_track = _track(stype=SubtitleType.STANDARD)
        sdh_track = _track(stype=SubtitleType.SDH)
        permissive = SubtitleMatchingRules(min_confidence=0.0)

        kept, _ = matcher.match([standard_track, sdh_track], permissive)

        assert len(kept) == 2
@@ -0,0 +1,190 @@
"""Tests for ``alfred.domain.subtitles.services.pattern_detector.PatternDetector``.
The detector inspects a release folder and returns the best-matching known
pattern + a confidence score.
Coverage:
- ``TestEmbeddedDetection`` — ffprobe is mocked; ``embedded`` pattern wins
when no external subs and ffprobe reports tracks.
- ``TestAdjacentDetection`` — .srt next to the video → ``adjacent``.
- ``TestFlatSubsFolder`` — ``Subs/*.srt`` → ``subs_flat``.
- ``TestEpisodeSubfolder`` — ``Subs/{ep}/*.srt`` → ``episode_subfolder``.
- ``TestNothingFound`` — empty release returns no pattern.
- ``TestDescribe`` — human-readable description mentions the right cues.
Uses the real ``SubtitleKnowledgeBase`` (loaded from the live builtin
``patterns/`` folder) since rebuilding all four patterns by hand would
just duplicate fixture state.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
@pytest.fixture(scope="module")
def kb():
    """Build one ``SubtitleKnowledgeBase`` and share it across this module."""
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base
@pytest.fixture
def detector(kb):
    """Provide a ``PatternDetector`` bound to the shared knowledge base."""
    pattern_detector = PatternDetector(kb)
    return pattern_detector
def _make_video(folder: Path, name: str = "Show.S01E01.mkv") -> Path:
v = folder / name
v.write_bytes(b"")
return v
# --------------------------------------------------------------------------- #
# Embedded #
# --------------------------------------------------------------------------- #
class TestEmbeddedDetection:
    """Detection when the only subtitles are inside the video container."""

    def test_embedded_only(self, detector, tmp_path):
        """No external files + embedded tracks reported → ``embedded``."""
        # Folder has video but no external .srt files anywhere.
        sample = _make_video(tmp_path)
        embedded_probe = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        )

        with embedded_probe:
            outcome = detector.detect(tmp_path, sample)

        detected = outcome["detected"]
        assert detected is not None
        assert detected.id == "embedded"
        assert outcome["confidence"] > 0
        assert "embedded" in outcome["description"].lower()
# --------------------------------------------------------------------------- #
# Adjacent #
# --------------------------------------------------------------------------- #
class TestAdjacentDetection:
    """Detection when subtitle files sit next to the video."""

    def test_srt_next_to_video(self, detector, tmp_path):
        """``.srt`` files beside the video → ``adjacent``."""
        sample = _make_video(tmp_path)
        for subtitle_name in (
            "Show.S01E01.English.srt",
            "Show.S01E01.French.srt",
        ):
            (tmp_path / subtitle_name).write_text("")
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        detected = outcome["detected"]
        assert detected is not None
        assert detected.id == "adjacent"
        assert "adjacent" in outcome["description"]
# --------------------------------------------------------------------------- #
# Subs flat folder #
# --------------------------------------------------------------------------- #
class TestFlatSubsFolder:
    """Detection when subtitles live directly inside a ``Subs`` folder."""

    def test_flat_subs_folder_adjacent_to_video(self, detector, tmp_path):
        """``Subs/*.srt`` beside the video → ``subs_flat``."""
        sample = _make_video(tmp_path)
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        for subtitle_name in (
            "Show.S01E01.English.srt",
            "Show.S01E01.French.srt",
        ):
            (subs_dir / subtitle_name).write_text("")
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        detected = outcome["detected"]
        assert detected is not None
        assert detected.id == "subs_flat"
        assert "flat" in outcome["description"]

    def test_flat_subs_folder_at_release_root(self, detector, tmp_path):
        """``Subs/`` at the release root is found for a nested video."""
        # Sample video lives one level deep; Subs/ is at the release root.
        season_dir = tmp_path / "Season.01"
        season_dir.mkdir()
        sample = _make_video(season_dir)
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        (subs_dir / "ep01.English.srt").write_text("")
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        detected = outcome["detected"]
        assert detected is not None
        assert detected.id == "subs_flat"
# --------------------------------------------------------------------------- #
# Episode subfolder #
# --------------------------------------------------------------------------- #
class TestEpisodeSubfolder:
    """Detection when each episode has its own folder under ``Subs``."""

    def test_per_episode_subfolder(self, detector, tmp_path):
        """``Subs/{episode}/*.srt`` → ``episode_subfolder``."""
        sample = _make_video(tmp_path, name="Show.S01E01.mkv")
        episode_dir = tmp_path / "Subs" / "Show.S01E01"
        episode_dir.mkdir(parents=True)
        for subtitle_name in ("2_English.srt", "3_French.srt"):
            (episode_dir / subtitle_name).write_text("")
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        detected = outcome["detected"]
        assert detected is not None
        assert detected.id == "episode_subfolder"
        description = outcome["description"]
        assert "episode_subfolder" in description
        # Numeric-prefix cue should be reported.
        assert "numeric prefix" in description
# --------------------------------------------------------------------------- #
# Nothing #
# --------------------------------------------------------------------------- #
class TestNothingFound:
    """Detection on a release that contains no subtitles at all."""

    def test_empty_release_no_pattern(self, detector, tmp_path):
        """The description states that no external subtitle files exist."""
        sample = _make_video(tmp_path)
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        # No external subs and no embedded → adjacent strategy still scores
        # 0.5 (no Subs folder bonus). Best pattern may exist or not depending
        # on threshold (0.4). Either way the description must reflect emptiness.
        description = outcome["description"]
        assert "no external subtitle files" in description
# --------------------------------------------------------------------------- #
# Describe #
# --------------------------------------------------------------------------- #
class TestDescribe:
    """The human-readable description mentions the cues that were found."""

    def test_describe_includes_language_token_cue(self, detector, tmp_path):
        """A language word in a subtitle filename is called out."""
        sample = _make_video(tmp_path)
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        (subs_dir / "ep01.English.srt").write_text("")
        no_embedded = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=False
        )

        with no_embedded:
            outcome = detector.detect(tmp_path, sample)

        assert "language tokens" in outcome["description"]

    def test_describe_combines_external_and_embedded(self, detector, tmp_path):
        """External and embedded findings both appear in the description."""
        sample = _make_video(tmp_path)
        (tmp_path / "Show.S01E01.English.srt").write_text("")
        embedded_probe = patch.object(
            PatternDetector, "_has_embedded_subtitles", return_value=True
        )

        with embedded_probe:
            outcome = detector.detect(tmp_path, sample)

        description = outcome["description"]
        assert "adjacent" in description
        assert "embedded" in description.lower()
+221
View File
@@ -0,0 +1,221 @@
"""Tests for ``alfred.domain.subtitles.services.placer.SubtitlePlacer``.
The placer hard-links subtitle files next to a destination video, naming
them ``{video_stem}.{lang}[.sdh|.forced].{ext}``.
Coverage:
- ``TestBuildDestName`` — name construction for standard / SDH / forced;
errors on missing language or format.
- ``TestPlace`` — happy path: link is created, ``PlacedTrack`` populated.
- ``TestSkipReasons`` — embedded, missing source, missing language/format,
destination already exists.
- ``TestOSError`` — ``os.link`` failures are captured as ``skipped``.
- ``TestPlaceResultCounts`` — ``placed_count`` / ``skipped_count`` properties.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.services.placer import (
PlacedTrack,
PlaceResult,
SubtitlePlacer,
_build_dest_name,
)
from alfred.domain.subtitles.value_objects import (
SubtitleFormat,
SubtitleLanguage,
SubtitleType,
)
# Hand-built value objects shared by the placer tests below.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
# Two extensions on purpose: lets a test check which one is used when a
# destination name is built.
ASS = SubtitleFormat(id="ass", extensions=[".ass", ".ssa"])
# NOTE(review): "fra" is the ISO 639-2/T code, whereas the sprint this file
# belongs to standardizes on ISO 639-2/B ("fre"). The expected file names in
# this module embed "fra", so the two must change together — confirm intent.
FRA = SubtitleLanguage(code="fra", tokens=["fr"])
def _track(
    file_path: Path | None,
    *,
    lang=FRA,
    fmt=SRT,
    stype=SubtitleType.STANDARD,
    is_embedded: bool = False,
) -> SubtitleCandidate:
    """Build a ``SubtitleCandidate`` for placer tests.

    ``file_path`` is the source subtitle file (``None`` when no file is
    involved); the keyword-only arguments override the French / SRT /
    standard defaults.
    """
    fields = {
        "language": lang,
        "format": fmt,
        "subtitle_type": stype,
        "file_path": file_path,
        "is_embedded": is_embedded,
    }
    return SubtitleCandidate(**fields)
# --------------------------------------------------------------------------- #
# _build_dest_name #
# --------------------------------------------------------------------------- #
class TestBuildDestName:
    """Destination file names produced by ``_build_dest_name``."""

    def test_standard(self):
        """Standard type: ``{stem}.{lang}.{ext}`` with no type suffix."""
        track = _track(None, stype=SubtitleType.STANDARD)
        dest_name = _build_dest_name(track, "Movie.2010")
        assert dest_name == "Movie.2010.fra.srt"

    def test_sdh(self):
        """SDH type inserts ``.sdh`` before the extension."""
        track = _track(None, stype=SubtitleType.SDH)
        dest_name = _build_dest_name(track, "Movie.2010")
        assert dest_name == "Movie.2010.fra.sdh.srt"

    def test_forced(self):
        """Forced type inserts ``.forced`` before the extension."""
        track = _track(None, stype=SubtitleType.FORCED)
        dest_name = _build_dest_name(track, "Movie.2010")
        assert dest_name == "Movie.2010.fra.forced.srt"

    def test_uses_first_extension_of_multi_ext_format(self):
        """A format with several extensions uses the first one."""
        track = _track(None, fmt=ASS)
        dest_name = _build_dest_name(track, "x")
        # ASS has [.ass, .ssa] — first wins.
        assert dest_name.endswith(".ass")

    def test_missing_lang_raises(self):
        """A track without a language cannot be named."""
        track = _track(None, lang=None)
        with pytest.raises(ValueError, match="language or format"):
            _build_dest_name(track, "x")

    def test_missing_format_raises(self):
        """A track without a format cannot be named."""
        track = _track(None, fmt=None)
        with pytest.raises(ValueError, match="language or format"):
            _build_dest_name(track, "x")
# --------------------------------------------------------------------------- #
# Place — happy path #
# --------------------------------------------------------------------------- #
@pytest.fixture
def placer():
    """Provide a fresh ``SubtitlePlacer`` to each test."""
    instance = SubtitlePlacer()
    return instance
class TestPlace:
    """Happy path: subtitle files are linked next to the destination video."""

    def test_creates_hard_link_with_correct_name(self, placer, tmp_path):
        """One track → one placed file, named after the video stem."""
        source = tmp_path / "input.srt"
        source.write_text("subs")
        library = tmp_path / "lib"
        library.mkdir()
        video = library / "Movie.2010.mkv"
        video.write_bytes(b"")

        outcome = placer.place([_track(source)], video)

        assert outcome.placed_count == 1
        assert outcome.skipped_count == 0
        first = outcome.placed[0]
        assert first.filename == "Movie.2010.fra.srt"
        assert first.destination.exists()
        # Hard link → same inode as source.
        assert first.destination.stat().st_ino == source.stat().st_ino

    def test_multiple_tracks_distinct_destinations(self, placer, tmp_path):
        """Tracks with different language/type get different file names."""
        french_source = tmp_path / "a.srt"
        french_source.write_text("")
        english_source = tmp_path / "b.srt"
        english_source.write_text("")
        library = tmp_path / "lib"
        library.mkdir()
        video = library / "Movie.mkv"
        video.write_bytes(b"")
        english = SubtitleLanguage(code="eng", tokens=["en"])
        french_track = _track(french_source, lang=FRA)
        english_sdh_track = _track(
            english_source, lang=english, stype=SubtitleType.SDH
        )

        outcome = placer.place([french_track, english_sdh_track], video)

        assert outcome.placed_count == 2
        placed_names = {entry.filename for entry in outcome.placed}
        assert placed_names == {"Movie.fra.srt", "Movie.eng.sdh.srt"}
# --------------------------------------------------------------------------- #
# Skip reasons #
# --------------------------------------------------------------------------- #
class TestSkipReasons:
    """Tracks that cannot be placed are reported with a reason."""

    def test_embedded_skipped(self, placer, tmp_path):
        """Embedded tracks are skipped with an 'embedded' reason."""
        video = tmp_path / "Movie.mkv"
        video.write_bytes(b"")
        embedded = _track(None, is_embedded=True)

        outcome = placer.place([embedded], video)

        assert outcome.placed == []
        assert len(outcome.skipped) == 1
        reason = outcome.skipped[0][1]
        assert "embedded" in reason

    def test_missing_source_file(self, placer, tmp_path):
        """A track whose source file does not exist is skipped."""
        video = tmp_path / "Movie.mkv"
        video.write_bytes(b"")
        missing = _track(tmp_path / "ghost.srt")

        outcome = placer.place([missing], video)

        assert outcome.placed == []
        reason = outcome.skipped[0][1]
        assert "not found" in reason

    def test_missing_lang_or_format_skipped(self, placer, tmp_path):
        """A track without a language is skipped rather than raising."""
        video = tmp_path / "Movie.mkv"
        video.write_bytes(b"")
        source = tmp_path / "x.srt"
        source.write_text("")
        unlabelled = _track(source, lang=None)

        outcome = placer.place([unlabelled], video)

        assert outcome.placed == []
        reason = outcome.skipped[0][1]
        assert "language or format" in reason

    def test_destination_already_exists(self, placer, tmp_path):
        """An existing destination file is left alone and the track skipped."""
        source = tmp_path / "x.srt"
        source.write_text("a")
        library = tmp_path / "lib"
        library.mkdir()
        video = library / "Movie.mkv"
        video.write_bytes(b"")
        # Pre-create destination
        (library / "Movie.fra.srt").write_text("preexisting")

        outcome = placer.place([_track(source)], video)

        assert outcome.placed == []
        reason = outcome.skipped[0][1]
        assert "already exists" in reason
# --------------------------------------------------------------------------- #
# OSError handling #
# --------------------------------------------------------------------------- #
class TestOSError:
    """Failures of the underlying link call are reported, not raised."""

    def test_link_failure_captured_as_skipped(self, placer, tmp_path):
        """An ``OSError`` from ``os.link`` becomes a skip reason."""
        source = tmp_path / "x.srt"
        source.write_text("")
        library = tmp_path / "lib"
        library.mkdir()
        video = library / "Movie.mkv"
        video.write_bytes(b"")
        track = _track(source)
        failing_link = patch(
            "alfred.domain.subtitles.services.placer.os.link",
            side_effect=OSError("cross-device link"),
        )

        with failing_link:
            outcome = placer.place([track], video)

        assert outcome.placed == []
        reason = outcome.skipped[0][1]
        assert "cross-device" in reason
# --------------------------------------------------------------------------- #
# PlaceResult counters #
# --------------------------------------------------------------------------- #
class TestPlaceResultCounts:
    """``placed_count`` / ``skipped_count`` reflect the list lengths."""

    def test_counts(self):
        """One placed entry and one skipped entry are each counted once."""
        # Synthesize a PlaceResult directly for property check.
        placed_entry = PlacedTrack(
            source=Path("/a"),
            destination=Path("/b"),
            filename="b",
        )
        skipped_track = _track(None, is_embedded=True)

        outcome = PlaceResult(
            placed=[placed_entry],
            skipped=[(skipped_track, "x")],
        )

        assert outcome.placed_count == 1
        assert outcome.skipped_count == 1
+54 -37
View File
@@ -14,11 +14,12 @@ from alfred.domain.subtitles.scanner import (
class TestClassify:
def test_iso_lang_code(self, tmp_path):
def test_iso_lang_code_639_1_alias(self, tmp_path):
# ``fr`` is an alias of the canonical ISO 639-2/B code ``fre``.
p = tmp_path / "fr.srt"
p.write_text("")
lang, is_sdh, is_forced = _classify(p)
assert lang == "fr"
assert lang == "fre"
assert not is_sdh
assert not is_forced
@@ -26,35 +27,39 @@ class TestClassify:
p = tmp_path / "english.srt"
p.write_text("")
lang, _, _ = _classify(p)
assert lang == "en"
assert lang == "eng"
def test_french_keyword(self, tmp_path):
p = tmp_path / "Show.S01E01.French.srt"
p.write_text("")
lang, _, _ = _classify(p)
assert lang == "fr"
assert lang == "fre"
def test_vostfr_is_french(self, tmp_path):
p = tmp_path / "Show.S01E01.VOSTFR.srt"
p.write_text("")
lang, _, _ = _classify(p)
assert lang == "fr"
assert lang == "fre"
def test_sdh_token(self, tmp_path):
p = tmp_path / "fr.sdh.srt"
p = tmp_path / "fre.sdh.srt"
p.write_text("")
lang, is_sdh, _ = _classify(p)
assert lang == "fr"
assert lang == "fre"
assert is_sdh
def test_hi_alias_for_sdh(self, tmp_path):
def test_hi_no_longer_marks_sdh(self, tmp_path):
# ``hi`` is the ISO 639-1 alias for Hindi; it must not mark a file as
# SDH any more (regression of the previous collision between SDH and
# Hindi tokens). Use ``sdh`` / ``cc`` / ``hearing`` to flag SDH instead.
p = tmp_path / "en.hi.srt"
p.write_text("")
_, is_sdh, _ = _classify(p)
assert is_sdh
lang, is_sdh, _ = _classify(p)
assert lang == "eng"
assert not is_sdh
def test_forced_token(self, tmp_path):
p = tmp_path / "fr.forced.srt"
p = tmp_path / "fre.forced.srt"
p.write_text("")
_, _, is_forced = _classify(p)
assert is_forced
@@ -66,17 +71,17 @@ class TestClassify:
assert lang is None
def test_dot_separator(self, tmp_path):
p = tmp_path / "fr.sdh.srt"
p = tmp_path / "fre.sdh.srt"
p.write_text("")
lang, is_sdh, _ = _classify(p)
assert lang == "fr"
assert lang == "fre"
assert is_sdh
def test_hyphen_separator(self, tmp_path):
p = tmp_path / "fr-forced.srt"
p = tmp_path / "fre-forced.srt"
p.write_text("")
lang, _, is_forced = _classify(p)
assert lang == "fr"
assert lang == "fre"
assert is_forced
@@ -86,9 +91,9 @@ class TestClassify:
class TestSubtitleCandidateDestinationName:
def _make(self, lang="fr", is_sdh=False, is_forced=False, ext=".srt", path=None):
def _make(self, lang="fre", is_sdh=False, is_forced=False, ext=".srt", path=None):
return SubtitleCandidate(
source_path=path or Path("/fake/fr.srt"),
source_path=path or Path("/fake/fre.srt"),
language=lang,
is_sdh=is_sdh,
is_forced=is_forced,
@@ -96,19 +101,19 @@ class TestSubtitleCandidateDestinationName:
)
def test_standard(self):
assert self._make().destination_name == "fr.srt"
assert self._make().destination_name == "fre.srt"
def test_sdh(self):
assert self._make(is_sdh=True).destination_name == "fr.sdh.srt"
assert self._make(is_sdh=True).destination_name == "fre.sdh.srt"
def test_forced(self):
assert self._make(is_forced=True).destination_name == "fr.forced.srt"
assert self._make(is_forced=True).destination_name == "fre.forced.srt"
def test_ass_extension(self):
assert self._make(ext=".ass").destination_name == "fr.ass"
assert self._make(ext=".ass").destination_name == "fre.ass"
def test_english_standard(self):
assert self._make(lang="en").destination_name == "en.srt"
assert self._make(lang="eng").destination_name == "eng.srt"
# ---------------------------------------------------------------------------
@@ -119,7 +124,7 @@ class TestSubtitleCandidateDestinationName:
class TestSubtitleScanner:
def _scanner(self, languages=None, min_size_kb=0, keep_sdh=True, keep_forced=True):
return SubtitleScanner(
languages=languages or ["fr", "en"],
languages=languages or ["fre", "eng"],
min_size_kb=min_size_kb,
keep_sdh=keep_sdh,
keep_forced=keep_forced,
@@ -131,31 +136,43 @@ class TestSubtitleScanner:
return video
def test_finds_adjacent_subtitle(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fre.srt").write_text("subtitle content")
candidates = self._scanner().scan(video)
assert len(candidates) == 1
assert candidates[0].language == "fre"
def test_finds_adjacent_subtitle_legacy_639_1(self, tmp_path):
# Reading existing media libraries: ``fr.srt`` is still recognized as
# French and classified canonically as ``fre`` — covers user libraries
# written before the ISO 639-2/B migration.
video = self._video(tmp_path)
(tmp_path / "fr.srt").write_text("subtitle content")
candidates = self._scanner().scan(video)
assert len(candidates) == 1
assert candidates[0].language == "fr"
assert candidates[0].language == "fre"
def test_finds_multiple_languages(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fr.srt").write_text("fr subtitle")
(tmp_path / "en.srt").write_text("en subtitle")
(tmp_path / "fre.srt").write_text("fr subtitle")
(tmp_path / "eng.srt").write_text("en subtitle")
candidates = self._scanner().scan(video)
langs = {c.language for c in candidates}
assert langs == {"fr", "en"}
assert langs == {"fre", "eng"}
def test_scans_subs_subfolder(self, tmp_path):
video = self._video(tmp_path)
subs = tmp_path / "Subs"
subs.mkdir()
(subs / "fr.srt").write_text("subtitle")
(subs / "fre.srt").write_text("subtitle")
candidates = self._scanner().scan(video)
assert any(c.language == "fr" for c in candidates)
assert any(c.language == "fre" for c in candidates)
def test_filters_unknown_language(self, tmp_path):
video = self._video(tmp_path)
@@ -166,14 +183,14 @@ class TestSubtitleScanner:
def test_filters_wrong_language(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "de.srt").write_text("german subtitle")
(tmp_path / "ger.srt").write_text("german subtitle")
candidates = self._scanner(languages=["fr"]).scan(video)
candidates = self._scanner(languages=["fre"]).scan(video)
assert len(candidates) == 0
def test_filters_too_small_file(self, tmp_path):
video = self._video(tmp_path)
small = tmp_path / "fr.srt"
small = tmp_path / "fre.srt"
small.write_bytes(b"x") # 1 byte, well below any min_size_kb
candidates = self._scanner(min_size_kb=10).scan(video)
@@ -181,21 +198,21 @@ class TestSubtitleScanner:
def test_filters_sdh_when_not_wanted(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fr.sdh.srt").write_text("sdh subtitle")
(tmp_path / "fre.sdh.srt").write_text("sdh subtitle")
candidates = self._scanner(keep_sdh=False).scan(video)
assert len(candidates) == 0
def test_filters_forced_when_not_wanted(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fr.forced.srt").write_text("forced subtitle")
(tmp_path / "fre.forced.srt").write_text("forced subtitle")
candidates = self._scanner(keep_forced=False).scan(video)
assert len(candidates) == 0
def test_keeps_sdh_when_wanted(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fr.sdh.srt").write_text("sdh subtitle")
(tmp_path / "fre.sdh.srt").write_text("sdh subtitle")
candidates = self._scanner(keep_sdh=True).scan(video)
assert len(candidates) == 1
@@ -203,8 +220,8 @@ class TestSubtitleScanner:
def test_ignores_non_subtitle_files(self, tmp_path):
video = self._video(tmp_path)
(tmp_path / "fr.nfo").write_text("nfo file")
(tmp_path / "fr.jpg").write_bytes(b"image")
(tmp_path / "fre.nfo").write_text("nfo file")
(tmp_path / "fre.jpg").write_bytes(b"image")
candidates = self._scanner().scan(video)
assert len(candidates) == 0
+289
View File
@@ -0,0 +1,289 @@
"""Tests for subtitle value objects, entities, and the ``utils`` service.
Targets the quick-win surface of the subtitle domain that was largely
uncovered:
- ``TestSubtitleFormat`` — extension matching (case-insensitive).
- ``TestSubtitleLanguage`` — token matching (case-insensitive).
- ``TestSubtitleCandidateDestName`` — ``destination_name`` property:
standard / SDH / forced naming, error on missing language or format.
- ``TestSubtitleCandidateRepr`` — debug repr for embedded vs external.
- ``TestMediaSubtitleMetadata`` — ``all_tracks`` / ``total_count`` /
``unresolved_tracks``.
- ``TestAvailableSubtitles`` — utility dedup by (lang, type).
- ``TestSubtitleRuleSet`` — scope inheritance + ``override`` mutation +
``to_dict`` shape.
All pure-Python — no I/O.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from alfred.domain.subtitles.aggregates import SubtitleRuleSet
from alfred.domain.subtitles.entities import MediaSubtitleMetadata, SubtitleCandidate
from alfred.domain.subtitles.services.utils import available_subtitles
from alfred.domain.subtitles.value_objects import (
RuleScope,
SubtitleFormat,
SubtitleLanguage,
SubtitleType,
)
# --------------------------------------------------------------------------- #
# Value objects #
# --------------------------------------------------------------------------- #
class TestSubtitleFormat:
    """``SubtitleFormat.matches_extension`` behaviour."""

    def test_matches_extension_case_insensitive(self):
        """The extension check ignores case."""
        srt = SubtitleFormat(id="srt", extensions=[".srt"])
        for accepted in (".srt", ".SRT"):
            assert srt.matches_extension(accepted)
        assert not srt.matches_extension(".ass")

    def test_multiple_extensions(self):
        """Every listed extension matches; unlisted ones do not."""
        ass = SubtitleFormat(id="ass", extensions=[".ass", ".ssa"])
        for accepted in (".ass", ".ssa", ".SSA"):
            assert ass.matches_extension(accepted)
        assert not ass.matches_extension(".srt")
class TestSubtitleLanguage:
    """``SubtitleLanguage.matches_token`` behaviour."""

    def test_matches_token_case_insensitive(self):
        """Listed tokens match in any casing; other tokens do not."""
        french = SubtitleLanguage(code="fra", tokens=["fr", "fre", "french"])
        for accepted in ("fr", "FRENCH", "French"):
            assert french.matches_token(accepted)
        assert not french.matches_token("eng")
# --------------------------------------------------------------------------- #
# SubtitleCandidate #
# --------------------------------------------------------------------------- #
# Hand-built value objects shared by the SubtitleCandidate tests below.
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
# NOTE(review): the code here is "fra" (ISO 639-2/T) while "fre" (the
# ISO 639-2/B form) is listed only as a token; the expected names and reprs
# below embed "fra" — confirm this is intended given the 639-2/B migration.
FRA = SubtitleLanguage(code="fra", tokens=["fr", "fre"])
class TestSubtitleCandidateDestName:
    """``SubtitleCandidate.destination_name`` per subtitle type and on missing data."""

    @staticmethod
    def _french_srt(kind):
        """Build a French SRT candidate with the given subtitle type."""
        return SubtitleCandidate(language=FRA, format=SRT, subtitle_type=kind)

    def test_standard(self):
        candidate = self._french_srt(SubtitleType.STANDARD)
        assert candidate.destination_name == "fra.srt"

    def test_sdh(self):
        candidate = self._french_srt(SubtitleType.SDH)
        assert candidate.destination_name == "fra.sdh.srt"

    def test_forced(self):
        candidate = self._french_srt(SubtitleType.FORCED)
        assert candidate.destination_name == "fra.forced.srt"

    def test_unknown_treated_as_standard(self):
        candidate = self._french_srt(SubtitleType.UNKNOWN)
        # UNKNOWN contributes no suffix, so the name equals the standard one.
        assert candidate.destination_name == "fra.srt"

    def test_missing_language_raises(self):
        candidate = SubtitleCandidate(language=None, format=SRT)
        with pytest.raises(ValueError, match="language or format missing"):
            _ = candidate.destination_name

    def test_missing_format_raises(self):
        candidate = SubtitleCandidate(language=FRA, format=None)
        with pytest.raises(ValueError, match="language or format missing"):
            _ = candidate.destination_name

    def test_extension_dot_stripped(self):
        # The format extension already carries a leading dot (".srt");
        # the generated name must not end up with a doubled dot.
        name = SubtitleCandidate(language=FRA, format=SRT).destination_name
        assert name.endswith(".srt")
        assert ".." not in name
class TestSubtitleCandidateRepr:
    """Debug ``repr`` of embedded, external and unresolved candidates."""

    def test_embedded_repr(self):
        """An embedded track shows its language code and the word 'embedded'."""
        text = repr(
            SubtitleCandidate(
                language=FRA, format=None, is_embedded=True, confidence=1.0
            )
        )
        for fragment in ("fra", "embedded"):
            assert fragment in text

    def test_external_repr_uses_filename(self, tmp_path):
        """An external track shows language, file name and confidence."""
        srt_file = tmp_path / "fr.srt"
        srt_file.write_text("")
        candidate = SubtitleCandidate(
            language=FRA, format=SRT, file_path=srt_file, confidence=0.85
        )
        text = repr(candidate)
        for fragment in ("fra", "fr.srt", "0.85"):
            assert fragment in text

    def test_unresolved_repr(self):
        """A candidate with no language and no format renders a '?' placeholder."""
        text = repr(SubtitleCandidate(language=None, format=None))
        assert "?" in text
# --------------------------------------------------------------------------- #
# MediaSubtitleMetadata #
# --------------------------------------------------------------------------- #
class TestMediaSubtitleMetadata:
    """``all_tracks`` / ``total_count`` / ``unresolved_tracks`` on the metadata entity."""

    @staticmethod
    def _movie(**track_lists):
        """Build movie metadata without a media id, forwarding any track lists."""
        return MediaSubtitleMetadata(media_id=None, media_type="movie", **track_lists)

    def test_empty(self):
        metadata = self._movie()
        assert metadata.all_tracks == []
        assert metadata.total_count == 0
        assert metadata.unresolved_tracks == []

    def test_aggregates_embedded_and_external(self):
        embedded = SubtitleCandidate(language=FRA, format=None, is_embedded=True)
        external = SubtitleCandidate(
            language=FRA, format=SRT, file_path=Path("/x.srt")
        )
        metadata = self._movie(
            embedded_tracks=[embedded],
            external_tracks=[external],
        )
        assert metadata.total_count == 2
        assert metadata.all_tracks == [embedded, external]

    def test_unresolved_tracks_only_external_with_none_lang(self):
        # An embedded track without a language must NOT be reported as
        # unresolved (the property only iterates external_tracks).
        embedded_without_lang = SubtitleCandidate(
            language=None, format=None, is_embedded=True
        )
        external_with_lang = SubtitleCandidate(
            language=FRA, format=SRT, file_path=Path("/a.srt")
        )
        external_without_lang = SubtitleCandidate(
            language=None, format=SRT, file_path=Path("/b.srt")
        )
        metadata = self._movie(
            embedded_tracks=[embedded_without_lang],
            external_tracks=[external_with_lang, external_without_lang],
        )
        assert metadata.unresolved_tracks == [external_without_lang]
# --------------------------------------------------------------------------- #
# available_subtitles utility #
# --------------------------------------------------------------------------- #
class TestAvailableSubtitles:
    """``available_subtitles`` deduplicates tracks by (language, subtitle type)."""

    def test_dedup_by_lang_and_type(self):
        """Duplicate (language, type) pairs collapse; first-seen order is kept."""
        english = SubtitleLanguage(code="eng", tokens=["en"])
        specs = [
            (FRA, SubtitleType.STANDARD),
            (FRA, SubtitleType.STANDARD),
            (FRA, SubtitleType.SDH),
            (english, SubtitleType.STANDARD),
        ]
        tracks = [
            SubtitleCandidate(language=language, format=SRT, subtitle_type=kind)
            for language, kind in specs
        ]
        unique = available_subtitles(tracks)
        seen = [(track.language.code, track.subtitle_type) for track in unique]
        assert seen == [
            ("fra", SubtitleType.STANDARD),
            ("fra", SubtitleType.SDH),
            ("eng", SubtitleType.STANDARD),
        ]

    def test_none_language_treated_as_key(self):
        # Tracks without a language all land in one None-keyed bucket.
        unknowns = [
            SubtitleCandidate(
                language=None, format=SRT, subtitle_type=SubtitleType.UNKNOWN
            )
            for _ in range(2)
        ]
        assert len(available_subtitles(unknowns)) == 1

    def test_empty(self):
        """No input tracks yields an empty list."""
        nothing = available_subtitles([])
        assert nothing == []
# --------------------------------------------------------------------------- #
# SubtitleRuleSet inheritance #
# --------------------------------------------------------------------------- #
class TestSubtitleRuleSet:
    """Scope inheritance, ``override`` mutation and ``to_dict`` shape."""

    @staticmethod
    def _show_scope():
        """Return the show-level scope used by the inheritance/serialization tests."""
        return RuleScope(level="show", identifier="tt1")

    def test_global_default_uses_kb_defaults(self):
        resolved = SubtitleRuleSet.global_default().resolve()
        # Loaded from subtitles.yaml — defaults must be non-empty.
        assert resolved.preferred_languages
        assert resolved.preferred_formats
        assert 0 < resolved.min_confidence <= 1

    def test_override_persists(self):
        rule_set = SubtitleRuleSet.global_default()
        rule_set.override(languages=["eng"], min_confidence=0.9)
        resolved = rule_set.resolve()
        assert resolved.preferred_languages == ["eng"]
        assert resolved.min_confidence == 0.9

    def test_override_partial_keeps_parent_for_unset_fields(self):
        root = SubtitleRuleSet.global_default()
        show_rules = SubtitleRuleSet(scope=self._show_scope(), parent=root)
        show_rules.override(languages=["jpn"])
        resolved = show_rules.resolve()
        assert resolved.preferred_languages == ["jpn"]
        # min_confidence not overridden at child or parent → falls back to defaults
        assert resolved.min_confidence == root.resolve().min_confidence

    def test_to_dict_only_emits_set_deltas(self):
        rule_set = SubtitleRuleSet(scope=self._show_scope())
        rule_set.override(languages=["fra"])
        serialized = rule_set.to_dict()
        assert serialized["scope"] == {"level": "show", "identifier": "tt1"}
        assert serialized["override"] == {"languages": ["fra"]}

    def test_to_dict_full_override(self):
        rule_set = SubtitleRuleSet(scope=RuleScope(level="global"))
        rule_set.override(
            languages=["fra"],
            formats=["srt"],
            types=["standard"],
            format_priority=["srt", "ass"],
            min_confidence=0.8,
        )
        emitted = rule_set.to_dict()["override"]
        expected = {
            "languages": ["fra"],
            "formats": ["srt"],
            "types": ["standard"],
            "format_priority": ["srt", "ass"],
            "min_confidence": 0.8,
        }
        for key, value in expected.items():
            assert emitted[key] == value

    def test_min_confidence_zero_is_respected(self):
        # `_min_confidence or base.min_confidence` would be a bug here — the
        # code uses `is not None` explicitly. Verify 0.0 doesn't fall back.
        rule_set = SubtitleRuleSet.global_default()
        rule_set.override(min_confidence=0.0)
        resolved = rule_set.resolve()
        assert resolved.min_confidence == 0.0
+341 -107
View File
@@ -1,10 +1,40 @@
"""Tests for TV Show domain — entities and value objects."""
"""Tests for the TV Show domain — entities, value objects, aggregate behavior.
Rewritten for the post-refactor aggregate:
* ``TVShow`` is the root, owning ``seasons: dict[SeasonNumber, Season]``.
* ``Season`` owns ``episodes: dict[EpisodeNumber, Episode]`` and tracks
``expected_episodes`` + ``aired_episodes``.
* ``Episode`` carries ``audio_tracks`` + ``subtitle_tracks`` and exposes
language helpers following contract C+ (``str`` direct compare, ``Language``
cross-format).
* No back-references on Season/Episode — they are reached through the root.
* Sole sanctioned mutation entry point: ``TVShow.add_episode(ep)``.
Coverage:
* ``TestShowStatus`` — including the extended TMDB string mapping.
* ``TestSeasonNumber`` / ``TestEpisodeNumber`` — value-object validation.
* ``TestEpisode`` — basic shape, file presence, audio/subtitle helpers.
* ``TestSeason`` — episode insertion, completeness vs aired, missing list.
* ``TestTVShow`` — aggregate invariants, ``add_episode``, ``collection_status``,
``missing_episodes``, ``is_complete_series``.
"""
from __future__ import annotations
import pytest
from alfred.domain.shared.exceptions import ValidationError
from alfred.domain.shared.media import AudioTrack, SubtitleTrack
from alfred.domain.shared.value_objects import ImdbId, Language
from alfred.domain.tv_shows.entities import Episode, Season, TVShow
from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber, ShowStatus
from alfred.domain.tv_shows.value_objects import (
CollectionStatus,
EpisodeNumber,
SeasonNumber,
ShowStatus,
)
# ---------------------------------------------------------------------------
# ShowStatus
@@ -20,11 +50,25 @@ class TestShowStatus:
def test_from_string_case_insensitive(self):
assert ShowStatus.from_string("ONGOING") == ShowStatus.ONGOING
assert ShowStatus.from_string("Ended") == ShowStatus.ENDED
assert ShowStatus.from_string(" Ended ") == ShowStatus.ENDED
def test_from_string_unknown(self):
assert ShowStatus.from_string("cancelled") == ShowStatus.UNKNOWN
@pytest.mark.parametrize(
"raw,expected",
[
("Returning Series", ShowStatus.ONGOING),
("In Production", ShowStatus.ONGOING),
("Pilot", ShowStatus.ONGOING),
("Planned", ShowStatus.ONGOING),
("Canceled", ShowStatus.ENDED),
("Cancelled", ShowStatus.ENDED),
],
)
def test_from_string_tmdb_mappings(self, raw, expected):
assert ShowStatus.from_string(raw) == expected
def test_from_string_empty_or_unknown(self):
assert ShowStatus.from_string("") == ShowStatus.UNKNOWN
assert ShowStatus.from_string("borked") == ShowStatus.UNKNOWN
# ---------------------------------------------------------------------------
@@ -34,12 +78,10 @@ class TestShowStatus:
class TestSeasonNumber:
def test_valid_season(self):
s = SeasonNumber(1)
assert s.value == 1
assert SeasonNumber(1).value == 1
def test_season_zero_is_specials(self):
s = SeasonNumber(0)
assert s.is_special()
assert SeasonNumber(0).is_special()
def test_normal_season_not_special(self):
assert not SeasonNumber(3).is_special()
@@ -69,8 +111,7 @@ class TestSeasonNumber:
class TestEpisodeNumber:
def test_valid_episode(self):
e = EpisodeNumber(1)
assert e.value == 1
assert EpisodeNumber(1).value == 1
def test_zero_raises(self):
with pytest.raises(ValidationError):
@@ -91,64 +132,107 @@ class TestEpisodeNumber:
# ---------------------------------------------------------------------------
# TVShow entity
# Episode entity
# ---------------------------------------------------------------------------
class TestTVShow:
def _make(
self, imdb_id="tt0903747", title="Breaking Bad", seasons=5, status="ended"
):
return TVShow(
imdb_id=imdb_id, title=title, seasons_count=seasons, status=status
class TestEpisode:
def _ep(self, *, season=1, episode=1, title="Pilot", **kwargs) -> Episode:
return Episode(
season_number=season,
episode_number=episode,
title=title,
**kwargs,
)
def test_basic_creation(self):
show = self._make()
assert show.title == "Breaking Bad"
assert show.seasons_count == 5
def test_basic_creation_coerces_numbers(self):
e = self._ep()
assert e.title == "Pilot"
assert isinstance(e.season_number, SeasonNumber)
assert isinstance(e.episode_number, EpisodeNumber)
def test_coerces_string_imdb_id(self):
show = self._make()
from alfred.domain.shared.value_objects import ImdbId
def test_get_filename_format(self):
e = self._ep(season=1, episode=5, title="Gray Matter")
filename = e.get_filename()
assert filename.startswith("S01E05")
assert "Gray.Matter" in filename
assert isinstance(show.imdb_id, ImdbId)
def test_has_file_false_when_no_path(self):
e = self._ep()
assert not e.has_file()
assert not e.is_downloaded()
def test_coerces_string_status(self):
show = self._make(status="ongoing")
assert show.status == ShowStatus.ONGOING
def test_str_format(self):
e = self._ep(season=2, episode=3, title="Bit by a Dead Bee")
s = str(e)
assert "S02E03" in s
assert "Bit by a Dead Bee" in s
def test_is_ongoing(self):
show = self._make(status="ongoing")
assert show.is_ongoing()
assert not show.is_ended()
# ── Audio helpers ──────────────────────────────────────────────────
def test_is_ended(self):
show = self._make(status="ended")
assert show.is_ended()
assert not show.is_ongoing()
def test_has_audio_in_with_str(self):
e = self._ep(
audio_tracks=[
AudioTrack(0, "eac3", 6, "5.1", "eng"),
AudioTrack(1, "ac3", 6, "5.1", "fre"),
]
)
assert e.has_audio_in("eng") is True
assert e.has_audio_in("ENG") is True # case-insensitive
assert e.has_audio_in("ger") is False
def test_negative_seasons_raises(self):
with pytest.raises(ValueError):
TVShow(imdb_id="tt0903747", title="X", seasons_count=-1, status="ended")
def test_has_audio_in_with_language(self):
lang = Language(
iso="fre",
english_name="French",
native_name="Français",
aliases=("fr", "fra", "french"),
)
e = self._ep(audio_tracks=[AudioTrack(0, "ac3", 6, "5.1", "fr")])
# str query "fre" wouldn't match "fr" directly — but Language does cross-format
assert e.has_audio_in(lang) is True
assert e.has_audio_in("fre") is False # direct compare misses
def test_invalid_imdb_id_type_raises(self):
with pytest.raises(ValueError):
TVShow(imdb_id=12345, title="X", seasons_count=1, status="ended") # type: ignore
def test_audio_languages_dedup_in_order(self):
e = self._ep(
audio_tracks=[
AudioTrack(0, "ac3", 6, "5.1", "eng"),
AudioTrack(1, "ac3", 6, "5.1", "fre"),
AudioTrack(2, "aac", 2, "stereo", "eng"), # dupe
AudioTrack(3, "aac", 2, "stereo", None), # skipped
]
)
assert e.audio_languages() == ["eng", "fre"]
def test_get_folder_name_replaces_spaces(self):
show = self._make(title="Breaking Bad")
assert show.get_folder_name() == "Breaking.Bad"
# ── Subtitle helpers ───────────────────────────────────────────────
def test_get_folder_name_strips_special_chars(self):
show = self._make(title="It's Always Sunny")
name = show.get_folder_name()
assert "'" not in name
def test_has_subtitles_in(self):
e = self._ep(subtitle_tracks=[SubtitleTrack(0, "subrip", "fre")])
assert e.has_subtitles_in("fre") is True
assert e.has_subtitles_in("eng") is False
def test_str_repr(self):
show = self._make()
assert "Breaking Bad" in str(show)
assert "tt0903747" in repr(show)
def test_has_forced_subs(self):
e = self._ep(
subtitle_tracks=[
SubtitleTrack(0, "subrip", "eng", is_forced=False),
SubtitleTrack(1, "subrip", "eng", is_forced=True),
]
)
assert e.has_forced_subs() is True
def test_has_forced_subs_false_when_none(self):
e = self._ep(subtitle_tracks=[SubtitleTrack(0, "subrip", "eng")])
assert e.has_forced_subs() is False
def test_subtitle_languages_dedup_in_order(self):
e = self._ep(
subtitle_tracks=[
SubtitleTrack(0, "subrip", "eng"),
SubtitleTrack(1, "subrip", "fre"),
SubtitleTrack(2, "subrip", "eng"),
]
)
assert e.subtitle_languages() == ["eng", "fre"]
# ---------------------------------------------------------------------------
@@ -157,76 +241,226 @@ class TestTVShow:
class TestSeason:
def test_basic_creation(self):
s = Season(show_imdb_id="tt0903747", season_number=1, episode_count=7)
assert s.episode_count == 7
def _ep(self, episode: int) -> Episode:
return Episode(season_number=1, episode_number=episode, title=f"Ep {episode}")
def test_basic_creation_coerces_season_number(self):
s = Season(season_number=1)
assert isinstance(s.season_number, SeasonNumber)
assert s.episode_count == 0
assert s.episodes == {}
def test_get_folder_name_normal(self):
s = Season(show_imdb_id="tt0903747", season_number=2, episode_count=13)
assert s.get_folder_name() == "Season 02"
assert Season(season_number=2).get_folder_name() == "Season 02"
def test_get_folder_name_specials(self):
s = Season(show_imdb_id="tt0903747", season_number=0, episode_count=3)
s = Season(season_number=0)
assert s.get_folder_name() == "Specials"
assert s.is_special()
def test_negative_episode_count_raises(self):
def test_negative_aired_raises(self):
with pytest.raises(ValueError):
Season(show_imdb_id="tt0903747", season_number=1, episode_count=-1)
Season(season_number=1, aired_episodes=-1)
def test_str(self):
s = Season(
show_imdb_id="tt0903747",
season_number=1,
episode_count=7,
name="Pilot Season",
)
def test_aired_cannot_exceed_expected(self):
with pytest.raises(ValueError):
Season(season_number=1, expected_episodes=5, aired_episodes=6)
def test_add_episode_rejects_mismatched_season(self):
s = Season(season_number=1)
ep = Episode(season_number=2, episode_number=1, title="x")
with pytest.raises(ValueError):
s.add_episode(ep)
def test_add_episode_replaces_same_number(self):
s = Season(season_number=1)
s.add_episode(self._ep(1))
s.add_episode(Episode(season_number=1, episode_number=1, title="Replaced"))
assert s.episodes[EpisodeNumber(1)].title == "Replaced"
def test_str_uses_name_when_present(self):
s = Season(season_number=1, name="Pilot Season")
assert "Pilot Season" in str(s)
# ── Completeness vs aired ──────────────────────────────────────────
def test_is_complete_unknown_aired_is_false(self):
# Conservative: no aired count → cannot claim complete
s = Season(season_number=1)
s.add_episode(self._ep(1))
assert s.is_complete() is False
def test_is_complete_when_owning_all_aired(self):
s = Season(season_number=1, aired_episodes=3)
for i in (1, 2, 3):
s.add_episode(self._ep(i))
assert s.is_complete() is True
def test_is_complete_zero_aired_is_trivially_true(self):
s = Season(season_number=1, aired_episodes=0)
assert s.is_complete() is True
def test_partial_when_missing_aired_episodes(self):
s = Season(season_number=1, aired_episodes=3)
s.add_episode(self._ep(1))
assert s.is_complete() is False
def test_is_fully_aired(self):
s = Season(season_number=1, expected_episodes=10, aired_episodes=10)
assert s.is_fully_aired() is True
def test_is_fully_aired_false_when_in_flight(self):
s = Season(season_number=1, expected_episodes=10, aired_episodes=4)
assert s.is_fully_aired() is False
def test_is_fully_aired_false_with_unknowns(self):
assert Season(season_number=1).is_fully_aired() is False
def test_missing_episodes_when_partial(self):
s = Season(season_number=1, aired_episodes=5)
s.add_episode(self._ep(1))
s.add_episode(self._ep(3))
missing = [n.value for n in s.missing_episodes()]
assert missing == [2, 4, 5]
def test_missing_episodes_empty_when_complete(self):
s = Season(season_number=1, aired_episodes=2)
s.add_episode(self._ep(1))
s.add_episode(self._ep(2))
assert s.missing_episodes() == []
def test_missing_episodes_empty_when_unknown_aired(self):
# Without an aired count we cannot reason about gaps
s = Season(season_number=1)
s.add_episode(self._ep(2))
assert s.missing_episodes() == []
# ---------------------------------------------------------------------------
# Episode entity
# TVShow aggregate root
# ---------------------------------------------------------------------------
class TestEpisode:
class TestTVShow:
def _show(self, **kwargs) -> TVShow:
defaults = dict(
imdb_id="tt0903747",
title="Breaking Bad",
status="ended",
)
defaults.update(kwargs)
return TVShow(**defaults)
# ── Construction & coercion ────────────────────────────────────────
def test_basic_creation(self):
e = Episode(
show_imdb_id="tt0903747",
season_number=1,
episode_number=1,
title="Pilot",
)
assert e.title == "Pilot"
show = self._show(expected_seasons=5)
assert show.title == "Breaking Bad"
assert show.expected_seasons == 5
assert show.seasons == {}
assert show.seasons_count == 0
def test_get_filename_format(self):
e = Episode(
show_imdb_id="tt0903747",
season_number=1,
episode_number=5,
title="Gray Matter",
)
filename = e.get_filename()
assert filename.startswith("S01E05")
assert "Gray.Matter" in filename
def test_coerces_string_imdb_id(self):
assert isinstance(self._show().imdb_id, ImdbId)
def test_has_file_false_when_no_path(self):
e = Episode(
show_imdb_id="tt0903747",
season_number=1,
episode_number=1,
title="Pilot",
)
assert not e.has_file()
assert not e.is_downloaded()
def test_coerces_string_status(self):
assert self._show(status="ongoing").status == ShowStatus.ONGOING
def test_str_format(self):
e = Episode(
show_imdb_id="tt0903747",
season_number=2,
episode_number=3,
title="Bit by a Dead Bee",
)
s = str(e)
assert "S02E03" in s
assert "Bit by a Dead Bee" in s
def test_is_ongoing_and_is_ended(self):
assert self._show(status="ongoing").is_ongoing()
assert self._show(status="ended").is_ended()
def test_negative_expected_seasons_raises(self):
with pytest.raises(ValueError):
self._show(expected_seasons=-1)
def test_invalid_imdb_id_type_raises(self):
with pytest.raises(ValueError):
TVShow(imdb_id=12345, title="X", status="ended") # type: ignore
def test_get_folder_name_replaces_spaces(self):
assert self._show(title="Breaking Bad").get_folder_name() == "Breaking.Bad"
def test_get_folder_name_strips_special_chars(self):
name = self._show(title="It's Always Sunny").get_folder_name()
assert "'" not in name
def test_str_repr(self):
show = self._show()
assert "Breaking Bad" in str(show)
assert "tt0903747" in repr(show)
# ── add_episode — the only sanctioned mutation ─────────────────────
def test_add_episode_creates_missing_season(self):
show = self._show()
show.add_episode(Episode(season_number=1, episode_number=1, title="Pilot"))
assert SeasonNumber(1) in show.seasons
assert show.seasons_count == 1
assert show.episode_count == 1
def test_add_episode_reuses_existing_season(self):
show = self._show()
show.add_episode(Episode(season_number=1, episode_number=1, title="A"))
show.add_episode(Episode(season_number=1, episode_number=2, title="B"))
assert show.seasons_count == 1
assert show.episode_count == 2
def test_add_season_replaces_existing(self):
show = self._show()
s1 = Season(season_number=1, aired_episodes=10)
show.add_season(s1)
s1bis = Season(season_number=1, aired_episodes=5)
show.add_season(s1bis)
assert show.seasons[SeasonNumber(1)] is s1bis
# ── Collection status ──────────────────────────────────────────────
def test_collection_status_empty(self):
assert self._show().collection_status() == CollectionStatus.EMPTY
def test_collection_status_partial_missing_episode(self):
show = self._show()
s = Season(season_number=1, aired_episodes=3)
s.add_episode(Episode(season_number=1, episode_number=1, title="x"))
show.add_season(s)
assert show.collection_status() == CollectionStatus.PARTIAL
def test_collection_status_complete(self):
show = self._show(expected_seasons=1)
s = Season(season_number=1, aired_episodes=2)
for n in (1, 2):
s.add_episode(Episode(season_number=1, episode_number=n, title=f"e{n}"))
show.add_season(s)
assert show.collection_status() == CollectionStatus.COMPLETE
def test_collection_status_partial_when_seasons_missing(self):
# Seasons we own are complete, but expected_seasons says more exist.
show = self._show(expected_seasons=2)
s = Season(season_number=1, aired_episodes=1)
s.add_episode(Episode(season_number=1, episode_number=1, title="x"))
show.add_season(s)
assert show.collection_status() == CollectionStatus.PARTIAL
def test_is_complete_series_requires_ended_and_complete(self):
show = self._show(status="ongoing", expected_seasons=1)
s = Season(season_number=1, aired_episodes=1)
s.add_episode(Episode(season_number=1, episode_number=1, title="x"))
show.add_season(s)
# Ongoing → never "complete series" even if collection is COMPLETE
assert show.is_complete_series() is False
show.status = ShowStatus.ENDED
assert show.is_complete_series() is True
# ── missing_episodes traversal ─────────────────────────────────────
def test_missing_episodes_walks_seasons_in_order(self):
show = self._show()
s2 = Season(season_number=2, aired_episodes=2)
s1 = Season(season_number=1, aired_episodes=3)
s1.add_episode(Episode(season_number=1, episode_number=2, title="x"))
show.add_season(s2)
show.add_season(s1)
missing = [(s.value, e.value) for s, e in show.missing_episodes()]
assert missing == [(1, 1), (1, 3), (2, 1), (2, 2)]
@@ -0,0 +1,228 @@
"""Tests for ``alfred.infrastructure.api.knaben.client.KnabenClient``.
- ``TestInit`` — explicit args override settings; no API key required.
- ``TestMakeRequest`` — error translation: timeout, 404, 429 (rate limit),
generic 5xx, and ``RequestException``.
- ``TestSearch`` — query validation, success path, empty hits, request
parameter wiring (search_field/order_by/etc.), 404 → empty list,
per-result parse failures are swallowed (best-effort parsing).
- ``TestParseTorrent`` — coverage of optional/missing fields and
``int(... or 0)`` coercion for null seeders/leechers.
All HTTP is mocked at ``alfred.infrastructure.api.knaben.client.requests``.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.infrastructure.api.knaben.client import KnabenClient
from alfred.infrastructure.api.knaben.exceptions import (
KnabenAPIError,
KnabenNotFoundError,
)
def _ok_response(json_body):
r = MagicMock()
r.status_code = 200
r.json.return_value = json_body
r.raise_for_status.return_value = None
return r
def _http_error_response(status_code):
    """Return a mock response whose ``raise_for_status()`` raises ``HTTPError``.

    The raised error carries the mock response on its ``response`` attribute,
    and the response reports *status_code*.
    """
    response = MagicMock(status_code=status_code)
    failure = HTTPError(f"{status_code}")
    failure.response = response
    response.raise_for_status.side_effect = failure
    return response
@pytest.fixture
def client():
    """Provide a ``KnabenClient`` pointed at a fake base URL with timeout 5."""
    options = {"base_url": "https://api.knaben.test/v1", "timeout": 5}
    return KnabenClient(**options)
# --------------------------------------------------------------------------- #
# Init #
# --------------------------------------------------------------------------- #
class TestInit:
    """Constructor wiring of ``KnabenClient``."""

    def test_default_base_url(self):
        """With no arguments the client uses the default Knaben base URL."""
        default_client = KnabenClient()
        assert default_client.base_url == "https://api.knaben.org/v1"

    def test_explicit_override(self):
        """Explicit ``base_url`` and ``timeout`` arguments are stored as given."""
        custom = KnabenClient(base_url="https://x", timeout=99)
        assert (custom.base_url, custom.timeout) == ("https://x", 99)
# --------------------------------------------------------------------------- #
# _make_request #
# --------------------------------------------------------------------------- #
class TestMakeRequest:
    """Error translation and request wiring of ``KnabenClient._make_request``."""

    # Patch target: the ``requests.post`` used by the client module.
    _POST = "alfred.infrastructure.api.knaben.client.requests.post"

    def test_timeout(self, client):
        """A ``Timeout`` from requests becomes a ``KnabenAPIError``."""
        with patch(self._POST, side_effect=Timeout("slow")):
            with pytest.raises(KnabenAPIError, match="timeout"):
                client._make_request({"q": "x"})

    def test_http_404(self, client):
        """HTTP 404 is translated to ``KnabenNotFoundError``."""
        with patch(self._POST, return_value=_http_error_response(404)):
            with pytest.raises(KnabenNotFoundError):
                client._make_request({"q": "x"})

    def test_http_429_rate_limit(self, client):
        """HTTP 429 surfaces as a rate-limit ``KnabenAPIError``."""
        with patch(self._POST, return_value=_http_error_response(429)):
            with pytest.raises(KnabenAPIError, match="Rate limit"):
                client._make_request({"q": "x"})

    def test_http_500(self, client):
        """Other HTTP errors mention the status code in the raised error."""
        with patch(self._POST, return_value=_http_error_response(500)):
            with pytest.raises(KnabenAPIError, match="500"):
                client._make_request({"q": "x"})

    def test_request_exception(self, client):
        """A generic ``RequestException`` becomes a connection ``KnabenAPIError``."""
        with patch(self._POST, side_effect=RequestException("net")):
            with pytest.raises(KnabenAPIError, match="connect"):
                client._make_request({"q": "x"})

    def test_posts_json_body(self, client):
        """Parameters travel as the JSON body, together with the client timeout."""
        with patch(self._POST, return_value=_ok_response({"hits": []})) as post:
            client._make_request({"q": "x"})
        sent = post.call_args.kwargs
        # KnabenClient sends params as JSON body, not query string
        assert sent["json"] == {"q": "x"}
        assert sent["timeout"] == 5
# --------------------------------------------------------------------------- #
# search #
# --------------------------------------------------------------------------- #
class TestSearch:
    """Query validation, result parsing and request wiring of ``KnabenClient.search``."""

    # Patch target: the ``requests.post`` used by the client module.
    _POST = "alfred.infrastructure.api.knaben.client.requests.post"

    @pytest.mark.parametrize("bad", ["", None, 42])
    def test_invalid_query(self, client, bad):
        """Empty, ``None`` and non-string queries are rejected."""
        with pytest.raises(ValueError):
            client.search(bad)

    def test_query_too_long(self, client):
        """A 501-character query is rejected as too long."""
        oversized = "a" * 501
        with pytest.raises(ValueError, match="too long"):
            client.search(oversized)

    @patch(_POST)
    def test_success(self, mock_post, client):
        """A single hit is parsed into one result with the expected fields."""
        hit = {
            "title": "Inception.2010.1080p",
            "size": "10 GB",
            "seeders": 500,
            "leechers": 50,
            "magnetUrl": "magnet:?xt=...",
            "hash": "abc",
            "tracker": "rarbg",
            "date": "2020-01-01",
            "category": "movie",
        }
        mock_post.return_value = _ok_response({"hits": [hit]})
        found = client.search("Inception")
        assert len(found) == 1
        first = found[0]
        assert first.title == "Inception.2010.1080p"
        assert first.seeders == 500
        assert first.magnet.startswith("magnet:")
        assert first.info_hash == "abc"

    @patch(_POST)
    def test_empty_hits_returns_empty_list(self, mock_post, client):
        """An empty ``hits`` array yields an empty result list."""
        mock_post.return_value = _ok_response({"hits": []})
        found = client.search("nothing")
        assert found == []

    @patch(_POST)
    def test_404_returns_empty_list(self, mock_post, client):
        """A 404 from the API is reported as "no results", not as an error."""
        mock_post.return_value = _http_error_response(404)
        found = client.search("nothing")
        assert found == []

    @patch(_POST)
    def test_request_parameters(self, mock_post, client):
        """The search request body carries the expected fixed and caller-given fields."""
        mock_post.return_value = _ok_response({"hits": []})
        client.search("Inception", limit=25)
        body = mock_post.call_args.kwargs["json"]
        expected = {
            "query": "Inception",
            "search_field": "title",
            "order_by": "peers",
            "order_direction": "desc",
            "size": 25,
        }
        for key, value in expected.items():
            assert body[key] == value
        # Flags are checked by identity: they must be real booleans.
        assert body["hide_unsafe"] is True
        assert body["hide_xxx"] is True

    @patch(_POST)
    def test_default_limit(self, mock_post, client):
        """Without an explicit ``limit`` the requested size is 10."""
        mock_post.return_value = _ok_response({"hits": []})
        client.search("x")
        body = mock_post.call_args.kwargs["json"]
        assert body["size"] == 10

    @patch(_POST)
    def test_unexpected_exception_propagates(self, mock_post, client):
        # Anything other than KnabenNotFoundError bubbles up.
        mock_post.side_effect = RuntimeError("boom")
        with pytest.raises(RuntimeError):
            client.search("x")
# --------------------------------------------------------------------------- #
# _parse_torrent #
# --------------------------------------------------------------------------- #
class TestParseTorrent:
    """Defaults and field mapping of ``KnabenClient._parse_torrent``."""

    def test_minimal(self, client):
        """An empty payload falls back to placeholder and zero values."""
        parsed = client._parse_torrent({})
        assert parsed.title == "Unknown"
        assert parsed.size == "Unknown"
        assert (parsed.seeders, parsed.leechers) == (0, 0)
        assert parsed.magnet == ""

    def test_null_seeders_coerced_to_zero(self, client):
        """Explicit ``None`` peer counts are coerced to 0."""
        parsed = client._parse_torrent({"seeders": None, "leechers": None})
        assert parsed.seeders == 0
        assert parsed.leechers == 0

    def test_optional_fields_propagated(self, client):
        """Optional payload keys map onto the matching result attributes."""
        payload = {
            "title": "X",
            "size": "1 GB",
            "seeders": 10,
            "leechers": 2,
            "magnetUrl": "magnet:?",
            "hash": "h",
            "tracker": "t",
            "date": "d",
            "category": "c",
        }
        parsed = client._parse_torrent(payload)
        assert parsed.info_hash == "h"
        assert parsed.tracker == "t"
        assert parsed.upload_date == "d"
        assert parsed.category == "c"
@@ -0,0 +1,415 @@
"""Tests for ``alfred.infrastructure.api.qbittorrent.client.QBittorrentClient``.
Exercises every public method against a ``MagicMock`` ``requests.Session``
attached to the client. Auth state (``self._authenticated``) is asserted
explicitly so the implicit auto-login behavior of mutation methods is
covered.
Scope:
- ``TestInit`` — host/credentials wiring + Session attached.
- ``TestMakeRequest`` — verb dispatch (GET/POST), JSON vs text fallback,
error translation for timeout/403/5xx/RequestException, invalid verb.
- ``TestLogin`` — happy path, non-"Ok." rejection, propagation from
underlying API error.
- ``TestGetTorrents`` — auto-login, non-list payload safety, per-item parse
failures.
- ``TestAddTorrent`` — magnet payload wiring, optional category/save_path,
paused flag, unexpected response.
- ``TestMutations`` — pause/resume/delete/recheck/set_location all wire the
hash and propagate errors.
- ``TestFindByName`` — exact match, case-insensitive match, save_path fallback,
no match.
- ``TestParseTorrent`` — progress percentage conversion, defaults.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.infrastructure.api.qbittorrent.client import QBittorrentClient
from alfred.infrastructure.api.qbittorrent.dto import TorrentInfo
from alfred.infrastructure.api.qbittorrent.exceptions import (
QBittorrentAPIError,
QBittorrentAuthError,
)
def _resp(body, *, status=200, json_decodable=True):
r = MagicMock()
r.status_code = status
r.raise_for_status.return_value = None
if json_decodable:
r.json.return_value = body
else:
r.json.side_effect = ValueError("not json")
r.text = body
return r
def _http_error(status_code):
    """Build a mock response whose ``raise_for_status()`` raises ``HTTPError``.

    The raised error carries the mock itself as ``.response`` and the mock
    reports ``status_code`` as its ``.status_code``.
    """
    failing = MagicMock()
    failing.status_code = status_code
    error = HTTPError(f"{status_code}")
    error.response = failing
    failing.raise_for_status.side_effect = error
    return failing
@pytest.fixture
def client():
    """Provide a ``QBittorrentClient`` whose ``session`` is a ``MagicMock``."""
    qbit = QBittorrentClient(
        host="http://qbit.test:8080",
        username="admin",
        password="secret",
        timeout=5,
    )
    # Swap the session for a mock so each test scripts the responses.
    qbit.session = MagicMock()
    return qbit
# --------------------------------------------------------------------------- #
# Init #
# --------------------------------------------------------------------------- #
class TestInit:
    """Constructor wiring of ``QBittorrentClient``."""

    def test_explicit_args(self):
        """Explicit arguments are stored and the client starts unauthenticated."""
        kwargs = {"host": "http://x:1", "username": "u", "password": "p", "timeout": 99}
        qbit = QBittorrentClient(**kwargs)
        assert qbit.host == kwargs["host"]
        assert qbit.username == kwargs["username"]
        assert qbit.password == kwargs["password"]
        assert qbit.timeout == kwargs["timeout"]
        assert qbit._authenticated is False
# --------------------------------------------------------------------------- #
# _make_request #
# --------------------------------------------------------------------------- #
class TestMakeRequest:
    """Verb dispatch, body decoding and error translation of ``_make_request``."""

    def test_invalid_verb(self, client):
        """An unsupported verb is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="HTTP"):
            client._make_request("PATCH", "/api/v2/foo")

    def test_get_returns_json(self, client):
        """GET goes through ``session.get`` once and yields the JSON body."""
        client.session.get.return_value = _resp({"k": "v"})
        decoded = client._make_request("GET", "/x", data={"a": 1})
        assert decoded == {"k": "v"}
        client.session.get.assert_called_once()

    def test_post_returns_text_when_not_json(self, client):
        """A POST reply that is not JSON falls back to the raw text."""
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        raw = client._make_request("POST", "/x", data={"a": 1})
        assert raw == "Ok."

    def test_timeout(self, client):
        """``Timeout`` from the session surfaces as ``QBittorrentAPIError``."""
        client.session.get.side_effect = Timeout("slow")
        with pytest.raises(QBittorrentAPIError, match="timeout"):
            client._make_request("GET", "/x")

    def test_http_403_auth_error(self, client):
        """HTTP 403 surfaces as ``QBittorrentAuthError``."""
        client.session.post.return_value = _http_error(403)
        with pytest.raises(QBittorrentAuthError):
            client._make_request("POST", "/x")

    def test_http_500_generic(self, client):
        """HTTP 500 surfaces as ``QBittorrentAPIError`` mentioning the status."""
        client.session.get.return_value = _http_error(500)
        with pytest.raises(QBittorrentAPIError, match="500"):
            client._make_request("GET", "/x")

    def test_request_exception(self, client):
        """A generic ``RequestException`` surfaces as ``QBittorrentAPIError``."""
        client.session.get.side_effect = RequestException("net down")
        with pytest.raises(QBittorrentAPIError, match="connect"):
            client._make_request("GET", "/x")
# --------------------------------------------------------------------------- #
# Login #
# --------------------------------------------------------------------------- #
class TestLogin:
    """``login()`` outcomes and their effect on ``_authenticated``."""

    def test_login_success(self, client):
        """An ``"Ok."`` reply returns ``True`` and sets the auth flag."""
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        logged_in = client.login()
        assert logged_in is True
        assert client._authenticated is True

    def test_login_wrong_credentials(self, client):
        """A ``"Fails."`` reply raises and leaves the auth flag unset."""
        client.session.post.return_value = _resp("Fails.", json_decodable=False)
        with pytest.raises(QBittorrentAuthError):
            client.login()
        assert client._authenticated is False

    def test_login_api_error_translated_to_auth_error(self, client):
        """An HTTP 403 during login raises ``QBittorrentAuthError``."""
        client.session.post.return_value = _http_error(403)
        with pytest.raises(QBittorrentAuthError):
            client.login()
# --------------------------------------------------------------------------- #
# get_torrents (auto-login behavior) #
# --------------------------------------------------------------------------- #
class TestGetTorrents:
    """``get_torrents()``: auto-login, parameter wiring and defensive parsing."""

    def test_auto_logs_in_then_fetches(self, client):
        """An unauthenticated client logs in, then returns the parsed listing."""
        # Order: 1) login POST, 2) torrents/info GET
        raw_torrent = {
            "hash": "h1",
            "name": "Foo",
            "size": 100,
            "progress": 0.5,
            "state": "downloading",
            "dlspeed": 1024,
            "upspeed": 512,
            "eta": 60,
            "num_seeds": 5,
            "num_leechs": 1,
            "ratio": 0.1,
            "category": "movies",
            "save_path": "/dl",
        }
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        client.session.get.return_value = _resp([raw_torrent])
        listing = client.get_torrents()
        assert len(listing) == 1
        first = listing[0]
        assert first.name == "Foo"
        assert first.progress == 50.0  # 0.5 → 50%
        assert client._authenticated is True

    def test_non_list_returns_empty(self, client):
        """A payload that is not a list yields an empty result."""
        client._authenticated = True
        client.session.get.return_value = _resp({"oops": "bad"})
        assert client.get_torrents() == []

    def test_filter_and_category_propagated(self, client):
        """``filter`` and ``category`` are sent as the GET ``params``."""
        client._authenticated = True
        client.session.get.return_value = _resp([])
        client.get_torrents(filter="completed", category="movies")
        sent_params = client.session.get.call_args.kwargs["params"]
        assert sent_params == {"filter": "completed", "category": "movies"}

    def test_skips_unparseable_torrents(self, client):
        """Items whose parsing raises are dropped from the result."""
        client._authenticated = True
        # _parse_torrent uses .get on every field with sensible defaults, so
        # malformed dicts almost never raise — patch the parser to force it.
        client.session.get.return_value = _resp([{"good": True}])
        with patch.object(client, "_parse_torrent", side_effect=Exception("nope")):
            assert client.get_torrents() == []
# --------------------------------------------------------------------------- #
# add_torrent #
# --------------------------------------------------------------------------- #
class TestAddTorrent:
    """``add_torrent()``: return value and POST payload wiring."""

    def test_add_success(self, client):
        """An ``"Ok."`` reply makes ``add_torrent`` return ``True``."""
        client._authenticated = True
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        added = client.add_torrent("magnet:?xt=foo")
        assert added is True

    def test_add_unexpected_response(self, client):
        """Any other reply makes ``add_torrent`` return ``False``."""
        client._authenticated = True
        client.session.post.return_value = _resp("Fails.", json_decodable=False)
        added = client.add_torrent("magnet:?xt=foo")
        assert added is False

    def test_add_payload(self, client):
        """Magnet, category, save path and paused flag land in the POST data."""
        client._authenticated = True
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        client.add_torrent(
            "magnet:?xt=foo", category="movies", save_path="/dl", paused=True
        )
        sent = client.session.post.call_args.kwargs["data"]
        assert sent["urls"] == "magnet:?xt=foo"
        assert sent["paused"] == "true"
        assert sent["category"] == "movies"
        assert sent["savepath"] == "/dl"

    def test_paused_false_serialized(self, client):
        """Without ``paused``, the flag is sent as the string ``"false"``."""
        client._authenticated = True
        client.session.post.return_value = _resp("Ok.", json_decodable=False)
        client.add_torrent("magnet:?xt=foo")
        sent = client.session.post.call_args.kwargs["data"]
        assert sent["paused"] == "false"
# --------------------------------------------------------------------------- #
# Mutations (delete, pause, resume, recheck, set_location) #
# --------------------------------------------------------------------------- #
class TestMutations:
    """Mutating calls: delete, pause, resume, recheck and set_location."""

    def _ok(self, client):
        """Mark the client authenticated and script an ``"Ok."`` POST reply."""
        client._authenticated = True
        client.session.post.return_value = _resp("Ok.", json_decodable=False)

    def test_delete_success(self, client):
        """Delete sends the hash and ``deleteFiles="true"`` when requested."""
        self._ok(client)
        deleted = client.delete_torrent("hash1", delete_files=True)
        assert deleted is True
        sent = client.session.post.call_args.kwargs["data"]
        assert sent["hashes"] == "hash1"
        assert sent["deleteFiles"] == "true"

    def test_delete_no_files_default(self, client):
        """By default delete sends ``deleteFiles="false"``."""
        self._ok(client)
        client.delete_torrent("hash1")
        sent = client.session.post.call_args.kwargs["data"]
        assert sent["deleteFiles"] == "false"

    def test_pause(self, client):
        """Pause returns ``True`` on an ``"Ok."`` reply."""
        self._ok(client)
        assert client.pause_torrent("hash1") is True

    def test_resume(self, client):
        """Resume returns ``True`` on an ``"Ok."`` reply."""
        self._ok(client)
        assert client.resume_torrent("hash1") is True

    def test_recheck(self, client):
        """Recheck returns ``True`` on an ``"Ok."`` reply."""
        self._ok(client)
        assert client.recheck("hash1") is True

    def test_set_location(self, client):
        """Set-location posts exactly the hash and the new location."""
        self._ok(client)
        moved = client.set_location("hash1", "/new/path")
        assert moved is True
        sent = client.session.post.call_args.kwargs["data"]
        assert sent == {"hashes": "hash1", "location": "/new/path"}

    def test_mutation_propagates_api_error(self, client):
        """An HTTP 500 during a mutation raises ``QBittorrentAPIError``."""
        client._authenticated = True
        client.session.post.return_value = _http_error(500)
        with pytest.raises(QBittorrentAPIError):
            client.delete_torrent("hash1")
# --------------------------------------------------------------------------- #
# find_by_name #
# --------------------------------------------------------------------------- #
def _torrent_dict(name, save_path=None):
return {
"hash": "h",
"name": name,
"size": 1,
"progress": 0.0,
"state": "x",
"dlspeed": 0,
"upspeed": 0,
"eta": 0,
"num_seeds": 0,
"num_leechs": 0,
"ratio": 0.0,
"save_path": save_path,
}
class TestFindByName:
    """``find_by_name()`` lookup against the mocked torrent listing."""

    def test_exact_match(self, client):
        """A torrent whose name equals the query is returned as ``TorrentInfo``."""
        client._authenticated = True
        listing = [_torrent_dict("Foundation.S01"), _torrent_dict("Other")]
        client.session.get.return_value = _resp(listing)
        match = client.find_by_name("Foundation.S01")
        assert isinstance(match, TorrentInfo)
        assert match.name == "Foundation.S01"

    def test_case_insensitive_match(self, client):
        """A name differing only by case still matches."""
        client._authenticated = True
        client.session.get.return_value = _resp([_torrent_dict("foundation.s01")])
        match = client.find_by_name("Foundation.S01")
        assert match is not None
        assert match.name == "foundation.s01"

    def test_save_path_fallback(self, client):
        """With no name match, a torrent whose save path ends with the query wins."""
        client._authenticated = True
        listing = [_torrent_dict("Different", save_path="/dl/Foundation.S01")]
        client.session.get.return_value = _resp(listing)
        match = client.find_by_name("Foundation.S01")
        assert match is not None
        assert match.save_path.endswith("Foundation.S01")

    def test_no_match_returns_none(self, client):
        """No matching name or save path yields ``None``."""
        client._authenticated = True
        client.session.get.return_value = _resp([_torrent_dict("nope")])
        assert client.find_by_name("Foundation.S01") is None
# --------------------------------------------------------------------------- #
# _parse_torrent #
# --------------------------------------------------------------------------- #
class TestParseTorrent:
    """``_parse_torrent()``: defaults and progress scaling."""

    def test_defaults(self, client):
        """An empty payload falls back to the documented placeholder values."""
        parsed = client._parse_torrent({})
        assert parsed.hash == ""
        assert parsed.name == "Unknown"
        assert parsed.progress == 0.0
        assert parsed.state == "unknown"

    def test_progress_converted_to_percentage(self, client):
        """A fractional ``progress`` is exposed as a percentage."""
        parsed = client._parse_torrent({"progress": 0.75})
        assert parsed.progress == 75.0

    def test_full_payload(self, client):
        """A complete payload keeps ratio/category and scales progress to 100."""
        raw = {
            "hash": "h",
            "name": "n",
            "size": 1024,
            "progress": 1.0,
            "state": "uploading",
            "dlspeed": 100,
            "upspeed": 50,
            "eta": 0,
            "num_seeds": 10,
            "num_leechs": 2,
            "ratio": 2.5,
            "category": "movies",
            "save_path": "/dl",
        }
        parsed = client._parse_torrent(raw)
        assert parsed.progress == 100.0
        assert parsed.ratio == 2.5
        assert parsed.category == "movies"
# --------------------------------------------------------------------------- #
# logout #
# --------------------------------------------------------------------------- #
class TestLogout:
    """``logout()`` return value and auth-flag handling."""

    def test_logout_success(self, client):
        """A successful logout returns ``True`` and clears the auth flag."""
        client._authenticated = True
        client.session.post.return_value = _resp("", json_decodable=False)
        logged_out = client.logout()
        assert logged_out is True
        assert client._authenticated is False

    def test_logout_swallows_errors(self, client):
        """An error raised by the session makes logout return ``False``."""
        client._authenticated = True
        client.session.post.side_effect = RuntimeError("boom")
        # Per implementation, logout returns False instead of raising.
        logged_out = client.logout()
        assert logged_out is False
# --------------------------------------------------------------------------- #
# get_torrent_properties #
# --------------------------------------------------------------------------- #
class TestGetTorrentProperties:
    """``get_torrent_properties()`` hands back the decoded payload."""

    def test_properties_returned(self, client):
        """Keys of the JSON reply are readable on the returned mapping."""
        client._authenticated = True
        client.session.get.return_value = _resp({"piece_size": 16384})
        properties = client.get_torrent_properties("h")
        assert properties["piece_size"] == 16384
@@ -0,0 +1,308 @@
"""Tests for ``alfred.infrastructure.api.tmdb.client.TMDBClient``.
Exercises the public surface without any real HTTP traffic:
- ``TestInit`` — configuration via constructor args vs. ``Settings``;
enforcement of the ``api_key``/``base_url`` invariants.
- ``TestMakeRequest`` — error translation for timeouts, HTTP 401/404/5xx,
and generic ``RequestException``.
- ``TestSearchMulti`` — query validation, success path, empty-results →
``TMDBNotFoundError``.
- ``TestGetExternalIds`` — ``media_type`` whitelist enforcement.
- ``TestSearchMedia`` — happy path (movie/tv), media_type fallthrough to
the next result, structural-validation error, and the case where
external-ID resolution fails but the search still succeeds.
- ``TestDetailsEndpoints`` — ``get_movie_details`` / ``get_tv_details``.
- ``TestIsConfigured`` — reports ``True`` only when both api_key & url set.
All HTTP is mocked at ``alfred.infrastructure.api.tmdb.client.requests``.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import HTTPError, RequestException, Timeout
from alfred.infrastructure.api.tmdb.client import TMDBClient
from alfred.infrastructure.api.tmdb.dto import MediaResult
from alfred.infrastructure.api.tmdb.exceptions import (
TMDBAPIError,
TMDBConfigurationError,
TMDBNotFoundError,
)
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
def _ok_response(json_body):
"""Return a Mock that mimics a successful requests.Response."""
r = MagicMock()
r.status_code = 200
r.json.return_value = json_body
r.raise_for_status.return_value = None
return r
def _http_error_response(status_code):
    """Build a mock response whose ``raise_for_status()`` raises ``HTTPError``.

    The error's ``.response`` is the mock itself, which reports
    ``status_code`` as its ``.status_code``.
    """
    failing = MagicMock()
    failing.status_code = status_code
    error = HTTPError(f"{status_code}")
    error.response = failing
    failing.raise_for_status.side_effect = error
    return failing
@pytest.fixture
def client():
    """Provide a ``TMDBClient`` built from explicit fake credentials."""
    fake_config = {
        "api_key": "fake-key",
        "base_url": "https://api.example.com/3",
        "timeout": 5,
    }
    return TMDBClient(**fake_config)
# --------------------------------------------------------------------------- #
# Init / configuration #
# --------------------------------------------------------------------------- #
class TestInit:
    """Constructor configuration and invariants of ``TMDBClient``."""

    def test_explicit_args_win_over_settings(self):
        """Explicit constructor arguments are what the client stores."""
        tmdb = TMDBClient(api_key="explicit", base_url="https://x", timeout=99)
        assert tmdb.api_key == "explicit"
        assert tmdb.base_url == "https://x"
        assert tmdb.timeout == 99

    def test_missing_api_key_raises(self):
        """An empty key in both the argument and the config is rejected."""
        from alfred.settings import Settings

        keyless = Settings(tmdb_api_key="", tmdb_base_url="https://x")
        with pytest.raises(TMDBConfigurationError, match="API key"):
            TMDBClient(api_key="", config=keyless)

    def test_missing_base_url_raises(self):
        """An empty base URL in both the argument and the config is rejected."""
        # Pass api_key but force empty base_url. Need a config with empty URL too.
        from alfred.settings import Settings

        urlless = Settings(tmdb_api_key="fake", tmdb_base_url="")
        with pytest.raises(TMDBConfigurationError, match="base URL"):
            TMDBClient(config=urlless, base_url="")
# --------------------------------------------------------------------------- #
# _make_request — error translation #
# --------------------------------------------------------------------------- #
class TestMakeRequest:
    """Error translation and parameter wiring of ``_make_request``."""

    # Patch target shared by every test in this class.
    _REQUESTS_GET = "alfred.infrastructure.api.tmdb.client.requests.get"

    @patch(_REQUESTS_GET)
    def test_timeout_translated(self, mock_get, client):
        """``Timeout`` surfaces as ``TMDBAPIError``."""
        mock_get.side_effect = Timeout("slow")
        with pytest.raises(TMDBAPIError, match="timeout"):
            client._make_request("/x")

    @patch(_REQUESTS_GET)
    def test_http_401_invalid_key(self, mock_get, client):
        """HTTP 401 surfaces as ``TMDBAPIError``."""
        mock_get.return_value = _http_error_response(401)
        with pytest.raises(TMDBAPIError, match="Invalid"):
            client._make_request("/x")

    @patch(_REQUESTS_GET)
    def test_http_404_not_found(self, mock_get, client):
        """HTTP 404 surfaces as ``TMDBNotFoundError``."""
        mock_get.return_value = _http_error_response(404)
        with pytest.raises(TMDBNotFoundError):
            client._make_request("/x")

    @patch(_REQUESTS_GET)
    def test_http_500_generic(self, mock_get, client):
        """HTTP 500 surfaces as ``TMDBAPIError`` mentioning the status."""
        mock_get.return_value = _http_error_response(500)
        with pytest.raises(TMDBAPIError, match="500"):
            client._make_request("/x")

    @patch(_REQUESTS_GET)
    def test_request_exception_translated(self, mock_get, client):
        """A generic ``RequestException`` surfaces as ``TMDBAPIError``."""
        mock_get.side_effect = RequestException("network down")
        with pytest.raises(TMDBAPIError, match="connect"):
            client._make_request("/x")

    @patch(_REQUESTS_GET)
    def test_api_key_added_to_params(self, mock_get, client):
        """The API key joins the caller's params; the timeout is forwarded."""
        mock_get.return_value = _ok_response({"ok": True})
        client._make_request("/path", {"q": "foo"})
        sent = mock_get.call_args.kwargs
        assert sent["params"]["api_key"] == "fake-key"
        assert sent["params"]["q"] == "foo"
        assert sent["timeout"] == 5
# --------------------------------------------------------------------------- #
# search_multi #
# --------------------------------------------------------------------------- #
class TestSearchMulti:
    """``search_multi()``: query validation, success and empty results."""

    # Patch target shared by the tests that reach the HTTP layer.
    _REQUESTS_GET = "alfred.infrastructure.api.tmdb.client.requests.get"

    @pytest.mark.parametrize("bad", ["", None, 123])
    def test_invalid_query_raises_value_error(self, client, bad):
        """Empty, ``None`` and non-string queries raise ``ValueError``."""
        with pytest.raises(ValueError):
            client.search_multi(bad)

    def test_query_too_long(self, client):
        """A 501-character query is rejected as too long."""
        oversized = "a" * 501
        with pytest.raises(ValueError, match="too long"):
            client.search_multi(oversized)

    @patch(_REQUESTS_GET)
    def test_success(self, mock_get, client):
        """The ``results`` list of the reply is returned."""
        mock_get.return_value = _ok_response(
            {"results": [{"id": 1, "media_type": "movie"}]}
        )
        hits = client.search_multi("Inception")
        assert len(hits) == 1
        assert hits[0]["id"] == 1

    @patch(_REQUESTS_GET)
    def test_empty_results_raise_not_found(self, mock_get, client):
        """An empty ``results`` list raises ``TMDBNotFoundError``."""
        mock_get.return_value = _ok_response({"results": []})
        with pytest.raises(TMDBNotFoundError):
            client.search_multi("nothing")
# --------------------------------------------------------------------------- #
# get_external_ids #
# --------------------------------------------------------------------------- #
class TestGetExternalIds:
    """``get_external_ids()``: media-type validation and payload passthrough."""

    # Patch target shared by the tests that reach the HTTP layer.
    _REQUESTS_GET = "alfred.infrastructure.api.tmdb.client.requests.get"

    def test_invalid_media_type(self, client):
        """An unsupported ``media_type`` raises ``ValueError``."""
        with pytest.raises(ValueError, match="media_type"):
            client.get_external_ids("game", 42)

    @patch(_REQUESTS_GET)
    def test_movie(self, mock_get, client):
        """For a movie, the reply's ``imdb_id`` is returned."""
        mock_get.return_value = _ok_response({"imdb_id": "tt1375666"})
        external = client.get_external_ids("movie", 27205)
        assert external["imdb_id"] == "tt1375666"

    @patch(_REQUESTS_GET)
    def test_tv(self, mock_get, client):
        """For a TV show, the reply's ``imdb_id`` is returned."""
        mock_get.return_value = _ok_response({"imdb_id": "tt0903747"})
        external = client.get_external_ids("tv", 1396)
        assert external["imdb_id"] == "tt0903747"
# --------------------------------------------------------------------------- #
# search_media (composite) #
# --------------------------------------------------------------------------- #
class TestSearchMedia:
    """``search_media()``: search followed by external-ID resolution."""

    # Patch target shared by every test in this class.
    _REQUESTS_GET = "alfred.infrastructure.api.tmdb.client.requests.get"

    @patch(_REQUESTS_GET)
    def test_happy_path_movie(self, mock_get, client):
        """A movie hit plus its external IDs yields a populated ``MediaResult``."""
        # First call → /search/multi ; second → /movie/X/external_ids
        search_reply = _ok_response(
            {
                "results": [
                    {
                        "id": 27205,
                        "media_type": "movie",
                        "title": "Inception",
                        "overview": "...",
                        "release_date": "2010-07-15",
                        "poster_path": "/x.jpg",
                        "vote_average": 8.4,
                    }
                ]
            }
        )
        ids_reply = _ok_response({"imdb_id": "tt1375666"})
        mock_get.side_effect = [search_reply, ids_reply]
        media = client.search_media("Inception")
        assert isinstance(media, MediaResult)
        assert media.title == "Inception"
        assert media.imdb_id == "tt1375666"
        assert media.media_type == "movie"
        assert media.vote_average == 8.4

    @patch(_REQUESTS_GET)
    def test_tv_uses_name_field(self, mock_get, client):
        """For TV hits the title is taken from the ``name`` field."""
        search_reply = _ok_response(
            {"results": [{"id": 1396, "media_type": "tv", "name": "Breaking Bad"}]}
        )
        ids_reply = _ok_response({"imdb_id": "tt0903747"})
        mock_get.side_effect = [search_reply, ids_reply]
        media = client.search_media("Breaking Bad")
        assert media.title == "Breaking Bad"
        assert media.media_type == "tv"

    @patch(_REQUESTS_GET)
    def test_person_result_skipped_uses_next(self, mock_get, client):
        """A leading ``person`` hit is skipped in favour of the next result."""
        # First result is a person → falls through to second result.
        search_reply = _ok_response(
            {
                "results": [
                    {"id": 1, "media_type": "person", "name": "X"},
                    {"id": 2, "media_type": "movie", "title": "Y"},
                ]
            }
        )
        ids_reply = _ok_response({"imdb_id": "tt7654321"})
        mock_get.side_effect = [search_reply, ids_reply]
        media = client.search_media("Y")
        assert media.title == "Y"
        assert media.media_type == "movie"

    @patch(_REQUESTS_GET)
    def test_only_person_result_raises_not_found(self, mock_get, client):
        """When every hit is a ``person``, ``TMDBNotFoundError`` is raised."""
        mock_get.return_value = _ok_response(
            {"results": [{"id": 1, "media_type": "person", "name": "X"}]}
        )
        with pytest.raises(TMDBNotFoundError):
            client.search_media("X")

    @patch(_REQUESTS_GET)
    def test_malformed_top_result_raises(self, mock_get, client):
        """A top hit lacking ``id``/``media_type`` raises ``TMDBAPIError``."""
        mock_get.return_value = _ok_response(
            {"results": [{"title": "no id or media_type"}]}
        )
        with pytest.raises(TMDBAPIError, match="Invalid"):
            client.search_media("X")

    @patch(_REQUESTS_GET)
    def test_external_ids_failure_returns_result_without_imdb(self, mock_get, client):
        """A failing external-ID lookup still returns a result, minus ``imdb_id``."""
        # Second call (external IDs) fails — the search should still succeed.
        search_reply = _ok_response(
            {"results": [{"id": 1, "media_type": "movie", "title": "X"}]}
        )
        mock_get.side_effect = [search_reply, Timeout("slow")]
        media = client.search_media("X")
        assert media.imdb_id is None
# --------------------------------------------------------------------------- #
# Details endpoints #
# --------------------------------------------------------------------------- #
class TestDetailsEndpoints:
    """``get_movie_details`` / ``get_tv_details`` return the decoded payload."""

    # Patch target shared by both tests.
    _REQUESTS_GET = "alfred.infrastructure.api.tmdb.client.requests.get"

    @patch(_REQUESTS_GET)
    def test_movie_details(self, mock_get, client):
        """Movie details expose the fields of the JSON reply."""
        mock_get.return_value = _ok_response({"id": 27205, "runtime": 148})
        details = client.get_movie_details(27205)
        assert details["runtime"] == 148

    @patch(_REQUESTS_GET)
    def test_tv_details(self, mock_get, client):
        """TV details expose the fields of the JSON reply."""
        mock_get.return_value = _ok_response({"id": 1396, "number_of_seasons": 5})
        details = client.get_tv_details(1396)
        assert details["number_of_seasons"] == 5
class TestIsConfigured:
    """``is_configured()`` on a fully configured client."""

    def test_true_when_complete(self, client):
        """A client built with both API key and base URL reports ``True``."""
        configured = client.is_configured()
        assert configured is True
@@ -0,0 +1,384 @@
"""Tests for the smaller ``alfred.infrastructure.filesystem`` helpers.
Covers four siblings of ``FileManager`` that had near-zero coverage:
- ``ffprobe.probe`` — wraps ``ffprobe`` JSON output into a ``MediaInfo``.
- ``filesystem_operations.create_folder`` / ``move`` — thin
``mkdir`` / ``mv`` wrappers returning dict-shaped responses.
- ``organizer.MediaOrganizer`` — computes destination paths for movies
and TV episodes; creates folders for them.
- ``find_video.find_video_file`` — first-video lookup in a folder.
External commands (``ffprobe`` / ``mv``) are patched via ``subprocess.run``.
"""
from __future__ import annotations
import json
import subprocess
from unittest.mock import MagicMock, patch
from alfred.domain.movies.entities import Movie
from alfred.domain.movies.value_objects import MovieTitle, Quality, ReleaseYear
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.tv_shows.entities import Episode, TVShow
from alfred.domain.tv_shows.value_objects import (
EpisodeNumber,
SeasonNumber,
ShowStatus,
)
from alfred.infrastructure.filesystem import ffprobe
from alfred.infrastructure.filesystem.filesystem_operations import (
create_folder,
move,
)
from alfred.infrastructure.filesystem.find_video import find_video_file
from alfred.infrastructure.filesystem.organizer import MediaOrganizer
# --------------------------------------------------------------------------- #
# ffprobe.probe #
# --------------------------------------------------------------------------- #
def _ffprobe_result(returncode=0, stdout="{}", stderr="") -> MagicMock:
return MagicMock(returncode=returncode, stdout=stdout, stderr=stderr)
class TestFfprobe:
    """``ffprobe.probe``: failure modes and parsing of the ffprobe JSON output.

    ``subprocess.run`` is always patched, so no real ``ffprobe`` binary runs.
    """

    @staticmethod
    def _probe_with_run(tmp_path, **run_mock_kwargs):
        """Probe an empty ``x.mkv`` with ``subprocess.run`` patched.

        ``run_mock_kwargs`` configure the patched ``subprocess.run`` mock
        (typically ``return_value=...`` or ``side_effect=...``).
        """
        media = tmp_path / "x.mkv"
        media.write_bytes(b"")
        with patch(
            "alfred.infrastructure.filesystem.ffprobe.subprocess.run",
            **run_mock_kwargs,
        ):
            return ffprobe.probe(media)

    def _probe_payload(self, tmp_path, payload):
        """Probe with ffprobe "succeeding" and printing ``payload`` as JSON."""
        return self._probe_with_run(
            tmp_path, return_value=_ffprobe_result(stdout=json.dumps(payload))
        )

    def test_timeout_returns_none(self, tmp_path):
        """A ``TimeoutExpired`` from the subprocess yields ``None``."""
        info = self._probe_with_run(
            tmp_path,
            side_effect=subprocess.TimeoutExpired(cmd="ffprobe", timeout=30),
        )
        assert info is None

    def test_nonzero_returncode_returns_none(self, tmp_path):
        """A non-zero exit status yields ``None``."""
        info = self._probe_with_run(
            tmp_path,
            return_value=_ffprobe_result(returncode=1, stderr="not a media file"),
        )
        assert info is None

    def test_invalid_json_returns_none(self, tmp_path):
        """Output that is not valid JSON yields ``None``."""
        info = self._probe_with_run(
            tmp_path, return_value=_ffprobe_result(stdout="not json {")
        )
        assert info is None

    def test_parses_format_duration_and_bitrate(self, tmp_path):
        """Duration and bitrate are read from the ``format`` section."""
        payload = {
            "format": {"duration": "1234.5", "bit_rate": "5000000"},
            "streams": [],
        }
        info = self._probe_payload(tmp_path, payload)
        assert info is not None
        assert info.duration_seconds == 1234.5
        assert info.bitrate_kbps == 5000  # bit_rate // 1000

    def test_invalid_numeric_format_fields_skipped(self, tmp_path):
        """Non-numeric duration/bitrate strings leave both fields as ``None``."""
        payload = {
            "format": {"duration": "garbage", "bit_rate": "also-bad"},
            "streams": [],
        }
        info = self._probe_payload(tmp_path, payload)
        assert info is not None
        assert info.duration_seconds is None
        assert info.bitrate_kbps is None

    def test_parses_streams(self, tmp_path):
        """Video, audio and subtitle streams land in their respective fields."""
        payload = {
            "format": {},
            "streams": [
                {
                    "index": 0,
                    "codec_type": "video",
                    "codec_name": "h264",
                    "width": 1920,
                    "height": 1080,
                },
                {
                    "index": 1,
                    "codec_type": "audio",
                    "codec_name": "ac3",
                    "channels": 6,
                    "channel_layout": "5.1",
                    "tags": {"language": "eng"},
                    "disposition": {"default": 1},
                },
                {
                    "index": 2,
                    "codec_type": "audio",
                    "codec_name": "aac",
                    "channels": 2,
                    "tags": {"language": "fra"},
                },
                {
                    "index": 3,
                    "codec_type": "subtitle",
                    "codec_name": "subrip",
                    "tags": {"language": "fra"},
                    "disposition": {"forced": 1},
                },
            ],
        }
        info = self._probe_payload(tmp_path, payload)
        # Guard first so a ``None`` result fails as an assertion, like the
        # sibling tests, instead of an AttributeError on the next line.
        assert info is not None
        assert info.video_codec == "h264"
        assert info.width == 1920
        assert info.height == 1080
        assert len(info.audio_tracks) == 2
        eng = info.audio_tracks[0]
        assert eng.language == "eng"
        assert eng.is_default is True
        assert info.audio_tracks[1].is_default is False
        assert len(info.subtitle_tracks) == 1
        assert info.subtitle_tracks[0].is_forced is True

    def test_first_video_stream_wins(self, tmp_path):
        """With several video streams, the first one sets codec and width."""
        # The implementation only fills video_codec on the FIRST video stream.
        payload = {
            "format": {},
            "streams": [
                {"codec_type": "video", "codec_name": "h264", "width": 1920},
                {"codec_type": "video", "codec_name": "hevc", "width": 3840},
            ],
        }
        info = self._probe_payload(tmp_path, payload)
        assert info is not None
        assert info.video_codec == "h264"
        assert info.width == 1920
# --------------------------------------------------------------------------- #
# filesystem_operations #
# --------------------------------------------------------------------------- #
class TestCreateFolder:
    """``create_folder``: dict-shaped result for success and failure."""

    def test_creates_nested(self, tmp_path):
        """Missing parent folders are created along with the target."""
        nested = tmp_path / "a" / "b" / "c"
        result = create_folder(str(nested))
        assert result == {"status": "ok", "path": str(nested)}
        assert nested.is_dir()

    def test_existing_is_ok(self, tmp_path):
        """Calling it on an existing folder still reports ``ok``."""
        result = create_folder(str(tmp_path))
        assert result["status"] == "ok"

    def test_os_error_wrapped(self, tmp_path):
        """An ``OSError`` from ``mkdir`` becomes an error dict."""
        with patch(
            "alfred.infrastructure.filesystem.filesystem_operations.Path.mkdir",
            side_effect=OSError("readonly fs"),
        ):
            result = create_folder(str(tmp_path / "x"))
        expected = {
            "status": "error",
            "error": "mkdir_failed",
            "message": "readonly fs",
        }
        assert result == expected
class TestMove:
    """``move``: precondition errors, success and wrapped ``mv`` failures."""

    def test_source_not_found(self, tmp_path):
        """A missing source is reported as ``source_not_found``."""
        result = move(str(tmp_path / "ghost"), str(tmp_path / "dst"))
        assert result["status"] == "error"
        assert result["error"] == "source_not_found"

    def test_destination_exists(self, tmp_path):
        """An already existing destination is reported as ``destination_exists``."""
        source = tmp_path / "src"
        source.write_text("x")
        target = tmp_path / "dst"
        target.write_text("y")
        result = move(str(source), str(target))
        assert result["error"] == "destination_exists"

    def test_happy_path_returns_ok(self, tmp_path):
        """A zero exit status yields an ``ok`` dict echoing both paths."""
        source = tmp_path / "src"
        source.write_text("x")
        target = tmp_path / "dst"
        # Patch subprocess so we don't actually shell out; pretend success.
        with patch(
            "alfred.infrastructure.filesystem.filesystem_operations.subprocess.run",
            return_value=MagicMock(returncode=0, stderr=""),
        ):
            result = move(str(source), str(target))
        assert result == {
            "status": "ok",
            "source": str(source),
            "destination": str(target),
        }

    def test_mv_failure_wrapped(self, tmp_path):
        """A non-zero exit status yields ``move_failed`` with stripped stderr."""
        source = tmp_path / "src"
        source.write_text("x")
        with patch(
            "alfred.infrastructure.filesystem.filesystem_operations.subprocess.run",
            return_value=MagicMock(returncode=1, stderr="cross-device link\n"),
        ):
            result = move(str(source), str(tmp_path / "dst"))
        assert result["error"] == "move_failed"
        assert result["message"] == "cross-device link"

    def test_os_error_wrapped(self, tmp_path):
        """An ``OSError`` raised by the subprocess call yields ``move_failed``."""
        source = tmp_path / "src"
        source.write_text("x")
        with patch(
            "alfred.infrastructure.filesystem.filesystem_operations.subprocess.run",
            side_effect=OSError("ENOSPC"),
        ):
            result = move(str(source), str(tmp_path / "dst"))
        assert result["error"] == "move_failed"
# --------------------------------------------------------------------------- #
# find_video #
# --------------------------------------------------------------------------- #
class TestFindVideo:
    """``find_video_file``: file input, folder input and extension handling."""

    def test_returns_file_directly_when_video(self, tmp_path):
        """A path that is itself a video file is returned unchanged."""
        video = tmp_path / "Movie.mkv"
        video.write_bytes(b"")
        assert find_video_file(video) == video

    def test_returns_none_when_file_is_not_video(self, tmp_path):
        """A non-video file yields ``None``."""
        notes = tmp_path / "notes.txt"
        notes.write_text("x")
        assert find_video_file(notes) is None

    def test_returns_none_when_folder_has_no_video(self, tmp_path):
        """A folder holding no video yields ``None``."""
        (tmp_path / "a.txt").write_text("x")
        assert find_video_file(tmp_path) is None

    def test_returns_first_sorted_video(self, tmp_path):
        """Among several videos, the first one in sorted order is returned."""
        for filename in ("B.mkv", "A.mkv", "C.mkv"):
            (tmp_path / filename).write_bytes(b"")
        match = find_video_file(tmp_path)
        assert match.name == "A.mkv"

    def test_recurses_into_subfolders(self, tmp_path):
        """A video nested in a subfolder is still found."""
        nested = tmp_path / "sub"
        nested.mkdir()
        (nested / "X.mkv").write_bytes(b"")
        match = find_video_file(tmp_path)
        assert match is not None and match.name == "X.mkv"

    def test_case_insensitive_extension(self, tmp_path):
        """An upper-case extension is recognised as a video."""
        video = tmp_path / "Movie.MKV"
        video.write_bytes(b"")
        assert find_video_file(video) == video
# --------------------------------------------------------------------------- #
# MediaOrganizer #
# --------------------------------------------------------------------------- #
def _movie() -> Movie:
    """Return a fresh ``Movie`` fixture (Inception, 2010, HD)."""
    fields = {
        "imdb_id": ImdbId("tt1375666"),
        "title": MovieTitle("Inception"),
        "release_year": ReleaseYear(2010),
        "quality": Quality.HD,
    }
    return Movie(**fields)
def _show() -> TVShow:
    """Return a fresh ``TVShow`` fixture (Dexter, 8 expected seasons, ended)."""
    fields = {
        "imdb_id": ImdbId("tt0773262"),
        "title": "Dexter",
        "expected_seasons": 8,
        "status": ShowStatus.ENDED,
    }
    return TVShow(**fields)
def _episode() -> Episode:
    """Return a fresh ``Episode`` fixture for season 1, episode 1."""
    season = SeasonNumber(1)
    number = EpisodeNumber(1)
    return Episode(season_number=season, episode_number=number, title="Dexter")
class TestMediaOrganizer:
    """``MediaOrganizer``: destination paths and directory creation."""

    @staticmethod
    def _organizer(tmp_path):
        """Build an organizer rooted at ``tmp_path/movies`` and ``tmp_path/tv``."""
        return MediaOrganizer(tmp_path / "movies", tmp_path / "tv")

    def test_get_movie_destination(self, tmp_path):
        """A movie lands in ``<movies>/<movie folder>/`` with its extension kept."""
        organizer = self._organizer(tmp_path)
        destination = organizer.get_movie_destination(_movie(), "source.mkv")
        # Path: /movies/<folder>/<filename>.mkv
        assert destination.suffix == ".mkv"
        assert destination.parent.name == _movie().get_folder_name()
        assert destination.parent.parent == tmp_path / "movies"

    def test_get_movie_destination_preserves_extension(self, tmp_path):
        """The source extension is kept verbatim, including its case."""
        organizer = self._organizer(tmp_path)
        destination = organizer.get_movie_destination(_movie(), "source.MP4")
        assert destination.suffix == ".MP4"

    def test_get_episode_destination(self, tmp_path):
        """An episode lands three levels below the TV root, under the show folder."""
        organizer = self._organizer(tmp_path)
        destination = organizer.get_episode_destination(
            _show(), _episode(), "raw.mkv"
        )
        # Path: /tv/<show>/<season>/<episode>.mkv
        assert destination.suffix == ".mkv"
        assert destination.parent.parent.parent == tmp_path / "tv"
        assert destination.parent.parent.name == _show().get_folder_name()

    def test_create_movie_directory_creates_folder(self, tmp_path):
        """Creating a movie directory returns ``True`` and the folder exists."""
        organizer = self._organizer(tmp_path)
        assert organizer.create_movie_directory(_movie()) is True
        assert (tmp_path / "movies" / _movie().get_folder_name()).is_dir()

    def test_create_movie_directory_already_exists_ok(self, tmp_path):
        """Creating the same movie directory twice still returns ``True``."""
        organizer = self._organizer(tmp_path)
        organizer.create_movie_directory(_movie())
        # Second call is also fine (parents=True, exist_ok=True).
        assert organizer.create_movie_directory(_movie()) is True

    def test_create_movie_directory_failure_returns_false(self, tmp_path):
        """A ``PermissionError`` from ``mkdir`` makes the call return ``False``."""
        organizer = self._organizer(tmp_path)
        with patch(
            "alfred.infrastructure.filesystem.organizer.Path.mkdir",
            side_effect=PermissionError("denied"),
        ):
            assert organizer.create_movie_directory(_movie()) is False

    def test_create_episode_directory_creates_season_folder(self, tmp_path):
        """Creating an episode directory makes the show folder with a child in it."""
        organizer = self._organizer(tmp_path)
        assert organizer.create_episode_directory(_show(), 1) is True
        # /tv/<show>/<season> exists
        show_folder = tmp_path / "tv" / _show().get_folder_name()
        assert show_folder.is_dir()
        # At least one child (the season folder) was created.
        assert any(show_folder.iterdir())

    def test_create_episode_directory_failure_returns_false(self, tmp_path):
        """An ``OSError`` from ``mkdir`` makes the call return ``False``."""
        organizer = self._organizer(tmp_path)
        with patch(
            "alfred.infrastructure.filesystem.organizer.Path.mkdir",
            side_effect=OSError("readonly"),
        ):
            assert organizer.create_episode_directory(_show(), 1) is False
+283
View File
@@ -0,0 +1,283 @@
"""Tests for ``alfred.infrastructure.metadata.store.MetadataStore``.
The store manages ``<release_root>/.alfred/metadata.yaml`` — a per-release
sidecar with parse, probe, TMDB, pattern, and subtitle-history sections.
Coverage:
- ``TestIdentityAndExists`` — accessors + ``exists()``.
- ``TestLoad`` — empty/missing/corrupt YAML returns ``{}``.
- ``TestSave`` — atomic write creates ``.alfred/`` + temp file is gone.
- ``TestUpdateSection`` — replaces the section + adds ``_updated_at``.
- ``TestUpdateParseAndProbe`` / ``TestUpdateTmdb`` — strips ``status`` from payload;
TMDB promotes ``imdb_id`` / ``tmdb_id`` / ``media_type`` / ``title``
to the top level.
- ``TestPattern`` — ``confirmed_pattern`` returns the id only when flag
is set; ``mark_pattern_confirmed`` preserves pre-existing keys.
- ``TestSubtitleHistory`` — append + release-group dedup.
"""
from __future__ import annotations
import yaml
from alfred.infrastructure.metadata.store import MetadataStore
# --------------------------------------------------------------------------- #
# Identity / exists #
# --------------------------------------------------------------------------- #
class TestIdentityAndExists:
    """Path accessors and ``exists()`` of a ``MetadataStore``."""

    def test_paths(self, tmp_path):
        store = MetadataStore(tmp_path)
        expected_sidecar = tmp_path / ".alfred" / "metadata.yaml"
        assert store.release_root == tmp_path
        assert store.metadata_path == expected_sidecar

    def test_exists_false_initially(self, tmp_path):
        store = MetadataStore(tmp_path)
        # Nothing has been saved yet, so no sidecar is reported.
        assert store.exists() is False

    def test_exists_after_save(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        assert store.exists() is True
# --------------------------------------------------------------------------- #
# Load #
# --------------------------------------------------------------------------- #
class TestLoad:
    """``load()`` yields ``{}`` for a missing, empty, or unparsable sidecar."""

    @staticmethod
    def _seed_sidecar(root, text):
        """Create ``root/.alfred/metadata.yaml`` containing ``text`` verbatim."""
        alfred_dir = root / ".alfred"
        alfred_dir.mkdir()
        (alfred_dir / "metadata.yaml").write_text(text)

    def test_missing_file_returns_empty(self, tmp_path):
        store = MetadataStore(tmp_path)
        assert store.load() == {}

    def test_empty_yaml_returns_empty(self, tmp_path):
        store = MetadataStore(tmp_path)
        self._seed_sidecar(tmp_path, "")
        assert store.load() == {}

    def test_corrupt_yaml_returns_empty(self, tmp_path):
        store = MetadataStore(tmp_path)
        self._seed_sidecar(tmp_path, "not: : valid: yaml: [")
        # A parse failure must surface as an empty mapping, never an exception.
        assert store.load() == {}
# --------------------------------------------------------------------------- #
# Save #
# --------------------------------------------------------------------------- #
class TestSave:
    """``save()`` creates ``.alfred/`` and writes YAML that loads back intact."""

    def test_creates_alfred_dir(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        alfred_dir = tmp_path / ".alfred"
        assert alfred_dir.is_dir()
        assert (alfred_dir / "metadata.yaml").is_file()

    def test_yaml_roundtrip(self, tmp_path):
        store = MetadataStore(tmp_path)
        payload = {"a": 1, "b": ["x", "y"], "c": {"nested": True}}
        store.save(payload)
        raw_text = (tmp_path / ".alfred" / "metadata.yaml").read_text()
        # The file on disk is plain YAML readable without the store ...
        assert yaml.safe_load(raw_text) == payload
        # ... and the store API returns the same mapping.
        assert store.load() == payload

    def test_temp_file_cleaned_up(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        leftover = tmp_path / ".alfred" / "metadata.yaml.tmp"
        # No stale ``.tmp`` sibling may remain after a save.
        assert not leftover.exists()

    def test_unicode_preserved(self, tmp_path):
        store = MetadataStore(tmp_path)
        payload = {"title": "Amélie"}
        store.save(payload)
        assert store.load() == {"title": "Amélie"}
# --------------------------------------------------------------------------- #
# update_section #
# --------------------------------------------------------------------------- #
class TestUpdateSection:
    """``update_section()`` replaces one section and stamps ``_updated_at``."""

    def test_adds_section_with_timestamp(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.update_section("parse", {"title": "X"})
        section = store.load()["parse"]
        assert section["title"] == "X"
        assert "_updated_at" in section
        # Only the ISO-8601 date/time separator is checked here; the exact
        # timestamp value is not pinned.
        assert "T" in section["_updated_at"]

    def test_section_replaced_wholesale(self, tmp_path):
        store = MetadataStore(tmp_path)
        for payload in ({"a": 1, "b": 2}, {"c": 3}):
            store.update_section("parse", payload)
        section = store.load()["parse"]
        # Keys from the first write must not survive the second one.
        assert "a" not in section
        assert section["c"] == 3

    def test_preserves_other_sections(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.update_section("parse", {"a": 1})
        store.update_section("probe", {"b": 2})
        stored = store.load()
        assert stored["parse"]["a"] == 1
        assert stored["probe"]["b"] == 2
# --------------------------------------------------------------------------- #
# update_parse / update_probe #
# --------------------------------------------------------------------------- #
class TestUpdateParseAndProbe:
    """``update_parse`` / ``update_probe`` drop ``status`` and keep the rest."""

    def test_update_parse_strips_status(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.update_parse({"status": "ok", "title": "X", "year": 2020})
        parse_section = store.load()["parse"]
        assert "status" not in parse_section
        assert parse_section["title"] == "X"
        assert parse_section["year"] == 2020

    def test_update_probe_strips_status(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.update_probe({"status": "ok", "resolution": "1080p"})
        probe_section = store.load()["probe"]
        assert probe_section["resolution"] == "1080p"
        assert "status" not in probe_section
# --------------------------------------------------------------------------- #
# update_tmdb #
# --------------------------------------------------------------------------- #
class TestUpdateTmdb:
    """``update_tmdb()`` copies identity fields to the top level of the sidecar."""

    def test_promotes_identity_to_top_level(self, tmp_path):
        store = MetadataStore(tmp_path)
        identity = {
            "imdb_id": "tt1375666",
            "tmdb_id": 27205,
            "media_type": "movie",
            "title": "Inception",
        }
        store.update_tmdb({"status": "ok", **identity})
        stored = store.load()
        for field, expected in identity.items():
            assert stored[field] == expected
        # The complete payload is still kept under the ``tmdb`` section.
        assert stored["tmdb"]["imdb_id"] == "tt1375666"

    def test_does_not_overwrite_existing_title(self, tmp_path):
        store = MetadataStore(tmp_path)
        # Seed a title that predates the TMDB update (e.g. an earlier
        # confirmation).
        store.save({"title": "Old Title"})
        store.update_tmdb({"title": "New Title", "imdb_id": "tt1"})
        stored = store.load()
        # The pre-existing title wins; the missing imdb_id is filled in.
        assert stored["title"] == "Old Title"
        assert stored["imdb_id"] == "tt1"

    def test_none_values_not_promoted(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.update_tmdb({"imdb_id": None, "tmdb_id": 27205, "media_type": None})
        stored = store.load()
        assert "imdb_id" not in stored
        assert stored["tmdb_id"] == 27205
        assert "media_type" not in stored
# --------------------------------------------------------------------------- #
# Pattern #
# --------------------------------------------------------------------------- #
class TestPattern:
    """``confirmed_pattern()`` and ``mark_pattern_confirmed()`` behaviour."""

    def test_confirmed_pattern_empty_when_missing(self, tmp_path):
        store = MetadataStore(tmp_path)
        assert store.confirmed_pattern() is None

    def test_confirmed_pattern_only_when_flag_true(self, tmp_path):
        store = MetadataStore(tmp_path)
        # A detected but unconfirmed pattern is not reported.
        store.save({"detected_pattern": "adjacent", "pattern_confirmed": False})
        assert store.confirmed_pattern() is None
        # Once the flag is set, the pattern id is returned.
        store.save({"detected_pattern": "adjacent", "pattern_confirmed": True})
        assert store.confirmed_pattern() == "adjacent"

    def test_mark_pattern_confirmed_sets_flag(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.mark_pattern_confirmed("subs_flat")
        stored = store.load()
        assert stored["detected_pattern"] == "subs_flat"
        assert stored["pattern_confirmed"] is True

    def test_mark_pattern_preserves_media_info(self, tmp_path):
        store = MetadataStore(tmp_path)
        info = {"media_type": "movie", "imdb_id": "tt1", "title": "Foo"}
        store.mark_pattern_confirmed("adjacent", media_info=info)
        stored = store.load()
        assert stored["media_type"] == "movie"
        assert stored["imdb_id"] == "tt1"
        assert stored["title"] == "Foo"

    def test_mark_pattern_does_not_overwrite_existing_identity(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.save({"title": "Existing", "imdb_id": "tt_old"})
        newer_info = {"imdb_id": "tt_new", "title": "New"}
        store.mark_pattern_confirmed("adjacent", media_info=newer_info)
        stored = store.load()
        # Identity values already on disk take precedence over ``media_info``.
        assert stored["title"] == "Existing"
        assert stored["imdb_id"] == "tt_old"
# --------------------------------------------------------------------------- #
# Subtitle history #
# --------------------------------------------------------------------------- #
class TestSubtitleHistory:
    """Appending subtitle-history entries and tracking release groups."""

    def test_initially_empty(self, tmp_path):
        store = MetadataStore(tmp_path)
        assert store.subtitle_history() == []

    def test_append_one(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.append_subtitle_history_entry({"tracks": 2, "release_group": "GRP"})
        history = store.subtitle_history()
        assert len(history) == 1
        assert history[0]["tracks"] == 2

    def test_release_group_recorded_once(self, tmp_path):
        store = MetadataStore(tmp_path)
        # "GRP" is submitted twice but must be listed a single time.
        for group in ("GRP", "GRP", "OTHER"):
            store.append_subtitle_history_entry({"release_group": group})
        recorded_groups = store.load()["release_groups"]
        assert recorded_groups == ["GRP", "OTHER"]

    def test_no_release_group_does_not_create_groups_list(self, tmp_path):
        store = MetadataStore(tmp_path)
        store.append_subtitle_history_entry({"tracks": 0})
        assert "release_groups" not in store.load()

    def test_multiple_entries_preserved_in_order(self, tmp_path):
        store = MetadataStore(tmp_path)
        for index in range(3):
            store.append_subtitle_history_entry({"i": index})
        recorded = [entry["i"] for entry in store.subtitle_history()]
        assert recorded == [0, 1, 2]
@@ -0,0 +1,178 @@
"""Tests for ``alfred.infrastructure.subtitle.rule_repository.RuleSetRepository``.
Loads/saves the SubtitleRuleSet inheritance chain from ``.alfred/`` YAML.
Coverage:
- ``TestLoad`` — no files → ``global_default``; rules.yaml override applied
on top; release_groups/{NAME}.yaml override applied;
SubtitlePreferences seeds the base when provided; full 3-level chain.
- ``TestFilterOverride`` — unknown keys discarded.
- ``TestSaveLocal`` — atomic write, merges with existing, creates .alfred/.
"""
from __future__ import annotations
from pathlib import Path
import yaml
from alfred.infrastructure.persistence.memory.ltm.components.subtitle_preferences import (
SubtitlePreferences,
)
from alfred.infrastructure.subtitle.rule_repository import (
RuleSetRepository,
_filter_override,
)
def _write(path: Path, data: dict) -> None:
    """Dump ``data`` as UTF-8 YAML to ``path``, creating parent directories."""
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    serialised = yaml.safe_dump(data)
    path.write_text(serialised, encoding="utf-8")
# --------------------------------------------------------------------------- #
# _filter_override #
# --------------------------------------------------------------------------- #
class TestFilterOverride:
    """``_filter_override()`` keeps recognised override keys and drops the rest."""

    def test_keeps_only_valid_keys(self):
        recognised = {
            "languages": ["fra"],
            "formats": ["srt"],
            "types": ["standard"],
            "format_priority": ["srt"],
            "min_confidence": 0.8,
        }
        noise = {"unknown_key": "ignored", "another": 42}
        filtered = _filter_override({**recognised, **noise})
        # Exactly the recognised keys survive.
        assert set(filtered) == set(recognised)
        assert "unknown_key" not in filtered

    def test_empty(self):
        filtered = _filter_override({})
        assert filtered == {}
# --------------------------------------------------------------------------- #
# load #
# --------------------------------------------------------------------------- #
class TestLoad:
    """``RuleSetRepository.load()`` across the preference/group/local layers."""

    def test_no_files_returns_global_default(self, tmp_path):
        rule_set = RuleSetRepository(tmp_path).load()
        # With nothing on disk the defaults must still resolve cleanly.
        resolved = rule_set.resolve()
        assert resolved.preferred_languages  # non-empty
        assert resolved.min_confidence > 0

    def test_subtitle_preferences_override_base(self, tmp_path):
        preferences = SubtitlePreferences(
            languages=["jpn"], formats=["ass"], types=["standard"]
        )
        repository = RuleSetRepository(tmp_path)
        resolved = repository.load(subtitle_preferences=preferences).resolve()
        assert resolved.preferred_languages == ["jpn"]
        assert resolved.preferred_formats == ["ass"]
        assert resolved.allowed_types == ["standard"]

    def test_local_rules_yaml_applied(self, tmp_path):
        local_override = {"languages": ["spa"], "min_confidence": 0.95}
        _write(tmp_path / ".alfred" / "rules.yaml", {"override": local_override})
        resolved = RuleSetRepository(tmp_path).load().resolve()
        assert resolved.preferred_languages == ["spa"]
        assert resolved.min_confidence == 0.95

    def test_release_group_override_applied(self, tmp_path):
        group_file = tmp_path / ".alfred" / "release_groups" / "KONTRAST.yaml"
        _write(group_file, {"override": {"format_priority": ["ass", "srt"]}})
        repository = RuleSetRepository(tmp_path)
        resolved = repository.load(release_group="KONTRAST").resolve()
        assert resolved.format_priority == ["ass", "srt"]

    def test_full_three_level_chain(self, tmp_path):
        alfred_dir = tmp_path / ".alfred"
        # Layer 1 (base): preferences choose the language.
        preferences = SubtitlePreferences(languages=["jpn"])
        # Layer 2 (release group): overrides the format priority.
        _write(
            alfred_dir / "release_groups" / "GRP.yaml",
            {"override": {"format_priority": ["ass"]}},
        )
        # Layer 3 (local rules): overrides the confidence threshold.
        _write(alfred_dir / "rules.yaml", {"override": {"min_confidence": 0.99}})
        repository = RuleSetRepository(tmp_path)
        rule_set = repository.load(
            release_group="GRP", subtitle_preferences=preferences
        )
        resolved = rule_set.resolve()
        # Each layer's contribution is visible in the resolved rules.
        assert resolved.preferred_languages == ["jpn"]
        assert resolved.format_priority == ["ass"]
        assert resolved.min_confidence == 0.99

    def test_release_group_yaml_without_override_section_ignored(self, tmp_path):
        # The group file exists but carries no 'override' key.
        _write(
            tmp_path / ".alfred" / "release_groups" / "GRP.yaml",
            {"name": "GRP"},
        )
        repository = RuleSetRepository(tmp_path)
        rule_set = repository.load(release_group="GRP")
        # Loading must not crash, and the result is still the global rule set.
        assert rule_set.scope.level == "global"

    def test_missing_release_group_file_silently_ignored(self, tmp_path):
        repository = RuleSetRepository(tmp_path)
        rule_set = repository.load(release_group="DOES_NOT_EXIST")
        assert rule_set.scope.level == "global"
# --------------------------------------------------------------------------- #
# save_local #
# --------------------------------------------------------------------------- #
class TestSaveLocal:
    """``RuleSetRepository.save_local()`` persistence into ``.alfred/rules.yaml``.

    Files are read back with an explicit ``encoding="utf-8"`` so the tests do
    not depend on the platform's locale encoding; this mirrors the module's
    ``_write`` helper, which writes YAML as UTF-8.
    """

    @staticmethod
    def _rules_path(root):
        """Return the path of the local rules file under ``root``."""
        return root / ".alfred" / "rules.yaml"

    @classmethod
    def _read_rules(cls, root):
        """Parse the local rules file under ``root`` and return its content."""
        return yaml.safe_load(cls._rules_path(root).read_text(encoding="utf-8"))

    def test_creates_file(self, tmp_path):
        repository = RuleSetRepository(tmp_path)
        repository.save_local({"languages": ["spa"]})
        assert self._rules_path(tmp_path).is_file()
        # The delta is stored under a top-level ``override`` key.
        assert self._read_rules(tmp_path) == {"override": {"languages": ["spa"]}}

    def test_merges_with_existing(self, tmp_path):
        repository = RuleSetRepository(tmp_path)
        repository.save_local({"languages": ["spa"]})
        repository.save_local({"min_confidence": 0.8})
        override = self._read_rules(tmp_path)["override"]
        # The second save adds its key without dropping the first one.
        assert override["languages"] == ["spa"]
        assert override["min_confidence"] == 0.8

    def test_overwrites_existing_key(self, tmp_path):
        repository = RuleSetRepository(tmp_path)
        repository.save_local({"languages": ["spa"]})
        repository.save_local({"languages": ["jpn"]})
        # Saving the same key again replaces its value.
        assert self._read_rules(tmp_path)["override"]["languages"] == ["jpn"]

    def test_temp_file_cleaned_up(self, tmp_path):
        repository = RuleSetRepository(tmp_path)
        repository.save_local({"languages": ["spa"]})
        # No stale ``.tmp`` sibling may remain after a save.
        assert not (tmp_path / ".alfred" / "rules.yaml.tmp").exists()
@@ -0,0 +1,173 @@
"""Tests for ``alfred.infrastructure.subtitle.metadata_store.SubtitleMetadataStore``.
Subtitle-pipeline view over a per-release ``.alfred/metadata.yaml``.
Coverage:
- ``TestPatternDelegation`` — ``confirmed_pattern`` / ``mark_pattern_confirmed``
delegate to the generic store.
- ``TestAppendHistory`` — entry shape (placed_at, release_group, tracks),
per-track fields (language/type/format/source_file/placed_as/confidence),
type inference from filename pieces (en.sdh.srt → "sdh"),
empty pairs → no-op, season/episode included only when given.
"""
from __future__ import annotations
from pathlib import Path
from alfred.domain.subtitles.entities import SubtitleCandidate
from alfred.domain.subtitles.services.placer import PlacedTrack
from alfred.domain.subtitles.value_objects import (
SubtitleFormat,
SubtitleLanguage,
SubtitleType,
)
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
SRT = SubtitleFormat(id="srt", extensions=[".srt"])
FRA = SubtitleLanguage(code="fra", tokens=["fr"])
ENG = SubtitleLanguage(code="eng", tokens=["en"])
def _track(
    lang=FRA, *, embedded: bool = False, confidence: float = 0.92
) -> SubtitleCandidate:
    """Build a standard-type SRT ``SubtitleCandidate`` for the tests.

    ``lang`` defaults to French; callers may pass ``None`` to simulate a track
    with no detected language.
    """
    fields = {
        "language": lang,
        "format": SRT,
        "subtitle_type": SubtitleType.STANDARD,
        "is_embedded": embedded,
        "confidence": confidence,
    }
    return SubtitleCandidate(**fields)
def _placed(src_name: str, dest_name: str, dest_dir: Path) -> PlacedTrack:
    """Build a ``PlacedTrack`` from ``/in/<src_name>`` to ``dest_dir/<dest_name>``."""
    origin = Path("/in") / src_name
    target = dest_dir / dest_name
    return PlacedTrack(source=origin, destination=target, filename=dest_name)
# --------------------------------------------------------------------------- #
# Pattern delegation #
# --------------------------------------------------------------------------- #
class TestPatternDelegation:
    """Pattern confirmation through the ``SubtitleMetadataStore`` API."""

    def test_confirmed_pattern_initially_none(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        assert store.confirmed_pattern() is None

    def test_mark_then_read_back(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        store.mark_pattern_confirmed("adjacent", {"media_type": "movie"})
        # A confirmed pattern is readable straight afterwards.
        assert store.confirmed_pattern() == "adjacent"
# --------------------------------------------------------------------------- #
# append_history #
# --------------------------------------------------------------------------- #
class TestAppendHistory:
    """Shape and content of entries written by ``append_history()``."""

    def test_empty_pairs_is_noop(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        store.append_history([])
        assert store.history() == []
        # Nothing was written, so the ``.alfred`` directory must not exist.
        assert not (tmp_path / ".alfred").exists()

    def test_single_entry_shape(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        # "Movie.srt" has only a stem and an extension (no third
        # dot-separated segment), so the recorded type is expected to fall
        # back to "standard".
        placed = _placed("input.srt", "Movie.srt", tmp_path)
        track = _track(lang=FRA, confidence=0.875)
        store.append_history([(placed, track)], release_group="GRP")
        history = store.history()
        assert len(history) == 1
        entry = history[0]
        assert entry["release_group"] == "GRP"
        assert "placed_at" in entry
        expected_track = {
            "language": "fra",
            "type": "standard",  # two-part filename → default type
            "format": "srt",
            "is_embedded": False,
            "source_file": "input.srt",
            "placed_as": "Movie.srt",
            "confidence": 0.875,
        }
        assert entry["tracks"] == [expected_track]

    def test_type_inferred_from_filename_segments(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        # NOTE(review): per the original author's note, the store splits the
        # placed filename with ``rsplit('.', 2)`` and takes the middle piece
        # as the type: "Show.eng.sdh.srt" → "sdh". A three-piece name such as
        # "Show.fra.srt" would therefore report "fra" as its type (a known
        # quirk, not asserted here) — confirm against the implementation.
        sources = ("a.srt", "b.srt", "c.srt")
        destinations = ("Show.eng.sdh.srt", "Show.fra.forced.srt", "Show.srt")
        placed = [
            _placed(src, dest, tmp_path) for src, dest in zip(sources, destinations)
        ]
        candidates = [_track(lang) for lang in (ENG, FRA, FRA)]
        store.append_history(list(zip(placed, candidates)))
        recorded = store.history()[0]["tracks"]
        for index, expected_type in enumerate(("sdh", "forced", "standard")):
            assert recorded[index]["type"] == expected_type

    def test_unknown_language_when_track_has_no_language(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("a.srt", "Show.und.srt", tmp_path)
        languageless = _track(lang=None)
        store.append_history([(placed, languageless)])
        recorded = store.history()[0]["tracks"][0]
        assert recorded["language"] == "unknown"

    def test_embedded_flag_propagated(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("x.srt", "Show.fra.srt", tmp_path)
        embedded_track = _track(embedded=True)
        store.append_history([(placed, embedded_track)])
        recorded = store.history()[0]["tracks"][0]
        assert recorded["is_embedded"] is True

    def test_season_and_episode_present_when_given(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("x.srt", "Show.S01E03.fra.srt", tmp_path)
        store.append_history([(placed, _track())], season=1, episode=3)
        entry = store.history()[0]
        assert entry["season"] == 1
        assert entry["episode"] == 3

    def test_season_and_episode_absent_when_omitted(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("x.srt", "Movie.fra.srt", tmp_path)
        store.append_history([(placed, _track())])
        entry = store.history()[0]
        # Without season/episode arguments the keys are left out entirely.
        assert "season" not in entry
        assert "episode" not in entry

    def test_confidence_rounded_to_3_decimals(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("x.srt", "X.fra.srt", tmp_path)
        precise_track = _track(confidence=0.123456789)
        store.append_history([(placed, precise_track)])
        recorded = store.history()[0]["tracks"][0]
        assert recorded["confidence"] == 0.123

    def test_release_group_appended_to_top_level_groups(self, tmp_path):
        store = SubtitleMetadataStore(tmp_path)
        placed = _placed("x.srt", "X.fra.srt", tmp_path)
        # "GRP1" is submitted twice; it must appear once in the group list.
        for group in ("GRP1", "GRP1", "GRP2"):
            store.append_history([(placed, _track())], release_group=group)
        # Inspect the sidecar through the generic store, which exposes the
        # top-level ``release_groups`` key.
        from alfred.infrastructure.metadata.store import MetadataStore

        sidecar = MetadataStore(tmp_path).load()
        assert sidecar.get("release_groups", []) == ["GRP1", "GRP2"]
+22 -5
View File
@@ -1,4 +1,21 @@
"""Tests for the Agent."""
"""Tests for ``alfred.agent.agent.Agent`` — the LLM orchestration layer.
Covers the public agent surface used by the FastAPI handlers:
- **Construction** — ``Agent(settings, llm, max_tool_iterations)`` wires the
prompt builder, the tool registry, and the in-memory tool catalogue.
- **Tool execution** — ``_execute_tool_call`` parses an OpenAI-shaped
tool-call dict, validates the tool exists and is in scope for the current
workflow, executes it, and surfaces errors as structured dicts.
- **Step loop** — ``step(user_input)`` records the user message, builds the
system prompt, runs the LLM/tool loop up to ``max_tool_iterations``, and
returns the final assistant text.
These tests use the current component-based LTM API
(``memory.ltm.workspace.download``, ``memory.ltm.library_paths.set(...)``).
The legacy flat attributes (``download_folder``, ``movie_folder``, …) no
longer exist.
"""
from unittest.mock import Mock
@@ -49,7 +66,7 @@ class TestExecuteToolCall:
def test_execute_known_tool(self, memory, mock_settings, mock_llm, real_folder):
"""Should execute known tool."""
agent = Agent(settings=mock_settings, llm=mock_llm)
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
tool_call = {
"id": "call_123",
@@ -145,7 +162,7 @@ class TestStep:
self, memory, mock_settings, mock_llm_with_tool_call, real_folder
):
"""Should execute tool and continue."""
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
agent = Agent(settings=mock_settings, llm=mock_llm_with_tool_call)
@@ -229,8 +246,8 @@ class TestAgentIntegration:
def test_multiple_tool_calls(self, memory, mock_settings, mock_llm, real_folder):
"""Should handle multiple tool calls in sequence."""
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.movie_folder = str(real_folder["movies"])
memory.ltm.workspace.download = str(real_folder["downloads"])
memory.ltm.library_paths.set("movies", str(real_folder["movies"]))
call_count = [0]
+32 -8
View File
@@ -1,4 +1,20 @@
"""Edge case tests for the Agent."""
"""Edge-case tests for ``alfred.agent.agent.Agent``.
Covers pathological tool-call inputs and unusual control flow:
- **TestExecuteToolCallEdgeCases** — malformed JSON arguments, unknown
tools, extra/wrong-typed args, and propagation of ``KeyboardInterrupt``
(must not be swallowed by the tool executor).
- **TestStepEdgeCases** — empty input, oversize input, unicode input.
- **TestAgentConcurrencyEdgeCases** — mid-step memory mutations through
``set_path_for_folder``.
- **TestAgentErrorRecovery** — recovery from tool errors during the loop.
The KeyboardInterrupt test patches ``visible_tool_names`` so the injected
test tool is in scope; otherwise the agent's workflow-scope guard would
short-circuit before ``tool.func()`` runs and the exception would never be
raised.
"""
from unittest.mock import Mock
@@ -31,8 +47,8 @@ class TestExecuteToolCallEdgeCases:
assert result is None or isinstance(result, dict)
def test_tool_raises_keyboard_interrupt(self, memory, mock_llm):
"""Should propagate KeyboardInterrupt."""
def test_tool_raises_keyboard_interrupt(self, memory, mock_llm, monkeypatch):
"""KeyboardInterrupt raised by a tool must propagate up, not be swallowed."""
agent = Agent(settings=settings, llm=mock_llm)
from alfred.agent.registry import Tool
@@ -43,6 +59,12 @@ class TestExecuteToolCallEdgeCases:
agent.tools["test_tool"] = Tool(
name="test_tool", description="Test", func=raise_interrupt, parameters={}
)
# The scope guard (``visible_tool_names``) would otherwise short-circuit
# the call before reaching ``tool.func()``. Make our injected tool
# visible to reach the exception path under test.
monkeypatch.setattr(
agent.prompt_builder, "visible_tool_names", lambda: ["test_tool"]
)
tool_call = {
"id": "call_123",
@@ -55,7 +77,7 @@ class TestExecuteToolCallEdgeCases:
def test_tool_with_extra_args(self, memory, mock_llm, real_folder):
"""Should handle extra arguments gracefully."""
agent = Agent(settings=settings, llm=mock_llm)
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
tool_call = {
"id": "call_123",
@@ -243,8 +265,8 @@ class TestAgentConcurrencyEdgeCases:
assert len(history) == 4
def test_tool_modifies_memory_during_step(self, memory, mock_llm, real_folder):
"""Should handle memory modifications during step."""
memory.ltm.download_folder = str(real_folder["downloads"])
"""A tool invocation must persist its mutation into LTM."""
memory.ltm.workspace.download = str(real_folder["downloads"])
call_count = [0]
@@ -259,7 +281,7 @@ class TestAgentConcurrencyEdgeCases:
"id": "call_1",
"function": {
"name": "set_path_for_folder",
"arguments": f'{{"folder_name": "movie", "path_value": "{str(real_folder["movies"])}"}}',
"arguments": f'{{"folder_name": "movies", "path_value": "{str(real_folder["movies"])}"}}',
},
}
],
@@ -272,7 +294,9 @@ class TestAgentConcurrencyEdgeCases:
agent.step("Set movie folder")
mem = get_memory()
assert mem.ltm.movie_folder == str(real_folder["movies"])
# ``movies`` is a library collection (not download/torrent) → stored in
# library_paths, not as a flat attribute.
assert mem.ltm.library_paths.get("movies") == str(real_folder["movies"])
class TestAgentErrorRecovery:
+18 -1
View File
@@ -1,4 +1,21 @@
"""Tests for FastAPI endpoints."""
"""Tests for the FastAPI endpoints exposed by ``alfred.app``.
Covers the OpenAI-compatible surface that LibreChat consumes:
- ``GET /health`` — version + status.
- ``GET /v1/models`` — single ``agent-media`` entry.
- ``POST /v1/chat/completions`` — both blocking and streaming modes,
request validation (empty messages, missing user role, invalid JSON),
and the OpenAI-compatible response envelope (``choices[0].message``).
- ``GET /memory/state`` and ``GET /memory/episodic/search-results`` —
debug introspection endpoints.
- ``POST /memory/clear-session`` — STM/episodic reset.
Tests patch ``alfred.app.agent.step`` rather than running the real LLM.
The app module degrades gracefully when no LLM provider is configured at
import time (placeholder LLM that 503s on use), which is what lets these
tests collect under pytest without ``DEEPSEEK_API_KEY``.
"""
from unittest.mock import patch
+23 -6
View File
@@ -1,4 +1,19 @@
"""Edge case tests for FastAPI endpoints."""
"""Edge-case tests for the FastAPI endpoints.
Covers adversarial and unusual inputs across each endpoint group:
- **TestChatCompletionsEdgeCases** — malformed payloads, missing roles,
null / empty content, system-or-assistant-only conversations, streaming
with tool-calls.
- **TestModelsEndpointEdgeCases** — response shape conformance.
- **TestMemoryEndpointsEdgeCases** — unicode in LTM paths, special chars
and quotes in stored search results, idempotency of clear-session, and
LTM preservation across clears.
- **TestHealthEndpointEdgeCases** — query-param tolerance, version string.
Uses the current LTM API (``ltm.workspace.download``); JSON assertions
target the new persisted shape (``data["ltm"]["workspace"]["download"]``).
"""
from unittest.mock import Mock, patch
@@ -337,7 +352,7 @@ class TestChatCompletionsEdgeCases:
from alfred.infrastructure.persistence import get_memory
mem = get_memory()
mem.ltm.download_folder = str(real_folder["downloads"])
mem.ltm.workspace.download = str(real_folder["downloads"])
call_count = [0]
@@ -453,7 +468,7 @@ class TestMemoryEndpointsEdgeCases:
mock_llm.return_value = Mock()
from alfred.app import app
memory.ltm.download_folder = "/path/日本語テスト"
memory.ltm.workspace.download = "/path/日本語テスト"
memory.stm.add_message("user", "🎬 Movie request")
client = TestClient(app)
@@ -461,7 +476,8 @@ class TestMemoryEndpointsEdgeCases:
assert response.status_code == 200
data = response.json()
assert "日本語" in str(data)
# Unicode must survive the JSON roundtrip in workspace paths.
assert data["ltm"]["workspace"]["download"] == "/path/日本語テスト"
def test_search_results_with_special_chars(self, memory):
"""Should handle special characters in search results."""
@@ -501,7 +517,7 @@ class TestMemoryEndpointsEdgeCases:
mock_llm.return_value = Mock()
from alfred.app import app
memory.ltm.download_folder = "/important/data"
memory.ltm.workspace.download = "/important/data"
memory.stm.add_message("user", "Hello")
client = TestClient(app)
@@ -510,7 +526,8 @@ class TestMemoryEndpointsEdgeCases:
response = client.get("/memory/state")
data = response.json()
assert data["ltm"]["download_folder"] == "/important/data"
# LTM survives the clear; STM is reset.
assert data["ltm"]["workspace"]["download"] == "/important/data"
assert data["stm"]["conversation_history"] == []
+36 -23
View File
@@ -1,4 +1,15 @@
"""Critical tests for configuration validation."""
"""Tests for ``alfred.settings.Settings`` validation.
Covers the field-level validators that ship today:
- ``llm_temperature`` — must be within [0, 2].
- ``max_tool_iterations`` — must be positive.
- ``request_timeout`` — must be positive.
URL fields (``deepseek_base_url``, ``tmdb_base_url``) are *not* currently
URL-validated; tests document that contract explicitly so that introducing
URL validation later is a deliberate, visible change rather than a silent one.
"""
import pytest
@@ -53,30 +64,32 @@ class TestConfigValidation:
Settings(request_timeout=30)
Settings(request_timeout=300)
def test_invalid_deepseek_url_raises_error(self):
"""Verify invalid DeepSeek URL is rejected."""
with pytest.raises(ConfigurationError, match="Invalid deepseek_base_url"):
Settings(deepseek_base_url="not-a-url")
def test_deepseek_url_accepted_verbatim(self):
"""``deepseek_base_url`` is currently not URL-validated.
with pytest.raises(ConfigurationError, match="Invalid deepseek_base_url"):
Settings(deepseek_base_url="ftp://invalid.com")
Documents the actual contract: any non-empty string is accepted, and
the burden of producing a valid URL falls on the caller. If URL
validation is introduced later, this test should be replaced with
``test_invalid_deepseek_url_raises_error``.
"""
for url in (
"https://api.deepseek.com",
"http://localhost:8000",
"not-a-url", # currently accepted — see docstring
"ftp://invalid.com",
):
s = Settings(deepseek_base_url=url)
assert s.deepseek_base_url == url
def test_valid_deepseek_url_accepted(self):
"""Verify valid DeepSeek URL is accepted."""
# Should not raise
Settings(deepseek_base_url="https://api.deepseek.com")
Settings(deepseek_base_url="http://localhost:8000")
def test_invalid_tmdb_url_raises_error(self):
"""Verify invalid TMDB URL is rejected."""
with pytest.raises(ConfigurationError, match="Invalid tmdb_base_url"):
Settings(tmdb_base_url="not-a-url")
def test_valid_tmdb_url_accepted(self):
"""Verify valid TMDB URL is accepted."""
# Should not raise
Settings(tmdb_base_url="https://api.themoviedb.org/3")
Settings(tmdb_base_url="http://localhost:3000")
def test_tmdb_url_accepted_verbatim(self):
"""``tmdb_base_url`` is currently not URL-validated (see deepseek test)."""
for url in (
"https://api.themoviedb.org/3",
"http://localhost:3000",
"not-a-url",
):
s = Settings(tmdb_base_url=url)
assert s.tmdb_base_url == url
class TestConfigChecks:
-319
View File
@@ -1,319 +0,0 @@
"""Edge case tests for configuration and parameters."""
import os
from unittest.mock import patch
import pytest
from alfred.agent.parameters import (
REQUIRED_PARAMETERS,
ParameterSchema,
format_parameters_for_prompt,
get_missing_required_parameters,
)
from alfred.settings import ConfigurationError, Settings
class TestSettingsEdgeCases:
    """Boundary and environment-driven tests for ``Settings``.

    Every test replaces ``os.environ`` wholesale (``clear=True``), so the
    outcome depends only on the variables named in the test itself.
    """

    @staticmethod
    def _assert_rejected(env, expected=ConfigurationError):
        """Construct ``Settings`` under ``env`` and require ``expected`` to be raised."""
        with patch.dict(os.environ, env, clear=True):
            with pytest.raises(expected):
                Settings()

    def test_default_values(self):
        """Should have sensible defaults."""
        with patch.dict(os.environ, {}, clear=True):
            cfg = Settings()
            observed = (
                cfg.llm_temperature,
                cfg.max_tool_iterations,
                cfg.request_timeout,
            )
            assert observed == (0.2, 10, 30)

    def test_temperature_boundary_low(self):
        """Should accept temperature at lower boundary."""
        with patch.dict(os.environ, {"LLM_TEMPERATURE": "0.0"}, clear=True):
            assert Settings().llm_temperature == 0.0

    def test_temperature_boundary_high(self):
        """Should accept temperature at upper boundary."""
        with patch.dict(os.environ, {"LLM_TEMPERATURE": "2.0"}, clear=True):
            assert Settings().llm_temperature == 2.0

    def test_temperature_below_boundary(self):
        """Should reject temperature below 0."""
        self._assert_rejected({"LLM_TEMPERATURE": "-0.1"})

    def test_temperature_above_boundary(self):
        """Should reject temperature above 2."""
        self._assert_rejected({"LLM_TEMPERATURE": "2.1"})

    def test_max_tool_iterations_boundary_low(self):
        """Should accept max_tool_iterations at lower boundary."""
        with patch.dict(os.environ, {"MAX_TOOL_ITERATIONS": "1"}, clear=True):
            assert Settings().max_tool_iterations == 1

    def test_max_tool_iterations_boundary_high(self):
        """Should accept max_tool_iterations at upper boundary."""
        with patch.dict(os.environ, {"MAX_TOOL_ITERATIONS": "20"}, clear=True):
            assert Settings().max_tool_iterations == 20

    def test_max_tool_iterations_below_boundary(self):
        """Should reject max_tool_iterations below 1."""
        self._assert_rejected({"MAX_TOOL_ITERATIONS": "0"})

    def test_max_tool_iterations_above_boundary(self):
        """Should reject max_tool_iterations above 20."""
        self._assert_rejected({"MAX_TOOL_ITERATIONS": "21"})

    def test_request_timeout_boundary_low(self):
        """Should accept request_timeout at lower boundary."""
        with patch.dict(os.environ, {"REQUEST_TIMEOUT": "1"}, clear=True):
            assert Settings().request_timeout == 1

    def test_request_timeout_boundary_high(self):
        """Should accept request_timeout at upper boundary."""
        with patch.dict(os.environ, {"REQUEST_TIMEOUT": "300"}, clear=True):
            assert Settings().request_timeout == 300

    def test_request_timeout_below_boundary(self):
        """Should reject request_timeout below 1."""
        self._assert_rejected({"REQUEST_TIMEOUT": "0"})

    def test_request_timeout_above_boundary(self):
        """Should reject request_timeout above 300."""
        self._assert_rejected({"REQUEST_TIMEOUT": "301"})

    def test_invalid_deepseek_url(self):
        """Should reject invalid DeepSeek URL."""
        self._assert_rejected({"DEEPSEEK_BASE_URL": "not-a-url"})

    def test_invalid_tmdb_url(self):
        """Should reject invalid TMDB URL."""
        self._assert_rejected({"TMDB_BASE_URL": "ftp://invalid"})

    def test_http_url_accepted(self):
        """Should accept http:// URLs."""
        env = {
            "DEEPSEEK_BASE_URL": "http://localhost:8080",
            "TMDB_BASE_URL": "http://localhost:3000",
        }
        with patch.dict(os.environ, env, clear=True):
            assert Settings().deepseek_base_url == "http://localhost:8080"

    def test_https_url_accepted(self):
        """Should accept https:// URLs."""
        env = {
            "DEEPSEEK_BASE_URL": "https://api.example.com",
            "TMDB_BASE_URL": "https://api.example.com",
        }
        with patch.dict(os.environ, env, clear=True):
            assert Settings().deepseek_base_url == "https://api.example.com"

    def test_is_deepseek_configured_with_key(self):
        """Should return True when API key is set."""
        with patch.dict(os.environ, {"DEEPSEEK_API_KEY": "test-key"}, clear=True):
            assert Settings().is_deepseek_configured() is True

    def test_is_deepseek_configured_without_key(self):
        """Should return False when API key is not set."""
        with patch.dict(os.environ, {"DEEPSEEK_API_KEY": ""}, clear=True):
            assert Settings().is_deepseek_configured() is False

    def test_is_tmdb_configured_with_key(self):
        """Should return True when API key is set."""
        with patch.dict(os.environ, {"TMDB_API_KEY": "test-key"}, clear=True):
            assert Settings().is_tmdb_configured() is True

    def test_is_tmdb_configured_without_key(self):
        """Should return False when API key is not set."""
        with patch.dict(os.environ, {"TMDB_API_KEY": ""}, clear=True):
            assert Settings().is_tmdb_configured() is False

    def test_non_numeric_temperature(self):
        """Should handle non-numeric temperature."""
        self._assert_rejected(
            {"LLM_TEMPERATURE": "not-a-number"},
            (ConfigurationError, ValueError),
        )

    def test_non_numeric_max_iterations(self):
        """Should handle non-numeric max_tool_iterations."""
        self._assert_rejected(
            {"MAX_TOOL_ITERATIONS": "five"},
            (ConfigurationError, ValueError),
        )
class TestParametersEdgeCases:
    """Edge case tests for the ``alfred.agent.parameters`` helpers.

    Several ``get_missing_required_parameters`` tests deliberately pin only
    the return *type*: the configuration may carry defaults, so the exact
    contents of the result are not asserted.
    """

    def test_parameter_creation(self):
        """Should create parameter with all fields."""
        param = ParameterSchema(
            key="test_key",
            description="Test description",
            why_needed="Test reason",
            type="string",
        )

        assert param.key == "test_key"
        assert param.description == "Test description"
        assert param.why_needed == "Test reason"
        assert param.type == "string"

    def test_required_parameters_not_empty(self):
        """Should have at least one required parameter."""
        assert len(REQUIRED_PARAMETERS) > 0

    def test_format_parameters_for_prompt(self):
        """Should mention every required parameter by key or description."""
        result = format_parameters_for_prompt()

        assert isinstance(result, str)
        for param in REQUIRED_PARAMETERS:
            assert param.key in result or param.description in result

    def test_get_missing_required_parameters_all_missing(self):
        """Should return a list when nothing is configured.

        Config may have defaults, so only the return type is checked.
        """
        missing = get_missing_required_parameters({"config": {}})

        assert isinstance(missing, list)

    def test_get_missing_required_parameters_none_missing(self):
        """Should return empty when all configured."""
        memory_data = {"config": {}}
        for param in REQUIRED_PARAMETERS:
            memory_data["config"][param.key] = "/some/path"

        missing = get_missing_required_parameters(memory_data)

        assert len(missing) == 0

    def test_get_missing_required_parameters_some_missing(self):
        """Should return a list when only the first parameter is configured.

        Config may have defaults, so only the return type is checked.
        """
        memory_data = {"config": {}}
        if REQUIRED_PARAMETERS:
            # Configure first parameter only
            memory_data["config"][REQUIRED_PARAMETERS[0].key] = "/path"

        missing = get_missing_required_parameters(memory_data)

        assert isinstance(missing, list)

    def test_get_missing_required_parameters_with_none_value(self):
        """Should return a list when every parameter is set to ``None``.

        Config may have defaults, so only the return type is checked.
        """
        memory_data = {"config": {}}
        for param in REQUIRED_PARAMETERS:
            memory_data["config"][param.key] = None

        missing = get_missing_required_parameters(memory_data)

        assert isinstance(missing, list)

    def test_get_missing_required_parameters_with_empty_string(self):
        """Should return a list when every parameter is an empty string.

        Whether an empty string counts as "set" or "missing" depends on the
        implementation, so only the return type is checked.
        """
        memory_data = {"config": {}}
        for param in REQUIRED_PARAMETERS:
            memory_data["config"][param.key] = ""

        missing = get_missing_required_parameters(memory_data)

        assert isinstance(missing, list)

    def test_get_missing_required_parameters_no_config_key(self):
        """Should return a list when memory has no ``config`` key at all.

        Config may have defaults, so only the return type is checked.
        """
        missing = get_missing_required_parameters({})

        assert isinstance(missing, list)

    def test_get_missing_required_parameters_config_not_dict(self):
        """Should either return a list or raise when ``config`` is not a dict."""
        memory_data = {"config": "not a dict"}

        try:
            missing = get_missing_required_parameters(memory_data)
        except (TypeError, AttributeError):
            return  # Raising is also acceptable

        assert isinstance(missing, list)
class TestParameterValidation:
    """Checks that ``ParameterSchema`` stores unusual field values as given."""

    def test_parameter_with_unicode(self):
        """Should handle unicode in parameter fields."""
        schema = ParameterSchema(
            key="日本語_key",
            description="日本語の説明",
            why_needed="日本語の理由",
            type="string",
        )
        assert "日本語" in schema.description

    def test_parameter_with_special_chars(self):
        """Should handle special characters."""
        schema = ParameterSchema(
            key="key_with_special",
            description='Description with "quotes" and \\backslash',
            why_needed="Reason with <html> tags",
            type="string",
        )
        assert '"quotes"' in schema.description

    def test_parameter_with_empty_fields(self):
        """Should handle empty fields."""
        schema = ParameterSchema(key="", description="", why_needed="", type="")
        assert schema.key == ""
-525
View File
@@ -1,525 +0,0 @@
"""Edge case tests for domain entities and value objects."""
from datetime import datetime
import pytest
from alfred.domain.movies.entities import Movie
from alfred.domain.movies.value_objects import MovieTitle, Quality, ReleaseYear
from alfred.domain.shared.exceptions import ValidationError
from alfred.domain.shared.value_objects import FilePath, FileSize, ImdbId
from alfred.domain.subtitles.entities import Subtitle
from alfred.domain.subtitles.value_objects import Language, SubtitleFormat, TimingOffset
from alfred.domain.tv_shows.entities import TVShow
from alfred.domain.tv_shows.value_objects import ShowStatus
class TestImdbIdEdgeCases:
    """Edge case tests for the ``ImdbId`` value object."""

    # Either exception type counts as a rejection in these tests.
    _REJECTION = (ValidationError, ValueError)

    def test_valid_imdb_id(self):
        """Should accept valid IMDb ID."""
        assert str(ImdbId("tt1375666")) == "tt1375666"

    def test_imdb_id_with_leading_zeros(self):
        """Should accept IMDb ID with leading zeros."""
        assert str(ImdbId("tt0000001")) == "tt0000001"

    def test_imdb_id_long_number(self):
        """Should accept IMDb ID with 8 digits."""
        assert str(ImdbId("tt12345678")) == "tt12345678"

    def test_imdb_id_lowercase(self):
        """Should accept lowercase tt prefix."""
        assert str(ImdbId("tt1234567")) == "tt1234567"

    def test_imdb_id_uppercase(self):
        """Should handle uppercase TT prefix; accepting or rejecting both pass."""
        try:
            upper = ImdbId("TT1234567")
        except self._REJECTION:
            # Rejection is a valid outcome.
            pass
        else:
            assert upper is not None

    def test_imdb_id_without_prefix(self):
        """Should reject ID without tt prefix."""
        with pytest.raises(self._REJECTION):
            ImdbId("1234567")

    def test_imdb_id_empty(self):
        """Should reject empty string."""
        with pytest.raises(self._REJECTION):
            ImdbId("")

    def test_imdb_id_none(self):
        """Should reject None."""
        with pytest.raises((ValidationError, ValueError, TypeError)):
            ImdbId(None)

    def test_imdb_id_with_spaces(self):
        """Should reject ID with spaces."""
        with pytest.raises(self._REJECTION):
            ImdbId("tt 1234567")

    def test_imdb_id_with_special_chars(self):
        """Should reject ID with special characters."""
        with pytest.raises(self._REJECTION):
            ImdbId("tt1234567!")

    def test_imdb_id_equality(self):
        """Should compare equal IDs, directly or via their string form."""
        first = ImdbId("tt1234567")
        second = ImdbId("tt1234567")
        assert first == second or str(first) == str(second)

    def test_imdb_id_hash(self):
        """Should be hashable for use in sets/dicts."""
        first = ImdbId("tt1234567")
        second = ImdbId("tt1234567")
        # NOTE(review): nothing is asserted; the test only fails if building
        # the set raises. The set may hold 1 or 2 items depending on the
        # implementation.
        _members = {first, second}
class TestFilePathEdgeCases:
    """Edge case tests for the ``FilePath`` value object."""

    def test_absolute_path(self):
        """Should accept absolute path."""
        rendered = str(FilePath("/home/user/movies/movie.mkv"))
        assert "/home/user/movies/movie.mkv" in rendered

    def test_relative_path(self):
        """Should accept relative path."""
        rendered = str(FilePath("movies/movie.mkv"))
        assert "movies/movie.mkv" in rendered

    def test_path_with_spaces(self):
        """Should accept path with spaces."""
        rendered = str(FilePath("/home/user/My Movies/movie file.mkv"))
        assert "My Movies" in rendered

    def test_path_with_unicode(self):
        """Should accept path with unicode."""
        rendered = str(FilePath("/home/user/映画/日本語.mkv"))
        assert "映画" in rendered

    def test_windows_path(self):
        """Should handle Windows-style path."""
        rendered = str(FilePath("C:\\Users\\user\\Movies\\movie.mkv"))
        assert "movie.mkv" in rendered

    def test_empty_path(self):
        """Should handle empty path; accepting or rejecting both pass."""
        try:
            empty = FilePath("")
        except (ValidationError, ValueError):
            # Rejection is a valid outcome.
            pass
        else:
            # If accepted, may render as "." for the current directory.
            assert str(empty) in ["", "."]

    def test_path_with_dots(self):
        """Should handle path with . and .."""
        rendered = str(FilePath("/home/user/../other/./movie.mkv"))
        assert "movie.mkv" in rendered
class TestFileSizeEdgeCases:
    """Edge case tests for the ``FileSize`` value object.

    NOTE(review): the human-readable checks use ``or``, so the KB/MB/GB
    tests pass for any output containing "1". Confirm the intended format
    and tighten them.
    """

    def test_zero_size(self):
        """Should accept zero size."""
        assert FileSize(0).bytes == 0

    def test_very_large_size(self):
        """Should accept very large size (petabytes)."""
        one_pb = 1024**5
        assert FileSize(one_pb).bytes == 1024**5

    def test_negative_size(self):
        """Should reject negative size."""
        with pytest.raises((ValidationError, ValueError)):
            FileSize(-1)

    def test_human_readable_bytes(self):
        """Should format bytes correctly."""
        text = FileSize(500).to_human_readable()
        assert "500" in text or "B" in text

    def test_human_readable_kb(self):
        """Should format KB correctly."""
        text = FileSize(1024).to_human_readable()
        assert "KB" in text or "1" in text

    def test_human_readable_mb(self):
        """Should format MB correctly."""
        text = FileSize(1024 * 1024).to_human_readable()
        assert "MB" in text or "1" in text

    def test_human_readable_gb(self):
        """Should format GB correctly."""
        text = FileSize(1024 * 1024 * 1024).to_human_readable()
        assert "GB" in text or "1" in text
class TestMovieTitleEdgeCases:
    """Edge case tests for the ``MovieTitle`` value object."""

    def test_normal_title(self):
        """Should accept normal title."""
        assert MovieTitle("Inception").value == "Inception"

    def test_title_with_year(self):
        """Should accept title with year."""
        assert "2049" in MovieTitle("Blade Runner 2049").value

    def test_title_with_special_chars(self):
        """Should accept title with special characters."""
        assert MovieTitle("Se7en").value == "Se7en"

    def test_title_with_colon(self):
        """Should accept title with colon."""
        assert ":" in MovieTitle("Star Wars: A New Hope").value

    def test_title_with_unicode(self):
        """Should accept unicode title."""
        assert MovieTitle("千と千尋の神隠し").value == "千と千尋の神隠し"

    def test_empty_title(self):
        """Should reject empty title."""
        with pytest.raises((ValidationError, ValueError)):
            MovieTitle("")

    def test_whitespace_title(self):
        """Should handle whitespace title (may strip or reject)."""
        try:
            blank = MovieTitle(" ")
        except (ValidationError, ValueError):
            # Rejection is a valid outcome.
            pass
        else:
            # Accepted (possibly after stripping): a value must still exist.
            assert blank.value is not None

    def test_very_long_title(self):
        """Should handle very long title; a length limit is also acceptable."""
        raw = "A" * 1000
        try:
            lengthy = MovieTitle(raw)
        except (ValidationError, ValueError):
            # A length limit is a valid outcome.
            pass
        else:
            assert len(lengthy.value) == 1000
class TestReleaseYearEdgeCases:
    """Edge case tests for the ``ReleaseYear`` value object."""

    # Either exception type counts as a rejection in these tests.
    _REJECTION = (ValidationError, ValueError)

    def test_valid_year(self):
        """Should accept valid year."""
        assert ReleaseYear(2024).value == 2024

    def test_old_movie_year(self):
        """Should accept old movie year."""
        # 1895: first movie ever
        assert ReleaseYear(1895).value == 1895

    def test_future_year(self):
        """Should accept near future year."""
        assert ReleaseYear(2030).value == 2030

    def test_very_old_year(self):
        """Should reject very old year."""
        with pytest.raises(self._REJECTION):
            ReleaseYear(1800)

    def test_very_future_year(self):
        """Should reject very future year."""
        with pytest.raises(self._REJECTION):
            ReleaseYear(3000)

    def test_negative_year(self):
        """Should reject negative year."""
        with pytest.raises(self._REJECTION):
            ReleaseYear(-2024)

    def test_zero_year(self):
        """Should reject zero year."""
        with pytest.raises(self._REJECTION):
            ReleaseYear(0)
class TestQualityEdgeCases:
    """Edge case tests for the ``Quality`` constants and ``from_string``."""

    def test_standard_qualities(self):
        """Should expose the standard resolutions as values."""
        assert Quality.SD.value == "480p"
        assert Quality.HD.value == "720p"
        assert Quality.FULL_HD.value == "1080p"
        assert Quality.UHD_4K.value == "2160p"

    def test_unknown_quality(self):
        """Should accept unknown quality."""
        assert Quality.UNKNOWN.value == "unknown"

    def test_from_string_quality(self):
        """Should parse quality from string."""
        expectations = (
            ("1080p", Quality.FULL_HD),
            ("720p", Quality.HD),
            ("2160p", Quality.UHD_4K),
            ("HDTV", Quality.UNKNOWN),
        )
        for raw, expected in expectations:
            assert Quality.from_string(raw) == expected

    def test_empty_quality(self):
        """Should handle empty quality string."""
        assert Quality.from_string("") == Quality.UNKNOWN
class TestShowStatusEdgeCases:
    """Edge case tests for ``ShowStatus`` and its ``from_string`` parser."""

    def test_all_statuses(self):
        """Should have all expected statuses."""
        for status in (ShowStatus.ONGOING, ShowStatus.ENDED, ShowStatus.UNKNOWN):
            assert status is not None

    def test_from_string_valid(self):
        """Should parse valid status strings."""
        for raw, expected in (
            ("ongoing", ShowStatus.ONGOING),
            ("ended", ShowStatus.ENDED),
        ):
            assert ShowStatus.from_string(raw) == expected

    def test_from_string_case_insensitive(self):
        """Should be case insensitive."""
        for raw, expected in (
            ("ONGOING", ShowStatus.ONGOING),
            ("Ended", ShowStatus.ENDED),
        ):
            assert ShowStatus.from_string(raw) == expected

    def test_from_string_unknown(self):
        """Should return UNKNOWN for invalid strings."""
        for raw in ("invalid", ""):
            assert ShowStatus.from_string(raw) == ShowStatus.UNKNOWN
class TestLanguageEdgeCases:
    """Edge case tests for ``Language`` and its ``from_code`` parser."""

    def test_common_languages(self):
        """Should have common languages."""
        for language in (Language.ENGLISH, Language.FRENCH):
            assert language is not None

    def test_from_code_valid(self):
        """Should parse valid language codes."""
        for code, expected in (("en", Language.ENGLISH), ("fr", Language.FRENCH)):
            assert Language.from_code(code) == expected

    def test_from_code_case_insensitive(self):
        """Should be case insensitive."""
        for code, expected in (("EN", Language.ENGLISH), ("Fr", Language.FRENCH)):
            assert Language.from_code(code) == expected

    def test_from_code_unknown(self):
        """Should handle unknown codes; returning a value or raising both pass."""
        try:
            resolved = Language.from_code("xx")
        except (ValidationError, ValueError, KeyError):
            # Raising is a valid outcome.
            pass
        else:
            assert resolved is not None
class TestSubtitleFormatEdgeCases:
    """Edge case tests for ``SubtitleFormat`` and ``from_extension``."""

    def test_common_formats(self):
        """Should have common formats."""
        for known in (SubtitleFormat.SRT, SubtitleFormat.ASS):
            assert known is not None

    def test_from_extension_with_dot(self):
        """Should handle extension with dot."""
        assert SubtitleFormat.from_extension(".srt") == SubtitleFormat.SRT

    def test_from_extension_without_dot(self):
        """Should handle extension without dot."""
        assert SubtitleFormat.from_extension("srt") == SubtitleFormat.SRT

    def test_from_extension_case_insensitive(self):
        """Should be case insensitive."""
        for extension, expected in (
            ("SRT", SubtitleFormat.SRT),
            (".ASS", SubtitleFormat.ASS),
        ):
            assert SubtitleFormat.from_extension(extension) == expected
class TestTimingOffsetEdgeCases:
    """Edge case tests for the ``TimingOffset`` value object."""

    def test_zero_offset(self):
        """Should accept zero offset."""
        assert TimingOffset(0).milliseconds == 0

    def test_positive_offset(self):
        """Should accept positive offset."""
        assert TimingOffset(5000).milliseconds == 5000

    def test_negative_offset(self):
        """Should accept negative offset."""
        assert TimingOffset(-5000).milliseconds == -5000

    def test_very_large_offset(self):
        """Should accept very large offset."""
        # 3600000 ms == 1 hour
        assert TimingOffset(3600000).milliseconds == 3600000
class TestMovieEntityEdgeCases:
    """Edge case tests for constructing the ``Movie`` entity."""

    def test_minimal_movie(self):
        """Should create movie with minimal fields."""
        bare = Movie(
            imdb_id=ImdbId("tt1234567"),
            title=MovieTitle("Test"),
            quality=Quality.UNKNOWN,
        )
        assert bare.imdb_id is not None

    def test_full_movie(self):
        """Should create movie with all fields."""
        complete = Movie(
            imdb_id=ImdbId("tt1234567"),
            title=MovieTitle("Test Movie"),
            release_year=ReleaseYear(2024),
            quality=Quality.FULL_HD,
            file_path=FilePath("/movies/test.mkv"),
            file_size=FileSize(1000000000),
            tmdb_id=12345,
            added_at=datetime.now(),
        )
        assert complete.tmdb_id == 12345

    def test_movie_without_optional_fields(self):
        """Should handle None optional fields."""
        sparse = Movie(
            imdb_id=ImdbId("tt1234567"),
            title=MovieTitle("Test"),
            release_year=None,
            quality=Quality.UNKNOWN,
            file_path=None,
            file_size=None,
            tmdb_id=None,
        )
        assert sparse.release_year is None
        assert sparse.file_path is None
class TestTVShowEntityEdgeCases:
    """Edge case tests for constructing the ``TVShow`` entity."""

    @staticmethod
    def _build(title, seasons_count, status):
        """Create a ``TVShow`` with the fixed IMDb ID shared by these tests."""
        return TVShow(
            imdb_id=ImdbId("tt1234567"),
            title=title,
            seasons_count=seasons_count,
            status=status,
        )

    def test_minimal_show(self):
        """Should create show with minimal fields."""
        created = self._build("Test Show", 1, ShowStatus.UNKNOWN)
        assert created.title == "Test Show"

    def test_show_with_zero_seasons(self):
        """Should handle show with zero seasons."""
        created = self._build("Upcoming Show", 0, ShowStatus.ONGOING)
        assert created.seasons_count == 0

    def test_show_with_many_seasons(self):
        """Should handle show with many seasons."""
        created = self._build("Long Running Show", 50, ShowStatus.ONGOING)
        assert created.seasons_count == 50
class TestSubtitleEntityEdgeCases:
    """Edge case tests for constructing the ``Subtitle`` entity."""

    @staticmethod
    def _required_fields(path):
        """Return the keyword arguments every test passes, with ``path`` as the file."""
        return {
            "media_imdb_id": ImdbId("tt1234567"),
            "language": Language.ENGLISH,
            "format": SubtitleFormat.SRT,
            "file_path": FilePath(path),
        }

    def test_minimal_subtitle(self):
        """Should create subtitle with minimal fields."""
        sub = Subtitle(**self._required_fields("/subs/test.srt"))
        assert sub.language == Language.ENGLISH

    def test_subtitle_for_episode(self):
        """Should create subtitle for specific episode."""
        sub = Subtitle(
            **self._required_fields("/subs/s01e01.srt"),
            season_number=1,
            episode_number=1,
        )
        assert sub.season_number == 1
        assert sub.episode_number == 1

    def test_subtitle_with_all_metadata(self):
        """Should create subtitle with all metadata."""
        sub = Subtitle(
            **self._required_fields("/subs/test.srt"),
            timing_offset=TimingOffset(500),
            hearing_impaired=True,
            forced=True,
            source="OpenSubtitles",
            uploader="user123",
            download_count=10000,
            rating=9.5,
        )
        assert sub.hearing_impaired is True
        assert sub.forced is True
        assert sub.rating == 9.5
+284 -129
View File
@@ -1,4 +1,20 @@
"""Tests for the Memory system."""
"""Tests for the three-tier memory system.
Covers the public API of the memory subsystem:
- ``LongTermMemory`` — persistent, component-based (workspace, library_paths,
media_preferences, subtitle_preferences, library, following).
- ``ShortTermMemory`` — session-only conversation/workflow/entity state.
- ``EpisodicMemory`` — volatile event-driven state (search results, downloads,
errors, pending questions, background events).
- ``Memory`` — unified manager (load/save LTM, clear session).
- Context functions — ``init_memory`` / ``get_memory`` / ``has_memory`` /
``reset_memory``.
These tests target the current component-based LTM (no legacy ``set_config`` /
``add_to_library`` / ``follow_show`` aliases) and assert on observable
behavior, not implementation details.
"""
from datetime import datetime
@@ -16,118 +32,157 @@ from alfred.infrastructure.persistence import (
from alfred.infrastructure.persistence.context import reset_memory
def is_iso_format(s: str) -> bool:
"""Helper to check if a string is a valid ISO 8601 timestamp."""
if not isinstance(s, str):
def _is_iso_timestamp(value: str) -> bool:
"""Return True if ``value`` parses as an ISO-8601 datetime."""
if not isinstance(value, str):
return False
try:
# Attempt to parse the string as an ISO 8601 timestamp
datetime.fromisoformat(s.replace("Z", "+00:00"))
datetime.fromisoformat(value.replace("Z", "+00:00"))
return True
except (ValueError, TypeError):
return False
class TestLongTermMemory:
"""Tests for LongTermMemory."""
# ---------------------------------------------------------------------------
# LongTermMemory
# ---------------------------------------------------------------------------
def test_default_values(self):
class TestLongTermMemoryDefaults:
"""Default-state guarantees for a freshly constructed LTM."""
def test_workspace_paths_unset_by_default(self):
ltm = LongTermMemory()
assert ltm.workspace.download is None
assert ltm.workspace.torrent is None
assert ltm.workspace.trash is None
def test_library_paths_empty_by_default(self):
ltm = LongTermMemory()
assert ltm.library_paths.folders == {}
assert ltm.library_paths.get("movies") is None
def test_media_preferences_defaults(self):
ltm = LongTermMemory()
assert ltm.media_preferences.quality == "1080p"
assert "en" in ltm.media_preferences.audio_languages
assert ltm.following == []
def test_set_and_get_config(self):
def test_following_empty_by_default(self):
ltm = LongTermMemory()
ltm.set_config("download_folder", "/path/to/downloads")
assert ltm.get_config("download_folder") == "/path/to/downloads"
assert ltm.following.shows == []
def test_get_config_default(self):
def test_library_empty_by_default(self):
ltm = LongTermMemory()
assert ltm.get_config("nonexistent") is None
assert ltm.get_config("nonexistent", "default") == "default"
assert ltm.library.movies == []
assert ltm.library.tv_shows == []
def test_has_config(self):
class TestLibraryPaths:
"""LibraryPaths.set / get on the LTM component."""
def test_set_and_get_roundtrip(self):
ltm = LongTermMemory()
assert not ltm.has_config("download_folder")
ltm.set_config("download_folder", "/path")
assert ltm.has_config("download_folder")
ltm.library_paths.set("movies", "/media/movies")
assert ltm.library_paths.get("movies") == "/media/movies"
def test_has_config_none_value(self):
def test_unknown_collection_returns_none(self):
ltm = LongTermMemory()
ltm.config["key"] = None
assert not ltm.has_config("key")
assert ltm.library_paths.get("anything") is None
def test_add_to_library(self):
def test_set_overwrites_existing_value(self):
ltm = LongTermMemory()
ltm.library_paths.set("movies", "/old/path")
ltm.library_paths.set("movies", "/new/path")
assert ltm.library_paths.get("movies") == "/new/path"
class TestLibrary:
"""Library.add / get on the LTM component."""
def test_add_new_movie_is_recorded(self):
ltm = LongTermMemory()
ltm.library.add("movies", {"imdb_id": "tt1375666", "title": "Inception"})
movies = ltm.library.get("movies")
assert len(movies) == 1
assert movies[0]["title"] == "Inception"
assert _is_iso_timestamp(movies[0]["added_at"])
def test_add_is_idempotent_on_imdb_id(self):
ltm = LongTermMemory()
movie = {"imdb_id": "tt1375666", "title": "Inception"}
ltm.add_to_library("movies", movie)
assert len(ltm.library["movies"]) == 1
assert ltm.library["movies"][0]["title"] == "Inception"
assert is_iso_format(ltm.library["movies"][0].get("added_at"))
ltm.library.add("movies", movie)
ltm.library.add("movies", movie)
assert len(ltm.library.get("movies")) == 1
def test_add_to_library_no_duplicates(self):
def test_get_unknown_media_type_returns_empty_list(self):
ltm = LongTermMemory()
movie = {"imdb_id": "tt1375666", "title": "Inception"}
ltm.add_to_library("movies", movie)
ltm.add_to_library("movies", movie)
assert len(ltm.library["movies"]) == 1
assert ltm.library.get("anything") == []
def test_add_to_library_new_type(self):
def test_add_unknown_media_type_is_a_no_op(self):
ltm = LongTermMemory()
subtitle = {"imdb_id": "tt1375666", "language": "en"}
ltm.add_to_library("subtitles", subtitle)
assert "subtitles" in ltm.library
assert len(ltm.library["subtitles"]) == 1
ltm.library.add("podcasts", {"imdb_id": "x", "title": "y"})
# Nothing crashes; library state unchanged.
assert ltm.library.movies == []
assert ltm.library.tv_shows == []
def test_get_library(self):
class TestFollowing:
"""Following.add on the LTM component."""
def test_add_show_records_timestamp(self):
ltm = LongTermMemory()
ltm.add_to_library("movies", {"imdb_id": "tt1", "title": "Movie 1"})
ltm.add_to_library("movies", {"imdb_id": "tt2", "title": "Movie 2"})
movies = ltm.get_library("movies")
assert len(movies) == 2
ltm.following.add({"imdb_id": "tt0944947", "title": "Game of Thrones"})
def test_get_library_empty(self):
ltm = LongTermMemory()
assert ltm.get_library("unknown") == []
assert len(ltm.following.shows) == 1
assert ltm.following.shows[0]["title"] == "Game of Thrones"
assert _is_iso_timestamp(ltm.following.shows[0]["followed_at"])
def test_follow_show(self):
def test_add_is_idempotent_on_imdb_id(self):
ltm = LongTermMemory()
show = {"imdb_id": "tt0944947", "title": "Game of Thrones"}
ltm.follow_show(show)
assert len(ltm.following) == 1
assert ltm.following[0]["title"] == "Game of Thrones"
assert is_iso_format(ltm.following[0].get("followed_at"))
ltm.following.add(show)
ltm.following.add(show)
assert len(ltm.following.shows) == 1
def test_follow_show_no_duplicates(self):
class TestLongTermMemorySerialization:
"""to_dict / from_dict roundtrip and legacy migration."""
def test_roundtrip_preserves_state(self):
ltm = LongTermMemory()
show = {"imdb_id": "tt0944947", "title": "Game of Thrones"}
ltm.follow_show(show)
ltm.follow_show(show)
assert len(ltm.following) == 1
ltm.workspace.download = "/downloads"
ltm.library_paths.set("movies", "/media/movies")
ltm.library.add("movies", {"imdb_id": "tt1", "title": "Movie"})
ltm.following.add({"imdb_id": "tt2", "title": "Show"})
def test_to_dict(self):
ltm = LongTermMemory()
ltm.set_config("key", "value")
data = ltm.to_dict()
assert "config" in data
assert data["config"]["key"] == "value"
restored = LongTermMemory.from_dict(ltm.to_dict())
def test_from_dict(self):
data = {
"config": {"download_folder": "/downloads"},
"preferences": {"preferred_quality": "4K"},
"library": {"movies": [{"imdb_id": "tt1", "title": "Test"}]},
"following": [],
}
ltm = LongTermMemory.from_dict(data)
assert ltm.get_config("download_folder") == "/downloads"
assert ltm.media_preferences.quality == "4K"
assert len(ltm.library["movies"]) == 1
assert restored.workspace.download == "/downloads"
assert restored.library_paths.get("movies") == "/media/movies"
assert restored.library.get("movies")[0]["title"] == "Movie"
assert restored.following.shows[0]["title"] == "Show"
def test_from_dict_handles_empty_dict(self):
ltm = LongTermMemory.from_dict({})
assert ltm.workspace.download is None
assert ltm.library_paths.folders == {}
def test_from_dict_migrates_legacy_flat_workspace_keys(self):
"""Legacy snapshots had ``download_folder`` / ``torrent_folder`` at root."""
legacy = {"download_folder": "/dl", "torrent_folder": "/tt"}
ltm = LongTermMemory.from_dict(legacy)
assert ltm.workspace.download == "/dl"
assert ltm.workspace.torrent == "/tt"
# ---------------------------------------------------------------------------
# ShortTermMemory
# ---------------------------------------------------------------------------
class TestShortTermMemory:
"""Tests for ShortTermMemory."""
"""Conversation, workflow, entity, and language state."""
def test_default_values(self):
stm = ShortTermMemory()
@@ -137,102 +192,202 @@ class TestShortTermMemory:
assert stm.current_topic is None
assert stm.language == "en"
def test_add_message(self):
def test_add_message_records_timestamp(self):
stm = ShortTermMemory()
stm.add_message("user", "Hello")
assert len(stm.conversation_history) == 1
assert is_iso_format(stm.conversation_history[0].get("timestamp"))
history = stm.conversation_history
assert len(history) == 1
assert history[0]["role"] == "user"
assert history[0]["content"] == "Hello"
assert _is_iso_timestamp(history[0]["timestamp"])
def test_add_message_max_history(self):
stm = ShortTermMemory(max_history=5)
def test_get_recent_history_caps_at_n(self):
stm = ShortTermMemory()
for i in range(10):
stm.add_message("user", f"Message {i}")
assert len(stm.conversation_history) == 5
assert stm.conversation_history[0]["content"] == "Message 5"
assert len(stm.get_recent_history(3)) == 3
def test_language_management(self):
def test_set_language_overrides_default(self):
stm = ShortTermMemory()
assert stm.language == "en"
stm.set_language("fr")
assert stm.language == "fr"
stm.clear()
assert stm.language == "en"
def test_clear(self):
def test_clear_resets_volatile_state(self):
stm = ShortTermMemory()
stm.add_message("user", "Hello")
stm.set_language("fr")
stm.set_entity("title", "Inception")
stm.clear()
assert stm.conversation_history == []
assert stm.extracted_entities == {}
# Language is volatile session-state too; clear() resets it.
assert stm.language == "en"
def test_entity_set_get_roundtrip(self):
stm = ShortTermMemory()
stm.set_entity("title", "Inception")
assert stm.get_entity("title") == "Inception"
assert stm.get_entity("missing") is None
assert stm.get_entity("missing", "fallback") == "fallback"
class TestEpisodicMemory:
"""Tests for EpisodicMemory."""
def test_workflow_lifecycle(self):
stm = ShortTermMemory()
assert stm.current_workflow is None
def test_add_error(self):
episodic = EpisodicMemory()
episodic.add_error("find_torrent", "API timeout")
assert len(episodic.recent_errors) == 1
assert is_iso_format(episodic.recent_errors[0].get("timestamp"))
stm.start_workflow("organize_media", {"release_name": "X"})
assert stm.current_workflow is not None
assert stm.current_workflow["name"] == "organize_media"
assert stm.current_workflow["params"] == {"release_name": "X"}
def test_add_error_max_limit(self):
episodic = EpisodicMemory(max_errors=3)
for i in range(5):
episodic.add_error("action", f"Error {i}")
assert len(episodic.recent_errors) == 3
error_messages = [e["error"] for e in episodic.recent_errors]
assert error_messages == ["Error 2", "Error 3", "Error 4"]
stm.update_workflow_stage("moving")
assert stm.current_workflow["stage"] == "moving"
def test_store_search_results(self):
episodic = EpisodicMemory()
episodic.store_search_results("test query", [])
assert is_iso_format(episodic.last_search_results.get("timestamp"))
def test_get_result_by_index(self):
episodic = EpisodicMemory()
results = [{"name": "Result 1"}, {"name": "Result 2"}]
episodic.store_search_results("query", results)
result = episodic.get_result_by_index(2)
assert result is not None
assert result["name"] == "Result 2"
stm.end_workflow()
assert stm.current_workflow is None
class TestMemory:
"""Tests for the Memory manager."""
# ---------------------------------------------------------------------------
# EpisodicMemory
# ---------------------------------------------------------------------------
def test_init_creates_directories(self, temp_dir):
class TestEpisodicMemorySearchResults:
    """Storing search results and fetching them back by 1-based position."""

    def test_store_records_timestamp_and_query(self):
        """A stored search keeps its query and a timestamp accepted by the helper."""
        episodic = EpisodicMemory()
        hits = [{"name": "r1"}]
        episodic.store_search_results("Inception", hits)

        stored = episodic.last_search_results
        assert stored["query"] == "Inception"
        assert _is_iso_timestamp(stored["timestamp"])

    def test_get_result_by_index_is_one_based(self):
        """Index 1 maps to the first stored result, index 2 to the second."""
        episodic = EpisodicMemory()
        episodic.store_search_results("q", [{"name": "first"}, {"name": "second"}])

        first_hit = episodic.get_result_by_index(1)
        second_hit = episodic.get_result_by_index(2)

        assert first_hit["name"] == "first"
        assert second_hit["name"] == "second"

    def test_get_result_by_out_of_range_index_returns_none(self):
        """Index 0 and an index past the end both yield None."""
        episodic = EpisodicMemory()
        episodic.store_search_results("q", [{"name": "only"}])

        below_range = episodic.get_result_by_index(0)
        above_range = episodic.get_result_by_index(99)

        assert below_range is None
        assert above_range is None

    def test_get_result_by_index_with_no_search_returns_none(self):
        """Without any stored search, a lookup yields None."""
        fresh = EpisodicMemory()
        assert fresh.get_result_by_index(1) is None
class TestEpisodicMemoryErrors:
    """Recording of recent errors and survival of the newest entry."""

    def test_add_error_records_timestamp(self):
        """A logged error exposes its action, its message and a timestamp."""
        episodic = EpisodicMemory()
        episodic.add_error("find_torrent", "API timeout")

        logged = episodic.recent_errors
        assert len(logged) == 1

        entry = logged[0]
        assert entry["action"] == "find_torrent"
        assert entry["error"] == "API timeout"
        assert _is_iso_timestamp(entry["timestamp"])

    def test_recent_errors_keep_latest_only(self):
        """After many insertions the newest error is still the last entry."""
        episodic = EpisodicMemory()
        # Deliberately far more insertions than any plausible retention cap.
        for n in range(60):
            episodic.add_error("action", f"Error {n}")

        retained = episodic.recent_errors
        # The cap itself is not asserted here; only that the newest survives.
        newest = retained[-1]
        assert newest["error"] == "Error 59"
class TestEpisodicMemoryDownloads:
    """Lifecycle of entries in the active-download list."""

    def test_complete_download_moves_record_out(self):
        """Completing a tracked download returns it and empties the active list."""
        episodic = EpisodicMemory()
        episodic.add_active_download({"task_id": "t1", "name": "X"})

        finished = episodic.complete_download("t1", "/library/X.mkv")

        assert finished is not None
        assert finished["file_path"] == "/library/X.mkv"
        assert episodic.get_active_downloads() == []

    def test_complete_unknown_download_returns_none(self):
        """Completing a task id that was never added yields None."""
        episodic = EpisodicMemory()
        outcome = episodic.complete_download("missing", "/x")
        assert outcome is None
class TestEpisodicMemoryPendingQuestion:
    """Setting and resolving the pending question."""

    def test_set_and_resolve(self):
        """Resolving by answer index returns that option and clears the question."""
        episodic = EpisodicMemory()
        choices = [
            {"index": 1, "label": "A"},
            {"index": 2, "label": "B"},
        ]
        episodic.set_pending_question(
            question="Which one?",
            options=choices,
            context={},
        )
        assert episodic.get_pending_question() is not None

        picked = episodic.resolve_pending_question(answer_index=1)

        assert picked == {"index": 1, "label": "A"}
        assert episodic.get_pending_question() is None

    def test_resolve_without_pending_question_returns_none(self):
        """Resolving when nothing is pending yields None."""
        fresh = EpisodicMemory()
        assert fresh.resolve_pending_question(answer_index=1) is None
# ---------------------------------------------------------------------------
# Memory manager
# ---------------------------------------------------------------------------
class TestMemoryManager:
"""Memory orchestrator — disk I/O and session reset."""
def test_init_creates_storage_directory(self, temp_dir):
storage = temp_dir / "memory_data"
Memory(storage_dir=str(storage))
assert storage.exists()
def test_save_and_load_ltm(self, temp_dir):
storage = str(temp_dir)
memory = Memory(storage_dir=storage)
memory.ltm.set_config("test_key", "test_value")
def test_save_persists_ltm_across_instances(self, temp_dir):
memory = Memory(storage_dir=str(temp_dir))
memory.ltm.workspace.download = "/dl"
memory.ltm.library_paths.set("movies", "/media/movies")
memory.save()
new_memory = Memory(storage_dir=storage)
assert new_memory.ltm.get_config("test_key") == "test_value"
def test_clear_session(self, memory):
memory.ltm.set_config("key", "value")
reloaded = Memory(storage_dir=str(temp_dir))
assert reloaded.ltm.workspace.download == "/dl"
assert reloaded.ltm.library_paths.get("movies") == "/media/movies"
def test_clear_session_preserves_ltm(self, memory):
memory.ltm.library_paths.set("movies", "/media/movies")
memory.stm.add_message("user", "Hello")
memory.episodic.add_error("action", "error")
memory.episodic.add_error("action", "boom")
memory.clear_session()
assert memory.ltm.get_config("key") == "value"
assert memory.ltm.library_paths.get("movies") == "/media/movies"
assert memory.stm.conversation_history == []
assert memory.episodic.recent_errors == []
class TestMemoryContext:
"""Tests for memory context functions."""
# ---------------------------------------------------------------------------
# Global memory singleton
# ---------------------------------------------------------------------------
def test_get_memory_not_initialized(self):
class TestMemoryContext:
"""Global ``init_memory`` / ``get_memory`` / ``has_memory`` accessors."""
def test_get_memory_without_init_raises(self):
reset_memory()
with pytest.raises(RuntimeError, match="Memory not initialized"):
get_memory()
def test_init_memory(self, temp_dir):
def test_init_memory_then_get_memory_returns_same_instance(self, temp_dir):
reset_memory()
memory = init_memory(str(temp_dir))
assert has_memory()
-543
View File
@@ -1,543 +0,0 @@
"""Edge case tests for the Memory system."""
import json
import os
import pytest
from alfred.infrastructure.persistence import (
EpisodicMemory,
LongTermMemory,
Memory,
ShortTermMemory,
get_memory,
init_memory,
set_memory,
)
from alfred.infrastructure.persistence.context import _memory_ctx
class TestLongTermMemoryEdgeCases:
    """Boundary checks for LongTermMemory config, library and (de)serialization."""

    def test_config_with_none_value(self):
        """A key set to None reads back as None and is not reported as present."""
        store = LongTermMemory()
        store.set_config("key", None)

        assert store.get_config("key") is None
        assert not store.has_config("key")

    def test_config_with_empty_string(self):
        """An empty string is stored and reported as present."""
        store = LongTermMemory()
        store.set_config("key", "")

        assert store.get_config("key") == ""
        # An empty string still counts as a value.
        assert store.has_config("key")

    def test_config_with_complex_types(self):
        """Lists, nested dicts and falsy scalars round-trip through config."""
        store = LongTermMemory()
        samples = {
            "list": [1, 2, 3],
            "dict": {"nested": {"deep": "value"}},
            "bool": False,
            "int": 0,
        }
        for name, value in samples.items():
            store.set_config(name, value)

        assert store.get_config("list") == [1, 2, 3]
        assert store.get_config("dict")["nested"]["deep"] == "value"
        assert store.get_config("bool") is False
        assert store.get_config("int") == 0

    def test_library_with_missing_imdb_id(self):
        """Media lacking an ``imdb_id`` key is still added."""
        store = LongTermMemory()
        store.add_to_library("movies", {"title": "No ID Movie"})

        # The entry is accepted even though it carries no imdb_id.
        movies = store.library["movies"]
        assert len(movies) == 1

    def test_library_duplicate_check_with_none_id(self):
        """Two id-less entries leave at least one item in the library."""
        store = LongTermMemory()
        for title in ("Movie 1", "Movie 2"):
            store.add_to_library("movies", {"title": title})

        # Whether id-less entries are deduplicated is left to the implementation.
        assert len(store.library["movies"]) >= 1

    def test_from_dict_with_extra_keys(self):
        """Unknown top-level keys do not become attributes."""
        payload = {
            "config": {},
            "preferences": {},
            "library": {"movies": []},
            "following": [],
            "extra_key": "should be ignored",
            "another_extra": [1, 2, 3],
        }

        restored = LongTermMemory.from_dict(payload)

        assert not hasattr(restored, "extra_key")

    def test_from_dict_with_wrong_types(self):
        """Mistyped sections either load to an object or raise a type-ish error."""
        payload = {
            "config": "not a dict",  # Should be dict
            "preferences": [],  # Should be dict
            "library": "wrong",  # Should be dict
            "following": {},  # Should be list
        }

        try:
            restored = LongTermMemory.from_dict(payload)
        except (TypeError, AttributeError):
            # Rejecting the malformed payload is an accepted outcome.
            pass
        else:
            # Loading succeeded: an object must have been produced.
            assert restored is not None

    def test_to_dict_preserves_unicode(self):
        """Non-ASCII config values and titles survive ``to_dict``."""
        store = LongTermMemory()
        store.set_config("japanese", "日本語")
        store.set_config("emoji", "🎬🎥")
        store.add_to_library("movies", {"title": "Amélie", "imdb_id": "tt1"})

        snapshot = store.to_dict()
        config = snapshot["config"]

        assert config["japanese"] == "日本語"
        assert config["emoji"] == "🎬🎥"
        assert snapshot["library"]["movies"][0]["title"] == "Amélie"
class TestShortTermMemoryEdgeCases:
    """Boundary checks for ShortTermMemory history, workflow, entities and topic."""

    def test_add_message_with_empty_content(self):
        """An empty message body is recorded as-is."""
        session = ShortTermMemory()
        session.add_message("user", "")

        history = session.conversation_history
        assert len(history) == 1
        assert history[0]["content"] == ""

    def test_add_message_with_very_long_content(self):
        """A 100000-character message is stored without truncation."""
        session = ShortTermMemory()
        payload = "x" * 100000
        session.add_message("user", payload)

        stored = session.conversation_history[0]["content"]
        assert len(stored) == 100000

    def test_add_message_with_special_characters(self):
        """Newlines, tabs, CRLF and NUL are preserved verbatim."""
        session = ShortTermMemory()
        tricky = "Line1\nLine2\tTab\r\nWindows\x00Null"
        session.add_message("user", tricky)

        assert session.conversation_history[0]["content"] == tricky

    def test_max_history_zero(self):
        """With ``max_history`` set to 0, at most one message is retained."""
        session = ShortTermMemory()
        session.max_history = 0
        session.add_message("user", "Hello")

        # Either nothing is kept or only the last message is kept.
        retained = len(session.conversation_history)
        assert retained <= 1

    def test_max_history_one(self):
        """With ``max_history`` set to 1, only the newest message remains."""
        session = ShortTermMemory()
        session.max_history = 1
        for text in ("First", "Second"):
            session.add_message("user", text)

        history = session.conversation_history
        assert len(history) == 1
        assert history[0]["content"] == "Second"

    def test_get_recent_history_zero(self):
        """Asking for 0 recent messages returns a list."""
        session = ShortTermMemory()
        session.add_message("user", "Hello")

        # The content is unspecified here; only the return type is checked.
        window = session.get_recent_history(0)
        assert isinstance(window, list)

    def test_get_recent_history_negative(self):
        """Asking for a negative count returns a list."""
        session = ShortTermMemory()
        session.add_message("user", "Hello")

        # The content is unspecified here; only the return type is checked.
        window = session.get_recent_history(-1)
        assert isinstance(window, list)

    def test_workflow_with_empty_target(self):
        """An empty dict target is stored under ``target``."""
        session = ShortTermMemory()
        session.start_workflow("download", {})

        workflow = session.current_workflow
        assert workflow["target"] == {}

    def test_workflow_with_none_target(self):
        """A None target is stored under ``target``."""
        session = ShortTermMemory()
        session.start_workflow("download", None)

        workflow = session.current_workflow
        assert workflow["target"] is None

    def test_entity_with_none_value(self):
        """Setting an entity to None still registers the key."""
        session = ShortTermMemory()
        session.set_entity("key", None)

        assert session.get_entity("key") is None
        assert "key" in session.extracted_entities

    def test_entity_overwrite(self):
        """Setting the same entity twice keeps the latest value."""
        session = ShortTermMemory()
        for value in ("value1", "value2"):
            session.set_entity("key", value)

        assert session.get_entity("key") == "value2"

    def test_topic_with_empty_string(self):
        """An empty topic string is stored as the current topic."""
        session = ShortTermMemory()
        session.set_topic("")

        assert session.current_topic == ""
class TestEpisodicMemoryEdgeCases:
    """Boundary checks for EpisodicMemory searches, downloads, errors and events."""

    def test_store_empty_results(self):
        """Storing an empty result list still records a search."""
        ep = EpisodicMemory()
        ep.store_search_results("query", [])

        stored = ep.last_search_results
        assert stored is not None
        assert stored["results"] == []

    def test_store_results_with_none_values(self):
        """Results whose fields are None are stored alongside valid ones."""
        ep = EpisodicMemory()
        hits = [
            {"name": None, "seeders": None},
            {"name": "Valid", "seeders": 100},
        ]
        ep.store_search_results("query", hits)

        assert len(ep.last_search_results["results"]) == 2

    def test_get_result_by_index_after_clear(self):
        """After clearing search results, a lookup yields None."""
        ep = EpisodicMemory()
        ep.store_search_results("query", [{"name": "Test"}])
        ep.clear_search_results()

        assert ep.get_result_by_index(1) is None

    def test_get_result_by_very_large_index(self):
        """A huge index yields None."""
        ep = EpisodicMemory()
        ep.store_search_results("query", [{"name": "Test"}])

        assert ep.get_result_by_index(999999999) is None

    def test_download_with_missing_fields(self):
        """An empty download dict is tracked and gains ``started_at``."""
        ep = EpisodicMemory()
        ep.add_active_download({})

        tracked = ep.active_downloads
        assert len(tracked) == 1
        assert "started_at" in tracked[0]

    def test_update_nonexistent_download(self):
        """Updating progress for an unknown task leaves the list empty."""
        ep = EpisodicMemory()
        # The call itself must not raise.
        ep.update_download_progress("nonexistent", 50)

        assert ep.active_downloads == []

    def test_complete_nonexistent_download(self):
        """Completing an unknown task yields None."""
        ep = EpisodicMemory()
        outcome = ep.complete_download("nonexistent", "/path")

        assert outcome is None

    def test_error_with_empty_context(self):
        """A None context is stored as an empty dict."""
        ep = EpisodicMemory()
        ep.add_error("action", "error", None)

        latest = ep.recent_errors[0]
        assert latest["context"] == {}

    def test_error_with_very_long_message(self):
        """A 10000-character error message is stored without truncation."""
        ep = EpisodicMemory()
        message = "x" * 10000
        ep.add_error("action", message)

        assert len(ep.recent_errors[0]["error"]) == 10000

    def test_pending_question_with_empty_options(self):
        """A question may be set with an empty options list."""
        ep = EpisodicMemory()
        ep.set_pending_question("Question?", [], {})

        assert ep.pending_question["options"] == []

    def test_resolve_question_invalid_index(self):
        """An answer index matching no option yields None and clears the question."""
        ep = EpisodicMemory()
        only_option = {"index": 1, "label": "Option"}
        ep.set_pending_question("Question?", [only_option], {})

        outcome = ep.resolve_pending_question(999)

        assert outcome is None
        # The question is cleared even though nothing matched.
        assert ep.pending_question is None

    def test_resolve_question_when_none(self):
        """Resolving with nothing pending yields None."""
        ep = EpisodicMemory()

        assert ep.resolve_pending_question(1) is None

    def test_background_event_with_empty_data(self):
        """An event with an empty payload keeps that empty payload."""
        ep = EpisodicMemory()
        ep.add_background_event("event", {})

        latest = ep.background_events[0]
        assert latest["data"] == {}

    def test_get_unread_events_multiple_calls(self):
        """The first read returns the event; the second read returns nothing."""
        ep = EpisodicMemory()
        ep.add_background_event("event", {})

        first_read = ep.get_unread_events()
        second_read = ep.get_unread_events()

        assert len(first_read) == 1
        assert len(second_read) == 0

    def test_max_errors_boundary(self):
        """At the cap the list is full; one more entry drops the oldest."""
        ep = EpisodicMemory()
        ep.max_errors = 3

        for n in range(3):
            ep.add_error("action", f"Error {n}")
        assert len(ep.recent_errors) == 3

        ep.add_error("action", "Error 3")
        after_overflow = ep.recent_errors
        assert len(after_overflow) == 3
        assert after_overflow[0]["error"] == "Error 1"

    def test_max_events_boundary(self):
        """With a cap of 3 and 5 events added, the three newest remain."""
        ep = EpisodicMemory()
        ep.max_events = 3
        for n in range(5):
            ep.add_background_event("event", {"i": n})

        kept = ep.background_events
        assert len(kept) == 3
        assert kept[0]["data"]["i"] == 2
class TestMemoryEdgeCases:
    """Boundary checks for the Memory manager: storage dir, load, save, state."""

    def test_init_with_nonexistent_directory(self, temp_dir):
        """A missing nested storage directory exists after construction."""
        target = temp_dir / "new" / "nested" / "dir"
        # The directory is intentionally not created beforehand.
        Memory(storage_dir=str(target))

        assert target.exists()

    def test_init_with_readonly_directory(self, temp_dir):
        """Constructing on a read-only directory may succeed or raise OSError."""
        locked = temp_dir / "readonly"
        locked.mkdir()

        try:
            # Dropping write permission may have no effect on some systems.
            os.chmod(locked, 0o444)
            Memory(storage_dir=str(locked))
        except OSError:
            # PermissionError is an OSError subclass; either is tolerated.
            pass
        finally:
            # Restore permissions on the way out.
            os.chmod(locked, 0o755)

    def test_load_ltm_with_empty_file(self, temp_dir):
        """A zero-byte ``ltm.json`` results in an empty config."""
        (temp_dir / "ltm.json").write_text("")

        manager = Memory(storage_dir=str(temp_dir))

        assert manager.ltm.config == {}

    def test_load_ltm_with_partial_data(self, temp_dir):
        """A file holding only ``config`` loads it and defaults the library."""
        (temp_dir / "ltm.json").write_text('{"config": {"key": "value"}}')

        manager = Memory(storage_dir=str(temp_dir))

        assert manager.ltm.get_config("key") == "value"
        # Sections absent from the file fall back to their defaults.
        assert manager.ltm.library == {"movies": [], "tv_shows": []}

    def test_save_with_unicode(self, temp_dir):
        """Non-ASCII config values are readable from the saved JSON file."""
        manager = Memory(storage_dir=str(temp_dir))
        manager.ltm.set_config("japanese", "日本語テスト")
        manager.save()

        raw = (temp_dir / "ltm.json").read_text(encoding="utf-8")
        on_disk = json.loads(raw)

        assert on_disk["config"]["japanese"] == "日本語テスト"

    def test_save_preserves_formatting(self, temp_dir):
        """The saved file spans multiple lines."""
        manager = Memory(storage_dir=str(temp_dir))
        manager.ltm.set_config("key", "value")
        manager.save()

        text = (temp_dir / "ltm.json").read_text()

        # A newline in the output is taken as evidence of pretty-printing.
        assert "\n" in text

    def test_concurrent_access_simulation(self, temp_dir):
        """One hundred set-then-save cycles leave the last key loadable."""
        writer = Memory(storage_dir=str(temp_dir))
        for n in range(100):
            writer.ltm.set_config(f"key_{n}", f"value_{n}")
            writer.save()

        reader = Memory(storage_dir=str(temp_dir))

        assert reader.ltm.get_config("key_99") == "value_99"

    def test_clear_session_preserves_ltm(self, temp_dir):
        """``clear_session`` keeps LTM config but empties STM and episodic state."""
        manager = Memory(storage_dir=str(temp_dir))
        manager.ltm.set_config("important", "data")
        manager.stm.add_message("user", "Hello")
        manager.episodic.store_search_results("query", [{}])

        manager.clear_session()

        assert manager.ltm.get_config("important") == "data"
        assert manager.stm.conversation_history == []
        assert manager.episodic.last_search_results is None

    def test_get_context_for_prompt_empty(self, temp_dir):
        """A fresh manager reports empty config and no last search."""
        manager = Memory(storage_dir=str(temp_dir))

        context = manager.get_context_for_prompt()
        last_search = context["last_search"]

        assert context["config"] == {}
        assert last_search["query"] is None
        assert last_search["result_count"] == 0

    def test_get_full_state_serializable(self, temp_dir):
        """The full state can be passed through ``json.dumps``."""
        manager = Memory(storage_dir=str(temp_dir))
        manager.ltm.set_config("key", "value")
        manager.stm.add_message("user", "Hello")
        manager.episodic.store_search_results("query", [{"name": "Test"}])

        snapshot = manager.get_full_state()

        # json.dumps raises on non-serializable content.
        encoded = json.dumps(snapshot)
        assert encoded is not None
class TestMemoryContextEdgeCases:
    """Boundary checks for the context-variable-backed memory accessors."""

    def test_multiple_init_calls(self, temp_dir):
        """A second ``init_memory`` call replaces the instance ``get_memory`` returns."""
        _memory_ctx.set(None)
        storage = str(temp_dir)

        init_memory(storage)
        latest = init_memory(storage)

        # The most recent initialization wins.
        assert get_memory() is latest

    def test_set_memory_with_none(self):
        """After ``set_memory(None)``, ``get_memory`` raises RuntimeError."""
        _memory_ctx.set(None)
        set_memory(None)

        with pytest.raises(RuntimeError):
            get_memory()

    def test_context_isolation(self, temp_dir):
        """A copied context still sees the memory set before the copy."""
        from contextvars import copy_context

        _memory_ctx.set(None)
        original = init_memory(str(temp_dir))

        # Snapshot the current context, then look the memory up inside it.
        snapshot = copy_context()
        seen_in_copy = snapshot.run(get_memory)

        assert seen_in_copy is original
-299
View File
@@ -1,299 +0,0 @@
"""Tests for PromptBuilder."""
from alfred.agent.prompts import PromptBuilder
from alfred.agent.registry import make_tools
from alfred.settings import settings
class TestPromptBuilder:
    """Checks on what ``PromptBuilder.build_system_prompt`` puts in the prompt."""

    def test_init(self, memory):
        """The builder keeps a reference to the tools it was given."""
        registry = make_tools(settings)
        builder = PromptBuilder(registry)

        assert builder.tools is registry

    def test_build_system_prompt(self, memory):
        """The prompt carries the assistant framing and the tools header."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        for fragment in ("AI assistant", "media library", "AVAILABLE TOOLS"):
            assert fragment in prompt

    def test_includes_tools(self, memory):
        """Every registered tool name appears in the prompt."""
        registry = make_tools(settings)
        prompt = PromptBuilder(registry).build_system_prompt()

        for name in registry.keys():
            assert name in prompt

    def test_includes_config(self, memory):
        """A download folder set on LTM shows up in the prompt."""
        memory.ltm.download_folder = "/path/to/downloads"

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "/path/to/downloads" in prompt

    def test_includes_search_results(self, memory_with_search_results):
        """The last search section, its query and a result count are present."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "LAST SEARCH" in prompt
        assert "Inception 1080p" in prompt
        # Either phrasing of the result count is accepted.
        assert any(
            wording in prompt for wording in ("3 results", "results available")
        )

    def test_includes_search_result_names(self, memory_with_search_results):
        """Individual result names are listed in the prompt."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "Inception.2010.1080p.BluRay.x264" in prompt

    def test_includes_active_downloads(self, memory):
        """An active download is listed under its own section."""
        download = {
            "task_id": "123",
            "name": "Test.Movie.mkv",
            "progress": 50,
        }
        memory.episodic.add_active_download(download)

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "ACTIVE DOWNLOADS" in prompt
        assert "Test.Movie.mkv" in prompt

    def test_includes_pending_question(self, memory):
        """A pending question is rendered with its text."""
        choices = [
            {"index": 1, "label": "Option 1"},
            {"index": 2, "label": "Option 2"},
        ]
        memory.episodic.set_pending_question("Which torrent?", choices, {})

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "PENDING QUESTION" in prompt
        assert "Which torrent?" in prompt

    def test_includes_last_error(self, memory):
        """A recorded error is rendered under the recent-errors section."""
        memory.episodic.add_error("find_torrent", "API timeout")

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "RECENT ERRORS" in prompt
        assert "API timeout" in prompt

    def test_includes_workflow(self, memory):
        """A started workflow is rendered with its name."""
        memory.stm.start_workflow("download", {"title": "Inception"})

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "CURRENT WORKFLOW" in prompt
        assert "download" in prompt

    def test_includes_topic(self, memory):
        """The current topic is rendered."""
        memory.stm.set_topic("selecting_torrent")

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "CURRENT TOPIC" in prompt
        assert "selecting_torrent" in prompt

    def test_includes_entities(self, memory):
        """Extracted entities are rendered."""
        memory.stm.set_entity("movie_title", "Inception")
        memory.stm.set_entity("year", 2010)

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "EXTRACTED ENTITIES" in prompt
        assert "Inception" in prompt

    def test_includes_rules(self, memory):
        """The rules section is present and mentions ``add_torrent_by_index``."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "IMPORTANT RULES" in prompt
        assert "add_torrent_by_index" in prompt

    def test_includes_examples(self, memory):
        """The examples section is present with one of the expected phrasings."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "EXAMPLES" in prompt
        assert any(
            wording in prompt
            for wording in ("download the 3rd one", "torrent number")
        )

    def test_empty_context(self, memory):
        """With nothing in memory the basic prompt structure is still emitted."""
        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Building must not crash, and the skeleton sections must be there.
        assert "AVAILABLE TOOLS" in prompt
        assert "CURRENT CONFIGURATION" in prompt

    def test_limits_search_results_display(self, memory):
        """With 20 stored results the prompt signals that more exist."""
        many_hits = [{"name": f"Torrent {n}", "seeders": n} for n in range(20)]
        memory.episodic.store_search_results("test", many_hits)

        prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # The leading results are shown, followed by an overflow indicator.
        assert any(marker in prompt for marker in ("Torrent 0", "1."))
        assert any(marker in prompt for marker in ("... and", "more"))
# REMOVED: test_json_format_in_prompt
# We removed the "action" format from prompts as it was confusing the LLM
# The LLM now uses native OpenAI tool calling format
class TestFormatToolsDescription:
    """Checks on ``PromptBuilder._format_tools_description``."""

    def test_format_all_tools(self, memory):
        """Each tool's name and description appear in the formatted text."""
        registry = make_tools(settings)
        rendered = PromptBuilder(registry)._format_tools_description()

        for entry in registry.values():
            assert entry.name in rendered
            assert entry.description in rendered

    def test_includes_parameters(self, memory):
        """The formatted text includes a parameters label and a type key."""
        rendered = PromptBuilder(make_tools(settings))._format_tools_description()

        for fragment in ("Parameters:", '"type"'):
            assert fragment in rendered
class TestFormatEpisodicContext:
    """Checks on ``PromptBuilder._format_episodic_context``."""

    def test_empty_episodic(self, memory):
        """An untouched episodic memory formats to an empty string."""
        builder = PromptBuilder(make_tools(settings))

        rendered = builder._format_episodic_context(memory)

        assert rendered == ""

    def test_with_search_results(self, memory_with_search_results):
        """Stored search results produce a last-search section with the query."""
        builder = PromptBuilder(make_tools(settings))

        rendered = builder._format_episodic_context(memory_with_search_results)

        assert "LAST SEARCH" in rendered
        assert "Inception 1080p" in rendered

    def test_with_multiple_sections(self, memory):
        """Search, download and error state each yield their own section."""
        episodic = memory.episodic
        episodic.store_search_results("test", [{"name": "Result"}])
        episodic.add_active_download({"task_id": "1", "name": "Download"})
        episodic.add_error("action", "error")

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_episodic_context(memory)

        for header in ("LAST SEARCH", "ACTIVE DOWNLOADS", "RECENT ERRORS"):
            assert header in rendered
class TestFormatStmContext:
    """Checks on ``PromptBuilder._format_stm_context``."""

    def test_empty_stm(self, memory):
        """An untouched STM yields either the language line or nothing at all."""
        builder = PromptBuilder(make_tools(settings))

        rendered = builder._format_stm_context(memory)

        # Both outcomes are accepted: a language section, or an empty string.
        assert "CONVERSATION LANGUAGE" in rendered or rendered == ""

    def test_with_workflow(self, memory):
        """A started workflow is rendered with its name."""
        memory.stm.start_workflow("download", {"title": "Test"})

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_stm_context(memory)

        assert "CURRENT WORKFLOW" in rendered
        assert "download" in rendered

    def test_with_all_sections(self, memory):
        """Workflow, topic and entity state each yield their own section."""
        stm = memory.stm
        stm.start_workflow("download", {"title": "Test"})
        stm.set_topic("searching")
        stm.set_entity("key", "value")

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_stm_context(memory)

        for header in ("CURRENT WORKFLOW", "CURRENT TOPIC", "EXTRACTED ENTITIES"):
            assert header in rendered
-283
View File
@@ -1,283 +0,0 @@
"""Critical tests for prompt builder - Tests that would have caught bugs."""
from alfred.agent.prompts import PromptBuilder
from alfred.agent.registry import make_tools
from alfred.settings import settings
class TestPromptBuilderToolsInjection:
    """Guard the wiring between the tool registry and ``PromptBuilder``.

    Every test builds the tools with ``make_tools(settings)``; the ``memory``
    fixture is requested but not referenced in the test bodies.
    """

    def test_system_prompt_includes_all_tools(self, memory):
        """CRITICAL: every registered tool name appears in the system prompt."""
        tools = make_tools(settings)
        system_prompt = PromptBuilder(tools).build_system_prompt()

        for tool_name in tools:
            assert tool_name in system_prompt, (
                f"Tool {tool_name} not mentioned in system prompt"
            )

    def test_tools_spec_contains_all_registered_tools(self, memory):
        """CRITICAL: the spec names equal the registry keys exactly."""
        tools = make_tools(settings)
        specs = PromptBuilder(tools).build_tools_spec()

        spec_names = {spec["function"]["name"] for spec in specs}
        tool_names = set(tools)

        assert spec_names == tool_names, f"Missing tools: {tool_names - spec_names}"

    def test_tools_spec_is_not_empty(self, memory):
        """CRITICAL: the tools spec holds at least one entry."""
        specs = PromptBuilder(make_tools(settings)).build_tools_spec()
        assert len(specs) > 0, "Tools spec is empty!"

    def test_tools_spec_format_matches_openai(self, memory):
        """CRITICAL: each spec entry has the function-tool envelope and fields."""
        specs = PromptBuilder(make_tools(settings)).build_tools_spec()

        for spec in specs:
            assert "type" in spec
            assert spec["type"] == "function"
            assert "function" in spec
            function_block = spec["function"]
            for field in ("name", "description", "parameters"):
                assert field in function_block
class TestPromptBuilderMemoryContext:
    """Check that state held in memory is reflected in the system prompt.

    In each test the builder is created first and memory is mutated
    afterwards, before ``build_system_prompt()`` is called.
    """

    def test_prompt_includes_current_topic(self, memory):
        """A topic set on the STM shows up in the prompt."""
        builder = PromptBuilder(make_tools(settings))
        memory.stm.set_topic("test_topic")

        assert "test_topic" in builder.build_system_prompt()

    def test_prompt_includes_extracted_entities(self, memory):
        """An entity key set on the STM shows up in the prompt."""
        builder = PromptBuilder(make_tools(settings))
        memory.stm.set_entity("test_key", "test_value")

        assert "test_key" in builder.build_system_prompt()

    def test_prompt_includes_search_results(self, memory_with_search_results):
        """Search results stored by the fixture appear under ``LAST SEARCH``."""
        builder = PromptBuilder(make_tools(settings))
        system_prompt = builder.build_system_prompt()

        assert "Inception" in system_prompt
        assert "LAST SEARCH" in system_prompt

    def test_prompt_includes_active_downloads(self, memory):
        """A registered download is listed under ``ACTIVE DOWNLOADS``."""
        builder = PromptBuilder(make_tools(settings))
        download = {"task_id": "123", "name": "Test Movie", "progress": 50}
        memory.episodic.add_active_download(download)

        system_prompt = builder.build_system_prompt()

        assert "ACTIVE DOWNLOADS" in system_prompt
        assert "Test Movie" in system_prompt

    def test_prompt_includes_recent_errors(self, memory):
        """A recorded error leaves a heading or the word "error" in the prompt."""
        builder = PromptBuilder(make_tools(settings))
        memory.episodic.add_error("test_action", "test error message")

        system_prompt = builder.build_system_prompt()

        assert "RECENT ERRORS" in system_prompt or "error" in system_prompt.lower()

    def test_prompt_includes_configuration(self, memory):
        """Setting a download folder yields a configuration mention in the prompt."""
        builder = PromptBuilder(make_tools(settings))
        memory.ltm.download_folder = "/test/downloads"

        system_prompt = builder.build_system_prompt()

        assert "CONFIGURATION" in system_prompt or "download_folder" in system_prompt

    def test_prompt_includes_language(self, memory):
        """After ``set_language("fr")`` the prompt mentions the code or a heading."""
        builder = PromptBuilder(make_tools(settings))
        memory.stm.set_language("fr")

        system_prompt = builder.build_system_prompt()

        assert "fr" in system_prompt or "LANGUAGE" in system_prompt
class TestPromptBuilderStructure:
    """Check the overall shape of the system prompt and its formatted sections."""

    def test_system_prompt_is_not_empty(self, memory):
        """The system prompt has content beyond whitespace."""
        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert len(system_prompt) > 0
        assert system_prompt.strip() != ""

    def test_system_prompt_includes_base_instruction(self, memory):
        """The prompt mentions "assistant" or "help" (case-insensitive)."""
        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()
        lowered = system_prompt.lower()

        assert "assistant" in lowered or "help" in lowered

    def test_system_prompt_includes_rules(self, memory):
        """The prompt contains a ``RULES`` or ``IMPORTANT`` marker."""
        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "RULES" in system_prompt or "IMPORTANT" in system_prompt

    def test_system_prompt_includes_examples(self, memory):
        """The prompt contains an ``EXAMPLES`` heading or the word "example"."""
        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "EXAMPLES" in system_prompt or "example" in system_prompt.lower()

    def test_tools_description_format(self, memory):
        """Every tool name and a parameters mention appear in the description."""
        tools = make_tools(settings)
        description = PromptBuilder(tools)._format_tools_description()

        # Each registered tool must be named.
        for tool_name in tools:
            assert tool_name in description

        # Parameter information must be present in some casing.
        assert "Parameters" in description or "parameters" in description

    def test_episodic_context_format(self, memory_with_search_results):
        """The episodic context shows the fixture's search results."""
        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_episodic_context(memory_with_search_results)

        assert "LAST SEARCH" in rendered
        assert "Inception" in rendered

    def test_stm_context_format(self, memory):
        """The STM context reflects the topic and entity that were set."""
        builder = PromptBuilder(make_tools(settings))
        memory.stm.set_topic("test_topic")
        memory.stm.set_entity("key", "value")

        rendered = builder._format_stm_context(memory)

        assert "TOPIC" in rendered or "test_topic" in rendered
        assert "ENTITIES" in rendered or "key" in rendered

    def test_config_context_format(self, memory):
        """The config context has a heading and names ``download_folder``."""
        builder = PromptBuilder(make_tools(settings))
        memory.ltm.download_folder = "/test/downloads"

        rendered = builder._format_config_context(memory)

        assert "CONFIGURATION" in rendered
        assert "download_folder" in rendered
class TestPromptBuilderEdgeCases:
    """Boundary conditions for prompt and tools-spec construction."""

    def test_prompt_with_no_memory_context(self, memory):
        """With an untouched memory fixture the prompt still has base content."""
        builder = PromptBuilder(make_tools(settings))

        # The memory fixture is deliberately left unmodified here.
        system_prompt = builder.build_system_prompt()

        assert len(system_prompt) > 0
        assert "assistant" in system_prompt.lower()

    def test_prompt_with_empty_tools(self):
        """An empty tools dict still produces a non-empty prompt."""
        system_prompt = PromptBuilder({}).build_system_prompt()

        assert len(system_prompt) > 0

    def test_tools_spec_with_empty_tools(self):
        """An empty tools dict produces an empty list of specs."""
        specs = PromptBuilder({}).build_tools_spec()

        assert isinstance(specs, list)
        assert len(specs) == 0

    def test_prompt_with_unicode_in_memory(self, memory):
        """Accented characters and emoji in an entity value reach the prompt."""
        builder = PromptBuilder(make_tools(settings))
        memory.stm.set_entity("movie", "Amélie 🎬")

        system_prompt = builder.build_system_prompt()

        for fragment in ("Amélie", "🎬"):
            assert fragment in system_prompt

    def test_prompt_with_long_search_results(self, memory):
        """Twenty results show early entries plus a "more"/"..." marker."""
        builder = PromptBuilder(make_tools(settings))

        results = [{"name": f"Movie {i}", "seeders": i} for i in range(20)]
        memory.episodic.store_search_results("test", results, "torrent")

        system_prompt = builder.build_system_prompt()

        # One of the first results must be shown.
        assert "Movie 0" in system_prompt or "Movie 1" in system_prompt
        # A continuation marker must be present.
        assert "more" in system_prompt.lower() or "..." in system_prompt
-402
View File
@@ -1,402 +0,0 @@
"""Edge case tests for PromptBuilder."""
from alfred.agent.prompts import PromptBuilder
from alfred.agent.registry import make_tools
from alfred.settings import settings
class TestPromptBuilderEdgeCases:
    """Edge cases for ``PromptBuilder.build_system_prompt``.

    Unlike the builder-first tests elsewhere, each test here mutates the
    ``memory`` fixture first and only then builds the tools and the prompt.
    """

    def test_prompt_with_empty_memory(self, memory):
        """Untouched memory still yields tools and configuration sections."""
        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "AVAILABLE TOOLS" in system_prompt
        assert "CURRENT CONFIGURATION" in system_prompt

    def test_prompt_with_unicode_config(self, memory):
        """Non-ASCII folder paths are carried into the prompt."""
        memory.ltm.download_folder = "/path/to/日本語"
        memory.ltm.tvshow_folder = "/path/🎬"

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        for fragment in ("日本語", "🎬"):
            assert fragment in system_prompt

    def test_prompt_with_very_long_config_value(self, memory):
        """A path of over a thousand characters keeps its prefix in the prompt."""
        memory.ltm.download_folder = "/very/long/path/" + "x" * 1000

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Only the prefix is checked, so a truncated rendering also passes.
        assert "very/long/path" in system_prompt

    def test_prompt_with_special_chars_in_config(self, memory):
        """Quotes and a backslash in a path do not break prompt building."""
        memory.ltm.download_folder = '/path/with "quotes" and \\backslash'

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Reaching this assertion means the build did not raise.
        assert "CURRENT CONFIGURATION" in system_prompt

    def test_prompt_with_many_search_results(self, memory):
        """Fifty results render a search section with a continuation marker."""
        results = [{"name": f"Torrent {i}", "seeders": i} for i in range(50)]
        memory.episodic.store_search_results("test query", results)

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "LAST SEARCH" in system_prompt
        # A "more" or "..." marker signals that not everything is listed.
        assert "more" in system_prompt.lower() or "..." in system_prompt

    def test_prompt_with_search_results_missing_fields(self, memory):
        """Results lacking ``name`` (or empty) do not break the search section."""
        results = [
            {"name": "Complete"},
            {},  # Empty
            {"seeders": 100},  # Missing name
        ]
        memory.episodic.store_search_results("test", results)

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Reaching this assertion means the build did not raise.
        assert "LAST SEARCH" in system_prompt

    def test_prompt_with_many_active_downloads(self, memory):
        """Twenty downloads render a section that includes the first one."""
        for i in range(20):
            download = {
                "task_id": str(i),
                "name": f"Download {i}",
                "progress": i * 5,
            }
            memory.episodic.add_active_download(download)

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "ACTIVE DOWNLOADS" in system_prompt
        assert "Download 0" in system_prompt

    def test_prompt_with_many_errors(self, memory):
        """Ten recorded errors render a ``RECENT ERRORS`` section."""
        for i in range(10):
            memory.episodic.add_error(f"action_{i}", f"Error {i}")

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "RECENT ERRORS" in system_prompt
        # NOTE: the intended "most recent errors (up to 3)" limit is not asserted.

    def test_prompt_with_pending_question_many_options(self, memory):
        """A pending question with twenty options is rendered with its text."""
        options = [{"index": i, "label": f"Option {i}"} for i in range(20)]
        memory.episodic.set_pending_question("Choose one:", options, {})

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "PENDING QUESTION" in system_prompt
        assert "Choose one:" in system_prompt

    def test_prompt_with_complex_workflow(self, memory):
        """A workflow with a nested target and an updated stage is rendered."""
        target = {
            "title": "Test Movie",
            "year": 2024,
            "quality": "1080p",
            "nested": {"deep": {"value": "test"}},
        }
        memory.stm.start_workflow("download", target)
        memory.stm.update_workflow_stage("searching_torrents")

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        for fragment in ("CURRENT WORKFLOW", "download", "searching_torrents"):
            assert fragment in system_prompt

    def test_prompt_with_many_entities(self, memory):
        """Fifty entities still produce an entities section."""
        for i in range(50):
            memory.stm.set_entity(f"entity_{i}", f"value_{i}")

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "EXTRACTED ENTITIES" in system_prompt

    def test_prompt_with_null_values_in_entities(self, memory):
        """Falsy entity values (None, "", 0, False) do not break the section."""
        stm = memory.stm
        stm.set_entity("null_value", None)
        stm.set_entity("empty_string", "")
        stm.set_entity("zero", 0)
        stm.set_entity("false", False)

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Reaching this assertion means the build did not raise.
        assert "EXTRACTED ENTITIES" in system_prompt

    def test_prompt_with_unread_events(self, memory):
        """Background events produce an ``UNREAD EVENTS`` section."""
        memory.episodic.add_background_event("download_complete", {"name": "Movie.mkv"})
        memory.episodic.add_background_event("new_files", {"count": 5})

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        assert "UNREAD EVENTS" in system_prompt

    def test_prompt_with_all_sections(self, memory):
        """Populating every memory area yields every section heading."""
        # Long-term memory: configuration.
        memory.ltm.download_folder = "/downloads"
        # Episodic memory: search results, download, error, pending question.
        memory.episodic.store_search_results("test", [{"name": "Result"}])
        memory.episodic.add_active_download({"task_id": "1", "name": "Download"})
        memory.episodic.add_error("action", "error")
        memory.episodic.set_pending_question("Question?", [], {})
        # Short-term memory: workflow, topic, entity.
        memory.stm.start_workflow("download", {"title": "Test"})
        memory.stm.set_topic("searching")
        memory.stm.set_entity("key", "value")
        # Episodic memory: background event.
        memory.episodic.add_background_event("event", {})

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        expected_headings = (
            "CURRENT CONFIGURATION",
            "LAST SEARCH",
            "ACTIVE DOWNLOADS",
            "RECENT ERRORS",
            "PENDING QUESTION",
            "CURRENT WORKFLOW",
            "CURRENT TOPIC",
            "EXTRACTED ENTITIES",
            "UNREAD EVENTS",
        )
        for heading in expected_headings:
            assert heading in system_prompt

    def test_prompt_json_serializable(self, memory):
        """With a nested entity value the prompt is still returned as ``str``."""
        memory.ltm.download_folder = "/some/path"
        memory.stm.set_entity("complex", {"a": {"b": {"c": "d"}}})

        system_prompt = PromptBuilder(make_tools(settings)).build_system_prompt()

        # Only the return type is checked; embedded JSON is not parsed here.
        assert isinstance(system_prompt, str)
class TestFormatToolsDescriptionEdgeCases:
    """Edge cases for ``PromptBuilder._format_tools_description``."""

    def test_format_with_no_tools(self, memory):
        """An empty tools dict renders as the empty string."""
        assert PromptBuilder({})._format_tools_description() == ""

    def test_format_with_complex_parameters(self, memory):
        """A schema with nested object and array properties is rendered."""
        from alfred.agent.registry import Tool

        complex_tool = Tool(
            name="complex_tool",
            description="A complex tool",
            func=lambda: {},
            parameters={
                "type": "object",
                "properties": {
                    "nested": {
                        "type": "object",
                        "properties": {
                            "deep": {"type": "string"},
                        },
                    },
                    "array": {
                        "type": "array",
                        "items": {"type": "integer"},
                    },
                },
                "required": ["nested"],
            },
        )

        builder = PromptBuilder({"complex_tool": complex_tool})
        rendered = builder._format_tools_description()

        assert "complex_tool" in rendered
        assert "nested" in rendered
class TestFormatEpisodicContextEdgeCases:
    """Edge cases for ``PromptBuilder._format_episodic_context``."""

    def test_format_with_empty_search_query(self, memory):
        """An empty query string still produces a ``LAST SEARCH`` section."""
        memory.episodic.store_search_results("", [{"name": "Result"}])

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_episodic_context(memory)

        assert "LAST SEARCH" in rendered

    def test_format_with_search_results_none_names(self, memory):
        """Results whose name/title is None or absent do not break formatting."""
        nameless_results = [
            {"name": None},
            {"title": None},
            {},
        ]
        memory.episodic.store_search_results("test", nameless_results)

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_episodic_context(memory)

        # Reaching this assertion means formatting did not raise.
        assert "LAST SEARCH" in rendered

    def test_format_with_download_missing_progress(self, memory):
        """A download without ``progress`` is shown with ``0%``."""
        memory.episodic.add_active_download({"task_id": "1", "name": "Test"})

        builder = PromptBuilder(make_tools(settings))
        rendered = builder._format_episodic_context(memory)

        assert "ACTIVE DOWNLOADS" in rendered
        assert "0%" in rendered  # Default progress
class TestFormatStmContextEdgeCases:
    """Edge case tests for ``PromptBuilder._format_stm_context``."""

    def test_format_with_workflow_missing_target(self, memory):
        """A workflow dict with only ``type`` and ``stage`` still renders."""
        # Assigned directly so the dict carries no entry besides type and stage.
        memory.stm.current_workflow = {
            "type": "download",
            "stage": "started",
        }
        tools = make_tools(settings)
        builder = PromptBuilder(tools)
        context = builder._format_stm_context(memory)
        assert "CURRENT WORKFLOW" in context

    def test_format_with_workflow_none_target(self, memory):
        """A workflow started with ``None`` either raises cleanly or renders a string.

        ``AttributeError`` / ``TypeError`` from the formatter are tolerated.
        When the call succeeds the result must be a string; the previous
        ``... or True`` assertion could never fail and so checked nothing.
        """
        memory.stm.start_workflow("download", None)
        tools = make_tools(settings)
        builder = PromptBuilder(tools)
        # Keep the try body to the single call that is allowed to raise.
        try:
            context = builder._format_stm_context(memory)
        except (AttributeError, TypeError):
            # Tolerated: the formatter may not support a None second argument.
            return
        assert isinstance(context, str)

    def test_format_with_empty_topic(self, memory):
        """An empty topic string does not break formatting."""
        memory.stm.set_topic("")
        tools = make_tools(settings)
        builder = PromptBuilder(tools)
        context = builder._format_stm_context(memory)
        # Whether an empty topic is shown is left open; only the type is checked.
        assert isinstance(context, str)

    def test_format_with_entities_containing_json(self, memory):
        """An entity whose value is a JSON-looking string is rendered."""
        memory.stm.set_entity("json_string", '{"key": "value"}')
        tools = make_tools(settings)
        builder = PromptBuilder(tools)
        context = builder._format_stm_context(memory)
        assert "EXTRACTED ENTITIES" in context
-233
View File
@@ -1,233 +0,0 @@
"""Critical tests for tool registry - Tests that would have caught bugs."""
import inspect
import pytest
from alfred.agent.prompts import PromptBuilder
from alfred.agent.registry import Tool, _create_tool_from_function, make_tools
from alfred.settings import settings
class TestToolSpecFormat:
    """Check the shape of generated tool specs and parameter schemas."""

    def test_tool_spec_format_is_openai_compatible(self):
        """CRITICAL: each spec has the function envelope and an object schema."""
        specs = PromptBuilder(make_tools(settings)).build_tools_spec()

        # Container-level checks.
        assert isinstance(specs, list), "Tool specs must be a list"
        assert len(specs) > 0, "Tool specs list is empty"

        for spec in specs:
            # Envelope fields.
            assert spec["type"] == "function", (
                f"Tool type must be 'function', got {spec.get('type')}"
            )
            assert "function" in spec, "Tool spec missing 'function' key"

            func = spec["function"]
            assert "name" in func, "Function missing 'name'"
            assert "description" in func, "Function missing 'description'"
            assert "parameters" in func, "Function missing 'parameters'"

            # Parameter schema fields.
            params = func["parameters"]
            assert params["type"] == "object", "Parameters type must be 'object'"
            assert "properties" in params, "Parameters missing 'properties'"
            assert "required" in params, "Parameters missing 'required'"
            assert isinstance(params["required"], list), "Required must be a list"

    def test_tool_parameters_match_function_signature(self):
        """CRITICAL: annotations map to JSON types and defaults mark optionals."""

        def test_func(name: str, age: int, active: bool = True):
            """Test function with typed parameters."""
            return {"status": "ok"}

        tool = _create_tool_from_function(test_func)

        # Python annotation -> JSON schema type.
        expected_types = {"name": "string", "age": "integer", "active": "boolean"}
        for param, json_type in expected_types.items():
            assert tool.parameters["properties"][param]["type"] == json_type

        # Parameters without a default are required; those with one are not.
        assert "name" in tool.parameters["required"], "name should be required"
        assert "age" in tool.parameters["required"], "age should be required"
        assert "active" not in tool.parameters["required"], (
            "active has default, should not be required"
        )

    def test_all_registered_tools_are_callable(self):
        """CRITICAL: every registered ``func`` is callable and introspectable."""
        tools = make_tools(settings)

        assert len(tools) > 0, "No tools registered"

        for name, tool in tools.items():
            assert callable(tool.func), f"Tool {name} is not callable"

            # A signature that cannot be inspected is reported as a failure.
            try:
                inspect.signature(tool.func)
            except Exception as e:
                pytest.fail(f"Tool {name} has invalid signature: {e}")

    def test_tools_spec_contains_all_registered_tools(self):
        """CRITICAL: spec names and registry keys match in both directions."""
        tools = make_tools(settings)
        specs = PromptBuilder(tools).build_tools_spec()

        spec_names = {spec["function"]["name"] for spec in specs}
        tool_names = set(tools)

        missing = tool_names - spec_names
        extra = spec_names - tool_names

        assert not missing, f"Tools missing from specs: {missing}"
        assert not extra, f"Extra tools in specs: {extra}"
        assert spec_names == tool_names, "Tool specs don't match registered tools"

    def test_tool_description_extracted_from_docstring(self):
        """The description is the docstring summary, without the later details."""

        def test_func(param: str):
            """This is the description.
            More details here.
            """
            return {}

        tool = _create_tool_from_function(test_func)

        assert tool.description == "This is the description."
        assert "More details" not in tool.description

    def test_tool_without_docstring_uses_function_name(self):
        """Without a docstring the description is the function's name."""

        def test_func_no_doc(param: str):
            return {}

        tool = _create_tool_from_function(test_func_no_doc)

        assert tool.description == "test_func_no_doc"

    def test_tool_parameters_have_descriptions(self):
        """Every property of every registered tool has a ``description``."""
        specs = PromptBuilder(make_tools(settings)).build_tools_spec()

        for spec in specs:
            properties = spec["function"]["parameters"].get("properties", {})

            for param_name, param_spec in properties.items():
                assert "description" in param_spec, (
                    f"Parameter {param_name} in {spec['function']['name']} missing description"
                )

    def test_required_parameters_are_marked_correctly(self):
        """Only the parameter without a default is listed as required."""

        def func_with_optional(required: str, optional: int = 5):
            return {}

        required_names = _create_tool_from_function(func_with_optional).parameters[
            "required"
        ]

        assert "required" in required_names
        assert "optional" not in required_names
        assert len(required_names) == 1
class TestToolRegistry:
    """Check the dictionary returned by ``make_tools(settings)``."""

    def test_make_tools_returns_dict(self):
        """The registry is a non-empty ``dict``."""
        registry = make_tools(settings)

        assert isinstance(registry, dict)
        assert len(registry) > 0

    def test_all_tools_have_unique_names(self):
        """No two registered tools share a ``name``."""
        names = [tool.name for tool in make_tools(settings).values()]

        assert len(names) == len(set(names)), "Duplicate tool names found"

    def test_tool_names_match_dict_keys(self):
        """Each tool is stored under its own ``name``."""
        for key, tool in make_tools(settings).items():
            assert key == tool.name, f"Key {key} doesn't match tool name {tool.name}"

    def test_expected_tools_are_registered(self):
        """The known tool names are all present in the registry."""
        tools = make_tools(settings)

        expected_tools = (
            "set_path_for_folder",
            "list_folder",
            "find_media_imdb_id",
            "find_torrent",
            "add_torrent_by_index",
            "add_torrent_to_qbittorrent",
            "get_torrent_by_index",
            "set_language",
        )

        for expected in expected_tools:
            assert expected in tools, f"Expected tool {expected} not registered"

    def test_tool_functions_are_valid(self):
        """Each tool's ``func`` is callable.

        The functions are not invoked: doing so would require memory, clients
        and other setup this test does not provide.
        """
        for name, tool in make_tools(settings).items():
            assert callable(tool.func), f"Tool {name} function is not callable"
class TestToolDataclass:
    """Check ``Tool`` construction and the schema built from a function."""

    def test_tool_creation(self):
        """A ``Tool`` built with all four fields exposes them unchanged."""

        def dummy_func():
            return {}

        empty_schema = {"type": "object", "properties": {}, "required": []}
        tool = Tool(
            name="test_tool",
            description="Test description",
            func=dummy_func,
            parameters=empty_schema,
        )

        assert tool.name == "test_tool"
        assert tool.description == "Test description"
        assert tool.func == dummy_func
        assert isinstance(tool.parameters, dict)

    def test_tool_parameters_structure(self):
        """A generated schema has ``type``, ``properties`` and ``required``."""

        def dummy_func(arg: str):
            return {}

        schema = _create_tool_from_function(dummy_func).parameters

        for key in ("type", "properties", "required"):
            assert key in schema
        assert schema["type"] == "object"
+20 -4
View File
@@ -1,4 +1,20 @@
"""Edge case tests for tool registry."""
"""Edge-case tests for ``alfred.agent.registry``.
Covers unusual but legitimate signatures handled by the JSON-Schema
extractor:
- ``TestToolEdgeCases`` — direct ``Tool`` dataclass construction with
minimal and maximal field sets.
- ``TestCreateToolFromFunctionEdgeCases`` — bare functions (no annotations,
no docstring), functions with only ``*args``/``**kwargs``, generic
``list``/``dict`` annotations, ``Optional`` unwrapping.
- ``TestMakeToolsEdgeCases`` — global registry construction with the live
``Settings`` object: every registered tool has a callable ``func``, a
unique name, and a JSON-Schema-shaped ``parameters`` dict.
Uses ``memory.ltm.workspace.download`` (the current API) when staging the
filesystem-backed tools.
"""
import pytest
@@ -258,7 +274,7 @@ class TestToolExecution:
def test_tool_returns_dict(self, memory, real_folder):
"""Should return dict from tool execution."""
tools = make_tools(settings)
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
result = tools["list_folder"].func(folder_type="download")
@@ -267,7 +283,7 @@ class TestToolExecution:
def test_tool_returns_status(self, memory, real_folder):
"""Should return status in result."""
tools = make_tools(settings)
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
result = tools["list_folder"].func(folder_type="download")
@@ -295,7 +311,7 @@ class TestToolExecution:
def test_tool_handles_extra_args(self, memory, real_folder):
"""Should handle extra arguments."""
tools = make_tools(settings)
memory.ltm.download_folder = str(real_folder["downloads"])
memory.ltm.workspace.download = str(real_folder["downloads"])
# Extra args should raise TypeError
with pytest.raises(TypeError):
-422
View File
@@ -1,422 +0,0 @@
"""Tests for JSON repositories."""
from alfred.domain.movies.entities import Movie
from alfred.domain.movies.value_objects import MovieTitle, Quality, ReleaseYear
from alfred.domain.shared.value_objects import FilePath, FileSize, ImdbId
from alfred.domain.subtitles.entities import Subtitle
from alfred.domain.subtitles.value_objects import Language, SubtitleFormat, TimingOffset
from alfred.domain.tv_shows.entities import TVShow
from alfred.domain.tv_shows.value_objects import ShowStatus
from alfred.infrastructure.persistence.json import (
JsonMovieRepository,
JsonSubtitleRepository,
JsonTVShowRepository,
)
class TestJsonMovieRepository:
    """Behavioural tests for ``JsonMovieRepository``.

    Saved movies are read back through ``memory.ltm.library["movies"]``.
    NOTE(review): presumably the repository writes into the memory object the
    fixture installs -- confirm in conftest.
    """

    def test_save_movie(self, memory):
        """Saving one movie stores one entry with its IMDb id."""
        repo = JsonMovieRepository()
        inception = Movie(
            imdb_id=ImdbId("tt1375666"),
            title=MovieTitle("Inception"),
            release_year=ReleaseYear(2010),
            quality=Quality.FULL_HD,
        )

        repo.save(inception)

        assert len(memory.ltm.library["movies"]) == 1
        assert memory.ltm.library["movies"][0]["imdb_id"] == "tt1375666"

    def test_save_updates_existing(self, memory):
        """Saving the same IMDb id twice keeps one entry with the later quality."""
        repo = JsonMovieRepository()
        hd_copy = Movie(
            imdb_id=ImdbId("tt1375666"),
            title=MovieTitle("Inception"),
            quality=Quality.HD,
        )
        full_hd_copy = Movie(
            imdb_id=ImdbId("tt1375666"),
            title=MovieTitle("Inception"),
            quality=Quality.FULL_HD,
        )

        for movie in (hd_copy, full_hd_copy):
            repo.save(movie)

        assert len(memory.ltm.library["movies"]) == 1
        assert memory.ltm.library["movies"][0]["quality"] == "1080p"

    def test_find_by_imdb_id(self, memory_with_library):
        """A movie from the library fixture is found by its IMDb id."""
        found = JsonMovieRepository().find_by_imdb_id(ImdbId("tt1375666"))

        assert found is not None
        assert found.title.value == "Inception"

    def test_find_by_imdb_id_not_found(self, memory):
        """Looking up an unknown IMDb id returns ``None``."""
        found = JsonMovieRepository().find_by_imdb_id(ImdbId("tt9999999"))

        assert found is None

    def test_find_all(self, memory_with_library):
        """``find_all`` returns at least the two fixture movies."""
        movies = JsonMovieRepository().find_all()

        assert len(movies) >= 2
        titles = [m.title.value for m in movies]
        for expected_title in ("Inception", "Interstellar"):
            assert expected_title in titles

    def test_find_all_empty(self, memory):
        """With no movies stored, ``find_all`` returns an empty list."""
        assert JsonMovieRepository().find_all() == []

    def test_delete(self, memory_with_library):
        """Deleting a fixture movie returns True and leaves one movie."""
        deleted = JsonMovieRepository().delete(ImdbId("tt1375666"))

        assert deleted is True
        assert len(memory_with_library.ltm.library["movies"]) == 1

    def test_delete_not_found(self, memory):
        """Deleting an unknown IMDb id returns False."""
        deleted = JsonMovieRepository().delete(ImdbId("tt9999999"))

        assert deleted is False

    def test_exists(self, memory_with_library):
        """``exists`` is True for a stored id and False for an unknown one."""
        repo = JsonMovieRepository()

        assert repo.exists(ImdbId("tt1375666")) is True
        assert repo.exists(ImdbId("tt9999999")) is False

    def test_preserves_all_fields(self, memory):
        """Every field set on a movie survives a save/load round trip."""
        repo = JsonMovieRepository()
        original = Movie(
            imdb_id=ImdbId("tt1375666"),
            title=MovieTitle("Inception"),
            release_year=ReleaseYear(2010),
            quality=Quality.FULL_HD,
            file_path=FilePath("/movies/inception.mkv"),
            file_size=FileSize(2500000000),
            tmdb_id=27205,
        )

        repo.save(original)
        loaded = repo.find_by_imdb_id(ImdbId("tt1375666"))

        assert loaded.title.value == "Inception"
        assert loaded.release_year.value == 2010
        assert loaded.quality.value == "1080p"
        assert str(loaded.file_path) == "/movies/inception.mkv"
        assert loaded.file_size.bytes == 2500000000
        assert loaded.tmdb_id == 27205
class TestJsonTVShowRepository:
    """Behavioural tests for ``JsonTVShowRepository``.

    Saved shows are read back through ``memory.ltm.library["tv_shows"]``.
    """

    def test_save_show(self, memory):
        """Saving one show stores one entry with its title."""
        repo = JsonTVShowRepository()
        got = TVShow(
            imdb_id=ImdbId("tt0944947"),
            title="Game of Thrones",
            seasons_count=8,
            status=ShowStatus.ENDED,
        )

        repo.save(got)

        assert len(memory.ltm.library["tv_shows"]) == 1
        assert memory.ltm.library["tv_shows"][0]["title"] == "Game of Thrones"

    def test_save_updates_existing(self, memory):
        """Saving the same IMDb id twice keeps one entry with the later data."""
        repo = JsonTVShowRepository()
        ongoing = TVShow(
            imdb_id=ImdbId("tt0944947"),
            title="Game of Thrones",
            seasons_count=7,
            status=ShowStatus.ONGOING,
        )
        ended = TVShow(
            imdb_id=ImdbId("tt0944947"),
            title="Game of Thrones",
            seasons_count=8,
            status=ShowStatus.ENDED,
        )

        for show in (ongoing, ended):
            repo.save(show)

        assert len(memory.ltm.library["tv_shows"]) == 1
        assert memory.ltm.library["tv_shows"][0]["seasons_count"] == 8

    def test_find_by_imdb_id(self, memory_with_library):
        """The fixture show is found by its IMDb id."""
        found = JsonTVShowRepository().find_by_imdb_id(ImdbId("tt0944947"))

        assert found is not None
        assert found.title == "Game of Thrones"

    def test_find_by_imdb_id_not_found(self, memory):
        """Looking up an unknown IMDb id returns ``None``."""
        found = JsonTVShowRepository().find_by_imdb_id(ImdbId("tt9999999"))

        assert found is None

    def test_find_all(self, memory_with_library):
        """``find_all`` returns exactly the single fixture show."""
        shows = JsonTVShowRepository().find_all()

        assert len(shows) == 1
        assert shows[0].title == "Game of Thrones"

    def test_delete(self, memory_with_library):
        """Deleting the fixture show returns True and empties the list."""
        deleted = JsonTVShowRepository().delete(ImdbId("tt0944947"))

        assert deleted is True
        assert len(memory_with_library.ltm.library["tv_shows"]) == 0

    def test_exists(self, memory_with_library):
        """``exists`` is True for a stored id and False for an unknown one."""
        repo = JsonTVShowRepository()

        assert repo.exists(ImdbId("tt0944947")) is True
        assert repo.exists(ImdbId("tt9999999")) is False

    def test_preserves_status(self, memory):
        """Each ``ShowStatus`` value survives a save/load round trip."""
        repo = JsonTVShowRepository()
        statuses = [ShowStatus.ONGOING, ShowStatus.ENDED, ShowStatus.UNKNOWN]

        for i, status in enumerate(statuses):
            # Give every show its own id so the saves do not overwrite each other.
            raw_id = f"tt{i + 1000000:07d}"
            show = TVShow(
                imdb_id=ImdbId(raw_id),
                title=f"Show {status.value}",
                seasons_count=1,
                status=status,
            )
            repo.save(show)

            loaded = repo.find_by_imdb_id(ImdbId(raw_id))
            assert loaded.status == status
class TestJsonSubtitleRepository:
    """Behavioural tests for JsonSubtitleRepository.

    Covers saving, find_by_media() with its language and season/episode
    filters, deletion, and field round-tripping.

    NOTE(review): the ``memory`` fixture is defined outside this view; the
    tests assume it starts without any stored subtitles -- confirm.
    """

    def test_save_subtitle(self, memory):
        """Saving one subtitle creates library["subtitles"] with one entry."""
        JsonSubtitleRepository().save(
            Subtitle(
                media_imdb_id=ImdbId("tt1375666"),
                language=Language.ENGLISH,
                format=SubtitleFormat.SRT,
                file_path=FilePath("/subs/inception.en.srt"),
            )
        )

        library = memory.ltm.library
        assert "subtitles" in library
        assert len(library["subtitles"]) == 1

    def test_save_multiple_for_same_media(self, memory):
        """Two subtitles for the same media ID are both kept."""
        subtitle_repo = JsonSubtitleRepository()
        english = Subtitle(
            media_imdb_id=ImdbId("tt1375666"),
            language=Language.ENGLISH,
            format=SubtitleFormat.SRT,
            file_path=FilePath("/subs/inception.en.srt"),
        )
        french = Subtitle(
            media_imdb_id=ImdbId("tt1375666"),
            language=Language.FRENCH,
            format=SubtitleFormat.SRT,
            file_path=FilePath("/subs/inception.fr.srt"),
        )

        for subtitle in (english, french):
            subtitle_repo.save(subtitle)

        assert len(memory.ltm.library["subtitles"]) == 2

    def test_find_by_media(self, memory):
        """find_by_media() returns the subtitle saved for that media ID."""
        subtitle_repo = JsonSubtitleRepository()
        subtitle_repo.save(
            Subtitle(
                media_imdb_id=ImdbId("tt1375666"),
                language=Language.ENGLISH,
                format=SubtitleFormat.SRT,
                file_path=FilePath("/subs/inception.en.srt"),
            )
        )

        matches = subtitle_repo.find_by_media(ImdbId("tt1375666"))

        assert len(matches) == 1
        assert matches[0].language == Language.ENGLISH

    def test_find_by_media_with_language_filter(self, memory):
        """The ``language`` argument narrows results to that language."""
        subtitle_repo = JsonSubtitleRepository()
        # One English and one French subtitle for the same media, saved in
        # that order.
        fixtures = (
            (Language.ENGLISH, "/subs/en.srt"),
            (Language.FRENCH, "/subs/fr.srt"),
        )
        for subtitle_language, path in fixtures:
            subtitle_repo.save(
                Subtitle(
                    media_imdb_id=ImdbId("tt1375666"),
                    language=subtitle_language,
                    format=SubtitleFormat.SRT,
                    file_path=FilePath(path),
                )
            )

        matches = subtitle_repo.find_by_media(
            ImdbId("tt1375666"), language=Language.FRENCH
        )

        assert len(matches) == 1
        assert matches[0].language == Language.FRENCH

    def test_find_by_media_with_episode_filter(self, memory):
        """The ``season``/``episode`` arguments narrow results to one episode."""
        subtitle_repo = JsonSubtitleRepository()
        # Two episodes of season 1 for the same show, saved in episode order.
        fixtures = (
            (1, "/subs/s01e01.srt"),
            (2, "/subs/s01e02.srt"),
        )
        for episode_no, path in fixtures:
            subtitle_repo.save(
                Subtitle(
                    media_imdb_id=ImdbId("tt0944947"),
                    language=Language.ENGLISH,
                    format=SubtitleFormat.SRT,
                    file_path=FilePath(path),
                    season_number=1,
                    episode_number=episode_no,
                )
            )

        matches = subtitle_repo.find_by_media(
            ImdbId("tt0944947"),
            season=1,
            episode=1,
        )

        assert len(matches) == 1
        assert matches[0].episode_number == 1

    def test_find_by_media_not_found(self, memory):
        """find_by_media() returns an empty list for tt9999999."""
        matches = JsonSubtitleRepository().find_by_media(ImdbId("tt9999999"))

        assert matches == []

    def test_delete(self, memory):
        """Deleting a saved subtitle returns True and empties the store."""
        subtitle_repo = JsonSubtitleRepository()
        saved_subtitle = Subtitle(
            media_imdb_id=ImdbId("tt1375666"),
            language=Language.ENGLISH,
            format=SubtitleFormat.SRT,
            file_path=FilePath("/subs/inception.en.srt"),
        )
        subtitle_repo.save(saved_subtitle)

        was_deleted = subtitle_repo.delete(saved_subtitle)

        assert was_deleted is True
        assert len(memory.ltm.library["subtitles"]) == 0

    def test_delete_not_found(self, memory):
        """Deleting a subtitle that was never saved returns False."""
        subtitle_repo = JsonSubtitleRepository()
        unsaved_subtitle = Subtitle(
            media_imdb_id=ImdbId("tt1375666"),
            language=Language.ENGLISH,
            format=SubtitleFormat.SRT,
            file_path=FilePath("/nonexistent.srt"),
        )

        was_deleted = subtitle_repo.delete(unsaved_subtitle)

        assert was_deleted is False

    def test_preserves_all_fields(self, memory):
        """Round-trip a fully populated Subtitle through save()/find_by_media()."""
        subtitle_repo = JsonSubtitleRepository()
        subtitle_repo.save(
            Subtitle(
                media_imdb_id=ImdbId("tt1375666"),
                language=Language.ENGLISH,
                format=SubtitleFormat.SRT,
                file_path=FilePath("/subs/inception.en.srt"),
                season_number=1,
                episode_number=5,
                timing_offset=TimingOffset(500),
                hearing_impaired=True,
                forced=False,
                source="OpenSubtitles",
                uploader="user123",
                download_count=1000,
                rating=8.5,
            )
        )

        matches = subtitle_repo.find_by_media(ImdbId("tt1375666"))
        assert len(matches) == 1

        # Optional metadata fields are compared one by one on the reloaded copy.
        reloaded = matches[0]
        assert reloaded.season_number == 1
        assert reloaded.episode_number == 5
        assert reloaded.timing_offset.milliseconds == 500
        assert reloaded.hearing_impaired is True
        assert reloaded.forced is False
        assert reloaded.source == "OpenSubtitles"
        assert reloaded.uploader == "user123"
        assert reloaded.download_count == 1000
        assert reloaded.rating == 8.5

Some files were not shown because too many files have changed in this diff Show More