From 1723b9fa53ced1b0882455094266267a6b2b6395 Mon Sep 17 00:00:00 2001 From: Francwa Date: Tue, 12 May 2026 16:14:20 +0200 Subject: [PATCH] feat: release parser, media type detection, ffprobe integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the old domain/media release parser with a full rewrite under domain/release/: - ParsedRelease with media_type ("movie" | "tv_show" | "tv_complete" | "documentary" | "concert" | "other" | "unknown"), site_tag, parse_path, languages, audio_codec, audio_channels, bit_depth, hdr_format, edition - Well-formedness check + sanitize pipeline (_is_well_formed, _sanitize, _strip_site_tag) before token-level parsing - Multi-token sequence matching for audio (DTS-HD.MA, TrueHD.Atmos…), HDR (DV.HDR10…) and editions (DIRECTORS.CUT…) - Knowledge YAML: file_extensions, release_format, languages, audio, video, editions, sites/c411 New infrastructure: - ffprobe.py — single-pass probe returning MediaInfo (video, audio tracks, subtitle tracks) - find_video.py — locate first video file in a release folder New application helpers: - detect_media_type — filesystem-based type refinement - enrich_from_probe — fill missing ParsedRelease fields from MediaInfo New agent tools: - analyze_release — parse + detect type + ffprobe in one call - probe_media — standalone ffprobe for a specific file New domain value object: - MediaInfo + AudioTrack + SubtitleTrack (domain/shared/media_info.py) Testing CLIs: - recognize_folders_in_downloads.py — full pipeline with colored output - probe_video.py — display MediaInfo for a video file --- alfred/agent/agent.py | 2 +- alfred/agent/prompts.py | 206 -------- alfred/agent/registry.py | 2 + alfred/agent/tools/filesystem.py | 122 +++++ .../filesystem/detect_media_type.py | 69 +++ .../filesystem/enrich_from_probe.py | 76 +++ .../filesystem/resolve_destination.py | 17 +- alfred/domain/media/__init__.py | 5 - alfred/domain/media/release_parser.py | 306 ----------- 
alfred/domain/release/__init__.py | 6 + alfred/domain/release/knowledge.py | 121 +++++ alfred/domain/release/services.py | 484 ++++++++++++++++++ alfred/domain/release/value_objects.py | 166 ++++++ alfred/domain/shared/media_info.py | 95 ++++ alfred/infrastructure/filesystem/ffprobe.py | 98 ++++ .../infrastructure/filesystem/find_video.py | 25 + alfred/knowledge/release/audio.yaml | 43 ++ alfred/knowledge/release/codecs.yaml | 14 + alfred/knowledge/release/editions.yaml | 28 + alfred/knowledge/release/file_extensions.yaml | 64 +++ alfred/knowledge/release/filesystem.yaml | 10 + alfred/knowledge/release/languages.yaml | 44 ++ alfred/knowledge/release/release_format.yaml | 49 ++ alfred/knowledge/release/resolutions.yaml | 9 + alfred/knowledge/release/sites/c411.yaml | 39 ++ alfred/knowledge/release/sources.yaml | 21 + alfred/knowledge/release/video.yaml | 29 ++ testing/parse_release.py | 229 +++++++++ testing/probe_video.py | 160 ++++++ testing/recognize_folders_in_downloads.py | 203 ++++++++ testing/workflows/run_workflow.py | 131 +++-- tests/domain/test_release_parser.py | 12 +- 32 files changed, 2323 insertions(+), 562 deletions(-) delete mode 100644 alfred/agent/prompts.py create mode 100644 alfred/application/filesystem/detect_media_type.py create mode 100644 alfred/application/filesystem/enrich_from_probe.py delete mode 100644 alfred/domain/media/__init__.py delete mode 100644 alfred/domain/media/release_parser.py create mode 100644 alfred/domain/release/__init__.py create mode 100644 alfred/domain/release/knowledge.py create mode 100644 alfred/domain/release/services.py create mode 100644 alfred/domain/release/value_objects.py create mode 100644 alfred/domain/shared/media_info.py create mode 100644 alfred/infrastructure/filesystem/ffprobe.py create mode 100644 alfred/infrastructure/filesystem/find_video.py create mode 100644 alfred/knowledge/release/audio.yaml create mode 100644 alfred/knowledge/release/codecs.yaml create mode 100644 
alfred/knowledge/release/editions.yaml create mode 100644 alfred/knowledge/release/file_extensions.yaml create mode 100644 alfred/knowledge/release/filesystem.yaml create mode 100644 alfred/knowledge/release/languages.yaml create mode 100644 alfred/knowledge/release/release_format.yaml create mode 100644 alfred/knowledge/release/resolutions.yaml create mode 100644 alfred/knowledge/release/sites/c411.yaml create mode 100644 alfred/knowledge/release/sources.yaml create mode 100644 alfred/knowledge/release/video.yaml create mode 100644 testing/parse_release.py create mode 100644 testing/probe_video.py create mode 100644 testing/recognize_folders_in_downloads.py diff --git a/alfred/agent/agent.py b/alfred/agent/agent.py index 0248a2c..5caba8a 100644 --- a/alfred/agent/agent.py +++ b/alfred/agent/agent.py @@ -8,7 +8,7 @@ from typing import Any from alfred.infrastructure.persistence import get_memory from alfred.settings import settings -from .prompts import PromptBuilder +from .prompt import PromptBuilder from .registry import Tool, make_tools logger = logging.getLogger(__name__) diff --git a/alfred/agent/prompts.py b/alfred/agent/prompts.py deleted file mode 100644 index cfb5ff3..0000000 --- a/alfred/agent/prompts.py +++ /dev/null @@ -1,206 +0,0 @@ -"""Prompt builder for the agent system.""" - -import json -from typing import Any - -from alfred.infrastructure.persistence import get_memory -from alfred.infrastructure.persistence.memory import MemoryRegistry - -from .registry import Tool - - -class PromptBuilder: - """Builds system prompts for the agent with memory context.""" - - def __init__(self, tools: dict[str, Tool]): - self.tools = tools - self._memory_registry = MemoryRegistry() - - def build_tools_spec(self) -> list[dict[str, Any]]: - """Build the tool specification for the LLM API.""" - tool_specs = [] - for tool in self.tools.values(): - spec = { - "type": "function", - "function": { - "name": tool.name, - "description": tool.description, - "parameters": 
tool.parameters, - }, - } - tool_specs.append(spec) - return tool_specs - - def _format_tools_description(self) -> str: - """Format tools with their descriptions and parameters.""" - if not self.tools: - return "" - return "\n".join( - f"- {tool.name}: {tool.description}\n" - f" Parameters: {json.dumps(tool.parameters, ensure_ascii=False)}" - for tool in self.tools.values() - ) - - def _format_episodic_context(self, memory) -> str: - """Format episodic memory context for the prompt.""" - lines = [] - - if memory.episodic.last_search_results: - results = memory.episodic.last_search_results - result_list = results.get("results", []) - lines.append( - f"\nLAST SEARCH: '{results.get('query')}' ({len(result_list)} results)" - ) - # Show first 5 results - for i, result in enumerate(result_list[:5]): - name = result.get("name", "Unknown") - lines.append(f" {i + 1}. {name}") - if len(result_list) > 5: - lines.append(f" ... and {len(result_list) - 5} more") - - if memory.episodic.pending_question: - question = memory.episodic.pending_question - lines.append(f"\nPENDING QUESTION: {question.get('question')}") - lines.append(f" Type: {question.get('type')}") - if question.get("options"): - lines.append(f" Options: {len(question.get('options'))}") - - if memory.episodic.active_downloads: - lines.append(f"\nACTIVE DOWNLOADS: {len(memory.episodic.active_downloads)}") - for dl in memory.episodic.active_downloads[:3]: - lines.append(f" - {dl.get('name')}: {dl.get('progress', 0)}%") - - if memory.episodic.recent_errors: - lines.append("\nRECENT ERRORS (up to 3):") - for error in memory.episodic.recent_errors[-3:]: - lines.append( - f" - Action '{error.get('action')}' failed: {error.get('error')}" - ) - - # Unread events - unread = [e for e in memory.episodic.background_events if not e.get("read")] - if unread: - lines.append(f"\nUNREAD EVENTS: {len(unread)}") - for event in unread[:3]: - lines.append(f" - {event.get('type')}: {event.get('data')}") - - return "\n".join(lines) - - def 
_format_stm_context(self, memory) -> str: - """Format short-term memory context for the prompt.""" - lines = [] - - if memory.stm.current_workflow: - workflow = memory.stm.current_workflow - lines.append( - f"CURRENT WORKFLOW: {workflow.get('type')} (stage: {workflow.get('stage')})" - ) - if workflow.get("target"): - lines.append(f" Target: {workflow.get('target')}") - - if memory.stm.current_topic: - lines.append(f"CURRENT TOPIC: {memory.stm.current_topic}") - - if memory.stm.extracted_entities: - lines.append("EXTRACTED ENTITIES:") - for key, value in memory.stm.extracted_entities.items(): - lines.append(f" - {key}: {value}") - - if memory.stm.language: - lines.append(f"CONVERSATION LANGUAGE: {memory.stm.language}") - - return "\n".join(lines) - - def _format_memory_schema(self) -> str: - """Describe available memory components so the agent knows what to read/write and when.""" - schema = self._memory_registry.schema() - tier_labels = {"ltm": "LONG-TERM (persisted)", "stm": "SHORT-TERM (session)", "episodic": "EPISODIC (volatile)"} - lines = ["MEMORY COMPONENTS:"] - - for tier, components in schema.items(): - if not components: - continue - lines.append(f"\n [{tier_labels.get(tier, tier.upper())}]") - for c in components: - access = c.get("access", "read") - lines.append(f" {c['name']} ({access}): {c['description']}") - for field_name, field_desc in c.get("fields", {}).items(): - lines.append(f" · {field_name}: {field_desc}") - - return "\n".join(lines) - - def _format_config_context(self, memory) -> str: - """Format configuration context.""" - lines = ["CURRENT CONFIGURATION:"] - folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()} - if folders: - for key, value in folders.items(): - lines.append(f" - {key}: {value}") - else: - lines.append(" (no configuration set)") - return "\n".join(lines) - - def build_system_prompt(self) -> str: - """Build the complete system prompt.""" - # Get memory once for all context formatting - memory = 
get_memory() - - # Base instruction - base = "You are a helpful AI assistant for managing a media library." - - # Language instruction - language_instruction = ( - "Your first task is to determine the user's language from their message " - "and use the `set_language` tool if it's different from the current one. " - "After that, proceed to help the user." - ) - - # Available tools - tools_desc = self._format_tools_description() - tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else "" - - # Memory schema - memory_schema = self._format_memory_schema() - - # Configuration - config_section = self._format_config_context(memory) - if config_section: - config_section = f"\n{config_section}" - - # STM context - stm_context = self._format_stm_context(memory) - if stm_context: - stm_context = f"\n{stm_context}" - - # Episodic context - episodic_context = self._format_episodic_context(memory) - - # Important rules - rules = """ -IMPORTANT RULES: -- Use tools to accomplish tasks -- When search results are available, reference them by index (e.g., "add_torrent_by_index") -- Always confirm actions with the user before executing destructive operations -- Provide clear, concise responses -""" - - # Examples - examples = """ -EXAMPLES: -- User: "Find Inception" → Use find_media_imdb_id, then find_torrent -- User: "download the 3rd one" → Use add_torrent_by_index with index=3 -- User: "List my downloads" → Use list_folder with folder_type="download" -""" - - return f"""{base} - -{language_instruction} -{tools_section} - -{memory_schema} -{config_section} -{stm_context} -{episodic_context} -{rules} -{examples} -""" diff --git a/alfred/agent/registry.py b/alfred/agent/registry.py index d9ac6b2..ab75ffa 100644 --- a/alfred/agent/registry.py +++ b/alfred/agent/registry.py @@ -97,6 +97,8 @@ def make_tools(settings) -> dict[str, Tool]: tool_functions = [ fs_tools.set_path_for_folder, fs_tools.list_folder, + fs_tools.analyze_release, + fs_tools.probe_media, 
def analyze_release(release_name: str, source_path: str) -> dict[str, Any]:
    """
    Parse a release name, refine its media type from disk, and probe its video.

    The name is parsed with parse_release, the parsed media_type is replaced by
    the result of detect_media_type for source_path, and — unless that type is
    "unknown" or "other" — a video file is looked up with find_video_file and
    probed; when the probe returns data, enrich_from_probe completes the parsed
    fields.

    Args:
        release_name: Raw release folder or file name.
        source_path: Absolute path to the release folder or file on disk.

    Returns:
        Dict with status "ok" and the parsed fields: media_type, parse_path,
        title, year, season, episode, episode_end, quality, source, codec,
        group, languages, audio_codec, audio_channels, bit_depth, hdr_format,
        edition, site_tag, is_season_pack, plus probe_used (True only when
        probe data was applied to the parsed release).
    """
    from alfred.domain.release.services import parse_release

    location = Path(source_path)
    release = parse_release(release_name)
    release.media_type = detect_media_type(release, location)

    probe_used = False
    if release.media_type not in ("unknown", "other"):
        candidate = find_video_file(location)
        info = probe(candidate) if candidate else None
        if info:
            enrich_from_probe(release, info)
            probe_used = True

    # Attributes are copied in this exact order so the response layout is stable.
    reported_fields = (
        "media_type",
        "parse_path",
        "title",
        "year",
        "season",
        "episode",
        "episode_end",
        "quality",
        "source",
        "codec",
        "group",
        "languages",
        "audio_codec",
        "audio_channels",
        "bit_depth",
        "hdr_format",
        "edition",
        "site_tag",
        "is_season_pack",
    )
    response: dict[str, Any] = {"status": "ok"}
    for field_name in reported_fields:
        response[field_name] = getattr(release, field_name)
    response["probe_used"] = probe_used
    return response


def probe_media(source_path: str) -> dict[str, Any]:
    """
    Run ffprobe on a video file and return detailed media information.

    Works independently of release name parsing: only the file at source_path
    is inspected.

    Args:
        source_path: Absolute path to the video file.

    Returns:
        On success, a dict with status "ok", video (codec, resolution, width,
        height, duration_seconds, bitrate_kbps), audio_tracks (index, codec,
        channels, channel_layout, language, is_default), subtitle_tracks
        (index, codec, language, is_default, is_forced), audio_languages and
        is_multi_audio. If the path does not exist the dict carries
        error "not_found"; if the probe yields None it carries
        error "probe_failed".
    """
    target = Path(source_path)
    if not target.exists():
        return {"status": "error", "error": "not_found", "message": f"{source_path} does not exist"}

    info = probe(target)
    if info is None:
        return {"status": "error", "error": "probe_failed", "message": "ffprobe failed to read the file"}

    video_section = {
        "codec": info.video_codec,
        "resolution": info.resolution,
        "width": info.width,
        "height": info.height,
        "duration_seconds": info.duration_seconds,
        "bitrate_kbps": info.bitrate_kbps,
    }

    audio_section = []
    for audio in info.audio_tracks:
        audio_section.append(
            {
                "index": audio.index,
                "codec": audio.codec,
                "channels": audio.channels,
                "channel_layout": audio.channel_layout,
                "language": audio.language,
                "is_default": audio.is_default,
            }
        )

    subtitle_section = []
    for subtitle in info.subtitle_tracks:
        subtitle_section.append(
            {
                "index": subtitle.index,
                "codec": subtitle.codec,
                "language": subtitle.language,
                "is_default": subtitle.is_default,
                "is_forced": subtitle.is_forced,
            }
        )

    return {
        "status": "ok",
        "video": video_section,
        "audio_tracks": audio_section,
        "subtitle_tracks": subtitle_section,
        "audio_languages": info.audio_languages,
        "is_multi_audio": info.is_multi_audio,
    }
def detect_media_type(parsed: ParsedRelease, source_path: Path) -> str:
    """
    Return a refined media_type string for the given source_path.

    Extensions found at source_path are collected, metadata extensions are
    discarded, and the remainder is compared against the video and non-video
    extension sets:

      - video and non-video both present → "unknown"
      - only non-video present           → "other"
      - anything else                    → parsed.media_type, unchanged

    parsed is only read; the caller decides whether to store the result.
    """
    # Metadata files sit next to almost every release, so they carry no signal.
    relevant = _collect_extensions(source_path) - _METADATA_EXTENSIONS

    video_present = not relevant.isdisjoint(_VIDEO_EXTENSIONS)
    other_present = not relevant.isdisjoint(_NON_VIDEO_EXTENSIONS)

    if other_present:
        return "unknown" if video_present else "other"
    # Video only, or nothing conclusive: keep the token-level inference.
    return parsed.media_type


def _collect_extensions(path: Path) -> set[str]:
    """
    Return the lowercase file extensions found at path.

    A missing path gives an empty set and a file gives its own suffix. For a
    folder only its direct children are inspected; sub-folders are not
    descended into.
    """
    if not path.exists():
        return set()
    if path.is_file():
        return {path.suffix.lower()}
    return {entry.suffix.lower() for entry in path.iterdir() if entry.is_file()}
# Map ffprobe codec names to scene-style codec tokens
_VIDEO_CODEC_MAP = {
    "hevc": "x265",
    "h264": "x264",
    "h265": "x265",
    "av1": "AV1",
    "vp9": "VP9",
    "mpeg4": "XviD",
}

# Map ffprobe audio codec names to scene-style tokens
_AUDIO_CODEC_MAP = {
    "eac3": "EAC3",
    "ac3": "AC3",
    "dts": "DTS",
    "truehd": "TrueHD",
    "aac": "AAC",
    "flac": "FLAC",
    "opus": "OPUS",
    "mp3": "MP3",
    # FFmpeg names little-endian PCM "pcm_s16le" / "pcm_s24le" (with the
    # trailing "e"); the shorter spellings never match probe output.
    "pcm_s16le": "PCM",
    "pcm_s24le": "PCM",
}

# Map channel count to standard layout string
_CHANNEL_MAP = {
    8: "7.1",
    6: "5.1",
    2: "2.0",
    1: "1.0",
}


def enrich_from_probe(parsed: ParsedRelease, info: MediaInfo) -> None:
    """
    Fill None fields in parsed using data from ffprobe MediaInfo.

    Only fields that are currently None are written (quality, codec,
    audio_codec, audio_channels), so values parsed from the release name always
    take priority. Audio languages reported by the probe are appended to
    parsed.languages unless they are "und" or already present; the comparison
    is case-insensitive, so calling this function repeatedly never adds the
    same language twice.

    Args:
        parsed: Release to complete. Mutated in place.
        info: Probe result providing resolution, video_codec, audio_tracks and
            audio_languages.

    Returns:
        None.
    """
    if parsed.quality is None and info.resolution:
        parsed.quality = info.resolution

    if parsed.codec is None and info.video_codec:
        parsed.codec = _VIDEO_CODEC_MAP.get(info.video_codec.lower(), info.video_codec.upper())

    # bit_depth is left untouched: it would have to come from the pixel format,
    # which MediaInfo does not expose here.

    # Audio — use the default track, fallback to first
    default_track = next((t for t in info.audio_tracks if t.is_default), None)
    track = default_track or (info.audio_tracks[0] if info.audio_tracks else None)

    if track:
        if parsed.audio_codec is None and track.codec:
            parsed.audio_codec = _AUDIO_CODEC_MAP.get(track.codec.lower(), track.codec.upper())

        if parsed.audio_channels is None and track.channels:
            parsed.audio_channels = _CHANNEL_MAP.get(track.channels, f"{track.channels}ch")

    # Languages — merge ffprobe languages with token-level ones.
    if info.audio_languages:
        # Normalise to upper case for comparison only; stored values keep
        # whatever casing they arrived with.
        seen = {known.upper() for known in parsed.languages}
        for lang in info.audio_languages:
            if lang.lower() == "und":
                # "und" = undetermined, not useful
                continue
            key = lang.upper()
            if key in seen:
                continue
            parsed.languages.append(lang)
            # Remember it so a repeated probe entry is not appended again.
            seen.add(key)
tmdb_year, ext) - return self._resolve_tvshow( - parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder + if parsed.media_type == "tv_show": + return self._resolve_tvshow( + parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder + ) + return ResolvedDestination( + status="error", + error="unsupported_media_type", + message=( + f"Cannot organize '{release_name}': detected as '{parsed.media_type}'. " + "Only movies and TV shows are supported." + ), ) # ------------------------------------------------------------------ diff --git a/alfred/domain/media/__init__.py b/alfred/domain/media/__init__.py deleted file mode 100644 index b474b59..0000000 --- a/alfred/domain/media/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Media domain — shared naming and release parsing.""" - -from .release_parser import ParsedRelease, parse_release - -__all__ = ["ParsedRelease", "parse_release"] diff --git a/alfred/domain/media/release_parser.py b/alfred/domain/media/release_parser.py deleted file mode 100644 index 734a0b0..0000000 --- a/alfred/domain/media/release_parser.py +++ /dev/null @@ -1,306 +0,0 @@ -""" -release_parser.py — Parse a release name into structured components. 
- -Handles both dot-separated and space-separated release names: - Oz.S03.1080p.WEBRip.x265-KONTRAST - Oz S03 1080p WEBRip x265-KONTRAST - Inception.2010.1080p.BluRay.x265-GROUP -""" - -from __future__ import annotations - -import re -from dataclasses import dataclass, field - -# Known quality tokens -_QUALITIES = {"2160p", "1080p", "720p", "480p", "576p", "4k", "8k"} - -# Known source tokens (case-insensitive match) -_SOURCES = { - "bluray", "blu-ray", "bdrip", "brrip", - "webrip", "web-rip", "webdl", "web-dl", "web", - "hdtv", "hdrip", "dvdrip", "dvd", "vodrip", - "amzn", "nf", "dsnp", "hmax", "atvp", -} - -# Known codec tokens -_CODECS = { - "x264", "x265", "h264", "h265", "hevc", "avc", - "xvid", "divx", "av1", "vp9", - "h.264", "h.265", -} - -# Windows-forbidden characters (we strip these from display names) -_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]') - -# Episode/season pattern: S01, S01E02, S01E02E03, 1x02, etc. -_SEASON_EP_RE = re.compile( - r"S(\d{1,2})(?:E(\d{2})(?:E(\d{2}))?)?", - re.IGNORECASE, -) - -# Year pattern -_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b") - - -@dataclass -class ParsedRelease: - """Structured representation of a parsed release name.""" - - raw: str # original release name (untouched) - normalised: str # dots instead of spaces - title: str # show/movie title (dots, no year/season/tech) - year: int | None # movie year or show start year (from TMDB) - season: int | None # season number (None for movies) - episode: int | None # first episode number (None if season-pack) - episode_end: int | None # last episode for multi-ep (None otherwise) - quality: str | None # 1080p, 2160p, … - source: str | None # WEBRip, BluRay, … - codec: str | None # x265, HEVC, … - group: str # release group, "UNKNOWN" if missing - tech_string: str # quality.source.codec joined with dots - - # ------------------------------------------------------------------------- - # Derived helpers - # 
------------------------------------------------------------------------- - - @property - def is_movie(self) -> bool: - return self.season is None - - @property - def is_season_pack(self) -> bool: - return self.season is not None and self.episode is None - - def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: - """ - Build the series root folder name. - - Format: {Title}.{Year}.{Tech}-{Group} - Example: Oz.1997.1080p.WEBRip.x265-KONTRAST - """ - title_part = _sanitise_for_fs(tmdb_title).replace(" ", ".") - tech = self.tech_string or "Unknown" - return f"{title_part}.{tmdb_year}.{tech}-{self.group}" - - def season_folder_name(self) -> str: - """ - Build the season subfolder name = normalised release name (no episode). - - Example: Oz.S03.1080p.WEBRip.x265-KONTRAST - For a single-episode release we still strip the episode token so the - folder can hold the whole season. - """ - return _strip_episode_from_normalised(self.normalised) - - def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str: - """ - Build the episode filename. - - Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext} - Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv - - If tmdb_episode_title is None, omits the episode title segment. - """ - title_part = _sanitise_for_fs(self.title) # already dotted from normalised - s = f"S{self.season:02d}" if self.season is not None else "" - e = f"E{self.episode:02d}" if self.episode is not None else "" - se = s + e - - ep_title = "" - if tmdb_episode_title: - ep_title = "." + _sanitise_for_fs(tmdb_episode_title).replace(" ", ".") - - tech = self.tech_string or "Unknown" - ext_clean = ext.lstrip(".") - return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}" - - def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: - """ - Build the movie folder name. 
- - Format: {Title}.{Year}.{Tech}-{Group} - Example: Inception.2010.1080p.BluRay.x265-GROUP - """ - return self.show_folder_name(tmdb_title, tmdb_year) - - def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str: - """ - Build the movie filename (same as folder name + extension). - - Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv - """ - ext_clean = ext.lstrip(".") - return f"{self.movie_folder_name(tmdb_title, tmdb_year)}.{ext_clean}" - - -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - -def parse_release(name: str) -> ParsedRelease: - """ - Parse a release name and return a ParsedRelease. - - Accepts both dot-separated and space-separated names. - """ - normalised = _normalise(name) - tokens = normalised.split(".") - - season, episode, episode_end = _extract_season_episode(tokens) - quality, source, codec, group, tech_tokens = _extract_tech(tokens) - title = _extract_title(tokens, season, episode, tech_tokens) - year = _extract_year(tokens, title) - - tech_parts = [p for p in [quality, source, codec] if p] - tech_string = ".".join(tech_parts) - - return ParsedRelease( - raw=name, - normalised=normalised, - title=title, - year=year, - season=season, - episode=episode, - episode_end=episode_end, - quality=quality, - source=source, - codec=codec, - group=group, - tech_string=tech_string, - ) - - -# --------------------------------------------------------------------------- -# Internal helpers -# --------------------------------------------------------------------------- - -def _normalise(name: str) -> str: - """Replace spaces with dots, collapse multiple dots.""" - s = name.replace(" ", ".") - s = re.sub(r"\.{2,}", ".", s) - return s.strip(".") - - -def _sanitise_for_fs(text: str) -> str: - """Remove Windows-forbidden characters from a string.""" - return _WIN_FORBIDDEN.sub("", text) - - -def 
_extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]: - joined = ".".join(tokens) - m = _SEASON_EP_RE.search(joined) - if not m: - return None, None, None - season = int(m.group(1)) - episode = int(m.group(2)) if m.group(2) else None - episode_end = int(m.group(3)) if m.group(3) else None - return season, episode, episode_end - - -def _extract_tech( - tokens: list[str], -) -> tuple[str | None, str | None, str | None, str, set[str]]: - """ - Extract quality, source, codec, group from tokens. - - Returns (quality, source, codec, group, tech_token_set). - - Group extraction strategy (in priority order): - 1. Token where prefix is a known codec: x265-GROUP - 2. Last token in the list that contains a dash (fallback for 10bit-GROUP, AAC5.1-GROUP, etc.) - """ - quality: str | None = None - source: str | None = None - codec: str | None = None - group = "UNKNOWN" - tech_tokens: set[str] = set() - - for tok in tokens: - tl = tok.lower() - - if tl in _QUALITIES: - quality = tok - tech_tokens.add(tok) - continue - - if tl in _SOURCES: - source = tok - tech_tokens.add(tok) - continue - - if "-" in tok: - parts = tok.rsplit("-", 1) - # codec-GROUP (highest priority for group) - if parts[0].lower() in _CODECS: - codec = parts[0] - group = parts[1] if parts[1] else "UNKNOWN" - tech_tokens.add(tok) - continue - # source with dash: Web-DL, WEB-DL, etc. - if parts[0].lower() in _SOURCES or tok.lower().replace("-", "") in _SOURCES: - source = tok - tech_tokens.add(tok) - continue - - if tl in _CODECS: - codec = tok - tech_tokens.add(tok) - - # Fallback: if group still UNKNOWN, use the rightmost token with a dash - # that isn't a known source (handles "10bit-Protozoan", "AAC5.1-YTS", etc.) 
- if group == "UNKNOWN": - for tok in reversed(tokens): - if "-" in tok: - parts = tok.rsplit("-", 1) - tl = tok.lower() - if tl in _SOURCES or tok.lower().replace("-", "") in _SOURCES: - continue - if parts[1]: # non-empty group part - group = parts[1] - break - - return quality, source, codec, group, tech_tokens - - -def _extract_title(tokens: list[str], season: int | None, episode: int | None, tech_tokens: set[str]) -> str: - """ - Extract the title portion: everything before the first season/year/tech token. - """ - title_parts = [] - for tok in tokens: - # Stop at season token - if _SEASON_EP_RE.match(tok): - break - # Stop at year - if _YEAR_RE.fullmatch(tok): - break - # Stop at tech tokens - if tok in tech_tokens or tok.lower() in _QUALITIES | _SOURCES | _CODECS: - break - # Stop if token contains a dash (likely codec-GROUP) - if "-" in tok and any(p.lower() in _CODECS | _SOURCES for p in tok.split("-")): - break - title_parts.append(tok) - - return ".".join(title_parts) if title_parts else tokens[0] - - -def _extract_year(tokens: list[str], title: str) -> int | None: - """Extract a 4-digit year from tokens (only after the title).""" - title_len = len(title.split(".")) - for tok in tokens[title_len:]: - m = _YEAR_RE.fullmatch(tok) - if m: - return int(m.group(1)) - return None - - -def _strip_episode_from_normalised(normalised: str) -> str: - """ - Remove all episode parts (Exx) from a normalised release name, keeping Sxx. - - Oz.S03E01.1080p... → Oz.S03.1080p... - Archer.S14E09E10E11.1080p... → Archer.S14.1080p... 
- """ - return re.sub(r"(S\d{2})(E\d{2})+", r"\1", normalised, flags=re.IGNORECASE) diff --git a/alfred/domain/release/__init__.py b/alfred/domain/release/__init__.py new file mode 100644 index 0000000..2e96275 --- /dev/null +++ b/alfred/domain/release/__init__.py @@ -0,0 +1,6 @@ +"""Release domain — release name parsing and naming conventions.""" + +from .services import parse_release +from .value_objects import ParsedRelease + +__all__ = ["ParsedRelease", "parse_release"] diff --git a/alfred/domain/release/knowledge.py b/alfred/domain/release/knowledge.py new file mode 100644 index 0000000..4f6dd5c --- /dev/null +++ b/alfred/domain/release/knowledge.py @@ -0,0 +1,121 @@ +"""Release knowledge loader. + +Three-layer merge (lowest → highest priority): + 1. Builtin — alfred/knowledge/release/ + 2. Sites — alfred/knowledge/release/sites/*.yaml (all trackers) + 3. Learned — data/knowledge/release/ (user additions via the learn tool) + +Lists are extended additively, scalars from higher layers win. 
+""" + +from pathlib import Path + +import alfred as _alfred_pkg +import yaml + +_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge" / "release" +_SITES_ROOT = _BUILTIN_ROOT / "sites" +_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" / "release" + + +def _merge(base: dict, overlay: dict) -> dict: + """Merge overlay into base — lists are extended, scalars from overlay win.""" + result = dict(base) + for key, val in overlay.items(): + if key in result and isinstance(result[key], list) and isinstance(val, list): + result[key] = result[key] + [v for v in val if v not in result[key]] + else: + result[key] = val + return result + + +def _read(path: Path) -> dict: + try: + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) or {} + except FileNotFoundError: + return {} + + +def _load(filename: str) -> dict: + result = _read(_BUILTIN_ROOT / filename) + result = _merge(result, _read(_LEARNED_ROOT / filename)) + return result + + +def _load_sites() -> dict: + """Merge all site YAML files into a single dict.""" + result: dict = {} + for site_file in sorted(_SITES_ROOT.glob("*.yaml")): + result = _merge(result, _read(site_file)) + return result + + +def load_resolutions() -> set[str]: + return set(_load("resolutions.yaml").get("resolutions", [])) + + +def load_sources() -> set[str]: + return set(_load("sources.yaml").get("sources", [])) + + +def load_codecs() -> set[str]: + return set(_load("codecs.yaml").get("codecs", [])) + + +def load_win_forbidden_chars() -> list[str]: + return _load("filesystem.yaml").get("win_forbidden_chars", []) + + +def load_video_extensions() -> set[str]: + return set(_load("file_extensions.yaml").get("video", [])) + + +def load_non_video_extensions() -> set[str]: + return set(_load("file_extensions.yaml").get("non_video", [])) + + +def load_metadata_extensions() -> set[str]: + return set(_load("file_extensions.yaml").get("metadata", [])) + + +def load_forbidden_chars() -> set[str]: + return 
set(_load("release_format.yaml").get("forbidden_chars", [])) + + +def load_language_tokens() -> set[str]: + base = {t.upper() for t in _load("languages.yaml").get("tokens", [])} + sites = {t.upper() for t in _load_sites().get("languages", [])} + return base | sites + + +def load_audio() -> dict: + return _load("audio.yaml") + + +def load_video() -> dict: + return _load("video.yaml") + + +def load_editions() -> dict: + base = _load("editions.yaml") + site_tokens = _load_sites().get("editions", {}).get("tokens", []) + if site_tokens: + existing = base.get("tokens", []) + base["tokens"] = existing + [t for t in site_tokens if t not in existing] + return base + + +def load_sources_extra() -> set[str]: + """Additional source tokens from site files.""" + return {t for t in _load_sites().get("sources", [])} + + +def load_hdr_extra() -> set[str]: + """Additional HDR tokens from site files.""" + return {t.upper() for t in _load_sites().get("hdr", [])} + + +def load_media_type_tokens() -> dict: + """Site-specific media type tokens (doc, concert, collection, integrale).""" + return _load_sites().get("media_type_tokens", {}) diff --git a/alfred/domain/release/services.py b/alfred/domain/release/services.py new file mode 100644 index 0000000..fde6aa4 --- /dev/null +++ b/alfred/domain/release/services.py @@ -0,0 +1,484 @@ +"""Release domain — parsing service.""" + +from __future__ import annotations + +from .value_objects import ( + ParsedRelease, + _AUDIO, + _CODECS, + _EDITIONS, + _FORBIDDEN_CHARS, + _HDR_EXTRA, + _LANGUAGE_TOKENS, + _MEDIA_TYPE_TOKENS, + _RESOLUTIONS, + _SOURCES, + _VIDEO_EXTENSIONS, + _VIDEO_META, + _NON_VIDEO_EXTENSIONS, +) + + +def parse_release(name: str) -> ParsedRelease: + """ + Parse a release name and return a ParsedRelease. + + Well-formed names (no forbidden chars) go through full token-level parsing. + Malformed names go through _sanitize() — strip site tags, replace spaces — + then re-checked. 
Still malformed after sanitization → media_type="unknown", AI handles it. + """ + site_tag = None + + parse_path = "direct" + + if not _is_well_formed(name): + clean, site_tag = _sanitize(name) + if not _is_well_formed(clean): + return ParsedRelease( + raw=name, + normalised=clean, + title=clean, + year=None, + season=None, + episode=None, + episode_end=None, + quality=None, + source=None, + codec=None, + group="UNKNOWN", + tech_string="", + media_type="unknown", + site_tag=site_tag, + parse_path="ai", + ) + name = clean + parse_path = "sanitized" + + tokens = name.split(".") + + season, episode, episode_end = _extract_season_episode(tokens) + quality, source, codec, group, tech_tokens = _extract_tech(tokens) + languages, lang_tokens = _extract_languages(tokens) + audio_codec, audio_channels, audio_tokens = _extract_audio(tokens) + bit_depth, hdr_format, video_tokens = _extract_video_meta(tokens) + edition, edition_tokens = _extract_edition(tokens) + title = _extract_title( + tokens, + tech_tokens | lang_tokens | audio_tokens | video_tokens | edition_tokens, + ) + year = _extract_year(tokens, title) + media_type = _infer_media_type(season, quality, source, codec, year, edition, tokens) + + tech_parts = [p for p in [quality, source, codec] if p] + tech_string = ".".join(tech_parts) + + return ParsedRelease( + raw=name, + normalised=name, + title=title, + year=year, + season=season, + episode=episode, + episode_end=episode_end, + quality=quality, + source=source, + codec=codec, + group=group, + tech_string=tech_string, + media_type=media_type, + site_tag=site_tag, + parse_path=parse_path, + languages=languages, + audio_codec=audio_codec, + audio_channels=audio_channels, + bit_depth=bit_depth, + hdr_format=hdr_format, + edition=edition, + ) + + +def _infer_media_type( + season: int | None, + quality: str | None, + source: str | None, + codec: str | None, + year: int | None, + edition: str | None, + tokens: list[str], +) -> str: + """ + Infer media_type from 
token-level evidence only (no filesystem access). + + - documentary : DOC token present + - concert : CONCERT token present + - tv_complete : INTEGRALE/COMPLETE token, no season + - tv_show : season token found + - movie : no season, at least one tech marker + - unknown : no conclusive evidence + """ + upper_tokens = {t.upper() for t in tokens} + + doc_tokens = {t.upper() for t in _MEDIA_TYPE_TOKENS.get("doc", [])} + concert_tokens = {t.upper() for t in _MEDIA_TYPE_TOKENS.get("concert", [])} + integrale_tokens = {t.upper() for t in _MEDIA_TYPE_TOKENS.get("integrale", [])} + + if upper_tokens & doc_tokens: + return "documentary" + if upper_tokens & concert_tokens: + return "concert" + if (edition in {"COMPLETE", "INTEGRALE", "COLLECTION"} or upper_tokens & integrale_tokens) and season is None: + return "tv_complete" + if season is not None: + return "tv_show" + if any([quality, source, codec, year]): + return "movie" + return "unknown" + + +def _is_well_formed(name: str) -> bool: + """Return True if name contains no forbidden characters per scene naming rules.""" + return not any(c in name for c in _FORBIDDEN_CHARS) + + +def _sanitize(name: str) -> tuple[str, str | None]: + """ + Attempt to recover a malformed release name. + + Steps (in order): + 1. Strip site tag prefix/suffix [...] + 2. Replace spaces with dots + + Returns (clean_name, site_tag). + """ + s, site_tag = _strip_site_tag(name) + s = s.replace(" ", ".") + return s, site_tag + + +def _strip_site_tag(name: str) -> tuple[str, str | None]: + """ + Strip a site watermark tag from the release name and return (clean_name, tag). + + Handles two positions: + - Prefix: "[ OxTorrent.vc ] The.Title.S01..." + - Suffix: "The.Title.S01...-NTb[TGx]" + + Anything between [...] is treated as a site tag. + Returns (original_name, None) if no tag found. 
+ """ + s = name.strip() + + if s.startswith("["): + close = s.find("]") + if close != -1: + tag = s[1:close].strip() + remainder = s[close + 1:].strip() + if tag and remainder: + return remainder, tag + + if s.endswith("]"): + open_bracket = s.rfind("[") + if open_bracket != -1: + tag = s[open_bracket + 1:-1].strip() + remainder = s[:open_bracket].strip() + if tag and remainder: + return remainder, tag + + return s, None + + +def _normalize(name: str) -> str: + """Replace spaces with dots, collapse multiple dots.""" + s = name.replace(" ", ".") + while ".." in s: + s = s.replace("..", ".") + return s.strip(".") + + +def _parse_season_episode(tok: str) -> tuple[int, int | None, int | None] | None: + """ + Parse a single token as a season/episode marker. + + Handles: S03, S03E01, S03E01E02 + Returns (season, episode, episode_end) or None if not a season token. + """ + upper = tok.upper() + if not (len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit()): + return None + + season = int(upper[1:3]) + rest = upper[3:] # everything after Sxx + + if not rest: + return season, None, None + + # Parse one or two Exx segments + episodes: list[int] = [] + while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit(): + episodes.append(int(rest[1:3])) + rest = rest[3:] + + if not episodes: + return None # malformed token like "S03XYZ" + + episode = episodes[0] + episode_end = episodes[1] if len(episodes) >= 2 else None + return season, episode, episode_end + + +def _extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]: + for tok in tokens: + parsed = _parse_season_episode(tok) + if parsed is not None: + return parsed + return None, None, None + + +def _extract_tech( + tokens: list[str], +) -> tuple[str | None, str | None, str | None, str, set[str]]: + """ + Extract quality, source, codec, group from tokens. + + Returns (quality, source, codec, group, tech_token_set). + + Group extraction strategy (in priority order): + 1. 
Token where prefix is a known codec: x265-GROUP + 2. Rightmost token with a dash that isn't a known source + """ + quality: str | None = None + source: str | None = None + codec: str | None = None + group = "UNKNOWN" + tech_tokens: set[str] = set() + + for tok in tokens: + tl = tok.lower() + + if tl in _RESOLUTIONS: + quality = tok + tech_tokens.add(tok) + continue + + if tl in _SOURCES: + source = tok + tech_tokens.add(tok) + continue + + if "-" in tok: + parts = tok.rsplit("-", 1) + # codec-GROUP (highest priority for group) + if parts[0].lower() in _CODECS: + codec = parts[0] + group = parts[1] if parts[1] else "UNKNOWN" + tech_tokens.add(tok) + continue + # source with dash: Web-DL, WEB-DL, etc. + if parts[0].lower() in _SOURCES or tok.lower().replace("-", "") in _SOURCES: + source = tok + tech_tokens.add(tok) + continue + + if tl in _CODECS: + codec = tok + tech_tokens.add(tok) + + # Fallback: rightmost token with a dash that isn't a known source + if group == "UNKNOWN": + for tok in reversed(tokens): + if "-" in tok: + parts = tok.rsplit("-", 1) + tl = tok.lower() + if tl in _SOURCES or tok.lower().replace("-", "") in _SOURCES: + continue + if parts[1]: + group = parts[1] + break + + return quality, source, codec, group, tech_tokens + + +def _is_year_token(tok: str) -> bool: + """Return True if tok is a 4-digit year between 1900 and 2099.""" + return len(tok) == 4 and tok.isdigit() and 1900 <= int(tok) <= 2099 + + +def _extract_title(tokens: list[str], tech_tokens: set[str]) -> str: + """Extract the title portion: everything before the first season/year/tech token.""" + title_parts = [] + for tok in tokens: + if _parse_season_episode(tok) is not None: + break + if _is_year_token(tok): + break + if tok in tech_tokens or tok.lower() in _RESOLUTIONS | _SOURCES | _CODECS: + break + if "-" in tok and any(p.lower() in _CODECS | _SOURCES for p in tok.split("-")): + break + title_parts.append(tok) + + return ".".join(title_parts) if title_parts else tokens[0] + + 
+def _extract_year(tokens: list[str], title: str) -> int | None: + """Extract a 4-digit year from tokens (only after the title).""" + title_len = len(title.split(".")) + for tok in tokens[title_len:]: + if _is_year_token(tok): + return int(tok) + return None + + +# --------------------------------------------------------------------------- +# Sequence matcher +# --------------------------------------------------------------------------- + +def _match_sequences( + tokens: list[str], + sequences: list[dict], + key: str, +) -> tuple[str | None, set[str]]: + """ + Try to match multi-token sequences against consecutive tokens. + + Returns (matched_value, set_of_matched_tokens) or (None, empty_set). + Sequences must be ordered most-specific first in the YAML. + """ + upper_tokens = [t.upper() for t in tokens] + for seq in sequences: + seq_upper = [s.upper() for s in seq["tokens"]] + n = len(seq_upper) + for i in range(len(upper_tokens) - n + 1): + if upper_tokens[i:i + n] == seq_upper: + matched = set(tokens[i:i + n]) + return seq[key], matched + return None, set() + + +# --------------------------------------------------------------------------- +# Language extraction +# --------------------------------------------------------------------------- + +def _extract_languages(tokens: list[str]) -> tuple[list[str], set[str]]: + """Extract language tokens. Returns (languages, matched_token_set).""" + languages = [] + lang_tokens: set[str] = set() + for tok in tokens: + if tok.upper() in _LANGUAGE_TOKENS: + languages.append(tok.upper()) + lang_tokens.add(tok) + return languages, lang_tokens + + +# --------------------------------------------------------------------------- +# Audio extraction +# --------------------------------------------------------------------------- + +def _extract_audio( + tokens: list[str], +) -> tuple[str | None, str | None, set[str]]: + """ + Extract audio codec and channel layout. + + Returns (audio_codec, audio_channels, matched_token_set). 
+ Sequences are tried first (DTS.HD.MA, TrueHD.Atmos, …), then single tokens. + """ + audio_codec: str | None = None + audio_channels: str | None = None + audio_tokens: set[str] = set() + + known_codecs = {c.upper() for c in _AUDIO.get("codecs", [])} + known_channels = set(_AUDIO.get("channels", [])) + + # Try multi-token sequences first + matched_codec, matched_set = _match_sequences(tokens, _AUDIO.get("sequences", []), "codec") + if matched_codec: + audio_codec = matched_codec + audio_tokens |= matched_set + + # Channel layouts like "5.1" or "7.1" are split into two tokens by normalize — + # detect them as consecutive pairs "X" + "Y" where "X.Y" is a known channel. + # The second token may have a "-GROUP" suffix (e.g. "1-KTH" → strip it). + for i in range(len(tokens) - 1): + second = tokens[i + 1].split("-")[0] + candidate = f"{tokens[i]}.{second}" + if candidate in known_channels and audio_channels is None: + audio_channels = candidate + audio_tokens.add(tokens[i]) + audio_tokens.add(tokens[i + 1]) + + for tok in tokens: + if tok in audio_tokens: + continue + if tok.upper() in known_codecs and audio_codec is None: + audio_codec = tok + audio_tokens.add(tok) + elif tok in known_channels and audio_channels is None: + audio_channels = tok + audio_tokens.add(tok) + + return audio_codec, audio_channels, audio_tokens + + +# --------------------------------------------------------------------------- +# Video metadata extraction (bit depth, HDR) +# --------------------------------------------------------------------------- + +def _extract_video_meta( + tokens: list[str], +) -> tuple[str | None, str | None, set[str]]: + """ + Extract bit depth and HDR format. + + Returns (bit_depth, hdr_format, matched_token_set). 
+ """ + bit_depth: str | None = None + hdr_format: str | None = None + video_tokens: set[str] = set() + + known_hdr = {h.upper() for h in _VIDEO_META.get("hdr", [])} | _HDR_EXTRA + known_depth = {d.lower() for d in _VIDEO_META.get("bit_depth", [])} + + # Try HDR sequences first + matched_hdr, matched_set = _match_sequences(tokens, _VIDEO_META.get("sequences", []), "hdr") + if matched_hdr: + hdr_format = matched_hdr + video_tokens |= matched_set + + for tok in tokens: + if tok in video_tokens: + continue + if tok.upper() in known_hdr and hdr_format is None: + hdr_format = tok.upper() + video_tokens.add(tok) + elif tok.lower() in known_depth and bit_depth is None: + bit_depth = tok.lower() + video_tokens.add(tok) + + return bit_depth, hdr_format, video_tokens + + +# --------------------------------------------------------------------------- +# Edition extraction +# --------------------------------------------------------------------------- + +def _extract_edition(tokens: list[str]) -> tuple[str | None, set[str]]: + """ + Extract release edition (UNRATED, EXTENDED, DIRECTORS.CUT, …). + + Returns (edition, matched_token_set). 
+ """ + known_tokens = {t.upper() for t in _EDITIONS.get("tokens", [])} + + # Try multi-token sequences first + matched_edition, matched_set = _match_sequences( + tokens, _EDITIONS.get("sequences", []), "edition" + ) + if matched_edition: + return matched_edition, matched_set + + for tok in tokens: + if tok.upper() in known_tokens: + return tok.upper(), {tok} + + return None, set() diff --git a/alfred/domain/release/value_objects.py b/alfred/domain/release/value_objects.py new file mode 100644 index 0000000..a56fc8e --- /dev/null +++ b/alfred/domain/release/value_objects.py @@ -0,0 +1,166 @@ +"""Release domain — value objects and token sets.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from .knowledge import ( + load_audio, + load_codecs, + load_editions, + load_forbidden_chars, + load_hdr_extra, + load_language_tokens, + load_media_type_tokens, + load_metadata_extensions, + load_non_video_extensions, + load_resolutions, + load_sources, + load_sources_extra, + load_video, + load_video_extensions, + load_win_forbidden_chars, +) + +# Token sets — loaded once at import time from alfred/knowledge/release/ +_RESOLUTIONS: set[str] = load_resolutions() +_SOURCES: set[str] = load_sources() | load_sources_extra() +_CODECS: set[str] = load_codecs() +_VIDEO_EXTENSIONS: set[str] = load_video_extensions() +_NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions() +_METADATA_EXTENSIONS: set[str] = load_metadata_extensions() +_FORBIDDEN_CHARS: set[str] = load_forbidden_chars() +_LANGUAGE_TOKENS: set[str] = load_language_tokens() +_AUDIO: dict = load_audio() +_VIDEO_META: dict = load_video() +_EDITIONS: dict = load_editions() +_HDR_EXTRA: set[str] = load_hdr_extra() +_MEDIA_TYPE_TOKENS: dict = load_media_type_tokens() + +# Translation table for stripping Windows-forbidden characters +_WIN_FORBIDDEN_TABLE = str.maketrans("", "", "".join(load_win_forbidden_chars())) + + +def _sanitize_for_fs(text: str) -> str: + """Remove Windows-forbidden 
characters from a string.""" + return text.translate(_WIN_FORBIDDEN_TABLE) + + +def _strip_episode_from_normalized(normalized: str) -> str: + """ + Remove all episode parts (Exx) from a normalized release name, keeping Sxx. + + Oz.S03E01.1080p... → Oz.S03.1080p... + Archer.S14E09E10E11.1080p... → Archer.S14.1080p... + """ + tokens = normalized.split(".") + result = [] + for tok in tokens: + upper = tok.upper() + # Token is SxxExx... — keep only the Sxx part + if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit(): + result.append(tok[:3]) # "S" + two digits + else: + result.append(tok) + return ".".join(result) + + +# Keep old names as aliases for backward compatibility during the US English migration +_sanitise_for_fs = _sanitize_for_fs +_strip_episode_from_normalised = _strip_episode_from_normalized + + +@dataclass +class ParsedRelease: + """Structured representation of a parsed release name.""" + + raw: str # original release name (untouched) + normalised: str # dots instead of spaces + title: str # show/movie title (dots, no year/season/tech) + year: int | None # movie year or show start year (from TMDB) + season: int | None # season number (None for movies) + episode: int | None # first episode number (None if season-pack) + episode_end: int | None # last episode for multi-ep (None otherwise) + quality: str | None # 1080p, 2160p, … + source: str | None # WEBRip, BluRay, … + codec: str | None # x265, HEVC, … + group: str # release group, "UNKNOWN" if missing + tech_string: str # quality.source.codec joined with dots + media_type: str = "unknown" # "movie" | "tv_show" | "tv_complete" | "other" | "unknown" + site_tag: str | None = None # site watermark stripped from name, e.g. 
"TGx", "OxTorrent.vc" + parse_path: str = "direct" # "direct" | "sanitized" | "ai" + languages: list[str] = None # ["MULTI", "VFF"], ["FRENCH"], … + audio_codec: str | None = None # "DTS-HD.MA", "DDP", "EAC3", … + audio_channels: str | None = None # "5.1", "7.1", "2.0", … + bit_depth: str | None = None # "10bit", "8bit", … + hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", … + edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", … + + def __post_init__(self): + if self.languages is None: + object.__setattr__(self, "languages", []) + + @property + def is_season_pack(self) -> bool: + return self.season is not None and self.episode is None + + def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: + """ + Build the series root folder name. + + Format: {Title}.{Year}.{Tech}-{Group} + Example: Oz.1997.1080p.WEBRip.x265-KONTRAST + """ + title_part = _sanitize_for_fs(tmdb_title).replace(" ", ".") + tech = self.tech_string or "Unknown" + return f"{title_part}.{tmdb_year}.{tech}-{self.group}" + + def season_folder_name(self) -> str: + """ + Build the season subfolder name = normalized release name (no episode). + + Example: Oz.S03.1080p.WEBRip.x265-KONTRAST + For a single-episode release we still strip the episode token so the + folder can hold the whole season. + """ + return _strip_episode_from_normalized(self.normalised) + + def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str: + """ + Build the episode filename. + + Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext} + Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv + + If tmdb_episode_title is None, omits the episode title segment. + """ + title_part = _sanitize_for_fs(self.title) + s = f"S{self.season:02d}" if self.season is not None else "" + e = f"E{self.episode:02d}" if self.episode is not None else "" + se = s + e + + ep_title = "" + if tmdb_episode_title: + ep_title = "." 
+ _sanitize_for_fs(tmdb_episode_title).replace(" ", ".") + + tech = self.tech_string or "Unknown" + ext_clean = ext.lstrip(".") + return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}" + + def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: + """ + Build the movie folder name. + + Format: {Title}.{Year}.{Tech}-{Group} + Example: Inception.2010.1080p.BluRay.x265-GROUP + """ + return self.show_folder_name(tmdb_title, tmdb_year) + + def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str: + """ + Build the movie filename (same as folder name + extension). + + Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv + """ + ext_clean = ext.lstrip(".") + return f"{self.movie_folder_name(tmdb_title, tmdb_year)}.{ext_clean}" diff --git a/alfred/domain/shared/media_info.py b/alfred/domain/shared/media_info.py new file mode 100644 index 0000000..f69bf47 --- /dev/null +++ b/alfred/domain/shared/media_info.py @@ -0,0 +1,95 @@ +"""MediaInfo — pure domain dataclass for file-level media metadata.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass +class AudioTrack: + """A single audio track as reported by ffprobe.""" + + index: int + codec: str | None # aac, ac3, eac3, dts, truehd, flac, … + channels: int | None # 2, 6 (5.1), 8 (7.1), … + channel_layout: str | None # stereo, 5.1, 7.1, … + language: str | None # ISO 639-2: fre, eng, und, … + is_default: bool = False + + +@dataclass +class SubtitleTrack: + """A single subtitle track as reported by ffprobe.""" + + index: int + codec: str | None # subrip, ass, hdmv_pgs_subtitle, … + language: str | None # ISO 639-2: fre, eng, und, … + is_default: bool = False + is_forced: bool = False + + +@dataclass +class MediaInfo: + """ + File-level media metadata extracted by ffprobe. + + All fields are optional — ffprobe may not always report every value. 
+ """ + + # Video + width: int | None = None + height: int | None = None + video_codec: str | None = None # h264, hevc, av1, … + duration_seconds: float | None = None + bitrate_kbps: int | None = None + + # Audio tracks (ordered by stream index) + audio_tracks: list[AudioTrack] = field(default_factory=list) + + # Embedded subtitle tracks + subtitle_tracks: list[SubtitleTrack] = field(default_factory=list) + + @property + def resolution(self) -> str | None: + """ + Best-effort resolution string: 2160p, 1080p, 720p, … + + Width takes priority over height to handle widescreen/cinema crops + (e.g. 1920×960 scope → 1080p, not 720p). + Falls back to height when width is unavailable. + """ + match (self.width, self.height): + case (None, None): + return None + case (w, h) if w is not None: + match True: + case _ if w >= 3840: return "2160p" + case _ if w >= 1920: return "1080p" + case _ if w >= 1280: return "720p" + case _ if w >= 720: return "576p" + case _ if w >= 640: return "480p" + case _: return f"{h}p" if h else f"{w}w" + case (None, h): + match True: + case _ if h >= 2160: return "2160p" + case _ if h >= 1080: return "1080p" + case _ if h >= 720: return "720p" + case _ if h >= 576: return "576p" + case _ if h >= 480: return "480p" + case _: return f"{h}p" + + @property + def audio_languages(self) -> list[str]: + """Unique audio languages across all tracks (ISO 639-2).""" + seen: set[str] = set() + result = [] + for track in self.audio_tracks: + if track.language and track.language not in seen: + seen.add(track.language) + result.append(track.language) + return result + + @property + def is_multi_audio(self) -> bool: + """True if more than one audio language is present.""" + return len(self.audio_languages) > 1 diff --git a/alfred/infrastructure/filesystem/ffprobe.py b/alfred/infrastructure/filesystem/ffprobe.py new file mode 100644 index 0000000..da5e757 --- /dev/null +++ b/alfred/infrastructure/filesystem/ffprobe.py @@ -0,0 +1,98 @@ +"""ffprobe — infrastructure 
adapter for extracting MediaInfo from a video file.""" + +from __future__ import annotations + +import json +import logging +import subprocess +from pathlib import Path + +from alfred.domain.shared.media_info import AudioTrack, MediaInfo, SubtitleTrack + +logger = logging.getLogger(__name__) + +_FFPROBE_CMD = [ + "ffprobe", + "-v", "quiet", + "-print_format", "json", + "-show_streams", + "-show_format", +] + + +def probe(path: Path) -> MediaInfo | None: + """ + Run ffprobe on path and return a MediaInfo. + + Returns None if ffprobe is not available or the file cannot be probed. + """ + try: + result = subprocess.run( + [*_FFPROBE_CMD, str(path)], + capture_output=True, + text=True, + timeout=30, + ) + except subprocess.TimeoutExpired: + logger.warning("ffprobe timed out on %s", path) + return None + + if result.returncode != 0: + logger.warning("ffprobe failed on %s: %s", path, result.stderr.strip()) + return None + + try: + data = json.loads(result.stdout) + except json.JSONDecodeError: + logger.warning("ffprobe returned invalid JSON for %s", path) + return None + + return _parse(data) + + +def _parse(data: dict) -> MediaInfo: + streams = data.get("streams", []) + fmt = data.get("format", {}) + + info = MediaInfo() + + # Format-level + if "duration" in fmt: + try: + info.duration_seconds = float(fmt["duration"]) + except ValueError: + pass + if "bit_rate" in fmt: + try: + info.bitrate_kbps = int(fmt["bit_rate"]) // 1000 + except ValueError: + pass + + for stream in streams: + codec_type = stream.get("codec_type") + + if codec_type == "video" and info.video_codec is None: + info.video_codec = stream.get("codec_name") + info.width = stream.get("width") + info.height = stream.get("height") + + elif codec_type == "audio": + info.audio_tracks.append(AudioTrack( + index=stream.get("index", len(info.audio_tracks)), + codec=stream.get("codec_name"), + channels=stream.get("channels"), + channel_layout=stream.get("channel_layout"), + language=stream.get("tags", 
{}).get("language"), + is_default=stream.get("disposition", {}).get("default", 0) == 1, + )) + + elif codec_type == "subtitle": + info.subtitle_tracks.append(SubtitleTrack( + index=stream.get("index", len(info.subtitle_tracks)), + codec=stream.get("codec_name"), + language=stream.get("tags", {}).get("language"), + is_default=stream.get("disposition", {}).get("default", 0) == 1, + is_forced=stream.get("disposition", {}).get("forced", 0) == 1, + )) + + return info diff --git a/alfred/infrastructure/filesystem/find_video.py b/alfred/infrastructure/filesystem/find_video.py new file mode 100644 index 0000000..e91a290 --- /dev/null +++ b/alfred/infrastructure/filesystem/find_video.py @@ -0,0 +1,25 @@ +"""find_video — locate the first video file in a release folder.""" + +from __future__ import annotations + +from pathlib import Path + +from alfred.domain.release.value_objects import _VIDEO_EXTENSIONS + + +def find_video_file(path: Path) -> Path | None: + """ + Return the first video file found at path. + + - If path is a file and is a video — return it directly. + - If path is a folder — scan recursively, return the first video found + (sorted by name for determinism, picks S01E01 before S01E02 etc.). + """ + if path.is_file(): + return path if path.suffix.lower() in _VIDEO_EXTENSIONS else None + + for candidate in sorted(path.rglob("*")): + if candidate.is_file() and candidate.suffix.lower() in _VIDEO_EXTENSIONS: + return candidate + + return None diff --git a/alfred/knowledge/release/audio.yaml b/alfred/knowledge/release/audio.yaml new file mode 100644 index 0000000..8f0f846 --- /dev/null +++ b/alfred/knowledge/release/audio.yaml @@ -0,0 +1,43 @@ +# Audio codec and channel tokens found in scene release names +# +# sequences: multi-token patterns matched left-to-right on consecutive tokens +# Order matters — longest/most specific first. 
+# codecs: single-token codec identifiers +# channels: single-token channel layout identifiers + +sequences: + - tokens: [DTS, HD, MA] + codec: DTS-HD.MA + - tokens: [DTS, HD] + codec: DTS-HD + - tokens: [DTS, X] + codec: DTS-X + - tokens: [TrueHD, Atmos] + codec: TrueHD.Atmos + - tokens: [DD, Plus] + codec: DDP + - tokens: [DDP, Atmos] + codec: DDP.Atmos + - tokens: [EAC3, Atmos] + codec: EAC3.Atmos + +codecs: + - DTS + - DDP # Dolby Digital Plus (alternate label) + - EAC3 # Dolby Digital Plus (codec name) + - AC3 # Dolby Digital + - DD # Dolby Digital (alternate label) + - TrueHD + - AAC + - FLAC + - OPUS + - MP3 + - PCM + - LPCM + - ATMOS # sometimes appears standalone + +channels: + - "7.1" + - "5.1" + - "2.0" + - "1.0" diff --git a/alfred/knowledge/release/codecs.yaml b/alfred/knowledge/release/codecs.yaml new file mode 100644 index 0000000..25c55dc --- /dev/null +++ b/alfred/knowledge/release/codecs.yaml @@ -0,0 +1,14 @@ +# Known video codec tokens (case-insensitive match) +codecs: + - x264 + - x265 + - h264 + - h265 + - hevc + - avc + - xvid + - divx + - av1 + - vp9 + - h.264 + - h.265 diff --git a/alfred/knowledge/release/editions.yaml b/alfred/knowledge/release/editions.yaml new file mode 100644 index 0000000..a5e4106 --- /dev/null +++ b/alfred/knowledge/release/editions.yaml @@ -0,0 +1,28 @@ +# Release edition and version tokens + +# sequences: multi-token editions matched on consecutive tokens +# tokens: single-token edition identifiers + +sequences: + - tokens: [DIRECTORS, CUT] + edition: DIRECTORS.CUT + - tokens: [EXTENDED, CUT] + edition: EXTENDED.CUT + - tokens: [THEATRICAL, CUT] + edition: THEATRICAL.CUT + +tokens: + - UNRATED + - EXTENDED + - THEATRICAL + - REMASTERED + - PROPER # re-release fixing a technical flaw + - REPACK # re-release fixing packaging issue + - RERIP # re-ripped from source + - READNFO # see NFO for details + - LIMITED + - INTERNAL # group-internal release + - RETAIL + - COMPLETE + - INTEGRALE # French equivalent of COMPLETE 
(full series) + - COLLECTION # film pack/collection diff --git a/alfred/knowledge/release/file_extensions.yaml b/alfred/knowledge/release/file_extensions.yaml new file mode 100644 index 0000000..6726434 --- /dev/null +++ b/alfred/knowledge/release/file_extensions.yaml @@ -0,0 +1,64 @@ +# File extension classification for media type detection +# +# video — extensions that confirm a video media file +# non_video — extensions that definitively exclude video content (no metadata here) +# metadata — extensions always present alongside releases, ignored in type decision + +video: + - .mkv + - .mp4 + - .avi + - .mov + - .wmv + - .flv + - .m4v + - .ts + - .m2ts + - .vob + - .ogm + - .webm + - .divx + - .xvid + +non_video: + # Disc images + - .iso + - .img + - .bin + - .cue + - .nrg + # Archives + - .rar + - .zip + - .7z + - .tar + - .gz + - .r00 + - .r01 + # Games / console ROMs + - .nsp + - .xci + - .pkg + - .xex + - .rpx + - .apk + # Executables / installers + - .exe + - .msi + - .dmg + - .deb + - .rpm + +metadata: + # Release metadata — always ignored in type detection + - .nfo + - .txt + - .sfv + - .md5 + - .jpg + - .png + - .srt + - .sub + - .idx + - .ass + - .ssa diff --git a/alfred/knowledge/release/filesystem.yaml b/alfred/knowledge/release/filesystem.yaml new file mode 100644 index 0000000..4823b0c --- /dev/null +++ b/alfred/knowledge/release/filesystem.yaml @@ -0,0 +1,10 @@ +# Characters forbidden in filenames on Windows (stripped from display names) +win_forbidden_chars: + - "?" + - ":" + - "*" + - "\"" + - "<" + - ">" + - "|" + - "\\" diff --git a/alfred/knowledge/release/languages.yaml b/alfred/knowledge/release/languages.yaml new file mode 100644 index 0000000..2142214 --- /dev/null +++ b/alfred/knowledge/release/languages.yaml @@ -0,0 +1,44 @@ +# Audio/subtitle language tokens found in scene release names +# These are not always strictly scene-compliant — real-world torrent sites +# use additional tokens (VFF, VFQ, VF2, etc.) that are included here. 
+ +tokens: + # French variants + - FRENCH + - TRUEFRENCH + - VFF # Version Française Française (dubbed in France) + - VFQ # Version Française Québécoise (dubbed in Quebec) + - VF2 # Multi: VFF + VFQ + - VF # Version Française (generic) + - VOST # Version Originale Sous-Titrée + - VOSTFR # Version Originale Sous-Titrée Français + - VOSTSUB # Version Originale Sous-Titrée (alternate) + + # Multi / dual + - MULTI # Multiple audio tracks (usually OV + local dub) + - DUAL # Two audio tracks + - BILINGUAL # Two audio tracks (alternate term) + + # Original version + - VO # Version Originale + - VOF # Version Originale Française + + # English + - ENG + - ENGLISH + + # Other common languages + - SPA + - SPANISH + - GER + - GERMAN + - ITA + - ITALIAN + - POR + - PORTUGUESE + - JAP + - JAPANESE + - KOR + - KOREAN + - CHI + - CHINESE diff --git a/alfred/knowledge/release/release_format.yaml b/alfred/knowledge/release/release_format.yaml new file mode 100644 index 0000000..28af7d8 --- /dev/null +++ b/alfred/knowledge/release/release_format.yaml @@ -0,0 +1,49 @@ +# Scene release naming conventions +# Reference: standard warez scene naming rules +# +# A well-formed release name uses only the characters and structure defined here. +# Anything deviating from this is considered malformed and handed off to the AI. + +# Characters allowed in a token (a-z, A-Z, 0-9) +token_chars: "[A-Za-z0-9]" + +# Valid word separators (only one style per release — no mixing) +separators: + - "." + - "_" + +# Dash is allowed only as a group separator at the end of a tech token: x265-GROUP +group_separator: "-" + +# A release is malformed if it contains any of these +forbidden_chars: + - " " # spaces must be replaced by separator + - "[" + - "]" + - "(" + - ")" + - "{" + - "}" + - "@" + - "#" + - "!" 
+ - "+" + - "=" + - "~" + - "'" + - "%" + - "&" + - "$" + - "^" + - "`" + +# Standard element order (informational — used by AI for context) +element_order: + - title + - year # optional for TV shows + - language # optional: FRENCH, MULTI, VOSTFR, TRUEFRENCH … + - season_episode # optional: S01E01, S01, … + - resolution # optional: 720p, 1080p, 2160p … + - source # optional: BDRip, WEB-DL, HDTV … + - codec # optional: x264, x265, XviD … + - group # after final dash: -NoGroup diff --git a/alfred/knowledge/release/resolutions.yaml b/alfred/knowledge/release/resolutions.yaml new file mode 100644 index 0000000..05ce482 --- /dev/null +++ b/alfred/knowledge/release/resolutions.yaml @@ -0,0 +1,9 @@ +# Known resolution/quality tokens (case-insensitive match) +resolutions: + - 2160p + - 1080p + - 720p + - 576p + - 480p + - 4k + - 8k diff --git a/alfred/knowledge/release/sites/c411.yaml b/alfred/knowledge/release/sites/c411.yaml new file mode 100644 index 0000000..2bd579e --- /dev/null +++ b/alfred/knowledge/release/sites/c411.yaml @@ -0,0 +1,39 @@ +# c411.org site-specific release naming conventions +# Source: https://c411.org/wiki/nommage +# +# This file extends the base knowledge files with tokens and patterns +# specific to this tracker. Merged at runtime with the base knowledge. 
+ +languages: + - VFI # Version Française Internationale + - VOF # Version Originale Française + - FANSUB # Fan-subtitled release + +sources: + - 4KLight # HDLight variant for 4K + - HDLight # Compressed BluRay (custom source) + - REMUX # Lossless remux from disc + - BDMV # Full Blu-ray disc structure + - UHD # UHD BluRay (used with BluRay) + +hdr: + - HDR10PLUS # HDR10+ (alternate spelling without +) + +editions: + tokens: + - IMAX + - UNCENSORED + - CUSTOM # custom color grading / encoding + - PROPER + - REPACK + +# Site-specific media type tokens +media_type_tokens: + doc: + - DOC # Documentary marker + concert: + - CONCERT + collection: + - COLLECTION # Film pack/collection + integrale: + - INTEGRALE # Complete series (French term for COMPLETE) diff --git a/alfred/knowledge/release/sources.yaml b/alfred/knowledge/release/sources.yaml new file mode 100644 index 0000000..3c7b8eb --- /dev/null +++ b/alfred/knowledge/release/sources.yaml @@ -0,0 +1,21 @@ +# Known release source tokens (case-insensitive match) +sources: + - bluray + - blu-ray + - bdrip + - brrip + - webrip + - web-rip + - webdl + - web-dl + - web + - hdtv + - hdrip + - dvdrip + - dvd + - vodrip + - amzn + - nf + - dsnp + - hmax + - atvp diff --git a/alfred/knowledge/release/video.yaml b/alfred/knowledge/release/video.yaml new file mode 100644 index 0000000..a61c03a --- /dev/null +++ b/alfred/knowledge/release/video.yaml @@ -0,0 +1,29 @@ +# Video encoding metadata tokens: bit depth, HDR formats +# +# sequences: multi-token HDR patterns, most specific first +# hdr: single-token HDR identifiers +# bit_depth: single-token bit depth identifiers + +sequences: + - tokens: [DV, HDR10] + hdr: DV.HDR10 + - tokens: [DV, HDR] + hdr: DV.HDR + - tokens: [HDR, HDR10Plus] + hdr: HDR10+ + - tokens: [HDR10, Plus] + hdr: HDR10+ + +hdr: + - DV # Dolby Vision + - HDR10 + - HDR10Plus + - HDR + - HLG # Hybrid Log-Gamma + +bit_depth: + - 10bit + - 10Bit + - 8bit + - 8Bit + - 12bit diff --git a/testing/parse_release.py 
b/testing/parse_release.py new file mode 100644 index 0000000..1e13f2f --- /dev/null +++ b/testing/parse_release.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +""" +parse_release.py — Test ParsedRelease interactively or via CLI args. + +Usage: + uv run testing/parse_release.py "Oz.S03.1080p.WEBRip.x265-KONTRAST" + uv run testing/parse_release.py "Oz.S03.1080p.WEBRip.x265-KONTRAST" --tmdb + uv run testing/parse_release.py "Inception.2010.1080p.BluRay.x265-GROUP" --tmdb-title "Inception" --tmdb-year 2010 + uv run testing/parse_release.py --interactive +""" + +import argparse +import sys +from pathlib import Path + +_PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# --------------------------------------------------------------------------- +# Colours +# --------------------------------------------------------------------------- + +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +CYAN = "\033[36m" +BLUE = "\033[34m" + +USE_COLOR = True + + +def c(text: str, *codes: str) -> str: + if not USE_COLOR: + return str(text) + return "".join(codes) + str(text) + RESET + + +def kv(key: str, val: str, color: str = CYAN) -> None: + print(f" {c(key + ':', BOLD)} {c(val, color)}") + + +def hr() -> None: + print(c("─" * 64, DIM)) + + +# --------------------------------------------------------------------------- +# TMDB lookup +# --------------------------------------------------------------------------- + +def _fetch_tmdb(title: str) -> tuple[str | None, int | None]: + """ + Call TMDBClient.search_media() and return (canonical_title, year). + Returns (None, None) on failure. 
+ """ + try: + from alfred.infrastructure.api.tmdb import TMDBClient + client = TMDBClient() + result = client.search_media(title) + year: int | None = None + if result.release_date: + try: + year = int(result.release_date[:4]) + except (ValueError, IndexError): + pass + print(c(f" TMDB → {result.title} ({year}) [{result.media_type}] imdb={result.imdb_id}", DIM)) + return result.title, year + except Exception as e: + print(c(f" TMDB lookup failed: {e}", YELLOW)) + return None, None + + +# --------------------------------------------------------------------------- +# Display +# --------------------------------------------------------------------------- + +def _show(release_name: str, tmdb_title: str | None, tmdb_year: int | None, + tmdb_episode_title: str | None, ext: str) -> None: + from alfred.domain.release import parse_release + + p = parse_release(release_name) + + # Auto-fetch TMDB if requested and not already provided + if not (tmdb_title and tmdb_year): + fetched_title, fetched_year = _fetch_tmdb(p.title.replace(".", " ")) + tmdb_title = tmdb_title or fetched_title + tmdb_year = tmdb_year or fetched_year + + print() + print(c("━" * 64, BOLD)) + print(c(f" ParsedRelease — {p.raw}", BOLD, CYAN)) + print(c("━" * 64, BOLD)) + + # Core fields + hr() + kv("raw", p.raw) + kv("normalised", p.normalised) + kv("title", p.title) + kv("year", str(p.year) if p.year else c("None", DIM)) + kv("season", str(p.season) if p.season is not None else c("None", DIM)) + kv("episode", str(p.episode) if p.episode is not None else c("None", DIM)) + kv("episode_end", str(p.episode_end) if p.episode_end is not None else c("None", DIM)) + kv("quality", p.quality or c("None", DIM)) + kv("source", p.source or c("None", DIM)) + kv("codec", p.codec or c("None", DIM)) + kv("group", p.group, YELLOW if p.group == "UNKNOWN" else GREEN) + kv("tech_string", p.tech_string or c("(empty)", DIM)) + + # Derived booleans + hr() + kv("is_movie", c(str(p.is_movie), GREEN if p.is_movie else DIM)) + 
kv("is_season_pack", c(str(p.is_season_pack), GREEN if p.is_season_pack else DIM)) + + # Generated names + hr() + title_for_names = tmdb_title or p.title.replace(".", " ") + year_for_names = tmdb_year or p.year or 0 + + if p.is_movie: + kv("movie_folder_name", p.movie_folder_name(title_for_names, year_for_names)) + kv("movie_filename", p.movie_filename(title_for_names, year_for_names, ext)) + else: + kv("show_folder_name", p.show_folder_name(title_for_names, year_for_names)) + kv("season_folder_name", p.season_folder_name()) + if not p.is_season_pack: + kv("episode_filename", p.episode_filename(tmdb_episode_title, ext)) + else: + kv("episode_filename", c("(season pack — no episode filename)", DIM)) + + if tmdb_title or tmdb_year or tmdb_episode_title: + hr() + print(c(" TMDB data used:", DIM)) + if tmdb_title: kv(" tmdb_title", tmdb_title) + if tmdb_year: kv(" tmdb_year", str(tmdb_year)) + if tmdb_episode_title: kv(" tmdb_episode_title", tmdb_episode_title) + + print(c("━" * 64, BOLD)) + print() + + +# --------------------------------------------------------------------------- +# Interactive mode +# --------------------------------------------------------------------------- + +def _interactive() -> None: + print(c("\n Alfred — Release Parser REPL", BOLD, CYAN)) + print(c(" Type a release name, or 'q' to quit.", DIM)) + print(c(" Inline overrides: ::title=Oz ::year=1997 ::ep=The.Routine ::ext=.mkv\n", DIM)) + + while True: + try: + raw = input(c(" release> ", BOLD)).strip() + except (EOFError, KeyboardInterrupt): + print() + break + + if not raw or raw.lower() in ("q", "quit", "exit"): + break + + # Parse inline overrides: "Oz.S03E01... 
::title=Oz ::year=1997 ::tmdb" + parts = raw.split("::") + release = parts[0].strip() + overrides: dict[str, str] = {} + for part in parts[1:]: + part = part.strip() + if "=" in part: + k, _, v = part.partition("=") + overrides[k.strip()] = v.strip() + else: + overrides[part] = "1" # flag-style: ::tmdb + + tmdb_title = overrides.get("title") + tmdb_year = int(overrides["year"]) if "year" in overrides else None + tmdb_episode_title = overrides.get("ep") + ext = overrides.get("ext", ".mkv") + try: + _show(release, tmdb_title, tmdb_year, tmdb_episode_title, ext) + except Exception as e: + print(c(f" Error: {e}", RED)) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main() -> None: + global USE_COLOR + + parser = argparse.ArgumentParser( + description="Test ParsedRelease from domain/release/release_parser.py", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("release", nargs="?", help="Release name to parse") + parser.add_argument("-i", "--interactive", action="store_true", + help="Interactive REPL mode") + parser.add_argument("--tmdb-title", metavar="TITLE", + help="Override TMDB title for name generation") + parser.add_argument("--tmdb-year", metavar="YEAR", type=int, + help="Override TMDB year for name generation") + parser.add_argument("--episode-title", metavar="TITLE", + help="TMDB episode title for episode_filename()") + parser.add_argument("--ext", default=".mkv", metavar="EXT", + help="File extension for filename generation (default: .mkv)") + parser.add_argument("--no-color", action="store_true") + args = parser.parse_args() + + if args.no_color or not sys.stdout.isatty(): + USE_COLOR = False + + if args.interactive: + _interactive() + return + + if not args.release: + parser.print_help() + sys.exit(1) + + try: + _show(args.release, args.tmdb_title, args.tmdb_year, args.episode_title, args.ext) + 
except Exception as e: + print(c(f"Error: {e}", RED), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/testing/probe_video.py b/testing/probe_video.py new file mode 100644 index 0000000..8decbe7 --- /dev/null +++ b/testing/probe_video.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +probe_video.py — Display MediaInfo extracted by ffprobe for a video file. + +Usage: + uv run testing/probe_video.py /path/to/video.mkv + uv run testing/probe_video.py /path/to/video.mkv --no-color +""" + +import argparse +import sys +from pathlib import Path + +_PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# --------------------------------------------------------------------------- +# Colours +# --------------------------------------------------------------------------- + +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +CYAN = "\033[36m" +BLUE = "\033[34m" + +USE_COLOR = True + + +def c(text: str, *codes: str) -> str: + if not USE_COLOR: + return str(text) + return "".join(codes) + str(text) + RESET + + +def kv(key: str, val: str, indent: int = 4, color: str = CYAN) -> None: + print(f"{' ' * indent}{c(key + ':', BOLD)} {c(val, color)}") + + +def section(title: str) -> None: + print() + print(f" {c('▸ ' + title, BOLD, BLUE)}") + + +def hr() -> None: + print(c("─" * 70, DIM)) + + +# --------------------------------------------------------------------------- +# Formatting helpers +# --------------------------------------------------------------------------- + +def fmt_duration(seconds: float) -> str: + h = int(seconds // 3600) + m = int((seconds % 3600) // 60) + s = int(seconds % 60) + if h: + return f"{h}h {m:02d}m {s:02d}s" + return f"{m}m {s:02d}s" + + +def fmt_channels(channels: int | None, layout: str | None) -> str: + parts = [] + if channels is not None: + parts.append(str(channels) + "ch") + 
if layout: + parts.append(f"({layout})") + return " ".join(parts) if parts else "—" + + +def flag(val: bool) -> str: + return c("yes", GREEN) if val else c("no", DIM) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main() -> None: + global USE_COLOR + + parser = argparse.ArgumentParser(description="Probe a video file with ffprobe") + parser.add_argument("file", help="Path to the video file") + parser.add_argument("--no-color", action="store_true") + args = parser.parse_args() + + if args.no_color or not sys.stdout.isatty(): + USE_COLOR = False + + path = Path(args.file) + if not path.exists(): + print(c(f"Error: {path} does not exist", RED), file=sys.stderr) + sys.exit(1) + + from alfred.infrastructure.filesystem.ffprobe import probe + + info = probe(path) + if info is None: + print(c("Error: ffprobe failed to probe the file", RED), file=sys.stderr) + sys.exit(1) + + print() + print(c("━" * 70, BOLD)) + print(c(f" {path.name}", BOLD, CYAN)) + print(c(f" {path}", DIM)) + print(c("━" * 70, BOLD)) + + # --- Video --- + section("Video") + kv("codec", info.video_codec or c("—", DIM)) + kv("resolution", info.resolution or c("—", DIM)) + if info.width and info.height: + kv("dimensions", f"{info.width} × {info.height}") + if info.duration_seconds is not None: + kv("duration", fmt_duration(info.duration_seconds)) + if info.bitrate_kbps is not None: + kv("bitrate", f"{info.bitrate_kbps} kbps") + + # --- Audio --- + section(f"Audio {c(str(len(info.audio_tracks)) + ' track(s)', DIM)}") + if not info.audio_tracks: + print(f" {c('no audio tracks found', DIM)}") + for track in info.audio_tracks: + lang = track.language or "und" + default_marker = f" {c('default', GREEN, DIM)}" if track.is_default else "" + print(f" {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{default_marker}") + kv("codec", track.codec or c("—", DIM), indent=8) + kv("channels", 
fmt_channels(track.channels, track.channel_layout), indent=8) + + # --- Subtitles --- + section(f"Subtitles {c(str(len(info.subtitle_tracks)) + ' track(s)', DIM)}") + if not info.subtitle_tracks: + print(f" {c('no embedded subtitle tracks', DIM)}") + for track in info.subtitle_tracks: + lang = track.language or "und" + markers = [] + if track.is_default: + markers.append(c("default", GREEN, DIM)) + if track.is_forced: + markers.append(c("forced", YELLOW, DIM)) + marker_str = (" " + " ".join(markers)) if markers else "" + print(f" {c(f'[{track.index}]', BOLD)} {c(lang, YELLOW)}{marker_str}") + kv("codec", track.codec or c("—", DIM), indent=8) + + # --- Summary --- + print() + hr() + multi = c("yes", GREEN) if info.is_multi_audio else c("no", DIM) + langs = ", ".join(info.audio_languages) if info.audio_languages else c("—", DIM) + print(f" {c('multi-audio:', BOLD)} {multi} {c('languages:', BOLD)} {c(langs, CYAN)}") + hr() + print() + + +if __name__ == "__main__": + main() diff --git a/testing/recognize_folders_in_downloads.py b/testing/recognize_folders_in_downloads.py new file mode 100644 index 0000000..3ec0f31 --- /dev/null +++ b/testing/recognize_folders_in_downloads.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +""" +recognize_folders_in_downloads.py — Parse every folder/file in the downloads directory. 
+ +Usage: + uv run testing/recognize_folders_in_downloads.py + uv run testing/recognize_folders_in_downloads.py --path /mnt/testipool/downloads + uv run testing/recognize_folders_in_downloads.py --failures-only + uv run testing/recognize_folders_in_downloads.py --successes-only +""" + +import argparse +import sys +from pathlib import Path + +_PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# --------------------------------------------------------------------------- +# Colours +# --------------------------------------------------------------------------- + +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +CYAN = "\033[36m" + +USE_COLOR = True + + +def c(text: str, *codes: str) -> str: + if not USE_COLOR: + return str(text) + return "".join(codes) + str(text) + RESET + + +def kv(key: str, val: str, indent: int = 4, color: str = CYAN) -> None: + print(f"{' ' * indent}{c(key + ':', BOLD)} {c(val, color)}") + + +def hr() -> None: + print(c("─" * 70, DIM)) + + +# --------------------------------------------------------------------------- +# Parsing quality check +# --------------------------------------------------------------------------- + +def _assess(p) -> list[str]: + """Return a list of warning strings for fields that look wrong.""" + if p.media_type in ("other", "unknown"): + return [] + warnings = [] + if p.group == "UNKNOWN": + warnings.append("group not found") + if not p.quality: + warnings.append("resolution not found") + if not p.codec: + warnings.append("codec not found") + if not p.title or p.title == p.normalised: + warnings.append("title extraction likely wrong") + return warnings + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main() -> None: + global USE_COLOR + + parser = 
argparse.ArgumentParser(description="Recognize release folders in downloads") + parser.add_argument("--path", default="/mnt/testipool/downloads", + help="Downloads directory (default: /mnt/testipool/downloads)") + parser.add_argument("--failures-only", action="store_true", + help="Show only entries with warnings") + parser.add_argument("--successes-only", action="store_true", + help="Show only fully parsed entries") + parser.add_argument("--no-color", action="store_true") + args = parser.parse_args() + + if args.no_color or not sys.stdout.isatty(): + USE_COLOR = False + + downloads = Path(args.path) + if not downloads.exists(): + print(c(f"Error: {downloads} does not exist", RED), file=sys.stderr) + sys.exit(1) + + from alfred.domain.release.services import parse_release + from alfred.application.filesystem.detect_media_type import detect_media_type + from alfred.application.filesystem.enrich_from_probe import enrich_from_probe + from alfred.infrastructure.filesystem.find_video import find_video_file + from alfred.infrastructure.filesystem.ffprobe import probe + + entries = sorted(downloads.iterdir(), key=lambda p: p.name.lower()) + total = len(entries) + ok_count = 0 + warn_count = 0 + + print() + print(c("━" * 70, BOLD)) + print(c(f" Downloads — {downloads}", BOLD, CYAN)) + print(c(f" {total} entries", DIM)) + print(c("━" * 70, BOLD)) + + for entry in entries: + name = entry.name + + try: + p = parse_release(name) + p.media_type = detect_media_type(p, entry) + if p.media_type not in ("unknown", "other"): + video_file = find_video_file(entry) + if video_file: + media_info = probe(video_file) + if media_info: + enrich_from_probe(p, media_info) + warnings = _assess(p) + except Exception as e: + warnings = [f"parse error: {e}"] + p = None + + has_warnings = bool(warnings) + + if args.failures_only and not has_warnings: + continue + if args.successes_only and has_warnings: + continue + + print() + path_label = "" + if p: + path_label = { + "direct": c("direct", GREEN, 
DIM), + "sanitized": c("sanitized", YELLOW), + "ai": c("ai", RED), + }.get(p.parse_path, p.parse_path) + + if has_warnings: + warn_count += 1 + print(f" {c('⚠', YELLOW, BOLD)} {c(name, YELLOW)} {path_label}") + else: + ok_count += 1 + print(f" {c('✓', GREEN, BOLD)} {c(name, BOLD)} {path_label}") + + if p: + kind = { + "movie": "movie", + "tv_show": "season pack" if p.is_season_pack else "episode", + "tv_complete": c("tv complete", CYAN), + "documentary": c("documentary", CYAN), + "concert": c("concert", CYAN), + "other": c("other", RED), + "unknown": c("unknown", YELLOW), + }.get(p.media_type, p.media_type) + kv("type", kind) + kv("title", p.title) + if p.season is not None: + ep = f"E{p.episode:02d}" if p.episode is not None else "—" + kv("season/ep", f"S{p.season:02d} / {ep}") + if p.year: + kv("year", str(p.year)) + if p.languages: + kv("langs", " ".join(p.languages)) + kv("quality", p.quality or c("—", DIM)) + kv("source", p.source or c("—", DIM)) + kv("codec", p.codec or c("—", DIM)) + if p.audio_codec: + ch = f" {p.audio_channels}" if p.audio_channels else "" + kv("audio", f"{p.audio_codec}{ch}") + if p.bit_depth or p.hdr_format: + hdr_parts = [x for x in [p.bit_depth, p.hdr_format] if x] + kv("hdr/depth", " ".join(hdr_parts)) + if p.edition: + kv("edition", p.edition, color=YELLOW) + kv("group", p.group, + color=YELLOW if p.group == "UNKNOWN" else GREEN) + if p.site_tag: + kv("site tag", p.site_tag, color=YELLOW) + + if warnings: + for w in warnings: + print(f" {c('→ ' + w, YELLOW)}") + + # Summary + print() + hr() + skipped = total - ok_count - warn_count + print(f" {c('Total:', BOLD)} {total} " + f"{c(str(ok_count) + ' ok', GREEN, BOLD)} " + f"{c(str(warn_count) + ' warnings', YELLOW, BOLD)}" + + (f" {c(str(skipped) + ' filtered', DIM)}" if skipped else "")) + hr() + print() + + +if __name__ == "__main__": + main() diff --git a/testing/workflows/run_workflow.py b/testing/workflows/run_workflow.py index 208f57e..ab46fbe 100755 --- 
a/testing/workflows/run_workflow.py +++ b/testing/workflows/run_workflow.py @@ -79,24 +79,67 @@ def kv(key: str, val: str) -> None: # Dry-run tool stubs # --------------------------------------------------------------------------- -def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]: - return { - "status": "ok", - "folder_type": folder_type, - "path": path, - "entries": ["[dry-run — no real listing]"], - "count": 1, - } +def _real_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]: + """Call the real list_folder (read-only, safe in dry-run).""" + # TODO: remove hardcoded fallback once download path is configured in LTM + _HARDCODED_DOWNLOAD_ROOT = "/mnt/testipool/downloads" + + try: + from alfred.infrastructure.persistence import get_memory, init_memory + try: + get_memory() + except Exception: + init_memory() + from alfred.agent.tools.filesystem import list_folder + result = list_folder(folder_type=folder_type, path=path) + if result.get("status") == "error" and folder_type == "download": + raise RuntimeError(result.get("message", "not configured")) + return result + except Exception as e: + if folder_type == "download": + warn(f"list_folder: {e} — using hardcoded download root: {_HARDCODED_DOWNLOAD_ROOT}") + import os + resolved = os.path.join(_HARDCODED_DOWNLOAD_ROOT, path) if path != "." 
else _HARDCODED_DOWNLOAD_ROOT + try: + entries = sorted(os.listdir(resolved)) + except OSError as oe: + return {"status": "error", "error": "os_error", "message": str(oe)} + return { + "status": "ok", + "folder_type": folder_type, + "path": resolved, + "entries": entries, + "count": len(entries), + } + warn(f"list_folder: filesystem unavailable ({e}), falling back to stub") + return { + "status": "ok", + "folder_type": folder_type, + "path": path, + "entries": ["[stub — filesystem unavailable]"], + "count": 1, + } -def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]: - return { - "status": "ok", - "imdb_id": kwargs.get("imdb_id") or "tt0000000", - "title": "Dry Run Show", - "type": "tv_show", - "year": 2024, - } +def _real_find_media_imdb_id(media_title: str, **kwargs) -> dict[str, Any]: + """Call the real TMDB API even in dry-run (read-only, no filesystem side effects).""" + try: + from alfred.infrastructure.persistence import get_memory, init_memory + try: + get_memory() + except Exception: + init_memory() + from alfred.agent.tools.api import find_media_imdb_id + return find_media_imdb_id(media_title=media_title) + except Exception as e: + warn(f"find_media_imdb_id: TMDB unavailable ({e}), falling back to stub") + return { + "status": "ok", + "imdb_id": "tt0000000", + "title": media_title, + "media_type": "tv_show", + "year": 2024, + } def _dry_resolve_destination( @@ -107,7 +150,7 @@ def _dry_resolve_destination( tmdb_episode_title: str | None = None, confirmed_folder: str | None = None, ) -> dict[str, Any]: - from alfred.domain.media.release_parser import parse_release + from alfred.domain.release import parse_release parsed = parse_release(release_name) ext = Path(source_file).suffix if parsed.is_movie: @@ -170,8 +213,8 @@ def _dry_create_seed_links(library_file: str, original_download_folder: str) -> DRY_RUN_TOOLS: dict[str, Any] = { - "list_folder": _dry_list_folder, - "find_media_imdb_id": _dry_find_media_imdb_id, + "list_folder": _real_list_folder, + 
"find_media_imdb_id": _real_find_media_imdb_id, "resolve_destination": _dry_resolve_destination, "move_media": _dry_move_media, "manage_subtitles": _dry_manage_subtitles, @@ -316,10 +359,22 @@ class WorkflowRunner: self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}}) return - self._print_result(result) + self._print_result(result, tool_name=tool_name) self.context[step_id] = result self.step_results.append({"id": step_id, "result": result}) + # After list_downloads: confirm the requested media folder exists in downloads + if tool_name == "list_folder" and result.get("status") == "ok" and self.args.source: + folder_path = result.get("path", "") + entries = result.get("entries", []) + if self.args.source in entries: + media_folder = str(Path(folder_path) / self.args.source) + self.context["media_folder"] = media_folder + print() + print(f" {c('Dossier media trouvé:', BOLD, GREEN)} {c(media_folder, CYAN, BOLD)}") + else: + warn(f"Dossier '{self.args.source}' introuvable dans {folder_path}") + def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]: """Build tool kwargs from step params + CLI args + previous context.""" # Start from step-level params (static defaults from YAML) @@ -335,12 +390,13 @@ class WorkflowRunner: kwargs["imdb_id"] = a.imdb_id elif tool_name == "resolve_destination": + media_folder = self.context.get("media_folder") if a.release: kwargs["release_name"] = a.release elif a.source: - kwargs.setdefault("release_name", Path(a.source).parent.name) - if a.source: - kwargs["source_file"] = a.source + kwargs.setdefault("release_name", a.source) + if media_folder: + kwargs["source_file"] = media_folder if a.tmdb_title: kwargs["tmdb_title"] = a.tmdb_title if a.tmdb_year: @@ -351,16 +407,18 @@ class WorkflowRunner: elif tool_name == "move_media": # If resolve_destination ran, use its library_file as destination resolved = self.context.get("resolve_destination", {}) - if a.source: - kwargs["source"] = 
a.source + media_folder = self.context.get("media_folder") + if media_folder: + kwargs["source"] = media_folder dest = a.dest or resolved.get("library_file") if dest: kwargs["destination"] = dest elif tool_name == "manage_subtitles": resolved = self.context.get("resolve_destination", {}) - if a.source: - kwargs["source_video"] = a.source + media_folder = self.context.get("media_folder") + if media_folder: + kwargs["source_video"] = media_folder dest = a.dest or resolved.get("library_file") if dest: kwargs["destination_video"] = dest @@ -372,12 +430,16 @@ class WorkflowRunner: kwargs["library_file"] = library_file if a.download_folder: kwargs["original_download_folder"] = a.download_folder - elif a.source: - kwargs.setdefault("original_download_folder", str(Path(a.source).parent)) + else: + # Use the resolved folder path from list_downloads context + list_result = self.context.get("list_downloads", {}) + folder_path = list_result.get("path") + if folder_path: + kwargs.setdefault("original_download_folder", folder_path) return kwargs - def _print_result(self, result: dict) -> None: + def _print_result(self, result: dict, tool_name: str = "") -> None: status = result.get("status", "?") if status == "ok": ok(f"status={c('ok', GREEN)}") @@ -387,6 +449,11 @@ class WorkflowRunner: err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}") return + # Highlight resolved folder path for list_folder + if tool_name == "list_folder" and result.get("path"): + print() + print(f" {c('Dossier résolu:', BOLD, GREEN)} {c(result['path'], CYAN, BOLD)}") + # Pretty-print notable fields skip = {"status", "error", "message"} for k, v in result.items(): @@ -420,8 +487,8 @@ def parse_args() -> argparse.Namespace: help="Simulate steps without executing tools (default)") parser.add_argument("--live", action="store_true", help="Actually execute tools against the real filesystem") - parser.add_argument("--source", metavar="PATH", - help="Source video file (in 
download folder)") + parser.add_argument("--source", metavar="FOLDER_NAME", + help="Release folder name inside the download root (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)") parser.add_argument("--dest", metavar="PATH", help="Destination video file (in library, overrides resolve_destination)") parser.add_argument("--download-folder", metavar="PATH", diff --git a/tests/domain/test_release_parser.py b/tests/domain/test_release_parser.py index 8bc0fc9..bf737ed 100644 --- a/tests/domain/test_release_parser.py +++ b/tests/domain/test_release_parser.py @@ -1,5 +1,5 @@ """ -Tests for alfred.domain.media.release_parser +Tests for alfred.domain.release.release_parser Real-data cases sourced from /mnt/testipool/downloads/. Covers: parsing, normalisation, naming methods, edge cases. @@ -7,13 +7,9 @@ Covers: parsing, normalisation, naming methods, edge cases. import pytest -from alfred.domain.media.release_parser import ( - ParsedRelease, - _normalise, - _sanitise_for_fs, - _strip_episode_from_normalised, - parse_release, -) +from alfred.domain.release import ParsedRelease, parse_release +from alfred.domain.release.services import _normalise +from alfred.domain.release.value_objects import _sanitise_for_fs, _strip_episode_from_normalised # ---------------------------------------------------------------------------