From 249c5de76aceeb3fa9395864b03845747ecfc17c Mon Sep 17 00:00:00 2001 From: Francwa Date: Mon, 11 May 2026 21:33:37 +0200 Subject: [PATCH] feat: major architectural refactor - Refactor memory system (episodic/STM/LTM with components) - Implement complete subtitle domain (scanner, matcher, placer) - Add YAML workflow infrastructure - Externalize knowledge base (patterns, release groups) - Add comprehensive testing suite - Create manual testing CLIs --- README.md | 524 ++++++---------- alfred/agent/prompts.py | 30 +- alfred/agent/registry.py | 5 + alfred/agent/tools/filesystem.py | 192 +++++- alfred/agent/workflows/__init__.py | 3 + alfred/agent/workflows/loader.py | 52 ++ alfred/agent/workflows/manage_subtitles.yaml | 69 +++ alfred/agent/workflows/organize_media.yaml | 82 +++ alfred/application/filesystem/__init__.py | 22 +- .../filesystem/create_seed_links.py | 54 ++ alfred/application/filesystem/dto.py | 150 ++++- .../filesystem/manage_subtitles.py | 258 ++++++++ alfred/application/filesystem/move_media.py | 43 ++ .../filesystem/resolve_destination.py | 246 ++++++++ alfred/domain/media/__init__.py | 5 + alfred/domain/media/release_parser.py | 306 ++++++++++ alfred/domain/subtitles/__init__.py | 37 +- alfred/domain/subtitles/aggregates.py | 90 +++ alfred/domain/subtitles/entities.py | 145 +++-- alfred/domain/subtitles/knowledge/__init__.py | 4 + alfred/domain/subtitles/knowledge/base.py | 151 +++++ alfred/domain/subtitles/knowledge/loader.py | 131 ++++ alfred/domain/subtitles/scanner.py | 221 +++++++ alfred/domain/subtitles/services/__init__.py | 13 + .../domain/subtitles/services/identifier.py | 287 +++++++++ alfred/domain/subtitles/services/matcher.py | 118 ++++ .../subtitles/services/pattern_detector.py | 205 +++++++ alfred/domain/subtitles/services/placer.py | 93 +++ alfred/domain/subtitles/services/utils.py | 21 + alfred/domain/subtitles/value_objects.py | 142 ++--- alfred/domain/tv_shows/entities.py | 11 - .../infrastructure/filesystem/file_manager.py | 308 
+++++----- alfred/infrastructure/persistence/__init__.py | 17 +- alfred/infrastructure/persistence/context.py | 36 +- .../persistence/json/subtitle_repository.py | 8 - .../persistence/json/tvshow_repository.py | 9 - alfred/infrastructure/persistence/memory.py | 577 ------------------ .../persistence/memory/__init__.py | 4 + .../infrastructure/persistence/memory/base.py | 90 +++ .../persistence/memory/episodic/__init__.py | 3 + .../memory/episodic/components/__init__.py | 6 + .../memory/episodic/components/downloads.py | 56 ++ .../memory/episodic/components/errors.py | 46 ++ .../memory/episodic/components/events.py | 49 ++ .../episodic/components/search_results.py | 52 ++ .../persistence/memory/episodic/episodic.py | 126 ++++ .../persistence/memory/ltm/__init__.py | 3 + .../memory/ltm/components/__init__.py | 15 + .../memory/ltm/components/following.py | 43 ++ .../memory/ltm/components/library.py | 64 ++ .../memory/ltm/components/library_paths.py | 70 +++ .../ltm/components/media_preferences.py | 52 ++ .../ltm/components/subtitle_preferences.py | 80 +++ .../memory/ltm/components/workspace.py | 57 ++ .../persistence/memory/ltm/ltm.py | 65 ++ .../persistence/memory/registry.py | 80 +++ .../persistence/memory/stm/__init__.py | 3 + .../memory/stm/components/__init__.py | 5 + .../memory/stm/components/conversation.py | 55 ++ .../memory/stm/components/entities.py | 48 ++ .../memory/stm/components/workflow.py | 53 ++ .../persistence/memory/stm/stm.py | 91 +++ alfred/infrastructure/subtitle/__init__.py | 6 + .../infrastructure/subtitle/metadata_store.py | 144 +++++ .../subtitle/rule_repository.py | 116 ++++ alfred/knowledge/patterns/adjacent.yaml | 13 + alfred/knowledge/patterns/embedded.yaml | 14 + .../knowledge/patterns/episode_subfolder.yaml | 16 + alfred/knowledge/patterns/subs_flat.yaml | 14 + .../knowledge/release_groups/KONSTRAST.yaml | 5 + alfred/knowledge/release_groups/RARBG.yaml | 2 + alfred/knowledge/subtitles.yaml | 89 +++ testing/subtitles/scan_subtitles.py | 
528 ++++++++++++++++ testing/workflows/run_workflow.py | 479 +++++++++++++++ tests/agent/__init__.py | 0 tests/agent/test_registry.py | 208 +++++++ tests/application/__init__.py | 0 tests/application/conftest.py | 41 ++ tests/application/test_create_seed_links.py | 117 ++++ .../test_list_folder_move_media.py | 179 ++++++ tests/application/test_resolve_destination.py | 315 ++++++++++ tests/conftest.py | 8 +- tests/domain/__init__.py | 0 tests/domain/test_release_parser.py | 465 ++++++++++++++ tests/domain/test_shared_value_objects.py | 136 +++++ tests/domain/test_subtitle_scanner.py | 217 +++++++ tests/domain/test_tv_shows.py | 223 +++++++ tests/infrastructure/__init__.py | 0 tests/infrastructure/conftest.py | 43 ++ tests/infrastructure/test_file_manager.py | 325 ++++++++++ tests/test_agent.py | 8 +- tests/test_agent_edge_cases.py | 6 +- tests/test_api_edge_cases.py | 8 +- tests/test_memory.py | 14 +- tests/test_prompts.py | 2 +- tests/test_prompts_critical.py | 6 +- tests/test_prompts_edge_cases.py | 12 +- tests/test_registry_edge_cases.py | 6 +- tests/test_tools_edge_cases.py | 18 +- tests/test_tools_filesystem.py | 32 +- tests/test_tools_language.py | 41 ++ tests/workflows/__init__.py | 0 tests/workflows/test_workflow_loader.py | 168 +++++ 103 files changed, 8559 insertions(+), 1346 deletions(-) create mode 100644 alfred/agent/workflows/__init__.py create mode 100644 alfred/agent/workflows/loader.py create mode 100644 alfred/agent/workflows/manage_subtitles.yaml create mode 100644 alfred/agent/workflows/organize_media.yaml create mode 100644 alfred/application/filesystem/create_seed_links.py create mode 100644 alfred/application/filesystem/manage_subtitles.py create mode 100644 alfred/application/filesystem/move_media.py create mode 100644 alfred/application/filesystem/resolve_destination.py create mode 100644 alfred/domain/media/__init__.py create mode 100644 alfred/domain/media/release_parser.py create mode 100644 alfred/domain/subtitles/aggregates.py create 
mode 100644 alfred/domain/subtitles/knowledge/__init__.py create mode 100644 alfred/domain/subtitles/knowledge/base.py create mode 100644 alfred/domain/subtitles/knowledge/loader.py create mode 100644 alfred/domain/subtitles/scanner.py create mode 100644 alfred/domain/subtitles/services/__init__.py create mode 100644 alfred/domain/subtitles/services/identifier.py create mode 100644 alfred/domain/subtitles/services/matcher.py create mode 100644 alfred/domain/subtitles/services/pattern_detector.py create mode 100644 alfred/domain/subtitles/services/placer.py create mode 100644 alfred/domain/subtitles/services/utils.py delete mode 100644 alfred/infrastructure/persistence/memory.py create mode 100644 alfred/infrastructure/persistence/memory/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/base.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/components/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/components/downloads.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/components/errors.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/components/events.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/components/search_results.py create mode 100644 alfred/infrastructure/persistence/memory/episodic/episodic.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/following.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/library.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/library_paths.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/media_preferences.py create mode 100644 
alfred/infrastructure/persistence/memory/ltm/components/subtitle_preferences.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/components/workspace.py create mode 100644 alfred/infrastructure/persistence/memory/ltm/ltm.py create mode 100644 alfred/infrastructure/persistence/memory/registry.py create mode 100644 alfred/infrastructure/persistence/memory/stm/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/stm/components/__init__.py create mode 100644 alfred/infrastructure/persistence/memory/stm/components/conversation.py create mode 100644 alfred/infrastructure/persistence/memory/stm/components/entities.py create mode 100644 alfred/infrastructure/persistence/memory/stm/components/workflow.py create mode 100644 alfred/infrastructure/persistence/memory/stm/stm.py create mode 100644 alfred/infrastructure/subtitle/__init__.py create mode 100644 alfred/infrastructure/subtitle/metadata_store.py create mode 100644 alfred/infrastructure/subtitle/rule_repository.py create mode 100644 alfred/knowledge/patterns/adjacent.yaml create mode 100644 alfred/knowledge/patterns/embedded.yaml create mode 100644 alfred/knowledge/patterns/episode_subfolder.yaml create mode 100644 alfred/knowledge/patterns/subs_flat.yaml create mode 100644 alfred/knowledge/release_groups/KONSTRAST.yaml create mode 100644 alfred/knowledge/release_groups/RARBG.yaml create mode 100644 alfred/knowledge/subtitles.yaml create mode 100644 testing/subtitles/scan_subtitles.py create mode 100755 testing/workflows/run_workflow.py create mode 100644 tests/agent/__init__.py create mode 100644 tests/agent/test_registry.py create mode 100644 tests/application/__init__.py create mode 100644 tests/application/conftest.py create mode 100644 tests/application/test_create_seed_links.py create mode 100644 tests/application/test_list_folder_move_media.py create mode 100644 tests/application/test_resolve_destination.py create mode 100644 tests/domain/__init__.py create mode 100644 
tests/domain/test_release_parser.py create mode 100644 tests/domain/test_shared_value_objects.py create mode 100644 tests/domain/test_subtitle_scanner.py create mode 100644 tests/domain/test_tv_shows.py create mode 100644 tests/infrastructure/__init__.py create mode 100644 tests/infrastructure/conftest.py create mode 100644 tests/infrastructure/test_file_manager.py create mode 100644 tests/test_tools_language.py create mode 100644 tests/workflows/__init__.py create mode 100644 tests/workflows/test_workflow_loader.py diff --git a/README.md b/README.md index 44727b6..72a09ba 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface. [![Python 3.14](https://img.shields.io/badge/python-3.14-blue.svg)](https://www.python.org/downloads/) -[![Poetry](https://img.shields.io/badge/dependency%20manager-poetry-blue)](https://python-poetry.org/) +[![uv](https://img.shields.io/badge/dependency%20manager-uv-purple)](https://github.com/astral-sh/uv) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff) @@ -13,9 +13,10 @@ An AI-powered agent for managing your local media library with natural language. 
- πŸ” **Smart Search** β€” Find movies and TV shows via TMDB with rich metadata - πŸ“₯ **Torrent Integration** β€” Search and download via qBittorrent - 🧠 **Contextual Memory** β€” Remembers your preferences and conversation history -- πŸ“ **Auto-Organization** β€” Keeps your media library tidy and well-structured -- 🌐 **OpenAI-Compatible API** β€” Works with any OpenAI-compatible client -- πŸ–₯️ **LibreChat Frontend** β€” Beautiful web UI included out of the box +- πŸ“ **Auto-Organization** β€” Moves and renames media files, resolves destinations, handles subtitles +- 🎞️ **Subtitle Pipeline** β€” Identifies, matches, and places subtitle tracks automatically +- πŸ”„ **Workflow Engine** β€” YAML-defined multi-step workflows (e.g. `organize_media`) +- 🌐 **OpenAI-Compatible API** β€” Works with any OpenAI-compatible client (LibreChat, OpenWebUI, etc.) - πŸ”’ **Secure by Default** β€” Auto-generated secrets and encrypted credentials ## πŸ—οΈ Architecture @@ -26,33 +27,50 @@ Built with **Domain-Driven Design (DDD)** principles for clean separation of con alfred/ β”œβ”€β”€ agent/ # AI agent orchestration β”‚ β”œβ”€β”€ llm/ # LLM clients (Ollama, DeepSeek) -β”‚ └── tools/ # Tool implementations +β”‚ β”œβ”€β”€ tools/ # Tool implementations (api, filesystem, language) +β”‚ └── workflows/ # YAML-defined multi-step workflows β”œβ”€β”€ application/ # Use cases & DTOs -β”‚ β”œβ”€β”€ movies/ # Movie search use cases +β”‚ β”œβ”€β”€ movies/ # Movie search β”‚ β”œβ”€β”€ torrents/ # Torrent management -β”‚ └── filesystem/ # File operations +β”‚ └── filesystem/ # File operations (move, list, subtitles, seed links) β”œβ”€β”€ domain/ # Business logic & entities +β”‚ β”œβ”€β”€ media/ # Release parsing β”‚ β”œβ”€β”€ movies/ # Movie entities -β”‚ β”œβ”€β”€ tv_shows/ # TV show entities -β”‚ └── subtitles/ # Subtitle entities +β”‚ β”œβ”€β”€ tv_shows/ # TV show entities & value objects +β”‚ β”œβ”€β”€ subtitles/ # Subtitle scanner, services, knowledge base +β”‚ └── shared/ # Common value 
objects (ImdbId, FilePath, FileSize) └── infrastructure/ # External services & persistence - β”œβ”€β”€ api/ # External API clients (TMDB, qBittorrent) - β”œβ”€β”€ filesystem/ # File system operations - └── persistence/ # Memory & repositories + β”œβ”€β”€ api/ # External API clients (TMDB, qBittorrent, Knaben) + β”œβ”€β”€ filesystem/ # File manager (hard-link based, path-traversal safe) + β”œβ”€β”€ persistence/ # Three-tier memory (LTM/STM/Episodic) + JSON repositories + └── subtitle/ # Subtitle infrastructure ``` -See [docs/architecture_diagram.md](docs/architecture_diagram.md) for detailed architectural diagrams. +### Key flows + +**Agent execution:** `agent.step(user_input)` β†’ LLM call β†’ if tool_calls, execute each via registry β†’ loop until no tool calls or `max_tool_iterations` β†’ return final response. + +**Media organization workflow:** +1. `resolve_destination` β€” Determines target folder/filename from release name +2. `move_media` β€” Hard-links file to library, deletes source +3. `manage_subtitles` β€” Scans, classifies, and places subtitle tracks +4. 
`create_seed_links` — Hard-links library file back to torrents/ for continued seeding + +**Memory tiers:** +- **LTM** (`data/memory/ltm.json`) — Persisted config, media library, watchlist +- **STM** — Conversation history (capped at `MAX_HISTORY_MESSAGES`) +- **Episodic** — Transient search results, active downloads, recent errors ## 🚀 Quick Start ### Prerequisites -- **Python 3.14+** (required) -- **Poetry** (dependency manager) +- **Python 3.14+** +- **uv** (dependency manager) - **Docker & Docker Compose** (recommended for full stack) - **API Keys:** - TMDB API key ([get one here](https://www.themoviedb.org/settings/api)) - - Optional: DeepSeek, OpenAI, Anthropic, or other LLM provider keys + - Optional: DeepSeek or other LLM provider keys ### Installation @@ -64,9 +82,15 @@ cd alfred_media_organizer # Install dependencies make install +# Install pre-commit hooks +make install-hooks + # Bootstrap environment (generates .env with secure secrets) make bootstrap +# Validate your .env against the schema +make validate + # Edit .env with your API keys nano .env ``` @@ -94,162 +118,95 @@ The web interface will be available at **http://localhost:3080** ### Running Locally (Development) ```bash -# Install dependencies -poetry install - -# Start the API server -poetry run uvicorn alfred.app:app --reload --port 8000 +uv run uvicorn alfred.app:app --reload --port 8000 ``` ## ⚙️ Configuration -### Environment Bootstrap +### Settings system -Alfred uses a smart bootstrap system that: +`settings.toml` is the single source of truth. The schema flows: -1. **Generates secure secrets** automatically (JWT tokens, database passwords, encryption keys) -2. **Syncs build variables** from `pyproject.toml` (versions, image names) -3. **Preserves existing secrets** when re-running (never overwrites your API keys) -4. 
**Computes database URIs** automatically from individual components +``` +settings.toml β†’ settings_schema.py β†’ settings_bootstrap.py β†’ .env + .env.make β†’ settings.py +``` + +To add a setting: define it in `settings.toml`, run `make bootstrap`, then access via `settings.my_new_setting`. ```bash # First time setup make bootstrap -# Re-run after updating pyproject.toml (secrets are preserved) +# Validate existing .env against schema +make validate + +# Re-run after settings.toml changes (existing secrets preserved) make bootstrap ``` -### Configuration File (.env) +**Never commit `.env` or `.env.make`** β€” both are gitignored and auto-generated. -The `.env` file is generated from `.env.example` with secure defaults: +### Key settings (.env) ```bash -# --- CORE SETTINGS --- -HOST=0.0.0.0 -PORT=3080 +# --- CORE --- MAX_HISTORY_MESSAGES=10 MAX_TOOL_ITERATIONS=10 -# --- LLM CONFIGURATION --- -# Providers: 'local' (Ollama), 'deepseek', 'openai', 'anthropic', 'google' -DEFAULT_LLM_PROVIDER=local - -# Local LLM (Ollama - included in Docker stack) +# --- LLM --- +DEFAULT_LLM_PROVIDER=local # local (Ollama) | deepseek OLLAMA_BASE_URL=http://ollama:11434 OLLAMA_MODEL=llama3.3:latest LLM_TEMPERATURE=0.2 -# --- API KEYS (fill only what you need) --- -TMDB_API_KEY=your-tmdb-key-here # Required for movie search -DEEPSEEK_API_KEY= # Optional -OPENAI_API_KEY= # Optional -ANTHROPIC_API_KEY= # Optional +# --- API KEYS --- +TMDB_API_KEY=your-tmdb-key # Required for movie/show search +DEEPSEEK_API_KEY= # Optional -# --- SECURITY (auto-generated, don't modify) --- -JWT_SECRET= -JWT_REFRESH_SECRET= -CREDS_KEY= -CREDS_IV= - -# --- DATABASES (auto-generated passwords) --- -MONGO_PASSWORD= -POSTGRES_PASSWORD= +# --- SECURITY (auto-generated) --- +JWT_SECRET= +CREDS_KEY= +MONGO_PASSWORD= ``` -### Security Keys - -Security keys are defined in `pyproject.toml` and generated automatically: - -```toml -[tool.alfred.security] -jwt_secret = "32:b64" # 32 bytes, base64 URL-safe 
-jwt_refresh_secret = "32:b64" -creds_key = "32:hex" # 32 bytes, hexadecimal (AES-256) -creds_iv = "16:hex" # 16 bytes, hexadecimal (AES IV) -mongo_password = "16:hex" -postgres_password = "16:hex" -``` - -**Formats:** -- `b64` β€” Base64 URL-safe (for JWT tokens) -- `hex` β€” Hexadecimal (for encryption keys, passwords) - ## 🐳 Docker Services -### Service Architecture - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ alfred-net (bridge) β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ LibreChat │───▢│ Alfred │───▢│ MongoDB β”‚ β”‚ -β”‚ β”‚ :3080 β”‚ β”‚ (core) β”‚ β”‚ :27017 β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ β–Ό β”‚ -β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ β”‚ Ollama β”‚ β”‚ -β”‚ β”‚ β”‚ (local) β”‚ β”‚ -β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Optional Services (profiles) β”‚ β”‚ -β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ -β”‚ β”‚ Meilisearch β”‚ RAG API β”‚ VectorDB β”‚qBittor- β”‚ β”‚ -β”‚ β”‚ :7700 β”‚ :8000 β”‚ :5432 β”‚ rent β”‚ β”‚ -β”‚ β”‚ [meili] β”‚ [rag] β”‚ 
[rag] β”‚[qbit..] β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - ### Docker Profiles -| Profile | Services | Use Case | -|---------|----------|----------| -| (default) | LibreChat, Alfred, MongoDB, Ollama | Basic setup | -| `meili` | + Meilisearch | Fast search | -| `rag` | + RAG API, VectorDB | Document retrieval | -| `qbittorrent` | + qBittorrent | Torrent downloads | -| `full` | All services | Complete setup | +| Profile | Extra services | Use case | +|---------|---------------|----------| +| (default) | β€” | LibreChat + Alfred + MongoDB + Ollama | +| `meili` | Meilisearch | Fast full-text search | +| `rag` | RAG API + VectorDB (PostgreSQL) | Document retrieval | +| `qbittorrent` | qBittorrent | Torrent downloads | +| `full` | All of the above | Complete setup | ```bash -# Start with specific profiles -make up p=rag,meili -make up p=full -``` - -### Docker Commands - -```bash -make up # Start containers (default profile) +make up # Start (default profile) make up p=full # Start with all services -make down # Stop all containers -make restart # Restart containers +make down # Stop +make restart # Restart make logs # Follow logs -make ps # Show container status -make shell # Open bash in Alfred container -make build # Build production image -make build-test # Build test image +make ps # Container status ``` ## πŸ› οΈ Available Tools -The agent has access to these tools for interacting with your media library: - | Tool | Description | |------|-------------| | `find_media_imdb_id` | Search for movies/TV shows on TMDB by title | | `find_torrent` | Search for torrents across multiple indexers | -| `get_torrent_by_index` | Get 
detailed info about a specific torrent result | -| `add_torrent_by_index` | Download a torrent by its index in search results | +| `get_torrent_by_index` | Get detailed info about a specific result | +| `add_torrent_by_index` | Download a torrent from search results | | `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly | -| `set_path_for_folder` | Configure folder paths for media organization | -| `list_folder` | List contents of a folder | -| `set_language` | Set preferred language for searches | +| `resolve_destination` | Compute the target library path for a release | +| `move_media` | Hard-link a file to its library destination | +| `manage_subtitles` | Scan, classify, and place subtitle tracks | +| `create_seed_links` | Prepare torrent folder so qBittorrent keeps seeding | +| `learn` | Teach Alfred a new pattern (release group, naming convention) | +| `set_path_for_folder` | Configure folder paths | +| `list_folder` | List contents of a configured folder | +| `set_language` | Set preferred language for the session | ## πŸ’¬ Usage Examples @@ -266,11 +223,12 @@ Alfred: I found 3 torrents for Inception (2010): You: Download the first one Alfred: βœ“ Added to qBittorrent! Download started. - Saving to: /downloads/Movies/Inception (2010)/ -You: What's downloading right now? 
-Alfred: You have 1 active download: - - Inception.2010.1080p.BluRay.x264 (45% complete, ETA: 12 min) +You: Organize the Breaking Bad S01 download +Alfred: βœ“ Resolved destination: /tv_shows/Breaking.Bad/Season 01/ + βœ“ Moved 6 episode files + βœ“ Placed 6 subtitle tracks (fr, en) + βœ“ Seed links created in /torrents/ ``` ### Via API @@ -279,219 +237,147 @@ Alfred: You have 1 active download: # Health check curl http://localhost:8000/health -# Chat with the agent (OpenAI-compatible) +# Chat (OpenAI-compatible) curl -X POST http://localhost:8000/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "alfred", - "messages": [ - {"role": "user", "content": "Find The Matrix 4K"} - ] + "messages": [{"role": "user", "content": "Find The Matrix 4K"}] }' -# List available models +# List models curl http://localhost:8000/v1/models -# View memory state (debug) +# View memory state curl http://localhost:8000/memory/state - -# Clear session memory -curl -X POST http://localhost:8000/memory/clear-session ``` -### Via OpenWebUI or Other Clients - -Alfred is compatible with any OpenAI-compatible client: - -1. Add as OpenAI-compatible endpoint: `http://localhost:8000/v1` -2. Model name: `alfred` -3. No API key required (or use any placeholder) +Alfred is compatible with any OpenAI-compatible client. Point it at `http://localhost:8000/v1`, model `alfred`. 
## 🧠 Memory System -Alfred uses a three-tier memory system for context management: +Alfred uses a three-tier memory system: -### Long-Term Memory (LTM) -- **Persistent** — Saved to JSON files -- **Contents:** Configuration, user preferences, media library state -- **Survives:** Application restarts - -### Short-Term Memory (STM) -- **Session-based** — Stored in RAM -- **Contents:** Conversation history, current workflow state -- **Cleared:** On session end or restart - -### Episodic Memory -- **Transient** — Stored in RAM -- **Contents:** Search results, active downloads, recent errors -- **Cleared:** Frequently, after task completion +| Tier | Storage | Contents | Lifetime | +|------|---------|----------|----------| +| **LTM** | JSON file (`data/memory/ltm.json`) | Config, library, watchlist, learned patterns | Permanent | +| **STM** | RAM | Conversation history (capped) | Session | +| **Episodic** | RAM | Search results, active downloads, errors | Short-lived | ## 🧪 Development -### Project Setup - -```bash -# Install all dependencies (including dev) -poetry install - -# Install pre-commit hooks -make install-hooks - -# Run the development server -poetry run uvicorn alfred.app:app --reload -``` - ### Running Tests ```bash -# Run all tests (parallel execution) +# Run full suite (parallel) make test # Run with coverage report make coverage -# Run specific test file -poetry run pytest tests/test_agent.py -v +# Run a single file +uv run pytest tests/test_agent.py -v -# Run specific test -poetry run pytest tests/test_config_loader.py::TestBootstrapEnv -v +# Run a single class +uv run pytest tests/test_agent.py::TestAgentInit -v + +# Skip slow tests +uv run pytest -m "not slow" ``` +### Test coverage + +The suite covers: +- **Agent loop** — tool execution, history, max iterations, error handling +- **Tool registry** — OpenAI schema format, parameter extraction +- **Prompts** — system prompt building, tool inclusion +- **Memory** — LTM/STM/Episodic 
operations, persistence +- **Filesystem tools** β€” path traversal security, folder listing +- **File manager** β€” hard-link, move, seed links (real filesystem, no mocks) +- **Application use cases** β€” `resolve_destination`, `create_seed_links`, `list_folder`, `move_media` +- **Domain** β€” TV show/movie entities, shared value objects (`ImdbId`, `FilePath`, `FileSize`), subtitle scanner +- **Repositories** β€” JSON-backed movie, TV show, subtitle repos +- **Bootstrap** β€” secret generation, idempotency, URI construction +- **Workflows** β€” YAML loading, structure validation +- **Configuration** β€” boundary validation for all settings + ### Code Quality ```bash -# Lint and auto-fix -make lint - -# Format code -make format - -# Clean build artifacts -make clean +make lint # Ruff check --fix +make format # Ruff format + check --fix ``` ### Adding a New Tool -1. **Create the tool function** in `alfred/agent/tools/`: +1. Implement the function in `alfred/agent/tools/`: ```python # alfred/agent/tools/api.py def my_new_tool(param: str) -> dict[str, Any]: - """ - Short description of what this tool does. - - This will be shown to the LLM to help it decide when to use this tool. - """ + """Short description shown to the LLM to decide when to call this tool.""" memory = get_memory() - - # Your implementation here - result = do_something(param) - - return { - "status": "success", - "data": result - } + # ... + return {"status": "ok", "data": result} ``` -2. **Register in the registry** (`alfred/agent/registry.py`): +2. Register it in `alfred/agent/registry.py`: ```python tool_functions = [ # ... existing tools ... - api_tools.my_new_tool, # Add your tool here + api_tools.my_new_tool, ] ``` -The tool will be automatically registered with its parameters extracted from the function signature. +The registry auto-generates the JSON schema from the function signature and docstring. 
+ +### Adding a Workflow + +Create a YAML file in `alfred/agent/workflows/`: + +```yaml +name: my_workflow +description: What this workflow does +steps: + - tool: resolve_destination + description: Find where the file should go + - tool: move_media + description: Move the file +``` + +Workflows are loaded automatically at startup. ### Version Management ```bash -# Bump version (must be on main branch) -make patch # 0.1.7 -> 0.1.8 -make minor # 0.1.7 -> 0.2.0 -make major # 0.1.7 -> 1.0.0 +# Must be on main branch +make patch # 0.1.7 β†’ 0.1.8 +make minor # 0.1.7 β†’ 0.2.0 +make major # 0.1.7 β†’ 1.0.0 ``` ## πŸ“š API Reference ### Endpoints -#### `GET /health` -Health check endpoint. - -```json -{ - "status": "healthy", - "version": "0.1.7" -} -``` - -#### `GET /v1/models` -List available models (OpenAI-compatible). - -```json -{ - "object": "list", - "data": [ - { - "id": "alfred", - "object": "model", - "owned_by": "alfred" - } - ] -} -``` - -#### `POST /v1/chat/completions` -Chat with the agent (OpenAI-compatible). - -**Request:** -```json -{ - "model": "alfred", - "messages": [ - {"role": "user", "content": "Find Inception"} - ], - "stream": false -} -``` - -**Response:** -```json -{ - "id": "chatcmpl-xxx", - "object": "chat.completion", - "created": 1234567890, - "model": "alfred", - "choices": [{ - "index": 0, - "message": { - "role": "assistant", - "content": "I found Inception (2010)..." - }, - "finish_reason": "stop" - }] -} -``` - -#### `GET /memory/state` -View full memory state (debug endpoint). - -#### `POST /memory/clear-session` -Clear session memories (STM + Episodic). 
+| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/health` | Health check | +| `GET` | `/v1/models` | List models (OpenAI-compatible) | +| `POST` | `/v1/chat/completions` | Chat (OpenAI-compatible, streaming supported) | +| `GET` | `/memory/state` | Full memory dump (debug) | +| `POST` | `/memory/clear-session` | Clear STM + Episodic | +| `GET` | `/memory/episodic/search-results` | Current search results | ## πŸ”§ Troubleshooting ### Agent doesn't respond 1. Check API keys in `.env` -2. Verify LLM provider is running: +2. Verify the LLM is running: ```bash - # For Ollama docker logs alfred-ollama - - # Check if model is pulled docker exec alfred-ollama ollama list ``` 3. Check Alfred logs: `docker logs alfred-core` @@ -499,76 +385,34 @@ Clear session memories (STM + Episodic). ### qBittorrent connection failed 1. Verify qBittorrent is running: `docker ps | grep qbittorrent` -2. Check Web UI is enabled in qBittorrent settings -3. Verify credentials in `.env`: - ```bash - QBITTORRENT_URL=http://qbittorrent:16140 - QBITTORRENT_USERNAME=admin - QBITTORRENT_PASSWORD= - ``` - -### Database connection issues - -1. Check MongoDB is healthy: `docker logs alfred-mongodb` -2. Verify credentials match in `.env` -3. Try restarting: `make restart` +2. Check credentials in `.env` (`QBITTORRENT_URL`, `QBITTORRENT_USERNAME`, `QBITTORRENT_PASSWORD`) ### Memory not persisting -1. Check `data/` directory exists and is writable +1. Check `data/` directory is writable 2. Verify volume mounts in `docker-compose.yaml` -3. Check file permissions: `ls -la data/` ### Bootstrap fails -1. Ensure `.env.example` exists -2. Check `pyproject.toml` has required sections: - ```toml - [tool.alfred.settings] - [tool.alfred.security] - ``` -3. Run manually: `python scripts/bootstrap.py` +```bash +make validate # Check what's wrong with .env +make bootstrap # Regenerate (preserves existing secrets) +``` ### Tests failing -1. Update dependencies: `poetry install` -2. 
Check Python version: `python --version` (needs 3.14+) -3. Run specific failing test with verbose output: -   ```bash -   poetry run pytest tests/test_failing.py -v --tb=long -   ``` +```bash +uv run pytest tests/test_failing.py -v --tb=long +```    ## 🤝 Contributing   -Contributions are welcome! Please follow these steps: - -1. **Fork** the repository -2. **Create** a feature branch: `git checkout -b feature/my-feature` -3. **Make** your changes -4. **Run** tests: `make test` -5. **Run** linting: `make lint && make format` -6. **Commit**: `git commit -m "feat: add my feature"` -7. **Push**: `git push origin feature/my-feature` -8. **Create** a Pull Request - -### Commit Convention - -We use [Conventional Commits](https://www.conventionalcommits.org/): - -- `feat:` New feature -- `fix:` Bug fix -- `docs:` Documentation -- `refactor:` Code refactoring -- `test:` Adding tests -- `chore:` Maintenance - -## 📖 Documentation - -- [Architecture Diagram](docs/architecture_diagram.md) — System architecture overview -- [Class Diagram](docs/class_diagram.md) — Class structure and relationships -- [Component Diagram](docs/component_diagram.md) — Component interactions -- [Sequence Diagram](docs/sequence_diagram.md) — Sequence flows -- [Flowchart](docs/flowchart.md) — System flowcharts +1. Fork the repository +2. Create a feature branch: `git checkout -b feat/my-feature` +3. Make your changes + add tests +4. Run `make test && make lint && make format` +5. Commit with [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`, `infra:` +6. Open a Pull Request    ## 📄 License   @@ -576,19 +420,13 @@ MIT License — see [LICENSE](LICENSE) file for details.
## πŸ™ Acknowledgments -- [LibreChat](https://github.com/danny-avila/LibreChat) β€” Beautiful chat interface +- [LibreChat](https://github.com/danny-avila/LibreChat) β€” Chat interface - [Ollama](https://ollama.ai/) β€” Local LLM runtime - [DeepSeek](https://www.deepseek.com/) β€” LLM provider -- [TMDB](https://www.themoviedb.org/) β€” Movie database +- [TMDB](https://www.themoviedb.org/) β€” Movie & TV database - [qBittorrent](https://www.qbittorrent.org/) β€” Torrent client - [FastAPI](https://fastapi.tiangolo.com/) β€” Web framework -- [Pydantic](https://docs.pydantic.dev/) β€” Data validation - -## πŸ“¬ Support - -- πŸ“§ Email: francois.hodiaumont@gmail.com -- πŸ› Issues: [GitHub Issues](https://github.com/francwa/alfred_media_organizer/issues) -- πŸ’¬ Discussions: [GitHub Discussions](https://github.com/francwa/alfred_media_organizer/discussions) +- [uv](https://github.com/astral-sh/uv) β€” Fast Python package manager --- diff --git a/alfred/agent/prompts.py b/alfred/agent/prompts.py index d568b9d..cfb5ff3 100644 --- a/alfred/agent/prompts.py +++ b/alfred/agent/prompts.py @@ -4,6 +4,7 @@ import json from typing import Any from alfred.infrastructure.persistence import get_memory +from alfred.infrastructure.persistence.memory import MemoryRegistry from .registry import Tool @@ -13,6 +14,7 @@ class PromptBuilder: def __init__(self, tools: dict[str, Tool]): self.tools = tools + self._memory_registry = MemoryRegistry() def build_tools_spec(self) -> list[dict[str, Any]]: """Build the tool specification for the LLM API.""" @@ -109,11 +111,30 @@ class PromptBuilder: return "\n".join(lines) + def _format_memory_schema(self) -> str: + """Describe available memory components so the agent knows what to read/write and when.""" + schema = self._memory_registry.schema() + tier_labels = {"ltm": "LONG-TERM (persisted)", "stm": "SHORT-TERM (session)", "episodic": "EPISODIC (volatile)"} + lines = ["MEMORY COMPONENTS:"] + + for tier, components in schema.items(): + if not 
components: + continue + lines.append(f"\n [{tier_labels.get(tier, tier.upper())}]") + for c in components: + access = c.get("access", "read") + lines.append(f" {c['name']} ({access}): {c['description']}") + for field_name, field_desc in c.get("fields", {}).items(): + lines.append(f" Β· {field_name}: {field_desc}") + + return "\n".join(lines) + def _format_config_context(self, memory) -> str: """Format configuration context.""" lines = ["CURRENT CONFIGURATION:"] - if memory.ltm.config: - for key, value in memory.ltm.config.items(): + folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()} + if folders: + for key, value in folders.items(): lines.append(f" - {key}: {value}") else: lines.append(" (no configuration set)") @@ -138,6 +159,9 @@ class PromptBuilder: tools_desc = self._format_tools_description() tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else "" + # Memory schema + memory_schema = self._format_memory_schema() + # Configuration config_section = self._format_config_context(memory) if config_section: @@ -172,6 +196,8 @@ EXAMPLES: {language_instruction} {tools_section} + +{memory_schema} {config_section} {stm_context} {episodic_context} diff --git a/alfred/agent/registry.py b/alfred/agent/registry.py index 7cf5fac..d9ac6b2 100644 --- a/alfred/agent/registry.py +++ b/alfred/agent/registry.py @@ -97,6 +97,11 @@ def make_tools(settings) -> dict[str, Tool]: tool_functions = [ fs_tools.set_path_for_folder, fs_tools.list_folder, + fs_tools.resolve_destination, + fs_tools.move_media, + fs_tools.manage_subtitles, + fs_tools.create_seed_links, + fs_tools.learn, api_tools.find_media_imdb_id, api_tools.find_torrent, api_tools.add_torrent_by_index, diff --git a/alfred/agent/tools/filesystem.py b/alfred/agent/tools/filesystem.py index 017c339..5ce4cfb 100644 --- a/alfred/agent/tools/filesystem.py +++ b/alfred/agent/tools/filesystem.py @@ -1,10 +1,200 @@ """Filesystem tools for folder management.""" +from pathlib import Path 
from typing import Any -from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase +import alfred as _alfred_pkg +import yaml + +from alfred.application.filesystem import ( + CreateSeedLinksUseCase, + ListFolderUseCase, + ManageSubtitlesUseCase, + MoveMediaUseCase, + ResolveDestinationUseCase, + SetFolderPathUseCase, +) from alfred.infrastructure.filesystem import FileManager +_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" + + +def move_media(source: str, destination: str) -> dict[str, Any]: + """ + Move a media file to a destination path. + + Copies the file safely first (with integrity check), then deletes the source. + Use this to organise a downloaded file into the media library. + + Args: + source: Absolute path to the source file. + destination: Absolute path to the destination file (must not already exist). + + Returns: + Dict with status, source, destination, filename, and size β€” or error details. + """ + file_manager = FileManager() + use_case = MoveMediaUseCase(file_manager) + return use_case.execute(source, destination).to_dict() + + +def resolve_destination( + release_name: str, + source_file: str, + tmdb_title: str, + tmdb_year: int, + tmdb_episode_title: str | None = None, + confirmed_folder: str | None = None, +) -> dict[str, Any]: + """ + Compute the destination path in the media library for a release. + + Call this before move_media to get the correct library path. Handles: + - Parsing the release name (quality, codec, group, season/episode) + - Looking up any existing series folder in the library + - Applying group-conflict rules (asks user if ambiguous) + - Building the full destination path with correct naming conventions + + Args: + release_name: Raw release folder or file name + (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST"). + source_file: Absolute path to the source video file (used for extension). + tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz"). 
+ tmdb_year: Release/start year from TMDB (e.g. 1997). + tmdb_episode_title: Episode title from TMDB for single-episode releases + (e.g. "The Routine"). Omit for season packs and movies. + confirmed_folder: If a previous call returned needs_clarification, pass + the user-chosen folder name here to proceed. + + Returns: + On success: dict with status, library_file, series_folder, season_folder, + series_folder_name, season_folder_name, filename, + is_new_series_folder. + On ambiguity: dict with status="needs_clarification", question, options. + On error: dict with status="error", error, message. + """ + use_case = ResolveDestinationUseCase() + return use_case.execute( + release_name=release_name, + source_file=source_file, + tmdb_title=tmdb_title, + tmdb_year=tmdb_year, + tmdb_episode_title=tmdb_episode_title, + confirmed_folder=confirmed_folder, + ).to_dict() + + +def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]: + """ + Prepare a torrent subfolder so qBittorrent can keep seeding after a move. + + Hard-links the video file from the library into torrents//, + then copies all remaining files from the original download folder (subtitles, + .nfo, .jpg, .txt, …) so the torrent data is complete. + + Call this after move_media when the user wants to keep seeding. + + Args: + library_file: Absolute path to the video file now in the library. + original_download_folder: Absolute path to the original download folder + (may still contain subs, nfo, and other release files). + + Returns: + Dict with status, torrent_subfolder, linked_file, copied_files, + copied_count, skipped β€” or error details. + """ + file_manager = FileManager() + use_case = CreateSeedLinksUseCase(file_manager) + return use_case.execute(library_file, original_download_folder).to_dict() + + +def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]: + """ + Place subtitle files alongside an organised video file. 
+ + Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source + video, filters them according to the user's SubtitlePreferences (languages, + min size, SDH, forced), and hard-links the passing files next to the + destination video with the correct naming convention: + fr.srt / fr.sdh.srt / fr.forced.srt / en.srt … + + Call this right after move_media or copy_media, passing the same source and + destination paths. If no subtitles are found, returns ok with placed_count=0. + + Args: + source_video: Absolute path to the original video file (in the download folder). + destination_video: Absolute path to the placed video file (in the library). + + Returns: + Dict with status, placed list (source, destination, filename), placed_count, + skipped_count β€” or error details. + """ + file_manager = FileManager() + use_case = ManageSubtitlesUseCase(file_manager) + return use_case.execute(source_video, destination_video).to_dict() + + +def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]: + """ + Teach Alfred a new token mapping and persist it to the learned knowledge pack. + + Use this when a subtitle file contains an unrecognised token β€” after confirming + with the user what the token means, call learn() to persist it so Alfred + recognises it in future scans. + + Args: + pack: Knowledge pack name. Currently only "subtitles" is supported. + category: Category within the pack: "languages", "types", or "formats". + key: The entry key β€” e.g. ISO 639-1 language code ("es"), type id ("sdh"). + values: List of tokens to add β€” e.g. ["spanish", "espanol", "spa"]. + + Returns: + Dict with status, added_count, and the updated token list. + """ + _VALID_PACKS = {"subtitles"} + _VALID_CATEGORIES = {"languages", "types", "formats"} + + if pack not in _VALID_PACKS: + return {"status": "error", "error": "unknown_pack", "message": f"Unknown pack '{pack}'. 
Valid: {sorted(_VALID_PACKS)}"} + + if category not in _VALID_CATEGORIES: + return {"status": "error", "error": "unknown_category", "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}"} + + learned_path = _LEARNED_ROOT / "subtitles_learned.yaml" + _LEARNED_ROOT.mkdir(parents=True, exist_ok=True) + + data: dict = {} + if learned_path.exists(): + try: + with open(learned_path, encoding="utf-8") as f: + data = yaml.safe_load(f) or {} + except Exception as e: + return {"status": "error", "error": "read_failed", "message": str(e)} + + cat_data = data.setdefault(category, {}) + entry = cat_data.setdefault(key, {"tokens": []}) + existing = entry.get("tokens", []) + new_tokens = [v for v in values if v not in existing] + entry["tokens"] = existing + new_tokens + + tmp = learned_path.with_suffix(".yaml.tmp") + try: + with open(tmp, "w", encoding="utf-8") as f: + yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False) + tmp.rename(learned_path) + except Exception as e: + tmp.unlink(missing_ok=True) + return {"status": "error", "error": "write_failed", "message": str(e)} + + return { + "status": "ok", + "pack": pack, + "category": category, + "key": key, + "added_count": len(new_tokens), + "tokens": entry["tokens"], + } + def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]: """ diff --git a/alfred/agent/workflows/__init__.py b/alfred/agent/workflows/__init__.py new file mode 100644 index 0000000..cccb3a7 --- /dev/null +++ b/alfred/agent/workflows/__init__.py @@ -0,0 +1,3 @@ +from .loader import WorkflowLoader + +__all__ = ["WorkflowLoader"] diff --git a/alfred/agent/workflows/loader.py b/alfred/agent/workflows/loader.py new file mode 100644 index 0000000..10ec464 --- /dev/null +++ b/alfred/agent/workflows/loader.py @@ -0,0 +1,52 @@ +"""WorkflowLoader β€” autodiscovers and loads workflow YAML files. + +Scans the workflows/ directory for all .yaml files and exposes them +as dicts. 
No manual registration needed β€” drop a new .yaml file and +it will be picked up automatically. +""" + +import logging +from pathlib import Path + +import yaml + +logger = logging.getLogger(__name__) + +_WORKFLOWS_DIR = Path(__file__).parent + + +class WorkflowLoader: + """ + Loads all workflow definitions from the workflows/ directory. + + Usage: + loader = WorkflowLoader() + all_workflows = loader.all() + workflow = loader.get("organize_media") + """ + + def __init__(self): + self._workflows: dict[str, dict] = {} + self._load() + + def _load(self) -> None: + for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")): + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) + name = data.get("name") or path.stem + self._workflows[name] = data + logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}") + except Exception as e: + logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}") + + def all(self) -> dict[str, dict]: + """Return all loaded workflows keyed by name.""" + return self._workflows + + def get(self, name: str) -> dict | None: + """Return a specific workflow by name, or None if not found.""" + return self._workflows.get(name) + + def names(self) -> list[str]: + """Return all available workflow names.""" + return list(self._workflows.keys()) diff --git a/alfred/agent/workflows/manage_subtitles.yaml b/alfred/agent/workflows/manage_subtitles.yaml new file mode 100644 index 0000000..253d4de --- /dev/null +++ b/alfred/agent/workflows/manage_subtitles.yaml @@ -0,0 +1,69 @@ +name: manage_subtitles +description: > + Place subtitle files alongside a video that has just been organised into the library. + Detects the release pattern automatically, identifies and classifies all tracks, + filters by user rules, and hard-links matching files to the destination. + If any tracks are unrecognised, asks the user and optionally teaches Alfred. 
+ +trigger: + examples: + - "handle subtitles for The X-Files S01E01" + - "place the subs next to the file" + - "subtitles are in the Subs/ folder" + - "add subtitles" + +tools: + - manage_subtitles + - learn + +memory: + SubtitlePreferences: read + Workflow: read-write + +steps: + - id: place_subtitles + tool: manage_subtitles + description: > + Detect release pattern, identify and classify all subtitle tracks, + filter by rules, hard-link matching files next to the destination video. + Reads SubtitlePreferences from LTM for language/type/format filtering. + params: + source_video: "{source_video}" + destination_video: "{destination_video}" + imdb_id: "{imdb_id}" + media_type: "{media_type}" + release_group: "{release_group}" + season: "{season}" + episode: "{episode}" + on_result: + ok_placed_zero: skip # no subtitles found β€” not an error + needs_clarification: ask_user # unrecognised tokens found + + - id: ask_user + description: > + Some tracks could not be classified. Show the user the unresolved tokens + and ask if they want to teach Alfred what they mean. + If yes β†’ go to learn_tokens. If no β†’ end workflow. + ask_user: + question: > + I could not identify some tokens in the subtitle files: {unresolved}. + Do you want to teach me what they mean? + answers: + yes: { next_step: learn_tokens } + no: { next_step: end } + + - id: learn_tokens + tool: learn + description: > + Persist a new token mapping to the learned knowledge pack so Alfred + recognises it in future scans without asking again. + params: + pack: "subtitles" + category: "{token_category}" # "languages" or "types" + key: "{token_key}" # e.g. "es", "de" + values: "{token_values}" # e.g. 
["spanish", "espanol"] + +subtitle_naming: + standard: "{lang}.{ext}" + sdh: "{lang}.sdh.{ext}" + forced: "{lang}.forced.{ext}" diff --git a/alfred/agent/workflows/organize_media.yaml b/alfred/agent/workflows/organize_media.yaml new file mode 100644 index 0000000..22eead0 --- /dev/null +++ b/alfred/agent/workflows/organize_media.yaml @@ -0,0 +1,82 @@ +name: organize_media +description: > + Organise a downloaded series or movie into the media library. + Triggered when the user asks to move/organize a specific title. + Always moves the video file. Optionally creates seed links in the + torrents folder so qBittorrent can keep seeding. + +trigger: + examples: + - "organize Breaking Bad" + - "organise Severance season 2" + - "move Inception to my library" + - "organize Breaking Bad season 1, keep seeding" + +tools: + - list_folder + - find_media_imdb_id + - resolve_destination + - move_media + - manage_subtitles + - create_seed_links + +memory: + WorkspacePaths: read + LibraryPaths: read + Library: read-write + Workflow: read-write + Entities: read-write + +steps: + - id: list_downloads + tool: list_folder + description: List the download folder to find the target files. + params: + folder_type: download + + - id: identify_media + tool: find_media_imdb_id + description: Confirm title, type (series/movie), and metadata via TMDB. + + - id: resolve_destination + tool: resolve_destination + description: > + Compute the correct destination path in the library. + Uses the release name + TMDB metadata to build folder and file names. + If multiple series folders exist for this title, returns + needs_clarification and the user must pick one (re-call with confirmed_folder). + + - id: move_file + tool: move_media + description: > + Move the video file to library_file returned by resolve_destination. + + - id: handle_subtitles + tool: manage_subtitles + description: > + Place subtitle files alongside the video in the library. 
+ Pass the original source path and the new library destination path. + on_missing: skip + + - id: ask_seeding + ask_user: + question: "Do you want to keep seeding this torrent?" + answers: + "yes": { next_step: create_seed_links } + "no": { next_step: update_library } + + - id: create_seed_links + tool: create_seed_links + description: > + Hard-link the library video file back into torrents// + and copy all remaining files from the original download folder + (subs, nfo, jpg, …) so the torrent stays complete for seeding. + + - id: update_library + memory_write: Library + description: Add the entry to the LTM library after a successful move. + +naming_convention: + # Resolved by domain entities (Movie, Episode) β€” not hardcoded here + tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}" + movie: "{title} ({year})/{title}.{year}.{ext}" diff --git a/alfred/application/filesystem/__init__.py b/alfred/application/filesystem/__init__.py index de5b3b9..7dc259b 100644 --- a/alfred/application/filesystem/__init__.py +++ b/alfred/application/filesystem/__init__.py @@ -1,12 +1,32 @@ """Filesystem use cases.""" -from .dto import ListFolderResponse, SetFolderPathResponse +from .create_seed_links import CreateSeedLinksUseCase +from .dto import ( + CreateSeedLinksResponse, + ListFolderResponse, + ManageSubtitlesResponse, + MoveMediaResponse, + PlacedSubtitle, + SetFolderPathResponse, +) from .list_folder import ListFolderUseCase +from .manage_subtitles import ManageSubtitlesUseCase +from .move_media import MoveMediaUseCase +from .resolve_destination import ResolveDestinationUseCase, ResolvedDestination from .set_folder_path import SetFolderPathUseCase __all__ = [ "SetFolderPathUseCase", "ListFolderUseCase", + "CreateSeedLinksUseCase", + "MoveMediaUseCase", + "ManageSubtitlesUseCase", + "ResolveDestinationUseCase", + "ResolvedDestination", "SetFolderPathResponse", "ListFolderResponse", + "CreateSeedLinksResponse", + "MoveMediaResponse", + 
"ManageSubtitlesResponse", + "PlacedSubtitle", ] diff --git a/alfred/application/filesystem/create_seed_links.py b/alfred/application/filesystem/create_seed_links.py new file mode 100644 index 0000000..39c6ec4 --- /dev/null +++ b/alfred/application/filesystem/create_seed_links.py @@ -0,0 +1,54 @@ +"""CreateSeedLinksUseCase β€” prepares a torrent folder for continued seeding.""" + +import logging + +from alfred.infrastructure.filesystem import FileManager +from alfred.infrastructure.persistence import get_memory + +from .dto import CreateSeedLinksResponse + +logger = logging.getLogger(__name__) + + +class CreateSeedLinksUseCase: + """ + Prepares a torrent subfolder so qBittorrent can keep seeding after a move. + + Hard-links the video file from the library back into torrents//, + then copies all remaining files from the original download folder (subs, nfo, …). + """ + + def __init__(self, file_manager: FileManager): + self.file_manager = file_manager + + def execute( + self, library_file: str, original_download_folder: str + ) -> CreateSeedLinksResponse: + memory = get_memory() + torrent_folder = memory.ltm.workspace.torrent + + if not torrent_folder: + return CreateSeedLinksResponse( + status="error", + error="torrent_folder_not_set", + message="Torrent folder is not configured. 
Use set_path_for_folder to set it.", + ) + + result = self.file_manager.create_seed_links( + library_file, original_download_folder, torrent_folder + ) + + if result.get("status") == "ok": + return CreateSeedLinksResponse( + status="ok", + torrent_subfolder=result.get("torrent_subfolder"), + linked_file=result.get("linked_file"), + copied_files=result.get("copied_files"), + copied_count=result.get("copied_count", 0), + skipped=result.get("skipped"), + ) + return CreateSeedLinksResponse( + status="error", + error=result.get("error"), + message=result.get("message"), + ) diff --git a/alfred/application/filesystem/dto.py b/alfred/application/filesystem/dto.py index 7060b38..743e336 100644 --- a/alfred/application/filesystem/dto.py +++ b/alfred/application/filesystem/dto.py @@ -1,6 +1,56 @@ """Filesystem application DTOs.""" -from dataclasses import dataclass +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass +class CopyMediaResponse: + """Response from copying a media file.""" + + status: str + source: str | None = None + destination: str | None = None + filename: str | None = None + size: int | None = None + error: str | None = None + message: str | None = None + + def to_dict(self) -> dict: + if self.error: + return {"status": self.status, "error": self.error, "message": self.message} + return { + "status": self.status, + "source": self.source, + "destination": self.destination, + "filename": self.filename, + "size": self.size, + } + + +@dataclass +class MoveMediaResponse: + """Response from moving a media file.""" + + status: str + source: str | None = None + destination: str | None = None + filename: str | None = None + size: int | None = None + error: str | None = None + message: str | None = None + + def to_dict(self) -> dict: + if self.error: + return {"status": self.status, "error": self.error, "message": self.message} + return { + "status": self.status, + "source": self.source, + "destination": self.destination, + 
"filename": self.filename, + "size": self.size, + } @dataclass @@ -29,6 +79,104 @@ class SetFolderPathResponse: return result +@dataclass +class PlacedSubtitle: + """One subtitle file successfully placed.""" + + source: str + destination: str + filename: str + + def to_dict(self) -> dict: + return {"source": self.source, "destination": self.destination, "filename": self.filename} + + +@dataclass +class UnresolvedTrack: + """A subtitle track that needs agent clarification before placement.""" + + raw_tokens: list[str] + file_path: str | None = None + file_size_kb: float | None = None + reason: str = "" # "unknown_language" | "low_confidence" + + def to_dict(self) -> dict: + return { + "raw_tokens": self.raw_tokens, + "file_path": self.file_path, + "file_size_kb": self.file_size_kb, + "reason": self.reason, + } + + +@dataclass +class AvailableSubtitle: + """One subtitle track available on an embedded media item.""" + + language: str # ISO 639-2 code + subtitle_type: str # "standard" | "sdh" | "forced" | "unknown" + + def to_dict(self) -> dict: + return {"language": self.language, "type": self.subtitle_type} + + +@dataclass +class ManageSubtitlesResponse: + """Response from the manage_subtitles use case.""" + + status: str # "ok" | "needs_clarification" | "error" + video_path: str | None = None + placed: list[PlacedSubtitle] | None = None + skipped_count: int = 0 + unresolved: list[UnresolvedTrack] | None = None + available: list[AvailableSubtitle] | None = None # embedded tracks summary + error: str | None = None + message: str | None = None + + def to_dict(self) -> dict: + if self.error: + return {"status": self.status, "error": self.error, "message": self.message} + result = { + "status": self.status, + "video_path": self.video_path, + "placed": [p.to_dict() for p in (self.placed or [])], + "placed_count": len(self.placed or []), + "skipped_count": self.skipped_count, + } + if self.unresolved: + result["unresolved"] = [u.to_dict() for u in self.unresolved] + 
result["unresolved_count"] = len(self.unresolved) + if self.available: + result["available"] = [a.to_dict() for a in self.available] + return result + + +@dataclass +class CreateSeedLinksResponse: + """Response from creating seed links for a torrent.""" + + status: str + torrent_subfolder: str | None = None + linked_file: str | None = None + copied_files: list[str] | None = None + copied_count: int = 0 + skipped: list[str] | None = None + error: str | None = None + message: str | None = None + + def to_dict(self) -> dict: + if self.error: + return {"status": self.status, "error": self.error, "message": self.message} + return { + "status": self.status, + "torrent_subfolder": self.torrent_subfolder, + "linked_file": self.linked_file, + "copied_files": self.copied_files or [], + "copied_count": self.copied_count, + "skipped": self.skipped or [], + } + + @dataclass class ListFolderResponse: """Response from listing a folder.""" diff --git a/alfred/application/filesystem/manage_subtitles.py b/alfred/application/filesystem/manage_subtitles.py new file mode 100644 index 0000000..473f905 --- /dev/null +++ b/alfred/application/filesystem/manage_subtitles.py @@ -0,0 +1,258 @@ +"""ManageSubtitlesUseCase β€” orchestrates the full subtitle pipeline for a video file.""" + +import logging +from pathlib import Path + +from alfred.domain.shared.value_objects import ImdbId +from alfred.domain.subtitles.entities import SubtitleTrack +from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase +from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader +from alfred.domain.subtitles.services.identifier import SubtitleIdentifier +from alfred.domain.subtitles.services.matcher import SubtitleMatcher +from alfred.domain.subtitles.services.pattern_detector import PatternDetector +from alfred.domain.subtitles.services.placer import PlacedTrack, SubtitlePlacer +from alfred.domain.subtitles.services.utils import available_subtitles +from 
alfred.domain.subtitles.value_objects import ScanStrategy +from alfred.infrastructure.persistence.context import get_memory +from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore +from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository + +from .dto import AvailableSubtitle, ManageSubtitlesResponse, PlacedSubtitle, UnresolvedTrack + +logger = logging.getLogger(__name__) + + +def _infer_library_root(dest_video: Path, media_type: str) -> Path: + """ + Infer the media library root folder from the destination video path. + + TV show: video β†’ Season 01 β†’ The X-Files (3 levels up) + Movie: video β†’ Inception (2010) (1 level up) + """ + if media_type == "tv_show": + return dest_video.parent.parent + return dest_video.parent + + +def _to_imdb_id(raw: str | None) -> ImdbId | None: + if not raw: + return None + try: + return ImdbId(raw) + except Exception: + return None + + +class ManageSubtitlesUseCase: + """ + Full subtitle pipeline: + + 1. Load knowledge base + 2. Detect (or confirm) the release pattern + 3. Identify all tracks (ffprobe + filesystem scan) + 4. Load + resolve rules for this media + 5. Match tracks against rules + 6. If any tracks are unresolved β†’ return needs_clarification (don't place yet) + 7. Place matched tracks via hard-link + 8. Persist to .alfred/metadata.yaml + + The use case is stateless β€” all dependencies are instantiated inline. 
+ """ + + def execute( + self, + source_video: str, + destination_video: str, + imdb_id: str | None = None, + media_type: str = "tv_show", + release_group: str | None = None, + season: int | None = None, + episode: int | None = None, + confirmed_pattern_id: str | None = None, + ) -> ManageSubtitlesResponse: + source_path = Path(source_video) + dest_path = Path(destination_video) + + if not source_path.exists(): + return ManageSubtitlesResponse( + status="error", + error="source_not_found", + message=f"Source video not found: {source_video}", + ) + + kb = SubtitleKnowledgeBase(KnowledgeLoader()) + library_root = _infer_library_root(dest_path, media_type) + store = SubtitleMetadataStore(library_root) + repo = RuleSetRepository(library_root) + + # --- Pattern resolution --- + pattern = self._resolve_pattern( + kb, store, source_path, confirmed_pattern_id, release_group + ) + if pattern is None: + return ManageSubtitlesResponse( + status="error", + error="pattern_not_found", + message="Could not determine subtitle pattern for this release.", + ) + + # --- Identify --- + media_id = _to_imdb_id(imdb_id) + identifier = SubtitleIdentifier(kb) + metadata = identifier.identify( + video_path=source_path, + pattern=pattern, + media_id=media_id, + media_type=media_type, + release_group=release_group, + ) + + if metadata.total_count == 0: + logger.info(f"ManageSubtitles: no subtitle tracks found for {source_path.name}") + return ManageSubtitlesResponse( + status="ok", + video_path=destination_video, + placed=[], + skipped_count=0, + ) + + # --- Embedded short-circuit --- + if pattern.scan_strategy == ScanStrategy.EMBEDDED: + logger.info("ManageSubtitles: embedded pattern β€” skipping matcher") + available = [ + AvailableSubtitle( + language=t.language.code if t.language else "?", + subtitle_type=t.subtitle_type.value, + ) + for t in available_subtitles(metadata.embedded_tracks) + ] + return ManageSubtitlesResponse( + status="ok", + video_path=destination_video, + placed=[], + 
skipped_count=0, + available=available, + ) + + # --- Match (external only) --- + subtitle_prefs = None + try: + memory = get_memory() + subtitle_prefs = memory.ltm.subtitle_preferences + except Exception: + pass + rules = repo.load(release_group, subtitle_prefs).resolve() + matcher = SubtitleMatcher() + matched, unresolved = matcher.match(metadata.external_tracks, rules) + + if unresolved: + logger.info( + f"ManageSubtitles: {len(unresolved)} unresolved track(s) β€” needs clarification" + ) + return ManageSubtitlesResponse( + status="needs_clarification", + video_path=destination_video, + placed=[], + unresolved=[_to_unresolved_dto(t) for t in unresolved], + ) + + if not matched: + return ManageSubtitlesResponse( + status="ok", + video_path=destination_video, + placed=[], + skipped_count=metadata.total_count, + ) + + # --- Place --- + placer = SubtitlePlacer() + place_result = placer.place(matched, dest_path) + + # --- Persist --- + if place_result.placed: + pairs = _pair_placed_with_tracks(place_result.placed, matched) + store.append_history(pairs, season, episode, release_group) + + placed_dtos = [ + PlacedSubtitle( + source=str(p.source), + destination=str(p.destination), + filename=p.filename, + ) + for p in place_result.placed + ] + + return ManageSubtitlesResponse( + status="ok", + video_path=destination_video, + placed=placed_dtos, + skipped_count=place_result.skipped_count, + ) + + def _resolve_pattern( + self, + kb: SubtitleKnowledgeBase, + store: SubtitleMetadataStore, + source_path: Path, + confirmed_pattern_id: str | None, + release_group: str | None, + ): + # 1. Explicit override from caller + if confirmed_pattern_id: + p = kb.pattern(confirmed_pattern_id) + if p: + return p + logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'") + + # 2. 
Previously confirmed in metadata store + stored_id = store.confirmed_pattern() + if stored_id: + p = kb.pattern(stored_id) + if p: + logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'") + return p + + # 3. Auto-detect + release_root = source_path.parent + detector = PatternDetector(kb) + result = detector.detect(release_root, source_path) + + if result["detected"] and result["confidence"] >= 0.6: + logger.info( + f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' " + f"(confidence={result['confidence']:.2f})" + ) + return result["detected"] + + # 4. Fallback β€” adjacent (safest default) + logger.info("ManageSubtitles: falling back to 'adjacent' pattern") + return kb.pattern("adjacent") + + +def _to_unresolved_dto(track: SubtitleTrack, min_confidence: float = 0.7) -> UnresolvedTrack: + reason = "unknown_language" if track.language is None else "low_confidence" + return UnresolvedTrack( + raw_tokens=track.raw_tokens, + file_path=str(track.file_path) if track.file_path else None, + file_size_kb=track.file_size_kb, + reason=reason, + ) + + +def _pair_placed_with_tracks( + placed: list[PlacedTrack], + tracks: list[SubtitleTrack], +) -> list[tuple[PlacedTrack, SubtitleTrack]]: + """ + Pair each PlacedTrack with its originating SubtitleTrack by source path. + Falls back to positional matching if paths don't align. 
+ """ + track_by_path = {t.file_path: t for t in tracks if t.file_path} + pairs = [] + for p in placed: + track = track_by_path.get(p.source) + if track is None and tracks: + track = tracks[0] # positional fallback + if track: + pairs.append((p, track)) + return pairs diff --git a/alfred/application/filesystem/move_media.py b/alfred/application/filesystem/move_media.py new file mode 100644 index 0000000..75810b9 --- /dev/null +++ b/alfred/application/filesystem/move_media.py @@ -0,0 +1,43 @@ +"""Move media use case.""" + +import logging + +from alfred.infrastructure.filesystem import FileManager + +from .dto import MoveMediaResponse + +logger = logging.getLogger(__name__) + + +class MoveMediaUseCase: + """Use case for moving a media file to a destination (copy + delete source).""" + + def __init__(self, file_manager: FileManager): + self.file_manager = file_manager + + def execute(self, source: str, destination: str) -> MoveMediaResponse: + """ + Move a media file from source to destination. + + Args: + source: Absolute path to the source file. + destination: Absolute path to the destination file. + + Returns: + MoveMediaResponse with success or error information. + """ + result = self.file_manager.move_file(source, destination) + + if result.get("status") == "ok": + return MoveMediaResponse( + status="ok", + source=result.get("source"), + destination=result.get("destination"), + filename=result.get("filename"), + size=result.get("size"), + ) + return MoveMediaResponse( + status="error", + error=result.get("error"), + message=result.get("message"), + ) diff --git a/alfred/application/filesystem/resolve_destination.py b/alfred/application/filesystem/resolve_destination.py new file mode 100644 index 0000000..dc16af2 --- /dev/null +++ b/alfred/application/filesystem/resolve_destination.py @@ -0,0 +1,246 @@ +""" +ResolveDestinationUseCase β€” compute the library destination path for a release. + +Steps: +1. Parse the release name +2. 
@dataclass
class ResolvedDestination:
    """
    Outcome of a destination lookup for one release.

    Which attributes are filled in depends on ``status``; the rest keep
    their defaults.
    """

    status: str  # "ok" | "needs_clarification" | "error"

    # --- set when status == "ok" ---
    library_file: str | None = None  # absolute path of the destination video file
    series_folder: str | None = None  # absolute path of the series root folder
    season_folder: str | None = None  # absolute path of the season subfolder
    series_folder_name: str | None = None  # folder name only (for display)
    season_folder_name: str | None = None
    filename: str | None = None
    is_new_series_folder: bool = False  # True when the folder has to be created

    # --- set when status == "needs_clarification" ---
    question: str | None = None
    options: list[str] | None = None  # existing folder names to choose from

    # --- set when status == "error" ---
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Return a plain dict holding only the keys relevant to ``status``."""
        payload: dict = {"status": self.status}
        if self.status == "error":
            payload["error"] = self.error
            payload["message"] = self.message
        elif self.status == "needs_clarification":
            payload["question"] = self.question
            payload["options"] = self.options if self.options else []
        else:
            for key in (
                "library_file",
                "series_folder",
                "season_folder",
                "series_folder_name",
                "season_folder_name",
                "filename",
                "is_new_series_folder",
            ):
                payload[key] = getattr(self, key)
        return payload
+ """ + + def execute( + self, + release_name: str, + source_file: str, + tmdb_title: str, + tmdb_year: int, + tmdb_episode_title: str | None = None, + confirmed_folder: str | None = None, + ) -> ResolvedDestination: + parsed = parse_release(release_name) + ext = Path(source_file).suffix # ".mkv" + + if parsed.is_movie: + return self._resolve_movie(parsed, tmdb_title, tmdb_year, ext) + return self._resolve_tvshow( + parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder + ) + + # ------------------------------------------------------------------ + # Movie + # ------------------------------------------------------------------ + + def _resolve_movie( + self, parsed: ParsedRelease, tmdb_title: str, tmdb_year: int, ext: str + ) -> ResolvedDestination: + memory = get_memory() + movies_root = memory.ltm.library_paths.get("movie") + if not movies_root: + return ResolvedDestination( + status="error", + error="library_not_set", + message="Movie library path is not configured.", + ) + + folder_name = _sanitise(parsed.movie_folder_name(tmdb_title, tmdb_year)) + filename = _sanitise(parsed.movie_filename(tmdb_title, tmdb_year, ext)) + + folder_path = Path(movies_root) / folder_name + file_path = folder_path / filename + + return ResolvedDestination( + status="ok", + library_file=str(file_path), + series_folder=str(folder_path), + series_folder_name=folder_name, + filename=filename, + is_new_series_folder=not folder_path.exists(), + ) + + # ------------------------------------------------------------------ + # TV show + # ------------------------------------------------------------------ + + def _resolve_tvshow( + self, + parsed: ParsedRelease, + tmdb_title: str, + tmdb_year: int, + tmdb_episode_title: str | None, + ext: str, + confirmed_folder: str | None, + ) -> ResolvedDestination: + memory = get_memory() + tv_root = memory.ltm.library_paths.get("tv_show") + if not tv_root: + return ResolvedDestination( + status="error", + error="library_not_set", + 
message="TV show library path is not configured.", + ) + + tv_root_path = Path(tv_root) + + # --- Find existing series folders for this title --- + existing = _find_existing_series_folders(tv_root_path, tmdb_title, tmdb_year) + + # --- Determine series folder name --- + if confirmed_folder: + series_folder_name = confirmed_folder + is_new = not (tv_root_path / confirmed_folder).exists() + elif len(existing) == 0: + # No existing folder β€” create with release group + series_folder_name = _sanitise(parsed.show_folder_name(tmdb_title, tmdb_year)) + is_new = True + elif len(existing) == 1: + # Exactly one match β€” use it regardless of group + series_folder_name = existing[0] + is_new = False + else: + # Multiple folders β€” ask user + return ResolvedDestination( + status="needs_clarification", + question=( + f"Multiple folders found for '{tmdb_title}' in your library. " + f"Which one should I use for this release ({parsed.group})?" + ), + options=existing, + ) + + # --- Build paths --- + season_folder_name = parsed.season_folder_name() + filename = _sanitise( + parsed.episode_filename(tmdb_episode_title, ext) + if not parsed.is_season_pack + else parsed.season_folder_name() + ext + ) + + series_path = tv_root_path / series_folder_name + season_path = series_path / season_folder_name + file_path = season_path / filename + + return ResolvedDestination( + status="ok", + library_file=str(file_path), + series_folder=str(series_path), + season_folder=str(season_path), + series_folder_name=series_folder_name, + season_folder_name=season_folder_name, + filename=filename, + is_new_series_folder=is_new, + ) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _find_existing_series_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]: + """ + Return names of folders in tv_root that match the given title + year. 
+ + Matching is loose: normalised title (dots, no special chars) + year must + appear at the start of the folder name. + """ + if not tv_root.exists(): + return [] + + # Build a normalised prefix to match against: "Oz.1997" + clean_title = _sanitise(tmdb_title).replace(" ", ".") + prefix = f"{clean_title}.{tmdb_year}".lower() + + matches = [] + for entry in tv_root.iterdir(): + if entry.is_dir() and entry.name.lower().startswith(prefix): + matches.append(entry.name) + + return sorted(matches) diff --git a/alfred/domain/media/__init__.py b/alfred/domain/media/__init__.py new file mode 100644 index 0000000..b474b59 --- /dev/null +++ b/alfred/domain/media/__init__.py @@ -0,0 +1,5 @@ +"""Media domain β€” shared naming and release parsing.""" + +from .release_parser import ParsedRelease, parse_release + +__all__ = ["ParsedRelease", "parse_release"] diff --git a/alfred/domain/media/release_parser.py b/alfred/domain/media/release_parser.py new file mode 100644 index 0000000..734a0b0 --- /dev/null +++ b/alfred/domain/media/release_parser.py @@ -0,0 +1,306 @@ +""" +release_parser.py β€” Parse a release name into structured components. 
+ +Handles both dot-separated and space-separated release names: + Oz.S03.1080p.WEBRip.x265-KONTRAST + Oz S03 1080p WEBRip x265-KONTRAST + Inception.2010.1080p.BluRay.x265-GROUP +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field + +# Known quality tokens +_QUALITIES = {"2160p", "1080p", "720p", "480p", "576p", "4k", "8k"} + +# Known source tokens (case-insensitive match) +_SOURCES = { + "bluray", "blu-ray", "bdrip", "brrip", + "webrip", "web-rip", "webdl", "web-dl", "web", + "hdtv", "hdrip", "dvdrip", "dvd", "vodrip", + "amzn", "nf", "dsnp", "hmax", "atvp", +} + +# Known codec tokens +_CODECS = { + "x264", "x265", "h264", "h265", "hevc", "avc", + "xvid", "divx", "av1", "vp9", + "h.264", "h.265", +} + +# Windows-forbidden characters (we strip these from display names) +_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]') + +# Episode/season pattern: S01, S01E02, S01E02E03, 1x02, etc. +_SEASON_EP_RE = re.compile( + r"S(\d{1,2})(?:E(\d{2})(?:E(\d{2}))?)?", + re.IGNORECASE, +) + +# Year pattern +_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b") + + +@dataclass +class ParsedRelease: + """Structured representation of a parsed release name.""" + + raw: str # original release name (untouched) + normalised: str # dots instead of spaces + title: str # show/movie title (dots, no year/season/tech) + year: int | None # movie year or show start year (from TMDB) + season: int | None # season number (None for movies) + episode: int | None # first episode number (None if season-pack) + episode_end: int | None # last episode for multi-ep (None otherwise) + quality: str | None # 1080p, 2160p, … + source: str | None # WEBRip, BluRay, … + codec: str | None # x265, HEVC, … + group: str # release group, "UNKNOWN" if missing + tech_string: str # quality.source.codec joined with dots + + # ------------------------------------------------------------------------- + # Derived helpers + # 
------------------------------------------------------------------------- + + @property + def is_movie(self) -> bool: + return self.season is None + + @property + def is_season_pack(self) -> bool: + return self.season is not None and self.episode is None + + def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: + """ + Build the series root folder name. + + Format: {Title}.{Year}.{Tech}-{Group} + Example: Oz.1997.1080p.WEBRip.x265-KONTRAST + """ + title_part = _sanitise_for_fs(tmdb_title).replace(" ", ".") + tech = self.tech_string or "Unknown" + return f"{title_part}.{tmdb_year}.{tech}-{self.group}" + + def season_folder_name(self) -> str: + """ + Build the season subfolder name = normalised release name (no episode). + + Example: Oz.S03.1080p.WEBRip.x265-KONTRAST + For a single-episode release we still strip the episode token so the + folder can hold the whole season. + """ + return _strip_episode_from_normalised(self.normalised) + + def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str: + """ + Build the episode filename. + + Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext} + Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv + + If tmdb_episode_title is None, omits the episode title segment. + """ + title_part = _sanitise_for_fs(self.title) # already dotted from normalised + s = f"S{self.season:02d}" if self.season is not None else "" + e = f"E{self.episode:02d}" if self.episode is not None else "" + se = s + e + + ep_title = "" + if tmdb_episode_title: + ep_title = "." + _sanitise_for_fs(tmdb_episode_title).replace(" ", ".") + + tech = self.tech_string or "Unknown" + ext_clean = ext.lstrip(".") + return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}" + + def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str: + """ + Build the movie folder name. 
+ + Format: {Title}.{Year}.{Tech}-{Group} + Example: Inception.2010.1080p.BluRay.x265-GROUP + """ + return self.show_folder_name(tmdb_title, tmdb_year) + + def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str: + """ + Build the movie filename (same as folder name + extension). + + Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv + """ + ext_clean = ext.lstrip(".") + return f"{self.movie_folder_name(tmdb_title, tmdb_year)}.{ext_clean}" + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def parse_release(name: str) -> ParsedRelease: + """ + Parse a release name and return a ParsedRelease. + + Accepts both dot-separated and space-separated names. + """ + normalised = _normalise(name) + tokens = normalised.split(".") + + season, episode, episode_end = _extract_season_episode(tokens) + quality, source, codec, group, tech_tokens = _extract_tech(tokens) + title = _extract_title(tokens, season, episode, tech_tokens) + year = _extract_year(tokens, title) + + tech_parts = [p for p in [quality, source, codec] if p] + tech_string = ".".join(tech_parts) + + return ParsedRelease( + raw=name, + normalised=normalised, + title=title, + year=year, + season=season, + episode=episode, + episode_end=episode_end, + quality=quality, + source=source, + codec=codec, + group=group, + tech_string=tech_string, + ) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +def _normalise(name: str) -> str: + """Replace spaces with dots, collapse multiple dots.""" + s = name.replace(" ", ".") + s = re.sub(r"\.{2,}", ".", s) + return s.strip(".") + + +def _sanitise_for_fs(text: str) -> str: + """Remove Windows-forbidden characters from a string.""" + return _WIN_FORBIDDEN.sub("", text) + + +def 
_extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]: + joined = ".".join(tokens) + m = _SEASON_EP_RE.search(joined) + if not m: + return None, None, None + season = int(m.group(1)) + episode = int(m.group(2)) if m.group(2) else None + episode_end = int(m.group(3)) if m.group(3) else None + return season, episode, episode_end + + +def _extract_tech( + tokens: list[str], +) -> tuple[str | None, str | None, str | None, str, set[str]]: + """ + Extract quality, source, codec, group from tokens. + + Returns (quality, source, codec, group, tech_token_set). + + Group extraction strategy (in priority order): + 1. Token where prefix is a known codec: x265-GROUP + 2. Last token in the list that contains a dash (fallback for 10bit-GROUP, AAC5.1-GROUP, etc.) + """ + quality: str | None = None + source: str | None = None + codec: str | None = None + group = "UNKNOWN" + tech_tokens: set[str] = set() + + for tok in tokens: + tl = tok.lower() + + if tl in _QUALITIES: + quality = tok + tech_tokens.add(tok) + continue + + if tl in _SOURCES: + source = tok + tech_tokens.add(tok) + continue + + if "-" in tok: + parts = tok.rsplit("-", 1) + # codec-GROUP (highest priority for group) + if parts[0].lower() in _CODECS: + codec = parts[0] + group = parts[1] if parts[1] else "UNKNOWN" + tech_tokens.add(tok) + continue + # source with dash: Web-DL, WEB-DL, etc. + if parts[0].lower() in _SOURCES or tok.lower().replace("-", "") in _SOURCES: + source = tok + tech_tokens.add(tok) + continue + + if tl in _CODECS: + codec = tok + tech_tokens.add(tok) + + # Fallback: if group still UNKNOWN, use the rightmost token with a dash + # that isn't a known source (handles "10bit-Protozoan", "AAC5.1-YTS", etc.) 
+ if group == "UNKNOWN": + for tok in reversed(tokens): + if "-" in tok: + parts = tok.rsplit("-", 1) + tl = tok.lower() + if tl in _SOURCES or tok.lower().replace("-", "") in _SOURCES: + continue + if parts[1]: # non-empty group part + group = parts[1] + break + + return quality, source, codec, group, tech_tokens + + +def _extract_title(tokens: list[str], season: int | None, episode: int | None, tech_tokens: set[str]) -> str: + """ + Extract the title portion: everything before the first season/year/tech token. + """ + title_parts = [] + for tok in tokens: + # Stop at season token + if _SEASON_EP_RE.match(tok): + break + # Stop at year + if _YEAR_RE.fullmatch(tok): + break + # Stop at tech tokens + if tok in tech_tokens or tok.lower() in _QUALITIES | _SOURCES | _CODECS: + break + # Stop if token contains a dash (likely codec-GROUP) + if "-" in tok and any(p.lower() in _CODECS | _SOURCES for p in tok.split("-")): + break + title_parts.append(tok) + + return ".".join(title_parts) if title_parts else tokens[0] + + +def _extract_year(tokens: list[str], title: str) -> int | None: + """Extract a 4-digit year from tokens (only after the title).""" + title_len = len(title.split(".")) + for tok in tokens[title_len:]: + m = _YEAR_RE.fullmatch(tok) + if m: + return int(m.group(1)) + return None + + +def _strip_episode_from_normalised(normalised: str) -> str: + """ + Remove all episode parts (Exx) from a normalised release name, keeping Sxx. + + Oz.S03E01.1080p... β†’ Oz.S03.1080p... + Archer.S14E09E10E11.1080p... β†’ Archer.S14.1080p... 
+ """ + return re.sub(r"(S\d{2})(E\d{2})+", r"\1", normalised, flags=re.IGNORECASE) diff --git a/alfred/domain/subtitles/__init__.py b/alfred/domain/subtitles/__init__.py index 802d335..75595a2 100644 --- a/alfred/domain/subtitles/__init__.py +++ b/alfred/domain/subtitles/__init__.py @@ -1,14 +1,37 @@ -"""Subtitles domain - Business logic for subtitle management (shared across movies and TV shows).""" +"""Subtitles domain β€” subtitle identification, classification and placement.""" -from .entities import Subtitle +from .aggregates import SubtitleRuleSet +from .entities import MediaSubtitleMetadata, SubtitleTrack from .exceptions import SubtitleNotFound -from .services import SubtitleService -from .value_objects import Language, SubtitleFormat +from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase +from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher +from .value_objects import ( + RuleScope, + ScanStrategy, + SubtitleFormat, + SubtitleLanguage, + SubtitleMatchingRules, + SubtitlePattern, + SubtitleType, + TypeDetectionMethod, +) __all__ = [ - "Subtitle", - "Language", + "SubtitleTrack", + "MediaSubtitleMetadata", + "SubtitleRuleSet", + "SubtitleKnowledgeBase", + "KnowledgeLoader", + "SubtitleIdentifier", + "SubtitleMatcher", + "PatternDetector", "SubtitleFormat", + "SubtitleLanguage", + "SubtitlePattern", + "SubtitleType", + "ScanStrategy", + "TypeDetectionMethod", + "SubtitleMatchingRules", + "RuleScope", "SubtitleNotFound", - "SubtitleService", ] diff --git a/alfred/domain/subtitles/aggregates.py b/alfred/domain/subtitles/aggregates.py new file mode 100644 index 0000000..26541ab --- /dev/null +++ b/alfred/domain/subtitles/aggregates.py @@ -0,0 +1,90 @@ +"""Subtitle domain aggregates.""" + +from dataclasses import dataclass, field +from typing import Any + +from ..shared.value_objects import ImdbId +from .knowledge.base import SubtitleKnowledgeBase +from .value_objects import RuleScope, SubtitleMatchingRules + + +def DEFAULT_RULES() 
@dataclass
class SubtitleRuleSet:
    """
    Subtitle selection rules attached to one scope level, with inheritance.

    Each instance stores only its own overrides ("deltas"); a delta left at
    None is inherited from the parent rule set. Resolution order:
    global → release_group → show/movie → season → episode.

    A rule set may also be pinned to a single media item (imdb_id), which
    bypasses the scope hierarchy for that item.
    """

    scope: RuleScope
    parent: "SubtitleRuleSet | None" = None
    pinned_to: ImdbId | None = None

    # Deltas: None = inherit
    _languages: list[str] | None = field(default=None, repr=False)
    _formats: list[str] | None = field(default=None, repr=False)
    _types: list[str] | None = field(default=None, repr=False)
    _format_priority: list[str] | None = field(default=None, repr=False)
    _min_confidence: float | None = field(default=None, repr=False)

    def resolve(self) -> SubtitleMatchingRules:
        """
        Merge this level's deltas over the rules resolved from the parent chain.

        The top of the chain falls back to DEFAULT_RULES. List deltas that are
        None or empty are inherited; min_confidence is inherited only when None.
        """
        if self.parent:
            inherited = self.parent.resolve()
        else:
            inherited = DEFAULT_RULES()

        if self._min_confidence is None:
            confidence = inherited.min_confidence
        else:
            confidence = self._min_confidence

        return SubtitleMatchingRules(
            preferred_languages=self._languages or inherited.preferred_languages,
            preferred_formats=self._formats or inherited.preferred_formats,
            allowed_types=self._types or inherited.allowed_types,
            format_priority=self._format_priority or inherited.format_priority,
            min_confidence=confidence,
        )

    def override(
        self,
        languages: list[str] | None = None,
        formats: list[str] | None = None,
        types: list[str] | None = None,
        format_priority: list[str] | None = None,
        min_confidence: float | None = None,
    ) -> None:
        """Store the given values as deltas; arguments left at None are ignored."""
        updates = (
            ("_languages", languages),
            ("_formats", formats),
            ("_types", types),
            ("_format_priority", format_priority),
            ("_min_confidence", min_confidence),
        )
        for attribute, value in updates:
            if value is not None:
                setattr(self, attribute, value)

    def to_dict(self) -> dict:
        """Serialize the scope and the non-None deltas (for rules.yaml)."""
        candidates = (
            ("languages", self._languages),
            ("formats", self._formats),
            ("types", self._types),
            ("format_priority", self._format_priority),
            ("min_confidence", self._min_confidence),
        )
        delta: dict[str, Any] = {
            key: value for key, value in candidates if value is not None
        }
        scope_info = {"level": self.scope.level, "identifier": self.scope.identifier}
        return {"scope": scope_info, "override": delta}

    @classmethod
    def global_default(cls) -> "SubtitleRuleSet":
        """Build an override-free rule set at the "global" scope level."""
        return cls(scope=RuleScope(level="global"))
@dataclass
class SubtitleTrack:
    """
    A single subtitle track — either an external file or an embedded stream.

    State can evolve: unknown → resolved after user clarification.
    confidence reflects how certain we are about language + type classification.
    """

    # Classification (may be None if not yet resolved)
    language: SubtitleLanguage | None
    format: SubtitleFormat | None
    subtitle_type: SubtitleType = SubtitleType.UNKNOWN

    # Source
    is_embedded: bool = False
    file_path: Path | None = None  # None if embedded
    file_size_kb: float | None = None
    entry_count: int | None = None  # number of subtitle cues in the file

    # Matching state
    confidence: float = 0.0  # 0.0 → 1.0, not applicable for embedded
    raw_tokens: list[str] = field(default_factory=list)  # tokens extracted from filename

    def is_resolved(self) -> bool:
        """Return True once a language has been assigned to the track."""
        return self.language is not None

    @property
    def destination_name(self) -> str:
        """
        Compute the output filename per naming convention:
            {lang}.{ext}
            {lang}.sdh.{ext}
            {lang}.forced.{ext}

        The extension is the first one declared by the track's format.

        Raises:
            ValueError: if the language or format is missing, or if the format
                declares no extension.
        """
        if not self.language or not self.format:
            raise ValueError("Cannot compute destination_name: language or format missing")
        if not self.format.extensions:
            # A format loaded without extensions would otherwise surface as a
            # bare IndexError; raise the error type this property already uses.
            raise ValueError("Cannot compute destination_name: format has no extension")
        ext = self.format.extensions[0].lstrip(".")
        parts = [self.language.code]
        if self.subtitle_type == SubtitleType.SDH:
            parts.append("sdh")
        elif self.subtitle_type == SubtitleType.FORCED:
            parts.append("forced")
        return ".".join(parts) + "." + ext

    def __repr__(self) -> str:
        lang = self.language.code if self.language else "?"
        fmt = self.format.id if self.format else "?"
        src = "embedded" if self.is_embedded else str(self.file_path.name if self.file_path else "?")
        return f"SubtitleTrack({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
logger = logging.getLogger(__name__)


class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).

    Built from a KnowledgeLoader. Call ``reload()`` to pick up newly learned
    entries without restarting.
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        # Fall back to the default loader when none is injected.
        self._loader = loader or KnowledgeLoader()
        self._build()

    def _build(self) -> None:
        """Parse the loader's raw dictionaries into typed lookup tables."""
        data = self._loader.subtitles()

        self._formats: dict[str, SubtitleFormat] = {}
        for fid, fdata in data.get("formats", {}).items():
            fdata = fdata or {}  # an empty YAML entry parses to None
            self._formats[fid] = SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )

        self._languages: dict[str, SubtitleLanguage] = {}
        for code, ldata in data.get("languages", {}).items():
            ldata = ldata or {}
            self._languages[code] = SubtitleLanguage(
                code=code,
                tokens=ldata.get("tokens", []),
            )

        # Reverse map: lowercase token -> language code
        self._lang_token_map: dict[str, str] = {}
        for code, lang in self._languages.items():
            for token in lang.tokens:
                self._lang_token_map[token.lower()] = code

        # Reverse map: lowercase token -> subtitle type
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in data.get("types", {}).items():
            try:
                stype = SubtitleType(type_id)
            except ValueError as e:
                # Same policy as for patterns below: one bad entry must not
                # prevent the whole knowledge base from loading.
                logger.warning(f"SubtitleKnowledgeBase: skipping type '{type_id}': {e}")
                continue
            for token in (tdata or {}).get("tokens", []):
                self._type_token_map[token.lower()] = stype

        d = data.get("defaults", {})
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=d.get("languages", ["fra", "eng"]),
            preferred_formats=d.get("formats", ["srt"]),
            allowed_types=d.get("types", ["standard", "forced"]),
            format_priority=d.get("format_priority", ["srt", "ass"]),
            min_confidence=d.get("min_confidence", 0.7),
        )

        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in self._loader.patterns().items():
            try:
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(pdata.get("scan_strategy", "adjacent")),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        pdata.get("type_detection", {}).get("method", "token_in_name")
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(f"SubtitleKnowledgeBase: skipping pattern '{pid}': {e}")

    def reload(self) -> None:
        """Create a fresh loader and rebuild every lookup table from it."""
        # NOTE(review): this always swaps in a default KnowledgeLoader, so a
        # loader injected through __init__ is discarded here - confirm that
        # this is intended.
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the matching rules built from the ``defaults`` section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return all known formats keyed by format id."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose ``matches_extension(ext)`` is true, else None."""
        for fmt in self._formats.values():
            if fmt.matches_extension(ext):
                return fmt
        return None

    def known_extensions(self) -> set[str]:
        """Return a new set holding every extension declared by any format."""
        exts = set()
        for fmt in self._formats.values():
            exts.update(fmt.extensions)
        return exts

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return all known languages keyed by language code."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Resolve a filename/stream token (case-insensitive) to a language."""
        code = self._lang_token_map.get(token.lower())
        return self._languages.get(code) if code else None

    def is_known_lang_token(self, token: str) -> bool:
        """Tell whether ``token`` (case-insensitive) maps to a language."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Resolve a token (case-insensitive) to a subtitle type, else None."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Tell whether ``token`` (case-insensitive) maps to a subtitle type."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return all successfully parsed patterns keyed by pattern id."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with this id, or None when unknown."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """Return the parsed patterns listed under a release group's ``known_patterns``."""
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]
b/alfred/domain/subtitles/knowledge/loader.py @@ -0,0 +1,131 @@ +"""KnowledgeLoader β€” autodiscovers and merges builtin + learned YAML knowledge packs.""" + +import logging +from pathlib import Path + +import yaml + +logger = logging.getLogger(__name__) + +import alfred as _alfred_pkg + +# Builtin knowledge β€” anchored on the alfred package itself, not on this file's depth +_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge" + +# Learned knowledge β€” local to this instance, gitignored +_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" + + +def _load_yaml(path: Path) -> dict: + try: + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) or {} + except FileNotFoundError: + return {} + except Exception as e: + logger.warning(f"KnowledgeLoader: could not load {path}: {e}") + return {} + + +def _merge(base: dict, override: dict) -> dict: + """ + Deep merge override into base. + Lists are extended (not replaced) β€” learned tokens are additive. + Scalar values in override win over base. + """ + result = dict(base) + for key, val in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(val, dict): + result[key] = _merge(result[key], val) + elif key in result and isinstance(result[key], list) and isinstance(val, list): + # Extend list, deduplicate, preserve order + combined = result[key] + [v for v in val if v not in result[key]] + result[key] = combined + else: + result[key] = val + return result + + +class KnowledgeLoader: + """ + Loads subtitle knowledge from YAML files. + + Builtin packs live in alfred/knowledge/ (versioned). + Learned packs live in data/knowledge/ (gitignored, instance-local). + + Learned entries are merged additively β€” they can only add tokens/patterns, + never remove builtin ones. 
+ + Usage: + loader = KnowledgeLoader() + subtitles = loader.subtitles() # merged subtitles.yaml + patterns = loader.patterns() # all patterns, keyed by id + groups = loader.release_groups() # all release groups, keyed by name + """ + + def __init__(self): + self._cache: dict[str, dict] = {} + self._load() + + def _load(self) -> None: + # Main subtitles knowledge + builtin = _load_yaml(_BUILTIN_ROOT / "subtitles.yaml") + learned = _load_yaml(_LEARNED_ROOT / "subtitles_learned.yaml") + self._cache["subtitles"] = _merge(builtin, learned) + + # Patterns + self._cache["patterns"] = {} + for path in sorted((_BUILTIN_ROOT / "patterns").glob("*.yaml")): + data = _load_yaml(path) + pid = data.get("id", path.stem) + self._cache["patterns"][pid] = data + + for path in sorted((_LEARNED_ROOT / "patterns").glob("*.yaml")): + data = _load_yaml(path) + pid = data.get("id", path.stem) + if pid in self._cache["patterns"]: + self._cache["patterns"][pid] = _merge(self._cache["patterns"][pid], data) + else: + self._cache["patterns"][pid] = data + logger.info(f"KnowledgeLoader: learned new pattern '{pid}'") + + # Release groups + self._cache["release_groups"] = {} + for path in sorted((_BUILTIN_ROOT / "release_groups").glob("*.yaml")): + data = _load_yaml(path) + name = data.get("name", path.stem) + self._cache["release_groups"][name] = data + + for path in sorted((_LEARNED_ROOT / "release_groups").glob("*.yaml")): + data = _load_yaml(path) + name = data.get("name", path.stem) + if name in self._cache["release_groups"]: + self._cache["release_groups"][name] = _merge(self._cache["release_groups"][name], data) + else: + self._cache["release_groups"][name] = data + logger.info(f"KnowledgeLoader: learned new release group '{name}'") + + logger.info( + f"KnowledgeLoader: {len(self._cache['patterns'])} patterns, " + f"{len(self._cache['release_groups'])} release groups loaded" + ) + + def subtitles(self) -> dict: + return self._cache["subtitles"] + + def patterns(self) -> dict[str, dict]: + 
return self._cache["patterns"] + + def pattern(self, pattern_id: str) -> dict | None: + return self._cache["patterns"].get(pattern_id) + + def release_groups(self) -> dict[str, dict]: + return self._cache["release_groups"] + + def release_group(self, name: str) -> dict | None: + """Case-insensitive lookup.""" + name_lower = name.lower() + for key, val in self._cache["release_groups"].items(): + if key.lower() == name_lower: + return val + return None diff --git a/alfred/domain/subtitles/scanner.py b/alfred/domain/subtitles/scanner.py new file mode 100644 index 0000000..ba81520 --- /dev/null +++ b/alfred/domain/subtitles/scanner.py @@ -0,0 +1,221 @@ +"""SubtitleScanner β€” inspects local subtitle files and filters them per user preferences. + +Given a video file path, the scanner: + 1. Looks for subtitle files in the same directory as the video. + 2. Optionally also inspects a Subs/ subfolder adjacent to the video. + 3. Classifies each file (language, SDH, forced) from its filename. + 4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh, keep_forced). + 5. Returns a list of SubtitleCandidate β€” one per file that passes the filter, + with the destination filename already computed. 
+ +Filename classification heuristics +----------------------------------- +We parse the stem of each subtitle file looking for known patterns: + + fr.srt β†’ lang=fr, sdh=False, forced=False + fr.sdh.srt β†’ lang=fr, sdh=True + fr.hi.srt β†’ lang=fr, sdh=True (hi = hearing-impaired, alias for sdh) + fr.forced.srt β†’ lang=fr, forced=True + Breaking.Bad.S01E01.French.srt β†’ lang=fr (keyword match) + Breaking.Bad.S01E01.VOSTFR.srt β†’ lang=fr (VOSTFR = French forced/foreign subs) + +Output naming convention (matches SubtitlePreferences docstring): + {lang}.srt + {lang}.sdh.srt + {lang}.forced.srt +""" + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Subtitle file extensions we handle +SUBTITLE_EXTENSIONS = {".srt", ".ass", ".ssa", ".vtt", ".sub"} + +# Language keyword map: lowercase token β†’ ISO 639-1 code +_LANG_KEYWORDS: dict[str, str] = { + # French + "fr": "fr", + "fra": "fr", + "french": "fr", + "francais": "fr", + "franΓ§ais": "fr", + "vf": "fr", + "vff": "fr", + "vostfr": "fr", + # English + "en": "en", + "eng": "en", + "english": "en", + # Spanish + "es": "es", + "spa": "es", + "spanish": "es", + "espanol": "es", + # German + "de": "de", + "deu": "de", + "ger": "de", + "german": "de", + # Italian + "it": "it", + "ita": "it", + "italian": "it", + # Portuguese + "pt": "pt", + "por": "pt", + "portuguese": "pt", + # Dutch + "nl": "nl", + "nld": "nl", + "dutch": "nl", + # Japanese + "ja": "ja", + "jpn": "ja", + "japanese": "ja", +} + +# Tokens that indicate SDH / hearing-impaired +_SDH_TOKENS = {"sdh", "hi", "hearing", "impaired", "cc", "closedcaption"} + +# Tokens that indicate forced subtitles +_FORCED_TOKENS = {"forced", "foreign"} + + +@dataclass +class SubtitleCandidate: + """A subtitle file that passed the filter, ready to be placed.""" + + source_path: Path + language: str # ISO 639-1 code, e.g. "fr" + is_sdh: bool + is_forced: bool + extension: str # e.g. 
".srt" + + @property + def destination_name(self) -> str: + """ + Compute the destination filename per naming convention: + {lang}.srt + {lang}.sdh.srt + {lang}.forced.srt + """ + ext = self.extension.lstrip(".") + parts = [self.language] + if self.is_sdh: + parts.append("sdh") + elif self.is_forced: + parts.append("forced") + return ".".join(parts) + "." + ext + + +def _classify(path: Path) -> tuple[str | None, bool, bool]: + """ + Parse a subtitle filename and return (language_code, is_sdh, is_forced). + + Returns (None, False, False) if the language cannot be determined. + """ + stem = path.stem.lower() + # Split on dots, spaces, underscores, hyphens + import re + tokens = re.split(r"[\.\s_\-]+", stem) + + language: str | None = None + is_sdh = False + is_forced = False + + for token in tokens: + if token in _LANG_KEYWORDS: + language = _LANG_KEYWORDS[token] + if token in _SDH_TOKENS: + is_sdh = True + if token in _FORCED_TOKENS: + is_forced = True + + return language, is_sdh, is_forced + + +class SubtitleScanner: + """ + Scans subtitle files next to a video and filters them per SubtitlePreferences. + + Usage: + scanner = SubtitleScanner(prefs) + candidates = scanner.scan(video_path) + # Each candidate has .source_path and .destination_name + """ + + def __init__(self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool): + self.languages = [l.lower() for l in languages] + self.min_size_kb = min_size_kb + self.keep_sdh = keep_sdh + self.keep_forced = keep_forced + + def scan(self, video_path: Path) -> list[SubtitleCandidate]: + """ + Return all subtitle candidates found next to the video that pass the filter. 
+ + Scans: + - Same directory as the video (flat siblings) + - Subs/ subfolder if present + """ + candidates: list[SubtitleCandidate] = [] + search_dirs = [video_path.parent] + + subs_dir = video_path.parent / "Subs" + if subs_dir.is_dir(): + search_dirs.append(subs_dir) + logger.debug(f"SubtitleScanner: found Subs/ folder at {subs_dir}") + + for directory in search_dirs: + for path in sorted(directory.iterdir()): + if not path.is_file(): + continue + if path.suffix.lower() not in SUBTITLE_EXTENSIONS: + continue + + candidate = self._evaluate(path) + if candidate is not None: + candidates.append(candidate) + + logger.info(f"SubtitleScanner: {len(candidates)} candidate(s) found for {video_path.name}") + return candidates + + def _evaluate(self, path: Path) -> SubtitleCandidate | None: + """Apply all filters to a single subtitle file. Returns None if it should be dropped.""" + # Size filter + size_kb = path.stat().st_size / 1024 + if size_kb < self.min_size_kb: + logger.debug(f"SubtitleScanner: skip {path.name} (too small: {size_kb:.1f} KB)") + return None + + language, is_sdh, is_forced = _classify(path) + + # Language filter + if language is None: + logger.debug(f"SubtitleScanner: skip {path.name} (language unknown)") + return None + + if language not in self.languages: + logger.debug(f"SubtitleScanner: skip {path.name} (language '{language}' not in prefs)") + return None + + # SDH filter + if is_sdh and not self.keep_sdh: + logger.debug(f"SubtitleScanner: skip {path.name} (SDH not wanted)") + return None + + # Forced filter + if is_forced and not self.keep_forced: + logger.debug(f"SubtitleScanner: skip {path.name} (forced not wanted)") + return None + + return SubtitleCandidate( + source_path=path, + language=language, + is_sdh=is_sdh, + is_forced=is_forced, + extension=path.suffix.lower(), + ) diff --git a/alfred/domain/subtitles/services/__init__.py b/alfred/domain/subtitles/services/__init__.py new file mode 100644 index 0000000..a429b25 --- /dev/null +++ 
b/alfred/domain/subtitles/services/__init__.py @@ -0,0 +1,13 @@ +from .identifier import SubtitleIdentifier +from .matcher import SubtitleMatcher +from .pattern_detector import PatternDetector +from .placer import PlacedTrack, PlaceResult, SubtitlePlacer + +__all__ = [ + "SubtitleIdentifier", + "SubtitleMatcher", + "PatternDetector", + "SubtitlePlacer", + "PlacedTrack", + "PlaceResult", +] diff --git a/alfred/domain/subtitles/services/identifier.py b/alfred/domain/subtitles/services/identifier.py new file mode 100644 index 0000000..74d9935 --- /dev/null +++ b/alfred/domain/subtitles/services/identifier.py @@ -0,0 +1,287 @@ +"""SubtitleIdentifier β€” finds and classifies all subtitle tracks for a video file.""" + +import logging +import re +import subprocess +import json +from pathlib import Path + +from ...shared.value_objects import ImdbId +from ..entities import MediaSubtitleMetadata, SubtitleTrack +from ..knowledge.base import SubtitleKnowledgeBase +from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType + +logger = logging.getLogger(__name__) + + +def _tokenize(name: str) -> list[str]: + """Split a filename stem into lowercase tokens.""" + return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t] + + +def _count_entries(path: Path) -> int: + """Return the entry count of an SRT file by finding the last cue number.""" + try: + with open(path, encoding="utf-8", errors="replace") as f: + lines = f.read().splitlines() + for line in reversed(lines): + if line.strip().isdigit(): + return int(line.strip()) + return 0 + except Exception: + return 0 + + +class SubtitleIdentifier: + """ + Finds all subtitle tracks for a given video file using a known pattern, + then attempts to classify each track (language, type, format). + + Returns a MediaSubtitleMetadata with embedded + external tracks. + External tracks with unknown language or low confidence are left as-is β€” + the caller (use case) decides whether to ask the user for clarification. 
+ """ + + def __init__(self, kb: SubtitleKnowledgeBase): + self.kb = kb + + def identify( + self, + video_path: Path, + pattern: SubtitlePattern, + media_id: ImdbId | None, + media_type: str, + release_group: str | None = None, + ) -> MediaSubtitleMetadata: + metadata = MediaSubtitleMetadata( + media_id=media_id, + media_type=media_type, + release_group=release_group, + detected_pattern_id=pattern.id, + ) + + if pattern.scan_strategy == ScanStrategy.EMBEDDED: + metadata.embedded_tracks = self._scan_embedded(video_path) + else: + metadata.external_tracks = self._scan_external(video_path, pattern) + # Always also check for embedded tracks + metadata.embedded_tracks = self._scan_embedded(video_path) + + return metadata + + # ------------------------------------------------------------------ + # Embedded tracks β€” ffprobe + # ------------------------------------------------------------------ + + def _scan_embedded(self, video_path: Path) -> list[SubtitleTrack]: + if not video_path.exists(): + return [] + try: + result = subprocess.run( + [ + "ffprobe", "-v", "quiet", + "-print_format", "json", + "-show_streams", + "-select_streams", "s", + str(video_path), + ], + capture_output=True, text=True, timeout=30, + ) + data = json.loads(result.stdout) + except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError) as e: + logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}") + return [] + + tracks = [] + for stream in data.get("streams", []): + tags = stream.get("tags", {}) + disposition = stream.get("disposition", {}) + lang_code = tags.get("language", "") + title = tags.get("title", "") + + lang = self.kb.language_for_token(lang_code) if lang_code else None + + if disposition.get("hearing_impaired"): + stype = SubtitleType.SDH + elif disposition.get("forced"): + stype = SubtitleType.FORCED + else: + stype = SubtitleType.STANDARD + + tracks.append(SubtitleTrack( + language=lang, + format=None, + subtitle_type=stype, + is_embedded=True, 
+ raw_tokens=[lang_code] if lang_code else [], + )) + + logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}") + return tracks + + # ------------------------------------------------------------------ + # External tracks β€” filesystem scan per pattern strategy + # ------------------------------------------------------------------ + + def _scan_external(self, video_path: Path, pattern: SubtitlePattern) -> list[SubtitleTrack]: + strategy = pattern.scan_strategy + + if strategy == ScanStrategy.ADJACENT: + candidates = self._find_adjacent(video_path) + elif strategy == ScanStrategy.FLAT: + candidates = self._find_flat(video_path, pattern.root_folder or "Subs") + elif strategy == ScanStrategy.EPISODE_SUBFOLDER: + candidates = self._find_episode_subfolder(video_path, pattern.root_folder or "Subs") + else: + return [] + + return self._classify_files(candidates, pattern) + + def _find_adjacent(self, video_path: Path) -> list[Path]: + return [ + p for p in sorted(video_path.parent.iterdir()) + if p.is_file() and p.suffix.lower() in self.kb.known_extensions() + and p.stem != video_path.stem + ] + + def _find_flat(self, video_path: Path, root_folder: str) -> list[Path]: + subs_dir = video_path.parent / root_folder + if not subs_dir.is_dir(): + # Also look at release root (one level up) + subs_dir = video_path.parent.parent / root_folder + if not subs_dir.is_dir(): + return [] + return [ + p for p in sorted(subs_dir.iterdir()) + if p.is_file() and p.suffix.lower() in self.kb.known_extensions() + ] + + def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]: + """ + Look for Subs/{episode_stem}/*.srt + + Checks two locations: + 1. Adjacent to the video: video_path.parent / root_folder / video_path.stem + 2. 
Release root (one level up): video_path.parent.parent / root_folder / video_path.stem + """ + episode_stem = video_path.stem + candidates_dirs = [ + video_path.parent / root_folder / episode_stem, + video_path.parent.parent / root_folder / episode_stem, + ] + for subs_dir in candidates_dirs: + if subs_dir.is_dir(): + files = [ + p for p in sorted(subs_dir.iterdir()) + if p.is_file() and p.suffix.lower() in self.kb.known_extensions() + ] + if files: + logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}") + return files + return [] + + # ------------------------------------------------------------------ + # Classification + # ------------------------------------------------------------------ + + def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]: + tracks = [] + for path in paths: + track = self._classify_single(path) + tracks.append(track) + + # Post-process: if multiple tracks share same language but type is ambiguous, + # apply size_and_count disambiguation + if pattern.type_detection.value == "size_and_count": + tracks = self._disambiguate_by_size(tracks) + + return tracks + + def _classify_single(self, path: Path) -> SubtitleTrack: + fmt = self.kb.format_for_extension(path.suffix) + tokens = _tokenize(path.stem) + + language = None + subtitle_type = SubtitleType.UNKNOWN + unknown_tokens = [] + matched_tokens = 0 + + for token in tokens: + if self.kb.is_known_lang_token(token): + language = self.kb.language_for_token(token) + matched_tokens += 1 + elif self.kb.is_known_type_token(token): + subtitle_type = self.kb.type_for_token(token) or subtitle_type + matched_tokens += 1 + elif token.isdigit(): + pass # numeric prefix β€” ignore + elif len(token) > 1: + unknown_tokens.append(token) + + # Confidence: proportion of meaningful tokens that were recognized + meaningful = [t for t in tokens if not t.isdigit() and len(t) > 1] + confidence = matched_tokens / max(len(meaningful), 1) if meaningful else 
0.5 + + if unknown_tokens: + logger.debug( + f"SubtitleIdentifier: unknown tokens in '{path.name}': {unknown_tokens}" + ) + + size_kb = path.stat().st_size / 1024 if path.exists() else None + entry_count = _count_entries(path) if path.exists() else None + + return SubtitleTrack( + language=language, + format=fmt, + subtitle_type=subtitle_type, + is_embedded=False, + file_path=path, + file_size_kb=size_kb, + entry_count=entry_count, + confidence=confidence, + raw_tokens=tokens, + ) + + def _disambiguate_by_size(self, tracks: list[SubtitleTrack]) -> list[SubtitleTrack]: + """ + When multiple tracks share the same language and type is UNKNOWN/STANDARD, + the one with the most entries (lines) is SDH, the smallest is FORCED if + there are 3+, otherwise the smaller is STANDARD. + + Only applied when type_detection = size_and_count. + """ + from itertools import groupby + + # Group by language code + lang_groups: dict[str, list[SubtitleTrack]] = {} + for track in tracks: + key = track.language.code if track.language else "__unknown__" + lang_groups.setdefault(key, []).append(track) + + result = [] + for lang_code, group in lang_groups.items(): + if len(group) == 1: + result.extend(group) + continue + + # Sort by entry_count ascending (None treated as 0) + sorted_group = sorted(group, key=lambda t: t.entry_count or 0) + + if len(sorted_group) == 2: + # smaller = standard, larger = sdh + self._set_type(sorted_group[0], SubtitleType.STANDARD) + self._set_type(sorted_group[1], SubtitleType.SDH) + elif len(sorted_group) >= 3: + # smallest = forced, middle = standard, largest = sdh + self._set_type(sorted_group[0], SubtitleType.FORCED) + for t in sorted_group[1:-1]: + self._set_type(t, SubtitleType.STANDARD) + self._set_type(sorted_group[-1], SubtitleType.SDH) + + result.extend(sorted_group) + + return result + + def _set_type(self, track: SubtitleTrack, stype: SubtitleType) -> None: + """Mutate track type in-place.""" + track.subtitle_type = stype diff --git 
logger = logging.getLogger(__name__)


class SubtitleMatcher:
    """
    Filters a list of SubtitleTrack against effective SubtitleMatchingRules.

    Returns matched tracks (pass all filters, confidence >= min_confidence)
    and unresolved tracks (need user clarification).

    Conflict resolution: when two tracks share the same language + type,
    format_priority decides which one to keep.
    """

    def match(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleTrack], list[SubtitleTrack]]:
        """
        Returns (matched, unresolved).

        Embedded tracks are ignored entirely. A track with no language, or
        with confidence below ``rules.min_confidence``, goes to ``unresolved``
        before any preference filter is applied.
        """
        matched: list[SubtitleTrack] = []
        unresolved: list[SubtitleTrack] = []

        for track in tracks:
            if track.is_embedded:
                continue

            if track.language is None or track.confidence < rules.min_confidence:
                unresolved.append(track)
                continue

            if not self._passes_filters(track, rules):
                logger.debug(f"SubtitleMatcher: filtered out {track}")
                continue

            matched.append(track)

        matched = self._resolve_conflicts(matched, rules)
        logger.info(
            f"SubtitleMatcher: {len(matched)} matched, {len(unresolved)} unresolved"
        )
        return matched, unresolved

    def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
        """Check language, format and type preferences; an empty preference list disables that filter."""
        # Language filter
        if rules.preferred_languages:
            if not track.language:
                return False
            if track.language.code not in rules.preferred_languages:
                return False

        # Format filter (only for external files)
        if rules.preferred_formats and not track.is_embedded:
            if not track.format:
                return False
            if track.format.id not in rules.preferred_formats:
                return False

        # Type filter
        if rules.allowed_types:
            if track.subtitle_type.value not in rules.allowed_types:
                return False

        return True

    def _resolve_conflicts(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleTrack]:
        """
        When multiple tracks have same language + type, keep only the best one
        according to format_priority. If no format_priority applies, keep the first.

        The result keeps the order in which each (language, type) pair was
        first seen.
        """
        seen: dict[tuple, SubtitleTrack] = {}

        for track in tracks:
            lang = track.language.code if track.language else None
            key = (lang, track.subtitle_type.value)

            existing = seen.get(key)
            if existing is None:
                seen[key] = track
            elif self._prefer(track, existing, rules.format_priority):
                # Only external tracks reach this point, so a missing format
                # means "unrecognised extension", not "embedded".
                logger.debug(
                    f"SubtitleMatcher: conflict {key} — "
                    f"preferring {track.format.id if track.format else 'unknown'} "
                    f"over {existing.format.id if existing.format else 'unknown'}"
                )
                seen[key] = track

        return list(seen.values())

    def _prefer(
        self,
        candidate: SubtitleTrack,
        existing: SubtitleTrack,
        format_priority: list[str],
    ) -> bool:
        """
        Return True if candidate is preferable to existing.

        A format earlier in ``format_priority`` wins; formats absent from the
        list rank last. Ties, and an empty priority list, keep ``existing``.
        """
        if not format_priority:
            return False
        c_fmt = candidate.format.id if candidate.format else ""
        e_fmt = existing.format.id if existing.format else ""
        c_rank = format_priority.index(c_fmt) if c_fmt in format_priority else 999
        e_rank = format_priority.index(e_fmt) if e_fmt in format_priority else 999
        return c_rank < e_rank
logger = logging.getLogger(__name__)


class PatternDetector:
    """
    Inspects a release folder and returns the best matching known pattern,
    plus a confidence score and a description of what was found.

    Used for "pattern discovery", when we don't yet know which pattern
    a release follows. The result is proposed to the user for confirmation.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def detect(self, release_root: Path, sample_video: Path) -> dict:
        """
        Analyse the release folder and return:
            {
                "detected": SubtitlePattern | None,
                "confidence": float,
                "description": str,   # human-readable description of what was found
                "candidate_pattern_ids": list[str],  # the detected id, or empty
                "raw_findings": dict,  # the structural facts from _inspect()
            }
        """
        findings = self._inspect(release_root, sample_video)
        best, confidence = self._match_pattern(findings)

        return {
            "detected": best,
            "confidence": confidence,
            "description": self._describe(findings),
            "candidate_pattern_ids": [best.id] if best else [],
            "raw_findings": findings,
        }

    def _has_embedded_subtitles(self, video_path: Path) -> bool:
        """
        Run ffprobe to check whether the video has embedded subtitle streams.

        Returns False when ffprobe cannot be started, times out (30 s), or
        prints something that is not valid JSON.
        """
        try:
            result = subprocess.run(
                [
                    "ffprobe", "-v", "quiet",
                    "-print_format", "json",
                    "-show_streams",
                    "-select_streams", "s",
                    str(video_path),
                ],
                capture_output=True, text=True, timeout=30,
            )
            data = json.loads(result.stdout)
            return len(data.get("streams", [])) > 0
        except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
            # OSError covers a missing ffprobe (FileNotFoundError) as well as
            # a present but non-executable one.
            return False

    def _has_lang_token(self, stem: str) -> bool:
        """Tell whether any token of a filename stem is a known language token."""
        # Same separators as the identifier's tokenizer: dot, underscore,
        # hyphen, whitespace.
        normalized = stem.lower()
        for sep in ("_", "-", " "):
            normalized = normalized.replace(sep, ".")
        return any(
            self.kb.is_known_lang_token(token)
            for token in normalized.split(".")
            if token
        )

    def _inspect_subs_folder(
        self,
        subs_dir: Path,
        sample_video: Path,
        known_exts: set[str],
        findings: dict,
    ) -> None:
        """Record in ``findings`` how one Subs/ folder is organised."""
        findings["has_subs_folder"] = True
        findings["subs_root"] = str(subs_dir)

        # Sorted so the sampled episode folder does not depend on directory
        # listing order.
        children = sorted(subs_dir.iterdir())
        sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
        sub_dirs = [c for c in children if c.is_dir()]

        if sub_dirs and not sub_files:
            findings["subs_strategy"] = "episode_subfolder"
            # Prefer the sample video's own episode folder; fall back to the
            # first one when the names do not line up.
            own_dir = subs_dir / sample_video.stem
            sample_sub = own_dir if own_dir in sub_dirs else sub_dirs[0]
            sample_files = [
                f for f in sample_sub.iterdir()
                if f.is_file() and f.suffix.lower() in known_exts
            ]
            findings["files_per_episode"] = len(sample_files)
            for f in sample_files:
                if f.stem.split("_")[0].isdigit():
                    findings["has_numeric_prefix"] = True
                if self._has_lang_token(f.stem):
                    findings["has_lang_tokens"] = True
        else:
            findings["subs_strategy"] = "flat"
            findings["files_per_episode"] = len(sub_files)
            if any(self._has_lang_token(f.stem) for f in sub_files):
                findings["has_lang_tokens"] = True

    def _inspect(self, release_root: Path, sample_video: Path) -> dict:
        """
        Gather structural facts about the release.

        A Subs/ folder is looked for next to the video first, then at the
        release root; only the first one found is inspected. Adjacent subtitle
        files are only considered when no Subs/ folder exists.
        """
        known_exts = self.kb.known_extensions()
        findings: dict = {
            "has_subs_folder": False,
            "subs_strategy": None,  # "flat" | "episode_subfolder"
            "subs_root": None,
            "adjacent_subs": False,
            "has_embedded": self._has_embedded_subtitles(sample_video),
            "files_per_episode": 0,
            "has_lang_tokens": False,
            "has_numeric_prefix": False,
        }

        for subs_candidate in (sample_video.parent / "Subs", release_root / "Subs"):
            if subs_candidate.is_dir():
                self._inspect_subs_folder(subs_candidate, sample_video, known_exts, findings)
                # First hit wins: facts from two different folders must not
                # be blended into one set of findings.
                break

        # Check adjacent subs (next to the video)
        if not findings["has_subs_folder"]:
            adjacent = [
                p for p in sample_video.parent.iterdir()
                if p.is_file() and p.suffix.lower() in known_exts
            ]
            if adjacent:
                findings["adjacent_subs"] = True
                findings["files_per_episode"] = len(adjacent)

        return findings

    def _match_pattern(self, findings: dict) -> tuple[SubtitlePattern | None, float]:
        """
        Score all known patterns against the findings.

        Returns (best pattern, score). The pattern is None when no pattern is
        known or when the best score is below 0.4.
        """
        scores: list[tuple[float, SubtitlePattern]] = [
            (self._score(pattern, findings), pattern)
            for pattern in self.kb.patterns().values()
        ]

        if not scores:
            return None, 0.0

        # max() keeps the first pattern among equal scores.
        best_score, best_pattern = max(scores, key=lambda x: x[0])

        if best_score < 0.4:
            return None, best_score

        return best_pattern, best_score

    def _score(self, pattern: SubtitlePattern, findings: dict) -> float:
        """Return a 0.0-1.0 match score for this pattern against the findings."""
        score = 0.0
        total = 0.0

        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.EMBEDDED:
            total += 1
            if findings.get("has_embedded"):
                score += 1.0
            if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
                # Bonus when nothing external competes; it raises both the
                # score and the attainable total.
                score += 0.5
                total += 0.5

        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            total += 3
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "episode_subfolder":
                score += 2.0

        elif strategy == ScanStrategy.FLAT:
            total += 2
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "flat":
                score += 1.0

        elif strategy == ScanStrategy.ADJACENT:
            total += 2
            if findings.get("adjacent_subs"):
                score += 1.0
            if not findings.get("has_subs_folder"):
                score += 1.0

        return score / total if total > 0 else 0.0

    def _describe(self, findings: dict) -> str:
        """Render the findings as a short human-readable sentence."""
        parts = []
        if findings.get("has_subs_folder"):
            strategy = findings.get("subs_strategy") or "?"
            n = findings.get("files_per_episode", 0)
            parts.append(f"Subs/ folder found ({strategy}), ~{n} file(s) per episode")
            if findings.get("has_numeric_prefix"):
                parts.append("files have numeric prefix (e.g. 2_English.srt)")
            if findings.get("has_lang_tokens"):
                parts.append("language tokens found in filenames")
        elif findings.get("adjacent_subs"):
            parts.append("subtitle files adjacent to video")
        else:
            parts.append("no external subtitle files found")

        if findings.get("has_embedded"):
            parts.append("embedded tracks detected (ffprobe)")

        return " — ".join(parts) if parts else "nothing found"
+ """ + + def place( + self, + tracks: list[SubtitleTrack], + destination_video: Path, + ) -> PlaceResult: + placed: list[PlacedTrack] = [] + skipped: list[tuple[SubtitleTrack, str]] = [] + + dest_dir = destination_video.parent + + for track in tracks: + if track.is_embedded: + logger.debug(f"SubtitlePlacer: skip embedded track ({track.language})") + skipped.append((track, "embedded β€” no file to place")) + continue + + if not track.file_path or not track.file_path.exists(): + skipped.append((track, "source file not found")) + continue + + try: + dest_name = track.destination_name + except ValueError as e: + skipped.append((track, str(e))) + continue + + dest_path = dest_dir / dest_name + + if dest_path.exists(): + logger.debug(f"SubtitlePlacer: skip {dest_name} β€” already exists") + skipped.append((track, "destination already exists")) + continue + + try: + os.link(track.file_path, dest_path) + placed.append(PlacedTrack( + source=track.file_path, + destination=dest_path, + filename=dest_name, + )) + logger.info(f"SubtitlePlacer: placed {dest_name}") + except OSError as e: + logger.warning(f"SubtitlePlacer: failed to place {dest_name}: {e}") + skipped.append((track, str(e))) + + logger.info( + f"SubtitlePlacer: {len(placed)} placed, {len(skipped)} skipped " + f"for {destination_video.name}" + ) + return PlaceResult(placed=placed, skipped=skipped) diff --git a/alfred/domain/subtitles/services/utils.py b/alfred/domain/subtitles/services/utils.py new file mode 100644 index 0000000..ebf871a --- /dev/null +++ b/alfred/domain/subtitles/services/utils.py @@ -0,0 +1,21 @@ +"""Subtitle service utilities.""" + +from ..entities import SubtitleTrack + + +def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]: + """ + Return the distinct subtitle tracks available, deduped by (language, type). + + Useful to display what is available for a media item regardless of user + preferences β€” e.g. eng, eng.sdh, fra all show up as separate entries. 
+ """ + seen: set[tuple] = set() + result: list[SubtitleTrack] = [] + for track in tracks: + lang = track.language.code if track.language else None + key = (lang, track.subtitle_type) + if key not in seen: + seen.add(key) + result.append(track) + return result diff --git a/alfred/domain/subtitles/value_objects.py b/alfred/domain/subtitles/value_objects.py index 6fe13a4..f03ab7a 100644 --- a/alfred/domain/subtitles/value_objects.py +++ b/alfred/domain/subtitles/value_objects.py @@ -1,91 +1,93 @@ """Subtitle domain value objects.""" -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum - -from ..shared.exceptions import ValidationError +from pathlib import Path +from typing import Any -class Language(Enum): - """Supported subtitle languages.""" +class ScanStrategy(Enum): + """How to locate subtitle files for a given release.""" - ENGLISH = "en" - FRENCH = "fr" - - @classmethod - def from_code(cls, code: str) -> "Language": - """ - Get language from ISO 639-1 code. 
- - Args: - code: Two-letter language code - - Returns: - Language enum value - - Raises: - ValidationError: If code is not supported - """ - code_lower = code.lower() - for lang in cls: - if lang.value == code_lower: - return lang - raise ValidationError(f"Unsupported language code: {code}") + ADJACENT = "adjacent" # .srt next to the video + FLAT = "flat" # Subs/*.srt + EPISODE_SUBFOLDER = "episode_subfolder" # Subs/{episode_name}/*.srt + EMBEDDED = "embedded" # tracks inside the video container -class SubtitleFormat(Enum): - """Supported subtitle formats.""" +class TypeDetectionMethod(Enum): + """How to differentiate standard / SDH / forced when tokens are ambiguous.""" - SRT = "srt" # SubRip - ASS = "ass" # Advanced SubStation Alpha - SSA = "ssa" # SubStation Alpha - VTT = "vtt" # WebVTT - SUB = "sub" # MicroDVD + TOKEN_IN_NAME = "token_in_name" + SIZE_AND_COUNT = "size_and_count" + FFPROBE_METADATA = "ffprobe_metadata" - @classmethod - def from_extension(cls, extension: str) -> "SubtitleFormat": - """ - Get format from file extension. - Args: - extension: File extension (with or without dot) - - Returns: - SubtitleFormat enum value - - Raises: - ValidationError: If extension is not supported - """ - ext = extension.lower().lstrip(".") - for fmt in cls: - if fmt.value == ext: - return fmt - raise ValidationError(f"Unsupported subtitle format: {extension}") +class SubtitleType(Enum): + STANDARD = "standard" + SDH = "sdh" + FORCED = "forced" + UNKNOWN = "unknown" @dataclass(frozen=True) -class TimingOffset: - """ - Value object representing subtitle timing offset in milliseconds. +class SubtitleFormat: + """A known subtitle file format.""" - Used for synchronizing subtitles with video. 
+ id: str + extensions: list[str] + description: str = "" + + def matches_extension(self, ext: str) -> bool: + return ext.lower() in [e.lower() for e in self.extensions] + + +@dataclass(frozen=True) +class SubtitleLanguage: + """A known subtitle language with its recognition tokens.""" + + code: str # ISO 639-1 + tokens: list[str] # lowercase + + def matches_token(self, token: str) -> bool: + return token.lower() in self.tokens + + +@dataclass(frozen=True) +class SubtitlePattern: + """ + A known structural pattern for how a release group organises subtitle files. + + Patterns are loaded from alfred/knowledge/patterns/*.yaml and are + independent of any specific release group β€” multiple groups can share + the same pattern. """ - milliseconds: int + id: str + description: str + scan_strategy: ScanStrategy + root_folder: str | None # e.g. "Subs", None for adjacent/embedded + type_detection: TypeDetectionMethod + version: str = "1.0" - def __post_init__(self): - """Validate timing offset.""" - if not isinstance(self.milliseconds, int): - raise ValidationError( - f"Timing offset must be an integer, got {type(self.milliseconds)}" - ) - def to_seconds(self) -> float: - """Convert to seconds.""" - return self.milliseconds / 1000.0 +@dataclass(frozen=True) +class SubtitleMatchingRules: + """ + Effective rules after scope resolution (global β†’ group β†’ show β†’ season β†’ episode). + Only stores actual values β€” None means "inherited, not overridden at this level". 
+ """ - def __str__(self) -> str: - return f"{self.milliseconds}ms" + preferred_languages: list[str] = field(default_factory=list) # ISO 639-1 codes + preferred_formats: list[str] = field(default_factory=list) # format ids + allowed_types: list[str] = field(default_factory=list) # SubtitleType ids + format_priority: list[str] = field(default_factory=list) # ordered format ids + min_confidence: float = 0.7 - def __repr__(self) -> str: - return f"TimingOffset({self.milliseconds})" + +@dataclass(frozen=True) +class RuleScope: + """At which level a rule set applies.""" + + level: str # "global" | "release_group" | "movie" | "show" | "season" | "episode" + identifier: str | None = None # imdb_id, group name, "S01", "S01E03"… diff --git a/alfred/domain/tv_shows/entities.py b/alfred/domain/tv_shows/entities.py index 53517ce..5d03568 100644 --- a/alfred/domain/tv_shows/entities.py +++ b/alfred/domain/tv_shows/entities.py @@ -2,7 +2,6 @@ import re from dataclasses import dataclass, field -from datetime import datetime from ..shared.value_objects import FilePath, FileSize, ImdbId from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus @@ -22,8 +21,6 @@ class TVShow: seasons_count: int status: ShowStatus tmdb_id: int | None = None - first_air_date: str | None = None - added_at: datetime = field(default_factory=datetime.now) def __post_init__(self): """Validate TV show entity.""" @@ -87,9 +84,6 @@ class Season: season_number: SeasonNumber episode_count: int name: str | None = None - overview: str | None = None - air_date: str | None = None - poster_path: str | None = None def __post_init__(self): """Validate season entity.""" @@ -146,11 +140,6 @@ class Episode: title: str file_path: FilePath | None = None file_size: FileSize | None = None - overview: str | None = None - air_date: str | None = None - still_path: str | None = None - vote_average: float | None = None - runtime: int | None = None # in minutes def __post_init__(self): """Validate episode entity.""" diff 
--git a/alfred/infrastructure/filesystem/file_manager.py b/alfred/infrastructure/filesystem/file_manager.py index 854bf07..f1ee173 100644 --- a/alfred/infrastructure/filesystem/file_manager.py +++ b/alfred/infrastructure/filesystem/file_manager.py @@ -2,8 +2,7 @@ import logging import os -import shutil -from enum import Enum +from collections import namedtuple from pathlib import Path from typing import Any @@ -13,14 +12,11 @@ from .exceptions import PathTraversalError logger = logging.getLogger(__name__) +FileOperationResult = namedtuple("FileOperationResult", ["success", "error", "message"]) -class FolderName(Enum): - """Types of folders that can be managed.""" - DOWNLOAD = "download" - TVSHOW = "tvshow" - MOVIE = "movie" - TORRENT = "torrent" +def _err(error: str, message: str) -> dict[str, Any]: + return {"status": "error", "error": error, "message": message} class FileManager: @@ -35,8 +31,6 @@ class FileManager: """ Set a folder path in the configuration. - Validates that the path exists, is a directory, and is readable. - Args: folder_name: Name of folder (download, tvshow, movie, torrent). path_value: Absolute path to the folder. @@ -45,53 +39,39 @@ class FileManager: Dict with status or error information. 
""" try: - self._validate_folder_name(folder_name) path_obj = Path(path_value).resolve() if not path_obj.exists(): - logger.warning(f"Path does not exist: {path_value}") - return { - "error": "invalid_path", - "message": f"Path does not exist: {path_value}", - } + return _err("invalid_path", f"Path does not exist: {path_value}") if not path_obj.is_dir(): - logger.warning(f"Path is not a directory: {path_value}") - return { - "error": "invalid_path", - "message": f"Path is not a directory: {path_value}", - } + return _err("invalid_path", f"Path is not a directory: {path_value}") if not os.access(path_obj, os.R_OK): - logger.warning(f"Path is not readable: {path_value}") - return { - "error": "permission_denied", - "message": f"Path is not readable: {path_value}", - } + return _err("permission_denied", f"Path is not readable: {path_value}") memory = get_memory() - memory.ltm.set_config(f"{folder_name}_folder", str(path_obj)) + # workspace folders have fixed attributes; library folders go in the dict + if folder_name in ("download", "torrent"): + setattr(memory.ltm.workspace, folder_name, str(path_obj)) + else: + memory.ltm.library_paths.set(folder_name, str(path_obj)) memory.save() - logger.info(f"Set {folder_name}_folder to: {path_obj}") + logger.info(f"Set {folder_name} to: {path_obj}") return {"status": "ok", "folder_name": folder_name, "path": str(path_obj)} except ValueError as e: - logger.error(f"Validation error: {e}") - return {"error": "validation_failed", "message": str(e)} + return _err("validation_failed", str(e)) except Exception as e: logger.error(f"Unexpected error setting path: {e}", exc_info=True) - return {"error": "internal_error", "message": "Failed to set path"} + return _err("internal_error", "Failed to set path") - def list_folder( # noqa: PLR0911 - self, folder_type: str, path: str = "." - ) -> dict[str, Any]: + def list_folder(self, folder_type: str, path: str = ".") -> dict[str, Any]: """ List contents of a configured folder. 
- Includes security checks to prevent path traversal. - Args: folder_type: Type of folder (download, tvshow, movie, torrent). path: Relative path within the folder (default: root). @@ -100,43 +80,28 @@ class FileManager: Dict with folder contents or error information. """ try: - self._validate_folder_name(folder_type) safe_path = self._sanitize_path(path) memory = get_memory() - folder_key = f"{folder_type}_folder" - folder_path = memory.ltm.get_config(folder_key) + if folder_type in ("download", "torrent"): + folder_path = getattr(memory.ltm.workspace, folder_type, None) + else: + folder_path = memory.ltm.library_paths.get(folder_type) if not folder_path: - logger.warning(f"Folder not configured: {folder_type}") - return { - "error": "folder_not_set", - "message": f"{folder_type.capitalize()} folder not configured.", - } + return _err("folder_not_set", f"{folder_type.capitalize()} folder not configured.") root = Path(folder_path) target = root / safe_path if not self._is_safe_path(root, target): - logger.warning(f"Path traversal attempt: {path}") - return { - "error": "forbidden", - "message": "Access denied: path outside allowed directory", - } + return _err("forbidden", "Access denied: path outside allowed directory") if not target.exists(): - logger.warning(f"Path does not exist: {target}") - return { - "error": "not_found", - "message": f"Path does not exist: {safe_path}", - } + return _err("not_found", f"Path does not exist: {safe_path}") if not target.is_dir(): - logger.warning(f"Path is not a directory: {target}") - return { - "error": "not_a_directory", - "message": f"Path is not a directory: {safe_path}", - } + return _err("not_a_directory", f"Path is not a directory: {safe_path}") try: entries = [entry.name for entry in target.iterdir()] @@ -149,35 +114,28 @@ class FileManager: "count": len(entries), } except PermissionError: - logger.warning(f"Permission denied: {target}") - return { - "error": "permission_denied", - "message": f"Permission denied: 
{safe_path}", - } + return _err("permission_denied", f"Permission denied: {safe_path}") except PathTraversalError as e: - logger.warning(f"Path traversal attempt: {e}") - return {"error": "forbidden", "message": str(e)} + return _err("forbidden", str(e)) except ValueError as e: - logger.error(f"Validation error: {e}") - return {"error": "validation_failed", "message": str(e)} + return _err("validation_failed", str(e)) except Exception as e: logger.error(f"Unexpected error listing folder: {e}", exc_info=True) - return {"error": "internal_error", "message": "Failed to list folder"} + return _err("internal_error", "Failed to list folder") - def move_file( # noqa: PLR0911 - self, source: str, destination: str - ) -> dict[str, Any]: + def copy_file(self, source: str, destination: str) -> dict[str, Any]: """ - Move a file from one location to another. + Hard-link a file to a destination (instant, no data duplication). - Includes validation and verification after move. + Both paths must be on the same filesystem. qBittorrent keeps seeding + the original inode unaffected. Args: - source: Source file path. - destination: Destination file path. + source: Absolute path to the source file. + destination: Absolute path to the destination file. Returns: Dict with status or error information. 
@@ -186,126 +144,174 @@ class FileManager: source_path = Path(source).resolve() dest_path = Path(destination).resolve() - logger.info(f"Moving file: {source_path} -> {dest_path}") + logger.info(f"Hard-linking: {source_path} -> {dest_path}") if not source_path.exists(): - return { - "error": "source_not_found", - "message": f"Source does not exist: {source}", - } + return _err("source_not_found", f"Source does not exist: {source}") if not source_path.is_file(): - return { - "error": "source_not_file", - "message": f"Source is not a file: {source}", - } + return _err("source_not_file", f"Source is not a file: {source}") - source_size = source_path.stat().st_size - dest_parent = dest_path.parent - - if not dest_parent.exists(): - return { - "error": "destination_dir_not_found", - "message": f"Destination directory does not exist: {dest_parent}", - } + if not dest_path.parent.exists(): + return _err("destination_dir_not_found", f"Destination directory does not exist: {dest_path.parent}") if dest_path.exists(): - return { - "error": "destination_exists", - "message": f"Destination already exists: {destination}", - } + return _err("destination_exists", f"Destination already exists: {destination}") - shutil.move(str(source_path), str(dest_path)) + os.link(source_path, dest_path) - # Verify move - if not dest_path.exists(): - return { - "error": "move_verification_failed", - "message": "File was not moved successfully", - } - - dest_size = dest_path.stat().st_size - if dest_size != source_size: - return { - "error": "size_mismatch", - "message": "File size mismatch after move", - } - - logger.info(f"File moved successfully: {dest_path.name}") + logger.info(f"Hard link created: {dest_path.name}") return { "status": "ok", "source": str(source_path), "destination": str(dest_path), "filename": dest_path.name, - "size": dest_size, + "size": source_path.stat().st_size, + } + + except OSError as e: + logger.error(f"Error creating hard link: {e}", exc_info=True) + return 
_err("link_failed", str(e)) + + def move_file(self, source: str, destination: str) -> dict[str, Any]: + """ + Move a file via hard link + source deletion. + + Hard-links the file to the destination, then removes the source. + qBittorrent keeps seeding during the operation since the inode + is still referenced until the source is removed. + + Args: + source: Absolute path to the source file. + destination: Absolute path to the destination file. + + Returns: + Dict with status or error information. + """ + try: + source_path = Path(source).resolve() + + link_result = self.copy_file(source, destination) + if link_result.get("status") != "ok": + return link_result + + source_path.unlink() + + logger.info(f"File moved: {source_path.name} -> {link_result['destination']}") + return { + "status": "ok", + "source": str(source_path), + "destination": link_result["destination"], + "filename": link_result["filename"], + "size": link_result["size"], } except Exception as e: logger.error(f"Error moving file: {e}", exc_info=True) - return {"error": "move_failed", "message": str(e)} + return _err("move_failed", str(e)) - def _validate_folder_name(self, folder_name: str) -> bool: + def create_seed_links( + self, library_file: str, original_download_folder: str, torrent_folder: str + ) -> dict[str, Any]: """ - Validate folder name against allowed values. + Prepare a torrent folder so qBittorrent can keep seeding after a move. + + - Hard-links the moved video file from the library back into + torrents// (same inode, no data copy). + - Copies every other file from the original download folder + (.srt, .nfo, .jpg, .txt, …) into the same torrent subfolder, + preserving relative paths. Args: - folder_name: Name to validate. + library_file: Absolute path to the video file in the library. + original_download_folder: Absolute path to the download folder + that contained the original release (may still have subs etc.). + torrent_folder: Absolute path to the root torrents/ directory. 
Returns: - True if valid. - - Raises: - ValueError: If folder name is invalid. + Dict with status, linked_file, copied_files list, skipped list. """ - valid_names = [fn.value for fn in FolderName] - if folder_name not in valid_names: - raise ValueError( - f"Invalid folder_name '{folder_name}'. " - f"Must be one of: {', '.join(valid_names)}" - ) - return True + try: + lib_path = Path(library_file).resolve() + src_folder = Path(original_download_folder).resolve() + torrent_root = Path(torrent_folder).resolve() + + if not lib_path.exists(): + return _err("library_file_not_found", f"Library file not found: {library_file}") + if not src_folder.exists(): + return _err("source_folder_not_found", f"Download folder not found: {original_download_folder}") + if not torrent_root.exists(): + return _err("torrent_folder_not_found", f"Torrent folder not found: {torrent_folder}") + + dest_folder = torrent_root / src_folder.name + dest_folder.mkdir(parents=True, exist_ok=True) + + # Hard-link the video file from library β†’ torrent subfolder + link_dest = dest_folder / lib_path.name + if link_dest.exists(): + return _err("destination_exists", f"Link already exists: {link_dest}") + os.link(lib_path, link_dest) + logger.info(f"Hard-linked for seeding: {lib_path.name} β†’ {dest_folder}") + + # Copy everything else from the original download folder + copied: list[str] = [] + skipped: list[str] = [] + for item in src_folder.rglob("*"): + if not item.is_file(): + continue + rel = item.relative_to(src_folder) + dest_item = dest_folder / rel + dest_item.parent.mkdir(parents=True, exist_ok=True) + if dest_item.exists(): + skipped.append(str(rel)) + continue + import shutil + shutil.copy2(item, dest_item) + copied.append(str(rel)) + logger.debug(f"Copied for seeding: {rel}") + + return { + "status": "ok", + "torrent_subfolder": str(dest_folder), + "linked_file": str(link_dest), + "copied_files": copied, + "copied_count": len(copied), + "skipped": skipped, + } + + except OSError as e: + 
logger.error(f"create_seed_links failed: {e}", exc_info=True) + return _err("link_failed", str(e)) + except Exception as e: + logger.error(f"create_seed_links unexpected error: {e}", exc_info=True) + return _err("internal_error", str(e)) def _sanitize_path(self, path: str) -> str: """ - Sanitize path to prevent path traversal attacks. + Sanitize a relative path to prevent path traversal attacks. - Args: - path: Path to sanitize. - - Returns: - Sanitized path. - - Raises: - PathTraversalError: If path contains traversal attempts. + Raises PathTraversalError if the path tries to escape the root. """ normalized = os.path.normpath(path) + # Reject absolute paths if os.path.isabs(normalized): raise PathTraversalError("Absolute paths are not allowed") + # Reject parent directory references if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized: raise PathTraversalError("Parent directory references not allowed") + # Reject null bytes if "\x00" in normalized: raise PathTraversalError("Null bytes in path not allowed") return normalized def _is_safe_path(self, base_path: Path, target_path: Path) -> bool: - """ - Check if target path is within base path. - - Args: - base_path: The allowed base directory. - target_path: The path to check. - - Returns: - True if target is within base, False otherwise. 
- """ + """Return True if target_path is inside base_path (prevents traversal).""" try: - base_resolved = base_path.resolve() - target_resolved = target_path.resolve() - target_resolved.relative_to(base_resolved) + target_path.resolve().relative_to(base_path.resolve()) return True except (ValueError, OSError): return False diff --git a/alfred/infrastructure/persistence/__init__.py b/alfred/infrastructure/persistence/__init__.py index 9d87f48..009eea1 100644 --- a/alfred/infrastructure/persistence/__init__.py +++ b/alfred/infrastructure/persistence/__init__.py @@ -1,17 +1,10 @@ """Persistence layer - Data storage implementations.""" -from .context import ( - get_memory, - has_memory, - init_memory, - set_memory, -) -from .memory import ( - EpisodicMemory, - LongTermMemory, - Memory, - ShortTermMemory, -) +from .context import get_memory, has_memory, init_memory, set_memory +from .memory import Memory +from .memory.episodic import EpisodicMemory +from .memory.ltm import LongTermMemory +from .memory.stm import ShortTermMemory __all__ = [ "Memory", diff --git a/alfred/infrastructure/persistence/context.py b/alfred/infrastructure/persistence/context.py index 8cddbd3..e5c7e27 100644 --- a/alfred/infrastructure/persistence/context.py +++ b/alfred/infrastructure/persistence/context.py @@ -1,28 +1,26 @@ """ -Memory context using contextvars. +Memory context β€” global singleton. -Provides thread-safe and async-safe access to the Memory instance +Provides async-safe access to the Memory instance without passing it explicitly through all function calls. 
Usage: # At application startup from alfred.infrastructure.persistence import init_memory, get_memory - init_memory("memory_data") + init_memory("memory") # Anywhere in the code memory = get_memory() memory.ltm.set_config("key", "value") """ -from contextvars import ContextVar - from .memory import Memory -_memory_ctx: ContextVar[Memory | None] = ContextVar("memory", default=None) +_memory: Memory | None = None -def init_memory(storage_dir: str = "memory_data") -> Memory: +def init_memory(storage_dir: str = "memory") -> Memory: """ Initialize the memory and set it in the context. @@ -34,9 +32,9 @@ def init_memory(storage_dir: str = "memory_data") -> Memory: Returns: The initialized Memory instance. """ - memory = Memory(storage_dir=storage_dir) - _memory_ctx.set(memory) - return memory + global _memory + _memory = Memory(storage_dir=storage_dir) + return _memory def set_memory(memory: Memory) -> None: @@ -48,7 +46,8 @@ def set_memory(memory: Memory) -> None: Args: memory: Memory instance to set. """ - _memory_ctx.set(memory) + global _memory + _memory = memory def get_memory() -> Memory: @@ -61,12 +60,11 @@ def get_memory() -> Memory: Raises: RuntimeError: If memory has not been initialized. """ - memory = _memory_ctx.get() - if memory is None: + if _memory is None: raise RuntimeError( "Memory not initialized. Call init_memory() at application startup." ) - return memory + return _memory def has_memory() -> bool: @@ -76,4 +74,12 @@ def has_memory() -> bool: Returns: True if memory is available, False otherwise. """ - return _memory_ctx.get() is not None + return _memory is not None + + +def reset_memory() -> None: + """ + Reset the memory singleton to None. For use in tests only. 
+ """ + global _memory + _memory = None diff --git a/alfred/infrastructure/persistence/json/subtitle_repository.py b/alfred/infrastructure/persistence/json/subtitle_repository.py index c0ce6c5..05a5119 100644 --- a/alfred/infrastructure/persistence/json/subtitle_repository.py +++ b/alfred/infrastructure/persistence/json/subtitle_repository.py @@ -119,10 +119,6 @@ class JsonSubtitleRepository(SubtitleRepository): "timing_offset": subtitle.timing_offset.milliseconds, "hearing_impaired": subtitle.hearing_impaired, "forced": subtitle.forced, - "source": subtitle.source, - "uploader": subtitle.uploader, - "download_count": subtitle.download_count, - "rating": subtitle.rating, } def _from_dict(self, data: dict[str, Any]) -> Subtitle: @@ -137,8 +133,4 @@ class JsonSubtitleRepository(SubtitleRepository): timing_offset=TimingOffset(data.get("timing_offset", 0)), hearing_impaired=data.get("hearing_impaired", False), forced=data.get("forced", False), - source=data.get("source"), - uploader=data.get("uploader"), - download_count=data.get("download_count"), - rating=data.get("rating"), ) diff --git a/alfred/infrastructure/persistence/json/tvshow_repository.py b/alfred/infrastructure/persistence/json/tvshow_repository.py index 2e79836..254051d 100644 --- a/alfred/infrastructure/persistence/json/tvshow_repository.py +++ b/alfred/infrastructure/persistence/json/tvshow_repository.py @@ -1,7 +1,6 @@ """JSON-based TV show repository implementation.""" import logging -from datetime import datetime from typing import Any from alfred.domain.shared.value_objects import ImdbId @@ -115,8 +114,6 @@ class JsonTVShowRepository(TVShowRepository): "seasons_count": show.seasons_count, "status": show.status.value, "tmdb_id": show.tmdb_id, - "first_air_date": show.first_air_date, - "added_at": show.added_at.isoformat(), } def _from_dict(self, data: dict[str, Any]) -> TVShow: @@ -127,10 +124,4 @@ class JsonTVShowRepository(TVShowRepository): seasons_count=data["seasons_count"], 
status=ShowStatus.from_string(data["status"]), tmdb_id=data.get("tmdb_id"), - first_air_date=data.get("first_air_date"), - added_at=( - datetime.fromisoformat(data["added_at"]) - if data.get("added_at") - else datetime.now() - ), ) diff --git a/alfred/infrastructure/persistence/memory.py b/alfred/infrastructure/persistence/memory.py deleted file mode 100644 index ba7c8ff..0000000 --- a/alfred/infrastructure/persistence/memory.py +++ /dev/null @@ -1,577 +0,0 @@ -""" -Memory - Unified management of 3 memory types. - -Architecture: -- LTM (Long-Term Memory): Configuration, library, preferences - Persistent -- STM (Short-Term Memory): Conversation, current workflow - Volatile -- Episodic Memory: Search results, transient states - Very volatile -""" - -import json -import logging -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# LONG-TERM MEMORY (LTM) - Persistent -# ============================================================================= - - -@dataclass -class LongTermMemory: - """ - Long-term memory - Persistent and static. 
- - Stores: - - User configuration (folders, URLs) - - Preferences (quality, languages) - - Library (owned movies/TV shows) - - Followed shows (watchlist) - """ - - # Folder and service configuration - config: dict[str, str] = field(default_factory=dict) - - # User preferences - preferences: dict[str, Any] = field( - default_factory=lambda: { - "preferred_quality": "1080p", - "preferred_languages": ["en", "fr"], - "auto_organize": False, - "naming_format": "{title}.{year}.{quality}", - } - ) - - # Library of owned media - library: dict[str, list[dict]] = field( - default_factory=lambda: {"movies": [], "tv_shows": []} - ) - - # Followed shows (watchlist) - following: list[dict] = field(default_factory=list) - - def get_config(self, key: str, default: Any = None) -> Any: - """Get a configuration value.""" - return self.config.get(key, default) - - def set_config(self, key: str, value: Any) -> None: - """Set a configuration value.""" - self.config[key] = value - logger.debug(f"LTM: Set config {key}") - - def has_config(self, key: str) -> bool: - """Check if a configuration exists.""" - return key in self.config and self.config[key] is not None - - def add_to_library(self, media_type: str, media: dict) -> None: - """Add a media item to the library.""" - if media_type not in self.library: - self.library[media_type] = [] - - # Avoid duplicates by imdb_id - existing_ids = [m.get("imdb_id") for m in self.library[media_type]] - if media.get("imdb_id") not in existing_ids: - media["added_at"] = datetime.now().isoformat() - self.library[media_type].append(media) - logger.info(f"LTM: Added {media.get('title')} to {media_type}") - - def get_library(self, media_type: str) -> list[dict]: - """Get the library for a media type.""" - return self.library.get(media_type, []) - - def follow_show(self, show: dict) -> None: - """Add a show to the watchlist.""" - existing_ids = [s.get("imdb_id") for s in self.following] - if show.get("imdb_id") not in existing_ids: - show["followed_at"] = 
datetime.now().isoformat() - self.following.append(show) - logger.info(f"LTM: Now following {show.get('title')}") - - def to_dict(self) -> dict: - """Convert to dictionary for serialization.""" - return { - "config": self.config, - "preferences": self.preferences, - "library": self.library, - "following": self.following, - } - - @classmethod - def from_dict(cls, data: dict) -> "LongTermMemory": - """Create an instance from a dictionary.""" - return cls( - config=data.get("config", {}), - preferences=data.get( - "preferences", - { - "preferred_quality": "1080p", - "preferred_languages": ["en", "fr"], - "auto_organize": False, - "naming_format": "{title}.{year}.{quality}", - }, - ), - library=data.get("library", {"movies": [], "tv_shows": []}), - following=data.get("following", []), - ) - - -# ============================================================================= -# SHORT-TERM MEMORY (STM) - Conversation -# ============================================================================= - - -@dataclass -class ShortTermMemory: - """ - Short-term memory - Volatile and conversational. - - Stores: - - Current conversation history - - Current workflow (what we're doing) - - Extracted entities from conversation - - Current discussion topic - """ - - # Conversation message history - conversation_history: list[dict[str, str]] = field(default_factory=list) - - # Current workflow - current_workflow: dict | None = None - - # Extracted entities (title, year, requested quality, etc.) 
- extracted_entities: dict[str, Any] = field(default_factory=dict) - - # Current conversation topic - current_topic: str | None = None - - # Conversation language - language: str = "en" - - # History message limit - max_history: int = 20 - - def add_message(self, role: str, content: str) -> None: - """Add a message to history.""" - self.conversation_history.append( - {"role": role, "content": content, "timestamp": datetime.now().isoformat()} - ) - # Keep only the last N messages - if len(self.conversation_history) > self.max_history: - self.conversation_history = self.conversation_history[-self.max_history :] - logger.debug(f"STM: Added {role} message") - - def get_recent_history(self, n: int = 10) -> list[dict]: - """Get the last N messages.""" - return self.conversation_history[-n:] - - def start_workflow(self, workflow_type: str, target: dict) -> None: - """Start a new workflow.""" - self.current_workflow = { - "type": workflow_type, - "target": target, - "stage": "started", - "started_at": datetime.now().isoformat(), - } - logger.info(f"STM: Started workflow '{workflow_type}'") - - def update_workflow_stage(self, stage: str) -> None: - """Update the workflow stage.""" - if self.current_workflow: - self.current_workflow["stage"] = stage - logger.debug(f"STM: Workflow stage -> {stage}") - - def end_workflow(self) -> None: - """End the current workflow.""" - if self.current_workflow: - logger.info(f"STM: Ended workflow '{self.current_workflow.get('type')}'") - self.current_workflow = None - - def set_entity(self, key: str, value: Any) -> None: - """Store an extracted entity.""" - self.extracted_entities[key] = value - logger.debug(f"STM: Set entity {key}={value}") - - def get_entity(self, key: str, default: Any = None) -> Any: - """Get an extracted entity.""" - return self.extracted_entities.get(key, default) - - def clear_entities(self) -> None: - """Clear extracted entities.""" - self.extracted_entities = {} - - def set_topic(self, topic: str) -> None: - """Set 
the current topic.""" - self.current_topic = topic - logger.debug(f"STM: Topic -> {topic}") - - def set_language(self, language: str) -> None: - """Set the conversation language.""" - self.language = language - logger.debug(f"STM: Language -> {language}") - - def clear(self) -> None: - """Reset short-term memory.""" - self.conversation_history = [] - self.current_workflow = None - self.extracted_entities = {} - self.current_topic = None - self.language = "en" - logger.info("STM: Cleared") - - def to_dict(self) -> dict: - """Convert to dictionary.""" - return { - "conversation_history": self.conversation_history, - "current_workflow": self.current_workflow, - "extracted_entities": self.extracted_entities, - "current_topic": self.current_topic, - "language": self.language, - } - - -# ============================================================================= -# EPISODIC MEMORY - Transient states -# ============================================================================= - - -@dataclass -class EpisodicMemory: - """ - Episodic/sensory memory - Temporary and event-driven. - - Stores: - - Last search results - - Active downloads - - Recent errors - - Pending questions awaiting user response - - Background events - """ - - # Last search results - last_search_results: dict | None = None - - # Active downloads - active_downloads: list[dict] = field(default_factory=list) - - # Recent errors - recent_errors: list[dict] = field(default_factory=list) - - # Pending question awaiting user response - pending_question: dict | None = None - - # Background events (download complete, new files, etc.) - background_events: list[dict] = field(default_factory=list) - - # Limits for errors/events kept - max_errors: int = 5 - max_events: int = 10 - - def store_search_results( - self, query: str, results: list[dict], search_type: str = "torrent" - ) -> None: - """ - Store search results with index. 
- - Args: - query: The search query - results: List of results - search_type: Type of search (torrent, movie, tvshow) - """ - self.last_search_results = { - "query": query, - "type": search_type, - "timestamp": datetime.now().isoformat(), - "results": [{"index": i + 1, **r} for i, r in enumerate(results)], - } - logger.info(f"Episodic: Stored {len(results)} search results for '{query}'") - - def get_result_by_index(self, index: int) -> dict | None: - """ - Get a result by its number (1-indexed). - - Args: - index: Result number (1, 2, 3, ...) - - Returns: - The result or None if not found - """ - if not self.last_search_results: - logger.warning("Episodic: No search results stored") - return None - - for result in self.last_search_results.get("results", []): - if result.get("index") == index: - return result - - logger.warning(f"Episodic: Result #{index} not found") - return None - - def get_search_results(self) -> dict | None: - """Get the last search results.""" - return self.last_search_results - - def clear_search_results(self) -> None: - """Clear search results.""" - self.last_search_results = None - - def add_active_download(self, download: dict) -> None: - """Add an active download.""" - download["started_at"] = datetime.now().isoformat() - self.active_downloads.append(download) - logger.info(f"Episodic: Added download '{download.get('name')}'") - - def update_download_progress( - self, task_id: str, progress: int, status: str = "downloading" - ) -> None: - """Update download progress.""" - for dl in self.active_downloads: - if dl.get("task_id") == task_id: - dl["progress"] = progress - dl["status"] = status - dl["updated_at"] = datetime.now().isoformat() - break - - def complete_download(self, task_id: str, file_path: str) -> dict | None: - """Mark a download as complete and remove it.""" - for i, dl in enumerate(self.active_downloads): - if dl.get("task_id") == task_id: - completed = self.active_downloads.pop(i) - completed["status"] = "completed" - 
completed["file_path"] = file_path - completed["completed_at"] = datetime.now().isoformat() - - # Add a background event - self.add_background_event( - "download_complete", - {"name": completed.get("name"), "file_path": file_path}, - ) - - logger.info(f"Episodic: Download completed '{completed.get('name')}'") - return completed - return None - - def get_active_downloads(self) -> list[dict]: - """Get active downloads.""" - return self.active_downloads - - def add_error(self, action: str, error: str, context: dict | None = None) -> None: - """Record a recent error.""" - self.recent_errors.append( - { - "timestamp": datetime.now().isoformat(), - "action": action, - "error": error, - "context": context or {}, - } - ) - # Keep only the last N errors - self.recent_errors = self.recent_errors[-self.max_errors :] - logger.warning(f"Episodic: Error in '{action}': {error}") - - def get_recent_errors(self) -> list[dict]: - """Get recent errors.""" - return self.recent_errors - - def set_pending_question( - self, - question: str, - options: list[dict], - context: dict, - question_type: str = "choice", - ) -> None: - """ - Record a question awaiting user response. - - Args: - question: The question asked - options: List of possible options - context: Question context - question_type: Type of question (choice, confirmation, input) - """ - self.pending_question = { - "type": question_type, - "question": question, - "options": options, - "context": context, - "timestamp": datetime.now().isoformat(), - } - logger.info(f"Episodic: Pending question set ({question_type})") - - def get_pending_question(self) -> dict | None: - """Get the pending question.""" - return self.pending_question - - def resolve_pending_question(self, answer_index: int | None = None) -> dict | None: - """ - Resolve the pending question and return the chosen option. 
- - Args: - answer_index: Answer index (1-indexed) or None to cancel - - Returns: - The chosen option or None - """ - if not self.pending_question: - return None - - result = None - if answer_index is not None and self.pending_question.get("options"): - for opt in self.pending_question["options"]: - if opt.get("index") == answer_index: - result = opt - break - - self.pending_question = None - logger.info("Episodic: Pending question resolved") - return result - - def add_background_event(self, event_type: str, data: dict) -> None: - """Add a background event.""" - self.background_events.append( - { - "type": event_type, - "timestamp": datetime.now().isoformat(), - "data": data, - "read": False, - } - ) - # Keep only the last N events - self.background_events = self.background_events[-self.max_events :] - logger.info(f"Episodic: Background event '{event_type}'") - - def get_unread_events(self) -> list[dict]: - """Get unread events and mark them as read.""" - unread = [e for e in self.background_events if not e.get("read")] - for e in self.background_events: - e["read"] = True - return unread - - def clear(self) -> None: - """Reset episodic memory.""" - self.last_search_results = None - self.active_downloads = [] - self.recent_errors = [] - self.pending_question = None - self.background_events = [] - logger.info("Episodic: Cleared") - - def to_dict(self) -> dict: - """Convert to dictionary.""" - return { - "last_search_results": self.last_search_results, - "active_downloads": self.active_downloads, - "recent_errors": self.recent_errors, - "pending_question": self.pending_question, - "background_events": self.background_events, - } - - -# ============================================================================= -# MEMORY MANAGER - Unified manager -# ============================================================================= - - -class Memory: - """ - Unified manager for the 3 memory types. 
- - Usage: - memory = Memory("memory_data") - memory.ltm.set_config("download_folder", "/path") - memory.stm.add_message("user", "Hello") - memory.episodic.store_search_results("query", results) - memory.save() - """ - - def __init__(self, storage_dir: str = "memory_data"): - """ - Initialize the memory. - - Args: - storage_dir: Directory for persistent storage - """ - self.storage_dir = Path(storage_dir) - self.storage_dir.mkdir(parents=True, exist_ok=True) - - self.ltm_file = self.storage_dir / "ltm.json" - - # Initialize the 3 memory types - self.ltm = self._load_ltm() - self.stm = ShortTermMemory() - self.episodic = EpisodicMemory() - - logger.info(f"Memory initialized (storage: {storage_dir})") - - def _load_ltm(self) -> LongTermMemory: - """Load LTM from file.""" - if self.ltm_file.exists(): - try: - data = json.loads(self.ltm_file.read_text(encoding="utf-8")) - logger.info("LTM loaded from file") - return LongTermMemory.from_dict(data) - except (OSError, json.JSONDecodeError) as e: - logger.warning(f"Could not load LTM: {e}") - return LongTermMemory() - - def save(self) -> None: - """Save LTM (the only persistent memory).""" - try: - self.ltm_file.write_text( - json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False), - encoding="utf-8", - ) - logger.debug("LTM saved to file") - except OSError as e: - logger.error(f"Failed to save LTM: {e}") - raise - - def get_context_for_prompt(self) -> dict: - """ - Generate context to include in the system prompt. 
- - Returns: - Dictionary with relevant context from all 3 memories - """ - return { - "config": self.ltm.config, - "preferences": self.ltm.preferences, - "current_workflow": self.stm.current_workflow, - "current_topic": self.stm.current_topic, - "extracted_entities": self.stm.extracted_entities, - "last_search": { - "query": ( - self.episodic.last_search_results.get("query") - if self.episodic.last_search_results - else None - ), - "result_count": ( - len(self.episodic.last_search_results.get("results", [])) - if self.episodic.last_search_results - else 0 - ), - }, - "active_downloads_count": len(self.episodic.active_downloads), - "pending_question": self.episodic.pending_question is not None, - "unread_events": len( - [e for e in self.episodic.background_events if not e.get("read")] - ), - } - - def get_full_state(self) -> dict: - """Return the full state of all 3 memories (for debug).""" - return { - "ltm": self.ltm.to_dict(), - "stm": self.stm.to_dict(), - "episodic": self.episodic.to_dict(), - } - - def clear_session(self) -> None: - """Clear session memories (STM + Episodic).""" - self.stm.clear() - self.episodic.clear() - logger.info("Session memories cleared") diff --git a/alfred/infrastructure/persistence/memory/__init__.py b/alfred/infrastructure/persistence/memory/__init__.py new file mode 100644 index 0000000..523f50b --- /dev/null +++ b/alfred/infrastructure/persistence/memory/__init__.py @@ -0,0 +1,4 @@ +from .base import Memory +from .registry import MemoryRegistry + +__all__ = ["Memory", "MemoryRegistry"] diff --git a/alfred/infrastructure/persistence/memory/base.py b/alfred/infrastructure/persistence/memory/base.py new file mode 100644 index 0000000..145638a --- /dev/null +++ b/alfred/infrastructure/persistence/memory/base.py @@ -0,0 +1,90 @@ +"""Memory — unified manager for the 3 memory tiers.""" + +import json +import logging +from pathlib import Path + +from .episodic import EpisodicMemory +from .ltm import LongTermMemory +from .stm import
ShortTermMemory + +logger = logging.getLogger(__name__) + + +class Memory: + """ + Unified manager for the 3 memory tiers. + + Usage: + memory = Memory("data/memory") + memory.ltm.workspace.download = "/downloads" + memory.stm.add_message("user", "Hello") + memory.episodic.store_search_results("query", results) + memory.save() + """ + + def __init__(self, storage_dir: str = "memory"): + self.storage_dir = Path(storage_dir) + self.storage_dir.mkdir(parents=True, exist_ok=True) + self.ltm_file = self.storage_dir / "ltm.json" + + self.ltm = self._load_ltm() + self.stm = ShortTermMemory() + self.episodic = EpisodicMemory() + + logger.info(f"Memory initialized (storage: {storage_dir})") + + def _load_ltm(self) -> LongTermMemory: + """Load LTM from disk, or return a fresh instance.""" + if self.ltm_file.exists(): + try: + data = json.loads(self.ltm_file.read_text(encoding="utf-8")) + logger.info("LTM loaded from file") + return LongTermMemory.from_dict(data) + except (OSError, json.JSONDecodeError) as e: + logger.warning(f"Could not load LTM: {e}") + return LongTermMemory() + + def save(self) -> None: + """Persist LTM to disk (STM and Episodic are volatile).""" + try: + self.ltm_file.write_text( + json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False), + encoding="utf-8", + ) + logger.debug("LTM saved") + except OSError as e: + logger.error(f"Failed to save LTM: {e}") + raise + + def get_context_for_prompt(self) -> dict: + """Snapshot of relevant memory for the system prompt.""" + return { + "workspace": self.ltm.workspace.as_dict(), + "library_paths": self.ltm.library_paths.to_dict(), + "preferences": self.ltm.preferences.to_dict(), + "current_workflow": self.stm.workflow.to_dict(), + "current_topic": self.stm.entities.topic, + "extracted_entities": self.stm.entities.data, + "last_search": { + "query": self.episodic.search_results.last.get("query") if self.episodic.search_results.last else None, + "result_count": 
len(self.episodic.search_results.last.get("results", [])) if self.episodic.search_results.last else 0, + }, + "active_downloads_count": len(self.episodic.downloads.active), + "pending_question": self.episodic.pending_question is not None, + "unread_events": len([e for e in self.episodic.events.items if not e.get("read")]), + } + + def get_full_state(self) -> dict: + """Full state dump for debug/API.""" + return { + "ltm": self.ltm.to_dict(), + "stm": self.stm.to_dict(), + "episodic": self.episodic.to_dict(), + } + + def clear_session(self) -> None: + """Reset volatile memories (STM + Episodic).""" + self.stm.clear() + self.episodic.clear() + logger.info("Session memories cleared") diff --git a/alfred/infrastructure/persistence/memory/episodic/__init__.py b/alfred/infrastructure/persistence/memory/episodic/__init__.py new file mode 100644 index 0000000..3180a91 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/__init__.py @@ -0,0 +1,3 @@ +from .episodic import EpisodicMemory + +__all__ = ["EpisodicMemory"] diff --git a/alfred/infrastructure/persistence/memory/episodic/components/__init__.py b/alfred/infrastructure/persistence/memory/episodic/components/__init__.py new file mode 100644 index 0000000..ddbfeef --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/components/__init__.py @@ -0,0 +1,6 @@ +from .downloads import Downloads +from .errors import Errors +from .events import Events +from .search_results import SearchResults + +__all__ = ["SearchResults", "Downloads", "Events", "Errors"] diff --git a/alfred/infrastructure/persistence/memory/episodic/components/downloads.py b/alfred/infrastructure/persistence/memory/episodic/components/downloads.py new file mode 100644 index 0000000..60ce9dd --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/components/downloads.py @@ -0,0 +1,56 @@ +"""Downloads — active torrent downloads being tracked.""" + +import logging +from dataclasses import dataclass, field +from
datetime import datetime + +logger = logging.getLogger(__name__) + + +@dataclass +class Downloads: + active: list[dict] = field(default_factory=list) + + def add(self, download: dict) -> None: + download["started_at"] = datetime.now().isoformat() + self.active.append(download) + logger.info(f"Downloads: Added '{download.get('name')}'") + + def update_progress(self, task_id: str, progress: int, status: str = "downloading") -> None: + for dl in self.active: + if dl.get("task_id") == task_id: + dl["progress"] = progress + dl["status"] = status + dl["updated_at"] = datetime.now().isoformat() + break + + def complete(self, task_id: str, file_path: str) -> dict | None: + for i, dl in enumerate(self.active): + if dl.get("task_id") == task_id: + completed = self.active.pop(i) + completed.update({"status": "completed", "file_path": file_path, "completed_at": datetime.now().isoformat()}) + logger.info(f"Downloads: Completed '{completed.get('name')}'") + return completed + return None + + def clear(self) -> None: + self.active = [] + + @classmethod + def describe(cls) -> dict: + return { + "name": "Downloads", + "tier": "episodic", + "access": "read-write", + "description": ( + "Active torrent downloads currently in progress. " + "Read to report download status to the user. " + "Write to track new downloads or update progress." + ), + "fields": { + "active": "List of active downloads. 
Each entry has task_id, name, progress, status, started_at.", + }, + } + + def to_dict(self) -> list: + return self.active diff --git a/alfred/infrastructure/persistence/memory/episodic/components/errors.py b/alfred/infrastructure/persistence/memory/episodic/components/errors.py new file mode 100644 index 0000000..7709781 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/components/errors.py @@ -0,0 +1,46 @@ +"""Errors — recent agent errors for context and debugging.""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime + +logger = logging.getLogger(__name__) + +MAX_ERRORS = 5 + + +@dataclass +class Errors: + items: list[dict] = field(default_factory=list) + max_errors: int = MAX_ERRORS + + def add(self, action: str, error: str, context: dict | None = None) -> None: + self.items.append({ + "timestamp": datetime.now().isoformat(), + "action": action, + "error": error, + "context": context or {}, + }) + self.items = self.items[-self.max_errors:] + logger.warning(f"Errors: '{action}': {error}") + + def clear(self) -> None: + self.items = [] + + @classmethod + def describe(cls) -> dict: + return { + "name": "Errors", + "tier": "episodic", + "access": "read", + "description": ( + "Recent errors encountered during tool execution. " + "Read when something goes wrong to understand what failed and avoid repeating the same mistake." + ), + "fields": { + "items": f"Last {MAX_ERRORS} errors.
Each has timestamp, action, error message, context.", + }, + } + + def to_dict(self) -> list: + return self.items diff --git a/alfred/infrastructure/persistence/memory/episodic/components/events.py b/alfred/infrastructure/persistence/memory/episodic/components/events.py new file mode 100644 index 0000000..097c82e --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/components/events.py @@ -0,0 +1,49 @@ +"""Events — background events (download complete, new files, etc.).""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime + +logger = logging.getLogger(__name__) + +MAX_EVENTS = 10 + + +@dataclass +class Events: + items: list[dict] = field(default_factory=list) + max_events: int = MAX_EVENTS + + def add(self, event_type: str, data: dict) -> None: + self.items.append({"type": event_type, "timestamp": datetime.now().isoformat(), "data": data, "read": False}) + self.items = self.items[-self.max_events:] + logger.info(f"Events: '{event_type}'") + + def get_unread(self) -> list[dict]: + """Return unread events and mark them as read.""" + unread = [e for e in self.items if not e.get("read")] + for e in self.items: + e["read"] = True + return unread + + def clear(self) -> None: + self.items = [] + + @classmethod + def describe(cls) -> dict: + return { + "name": "Events", + "tier": "episodic", + "access": "read", + "description": ( + "Background events that occurred during the session (download complete, file moved, etc.). " + "Read unread events at the start of each turn to surface anything that happened in the background. " + "Events are written automatically by other components." + ), + "fields": { + "items": f"Last {MAX_EVENTS} events.
Each has type, timestamp, data, read flag.", + }, + } + + def to_dict(self) -> list: + return self.items diff --git a/alfred/infrastructure/persistence/memory/episodic/components/search_results.py b/alfred/infrastructure/persistence/memory/episodic/components/search_results.py new file mode 100644 index 0000000..f541e98 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/components/search_results.py @@ -0,0 +1,52 @@ +"""SearchResults — last torrent/media search results.""" + +import logging +from dataclasses import dataclass +from datetime import datetime + +logger = logging.getLogger(__name__) + + +@dataclass +class SearchResults: + last: dict | None = None + + def store(self, query: str, results: list[dict], search_type: str = "torrent") -> None: + self.last = { + "query": query, + "type": search_type, + "timestamp": datetime.now().isoformat(), + "results": [{"index": i + 1, **r} for i, r in enumerate(results)], + } + logger.info(f"SearchResults: Stored {len(results)} results for '{query}'") + + def get_by_index(self, index: int) -> dict | None: + if not self.last: + return None + for result in self.last.get("results", []): + if result.get("index") == index: + return result + logger.warning(f"SearchResults: #{index} not found") + return None + + def clear(self) -> None: + self.last = None + + @classmethod + def describe(cls) -> dict: + return { + "name": "SearchResults", + "tier": "episodic", + "access": "read-write", + "description": ( + "Last torrent or media search results. " + "Read to let the user pick a result by index without searching again. " + "Write after every search to store fresh results." + ), + "fields": { + "last": "Dict with query, type, timestamp, and results list.
Each result has an index (1-based) plus provider fields.", + }, + } + + def to_dict(self) -> dict | None: + return self.last diff --git a/alfred/infrastructure/persistence/memory/episodic/episodic.py b/alfred/infrastructure/persistence/memory/episodic/episodic.py new file mode 100644 index 0000000..208799b --- /dev/null +++ b/alfred/infrastructure/persistence/memory/episodic/episodic.py @@ -0,0 +1,126 @@ +"""EpisodicMemory — transient event-driven memory, reset on restart.""" + +import logging +from dataclasses import dataclass, field + +from .components import Downloads, Errors, Events, SearchResults + +logger = logging.getLogger(__name__) + + +@dataclass +class EpisodicMemory: + """ + Episodic memory — very short-lived, event-driven. + + Composed of: + - search_results: last torrent/media search + - downloads: active downloads being tracked + - events: background events (download complete, etc.) + - errors: recent errors for context + """ + + search_results: SearchResults = field(default_factory=SearchResults) + downloads: Downloads = field(default_factory=Downloads) + events: Events = field(default_factory=Events) + errors: Errors = field(default_factory=Errors) + + # Convenience proxies for backward compatibility + @property + def last_search_results(self) -> dict | None: + return self.search_results.last + + @property + def active_downloads(self) -> list[dict]: + return self.downloads.active + + @property + def background_events(self) -> list[dict]: + return self.events.items + + @property + def recent_errors(self) -> list[dict]: + return self.errors.items + + # Pending question — not a component yet, kept simple + pending_question: dict | None = None + + # Convenience methods forwarded to components + def store_search_results(self, query: str, results: list[dict], search_type: str = "torrent") -> None: + self.search_results.store(query, results, search_type) + + def get_result_by_index(self, index: int) -> dict | None: + return
self.search_results.get_by_index(index) + + def get_search_results(self) -> dict | None: + return self.search_results.last + + def clear_search_results(self) -> None: + self.search_results.clear() + + def add_active_download(self, download: dict) -> None: + self.downloads.add(download) + + def update_download_progress(self, task_id: str, progress: int, status: str = "downloading") -> None: + self.downloads.update_progress(task_id, progress, status) + + def complete_download(self, task_id: str, file_path: str) -> dict | None: + completed = self.downloads.complete(task_id, file_path) + if completed: + self.events.add("download_complete", {"name": completed.get("name"), "file_path": file_path}) + return completed + + def get_active_downloads(self) -> list[dict]: + return self.downloads.active + + def add_error(self, action: str, error: str, context: dict | None = None) -> None: + self.errors.add(action, error, context) + + def get_recent_errors(self) -> list[dict]: + return self.errors.items + + def set_pending_question(self, question: str, options: list[dict], context: dict, question_type: str = "choice") -> None: + self.pending_question = { + "type": question_type, + "question": question, + "options": options, + "context": context, + } + + def get_pending_question(self) -> dict | None: + return self.pending_question + + def resolve_pending_question(self, answer_index: int | None = None) -> dict | None: + if not self.pending_question: + return None + result = None + if answer_index is not None: + for opt in self.pending_question.get("options", []): + if opt.get("index") == answer_index: + result = opt + break + self.pending_question = None + return result + + def add_background_event(self, event_type: str, data: dict) -> None: + self.events.add(event_type, data) + + def get_unread_events(self) -> list[dict]: + return self.events.get_unread() + + def clear(self) -> None: + self.search_results.clear() + self.downloads.clear() + self.events.clear() + self.errors.clear() 
+ self.pending_question = None + logger.info("Episodic: Cleared") + + def to_dict(self) -> dict: + return { + "last_search_results": self.search_results.to_dict(), + "active_downloads": self.downloads.to_dict(), + "recent_errors": self.errors.to_dict(), + "pending_question": self.pending_question, + "background_events": self.events.to_dict(), + } diff --git a/alfred/infrastructure/persistence/memory/ltm/__init__.py b/alfred/infrastructure/persistence/memory/ltm/__init__.py new file mode 100644 index 0000000..e2129d1 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/__init__.py @@ -0,0 +1,3 @@ +from .ltm import LongTermMemory + +__all__ = ["LongTermMemory"] diff --git a/alfred/infrastructure/persistence/memory/ltm/components/__init__.py b/alfred/infrastructure/persistence/memory/ltm/components/__init__.py new file mode 100644 index 0000000..5ba293a --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/components/__init__.py @@ -0,0 +1,15 @@ +from .following import Following +from .library import Library +from .library_paths import LibraryPaths +from .media_preferences import MediaPreferences +from .subtitle_preferences import SubtitlePreferences +from .workspace import WorkspacePaths + +__all__ = [ + "WorkspacePaths", + "LibraryPaths", + "MediaPreferences", + "SubtitlePreferences", + "Library", + "Following", +] diff --git a/alfred/infrastructure/persistence/memory/ltm/components/following.py b/alfred/infrastructure/persistence/memory/ltm/components/following.py new file mode 100644 index 0000000..741bee7 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/components/following.py @@ -0,0 +1,43 @@ +"""Following — watchlist of TV shows being followed.""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime + +logger = logging.getLogger(__name__) + + +@dataclass +class Following: + shows: list[dict] = field(default_factory=list) + + def add(self, show: dict) -> None: + """Follow a show,
skipping duplicates by imdb_id.""" + existing_ids = [s.get("imdb_id") for s in self.shows] + if show.get("imdb_id") not in existing_ids: + show["followed_at"] = datetime.now().isoformat() + self.shows.append(show) + logger.info(f"Following: Now following {show.get('title')}") + + def to_dict(self) -> list: + return self.shows + + @classmethod + def describe(cls) -> dict: + return { + "name": "Following", + "tier": "ltm", + "access": "read-write", + "description": ( + "Watchlist of TV shows the user is actively following. " + "Read to check if a show should be monitored for new episodes. " + "Write (add) when the user explicitly asks to follow a show." + ), + "fields": { + "shows": "List of followed shows. Each entry has imdb_id, title, followed_at.", + }, + } + + @classmethod + def from_dict(cls, data: list) -> "Following": + return cls(shows=data) diff --git a/alfred/infrastructure/persistence/memory/ltm/components/library.py b/alfred/infrastructure/persistence/memory/ltm/components/library.py new file mode 100644 index 0000000..6595dbf --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/components/library.py @@ -0,0 +1,64 @@ +"""Library — owned movies and TV shows.""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime + +logger = logging.getLogger(__name__) + + +@dataclass +class Library: + movies: list[dict] = field(default_factory=list) + tv_shows: list[dict] = field(default_factory=list) + + def add(self, media_type: str, media: dict) -> None: + """Add a media item, skipping duplicates by imdb_id.""" + collection = self._collection(media_type) + if collection is None: + return + + existing_ids = [m.get("imdb_id") for m in collection] + if media.get("imdb_id") not in existing_ids: + media["added_at"] = datetime.now().isoformat() + collection.append(media) + logger.info(f"Library: Added {media.get('title')} to {media_type}") + + def get(self, media_type: str) -> list[dict]: + """Get all items for a media
type.""" + return self._collection(media_type) or [] + + @classmethod + def describe(cls) -> dict: + return { + "name": "Library", + "tier": "ltm", + "access": "read-write", + "description": ( + "Catalogue of media owned by the user. " + "Read to check if a title is already in the library before downloading. " + "Write (add) after successfully moving a media file to its destination." + ), + "fields": { + "movies": "List of owned movies. Each entry has imdb_id, title, year, quality, file_path, added_at.", + "tv_shows": "List of owned TV shows. Each entry has imdb_id, title, seasons, added_at.", + }, + } + + def _collection(self, media_type: str) -> list[dict] | None: + if media_type == "movies": + return self.movies + if media_type == "tv_shows": + return self.tv_shows + logger.warning(f"Library: Unknown media type '{media_type}'") + return None + + def to_dict(self) -> dict: + return {"movies": self.movies, "tv_shows": self.tv_shows} + + @classmethod + def from_dict(cls, data: dict) -> "Library": + return cls( + movies=data.get("movies", []), + tv_shows=data.get("tv_shows", []), + ) diff --git a/alfred/infrastructure/persistence/memory/ltm/components/library_paths.py b/alfred/infrastructure/persistence/memory/ltm/components/library_paths.py new file mode 100644 index 0000000..beb6617 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/components/library_paths.py @@ -0,0 +1,70 @@ +"""LibraryPaths β€” user-defined media library folders. + +Extensible: the user creates collections as needed (tv_shows, movies, music, games…). +Each collection name maps to its root folder on disk. + +Set via /set_path, never modified by the agent autonomously. +Access: READ ONLY for the agent β€” used to resolve destination paths when organizing media. +""" + +from dataclasses import dataclass, field + + +@dataclass +class LibraryPaths: + """ + User-defined media library folders. 
+ + folders is a free dict: {"tv_shows": "/media/tv", "movies": "/media/movies", ...} + Add new collections simply by setting a new key via /set_path. + + Access: READ ONLY for the agent β€” set via /set_path only. + """ + + folders: dict[str, str] = field(default_factory=dict) + + def get(self, collection: str) -> str | None: + return self.folders.get(collection) + + def set(self, collection: str, path: str) -> None: + self.folders[collection] = path + + def to_dict(self) -> dict: + return self.folders + + @classmethod + def describe(cls) -> dict: + return { + "name": "LibraryPaths", + "tier": "ltm", + "access": "read", + "description": ( + "User-defined media library folders. " + "Read these paths to resolve where to move an organised media file. " + "Keys are collection names (tv_shows, movies, music, games…), values are root paths. " + "New collections are added by the user via /set_path β€” never by the agent." + ), + "fields": { + "folders": "Dict of collection_name β†’ absolute path. E.g. 
{'tv_shows': '/media/tv', 'movies': '/media/movies'}.", + }, + } + + @classmethod + def from_dict(cls, data: dict) -> "LibraryPaths": + # Migrate from old flat format (tvshow_folder, movie_folder) + folders = dict(data) + if not folders: + return cls() + + migrated = {} + legacy_map = { + "tvshow_folder": "tv_shows", + "movie_folder": "movies", + } + for old_key, new_key in legacy_map.items(): + if old_key in folders: + migrated[new_key] = folders.pop(old_key) + + # Keep any already-migrated keys + migrated.update(folders) + return cls(folders=migrated) diff --git a/alfred/infrastructure/persistence/memory/ltm/components/media_preferences.py b/alfred/infrastructure/persistence/memory/ltm/components/media_preferences.py new file mode 100644 index 0000000..0e80602 --- /dev/null +++ b/alfred/infrastructure/persistence/memory/ltm/components/media_preferences.py @@ -0,0 +1,52 @@ +"""MediaPreferences β€” user preferences for video quality and audio.""" + +from dataclasses import dataclass, field + + +@dataclass +class MediaPreferences: + """Quality and audio preferences for media downloads and organisation.""" + + quality: str = "1080p" + audio_languages: list[str] = field(default_factory=lambda: ["fr", "en"]) + auto_organize: bool = False + + def to_dict(self) -> dict: + return { + "quality": self.quality, + "audio_languages": self.audio_languages, + "auto_organize": self.auto_organize, + } + + @classmethod + def describe(cls) -> dict: + return { + "name": "MediaPreferences", + "tier": "ltm", + "access": "read", + "description": ( + "User preferences for video quality and audio. " + "Use these when searching torrents or choosing a release to download. " + "Never modify autonomously β€” only via explicit user command." + ), + "fields": { + "quality": "Preferred video quality, e.g. '1080p', '4K', '720p'.", + "audio_languages": ( + "Ordered list of preferred audio languages (ISO 639-1). " + "First = most preferred." 
@dataclass
class SubtitlePreferences:
    """
    User-level defaults for subtitle selection, applied globally as the
    base of the SubtitleRuleSet inheritance chain.

    These are the top-level defaults — individual shows/movies/release groups
    can override them via .alfred/rules.yaml.

    Naming convention used when placing subtitle files alongside a video:
      {lang}.srt         → standard track (e.g. fr.srt, en.srt)
      {lang}.sdh.srt     → SDH / hearing-impaired track
      {lang}.forced.srt  → forced track (foreign lines only)

    Fields mirror SubtitleRuleSet.override() parameters:
      - languages: ordered list of ISO 639-1 codes to keep (others ignored)
      - formats: list of subtitle formats to keep (e.g. ["srt", "ass"])
      - types: list of subtitle types to keep (e.g. ["standard", "forced", "sdh"])
    """

    languages: list[str] = field(default_factory=lambda: ["fr", "en"])
    formats: list[str] = field(default_factory=lambda: ["srt", "ass"])
    types: list[str] = field(default_factory=lambda: ["standard", "forced", "sdh"])

    def to_dict(self) -> dict:
        """Return the three preference lists as a plain dict."""
        return {
            "languages": self.languages,
            "formats": self.formats,
            "types": self.types,
        }

    @classmethod
    def describe(cls) -> dict:
        """Return the static description of this component (name, tier, access, fields)."""
        return {
            "name": "SubtitlePreferences",
            "tier": "ltm",
            "access": "read",
            "description": (
                "User defaults for subtitle selection. Applied as global base rules; "
                "overridden per show/movie/release group via .alfred/rules.yaml. "
                "Never modify autonomously — only via explicit user command."
            ),
            "fields": {
                "languages": (
                    "Ordered list of subtitle languages to keep (ISO 639-1). "
                    "Others are ignored. First = most preferred."
                ),
                "formats": (
                    "List of subtitle formats to keep, e.g. ['srt', 'ass']. "
                    "Others are skipped."
                ),
                "types": (
                    "List of subtitle types to keep: 'standard', 'sdh', 'forced'. "
                    "Omit a type to drop those tracks globally."
                ),
            },
        }

    @classmethod
    def from_dict(cls, data: dict) -> "SubtitlePreferences":
        """Build preferences from persisted data, with legacy migration.

        Resolution of ``types``:
          1. an explicit ``types`` key always wins;
          2. otherwise, if a legacy ``keep_sdh`` / ``keep_forced`` flag is
             present, the list is derived from those flags (a missing flag
             counts as True);
          3. otherwise the dataclass default is used, so ``from_dict({})``
             equals ``SubtitlePreferences()``.

        Other legacy fields (min_size_kb, link_subs_folder) are ignored.
        """
        data = data or {}
        prefs = cls(
            languages=data.get("languages", ["fr", "en"]),
            formats=data.get("formats", ["srt", "ass"]),
        )
        if "types" in data:
            prefs.types = data["types"]
        elif "keep_sdh" in data or "keep_forced" in data:
            # Back-compat: translate the old boolean flags into a types list.
            types = ["standard"]
            if data.get("keep_sdh", True):
                types.append("sdh")
            if data.get("keep_forced", True):
                types.append("forced")
            prefs.types = types
        # else: keep the dataclass default set by cls(...) above.
        return prefs
@dataclass
class LongTermMemory:
    """
    Long-term memory — persisted to disk, survives restarts.

    - workspace: fixed infrastructure paths (download, torrent) — READ ONLY for agent
    - library_paths: user-defined media folders (tv_shows, movies, …) — READ ONLY for agent
    - media_preferences: quality and audio language preferences
    - subtitle_preferences: subtitle selection and naming rules
    - library: owned media catalogue
    - following: watchlist
    """

    workspace: WorkspacePaths = field(default_factory=WorkspacePaths)
    library_paths: LibraryPaths = field(default_factory=LibraryPaths)
    media_preferences: MediaPreferences = field(default_factory=MediaPreferences)
    subtitle_preferences: SubtitlePreferences = field(default_factory=SubtitlePreferences)
    library: Library = field(default_factory=Library)
    following: Following = field(default_factory=Following)

    def to_dict(self) -> dict:
        """Serialise every component under its own top-level key."""
        return {
            "workspace": self.workspace.to_dict(),
            "library_paths": self.library_paths.to_dict(),
            "media_preferences": self.media_preferences.to_dict(),
            "subtitle_preferences": self.subtitle_preferences.to_dict(),
            "library": self.library.to_dict(),
            "following": self.following.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LongTermMemory":
        """Rebuild the memory from persisted data, migrating older layouts.

        Supported layouts for the path sections:
          - current: ``workspace`` / ``library_paths`` sub-dicts;
          - intermediate: library folders under ``paths``;
          - legacy flat: ``download_folder``, ``torrent_folder``,
            ``tvshow_folder``, ``movie_folder`` at the top level.
        """
        data = data or {}

        # WorkspacePaths.from_dict only reads its own known keys, so handing
        # it the whole document for the legacy flat layout is harmless.
        workspace_data = data.get("workspace") or data

        # LibraryPaths.from_dict keeps EVERY key it receives as a collection.
        # An empty "library_paths" (the state of a fresh install) is falsy,
        # so the fallback must never be the whole document: that would turn
        # "workspace", "library", "following"… into bogus library folders on
        # the next load. Only the known legacy flat keys are forwarded.
        legacy_path_keys = ("tvshow_folder", "movie_folder")
        library_paths_data = (
            data.get("library_paths")
            or data.get("paths")
            or {key: data[key] for key in legacy_path_keys if key in data}
        )

        return cls(
            workspace=WorkspacePaths.from_dict(workspace_data),
            library_paths=LibraryPaths.from_dict(library_paths_data),
            # migration: old key was "preferences"
            media_preferences=MediaPreferences.from_dict(
                data.get("media_preferences") or data.get("preferences") or {}
            ),
            subtitle_preferences=SubtitlePreferences.from_dict(
                data.get("subtitle_preferences") or {}
            ),
            # "or" also covers a section persisted as null.
            library=Library.from_dict(data.get("library") or {}),
            following=Following.from_dict(data.get("following") or []),
        )
def _load_components(package_name: str) -> list[dict]:
    """Import every module of a package and collect describe() results.

    Args:
        package_name: Dotted name of a ``components`` package.

    Returns:
        The value returned by ``describe()`` for each class that is defined
        in one of the package's modules and exposes a ``describe`` attribute.
        Failures are logged and skipped at the smallest possible scope
        (package, module, or class), so one broken component file does not
        hide the others.
    """
    descriptions: list[dict] = []

    try:
        package = importlib.import_module(package_name)
    except Exception as e:
        logger.warning(f"MemoryRegistry: Could not load package {package_name}: {e}")
        return descriptions

    # __path__ also works for namespace packages, whose __file__ is None.
    search_path = getattr(package, "__path__", None)
    if search_path is None:
        logger.warning(f"MemoryRegistry: Could not load package {package_name}: not a package")
        return descriptions

    for module_info in pkgutil.iter_modules(search_path):
        module_name = f"{package_name}.{module_info.name}"
        try:
            module = importlib.import_module(module_name)
        except Exception as e:
            # Isolate the failure: keep discovering the remaining modules.
            logger.warning(f"MemoryRegistry: Could not load module {module_name}: {e}")
            continue

        for _, cls in inspect.getmembers(module, inspect.isclass):
            # Skip classes merely imported into the module from elsewhere.
            if cls.__module__ != module.__name__ or not hasattr(cls, "describe"):
                continue
            try:
                descriptions.append(cls.describe())
            except Exception as e:
                logger.warning(f"MemoryRegistry: describe() failed on {cls.__name__}: {e}")

    return descriptions


class MemoryRegistry:
    """
    Autodiscovers memory components and exposes their descriptions.

    Usage:
        registry = MemoryRegistry()
        all_components = registry.all()           # flat list
        ltm_components = registry.by_tier("ltm")  # filtered by tier
        schema = registry.schema()                # grouped by tier, for the prompt
    """

    def __init__(self):
        """Scan every package listed in _TIER_PACKAGES and cache the descriptions."""
        self._components: list[dict] = []
        for package in _TIER_PACKAGES:
            self._components.extend(_load_components(package))
        logger.info(f"MemoryRegistry: Loaded {len(self._components)} components")

    def all(self) -> list[dict]:
        """Return all component descriptions (the cached list itself)."""
        return self._components

    def by_tier(self, tier: str) -> list[dict]:
        """Return components for a specific tier (ltm, stm, episodic)."""
        return [c for c in self._components if c.get("tier") == tier]

    def schema(self) -> dict[str, list[dict]]:
        """Return components grouped by tier.

        The three known tiers are always present, possibly empty. A component
        without a ``tier`` key is grouped under "unknown"; any other tier
        value gets its own group.
        """
        result: dict[str, list[dict]] = {"ltm": [], "stm": [], "episodic": []}
        for component in self._components:
            tier = component.get("tier", "unknown")
            result.setdefault(tier, []).append(component)
        return result
MAX_HISTORY = 20


@dataclass
class Conversation:
    """Message history and detected language for the current session."""

    messages: list[dict] = field(default_factory=list)
    max_history: int = MAX_HISTORY
    language: str = "en"

    def add(self, role: str, content: str) -> None:
        """Append a timestamped message, keeping at most ``max_history`` entries.

        The oldest messages are dropped first. A ``max_history`` of zero or
        less keeps nothing.
        """
        self.messages.append(
            {"role": role, "content": content, "timestamp": datetime.now().isoformat()}
        )
        # Count the overflow explicitly: a ``[-max_history:]`` slice would
        # keep the whole list when max_history is 0.
        overflow = len(self.messages) - max(self.max_history, 0)
        if overflow > 0:
            self.messages = self.messages[overflow:]
        logger.debug(f"Conversation: Added {role} message")

    def recent(self, n: int = 10) -> list[dict]:
        """Return the last ``n`` messages (an empty list when ``n`` <= 0)."""
        if n <= 0:
            # ``messages[-0:]`` is the full list, not an empty one.
            return []
        return self.messages[-n:]

    def set_language(self, language: str) -> None:
        """Record the conversation language."""
        self.language = language
        logger.debug(f"Conversation: Language -> {language}")

    def clear(self) -> None:
        """Drop all messages and reset the language to "en"."""
        self.messages = []
        self.language = "en"

    @classmethod
    def describe(cls) -> dict:
        """Return the static description of this component (name, tier, access, fields)."""
        return {
            "name": "Conversation",
            "tier": "stm",
            "access": "read",
            "description": (
                "Current session message history and detected language. "
                "Read to maintain conversational context. "
                "Messages are managed automatically — never write directly."
            ),
            "fields": {
                "messages": f"Last {MAX_HISTORY} messages (role, content, timestamp).",
                "language": "Detected conversation language (ISO 639-1 code, e.g. 'fr', 'en').",
            },
        }

    def to_dict(self) -> dict:
        """Return the messages and language as a plain dict."""
        return {"messages": self.messages, "language": self.language}
'media_search', 'organize_file'.", + }, + } + + def to_dict(self) -> dict: + return {"data": self.data, "topic": self.topic} diff --git a/alfred/infrastructure/persistence/memory/stm/components/workflow.py b/alfred/infrastructure/persistence/memory/stm/components/workflow.py new file mode 100644 index 0000000..c8cd44e --- /dev/null +++ b/alfred/infrastructure/persistence/memory/stm/components/workflow.py @@ -0,0 +1,53 @@ +"""Workflow β€” tracks the current in-progress agent task.""" + +import logging +from dataclasses import dataclass +from datetime import datetime + +logger = logging.getLogger(__name__) + + +@dataclass +class Workflow: + current: dict | None = None + + def start(self, workflow_type: str, target: dict) -> None: + self.current = { + "type": workflow_type, + "target": target, + "stage": "started", + "started_at": datetime.now().isoformat(), + } + logger.info(f"Workflow: Started '{workflow_type}'") + + def update_stage(self, stage: str) -> None: + if self.current: + self.current["stage"] = stage + logger.debug(f"Workflow: Stage -> {stage}") + + def end(self) -> None: + if self.current: + logger.info(f"Workflow: Ended '{self.current.get('type')}'") + self.current = None + + def clear(self) -> None: + self.current = None + + @classmethod + def describe(cls) -> dict: + return { + "name": "Workflow", + "tier": "stm", + "access": "read-write", + "description": ( + "Tracks the current in-progress multi-step task. " + "Read to know what you are currently doing and what stage you are at. " + "Write to start, advance, or end a workflow as you execute steps." + ), + "fields": { + "current": "Active workflow dict with keys: type, target, stage, started_at. 
None if idle.", + }, + } + + def to_dict(self) -> dict | None: + return self.current diff --git a/alfred/infrastructure/persistence/memory/stm/stm.py b/alfred/infrastructure/persistence/memory/stm/stm.py new file mode 100644 index 0000000..b26b70f --- /dev/null +++ b/alfred/infrastructure/persistence/memory/stm/stm.py @@ -0,0 +1,91 @@ +"""ShortTermMemory β€” volatile session memory, reset on restart.""" + +import logging +from dataclasses import dataclass, field + +from .components import Conversation, Entities, Workflow + +logger = logging.getLogger(__name__) + + +@dataclass +class ShortTermMemory: + """ + Short-term memory β€” lives for the duration of a session. + + Composed of: + - conversation: message history + language + - workflow: current in-progress task + - entities: extracted context (title, year, quality…) + """ + + conversation: Conversation = field(default_factory=Conversation) + workflow: Workflow = field(default_factory=Workflow) + entities: Entities = field(default_factory=Entities) + + # Convenience proxies kept for backward compatibility with existing callers + @property + def conversation_history(self) -> list[dict]: + return self.conversation.messages + + @property + def current_workflow(self) -> dict | None: + return self.workflow.current + + @property + def extracted_entities(self) -> dict: + return self.entities.data + + @property + def current_topic(self) -> str | None: + return self.entities.topic + + @property + def language(self) -> str: + return self.conversation.language + + # Convenience methods forwarded to components + def add_message(self, role: str, content: str) -> None: + self.conversation.add(role, content) + + def get_recent_history(self, n: int = 10) -> list[dict]: + return self.conversation.recent(n) + + def start_workflow(self, workflow_type: str, target: dict) -> None: + self.workflow.start(workflow_type, target) + + def update_workflow_stage(self, stage: str) -> None: + self.workflow.update_stage(stage) + + def 
class SubtitleMetadataStore:
    """
    Manages the .alfred/metadata.yaml file that lives inside the media library folder.

    For TV shows: /media/tv_shows/The X-Files/.alfred/metadata.yaml
    For movies:   /media/movies/Inception (2010)/.alfred/metadata.yaml

    The store never raises on a missing or unreadable file — it returns empty
    defaults. Writes go to a sibling .tmp file that then replaces the target.
    """

    def __init__(self, library_root: Path):
        """Bind the store to the media folder that contains (or will contain) .alfred/."""
        self._root = library_root
        self._alfred_dir = library_root / ".alfred"
        self._metadata_path = self._alfred_dir / "metadata.yaml"

    # ------------------------------------------------------------------
    # Load / Save
    # ------------------------------------------------------------------

    def load(self) -> dict:
        """Return the full metadata dict.

        An empty dict is returned when the file is absent, unreadable, empty,
        or when its top-level YAML node is not a mapping.
        """
        if not self._metadata_path.exists():
            return {}
        try:
            with open(self._metadata_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}")
            return {}
        if not isinstance(data, dict):
            # Every caller uses .get()/.setdefault(); a list or scalar root
            # would crash them, so treat it like an unreadable file.
            logger.warning(
                f"MetadataStore: could not read {self._metadata_path}: top-level node is not a mapping"
            )
            return {}
        return data

    def save(self, data: dict) -> None:
        """Write metadata.yaml via a temp file. Creates .alfred/ if needed.

        Raises:
            Exception: whatever the write or the replace raised, after the
                temp file has been cleaned up.
        """
        self._alfred_dir.mkdir(parents=True, exist_ok=True)
        tmp = self._metadata_path.with_suffix(".yaml.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
            # replace() overwrites an existing target on every platform;
            # rename() raises FileExistsError on Windows when it exists.
            tmp.replace(self._metadata_path)
        except Exception as e:
            logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}")
            tmp.unlink(missing_ok=True)
            raise

    # ------------------------------------------------------------------
    # Pattern
    # ------------------------------------------------------------------

    def confirmed_pattern(self) -> str | None:
        """Return the confirmed pattern_id, or None."""
        data = self.load()
        if data.get("pattern_confirmed"):
            return data.get("detected_pattern")
        return None

    def mark_pattern_confirmed(self, pattern_id: str, media_info: dict | None = None) -> None:
        """Persist detected_pattern + pattern_confirmed=true.

        When ``media_info`` is given, its media_type / imdb_id / title are
        stored only for keys not already present in the file.
        """
        data = self.load()
        data["detected_pattern"] = pattern_id
        data["pattern_confirmed"] = True
        if media_info:
            data.setdefault("media_type", media_info.get("media_type"))
            data.setdefault("imdb_id", media_info.get("imdb_id"))
            data.setdefault("title", media_info.get("title"))
        self.save(data)
        logger.info(f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}")

    # ------------------------------------------------------------------
    # Subtitle history
    # ------------------------------------------------------------------

    def append_history(
        self,
        placed_pairs: list[tuple[PlacedTrack, SubtitleTrack]],
        season: int | None = None,
        episode: int | None = None,
        release_group: str | None = None,
    ) -> None:
        """Append one history entry with all placed tracks.

        Does nothing when ``placed_pairs`` is empty. ``season`` / ``episode``
        are written only when not None. A non-empty ``release_group`` is also
        added once to the top-level ``release_groups`` list.
        """
        if not placed_pairs:
            return

        data = self.load()
        history = data.setdefault("subtitle_history", [])

        tracks_data: list[dict[str, Any]] = []
        for placed, track in placed_pairs:
            # Infer type from destination filename parts (e.g. en.sdh.srt → sdh)
            # NOTE(review): assumes the {lang}[.type].{ext} naming; a dotted
            # prefix such as "Show.en.srt" would be read as type "en" — confirm
            # against the placer's naming.
            parts = placed.filename.rsplit(".", 2)  # ["en", "sdh", "srt"] or ["en", "srt"]
            inferred_type = parts[1] if len(parts) == 3 else "standard"

            tracks_data.append({
                "language": track.language.code if track.language else "unknown",
                "type": inferred_type,
                "format": placed.destination.suffix.lstrip("."),
                "is_embedded": track.is_embedded,
                "source_file": placed.source.name,
                "placed_as": placed.filename,
                "confidence": round(track.confidence, 3),
            })

        entry: dict[str, Any] = {
            "placed_at": datetime.now(timezone.utc).isoformat(),
            "release_group": release_group,
            "tracks": tracks_data,
        }
        if season is not None:
            entry["season"] = season
        if episode is not None:
            entry["episode"] = episode

        history.append(entry)

        # Update release_groups list
        if release_group:
            groups = data.setdefault("release_groups", [])
            if release_group not in groups:
                groups.append(release_group)

        self.save(data)

        # "is not None" rather than truthiness: season 0 (specials) and
        # episode 0 are valid and must not be labelled as a movie.
        if season is not None and episode is not None:
            label = "S%02dE%02d" % (season, episode)
        else:
            label = "movie"
        logger.info(
            f"MetadataStore: appended history ({label}) — {len(tracks_data)} track(s)"
        )

    def history(self) -> list[dict]:
        """Return the raw history list."""
        return self.load().get("subtitle_history", [])
class RuleSetRepository:
    """
    Builds a fully chained SubtitleRuleSet by reading YAML from .alfred/.

    Inheritance chain:
      global (hardcoded defaults)
        └── release_group  (.alfred/release_groups/{GROUP}.yaml)
              └── local    (.alfred/rules.yaml)

    Rules are delta-only — None means "inherit from parent".
    The repository only creates intermediate nodes when the corresponding
    file exists and contains an override section.
    """

    def __init__(self, library_root: Path):
        """Bind the repository to the media folder that contains .alfred/."""
        self._alfred_dir = library_root / ".alfred"

    @staticmethod
    def _read_override(path: Path) -> dict:
        """Return the ``override`` mapping of a YAML file, or {} if absent or not a mapping."""
        document = _load_yaml(path)
        section = document.get("override") if isinstance(document, dict) else None
        return section if isinstance(section, dict) else {}

    def load(
        self,
        release_group: str | None = None,
        subtitle_preferences: "SubtitlePreferences | None" = None,
    ) -> SubtitleRuleSet:
        """
        Build and return the resolved RuleSet chain.

        If subtitle_preferences is provided, it seeds the global base rule set
        from LTM (overriding the hardcoded DEFAULT_RULES).
        Returns global default if no overrides exist.
        """
        # NOTE(review): override() is applied to the object returned by
        # global_default(); presumably that is a fresh instance per call —
        # confirm, otherwise LTM preferences leak into later loads.
        base = SubtitleRuleSet.global_default()
        if subtitle_preferences is not None:
            base.override(
                languages=subtitle_preferences.languages,
                formats=subtitle_preferences.formats,
                types=subtitle_preferences.types,
            )
        current = base

        # Release group level
        if release_group:
            rg_path = self._alfred_dir / "release_groups" / f"{release_group}.yaml"
            rg_data = self._read_override(rg_path)
            if rg_data:
                rg_ruleset = SubtitleRuleSet(
                    scope=RuleScope(level="release_group", identifier=release_group),
                    parent=current,
                )
                rg_ruleset.override(**_filter_override(rg_data))
                current = rg_ruleset
                logger.debug(f"RuleSetRepository: loaded release_group override for '{release_group}'")

        # Local (show/movie) level
        local_data = self._read_override(self._alfred_dir / "rules.yaml")
        if local_data:
            local_ruleset = SubtitleRuleSet(
                scope=RuleScope(level="show"),
                parent=current,
            )
            local_ruleset.override(**_filter_override(local_data))
            current = local_ruleset
            logger.debug("RuleSetRepository: loaded local rules.yaml override")

        return current

    def save_local(self, delta: dict) -> None:
        """Write or update .alfred/rules.yaml with override delta.

        ``delta`` is merged into the existing ``override`` section (created
        if missing, null or malformed). Other top-level keys already present
        in the file are preserved.

        Raises:
            Exception: whatever the write or the replace raised, after the
                temp file has been cleaned up.
        """
        self._alfred_dir.mkdir(parents=True, exist_ok=True)
        path = self._alfred_dir / "rules.yaml"

        existing = _load_yaml(path)
        if not isinstance(existing, dict):
            existing = {}
        # "override:" with no value parses as None; .update() on it would crash.
        merged = existing.get("override")
        if not isinstance(merged, dict):
            merged = {}
        merged.update(delta)
        existing["override"] = merged

        tmp = path.with_suffix(".yaml.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                yaml.safe_dump(existing, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
            # replace() overwrites an existing rules.yaml on every platform;
            # rename() raises FileExistsError on Windows when it exists.
            tmp.replace(path)
            logger.info(f"RuleSetRepository: saved local rules to {path}")
        except Exception as e:
            logger.error(f"RuleSetRepository: could not write {path}: {e}")
            tmp.unlink(missing_ok=True)
            raise
"formats", "types", "format_priority", "min_confidence"} + return {k: v for k, v in data.items() if k in valid} diff --git a/alfred/knowledge/patterns/adjacent.yaml b/alfred/knowledge/patterns/adjacent.yaml new file mode 100644 index 0000000..e3ab408 --- /dev/null +++ b/alfred/knowledge/patterns/adjacent.yaml @@ -0,0 +1,13 @@ +id: adjacent +version: "1.0" +description: > + Subtitle files sit directly alongside the video file, in the same directory. + Example: Show.S01E01.mkv + Show.S01E01.English.srt in the same folder. + +scan_strategy: adjacent +root_folder: null + +type_detection: + method: token_in_name + description: > + Type (standard/SDH/forced) is determined from tokens in the filename. diff --git a/alfred/knowledge/patterns/embedded.yaml b/alfred/knowledge/patterns/embedded.yaml new file mode 100644 index 0000000..f07cb2c --- /dev/null +++ b/alfred/knowledge/patterns/embedded.yaml @@ -0,0 +1,14 @@ +id: embedded +version: "1.0" +description: > + Subtitle tracks are embedded inside the video container (MKV, MP4). + Detected via ffprobe β€” no external files. + +scan_strategy: embedded +root_folder: null + +type_detection: + method: ffprobe_metadata + description: > + Language, type (SDH/forced) and format are read directly from the + container track metadata via ffprobe. diff --git a/alfred/knowledge/patterns/episode_subfolder.yaml b/alfred/knowledge/patterns/episode_subfolder.yaml new file mode 100644 index 0000000..d3e9fdf --- /dev/null +++ b/alfred/knowledge/patterns/episode_subfolder.yaml @@ -0,0 +1,16 @@ +id: episode_subfolder +version: "1.0" +description: > + Subtitle files are in a Subs/ folder at the release root, with one subfolder + per episode named after the episode filename (without extension). 
+ Example: Subs/Show.S01E01.BluRay.x265-RARBG/2_English.srt + +scan_strategy: episode_subfolder +root_folder: "Subs" + +type_detection: + method: size_and_count + description: > + When multiple files share the same detected language, differentiate + standard vs SDH by comparing file size and subtitle entry count. + Larger file (more entries) = SDH. diff --git a/alfred/knowledge/patterns/subs_flat.yaml b/alfred/knowledge/patterns/subs_flat.yaml new file mode 100644 index 0000000..c3a126d --- /dev/null +++ b/alfred/knowledge/patterns/subs_flat.yaml @@ -0,0 +1,14 @@ +id: subs_flat +version: "1.0" +description: > + Subtitle files are directly in a Subs/ folder at the release root, + with no per-episode subfolder. + Example: Subs/Show.S01E01.English.srt + +scan_strategy: flat +root_folder: "Subs" + +type_detection: + method: token_in_name + description: > + Type (standard/SDH/forced) is determined from tokens in the filename. diff --git a/alfred/knowledge/release_groups/KONSTRAST.yaml b/alfred/knowledge/release_groups/KONSTRAST.yaml new file mode 100644 index 0000000..91b678d --- /dev/null +++ b/alfred/knowledge/release_groups/KONSTRAST.yaml @@ -0,0 +1,5 @@ +name: KONSTRAST +known_patterns: ["episode_subfolder", "embedded"] +notes: > + Follows similar conventions to RARBG. Pattern varies per release β€” always + verify per season. 
diff --git a/alfred/knowledge/release_groups/RARBG.yaml b/alfred/knowledge/release_groups/RARBG.yaml new file mode 100644 index 0000000..c5055a1 --- /dev/null +++ b/alfred/knowledge/release_groups/RARBG.yaml @@ -0,0 +1,2 @@ +name: RARBG +known_patterns: ["episode_subfolder"] diff --git a/alfred/knowledge/subtitles.yaml b/alfred/knowledge/subtitles.yaml new file mode 100644 index 0000000..f45f587 --- /dev/null +++ b/alfred/knowledge/subtitles.yaml @@ -0,0 +1,89 @@ +name: subtitles +version: "1.0" +description: "Subtitle classification rules β€” formats, types, languages and their tokens" + +defaults: + languages: ["fra", "eng"] + formats: ["srt"] + types: ["standard", "forced", "sdh"] + format_priority: ["srt", "ass"] + min_confidence: 0.7 + +formats: + srt: + extensions: [".srt"] + description: "SubRip β€” plain text, universal" + ass: + extensions: [".ass", ".ssa"] + description: "Advanced SubStation Alpha β€” with styles and positioning" + +types: + standard: + tokens: [] + description: "Normal subtitle track" + sdh: + tokens: ["sdh", "hi", "cc", "hearing"] + description: "Hearing-impaired β€” includes sound effects and speaker labels" + forced: + tokens: ["forced", "foreign"] + description: "Foreign lines only β€” e.g. 
alien speech in an otherwise English film" + +languages: + fra: + tokens: ["fr", "fra", "french", "francais", "vf", "vff", "vostfr"] + eng: + tokens: ["en", "eng", "english"] + spa: + tokens: ["es", "spa", "spanish", "espanol", "espaΓ±ol"] + deu: + tokens: ["de", "deu", "ger", "german", "deutsch"] + ita: + tokens: ["it", "ita", "italian", "italiano"] + por: + tokens: ["pt", "por", "portuguese", "portugues", "portuguΓͺs"] + nld: + tokens: ["nl", "nld", "dut", "dutch", "nederlands"] + nor: + tokens: ["no", "nor", "norwegian", "norsk"] + swe: + tokens: ["sv", "swe", "swedish", "svenska"] + dan: + tokens: ["da", "dan", "danish", "dansk"] + fin: + tokens: ["fi", "fin", "finnish", "suomi"] + pol: + tokens: ["pl", "pol", "polish", "polski"] + ces: + tokens: ["cs", "ces", "cze", "czech"] + slk: + tokens: ["sk", "slk", "slo", "slovak"] + hun: + tokens: ["hu", "hun", "hungarian", "magyar"] + ron: + tokens: ["ro", "ron", "rum", "romanian", "romana", "romΓ’nΔƒ"] + bul: + tokens: ["bg", "bul", "bulgarian"] + hrv: + tokens: ["hr", "hrv", "croatian", "hrvatski"] + srp: + tokens: ["sr", "srp", "serbian", "srpski"] + slv: + tokens: ["sl", "slv", "slovenian", "slovensko"] + est: + tokens: ["et", "est", "estonian", "eesti"] + lav: + tokens: ["lv", "lav", "latvian", "latviesu"] + lit: + tokens: ["lt", "lit", "lithuanian", "lietuviu"] + mkd: + tokens: ["mk", "mkd", "mac", "macedonian"] + jpn: + tokens: ["ja", "jpn", "japanese"] + zho: + tokens: ["zh", "zho", "chi", "chinese"] + kor: + tokens: ["ko", "kor", "korean"] + ara: + tokens: ["ar", "ara", "arabic"] + tur: + tokens: ["tr", "tur", "turkish"] diff --git a/testing/subtitles/scan_subtitles.py b/testing/subtitles/scan_subtitles.py new file mode 100644 index 0000000..0f0387f --- /dev/null +++ b/testing/subtitles/scan_subtitles.py @@ -0,0 +1,528 @@ +#!/usr/bin/env python3 +""" +scan_subtitles.py β€” CLI pour tester le pipeline de scan de sous-titres Alfred. 
+ +Usage: + uv run testing/subtitles/scan_subtitles.py [options] + +Options: + --release-group RARBG Groupe de release (optionnel β€” active les known patterns) + --pattern adjacent Forcer un pattern (adjacent|flat|episode_subfolder|embedded) + --video FILE Fichier vidΓ©o de rΓ©fΓ©rence (dΓ©faut: premier .mkv/.mp4 trouvΓ©) + --verbose DΓ©tails sur chaque token analysΓ© + --no-color DΓ©sactive la colorisation + +Exemples: + uv run scripts/scan_subtitles.py "/media/tv/The X-Files/Season 01" + uv run scripts/scan_subtitles.py "/media/tv/The X-Files/Season 01" --release-group RARBG + uv run scripts/scan_subtitles.py "/media/tv/The X-Files/Season 01" --pattern episode_subfolder --verbose +""" + +import argparse +import sys +import textwrap +from pathlib import Path + +# Ajoute la racine du projet au path (testing/subtitles/ β†’ ../../) +_PROJECT_ROOT = Path(__file__).resolve().parents[2] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# --------------------------------------------------------------------------- +# Colorisation simple (pas de dΓ©pendance externe) +# --------------------------------------------------------------------------- + +USE_COLOR = True + +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +CYAN = "\033[36m" +BLUE = "\033[34m" +MAGENTA = "\033[35m" + + +def c(text: str, *codes: str) -> str: + if not USE_COLOR: + return text + return "".join(codes) + text + RESET + + +def section(title: str) -> None: + width = 70 + print() + print(c("─" * width, DIM)) + print(c(f" {title}", BOLD, CYAN)) + print(c("─" * width, DIM)) + + +def ok(msg: str) -> None: + print(c(" βœ“ ", GREEN, BOLD) + msg) + + +def warn(msg: str) -> None: + print(c(" ⚠ ", YELLOW, BOLD) + msg) + + +def err(msg: str) -> None: + print(c(" βœ— ", RED, BOLD) + msg) + + +def info(msg: str, indent: int = 2) -> None: + print(" " * indent + msg) + + +def kv(key: str, value: str, indent: int = 4) -> 
None: + print(" " * indent + c(f"{key}: ", BOLD) + value) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +VIDEO_EXTS = {".mkv", ".mp4", ".avi", ".mov", ".ts", ".m2ts"} + + +def find_videos(folder: Path) -> list[Path]: + return sorted( + p for p in folder.iterdir() + if p.is_file() and p.suffix.lower() in VIDEO_EXTS + ) + + +def confidence_bar(conf: float, width: int = 20) -> str: + filled = int(conf * width) + bar = "β–ˆ" * filled + "β–‘" * (width - filled) + if conf >= 0.8: + color = GREEN + elif conf >= 0.5: + color = YELLOW + else: + color = RED + return c(bar, color) + c(f" {conf:.0%}", BOLD) + + +def track_summary(track, verbose: bool = False) -> None: + lang = track.language.code if track.language else c("?", RED) + fmt = track.format.id if track.format else c("?", RED) + typ = track.subtitle_type.value + src = "embedded" if track.is_embedded else (track.file_path.name if track.file_path else "?") + + # Couleur du type + type_colors = { + "standard": GREEN, + "sdh": YELLOW, + "forced": BLUE, + "unknown": RED, + } + typ_str = c(typ, type_colors.get(typ, RESET)) + + unresolved = not track.is_embedded and track.language is None + clarif = c(" [langue inconnue]", RED, BOLD) if unresolved else "" + + print(f" {c(src, BOLD)}") + print(f" lang={c(lang, CYAN)} type={typ_str} format={fmt}") + conf_str = c("n/a (embedded)", DIM) if track.is_embedded else confidence_bar(track.confidence) + print(f" confidence={conf_str}{clarif}") + + if track.entry_count is not None: + print(f" entries={track.entry_count} size={track.file_size_kb:.1f} KB" if track.file_size_kb else f" entries={track.entry_count}") + + if verbose and track.raw_tokens: + print(f" tokens={track.raw_tokens}") + + if track.is_resolved() and track.language and track.format: + try: + dest = track.destination_name + print(f" β†’ {c(dest, GREEN, BOLD)}") + except ValueError: + pass + + 
+# --------------------------------------------------------------------------- +# Γ‰tapes du pipeline +# --------------------------------------------------------------------------- + +def step_load_kb() -> "SubtitleKnowledgeBase": + from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase + from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader + + section("Γ‰TAPE 1 β€” Chargement de la base de connaissances") + kb = SubtitleKnowledgeBase(KnowledgeLoader()) + + fmts = kb.formats() + langs = kb.languages() + patterns = kb.patterns() + + ok(f"{len(fmts)} format(s) connu(s): {', '.join(fmts.keys())}") + ok(f"{len(langs)} langue(s) connue(s): {', '.join(langs.keys())}") + ok(f"{len(patterns)} pattern(s) connu(s): {', '.join(patterns.keys())}") + + total_tokens = sum(len(l.tokens) for l in langs.values()) + info(c(f"β†’ {total_tokens} tokens de langue au total", DIM), indent=4) + + return kb + + +def step_detect_pattern( + kb: "SubtitleKnowledgeBase", + season_folder: Path, + sample_video: Path, + release_group: str | None, + forced_pattern: str | None, +) -> "SubtitlePattern": + from alfred.domain.subtitles.services.pattern_detector import PatternDetector + + section("Γ‰TAPE 2 β€” DΓ©tection du pattern de release") + + # PrioritΓ©: forced > known patterns from release_group > auto-detect + if forced_pattern: + pattern = kb.pattern(forced_pattern) + if not pattern: + err(f"Pattern inconnu: '{forced_pattern}'") + print(f" Patterns disponibles: {', '.join(kb.patterns().keys())}") + sys.exit(1) + ok(f"Pattern forcΓ©: {c(forced_pattern, CYAN, BOLD)}") + return pattern + + if release_group: + known = kb.patterns_for_group(release_group) + if known: + kv("Release group", release_group) + ok(f"Pattern(s) connu(s) pour {release_group}: {', '.join(p.id for p in known)}") + pattern = known[0] + kv("Pattern sΓ©lectionnΓ©", c(pattern.id, CYAN, BOLD)) + return pattern + else: + warn(f"Groupe '{release_group}' inconnu β€” lancement de la dΓ©tection 
auto") + + # Auto-detect + kv("Dossier analysΓ©", str(season_folder)) + kv("VidΓ©o de rΓ©fΓ©rence", sample_video.name) + + detector = PatternDetector(kb) + result = detector.detect(season_folder, sample_video) + + findings = result.get("raw_findings", {}) + info(c("Observations:", BOLD), indent=4) + for key, val in findings.items(): + if val not in (False, None, 0): + info(f" {key}: {c(str(val), CYAN)}", indent=4) + + detected = result.get("detected") + confidence = result.get("confidence", 0.0) + description = result.get("description", "") + + print() + info(c(f'Description: "{description}"', DIM), indent=4) + print(f" Confiance: {confidence_bar(confidence)}") + + if detected: + ok(f"Pattern dΓ©tectΓ©: {c(detected.id, CYAN, BOLD)}") + kv("StratΓ©gie de scan", detected.scan_strategy.value) + kv("DΓ©tection de type", detected.type_detection.value) + if detected.root_folder: + kv("Dossier racine", detected.root_folder) + return detected + else: + warn("Aucun pattern dΓ©tectΓ© avec confiance suffisante β€” fallback: adjacent") + fallback = kb.pattern("adjacent") + if not fallback: + err("Pattern 'adjacent' introuvable dans la KB !") + sys.exit(1) + return fallback + + +def step_identify_tracks( + kb: "SubtitleKnowledgeBase", + sample_video: Path, + pattern: "SubtitlePattern", + release_group: str | None, + verbose: bool, +) -> "MediaSubtitleMetadata": + from alfred.domain.subtitles.services.identifier import SubtitleIdentifier + + section("Γ‰TAPE 3 β€” Identification des pistes") + + kv("VidΓ©o", sample_video.name) + kv("Pattern", pattern.id) + + identifier = SubtitleIdentifier(kb) + metadata = identifier.identify( + video_path=sample_video, + pattern=pattern, + media_id=None, + media_type="tv_show", + release_group=release_group, + ) + + n_emb = len(metadata.embedded_tracks) + n_ext = len(metadata.external_tracks) + n_unresolved = len(metadata.unresolved_tracks) + + print() + ok(f"{n_ext} piste(s) externe(s) trouvΓ©e(s)") + if n_emb: + ok(f"{n_emb} piste(s) 
embarquΓ©e(s) (ffprobe)") + if n_unresolved: + warn(f"{n_unresolved} piste(s) externe(s) sans langue reconnue") + + if metadata.external_tracks: + print() + info(c("Pistes externes:", BOLD)) + for track in metadata.external_tracks: + track_summary(track, verbose) + + if metadata.embedded_tracks: + print() + info(c("Pistes embarquΓ©es:", BOLD)) + for track in metadata.embedded_tracks: + track_summary(track, verbose) + + return metadata + + +def step_apply_rules( + metadata: "MediaSubtitleMetadata", + release_group: str | None, +) -> tuple["SubtitleMatchingRules | None", list, list]: + from alfred.domain.subtitles.aggregates import DEFAULT_RULES + from alfred.domain.subtitles.services.matcher import SubtitleMatcher + from alfred.domain.subtitles.services.utils import available_subtitles + from alfred.domain.subtitles.value_objects import ScanStrategy + + section("Γ‰TAPE 4 β€” Application des rΓ¨gles") + + # Cas embedded : pas de matcher, on liste directement les pistes disponibles + if metadata.detected_pattern_id == ScanStrategy.EMBEDDED.value: + info(c("Pattern embedded β€” le matcher est court-circuitΓ©", DIM), indent=4) + tracks = available_subtitles(metadata.embedded_tracks) + ok(f"{len(tracks)} piste(s) disponible(s)") + return None, tracks, [] + + rules = DEFAULT_RULES() + kv("Langues prΓ©fΓ©rΓ©es", str(rules.preferred_languages)) + kv("Formats prΓ©fΓ©rΓ©s", str(rules.preferred_formats)) + kv("Types autorisΓ©s", str(rules.allowed_types)) + kv("Confiance min", str(rules.min_confidence)) + info(c("(rΓ¨gles globales par dΓ©faut β€” pas de .alfred/ en mode scan)", DIM), indent=4) + + matcher = SubtitleMatcher() + matched, unresolved = matcher.match(metadata.external_tracks, rules) + + print() + ok(f"{len(matched)} piste(s) retenue(s)") + if unresolved: + warn(f"{len(unresolved)} piste(s) Γ©cartΓ©e(s) ou non rΓ©solue(s)") + + return rules, matched, unresolved + + +def step_show_results( + matched: list, + unresolved: list, + is_embedded: bool, + verbose: bool, +) 
-> None: + section("RΓ‰SULTAT FINAL") + + if matched: + label = "piste(s) disponible(s)" if is_embedded else "piste(s) qui seraient placΓ©es" + ok(f"{len(matched)} {label}:") + for track in matched: + lang = track.language.code if track.language else "?" + typ = track.subtitle_type.value + if is_embedded: + print(f" {c(lang, CYAN)} {c(typ, GREEN)}") + else: + try: + dest = track.destination_name + src = track.file_path.name if track.file_path else "?" + print(f" {c(src, DIM)} β†’ {c(dest, GREEN, BOLD)}") + except ValueError: + warn(f" Piste incomplΓ¨te (lang ou format manquant): {track}") + else: + warn("Aucune piste retenue.") + + if unresolved: + print() + warn(f"{len(unresolved)} piste(s) Γ©cartΓ©es ou Γ  clarifier:") + for track in unresolved: + src = track.file_path.name if track.file_path else "?" + reason = "langue inconnue" if track.language is None else "confiance insuffisante" + line = f" {c(src, DIM)} ({reason})" + if verbose and track.raw_tokens: + line += c(f" tokens: {track.raw_tokens}", YELLOW) + print(line) + + print() + + +# --------------------------------------------------------------------------- +# Scan multi-Γ©pisodes (rΓ©sumΓ©) +# --------------------------------------------------------------------------- + +def scan_season( + kb: "SubtitleKnowledgeBase", + pattern: "SubtitlePattern", + season_folder: Path, + release_group: str | None, + verbose: bool, +) -> None: + from alfred.domain.subtitles.aggregates import DEFAULT_RULES + from alfred.domain.subtitles.services.identifier import SubtitleIdentifier + from alfred.domain.subtitles.services.matcher import SubtitleMatcher + + videos = find_videos(season_folder) + + section(f"SCAN COMPLET DE LA SAISON ({len(videos)} Γ©pisode(s))") + + if not videos: + warn("Aucun fichier vidΓ©o trouvΓ© dans ce dossier.") + return + + identifier = SubtitleIdentifier(kb) + matcher = SubtitleMatcher() + rules = DEFAULT_RULES() + + col_w = max(len(v.name) for v in videos) + 2 + + for video in videos: + metadata = 
identifier.identify( + video_path=video, + pattern=pattern, + media_id=None, + media_type="tv_show", + release_group=release_group, + ) + matched, unresolved = matcher.match(metadata.external_tracks, rules) + + placed_names = [] + for t in matched: + try: + placed_names.append(t.destination_name) + except ValueError: + pass + + status_icon = c("βœ“", GREEN, BOLD) if placed_names else c("βœ—", RED, BOLD) + warn_icon = c(f" [{len(unresolved)} non-rΓ©solue(s)]", YELLOW) if unresolved else "" + + print(f" {status_icon} {video.name:{col_w}} {c(', '.join(placed_names) or 'β€”', GREEN if placed_names else DIM)}{warn_icon}") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Scanner de sous-titres Alfred β€” pipeline de diagnostic", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(__doc__ or ""), + ) + parser.add_argument("season_folder", help="Dossier de la saison (ou du film)") + parser.add_argument("--release-group", "-g", metavar="GROUP", + help="Groupe de release (ex: RARBG, KONSTRAST)") + parser.add_argument("--pattern", "-p", metavar="PATTERN", + help="Forcer un pattern (adjacent|flat|episode_subfolder|embedded)") + parser.add_argument("--video", "-v", metavar="FILE", + help="Fichier vidΓ©o de rΓ©fΓ©rence (dΓ©faut: premier trouvΓ©)") + parser.add_argument("--verbose", action="store_true", + help="Affiche les tokens bruts par piste") + parser.add_argument("--no-color", action="store_true", + help="DΓ©sactive la colorisation ANSI") + parser.add_argument("--season-scan", action="store_true", + help="AprΓ¨s le diagnostic, scanner tous les Γ©pisodes de la saison") + return parser.parse_args() + + +def main() -> None: + global USE_COLOR + + args = parse_args() + + if args.no_color or not sys.stdout.isatty(): + USE_COLOR = False + + 
season_folder = Path(args.season_folder).expanduser().resolve() + if not season_folder.is_dir(): + print(f"Erreur: '{season_folder}' n'est pas un dossier.", file=sys.stderr) + sys.exit(1) + + print() + print(c("━" * 70, BOLD)) + print(c(" Alfred β€” Subtitle Scanner", BOLD, MAGENTA)) + print(c("━" * 70, BOLD)) + kv("Dossier", str(season_folder), indent=2) + + # Trouver la vidΓ©o de rΓ©fΓ©rence + if args.video: + sample_video = Path(args.video).expanduser().resolve() + if not sample_video.exists(): + print(f"Erreur: '{sample_video}' introuvable.", file=sys.stderr) + sys.exit(1) + else: + videos = find_videos(season_folder) + if not videos: + # Chercher un niveau plus bas (structure release root) + for sub in season_folder.iterdir(): + if sub.is_dir(): + videos = find_videos(sub) + if videos: + break + if not videos: + print("Erreur: aucun fichier vidΓ©o trouvΓ© dans ce dossier.", file=sys.stderr) + sys.exit(1) + sample_video = videos[0] + + kv("VidΓ©o de rΓ©fΓ©rence", sample_video.name, indent=2) + + # ---- Pipeline ---- + kb = step_load_kb() + + pattern = step_detect_pattern( + kb=kb, + season_folder=season_folder, + sample_video=sample_video, + release_group=args.release_group, + forced_pattern=args.pattern, + ) + + metadata = step_identify_tracks( + kb=kb, + sample_video=sample_video, + pattern=pattern, + release_group=args.release_group, + verbose=args.verbose, + ) + + rules, matched, unresolved = step_apply_rules( + metadata=metadata, + release_group=args.release_group, + ) + + step_show_results( + matched=matched, + unresolved=unresolved, + is_embedded=rules is None, + verbose=args.verbose, + ) + + if args.season_scan: + scan_season( + kb=kb, + pattern=pattern, + season_folder=season_folder, + release_group=args.release_group, + verbose=args.verbose, + ) + + print(c("━" * 70, BOLD)) + print() + + +if __name__ == "__main__": + main() diff --git a/testing/workflows/run_workflow.py b/testing/workflows/run_workflow.py new file mode 100755 index 0000000..208f57e 
--- /dev/null +++ b/testing/workflows/run_workflow.py @@ -0,0 +1,479 @@ +#!/usr/bin/env python3 +""" +run_workflow.py β€” Simulate an Alfred workflow step by step (dry-run or live). + +Usage: + uv run testing/workflows/run_workflow.py organize_media [options] + +Options: + --dry-run Print what each step would do without executing tools (default). + --live Actually execute the tools (uses real filesystem + memory). + --source PATH Source video file (download folder). + --dest PATH Destination video file (library path). + --download-folder P Original download folder (for create_seed_links). + --imdb-id ID IMDb ID for identify_media step (tt1234567). + --seed Answer "yes" to the seeding question. + --no-color Disable ANSI colours. + +Examples: + uv run testing/workflows/run_workflow.py organize_media --dry-run \\ + --source "/downloads/Breaking.Bad.S01E01.mkv" \\ + --dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv" + + uv run testing/workflows/run_workflow.py organize_media --live \\ + --source "/downloads/BB/Breaking.Bad.S01E01.mkv" \\ + --dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv" \\ + --download-folder "/downloads/BB" --seed +""" + +import argparse +import sys +import textwrap +from pathlib import Path +from typing import Any + +# Project root on sys.path +_PROJECT_ROOT = Path(__file__).resolve().parents[2] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# --------------------------------------------------------------------------- +# Colours +# --------------------------------------------------------------------------- + +USE_COLOR = True + +RESET = "\033[0m" +BOLD = "\033[1m" +DIM = "\033[2m" +GREEN = "\033[32m" +YELLOW = "\033[33m" +RED = "\033[31m" +CYAN = "\033[36m" +BLUE = "\033[34m" +MAGENTA = "\033[35m" + + +def c(text: str, *codes: str) -> str: + if not USE_COLOR: + return text + return "".join(codes) + str(text) + RESET + + +def section(title: str) -> None: + print() + print(c("─" * 70, DIM)) + 
print(c(f" {title}", BOLD, CYAN)) + print(c("─" * 70, DIM)) + + +def ok(msg: str) -> None: print(c(" βœ“ ", GREEN, BOLD) + msg) +def warn(msg: str) -> None: print(c(" ⚠ ", YELLOW, BOLD) + msg) +def err(msg: str) -> None: print(c(" βœ— ", RED, BOLD) + msg) +def info(msg: str) -> None: print(f" {msg}") +def kv(key: str, val: str) -> None: + print(f" {c(key + ':', BOLD)} {val}") + + +# --------------------------------------------------------------------------- +# Dry-run tool stubs +# --------------------------------------------------------------------------- + +def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]: + return { + "status": "ok", + "folder_type": folder_type, + "path": path, + "entries": ["[dry-run β€” no real listing]"], + "count": 1, + } + + +def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]: + return { + "status": "ok", + "imdb_id": kwargs.get("imdb_id") or "tt0000000", + "title": "Dry Run Show", + "type": "tv_show", + "year": 2024, + } + + +def _dry_resolve_destination( + release_name: str, + source_file: str, + tmdb_title: str, + tmdb_year: int, + tmdb_episode_title: str | None = None, + confirmed_folder: str | None = None, +) -> dict[str, Any]: + from alfred.domain.media.release_parser import parse_release + parsed = parse_release(release_name) + ext = Path(source_file).suffix + if parsed.is_movie: + folder = parsed.movie_folder_name(tmdb_title, tmdb_year) + fname = parsed.movie_filename(tmdb_title, tmdb_year, ext) + return { + "status": "ok", + "library_file": f"/movies/{folder}/{fname}", + "series_folder": f"/movies/{folder}", + "series_folder_name": folder, + "season_folder": None, + "season_folder_name": None, + "filename": fname, + "is_new_series_folder": True, + } + season_folder = parsed.season_folder_name() + show_folder = confirmed_folder or parsed.show_folder_name(tmdb_title, tmdb_year) + fname = parsed.episode_filename(tmdb_episode_title, ext) if not parsed.is_season_pack else season_folder + ext + return { + 
"status": "ok", + "library_file": f"/tv/{show_folder}/{season_folder}/{fname}", + "series_folder": f"/tv/{show_folder}", + "season_folder": f"/tv/{show_folder}/{season_folder}", + "series_folder_name": show_folder, + "season_folder_name": season_folder, + "filename": fname, + "is_new_series_folder": confirmed_folder is None, + } + + +def _dry_move_media(source: str, destination: str) -> dict[str, Any]: + return { + "status": "ok", + "source": source, + "destination": destination, + "filename": Path(destination).name, + "size": 0, + } + + +def _dry_manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]: + return { + "status": "ok", + "video_path": destination_video, + "placed": [], + "placed_count": 0, + "skipped_count": 0, + } + + +def _dry_create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]: + return { + "status": "ok", + "torrent_subfolder": f"/torrents/{Path(original_download_folder).name}", + "linked_file": f"/torrents/{Path(original_download_folder).name}/{Path(library_file).name}", + "copied_files": ["[dry-run β€” no real copy]"], + "copied_count": 1, + "skipped": [], + } + + +DRY_RUN_TOOLS: dict[str, Any] = { + "list_folder": _dry_list_folder, + "find_media_imdb_id": _dry_find_media_imdb_id, + "resolve_destination": _dry_resolve_destination, + "move_media": _dry_move_media, + "manage_subtitles": _dry_manage_subtitles, + "create_seed_links": _dry_create_seed_links, +} + + +# --------------------------------------------------------------------------- +# Live tools +# --------------------------------------------------------------------------- + +def _load_live_tools() -> dict[str, Any]: + from alfred.agent.tools.filesystem import ( + create_seed_links, + list_folder, + manage_subtitles, + move_media, + ) + # find_media_imdb_id lives in the api tools + try: + from alfred.agent.tools.api import find_media_imdb_id + except ImportError: + def find_media_imdb_id(**kwargs): # type: ignore[misc] + return 
{"status": "error", "error": "not_available", "message": "api tools not loaded"} + + return { + "list_folder": list_folder, + "find_media_imdb_id": find_media_imdb_id, + "move_media": move_media, + "manage_subtitles": manage_subtitles, + "create_seed_links": create_seed_links, + } + + +# --------------------------------------------------------------------------- +# Workflow runner +# --------------------------------------------------------------------------- + +class WorkflowRunner: + def __init__(self, workflow: dict, tools: dict[str, Any], live: bool, args: argparse.Namespace): + self.workflow = workflow + self.tools = tools + self.live = live + self.args = args + self.context: dict[str, Any] = {} # step results accumulate here + self.step_results: list[dict] = [] + + def run(self) -> None: + name = self.workflow.get("name", "?") + desc = self.workflow.get("description", "").strip() + mode = c("LIVE", RED, BOLD) if self.live else c("DRY-RUN", YELLOW, BOLD) + + print() + print(c("━" * 70, BOLD)) + print(c(f" Alfred β€” Workflow Simulator [{mode}]", BOLD, MAGENTA)) + print(c("━" * 70, BOLD)) + kv("Workflow", c(name, CYAN, BOLD)) + kv("Description", desc) + kv("Tools allowed", ", ".join(self.workflow.get("tools", []))) + + steps = self.workflow.get("steps", []) + for step in steps: + self._run_step(step) + + section("SIMULATION TERMINΓ‰E") + ok(f"{len(self.step_results)} step(s) exΓ©cutΓ©(s)") + errors = [r for r in self.step_results if r.get("result", {}).get("status") == "error"] + if errors: + warn(f"{len(errors)} step(s) en erreur") + for r in errors: + err(f" {r['id']}: {r['result'].get('error')} β€” {r['result'].get('message')}") + print() + print(c("━" * 70, BOLD)) + print() + + def _run_step(self, step: dict) -> None: + step_id = step.get("id", "?") + + # --- ask_user step --- + if "ask_user" in step: + section(f"STEP [{step_id}] β€” ask_user") + q = step["ask_user"].get("question", "") + answers = step["ask_user"].get("answers", {}) + info(c(f'Question: 
"{q}"', BOLD)) + info(f"RΓ©ponses possibles: {', '.join(str(k) for k in answers.keys())}") + + answer = "yes" if self.args.seed else "no" + # PyYAML parses bare yes/no as booleans β€” normalise keys to str + answers_str = {str(k): v for k, v in answers.items()} + next_step = answers_str.get(answer, {}).get("next_step", "update_library") + ok(f"RΓ©ponse simulΓ©e: {c(answer, CYAN)} β†’ next: {c(next_step, CYAN)}") + self.context["seeding"] = (answer == "yes") + self.context["ask_seeding_answer"] = answer + self.context["next_after_ask"] = next_step + + # If "no", skip create_seed_links + if answer == "no": + self.context["skip_create_seed_links"] = True + return + + # --- memory_write step --- + if "memory_write" in step: + section(f"STEP [{step_id}] β€” memory_write ({step['memory_write']})") + if self.live: + warn("memory_write: pas encore implΓ©mentΓ© dans le simulator live") + else: + ok("(dry-run) Library entry would be written to LTM") + self.step_results.append({"id": step_id, "result": {"status": "ok"}}) + return + + # --- tool step --- + tool_name = step.get("tool") + if not tool_name: + warn(f"Step '{step_id}' has no tool or ask_user β€” skipped") + return + + # Skip create_seed_links if user said no to seeding + if tool_name == "create_seed_links" and self.context.get("skip_create_seed_links"): + section(f"STEP [{step_id}] β€” {tool_name}") + warn("Skipped (user chose not to seed)") + return + + section(f"STEP [{step_id}] β€” {c(tool_name, CYAN, BOLD)}") + + desc = step.get("description", "").strip() + if desc: + info(c(desc, DIM)) + + kwargs = self._build_kwargs(tool_name, step) + for k, v in kwargs.items(): + kv(k, str(v)) + + if tool_name not in self.tools: + err(f"Tool '{tool_name}' not found in tool registry") + self.step_results.append({"id": step_id, "result": {"status": "error", "error": "unknown_tool"}}) + return + + try: + result = self.tools[tool_name](**kwargs) + except Exception as e: + err(f"Tool raised an exception: {e}") + 
self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}}) + return + + self._print_result(result) + self.context[step_id] = result + self.step_results.append({"id": step_id, "result": result}) + + def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]: + """Build tool kwargs from step params + CLI args + previous context.""" + # Start from step-level params (static defaults from YAML) + kwargs: dict[str, Any] = dict(step.get("params") or {}) + + a = self.args + + if tool_name == "list_folder": + kwargs.setdefault("folder_type", "download") + + elif tool_name == "find_media_imdb_id": + if a.imdb_id: + kwargs["imdb_id"] = a.imdb_id + + elif tool_name == "resolve_destination": + if a.release: + kwargs["release_name"] = a.release + elif a.source: + kwargs.setdefault("release_name", Path(a.source).parent.name) + if a.source: + kwargs["source_file"] = a.source + if a.tmdb_title: + kwargs["tmdb_title"] = a.tmdb_title + if a.tmdb_year: + kwargs["tmdb_year"] = a.tmdb_year + if a.episode_title: + kwargs["tmdb_episode_title"] = a.episode_title + + elif tool_name == "move_media": + # If resolve_destination ran, use its library_file as destination + resolved = self.context.get("resolve_destination", {}) + if a.source: + kwargs["source"] = a.source + dest = a.dest or resolved.get("library_file") + if dest: + kwargs["destination"] = dest + + elif tool_name == "manage_subtitles": + resolved = self.context.get("resolve_destination", {}) + if a.source: + kwargs["source_video"] = a.source + dest = a.dest or resolved.get("library_file") + if dest: + kwargs["destination_video"] = dest + + elif tool_name == "create_seed_links": + resolved = self.context.get("resolve_destination", {}) + library_file = a.dest or resolved.get("library_file") + if library_file: + kwargs["library_file"] = library_file + if a.download_folder: + kwargs["original_download_folder"] = a.download_folder + elif a.source: + kwargs.setdefault("original_download_folder", 
str(Path(a.source).parent)) + + return kwargs + + def _print_result(self, result: dict) -> None: + status = result.get("status", "?") + if status == "ok": + ok(f"status={c('ok', GREEN)}") + elif status == "needs_clarification": + warn(f"status={c('needs_clarification', YELLOW)}") + else: + err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}") + return + + # Pretty-print notable fields + skip = {"status", "error", "message"} + for k, v in result.items(): + if k in skip: + continue + if isinstance(v, list): + if v: + info(c(f"{k}:", BOLD)) + for item in v[:10]: + info(f" β€’ {item}") + if len(v) > 10: + info(c(f" … and {len(v) - 10} more", DIM)) + else: + info(f"{c(k + ':', BOLD)} (empty)") + else: + kv(k, str(v)) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Alfred workflow simulator", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(__doc__ or ""), + ) + parser.add_argument("workflow", help="Workflow name (e.g. 
organize_media)") + parser.add_argument("--dry-run", dest="dry_run", action="store_true", default=True, + help="Simulate steps without executing tools (default)") + parser.add_argument("--live", action="store_true", + help="Actually execute tools against the real filesystem") + parser.add_argument("--source", metavar="PATH", + help="Source video file (in download folder)") + parser.add_argument("--dest", metavar="PATH", + help="Destination video file (in library, overrides resolve_destination)") + parser.add_argument("--download-folder", metavar="PATH", + help="Original download folder (for create_seed_links)") + parser.add_argument("--imdb-id", metavar="ID", + help="IMDb ID for identify_media (tt1234567)") + parser.add_argument("--release", metavar="NAME", + help="Release name (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)") + parser.add_argument("--tmdb-title", metavar="TITLE", + help="Canonical title from TMDB (e.g. 'Oz')") + parser.add_argument("--tmdb-year", metavar="YEAR", type=int, + help="Start/release year from TMDB (e.g. 
1997)") + parser.add_argument("--episode-title", metavar="TITLE", + help="Episode title from TMDB for single-episode releases") + parser.add_argument("--seed", action="store_true", + help='Answer "yes" to the seeding question') + parser.add_argument("--no-color", action="store_true") + return parser.parse_args() + + +def main() -> None: + global USE_COLOR + args = parse_args() + + if args.no_color or not sys.stdout.isatty(): + USE_COLOR = False + + if args.live: + args.dry_run = False + + # Load workflow + from alfred.agent.workflows.loader import WorkflowLoader + loader = WorkflowLoader() + workflow = loader.get(args.workflow) + if not workflow: + print(f"Erreur: workflow '{args.workflow}' introuvable.", file=sys.stderr) + print(f"Disponibles: {', '.join(loader.names())}", file=sys.stderr) + sys.exit(1) + + # Load tools + if args.live: + try: + tools = _load_live_tools() + except Exception as e: + print(f"Erreur chargement des tools live: {e}", file=sys.stderr) + sys.exit(1) + else: + tools = DRY_RUN_TOOLS + + runner = WorkflowRunner(workflow, tools, live=args.live, args=args) + runner.run() + + +if __name__ == "__main__": + main() diff --git a/tests/agent/__init__.py b/tests/agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/agent/test_registry.py b/tests/agent/test_registry.py new file mode 100644 index 0000000..305c2a2 --- /dev/null +++ b/tests/agent/test_registry.py @@ -0,0 +1,208 @@ +""" +Tests for alfred.agent.registry β€” tool registration and JSON schema generation. 
+""" + +import pytest + +from alfred.agent.registry import Tool, _create_tool_from_function, make_tools +from alfred.settings import settings + + +# --------------------------------------------------------------------------- +# _create_tool_from_function +# --------------------------------------------------------------------------- + +class TestCreateToolFromFunction: + + def test_name_from_function(self): + def my_tool(x: str) -> dict: + """Does something.""" + return {} + tool = _create_tool_from_function(my_tool) + assert tool.name == "my_tool" + + def test_description_from_docstring_first_line(self): + def my_tool(x: str) -> dict: + """First line description. + + More details here. + """ + return {} + tool = _create_tool_from_function(my_tool) + assert tool.description == "First line description." + + def test_description_fallback_to_name(self): + def no_doc(x: str) -> dict: + return {} + tool = _create_tool_from_function(no_doc) + assert tool.description == "no_doc" + + def test_required_params_without_default(self): + def tool(a: str, b: int) -> dict: + """Tool.""" + return {} + t = _create_tool_from_function(tool) + assert "a" in t.parameters["required"] + assert "b" in t.parameters["required"] + + def test_optional_params_not_required(self): + def tool(a: str, b: str = "default") -> dict: + """Tool.""" + return {} + t = _create_tool_from_function(tool) + assert "a" in t.parameters["required"] + assert "b" not in t.parameters["required"] + + def test_none_default_not_required(self): + def tool(a: str, b: str | None = None) -> dict: + """Tool.""" + return {} + t = _create_tool_from_function(tool) + assert "b" not in t.parameters["required"] + + def test_type_mapping_str(self): + def tool(x: str) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["properties"]["x"]["type"] == "string" + + def test_type_mapping_int(self): + def tool(x: int) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert 
t.parameters["properties"]["x"]["type"] == "integer" + + def test_type_mapping_float(self): + def tool(x: float) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["properties"]["x"]["type"] == "number" + + def test_type_mapping_bool(self): + def tool(x: bool) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["properties"]["x"]["type"] == "boolean" + + def test_unknown_type_defaults_to_string(self): + def tool(x: list) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["properties"]["x"]["type"] == "string" + + def test_no_annotation_defaults_to_string(self): + def tool(x) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["properties"]["x"]["type"] == "string" + + def test_self_param_excluded(self): + class MyClass: + def tool(self, x: str) -> dict: + """T.""" + return {} + t = _create_tool_from_function(MyClass().tool) + assert "self" not in t.parameters["properties"] + + def test_parameters_schema_structure(self): + def tool(a: str, b: int = 0) -> dict: + """T.""" + return {} + t = _create_tool_from_function(tool) + assert t.parameters["type"] == "object" + assert "properties" in t.parameters + assert "required" in t.parameters + + def test_func_stored_on_tool(self): + def tool(x: str) -> dict: + """T.""" + return {"x": x} + t = _create_tool_from_function(tool) + assert t.func("hello") == {"x": "hello"} + + +# --------------------------------------------------------------------------- +# make_tools +# --------------------------------------------------------------------------- + +class TestMakeTools: + + def test_returns_dict(self): + tools = make_tools(settings) + assert isinstance(tools, dict) + + def test_all_expected_tools_present(self): + tools = make_tools(settings) + expected = { + "set_path_for_folder", + "list_folder", + "resolve_destination", + "move_media", + "manage_subtitles", + 
"create_seed_links", + "learn", + "find_media_imdb_id", + "find_torrent", + "add_torrent_by_index", + "add_torrent_to_qbittorrent", + "get_torrent_by_index", + "set_language", + } + assert expected.issubset(tools.keys()) + + def test_each_tool_is_tool_instance(self): + tools = make_tools(settings) + for name, tool in tools.items(): + assert isinstance(tool, Tool), f"{name} is not a Tool instance" + + def test_each_tool_has_callable_func(self): + tools = make_tools(settings) + for name, tool in tools.items(): + assert callable(tool.func), f"{name}.func is not callable" + + def test_tool_name_matches_key(self): + tools = make_tools(settings) + for key, tool in tools.items(): + assert tool.name == key + + def test_resolve_destination_schema(self): + tools = make_tools(settings) + t = tools["resolve_destination"] + props = t.parameters["properties"] + required = t.parameters["required"] + # Required args + assert "release_name" in required + assert "source_file" in required + assert "tmdb_title" in required + assert "tmdb_year" in required + # Optional args not required + assert "tmdb_episode_title" not in required + assert "confirmed_folder" not in required + # tmdb_year is int + assert props["tmdb_year"]["type"] == "integer" + + def test_move_media_schema(self): + tools = make_tools(settings) + t = tools["move_media"] + required = t.parameters["required"] + assert "source" in required + assert "destination" in required + + def test_create_seed_links_schema(self): + tools = make_tools(settings) + t = tools["create_seed_links"] + required = t.parameters["required"] + assert "library_file" in required + assert "original_download_folder" in required + + def test_no_duplicate_tools(self): + tools = make_tools(settings) + # dict keys are unique by definition, but verify no name conflicts + names = [t.name for t in tools.values()] + assert len(names) == len(set(names)) diff --git a/tests/application/__init__.py b/tests/application/__init__.py new file mode 100644 index 
0000000..e69de29 diff --git a/tests/application/conftest.py b/tests/application/conftest.py new file mode 100644 index 0000000..fd85bdb --- /dev/null +++ b/tests/application/conftest.py @@ -0,0 +1,41 @@ +"""Fixtures for application-layer tests.""" + +import shutil +import tempfile +from pathlib import Path + +import pytest + +from alfred.infrastructure.persistence import Memory, set_memory + + +@pytest.fixture +def app_temp(tmp_path): + """Real folder structure: downloads, movies, tv_shows, torrents.""" + (tmp_path / "downloads").mkdir() + (tmp_path / "movies").mkdir() + (tmp_path / "tv_shows").mkdir() + (tmp_path / "torrents").mkdir() + return tmp_path + + +@pytest.fixture +def memory_configured(app_temp, tmp_path): + """ + Fresh Memory with library_paths and workspace configured using the real API. + Replaces the broken memory_with_config from root conftest for these tests. + """ + import tempfile, os + storage = tempfile.mkdtemp() + mem = Memory(storage_dir=storage) + set_memory(mem) + + mem.ltm.workspace.download = str(app_temp / "downloads") + mem.ltm.workspace.torrent = str(app_temp / "torrents") + mem.ltm.library_paths.set("movie", str(app_temp / "movies")) + mem.ltm.library_paths.set("tv_show", str(app_temp / "tv_shows")) + mem.save() + + yield mem + + shutil.rmtree(storage, ignore_errors=True) diff --git a/tests/application/test_create_seed_links.py b/tests/application/test_create_seed_links.py new file mode 100644 index 0000000..b34ceda --- /dev/null +++ b/tests/application/test_create_seed_links.py @@ -0,0 +1,117 @@ +""" +Tests for alfred.application.filesystem.create_seed_links.CreateSeedLinksUseCase +""" + +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from alfred.application.filesystem.create_seed_links import CreateSeedLinksUseCase +from alfred.infrastructure.filesystem.file_manager import FileManager + + +@pytest.fixture +def fm(): + return FileManager() + + +@pytest.fixture +def use_case(fm): + 
return CreateSeedLinksUseCase(fm) + + +@pytest.fixture +def seed_env(tmp_path_factory): + """ + Realistic post-move environment (uses its own tmp dir, independent of app_temp): + - library video file (hard-linked from original) + - original download folder with remaining files + - torrents root folder + """ + d = tmp_path_factory.mktemp("seed_env") + + lib_dir = d / "tv" / "Oz.1997.1080p.WEBRip.x265-KONTRAST" / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + lib_dir.mkdir(parents=True) + lib_video = lib_dir / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4" + lib_video.write_bytes(b"video") + + dl = d / "downloads" / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + dl.mkdir(parents=True) + (dl / "KONTRAST.txt").write_text("release notes") + (dl / "[TGx]info.txt").write_text("tgx") + subs = dl / "Subs" / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST" + subs.mkdir(parents=True) + (subs / "2_eng,English [CC][SDH].srt").write_text("1\n00:00:01 --> 00:00:02\nHello\n") + + torrents = d / "torrents" + torrents.mkdir() + + return lib_video, dl, torrents + + +# --------------------------------------------------------------------------- +# Happy path +# --------------------------------------------------------------------------- + +class TestCreateSeedLinksHappyPath: + + def test_ok_when_torrent_folder_configured(self, use_case, seed_env, memory_configured): + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + lib_video, dl, torrents = seed_env + mem.ltm.workspace.torrent = str(torrents) + mem.save() + + result = use_case.execute(str(lib_video), str(dl)) + + assert result.status == "ok" + assert result.torrent_subfolder is not None + assert result.linked_file is not None + assert result.copied_count > 0 + + def test_to_dict_ok(self, use_case, seed_env, memory_configured): + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + lib_video, dl, torrents = seed_env + mem.ltm.workspace.torrent = str(torrents) + mem.save() + + d = 
use_case.execute(str(lib_video), str(dl)).to_dict() + assert d["status"] == "ok" + assert "torrent_subfolder" in d + assert "copied_files" in d + assert isinstance(d["copied_files"], list) + + +# --------------------------------------------------------------------------- +# Error: torrent folder not configured +# --------------------------------------------------------------------------- + +class TestCreateSeedLinksErrors: + + def test_error_when_torrent_not_configured(self, use_case, seed_env, memory): + lib_video, dl, _ = seed_env + result = use_case.execute(str(lib_video), str(dl)) + + assert result.status == "error" + assert result.error == "torrent_folder_not_set" + assert result.message is not None + + def test_to_dict_error(self, use_case, seed_env, memory): + lib_video, dl, _ = seed_env + d = use_case.execute(str(lib_video), str(dl)).to_dict() + assert d["status"] == "error" + assert "error" in d + assert "message" in d + + def test_error_delegates_to_file_manager(self, memory_configured): + """FileManager errors are propagated correctly.""" + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + # torrent already configured by memory_configured fixture + # library_file does not exist β†’ should propagate error from FileManager + uc = CreateSeedLinksUseCase(FileManager()) + result = uc.execute("/nonexistent/lib.mkv", "/nonexistent/dl") + assert result.status == "error" diff --git a/tests/application/test_list_folder_move_media.py b/tests/application/test_list_folder_move_media.py new file mode 100644 index 0000000..aee4092 --- /dev/null +++ b/tests/application/test_list_folder_move_media.py @@ -0,0 +1,179 @@ +"""Tests for ListFolderUseCase and MoveMediaUseCase.""" + +import pytest +from unittest.mock import MagicMock + +from alfred.application.filesystem.list_folder import ListFolderUseCase +from alfred.application.filesystem.move_media import MoveMediaUseCase + + +# 
--------------------------------------------------------------------------- +# ListFolderUseCase +# --------------------------------------------------------------------------- + +class TestListFolderUseCase: + + def _use_case(self, fm_result): + fm = MagicMock() + fm.list_folder.return_value = fm_result + return ListFolderUseCase(fm) + + def test_success_returns_response(self): + uc = self._use_case({ + "status": "ok", + "folder_type": "download", + "path": ".", + "entries": ["movie.mkv", "show/"], + "count": 2, + }) + resp = uc.execute("download") + assert resp.status == "ok" + assert resp.folder_type == "download" + assert resp.path == "." + assert resp.entries == ["movie.mkv", "show/"] + assert resp.count == 2 + + def test_error_propagates(self): + uc = self._use_case({ + "status": "error", + "error": "folder_not_set", + "message": "Download folder not configured.", + }) + resp = uc.execute("download") + assert resp.status == "error" + assert resp.error == "folder_not_set" + assert resp.message == "Download folder not configured." 
+ + def test_delegates_folder_type_and_path(self): + fm = MagicMock() + fm.list_folder.return_value = { + "status": "ok", + "folder_type": "tv_show", + "path": "Breaking Bad", + "entries": [], + "count": 0, + } + uc = ListFolderUseCase(fm) + uc.execute("tv_show", "Breaking Bad") + fm.list_folder.assert_called_once_with("tv_show", "Breaking Bad") + + def test_default_path_is_dot(self): + fm = MagicMock() + fm.list_folder.return_value = { + "status": "ok", "folder_type": "download", + "path": ".", "entries": [], "count": 0, + } + uc = ListFolderUseCase(fm) + uc.execute("download") + fm.list_folder.assert_called_once_with("download", ".") + + def test_success_response_has_no_error(self): + uc = self._use_case({ + "status": "ok", + "folder_type": "movie", + "path": ".", + "entries": [], + "count": 0, + }) + resp = uc.execute("movie") + assert resp.error is None + + def test_error_response_has_no_entries(self): + uc = self._use_case({ + "status": "error", + "error": "not_found", + "message": "Path does not exist", + }) + resp = uc.execute("download", "some/path") + assert resp.entries is None + assert resp.count is None + + +# --------------------------------------------------------------------------- +# MoveMediaUseCase +# --------------------------------------------------------------------------- + +class TestMoveMediaUseCase: + + def _use_case(self, fm_result): + fm = MagicMock() + fm.move_file.return_value = fm_result + return MoveMediaUseCase(fm) + + def test_success_returns_response(self, tmp_path): + src = str(tmp_path / "src.mkv") + dst = str(tmp_path / "dst.mkv") + uc = self._use_case({ + "status": "ok", + "source": src, + "destination": dst, + "filename": "dst.mkv", + "size": 1024, + }) + resp = uc.execute(src, dst) + assert resp.status == "ok" + assert resp.source == src + assert resp.destination == dst + assert resp.filename == "dst.mkv" + assert resp.size == 1024 + + def test_error_propagates(self, tmp_path): + uc = self._use_case({ + "status": "error", + 
"error": "source_not_found", + "message": "Source does not exist: /ghost.mkv", + }) + resp = uc.execute("/ghost.mkv", str(tmp_path / "dst.mkv")) + assert resp.status == "error" + assert resp.error == "source_not_found" + + def test_delegates_to_file_manager(self, tmp_path): + src = "/downloads/movie.mkv" + dst = "/movies/Movie.2024/movie.mkv" + fm = MagicMock() + fm.move_file.return_value = { + "status": "ok", "source": src, "destination": dst, + "filename": "movie.mkv", "size": 1, + } + uc = MoveMediaUseCase(fm) + uc.execute(src, dst) + fm.move_file.assert_called_once_with(src, dst) + + def test_error_response_has_no_paths(self): + uc = self._use_case({ + "status": "error", + "error": "destination_exists", + "message": "File already exists", + }) + resp = uc.execute("/src.mkv", "/dst.mkv") + assert resp.source is None + assert resp.destination is None + assert resp.filename is None + + def test_to_dict_success(self, tmp_path): + src = "/downloads/movie.mkv" + dst = "/movies/movie.mkv" + uc = self._use_case({ + "status": "ok", + "source": src, + "destination": dst, + "filename": "movie.mkv", + "size": 2048, + }) + resp = uc.execute(src, dst) + d = resp.to_dict() + assert d["status"] == "ok" + assert d["filename"] == "movie.mkv" + assert d["size"] == 2048 + + def test_to_dict_error(self): + uc = self._use_case({ + "status": "error", + "error": "link_failed", + "message": "Cross-device link not permitted", + }) + resp = uc.execute("/src.mkv", "/dst.mkv") + d = resp.to_dict() + assert d["status"] == "error" + assert "error" in d + assert "message" in d diff --git a/tests/application/test_resolve_destination.py b/tests/application/test_resolve_destination.py new file mode 100644 index 0000000..a4af274 --- /dev/null +++ b/tests/application/test_resolve_destination.py @@ -0,0 +1,315 @@ +""" +Tests for alfred.application.filesystem.resolve_destination + +Uses a real temp filesystem + a real Memory instance (via conftest fixtures). 
+No network calls β€” TMDB data is passed in directly. +""" + +from pathlib import Path + +import pytest + +from alfred.application.filesystem.resolve_destination import ( + ResolveDestinationUseCase, + _find_existing_series_folders, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _use_case(): + return ResolveDestinationUseCase() + + +# --------------------------------------------------------------------------- +# Movies +# --------------------------------------------------------------------------- + +class TestResolveMovie: + + def test_basic_movie(self, memory_configured): + result = _use_case().execute( + release_name="Another.Round.2020.1080p.BluRay.x264-YTS", + source_file="/downloads/Another.Round.2020.1080p.BluRay.x264-YTS/Another.Round.2020.1080p.BluRay.x264-YTS.mp4", + tmdb_title="Another Round", + tmdb_year=2020, + ) + assert result.status == "ok" + assert "Another.Round.2020" in result.series_folder_name + assert "1080p.BluRay.x264-YTS" in result.series_folder_name + assert result.filename.endswith(".mp4") + assert result.season_folder is None + + def test_movie_library_file_path_is_inside_series_folder(self, memory_configured): + result = _use_case().execute( + release_name="Revolver.2005.1080p.BluRay.x265-RARBG", + source_file="/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv", + tmdb_title="Revolver", + tmdb_year=2005, + ) + assert result.status == "ok" + assert result.library_file.startswith(result.series_folder) + + def test_movie_library_not_set(self, memory): + # memory has no library paths configured + result = _use_case().execute( + release_name="Revolver.2005.1080p.BluRay.x265-RARBG", + source_file="/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv", + tmdb_title="Revolver", + tmdb_year=2005, + ) + assert result.status == "error" + assert result.error == "library_not_set" + + def 
test_movie_folder_marked_new(self, memory_configured): + # No existing folder β†’ is_new_series_folder = True + result = _use_case().execute( + release_name="Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS", + source_file="/downloads/Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS.mp4", + tmdb_title="Godzilla Minus One", + tmdb_year=2023, + ) + assert result.status == "ok" + assert result.is_new_series_folder is True + + def test_movie_sanitises_forbidden_chars_in_title(self, memory_configured): + result = _use_case().execute( + release_name="Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST.mkv", + tmdb_title="Alien: Earth", + tmdb_year=2024, + ) + assert result.status == "ok" + assert ":" not in result.series_folder_name + + def test_to_dict_ok(self, memory_configured): + result = _use_case().execute( + release_name="Revolver.2005.1080p.BluRay.x265-RARBG", + source_file="/downloads/Revolver.mkv", + tmdb_title="Revolver", + tmdb_year=2005, + ) + d = result.to_dict() + assert d["status"] == "ok" + assert "library_file" in d + assert "series_folder_name" in d + + +# --------------------------------------------------------------------------- +# TV shows β€” no existing folder +# --------------------------------------------------------------------------- + +class TestResolveTVShowNewFolder: + + def test_oz_s01_creates_new_folder(self, memory_configured): + result = _use_case().execute( + release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + assert result.status == "ok" + assert result.is_new_series_folder is True + assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-KONTRAST" + assert result.season_folder_name == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_tv_library_not_set(self, memory): + result = _use_case().execute( + 
release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S01E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + assert result.status == "error" + assert result.error == "library_not_set" + + def test_single_episode_filename(self, memory_configured): + result = _use_case().execute( + release_name="Fallout.2024.S02E01.1080p.x265-ELiTE", + source_file="/downloads/Fallout.2024.S02E01.1080p.x265-ELiTE.mkv", + tmdb_title="Fallout", + tmdb_year=2024, + tmdb_episode_title="The Beginning", + ) + assert result.status == "ok" + assert "S02E01" in result.filename + assert "The.Beginning" in result.filename + assert result.filename.endswith(".mkv") + + def test_season_pack_filename_is_folder_name_plus_ext(self, memory_configured): + result = _use_case().execute( + release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + assert result.status == "ok" + # Season pack: filename = season_folder_name + ext + assert result.filename == result.season_folder_name + ".mp4" + + def test_library_file_is_inside_season_folder(self, memory_configured): + result = _use_case().execute( + release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S01E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + assert result.library_file.startswith(result.season_folder) + assert result.season_folder.startswith(result.series_folder) + + +# --------------------------------------------------------------------------- +# TV shows β€” existing folder matching +# --------------------------------------------------------------------------- + +class TestResolveTVShowExistingFolder: + + def _make_series_folder(self, tv_root, name): + """Create a series folder in the tv library.""" + import os + path = tv_root / name + path.mkdir(parents=True, exist_ok=True) + return path + + def test_uses_existing_single_folder(self, memory_configured, app_temp): + """When exactly 
one folder matches title+year, use it regardless of group.""" + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + tv_root = Path(mem.ltm.library_paths.get("tv_show")) + + existing = tv_root / "Oz.1997.1080p.WEBRip.x265-RARBG" + existing.mkdir(parents=True, exist_ok=True) + + result = _use_case().execute( + release_name="Oz.S02.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S02E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + assert result.status == "ok" + assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-RARBG" + assert result.is_new_series_folder is False + + def test_needs_clarification_on_multiple_folders(self, memory_configured, app_temp): + """When multiple folders match, return needs_clarification with options.""" + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + tv_root = Path(mem.ltm.library_paths.get("tv_show")) + + (tv_root / "Slow.Horses.2022.1080p.WEBRip.x265-RARBG").mkdir(parents=True, exist_ok=True) + (tv_root / "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST").mkdir(parents=True, exist_ok=True) + + result = _use_case().execute( + release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Slow.Horses.S05E01.mkv", + tmdb_title="Slow Horses", + tmdb_year=2022, + ) + assert result.status == "needs_clarification" + assert result.question is not None + assert len(result.options) == 2 + assert "Slow.Horses.2022.1080p.WEBRip.x265-RARBG" in result.options + assert "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST" in result.options + + def test_confirmed_folder_bypasses_detection(self, memory_configured, app_temp): + """confirmed_folder skips the folder search.""" + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + tv_root = Path(mem.ltm.library_paths.get("tv_show")) + chosen = "Slow.Horses.2022.1080p.WEBRip.x265-RARBG" + (tv_root / chosen).mkdir(parents=True, exist_ok=True) + + result = _use_case().execute( + 
release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Slow.Horses.S05E01.mkv", + tmdb_title="Slow Horses", + tmdb_year=2022, + confirmed_folder=chosen, + ) + assert result.status == "ok" + assert result.series_folder_name == chosen + + def test_to_dict_needs_clarification(self, memory_configured, app_temp): + from alfred.infrastructure.persistence import get_memory + mem = get_memory() + tv_root = Path(mem.ltm.library_paths.get("tv_show")) + (tv_root / "Oz.1997.1080p.WEBRip.x265-RARBG").mkdir(parents=True, exist_ok=True) + (tv_root / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir(parents=True, exist_ok=True) + + result = _use_case().execute( + release_name="Oz.S03.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S03E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + d = result.to_dict() + assert d["status"] == "needs_clarification" + assert "question" in d + assert isinstance(d["options"], list) + + def test_to_dict_error(self, memory): + result = _use_case().execute( + release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST", + source_file="/downloads/Oz.S01E01.mp4", + tmdb_title="Oz", + tmdb_year=1997, + ) + d = result.to_dict() + assert d["status"] == "error" + assert "error" in d + assert "message" in d + + +# --------------------------------------------------------------------------- +# _find_existing_series_folders +# --------------------------------------------------------------------------- + +class TestFindExistingSeriesFolders: + + def test_empty_library(self, tmp_path): + assert _find_existing_series_folders(tmp_path, "Oz", 1997) == [] + + def test_nonexistent_root(self, tmp_path): + assert _find_existing_series_folders(tmp_path / "nope", "Oz", 1997) == [] + + def test_single_match(self, tmp_path): + (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + result = _find_existing_series_folders(tmp_path, "Oz", 1997) + assert result == ["Oz.1997.1080p.WEBRip.x265-KONTRAST"] + + def test_multiple_matches(self, tmp_path): + 
(tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + (tmp_path / "Oz.1997.1080p.WEBRip.x265-RARBG").mkdir() + result = _find_existing_series_folders(tmp_path, "Oz", 1997) + assert len(result) == 2 + assert sorted(result) == result # sorted + + def test_no_match_different_year(self, tmp_path): + (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + result = _find_existing_series_folders(tmp_path, "Oz", 2000) + assert result == [] + + def test_no_match_different_title(self, tmp_path): + (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + result = _find_existing_series_folders(tmp_path, "Breaking Bad", 2008) + assert result == [] + + def test_ignores_files_not_dirs(self, tmp_path): + (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + (tmp_path / "Oz.1997.some.file.txt").touch() + result = _find_existing_series_folders(tmp_path, "Oz", 1997) + assert len(result) == 1 + + def test_case_insensitive_prefix(self, tmp_path): + # Folder stored with mixed case + (tmp_path / "OZ.1997.1080p.WEBRip.x265-KONTRAST").mkdir() + result = _find_existing_series_folders(tmp_path, "Oz", 1997) + assert len(result) == 1 + + def test_title_with_special_chars_sanitised(self, tmp_path): + # "Star Wars: Andor" β†’ sanitised (colon removed) + spacesβ†’dots β†’ "Star.Wars.Andor.2022" + (tmp_path / "Star.Wars.Andor.2022.1080p.WEBRip.x265-GROUP").mkdir() + result = _find_existing_series_folders(tmp_path, "Star Wars: Andor", 2022) + assert len(result) == 1 diff --git a/tests/conftest.py b/tests/conftest.py index e376e39..fb84627 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -50,10 +50,10 @@ def memory(temp_dir): @pytest.fixture def memory_with_config(memory): """Memory with pre-configured folders.""" - memory.ltm.set_config("download_folder", "/tmp/downloads") - memory.ltm.set_config("movie_folder", "/tmp/movies") - memory.ltm.set_config("tvshow_folder", "/tmp/tvshows") - memory.ltm.set_config("torrent_folder", "/tmp/torrents") + memory.ltm.download_folder = 
"/tmp/downloads" + memory.ltm.movie_folder = "/tmp/movies" + memory.ltm.tvshow_folder = "/tmp/tvshows" + memory.ltm.torrent_folder = "/tmp/torrents" return memory diff --git a/tests/domain/__init__.py b/tests/domain/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/domain/test_release_parser.py b/tests/domain/test_release_parser.py new file mode 100644 index 0000000..8bc0fc9 --- /dev/null +++ b/tests/domain/test_release_parser.py @@ -0,0 +1,465 @@ +""" +Tests for alfred.domain.media.release_parser + +Real-data cases sourced from /mnt/testipool/downloads/. +Covers: parsing, normalisation, naming methods, edge cases. +""" + +import pytest + +from alfred.domain.media.release_parser import ( + ParsedRelease, + _normalise, + _sanitise_for_fs, + _strip_episode_from_normalised, + parse_release, +) + + +# --------------------------------------------------------------------------- +# _normalise +# --------------------------------------------------------------------------- + +class TestNormalise: + def test_dots_unchanged(self): + assert _normalise("Oz.S01.1080p.WEBRip.x265-KONTRAST") == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_spaces_become_dots(self): + assert _normalise("Oz S01 1080p WEBRip x265-KONTRAST") == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_double_dots_collapsed(self): + assert _normalise("Oz..S01..1080p") == "Oz.S01.1080p" + + def test_leading_trailing_dots_stripped(self): + assert _normalise(".Oz.S01.") == "Oz.S01" + + def test_mixed_spaces_and_dots(self): + # "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb" + result = _normalise("Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb") + assert " " not in result + assert ".." 
not in result + + +# --------------------------------------------------------------------------- +# _sanitise_for_fs +# --------------------------------------------------------------------------- + +class TestSanitiseForFs: + def test_clean_string_unchanged(self): + assert _sanitise_for_fs("Oz.S01.1080p-KONTRAST") == "Oz.S01.1080p-KONTRAST" + + def test_removes_question_mark(self): + assert _sanitise_for_fs("What's Up?") == "What's Up" + + def test_removes_colon(self): + assert _sanitise_for_fs("He Said: She Said") == "He Said She Said" + + def test_removes_all_forbidden(self): + assert _sanitise_for_fs('a?b:c*d"eg|h\\i') == "abcdefghi" + + def test_apostrophe_kept(self): + # apostrophe is not in the forbidden set + assert _sanitise_for_fs("What's Up") == "What's Up" + + def test_ellipsis_kept(self): + assert _sanitise_for_fs("What If...") == "What If..." + + +# --------------------------------------------------------------------------- +# _strip_episode_from_normalised +# --------------------------------------------------------------------------- + +class TestStripEpisode: + def test_strips_single_episode(self): + assert _strip_episode_from_normalised("Oz.S01E01.1080p.WEBRip.x265-KONTRAST") \ + == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_strips_multi_episode(self): + assert _strip_episode_from_normalised("Archer.S14E09E10E11.1080p.HULU.WEB-DL-NTb") \ + == "Archer.S14.1080p.HULU.WEB-DL-NTb" + + def test_season_pack_unchanged(self): + assert _strip_episode_from_normalised("Oz.S01.1080p.WEBRip.x265-KONTRAST") \ + == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_case_insensitive(self): + assert _strip_episode_from_normalised("oz.s01e01.1080p-KONTRAST") \ + == "oz.s01.1080p-KONTRAST" + + +# --------------------------------------------------------------------------- +# parse_release β€” Season packs (dots) +# --------------------------------------------------------------------------- + +class TestSeasonPackDots: + """Real cases: Oz.S01-S06 KONTRAST, Archer 
S03 EDGE2020, etc.""" + + def test_oz_s01_kontrast(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + assert p.title == "Oz" + assert p.season == 1 + assert p.episode is None + assert p.quality == "1080p" + assert p.source == "WEBRip" + assert p.codec == "x265" + assert p.group == "KONTRAST" + assert p.is_season_pack + assert not p.is_movie + + def test_fallout_s02_kontrast(self): + p = parse_release("Fallout.2024.S02.1080p.WEBRip.x265-KONTRAST") + assert p.title == "Fallout" + assert p.year == 2024 + assert p.season == 2 + assert p.episode is None + assert p.group == "KONTRAST" + + def test_archer_s03_edge2020(self): + p = parse_release("Archer.2009.S03.1080p.BluRay.DDP.5.1.x265-EDGE2020") + assert p.title == "Archer" + assert p.year == 2009 + assert p.season == 3 + assert p.quality == "1080p" + assert p.source == "BluRay" + assert p.codec == "x265" + assert p.group == "EDGE2020" + + def test_fargo_s05_hulu_webdl(self): + p = parse_release("Fargo.S05.1080p.HULU.WEB-DL.x265.10bit-Protozoan") + assert p.title == "Fargo" + assert p.season == 5 + assert p.quality == "1080p" + assert p.group == "Protozoan" + + def test_xfiles_s01_bluray_rarbg(self): + p = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG") + assert p.title == "The.X-Files" + assert p.season == 1 + assert p.source == "BluRay" + assert p.group == "RARBG" + + def test_gilmore_girls_s01_s07_repack(self): + p = parse_release("Gilmore.Girls.Complete.S01-S07.REPACK.1080p.WEB-DL.x265.10bit.HEVC-MONOLITH") + # Season range β€” we parse the first season number found + assert p.season == 1 + assert p.group == "MONOLITH" + + def test_plot_against_america_4k(self): + p = parse_release("The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1.x265-SH3LBY") + assert p.title == "The.Plot.Against.America" + assert p.season == 1 + assert p.quality == "2160p" + assert p.group == "SH3LBY" + + def test_foundation_with_year_in_title(self): + p = 
parse_release("Foundation.2021.S01.1080p.WEBRip.x265-RARBG") + assert p.title == "Foundation" + assert p.year == 2021 + assert p.season == 1 + assert p.group == "RARBG" + + def test_gen_v_s02(self): + p = parse_release("Gen.V.S02.1080p.WEBRip.x265-KONTRAST") + assert p.title == "Gen.V" + assert p.season == 2 + assert p.group == "KONTRAST" + + +# --------------------------------------------------------------------------- +# parse_release β€” Single episodes (dots) +# --------------------------------------------------------------------------- + +class TestSingleEpisodeDots: + """Real cases: Fallout S02Exx ELiTE, Mare of Easttown PSA, etc.""" + + def test_fallout_s02e01_elite(self): + p = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE") + assert p.title == "Fallout" + assert p.year == 2024 + assert p.season == 2 + assert p.episode == 1 + assert p.episode_end is None + assert p.group == "ELiTE" + assert not p.is_season_pack + + def test_mare_of_easttown_with_episode_title_in_filename(self): + # Episode filenames often embed the title β€” we parse the release folder name + p = parse_release("Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA") + assert p.title == "Mare.of.Easttown" + assert p.season == 1 + assert p.group == "PSA" + + def test_it_welcome_to_derry_s01e01(self): + p = parse_release("IT.Welcome.to.Derry.S01E01.1080p.x265-ELiTE") + assert p.title == "IT.Welcome.to.Derry" + assert p.season == 1 + assert p.episode == 1 + assert p.group == "ELiTE" + + def test_landman_s02e01(self): + p = parse_release("Landman.S02E01.1080p.x265-ELiTE") + assert p.title == "Landman" + assert p.season == 2 + assert p.episode == 1 + + def test_prodiges_episode_with_number_in_title(self): + # "Prodiges.S12E01.1ere.demi-finale..." 
β€” accented chars in episode title + p = parse_release("Prodiges.S12E01.1080p.WEB.H264-THESYNDiCATE") + assert p.title == "Prodiges" + assert p.season == 12 + assert p.episode == 1 + assert p.group == "THESYNDiCATE" + + +# --------------------------------------------------------------------------- +# parse_release β€” Multi-episode +# --------------------------------------------------------------------------- + +class TestMultiEpisode: + def test_archer_triple_episode(self): + # "Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb" + p = parse_release("Archer.2009.S14E09E10E11.Into.the.Cold.1080p.HULU.WEB-DL.DDP5.1.H.264-NTb") + assert p.season == 14 + assert p.episode == 9 + assert p.episode_end == 10 # only first E-pair captured by regex group 2+3 + + +# --------------------------------------------------------------------------- +# parse_release β€” Movies +# --------------------------------------------------------------------------- + +class TestMovies: + def test_another_round_yts(self): + # "Another Round (2020) [1080p] [BluRay] [YTS.MX]" β†’ normalised + p = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS") + assert p.is_movie + assert p.title == "Another.Round" + assert p.year == 2020 + assert p.quality == "1080p" + assert p.source == "BluRay" + assert p.group == "YTS" + + def test_godzilla_minus_one(self): + p = parse_release("Godzilla.Minus.One.2023.1080p.BluRay.x265.10bit.AAC5.1-YTS") + assert p.title == "Godzilla.Minus.One" + assert p.year == 2023 + assert p.is_movie + assert p.group == "YTS" + + def test_deadwood_movie_2019(self): + p = parse_release("Deadwood.The.Movie.2019.1080p.BluRay.x265-RARBG") + assert p.year == 2019 + assert p.is_movie + assert p.group == "RARBG" + + def test_revolver_2005_bluray(self): + p = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG") + assert p.title == "Revolver" + assert p.year == 2005 + assert p.is_movie + + def test_the_xfiles_movie_1998(self): + p = 
parse_release("The.X.Files.1998.1080p.BluRay.x265-RARBG") + assert p.year == 1998 + assert p.is_movie + assert p.group == "RARBG" + + def test_movie_no_group(self): + p = parse_release("Jurassic.Park.1993.1080p.BluRay.x265") + assert p.is_movie + assert p.year == 1993 + assert p.group == "UNKNOWN" + + def test_multi_language_movie(self): + p = parse_release("Jumanji.1995.MULTi.1080p.DSNP.WEB.H265-THESYNDiCATE") + assert p.year == 1995 + assert p.group == "THESYNDiCATE" + + +# --------------------------------------------------------------------------- +# parse_release β€” Space-separated (no dots) +# --------------------------------------------------------------------------- + +class TestSpaceSeparated: + def test_oz_spaces(self): + p = parse_release("Oz S01 1080p WEBRip x265-KONTRAST") + assert p.title == "Oz" + assert p.season == 1 + assert p.quality == "1080p" + assert p.group == "KONTRAST" + + def test_archer_spaces(self): + p = parse_release("Archer 2009 S14E09E10E11 Into the Cold 1080p HULU WEB-DL DDP5 1 H 264-NTb") + assert p.season == 14 + assert p.episode == 9 + assert p.group == "NTb" + + +# --------------------------------------------------------------------------- +# parse_release β€” tech_string +# --------------------------------------------------------------------------- + +class TestTechString: + def test_full_tech(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + assert p.tech_string == "1080p.WEBRip.x265" + + def test_tech_string_used_in_folder_name(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + folder = p.show_folder_name("Oz", 1997) + assert "1080p.WEBRip.x265" in folder + + def test_no_tech_fallback(self): + p = parse_release("SomeShow.S01") + # tech_string is empty, show_folder_name uses "Unknown" + folder = p.show_folder_name("SomeShow", 2020) + assert "Unknown" in folder + + def test_4k_hdr(self): + p = parse_release("The.Plot.Against.America.S01.2160p.MAX.WEB-DL.x265.10bit.HDR.DDP5.1-SH3LBY") + assert 
p.quality == "2160p" + + +# --------------------------------------------------------------------------- +# ParsedRelease β€” naming methods +# --------------------------------------------------------------------------- + +class TestNamingMethods: + + def test_show_folder_name(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + assert p.show_folder_name("Oz", 1997) == "Oz.1997.1080p.WEBRip.x265-KONTRAST" + + def test_show_folder_name_sanitises_title(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + # Colon in TMDB title should be stripped, spaces become dots + folder = p.show_folder_name("Star Wars: Andor", 2022) + assert ":" not in folder + assert "Star.Wars.Andor" in folder + + def test_season_folder_name_from_season_pack(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + assert p.season_folder_name() == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_season_folder_name_strips_episode(self): + p = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE") + assert p.season_folder_name() == "Fallout.2024.S02.1080p.x265-ELiTE" + + def test_episode_filename_with_title(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + fname = p.episode_filename("The Routine", ".mkv") + assert fname == "Oz.S01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv" + + def test_episode_filename_with_episode_number(self): + p = parse_release("Fallout.2024.S02E01.1080p.x265-ELiTE") + fname = p.episode_filename("The Beginning", ".mkv") + assert fname == "Fallout.S02E01.The.Beginning.1080p.x265-ELiTE.mkv" + + def test_episode_filename_without_episode_title(self): + p = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST") + fname = p.episode_filename(None, ".mp4") + assert fname == "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4" + + def test_episode_filename_sanitises_episode_title(self): + p = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST") + fname = p.episode_filename("What's Up?", ".mkv") + assert "?" 
not in fname + assert "What's.Up" in fname + + def test_episode_filename_strips_leading_dot_from_ext(self): + p = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST") + fname_with = p.episode_filename(None, ".mkv") + fname_without = p.episode_filename(None, "mkv") + assert fname_with == fname_without + + def test_movie_folder_name(self): + p = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS") + assert p.movie_folder_name("Another Round", 2020) == "Another.Round.2020.1080p.BluRay.x264-YTS" + + def test_movie_filename(self): + p = parse_release("Another.Round.2020.1080p.BluRay.x264-YTS") + fname = p.movie_filename("Another Round", 2020, ".mp4") + assert fname == "Another.Round.2020.1080p.BluRay.x264-YTS.mp4" + + def test_movie_folder_same_as_show_folder(self): + p = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG") + assert p.movie_folder_name("Revolver", 2005) == p.show_folder_name("Revolver", 2005) + + +# --------------------------------------------------------------------------- +# ParsedRelease β€” is_movie / is_season_pack +# --------------------------------------------------------------------------- + +class TestMediaTypeFlags: + def test_season_pack_is_not_movie(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265-KONTRAST") + assert not p.is_movie + assert p.is_season_pack + + def test_single_episode_is_not_season_pack(self): + p = parse_release("Oz.S01E01.1080p.WEBRip.x265-KONTRAST") + assert not p.is_movie + assert not p.is_season_pack + + def test_movie_is_not_season_pack(self): + p = parse_release("Revolver.2005.1080p.BluRay.x265-RARBG") + assert p.is_movie + assert not p.is_season_pack + + def test_no_season_no_year_treated_as_movie(self): + # No S/E marker β†’ is_movie = True + p = parse_release("SomeContent.1080p.WEBRip.x265-GROUP") + assert p.is_movie + + +# --------------------------------------------------------------------------- +# Tricky real-world releases +# 
--------------------------------------------------------------------------- + +class TestRealWorldEdgeCases: + + def test_angel_integrale_multi(self): + # "Angel.1999.INTEGRALE.MULTI.1080p.WEBRip.10bits.x265.DD-Jarod" + p = parse_release("Angel.1999.INTEGRALE.MULTI.1080p.WEBRip.10bits.x265.DD-Jarod") + assert p.year == 1999 + assert p.quality == "1080p" + assert p.source == "WEBRip" + + def test_group_unknown_when_no_dash(self): + p = parse_release("Oz.S01.1080p.WEBRip.x265") + assert p.group == "UNKNOWN" + + def test_normalised_stored_on_parsed(self): + p = parse_release("Oz S01 1080p WEBRip x265-KONTRAST") + assert p.normalised == "Oz.S01.1080p.WEBRip.x265-KONTRAST" + + def test_raw_stored_as_is(self): + raw = "Oz S01 1080p WEBRip x265-KONTRAST" + p = parse_release(raw) + assert p.raw == raw + + def test_hevc_codec(self): + # "Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA" + p = parse_release("Mare.of.Easttown.S01.1080p.10bit.WEBRip.6CH.x265.HEVC-PSA") + assert p.codec in ("x265", "HEVC") + assert p.group == "PSA" + + def test_xfiles_hyphen_in_title(self): + p = parse_release("The.X-Files.S01.1080p.BluRay.x265-RARBG") + # Title should preserve the hyphen + assert "X-Files" in p.title + + def test_foundation_s02_no_year(self): + # Foundation.S02 has no year in release name β€” year is None + p = parse_release("Foundation.S02.1080p.x265-ELiTE") + assert p.year is None + assert p.season == 2 + assert p.group == "ELiTE" + + def test_slow_horses_two_groups_same_show(self): + # Same show, different groups across seasons + s01 = parse_release("Slow.Horses.S01.1080p.WEBRip.x265-RARBG") + s04 = parse_release("Slow.Horses.S04.1080p.WEBRip.x265-KONTRAST") + assert s01.title == s04.title == "Slow.Horses" + assert s01.group == "RARBG" + assert s04.group == "KONTRAST" diff --git a/tests/domain/test_shared_value_objects.py b/tests/domain/test_shared_value_objects.py new file mode 100644 index 0000000..6ae1435 --- /dev/null +++ 
b/tests/domain/test_shared_value_objects.py @@ -0,0 +1,136 @@ +"""Tests for shared domain value objects: ImdbId, FilePath, FileSize.""" + +import pytest +from pathlib import Path + +from alfred.domain.shared.exceptions import ValidationError +from alfred.domain.shared.value_objects import FilePath, FileSize, ImdbId + + +# --------------------------------------------------------------------------- +# ImdbId +# --------------------------------------------------------------------------- + +class TestImdbId: + + def test_valid_7_digits(self): + id_ = ImdbId("tt1375666") + assert str(id_) == "tt1375666" + + def test_valid_8_digits(self): + id_ = ImdbId("tt12345678") + assert str(id_) == "tt12345678" + + def test_empty_raises(self): + with pytest.raises(ValidationError): + ImdbId("") + + def test_no_tt_prefix_raises(self): + with pytest.raises(ValidationError): + ImdbId("1375666") + + def test_too_few_digits_raises(self): + with pytest.raises(ValidationError): + ImdbId("tt12345") # only 5 digits + + def test_too_many_digits_raises(self): + with pytest.raises(ValidationError): + ImdbId("tt123456789") # 9 digits + + def test_non_string_raises(self): + with pytest.raises(ValidationError): + ImdbId(1375666) # type: ignore + + def test_repr(self): + assert "tt1375666" in repr(ImdbId("tt1375666")) + + def test_equality(self): + assert ImdbId("tt1375666") == ImdbId("tt1375666") + assert ImdbId("tt1375666") != ImdbId("tt0903747") + + def test_hashable(self): + # Frozen dataclass should be hashable + ids = {ImdbId("tt1375666"), ImdbId("tt0903747")} + assert len(ids) == 2 + + +# --------------------------------------------------------------------------- +# FilePath +# --------------------------------------------------------------------------- + +class TestFilePath: + + def test_from_string(self, tmp_path): + p = FilePath(str(tmp_path)) + assert isinstance(p.value, Path) + + def test_from_path(self, tmp_path): + p = FilePath(tmp_path) + assert p.value == tmp_path + + def 
test_invalid_type_raises(self): + with pytest.raises(ValidationError): + FilePath(123) # type: ignore + + def test_exists_true(self, tmp_path): + p = FilePath(tmp_path) + assert p.exists() + + def test_exists_false(self, tmp_path): + p = FilePath(tmp_path / "nonexistent") + assert not p.exists() + + def test_is_file(self, tmp_path): + f = tmp_path / "file.txt" + f.write_text("x") + assert FilePath(f).is_file() + assert not FilePath(tmp_path).is_file() + + def test_is_dir(self, tmp_path): + assert FilePath(tmp_path).is_dir() + + def test_str(self, tmp_path): + p = FilePath(tmp_path) + assert str(p) == str(tmp_path) + + +# --------------------------------------------------------------------------- +# FileSize +# --------------------------------------------------------------------------- + +class TestFileSize: + + def test_bytes(self): + s = FileSize(500) + assert s.bytes == 500 + + def test_negative_raises(self): + with pytest.raises(ValidationError): + FileSize(-1) + + def test_non_integer_raises(self): + with pytest.raises(ValidationError): + FileSize(1.5) # type: ignore + + def test_zero_is_valid(self): + s = FileSize(0) + assert s.bytes == 0 + + def test_human_readable_bytes(self): + assert FileSize(500).to_human_readable() == "500 B" + + def test_human_readable_kb(self): + result = FileSize(2048).to_human_readable() + assert "KB" in result + + def test_human_readable_mb(self): + result = FileSize(5 * 1024 * 1024).to_human_readable() + assert "MB" in result + + def test_human_readable_gb(self): + result = FileSize(2 * 1024 ** 3).to_human_readable() + assert "GB" in result + + def test_str_is_human_readable(self): + s = FileSize(1024) + assert str(s) == s.to_human_readable() diff --git a/tests/domain/test_subtitle_scanner.py b/tests/domain/test_subtitle_scanner.py new file mode 100644 index 0000000..3220b77 --- /dev/null +++ b/tests/domain/test_subtitle_scanner.py @@ -0,0 +1,217 @@ +"""Tests for SubtitleScanner and _classify helper.""" + +import pytest +from 
pathlib import Path + +from alfred.domain.subtitles.scanner import ( + SubtitleCandidate, + SubtitleScanner, + _classify, +) + + +# --------------------------------------------------------------------------- +# _classify β€” unit tests for the filename parser +# --------------------------------------------------------------------------- + +class TestClassify: + + def test_iso_lang_code(self, tmp_path): + p = tmp_path / "fr.srt" + p.write_text("") + lang, is_sdh, is_forced = _classify(p) + assert lang == "fr" + assert not is_sdh + assert not is_forced + + def test_english_keyword(self, tmp_path): + p = tmp_path / "english.srt" + p.write_text("") + lang, _, _ = _classify(p) + assert lang == "en" + + def test_french_keyword(self, tmp_path): + p = tmp_path / "Show.S01E01.French.srt" + p.write_text("") + lang, _, _ = _classify(p) + assert lang == "fr" + + def test_vostfr_is_french(self, tmp_path): + p = tmp_path / "Show.S01E01.VOSTFR.srt" + p.write_text("") + lang, _, _ = _classify(p) + assert lang == "fr" + + def test_sdh_token(self, tmp_path): + p = tmp_path / "fr.sdh.srt" + p.write_text("") + lang, is_sdh, _ = _classify(p) + assert lang == "fr" + assert is_sdh + + def test_hi_alias_for_sdh(self, tmp_path): + p = tmp_path / "en.hi.srt" + p.write_text("") + _, is_sdh, _ = _classify(p) + assert is_sdh + + def test_forced_token(self, tmp_path): + p = tmp_path / "fr.forced.srt" + p.write_text("") + _, _, is_forced = _classify(p) + assert is_forced + + def test_unknown_language_returns_none(self, tmp_path): + p = tmp_path / "Show.S01E01.720p.srt" + p.write_text("") + lang, _, _ = _classify(p) + assert lang is None + + def test_dot_separator(self, tmp_path): + p = tmp_path / "fr.sdh.srt" + p.write_text("") + lang, is_sdh, _ = _classify(p) + assert lang == "fr" + assert is_sdh + + def test_hyphen_separator(self, tmp_path): + p = tmp_path / "fr-forced.srt" + p.write_text("") + lang, _, is_forced = _classify(p) + assert lang == "fr" + assert is_forced + + +# 
--------------------------------------------------------------------------- +# SubtitleCandidate.destination_name +# --------------------------------------------------------------------------- + +class TestSubtitleCandidateDestinationName: + + def _make(self, lang="fr", is_sdh=False, is_forced=False, ext=".srt", path=None): + return SubtitleCandidate( + source_path=path or Path("/fake/fr.srt"), + language=lang, + is_sdh=is_sdh, + is_forced=is_forced, + extension=ext, + ) + + def test_standard(self): + assert self._make().destination_name == "fr.srt" + + def test_sdh(self): + assert self._make(is_sdh=True).destination_name == "fr.sdh.srt" + + def test_forced(self): + assert self._make(is_forced=True).destination_name == "fr.forced.srt" + + def test_ass_extension(self): + assert self._make(ext=".ass").destination_name == "fr.ass" + + def test_english_standard(self): + assert self._make(lang="en").destination_name == "en.srt" + + +# --------------------------------------------------------------------------- +# SubtitleScanner β€” integration with real filesystem +# --------------------------------------------------------------------------- + +class TestSubtitleScanner: + + def _scanner(self, languages=None, min_size_kb=0, keep_sdh=True, keep_forced=True): + return SubtitleScanner( + languages=languages or ["fr", "en"], + min_size_kb=min_size_kb, + keep_sdh=keep_sdh, + keep_forced=keep_forced, + ) + + def _video(self, tmp_path): + video = tmp_path / "Movie.mkv" + video.write_bytes(b"video") + return video + + def test_finds_adjacent_subtitle(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.srt").write_text("subtitle content") + + candidates = self._scanner().scan(video) + + assert len(candidates) == 1 + assert candidates[0].language == "fr" + + def test_finds_multiple_languages(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.srt").write_text("fr subtitle") + (tmp_path / "en.srt").write_text("en subtitle") + + candidates = 
self._scanner().scan(video) + langs = {c.language for c in candidates} + assert langs == {"fr", "en"} + + def test_scans_subs_subfolder(self, tmp_path): + video = self._video(tmp_path) + subs = tmp_path / "Subs" + subs.mkdir() + (subs / "fr.srt").write_text("subtitle") + + candidates = self._scanner().scan(video) + assert any(c.language == "fr" for c in candidates) + + def test_filters_unknown_language(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "unknown.srt").write_text("subtitle") + + candidates = self._scanner().scan(video) + assert len(candidates) == 0 + + def test_filters_wrong_language(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "de.srt").write_text("german subtitle") + + candidates = self._scanner(languages=["fr"]).scan(video) + assert len(candidates) == 0 + + def test_filters_too_small_file(self, tmp_path): + video = self._video(tmp_path) + small = tmp_path / "fr.srt" + small.write_bytes(b"x") # 1 byte, well below any min_size_kb + + candidates = self._scanner(min_size_kb=10).scan(video) + assert len(candidates) == 0 + + def test_filters_sdh_when_not_wanted(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.sdh.srt").write_text("sdh subtitle") + + candidates = self._scanner(keep_sdh=False).scan(video) + assert len(candidates) == 0 + + def test_filters_forced_when_not_wanted(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.forced.srt").write_text("forced subtitle") + + candidates = self._scanner(keep_forced=False).scan(video) + assert len(candidates) == 0 + + def test_keeps_sdh_when_wanted(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.sdh.srt").write_text("sdh subtitle") + + candidates = self._scanner(keep_sdh=True).scan(video) + assert len(candidates) == 1 + assert candidates[0].is_sdh + + def test_ignores_non_subtitle_files(self, tmp_path): + video = self._video(tmp_path) + (tmp_path / "fr.nfo").write_text("nfo file") + (tmp_path / "fr.jpg").write_bytes(b"image") + 
+ candidates = self._scanner().scan(video) + assert len(candidates) == 0 + + def test_returns_empty_when_no_subtitles(self, tmp_path): + video = self._video(tmp_path) + candidates = self._scanner().scan(video) + assert candidates == [] diff --git a/tests/domain/test_tv_shows.py b/tests/domain/test_tv_shows.py new file mode 100644 index 0000000..a91b7f9 --- /dev/null +++ b/tests/domain/test_tv_shows.py @@ -0,0 +1,223 @@ +"""Tests for TV Show domain β€” entities and value objects.""" + +import pytest + +from alfred.domain.shared.exceptions import ValidationError +from alfred.domain.tv_shows.entities import Episode, Season, TVShow +from alfred.domain.tv_shows.value_objects import EpisodeNumber, SeasonNumber, ShowStatus + + +# --------------------------------------------------------------------------- +# ShowStatus +# --------------------------------------------------------------------------- + +class TestShowStatus: + + def test_from_string_ongoing(self): + assert ShowStatus.from_string("ongoing") == ShowStatus.ONGOING + + def test_from_string_ended(self): + assert ShowStatus.from_string("ended") == ShowStatus.ENDED + + def test_from_string_case_insensitive(self): + assert ShowStatus.from_string("ONGOING") == ShowStatus.ONGOING + assert ShowStatus.from_string("Ended") == ShowStatus.ENDED + + def test_from_string_unknown(self): + assert ShowStatus.from_string("cancelled") == ShowStatus.UNKNOWN + assert ShowStatus.from_string("") == ShowStatus.UNKNOWN + + +# --------------------------------------------------------------------------- +# SeasonNumber +# --------------------------------------------------------------------------- + +class TestSeasonNumber: + + def test_valid_season(self): + s = SeasonNumber(1) + assert s.value == 1 + + def test_season_zero_is_specials(self): + s = SeasonNumber(0) + assert s.is_special() + + def test_normal_season_not_special(self): + assert not SeasonNumber(3).is_special() + + def test_negative_raises(self): + with 
pytest.raises(ValidationError): + SeasonNumber(-1) + + def test_too_high_raises(self): + with pytest.raises(ValidationError): + SeasonNumber(101) + + def test_non_integer_raises(self): + with pytest.raises((ValidationError, TypeError)): + SeasonNumber("1") # type: ignore + + def test_str_and_int(self): + s = SeasonNumber(5) + assert str(s) == "5" + assert int(s) == 5 + + +# --------------------------------------------------------------------------- +# EpisodeNumber +# --------------------------------------------------------------------------- + +class TestEpisodeNumber: + + def test_valid_episode(self): + e = EpisodeNumber(1) + assert e.value == 1 + + def test_zero_raises(self): + with pytest.raises(ValidationError): + EpisodeNumber(0) + + def test_negative_raises(self): + with pytest.raises(ValidationError): + EpisodeNumber(-5) + + def test_too_high_raises(self): + with pytest.raises(ValidationError): + EpisodeNumber(1001) + + def test_str_and_int(self): + e = EpisodeNumber(12) + assert str(e) == "12" + assert int(e) == 12 + + +# --------------------------------------------------------------------------- +# TVShow entity +# --------------------------------------------------------------------------- + +class TestTVShow: + + def _make(self, imdb_id="tt0903747", title="Breaking Bad", seasons=5, status="ended"): + return TVShow(imdb_id=imdb_id, title=title, seasons_count=seasons, status=status) + + def test_basic_creation(self): + show = self._make() + assert show.title == "Breaking Bad" + assert show.seasons_count == 5 + + def test_coerces_string_imdb_id(self): + show = self._make() + from alfred.domain.shared.value_objects import ImdbId + assert isinstance(show.imdb_id, ImdbId) + + def test_coerces_string_status(self): + show = self._make(status="ongoing") + assert show.status == ShowStatus.ONGOING + + def test_is_ongoing(self): + show = self._make(status="ongoing") + assert show.is_ongoing() + assert not show.is_ended() + + def test_is_ended(self): + show = 
self._make(status="ended") + assert show.is_ended() + assert not show.is_ongoing() + + def test_negative_seasons_raises(self): + with pytest.raises(ValueError): + TVShow(imdb_id="tt0903747", title="X", seasons_count=-1, status="ended") + + def test_invalid_imdb_id_type_raises(self): + with pytest.raises(ValueError): + TVShow(imdb_id=12345, title="X", seasons_count=1, status="ended") # type: ignore + + def test_get_folder_name_replaces_spaces(self): + show = self._make(title="Breaking Bad") + assert show.get_folder_name() == "Breaking.Bad" + + def test_get_folder_name_strips_special_chars(self): + show = self._make(title="It's Always Sunny") + name = show.get_folder_name() + assert "'" not in name + + def test_str_repr(self): + show = self._make() + assert "Breaking Bad" in str(show) + assert "tt0903747" in repr(show) + + +# --------------------------------------------------------------------------- +# Season entity +# --------------------------------------------------------------------------- + +class TestSeason: + + def test_basic_creation(self): + s = Season(show_imdb_id="tt0903747", season_number=1, episode_count=7) + assert s.episode_count == 7 + + def test_get_folder_name_normal(self): + s = Season(show_imdb_id="tt0903747", season_number=2, episode_count=13) + assert s.get_folder_name() == "Season 02" + + def test_get_folder_name_specials(self): + s = Season(show_imdb_id="tt0903747", season_number=0, episode_count=3) + assert s.get_folder_name() == "Specials" + assert s.is_special() + + def test_negative_episode_count_raises(self): + with pytest.raises(ValueError): + Season(show_imdb_id="tt0903747", season_number=1, episode_count=-1) + + def test_str(self): + s = Season(show_imdb_id="tt0903747", season_number=1, episode_count=7, name="Pilot Season") + assert "Pilot Season" in str(s) + + +# --------------------------------------------------------------------------- +# Episode entity +# --------------------------------------------------------------------------- 
+ +class TestEpisode: + + def test_basic_creation(self): + e = Episode( + show_imdb_id="tt0903747", + season_number=1, + episode_number=1, + title="Pilot", + ) + assert e.title == "Pilot" + + def test_get_filename_format(self): + e = Episode( + show_imdb_id="tt0903747", + season_number=1, + episode_number=5, + title="Gray Matter", + ) + filename = e.get_filename() + assert filename.startswith("S01E05") + assert "Gray.Matter" in filename + + def test_has_file_false_when_no_path(self): + e = Episode( + show_imdb_id="tt0903747", + season_number=1, + episode_number=1, + title="Pilot", + ) + assert not e.has_file() + assert not e.is_downloaded() + + def test_str_format(self): + e = Episode( + show_imdb_id="tt0903747", + season_number=2, + episode_number=3, + title="Bit by a Dead Bee", + ) + s = str(e) + assert "S02E03" in s + assert "Bit by a Dead Bee" in s diff --git a/tests/infrastructure/__init__.py b/tests/infrastructure/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/infrastructure/conftest.py b/tests/infrastructure/conftest.py new file mode 100644 index 0000000..aa4f551 --- /dev/null +++ b/tests/infrastructure/conftest.py @@ -0,0 +1,43 @@ +"""Fixtures for infrastructure-layer tests.""" + +import shutil +import tempfile +from pathlib import Path + +import pytest + +from alfred.infrastructure.persistence import Memory, set_memory + + +@pytest.fixture +def infra_temp(tmp_path): + """Real folder layout: downloads (with files), movies, tv_shows, torrents.""" + dl = tmp_path / "downloads" + dl.mkdir() + (dl / "test_movie.mkv").write_bytes(b"fake video") + series_dir = dl / "test_series" + series_dir.mkdir() + (series_dir / "episode1.mkv").write_bytes(b"fake episode") + + (tmp_path / "movies").mkdir() + (tmp_path / "tv_shows").mkdir() + (tmp_path / "torrents").mkdir() + return tmp_path + + +@pytest.fixture +def memory_configured(infra_temp): + """Fresh Memory configured with the real workspace/library_paths API.""" + storage = 
tempfile.mkdtemp() + mem = Memory(storage_dir=storage) + set_memory(mem) + + mem.ltm.workspace.download = str(infra_temp / "downloads") + mem.ltm.workspace.torrent = str(infra_temp / "torrents") + mem.ltm.library_paths.set("movie", str(infra_temp / "movies")) + mem.ltm.library_paths.set("tv_show", str(infra_temp / "tv_shows")) + mem.save() + + yield mem + + shutil.rmtree(storage, ignore_errors=True) diff --git a/tests/infrastructure/test_file_manager.py b/tests/infrastructure/test_file_manager.py new file mode 100644 index 0000000..3951350 --- /dev/null +++ b/tests/infrastructure/test_file_manager.py @@ -0,0 +1,325 @@ +""" +Tests for alfred.infrastructure.filesystem.file_manager.FileManager + +Uses real temp filesystem. No mocks on os.link — we test the actual behavior. +""" + +import os +import stat +from pathlib import Path + +import pytest + +from alfred.infrastructure.filesystem.file_manager import FileManager +from alfred.infrastructure.filesystem.exceptions import PathTraversalError + + +@pytest.fixture +def fm(): + return FileManager() + + +# --------------------------------------------------------------------------- +# copy_file (hard-link) +# --------------------------------------------------------------------------- + +class TestCopyFile: + + def test_creates_hard_link(self, fm, tmp_path): + src = tmp_path / "source.mkv" + src.write_bytes(b"video data") + dst = tmp_path / "dest.mkv" + + result = fm.copy_file(str(src), str(dst)) + + assert result["status"] == "ok" + assert dst.exists() + # Same inode = hard link + assert src.stat().st_ino == dst.stat().st_ino + + def test_returns_correct_metadata(self, fm, tmp_path): + src = tmp_path / "movie.mkv" + src.write_bytes(b"x" * 1024) + dst = tmp_path / "movie_copy.mkv" + + result = fm.copy_file(str(src), str(dst)) + + assert result["filename"] == "movie_copy.mkv" + assert result["size"] == 1024 + assert result["source"] == str(src) + assert result["destination"] == str(dst) + + def test_source_not_found(self, 
fm, tmp_path): + result = fm.copy_file(str(tmp_path / "nope.mkv"), str(tmp_path / "dst.mkv")) + assert result["status"] == "error" + assert result["error"] == "source_not_found" + + def test_source_is_directory(self, fm, tmp_path): + src_dir = tmp_path / "a_dir" + src_dir.mkdir() + result = fm.copy_file(str(src_dir), str(tmp_path / "dst.mkv")) + assert result["error"] == "source_not_file" + + def test_destination_already_exists(self, fm, tmp_path): + src = tmp_path / "src.mkv" + src.write_bytes(b"data") + dst = tmp_path / "dst.mkv" + dst.write_bytes(b"other") + + result = fm.copy_file(str(src), str(dst)) + assert result["error"] == "destination_exists" + + def test_destination_dir_not_found(self, fm, tmp_path): + src = tmp_path / "src.mkv" + src.write_bytes(b"data") + result = fm.copy_file(str(src), str(tmp_path / "nonexistent" / "dst.mkv")) + assert result["error"] == "destination_dir_not_found" + + +# --------------------------------------------------------------------------- +# move_file +# --------------------------------------------------------------------------- + +class TestMoveFile: + + def test_moves_file(self, fm, tmp_path): + src = tmp_path / "episode.mkv" + src.write_bytes(b"video") + dst_dir = tmp_path / "library" + dst_dir.mkdir() + dst = dst_dir / "episode.mkv" + + result = fm.move_file(str(src), str(dst)) + + assert result["status"] == "ok" + assert dst.exists() + assert not src.exists() + + def test_source_deleted_after_move(self, fm, tmp_path): + src = tmp_path / "src.mkv" + src.write_bytes(b"data") + dst = tmp_path / "dst.mkv" + + fm.move_file(str(src), str(dst)) + assert not src.exists() + + def test_move_preserves_content(self, fm, tmp_path): + content = b"important video content" + src = tmp_path / "src.mkv" + src.write_bytes(content) + dst = tmp_path / "dst.mkv" + + fm.move_file(str(src), str(dst)) + assert dst.read_bytes() == content + + def test_move_fails_if_source_missing(self, fm, tmp_path): + result = fm.move_file(str(tmp_path / 
"ghost.mkv"), str(tmp_path / "dst.mkv")) + assert result["status"] == "error" + + def test_move_fails_if_destination_exists(self, fm, tmp_path): + src = tmp_path / "src.mkv" + src.write_bytes(b"a") + dst = tmp_path / "dst.mkv" + dst.write_bytes(b"b") + + result = fm.move_file(str(src), str(dst)) + assert result["status"] == "error" + # Source should NOT be deleted since the link failed + assert src.exists() + + +# --------------------------------------------------------------------------- +# create_seed_links +# --------------------------------------------------------------------------- + +class TestCreateSeedLinks: + + def _setup(self, tmp_path): + """Create realistic download + library + torrent structure.""" + download = tmp_path / "downloads" / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + download.mkdir(parents=True) + video = download / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4" + video.write_bytes(b"video content") + (download / "KONTRAST.txt").write_text("release info") + (download / "[TGx]info.txt").write_text("tgx info") + subs = download / "Subs" / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST" + subs.mkdir(parents=True) + (subs / "2_eng.srt").write_text("subtitle content") + + library = tmp_path / "tv" / "Oz.1997.1080p.WEBRip.x265-KONTRAST" / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + library.mkdir(parents=True) + lib_video = library / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4" + # Hard-link the video to simulate post-move state + os.link(video, lib_video) + video.unlink() # simulate the move + + torrents = tmp_path / "torrents" + torrents.mkdir() + + return lib_video, download, torrents + + def test_creates_torrent_subfolder(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + result = fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + assert result["status"] == "ok" + expected = torrents / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + assert expected.is_dir() + + def test_hard_links_library_video(self, fm, tmp_path): + lib_video, 
download, torrents = self._setup(tmp_path) + fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + linked = torrents / "Oz.S01.1080p.WEBRip.x265-KONTRAST" / lib_video.name + assert linked.exists() + assert linked.stat().st_ino == lib_video.stat().st_ino + + def test_copies_remaining_files(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + result = fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + assert result["status"] == "ok" + torrent_sub = torrents / "Oz.S01.1080p.WEBRip.x265-KONTRAST" + # txt files should be copied + assert (torrent_sub / "KONTRAST.txt").exists() + assert (torrent_sub / "[TGx]info.txt").exists() + + def test_copies_subs_subdirectory(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + srt = torrents / "Oz.S01.1080p.WEBRip.x265-KONTRAST" / "Subs" / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST" / "2_eng.srt" + assert srt.exists() + + def test_returns_copied_and_skipped(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + result = fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + assert result["copied_count"] >= 3 # txt x2 + srt + assert result["skipped"] == [] + + def test_skips_already_existing_files(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + # First call + fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + # Add a new txt file and call again with a fresh lib_video copy + lib2 = lib_video.parent / "Oz.S01E02.1080p.WEBRip.x265-KONTRAST.mp4" + lib2.write_bytes(b"ep2") + (download / "extra.nfo").write_text("nfo") + result2 = fm.create_seed_links(str(lib2), str(download), str(torrents)) + + assert result2["status"] == "ok" + # The already-copied files should appear in skipped + skipped_names = [Path(s).name for s in result2["skipped"]] + assert "KONTRAST.txt" in skipped_names + + def 
test_error_library_file_not_found(self, fm, tmp_path): + download = tmp_path / "downloads" / "SomeShow.S01" + download.mkdir(parents=True) + torrents = tmp_path / "torrents" + torrents.mkdir() + + result = fm.create_seed_links( + str(tmp_path / "ghost.mkv"), + str(download), + str(torrents), + ) + assert result["status"] == "error" + assert result["error"] == "library_file_not_found" + + def test_error_source_folder_not_found(self, fm, tmp_path): + lib = tmp_path / "lib.mkv" + lib.write_bytes(b"v") + torrents = tmp_path / "torrents" + torrents.mkdir() + + result = fm.create_seed_links( + str(lib), + str(tmp_path / "ghost_folder"), + str(torrents), + ) + assert result["status"] == "error" + assert result["error"] == "source_folder_not_found" + + def test_error_torrent_folder_not_found(self, fm, tmp_path): + lib = tmp_path / "lib.mkv" + lib.write_bytes(b"v") + download = tmp_path / "dl" + download.mkdir() + + result = fm.create_seed_links( + str(lib), + str(download), + str(tmp_path / "no_torrents"), + ) + assert result["status"] == "error" + assert result["error"] == "torrent_folder_not_found" + + def test_error_link_already_exists(self, fm, tmp_path): + lib_video, download, torrents = self._setup(tmp_path) + fm.create_seed_links(str(lib_video), str(download), str(torrents)) + + # Second call with same video → link_dest already exists + result = fm.create_seed_links(str(lib_video), str(download), str(torrents)) + assert result["status"] == "error" + assert result["error"] == "destination_exists" + + +# --------------------------------------------------------------------------- +# list_folder +# --------------------------------------------------------------------------- + +class TestListFolder: + + def test_lists_entries(self, fm, memory_configured, infra_temp): + result = fm.list_folder("download") + assert result["status"] == "ok" + assert result["count"] > 0 + assert isinstance(result["entries"], list) + + def test_entries_are_sorted(self, fm, memory_configured, 
infra_temp): + result = fm.list_folder("download") + assert result["entries"] == sorted(result["entries"]) + + def test_folder_not_set(self, fm, memory): + result = fm.list_folder("tv_show") + assert result["status"] == "error" + assert result["error"] == "folder_not_set" + + def test_invalid_folder_type(self, fm, memory): + result = fm.list_folder("nonexistent_type") + assert result["status"] == "error" + + def test_relative_path_within_folder(self, fm, memory_configured, infra_temp): + result = fm.list_folder("download", "test_series") + assert result["status"] == "ok" + + +# --------------------------------------------------------------------------- +# _sanitize_path +# --------------------------------------------------------------------------- + +class TestSanitizePath: + + def test_normal_path(self, fm): + assert fm._sanitize_path("some/path") == "some/path" + + def test_dot_path(self, fm): + assert fm._sanitize_path(".") == "." + + def test_absolute_path_rejected(self, fm): + with pytest.raises(PathTraversalError): + fm._sanitize_path("/etc/passwd") + + def test_parent_traversal_rejected(self, fm): + with pytest.raises(PathTraversalError): + fm._sanitize_path("../../etc/passwd") + + def test_null_byte_rejected(self, fm): + with pytest.raises(PathTraversalError): + fm._sanitize_path("some\x00path") + + def test_normalises_redundant_dots(self, fm): + result = fm._sanitize_path("some/./path") + assert ".." 
not in result diff --git a/tests/test_agent.py b/tests/test_agent.py index 39d03ed..d503377 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -49,7 +49,7 @@ class TestExecuteToolCall: def test_execute_known_tool(self, memory, mock_settings, mock_llm, real_folder): """Should execute known tool.""" agent = Agent(settings=mock_settings, llm=mock_llm) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) tool_call = { "id": "call_123", @@ -145,7 +145,7 @@ class TestStep: self, memory, mock_settings, mock_llm_with_tool_call, real_folder ): """Should execute tool and continue.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) agent = Agent(settings=mock_settings, llm=mock_llm_with_tool_call) @@ -229,8 +229,8 @@ class TestAgentIntegration: def test_multiple_tool_calls(self, memory, mock_settings, mock_llm, real_folder): """Should handle multiple tool calls in sequence.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) - memory.ltm.set_config("movie_folder", str(real_folder["movies"])) + memory.ltm.download_folder = str(real_folder["downloads"]) + memory.ltm.movie_folder = str(real_folder["movies"]) call_count = [0] diff --git a/tests/test_agent_edge_cases.py b/tests/test_agent_edge_cases.py index 61af156..9f59240 100644 --- a/tests/test_agent_edge_cases.py +++ b/tests/test_agent_edge_cases.py @@ -55,7 +55,7 @@ class TestExecuteToolCallEdgeCases: def test_tool_with_extra_args(self, memory, mock_llm, real_folder): """Should handle extra arguments gracefully.""" agent = Agent(settings=settings, llm=mock_llm) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) tool_call = { "id": "call_123", @@ -244,7 +244,7 @@ class TestAgentConcurrencyEdgeCases: def 
test_tool_modifies_memory_during_step(self, memory, mock_llm, real_folder): """Should handle memory modifications during step.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) call_count = [0] @@ -272,7 +272,7 @@ class TestAgentConcurrencyEdgeCases: agent.step("Set movie folder") mem = get_memory() - assert mem.ltm.get_config("movie_folder") == str(real_folder["movies"]) + assert mem.ltm.movie_folder == str(real_folder["movies"]) class TestAgentErrorRecovery: diff --git a/tests/test_api_edge_cases.py b/tests/test_api_edge_cases.py index 7910909..d4be194 100644 --- a/tests/test_api_edge_cases.py +++ b/tests/test_api_edge_cases.py @@ -337,7 +337,7 @@ class TestChatCompletionsEdgeCases: from alfred.infrastructure.persistence import get_memory mem = get_memory() - mem.ltm.set_config("download_folder", str(real_folder["downloads"])) + mem.ltm.download_folder = str(real_folder["downloads"]) call_count = [0] @@ -453,7 +453,7 @@ class TestMemoryEndpointsEdgeCases: mock_llm.return_value = Mock() from alfred.app import app - memory.ltm.set_config("japanese", "ζ—₯本θͺžγƒ†γ‚Ήγƒˆ") + memory.ltm.download_folder = "/path/ζ—₯本θͺžγƒ†γ‚Ήγƒˆ" memory.stm.add_message("user", "🎬 Movie request") client = TestClient(app) @@ -501,7 +501,7 @@ class TestMemoryEndpointsEdgeCases: mock_llm.return_value = Mock() from alfred.app import app - memory.ltm.set_config("important", "data") + memory.ltm.download_folder = "/important/data" memory.stm.add_message("user", "Hello") client = TestClient(app) @@ -510,7 +510,7 @@ class TestMemoryEndpointsEdgeCases: response = client.get("/memory/state") data = response.json() - assert data["ltm"]["config"]["important"] == "data" + assert data["ltm"]["download_folder"] == "/important/data" assert data["stm"]["conversation_history"] == [] diff --git a/tests/test_memory.py b/tests/test_memory.py index 2ecca6e..1bd451e 100644 --- a/tests/test_memory.py +++ 
b/tests/test_memory.py @@ -13,7 +13,7 @@ from alfred.infrastructure.persistence import ( has_memory, init_memory, ) -from alfred.infrastructure.persistence.context import _memory_ctx +from alfred.infrastructure.persistence.context import reset_memory def is_iso_format(s: str) -> bool: @@ -33,10 +33,8 @@ class TestLongTermMemory: def test_default_values(self): ltm = LongTermMemory() - assert ltm.config == {} - assert ltm.preferences["preferred_quality"] == "1080p" - assert "en" in ltm.preferences["preferred_languages"] - assert ltm.library == {"movies": [], "tv_shows": []} + assert ltm.media_preferences.quality == "1080p" + assert "en" in ltm.media_preferences.audio_languages assert ltm.following == [] def test_set_and_get_config(self): @@ -124,7 +122,7 @@ class TestLongTermMemory: } ltm = LongTermMemory.from_dict(data) assert ltm.get_config("download_folder") == "/downloads" - assert ltm.preferences["preferred_quality"] == "4K" + assert ltm.media_preferences.quality == "4K" assert len(ltm.library["movies"]) == 1 @@ -230,12 +228,12 @@ class TestMemoryContext: """Tests for memory context functions.""" def test_get_memory_not_initialized(self): - _memory_ctx.set(None) + reset_memory() with pytest.raises(RuntimeError, match="Memory not initialized"): get_memory() def test_init_memory(self, temp_dir): - _memory_ctx.set(None) + reset_memory() memory = init_memory(str(temp_dir)) assert has_memory() assert get_memory() is memory diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 8878392..0d72d9f 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -38,7 +38,7 @@ class TestPromptBuilder: def test_includes_config(self, memory): """Should include current configuration.""" - memory.ltm.set_config("download_folder", "/path/to/downloads") + memory.ltm.download_folder = "/path/to/downloads" tools = make_tools(settings) builder = PromptBuilder(tools) diff --git a/tests/test_prompts_critical.py b/tests/test_prompts_critical.py index c031016..763c2db 
100644 --- a/tests/test_prompts_critical.py +++ b/tests/test_prompts_critical.py @@ -117,7 +117,7 @@ class TestPromptBuilderMemoryContext: tools = make_tools(settings) builder = PromptBuilder(tools) - memory.ltm.set_config("download_folder", "/test/downloads") + memory.ltm.download_folder = "/test/downloads" prompt = builder.build_system_prompt() @@ -212,12 +212,12 @@ class TestPromptBuilderStructure: tools = make_tools(settings) builder = PromptBuilder(tools) - memory.ltm.set_config("test_key", "test_value") + memory.ltm.download_folder = "/test/downloads" context = builder._format_config_context(memory) assert "CONFIGURATION" in context - assert "test_key" in context + assert "download_folder" in context class TestPromptBuilderEdgeCases: diff --git a/tests/test_prompts_edge_cases.py b/tests/test_prompts_edge_cases.py index fe86b86..ebf94ca 100644 --- a/tests/test_prompts_edge_cases.py +++ b/tests/test_prompts_edge_cases.py @@ -20,8 +20,8 @@ class TestPromptBuilderEdgeCases: def test_prompt_with_unicode_config(self, memory): """Should handle unicode in config.""" - memory.ltm.set_config("folder_ζ—₯本θͺž", "/path/to/ζ—₯本θͺž") - memory.ltm.set_config("emoji_folder", "/path/🎬") + memory.ltm.download_folder = "/path/to/ζ—₯本θͺž" + memory.ltm.tvshow_folder = "/path/🎬" tools = make_tools(settings) builder = PromptBuilder(tools) @@ -34,7 +34,7 @@ class TestPromptBuilderEdgeCases: def test_prompt_with_very_long_config_value(self, memory): """Should handle very long config values.""" long_path = "/very/long/path/" + "x" * 1000 - memory.ltm.set_config("download_folder", long_path) + memory.ltm.download_folder = long_path tools = make_tools(settings) builder = PromptBuilder(tools) @@ -46,7 +46,7 @@ class TestPromptBuilderEdgeCases: def test_prompt_with_special_chars_in_config(self, memory): """Should escape special characters in config.""" - memory.ltm.set_config("path", '/path/with "quotes" and \\backslash') + memory.ltm.download_folder = '/path/with "quotes" and \\backslash' 
tools = make_tools(settings) builder = PromptBuilder(tools) @@ -198,7 +198,7 @@ class TestPromptBuilderEdgeCases: def test_prompt_with_all_sections(self, memory): """Should include all sections when all data present.""" # Config - memory.ltm.set_config("download_folder", "/downloads") + memory.ltm.download_folder = "/downloads" # Search results memory.episodic.store_search_results("test", [{"name": "Result"}]) @@ -242,7 +242,7 @@ class TestPromptBuilderEdgeCases: def test_prompt_json_serializable(self, memory): """Should produce JSON-serializable content.""" - memory.ltm.set_config("key", {"nested": [1, 2, 3]}) + memory.ltm.download_folder = "/some/path" memory.stm.set_entity("complex", {"a": {"b": {"c": "d"}}}) tools = make_tools(settings) diff --git a/tests/test_registry_edge_cases.py b/tests/test_registry_edge_cases.py index a629de6..1b6e142 100644 --- a/tests/test_registry_edge_cases.py +++ b/tests/test_registry_edge_cases.py @@ -258,7 +258,7 @@ class TestToolExecution: def test_tool_returns_dict(self, memory, real_folder): """Should return dict from tool execution.""" tools = make_tools(settings) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = tools["list_folder"].func(folder_type="download") @@ -267,7 +267,7 @@ class TestToolExecution: def test_tool_returns_status(self, memory, real_folder): """Should return status in result.""" tools = make_tools(settings) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = tools["list_folder"].func(folder_type="download") @@ -295,7 +295,7 @@ class TestToolExecution: def test_tool_handles_extra_args(self, memory, real_folder): """Should handle extra arguments.""" tools = make_tools(settings) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) # Extra args 
should raise TypeError with pytest.raises(TypeError): diff --git a/tests/test_tools_edge_cases.py b/tests/test_tools_edge_cases.py index 6ccc1e4..6a0ef85 100644 --- a/tests/test_tools_edge_cases.py +++ b/tests/test_tools_edge_cases.py @@ -271,7 +271,7 @@ class TestFilesystemEdgeCases: """Should list hidden files.""" hidden_file = real_folder["downloads"] / ".hidden" hidden_file.touch() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download") @@ -285,7 +285,7 @@ class TestFilesystemEdgeCases: except OSError: pytest.skip("Cannot create symlinks") - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download") @@ -301,7 +301,7 @@ class TestFilesystemEdgeCases: try: os.chmod(no_read, 0o000) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download") @@ -312,7 +312,7 @@ class TestFilesystemEdgeCases: def test_list_folder_case_sensitivity(self, memory, real_folder): """Should handle case sensitivity correctly.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) # Try with different cases result_lower = fs_tools.list_folder("download") @@ -324,7 +324,7 @@ class TestFilesystemEdgeCases: """Should handle spaces in path.""" space_dir = real_folder["downloads"] / "folder with spaces" space_dir.mkdir() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "folder with spaces") @@ -332,7 +332,7 @@ class TestFilesystemEdgeCases: def test_path_traversal_with_encoded_chars(self, memory, real_folder): """Should block URL-encoded 
traversal attempts.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) # Various encoding attempts attempts = [ @@ -352,7 +352,7 @@ class TestFilesystemEdgeCases: def test_path_with_null_byte(self, memory, real_folder): """Should block null byte injection.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "file\x00.txt") @@ -366,7 +366,7 @@ class TestFilesystemEdgeCases: deep_path = deep_path / f"level{i}" deep_path.mkdir(parents=True) - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) # Navigate to deep path relative_path = "/".join([f"level{i}" for i in range(20)]) @@ -380,7 +380,7 @@ class TestFilesystemEdgeCases: for i in range(1000): (real_folder["downloads"] / f"file_{i:04d}.txt").touch() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download") diff --git a/tests/test_tools_filesystem.py b/tests/test_tools_filesystem.py index 8245594..7ac4a1e 100644 --- a/tests/test_tools_filesystem.py +++ b/tests/test_tools_filesystem.py @@ -24,7 +24,7 @@ class TestSetPathForFolder: fs_tools.set_path_for_folder("download", str(real_folder["downloads"])) mem = get_memory() - assert mem.ltm.get_config("download_folder") == str(real_folder["downloads"]) + assert mem.ltm.download_folder == str(real_folder["downloads"]) def test_all_folder_types(self, memory, real_folder): """Should accept all valid folder types.""" @@ -73,7 +73,7 @@ class TestListFolder: def test_success(self, memory, real_folder): """Should list folder contents.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) 
result = fs_tools.list_folder("download") @@ -84,7 +84,7 @@ class TestListFolder: def test_subfolder(self, memory, real_folder): """Should list subfolder contents.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "test_series") @@ -105,7 +105,7 @@ class TestListFolder: def test_path_traversal_dotdot(self, memory, real_folder): """Should block path traversal with ..""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "../") @@ -113,7 +113,7 @@ class TestListFolder: def test_path_traversal_absolute(self, memory, real_folder): """Should block absolute paths.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "/etc/passwd") @@ -121,7 +121,7 @@ class TestListFolder: def test_path_traversal_encoded(self, memory, real_folder): """Should block encoded traversal attempts.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "..%2F..%2Fetc") @@ -130,7 +130,7 @@ class TestListFolder: def test_path_not_exists(self, memory, real_folder): """Should return error for non-existent path.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "nonexistent_folder") @@ -138,7 +138,7 @@ class TestListFolder: def test_path_is_file(self, memory, real_folder): """Should return error if path is a file.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = 
fs_tools.list_folder("download", "test_movie.mkv") @@ -148,7 +148,7 @@ class TestListFolder: """Should handle empty folder.""" empty_dir = real_folder["downloads"] / "empty" empty_dir.mkdir() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "empty") @@ -161,7 +161,7 @@ class TestListFolder: # Create files with different names (real_folder["downloads"] / "zebra.txt").touch() (real_folder["downloads"] / "alpha.txt").touch() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download") @@ -176,7 +176,7 @@ class TestFileManagerSecurity: def test_null_byte_injection(self, memory, real_folder): """Should block null byte injection.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "test\x00.txt") @@ -184,7 +184,7 @@ class TestFileManagerSecurity: def test_path_outside_root(self, memory, real_folder): """Should block paths that escape root.""" - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) # Try to access parent directory result = fs_tools.list_folder("download", "test_series/../../") @@ -200,7 +200,7 @@ class TestFileManagerSecurity: except OSError: pytest.skip("Cannot create symlinks") - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) + memory.ltm.download_folder = str(real_folder["downloads"]) result = fs_tools.list_folder("download", "escape_link") @@ -212,7 +212,7 @@ class TestFileManagerSecurity: """Should handle special characters in path.""" special_dir = real_folder["downloads"] / "special !@#$%" special_dir.mkdir() - memory.ltm.set_config("download_folder", str(real_folder["downloads"])) 
+        memory.ltm.download_folder = str(real_folder["downloads"])
 
         result = fs_tools.list_folder("download", "special !@#$%")
 
@@ -222,7 +222,7 @@ class TestFileManagerSecurity:
         """Should handle unicode in path."""
         unicode_dir = real_folder["downloads"] / "日本語フォルダ"
         unicode_dir.mkdir()
-        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
+        memory.ltm.download_folder = str(real_folder["downloads"])
 
         result = fs_tools.list_folder("download", "日本語フォルダ")
 
@@ -230,7 +230,7 @@ class TestFileManagerSecurity:
 
     def test_very_long_path(self, memory, real_folder):
         """Should handle very long paths gracefully."""
-        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
+        memory.ltm.download_folder = str(real_folder["downloads"])
 
         long_path = "a" * 1000
 
diff --git a/tests/test_tools_language.py b/tests/test_tools_language.py
new file mode 100644
index 0000000..9b2b4fa
--- /dev/null
+++ b/tests/test_tools_language.py
@@ -0,0 +1,48 @@
+"""Tests for language tools."""
+
+import pytest
+
+from alfred.agent.tools.language import set_language
+
+
+class TestSetLanguage:
+    """Tests for the set_language tool."""
+
+    def test_success_returns_ok(self, memory):
+        """Should report success and echo the language back."""
+        result = set_language("fr")
+        assert result["status"] == "ok"
+        assert result["language"] == "fr"
+
+    def test_message_contains_language(self, memory):
+        """Should mention the language code in the message."""
+        result = set_language("en")
+        assert "en" in result["message"]
+
+    def test_persists_to_memory(self, memory):
+        """Should store the language in STM."""
+        set_language("es")
+        # Verify it's stored in STM
+        from alfred.infrastructure.persistence import get_memory
+        mem = get_memory()
+        assert mem.stm.language == "es"
+
+    @pytest.mark.parametrize("lang", ["en", "fr", "es", "de", "it", "pt"])
+    def test_various_language_codes(self, memory, lang):
+        """Should accept each code; parametrized so one failure hides no other."""
+        result = set_language(lang)
+        assert result["status"] == "ok"
+        assert result["language"] == lang
+
+    def test_error_on_memory_failure(self, monkeypatch):
+        """Should return an error result when get_memory raises."""
+        from alfred.agent.tools import language as lang_module
+
+        def broken_get_memory():
+            raise RuntimeError("memory unavailable")
+
+        # Make the language module's get_memory raise, then check the result.
+        monkeypatch.setattr(lang_module, "get_memory", broken_get_memory)
+        result = set_language("fr")
+        assert result["status"] == "error"
+        assert "error" in result
diff --git a/tests/workflows/__init__.py b/tests/workflows/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/workflows/test_workflow_loader.py b/tests/workflows/test_workflow_loader.py
new file mode 100644
index 0000000..9b23752
--- /dev/null
+++ b/tests/workflows/test_workflow_loader.py
@@ -0,0 +1,186 @@
+"""
+Tests for alfred.agent.workflows.loader.WorkflowLoader
+"""
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+from alfred.agent.workflows.loader import WorkflowLoader
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def workflows_dir(tmp_path):
+    """A temp directory pre-populated with one valid workflow YAML."""
+    wf = {
+        "name": "test_workflow",
+        "description": "A test workflow",
+        "tools": ["list_folder", "move_media"],
+        "steps": [
+            {"id": "step1", "tool": "list_folder"},
+        ],
+    }
+    (tmp_path / "test_workflow.yaml").write_text(yaml.dump(wf))
+    return tmp_path
+
+
+@pytest.fixture
+def loader_from_dir(workflows_dir, monkeypatch):
+    """WorkflowLoader pointed at our temp dir."""
+    import alfred.agent.workflows.loader as loader_module
+    monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", workflows_dir)
+    return WorkflowLoader()
+
+
+# ---------------------------------------------------------------------------
+# Real loader (loads actual YAML files from the repo)
+# ---------------------------------------------------------------------------
+
+class TestRealWorkflows:
+    """Checks against the workflow YAML files shipped in the repository."""
+
+    def test_organize_media_loaded(self):
+        """Should list organize_media among the loaded workflow names."""
+        loader = WorkflowLoader()
+        assert "organize_media" in loader.names()
+
+    def test_organize_media_has_required_keys(self):
+        """Should define name, steps and tools in organize_media."""
+        loader = WorkflowLoader()
+        wf = loader.get("organize_media")
+        assert "name" in wf
+        assert "steps" in wf
+        assert "tools" in wf
+
+    def test_organize_media_tools_list(self):
+        """Should declare every tool the organize_media workflow relies on."""
+        loader = WorkflowLoader()
+        wf = loader.get("organize_media")
+        tools = wf["tools"]
+        assert "list_folder" in tools
+        assert "move_media" in tools
+        assert "manage_subtitles" in tools
+        assert "create_seed_links" in tools
+        assert "resolve_destination" in tools
+
+    def test_organize_media_steps_order(self):
+        """Should place each prerequisite step before the step that needs it."""
+        loader = WorkflowLoader()
+        wf = loader.get("organize_media")
+        step_ids = [s["id"] for s in wf["steps"]]
+        # resolve_destination must come before move_file
+        assert step_ids.index("resolve_destination") < step_ids.index("move_file")
+        # move_file before handle_subtitles
+        assert step_ids.index("move_file") < step_ids.index("handle_subtitles")
+        # ask_seeding before create_seed_links
+        assert step_ids.index("ask_seeding") < step_ids.index("create_seed_links")
+
+    def test_ask_seeding_has_yes_no_answers(self):
+        """Should offer both a "yes" and a "no" answer on the ask_seeding step."""
+        loader = WorkflowLoader()
+        wf = loader.get("organize_media")
+        ask_step = next(s for s in wf["steps"] if s["id"] == "ask_seeding")
+        answers = ask_step["ask_user"]["answers"]
+        # PyYAML follows YAML 1.1, so unquoted yes/no keys load as booleans and
+        # str(True) is "True", not "yes": this only passes when the workflow
+        # file quotes the keys.
+        answer_keys = {str(k) for k in answers.keys()}
+        assert "yes" in answer_keys
+        assert "no" in answer_keys
+
+    def test_naming_convention_present(self):
+        """Should define naming conventions for both TV shows and movies."""
+        loader = WorkflowLoader()
+        wf = loader.get("organize_media")
+        assert "naming_convention" in wf
+        assert "tv_show" in wf["naming_convention"]
+        assert "movie" in wf["naming_convention"]
+
+
+# ---------------------------------------------------------------------------
+# WorkflowLoader mechanics (via monkeypatched dir)
+# ---------------------------------------------------------------------------
+
+class TestLoaderMechanics:
+    """Loader behaviour against a temp directory patched in as _WORKFLOWS_DIR."""
+
+    def test_get_returns_workflow(self, loader_from_dir):
+        """Should return the workflow stored under a known name."""
+        wf = loader_from_dir.get("test_workflow")
+        assert wf is not None
+        assert wf["name"] == "test_workflow"
+
+    def test_get_returns_none_for_unknown(self, loader_from_dir):
+        """Should return None for a name that was never loaded."""
+        assert loader_from_dir.get("nonexistent") is None
+
+    def test_names_returns_list(self, loader_from_dir):
+        """Should return the workflow names as a list."""
+        names = loader_from_dir.names()
+        assert isinstance(names, list)
+        assert "test_workflow" in names
+
+    def test_all_returns_dict(self, loader_from_dir):
+        """Should return all workflows as a dict keyed by name."""
+        all_wf = loader_from_dir.all()
+        assert isinstance(all_wf, dict)
+        assert "test_workflow" in all_wf
+
+    def test_uses_yaml_name_field(self, tmp_path, monkeypatch):
+        """name from YAML content takes priority over filename stem."""
+        import alfred.agent.workflows.loader as loader_module
+        monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", tmp_path)
+
+        wf = {"name": "my_custom_name", "steps": []}
+        (tmp_path / "completely_different_filename.yaml").write_text(yaml.dump(wf))
+
+        loader = WorkflowLoader()
+        assert "my_custom_name" in loader.names()
+        assert "completely_different_filename" not in loader.names()
+
+    def test_falls_back_to_stem_when_no_name(self, tmp_path, monkeypatch):
+        """Should key the workflow by filename stem when the YAML has no name."""
+        import alfred.agent.workflows.loader as loader_module
+        monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", tmp_path)
+
+        (tmp_path / "my_workflow.yaml").write_text(yaml.dump({"steps": []}))
+        loader = WorkflowLoader()
+        assert "my_workflow" in loader.names()
+
+    def test_skips_malformed_yaml(self, tmp_path, monkeypatch):
+        """Should skip a file that is not valid YAML and still load the others."""
+        import alfred.agent.workflows.loader as loader_module
+        monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", tmp_path)
+
+        (tmp_path / "valid.yaml").write_text(yaml.dump({"name": "valid", "steps": []}))
+        (tmp_path / "broken.yaml").write_text("key: [unclosed bracket")
+
+        loader = WorkflowLoader()
+        assert "valid" in loader.names()
+        assert "broken" not in loader.names()
+
+    def test_deterministic_load_order(self, tmp_path, monkeypatch):
+        """Files loaded in sorted order — later file wins on name collision."""
+        import alfred.agent.workflows.loader as loader_module
+        monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", tmp_path)
+
+        (tmp_path / "a_workflow.yaml").write_text(yaml.dump({"name": "duplicate", "version": 1}))
+        (tmp_path / "b_workflow.yaml").write_text(yaml.dump({"name": "duplicate", "version": 2}))
+
+        loader = WorkflowLoader()
+        # b_workflow loaded last → version 2 wins
+        assert loader.get("duplicate")["version"] == 2
+
+    def test_empty_directory(self, tmp_path, monkeypatch):
+        """Should expose no workflows when the directory holds no YAML files."""
+        import alfred.agent.workflows.loader as loader_module
+        monkeypatch.setattr(loader_module, "_WORKFLOWS_DIR", tmp_path)
+
+        loader = WorkflowLoader()
+        assert loader.names() == []
+        assert loader.all() == {}