refactor: tech debt mini-pass (items 5, 6, 7, 20)

Low-risk cleanup items, no functional change to the parser. The philosophy remains: keep the parser simple, the AI handles edge cases. - Extract duplicated 'fs-safe title → dot-folder-name' regex into to_dot_folder_name() in domain/shared/value_objects.py. Used by both MovieTitle.normalized() and TVShow.get_folder_name() (item #5). - ParsedRelease.languages now uses field(default_factory=list) instead of a manual __post_init__ assigning [] via object.__setattr__ (#6). - tv_shows/entities.py module docstring: prepend ASCII ownership tree for quicker visual scan of the aggregate hierarchy (#7). - file_extensions.yaml: split subtitle sidecars (.srt/.sub/.idx/.ass/.ssa) into a dedicated 'subtitle:' category instead of lumping them under 'metadata:'. _METADATA_EXTENSIONS at the value_objects.py level remains the union of both — detect_media_type behavior unchanged. New loader load_subtitle_extensions() exposes the distinct subtitle set for future callers in the subtitles domain (#20). Suite: 1020 passed, 8 skipped.
test(fixtures): seed PATH OF PAIN bucket with 10 worst-case fixtures
2026-05-18 16:24:28 +02:00 · 2026-05-18 15:57:56 +02:00 · 2026-05-18 15:51:11 +02:00 · 2026-05-18 15:48:41 +02:00 · 2026-05-18 15:39:04 +02:00 · 2026-05-18 15:36:19 +02:00
277 changed files with 32027 additions and 8820 deletions
@@ -0,0 +1,80 @@
 # --- IMPORTANT ---
 # Settings are split across multiple files for clarity.
 # Files (loaded in this order, last wins):
 #   .env.alfred   — app config and service addresses (safe to commit)
 #   .env.secrets  — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
 #   .env.make     — build metadata synced from pyproject.toml (safe to commit)
 #
 # To customize: edit .env.alfred for config, .env.secrets for secrets.
 # --- Alfred ---
 MAX_HISTORY_MESSAGES=10
 MAX_TOOL_ITERATIONS=10
 REQUEST_TIMEOUT=30
 # LLM Settings
 LLM_TEMPERATURE=0.2
 # Persistence
 DATA_STORAGE_DIR=data
 # Network
 HOST=0.0.0.0
 PORT=3080
 # --- DATABASES ---
 # Passwords and connection URIs are auto-generated in .env.secrets.
 # Edit host/port/user/dbname here if needed.
 # MongoDB (Application Data)
 MONGO_HOST=mongodb
 MONGO_PORT=27017
 MONGO_USER=alfred
 MONGO_DB_NAME=alfred
 # PostgreSQL (Vector Database / RAG)
 POSTGRES_HOST=vectordb
 POSTGRES_PORT=5432
 POSTGRES_USER=alfred
 POSTGRES_DB_NAME=alfred
 # --- EXTERNAL SERVICES ---
 # TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
 # → TMDB_API_KEY goes in .env.secrets
 TMDB_BASE_URL=https://api.themoviedb.org/3
 # qBittorrent
 # → QBITTORRENT_PASSWORD goes in .env.secrets
 QBITTORRENT_URL=https://qb.lan.anustart.top
 QBITTORRENT_USERNAME=letmein
 QBITTORRENT_PORT=16140
 # Path translation: host-side prefix → container-side prefix
 QBITTORRENT_HOST_PATH=/mnt/testipool
 QBITTORRENT_CONTAINER_PATH=/mnt/data
 # Meilisearch
 # → MEILI_MASTER_KEY goes in .env.secrets
 # MEILI_ENABLED=false # THIS KEY DOESN'T EXIST => SEARCH IS THE PROPER KEY
 SEARCH=false
 MEILI_NO_ANALYTICS=true
 MEILI_HOST=http://meilisearch:7700
 # --- LLM CONFIGURATION ---
 # Providers: local, openai, anthropic, deepseek, google, kimi
 # → API keys go in .env.secrets
 DEFAULT_LLM_PROVIDER=deepseek
 # Local LLM (Ollama)
 #OLLAMA_BASE_URL=http://ollama:11434
 #OLLAMA_MODEL=llama3.3:latest
 OLLAMA_BASE_URL=http://10.0.0.11:11434
 OLLAMA_MODEL=glm-4.7-flash:latest
 # --- RAG ENGINE ---
 RAG_ENABLED=TRUE
 RAG_API_URL=http://rag_api:8000
 RAG_API_PORT=8000
 EMBEDDINGS_PROVIDER=ollama
 EMBEDDINGS_MODEL=nomic-embed-text
@@ -1,3 +1,13 @@
 # --- IMPORTANT ---
 # Settings are split across multiple files for clarity.
 # Files (loaded in this order, last wins):
 #   .env.alfred   — app config and service addresses (safe to commit)
 #   .env.secrets  — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
 #   .env.make     — build metadata synced from pyproject.toml (safe to commit)
 #
 # To customize: edit .env.alfred for config, .env.secrets for secrets.
 # --- Alfred ---
 MAX_HISTORY_MESSAGES=10
 MAX_TOOL_ITERATIONS=10
 REQUEST_TIMEOUT=30
@@ -8,84 +18,54 @@ LLM_TEMPERATURE=0.2
 # Persistence
 DATA_STORAGE_DIR=data
-# Network configuration
+# Network
 HOST=0.0.0.0
 PORT=3080
-# Build informations (Synced with pyproject.toml via bootstrap)
+# --- DATABASES ---
-ALFRED_VERSION=
+# Passwords and connection URIs are auto-generated in .env.secrets.
-IMAGE_NAME=
+# Edit host/port/user/dbname here if needed.
 LIBRECHAT_VERSION=
 PYTHON_VERSION=
 PYTHON_VERSION_SHORT=
 RAG_VERSION=
 RUNNER=
 SERVICE_NAME=
 # --- SECURITY KEYS (CRITICAL) ---
 # These are used for session tokens and encrypting sensitive data in MongoDB.
 # If you lose these, you lose access to encrypted stored credentials.
 JWT_SECRET=
 JWT_REFRESH_SECRET=
 CREDS_KEY=
 CREDS_IV=
 # --- DATABASES (AUTO-SECURED) ---
 # Alfred uses MongoDB for application state and PostgreSQL for Vector RAG.
 # Passwords will be generated as 24-character secure tokens if left blank.
 # MongoDB (Application Data)
 MONGO_URI=
 MONGO_HOST=mongodb
 MONGO_PORT=27017
 MONGO_USER=alfred
-MONGO_PASSWORD=
+MONGO_DB_NAME=LibreChat
 MONGO_DB_NAME=alfred
 # PostgreSQL (Vector Database / RAG)
 POSTGRES_URI=
 POSTGRES_HOST=vectordb
 POSTGRES_PORT=5432
 POSTGRES_USER=alfred
 POSTGRES_PASSWORD=
 POSTGRES_DB_NAME=alfred
 # --- EXTERNAL SERVICES ---
-# Media Metadata (Required)
+
-# Get your key at https://www.themoviedb.org/
+# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
-TMDB_API_KEY=
+# → TMDB_API_KEY goes in .env.secrets
 TMDB_BASE_URL=https://api.themoviedb.org/3
-# qBittorrent integration
+# qBittorrent
 # → QBITTORRENT_PASSWORD goes in .env.secrets
 QBITTORRENT_URL=http://qbittorrent:16140
 QBITTORRENT_USERNAME=admin
 QBITTORRENT_PASSWORD=
 QBITTORRENT_PORT=16140
 # Meilisearch
 # → MEILI_MASTER_KEY goes in .env.secrets
 MEILI_ENABLED=FALSE
 MEILI_NO_ANALYTICS=TRUE
 MEILI_HOST=http://meilisearch:7700
 MEILI_MASTER_KEY=
 # --- LLM CONFIGURATION ---
-# Providers: 'local', 'openai', 'anthropic', 'deepseek', 'google', 'kimi'
+# Providers: local, openai, anthropic, deepseek, google, kimi
 # → API keys go in .env.secrets
 DEFAULT_LLM_PROVIDER=local
 # Local LLM (Ollama)
 OLLAMA_BASE_URL=http://ollama:11434
 OLLAMA_MODEL=llama3.3:latest
 # --- API KEYS (OPTIONAL) ---
 # Fill only the ones you intend to use.
 ANTHROPIC_API_KEY=
 DEEPSEEK_API_KEY=
 GOOGLE_API_KEY=
 KIMI_API_KEY=
 OPENAI_API_KEY=
 # --- RAG ENGINE ---
 # Enable/Disable the Retrieval Augmented Generation system
 RAG_ENABLED=TRUE
 RAG_API_URL=http://rag_api:8000
 RAG_API_PORT=8000
@@ -0,0 +1,878 @@
 #=====================================================================#
 #                       LibreChat Configuration                       #
 #=====================================================================#
 # Please refer to the reference documentation for assistance          #
 # with configuring your LibreChat environment.                        #
 #                                                                     #
 # https://www.librechat.ai/docs/configuration/dotenv                  #
 #=====================================================================#
 #==================================================#
 #               Server Configuration               #
 #==================================================#
 HOST=localhost
 PORT=3080
 MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
 # The maximum number of connections in the connection pool.
 MONGO_MAX_POOL_SIZE=
 # The minimum number of connections in the connection pool.
 MONGO_MIN_POOL_SIZE=
 # The maximum number of connections that may be in the process of being established concurrently by the connection pool.
 MONGO_MAX_CONNECTING=
 # The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
 MONGO_MAX_IDLE_TIME_MS=
 # The maximum time in milliseconds that a thread can wait for a connection to become available.
 MONGO_WAIT_QUEUE_TIMEOUT_MS=
 # Set to false to disable automatic index creation for all models associated with this connection.
 MONGO_AUTO_INDEX=
 # Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
 MONGO_AUTO_CREATE=
 DOMAIN_CLIENT=http://localhost:3080
 DOMAIN_SERVER=http://localhost:3080
 NO_INDEX=true
 # Use the address that is at most n number of hops away from the Express application.
 # req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
 # A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
 # Defaulted to 1.
 TRUST_PROXY=1
 # Minimum password length for user authentication
 # Default: 8
 # Note: When using LDAP authentication, you may want to set this to 1 
 # to bypass local password validation, as LDAP servers handle their own
 # password policies.
 # MIN_PASSWORD_LENGTH=8
 # When enabled, the app will continue running after encountering uncaught exceptions
 # instead of exiting the process. Not recommended for production unless necessary.
 # CONTINUE_ON_UNCAUGHT_EXCEPTION=false
 #===============#
 # JSON Logging  #
 #===============#
 # Use when process console logs in cloud deployment like GCP/AWS
 CONSOLE_JSON=false
 #===============#
 # Debug Logging #
 #===============#
 DEBUG_LOGGING=true
 DEBUG_CONSOLE=false
 # Set to true to enable agent debug logging
 AGENT_DEBUG_LOGGING=false
 # Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
 # MEM_DIAG=true
 #=============#
 # Permissions #
 #=============#
 # UID=1000
 # GID=1000
 #==============#
 # Node Options #
 #==============#
 # NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
 # This variable is used as a build argument for Docker or CI/CD workflows,
 # and is NOT used by Node.js to set the heap size at runtime.
 # To configure Node.js memory, use NODE_OPTIONS, e.g.:
 # NODE_OPTIONS="--max-old-space-size=6144"
 # See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
 NODE_MAX_OLD_SPACE_SIZE=6144
 #===============#
 # Configuration #
 #===============#
 # Use an absolute path, a relative path, or a URL
 # CONFIG_PATH="/alternative/path/to/librechat.yaml"
 #==================#
 # Langfuse Tracing #
 #==================#
 # Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
 # LANGFUSE_PUBLIC_KEY=
 # LANGFUSE_SECRET_KEY=
 # LANGFUSE_BASE_URL=
 #===================================================#
 #                     Endpoints                     #
 #===================================================#
 # ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
 PROXY=
 #===================================#
 # Known Endpoints - librechat.yaml  #
 #===================================#
 # https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
 # ANYSCALE_API_KEY=
 # APIPIE_API_KEY=
 # COHERE_API_KEY=
 # DEEPSEEK_API_KEY=
 # DATABRICKS_API_KEY=
 # FIREWORKS_API_KEY=
 # GROQ_API_KEY=
 # HUGGINGFACE_TOKEN=
 # MISTRAL_API_KEY=
 # OPENROUTER_KEY=
 # PERPLEXITY_API_KEY=
 # SHUTTLEAI_API_KEY=
 # TOGETHERAI_API_KEY=
 # UNIFY_API_KEY=
 # XAI_API_KEY=
 #============#
 # Anthropic  #
 #============#
 ANTHROPIC_API_KEY=user_provided
 # ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
 # ANTHROPIC_REVERSE_PROXY=
 # Set to true to use Anthropic models through Google Vertex AI instead of direct API
 # ANTHROPIC_USE_VERTEX=
 # ANTHROPIC_VERTEX_REGION=us-east5
 #============#
 # Azure      #
 #============#
 # Note: these variables are DEPRECATED
 # Use the `librechat.yaml` configuration for `azureOpenAI` instead
 # You may also continue to use them if you opt out of using the `librechat.yaml` configuration
 # AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
 # AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
 # AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
 # AZURE_API_KEY= # Deprecated
 # AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
 # AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
 # AZURE_OPENAI_API_VERSION= # Deprecated
 # AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
 # AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
 #=================#
 #   AWS Bedrock   #
 #=================#
 # BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
 # BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
 # BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
 # BEDROCK_AWS_SESSION_TOKEN=someSessionToken
 # Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
 # BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
 # Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
 # See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
 # Notes on specific models:
 # The following models are not supported because they do not support streaming:
 # ai21.j2-mid-v1
 # The following models are not supported because they do not support conversation history:
 # ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
 #============#
 # Google     #
 #============#
 GOOGLE_KEY=user_provided
 # GOOGLE_REVERSE_PROXY=
 # Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
 # GOOGLE_AUTH_HEADER=true
 # Gemini API (AI Studio)
 # GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
 # Vertex AI
 # GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
 # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
 # Google Cloud region for Vertex AI (used by both chat and image generation)
 # GOOGLE_LOC=us-central1
 # Alternative region env var for Gemini Image Generation
 # GOOGLE_CLOUD_LOCATION=global
 # Vertex AI Service Account Configuration
 # Path to your Google Cloud service account JSON file
 # GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
 # Google Safety Settings
 # NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
 #
 # For Vertex AI:
 # To use the BLOCK_NONE setting, you need either:
 # (a) Access through an allowlist via your Google account team, or
 # (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
 #
 # For Gemini API (AI Studio):
 # BLOCK_NONE is available by default, no special account requirements.
 #
 # Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
 #
 # GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
 # GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
 # GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
 # GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
 # GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
 #========================#
 # Gemini Image Generation #
 #========================#
 # Gemini Image Generation Tool (for Agents)
 # Supports multiple authentication methods in priority order:
 # 1. User-provided API key (via GUI)
 # 2. GEMINI_API_KEY env var (admin-configured)
 # 3. GOOGLE_KEY env var (shared with Google chat endpoint)
 # 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
 # Option A: Use dedicated Gemini API key for image generation
 # GEMINI_API_KEY=your-gemini-api-key
 # Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
 # GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
 #============#
 # OpenAI     #
 #============#
 OPENAI_API_KEY=user_provided
 # OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
 DEBUG_OPENAI=false
 # TITLE_CONVO=false
 # OPENAI_TITLE_MODEL=gpt-4o-mini
 # OPENAI_SUMMARIZE=true
 # OPENAI_SUMMARY_MODEL=gpt-4o-mini
 # OPENAI_FORCE_PROMPT=true
 # OPENAI_REVERSE_PROXY=
 # OPENAI_ORGANIZATION=
 #====================#
 #   Assistants API   #
 #====================#
 ASSISTANTS_API_KEY=user_provided
 # ASSISTANTS_BASE_URL=
 # ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
 #==========================#
 #   Azure Assistants API   #
 #==========================#
 # Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
 # The models for Azure Assistants are also determined by your Azure OpenAI configuration.
 # More info, including how to enable use of Assistants with Azure here:
 # https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
 CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
 CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
 # Azure AI Search
 #-----------------
 AZURE_AI_SEARCH_SERVICE_ENDPOINT=
 AZURE_AI_SEARCH_INDEX_NAME=
 AZURE_AI_SEARCH_API_KEY=
 AZURE_AI_SEARCH_API_VERSION=
 AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
 AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
 AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
 # OpenAI Image Tools Customization
 #----------------
 # IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
 # IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
 # IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
 # IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
 # IMAGE_GEN_OAI_DESCRIPTION=
 # IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
 # IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
 # IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
 # IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
 # IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
 # DALL·E
 #----------------
 # DALLE_API_KEY=
 # DALLE3_API_KEY=
 # DALLE2_API_KEY=
 # DALLE3_SYSTEM_PROMPT=
 # DALLE2_SYSTEM_PROMPT=
 # DALLE_REVERSE_PROXY=
 # DALLE3_BASEURL=
 # DALLE2_BASEURL=
 # DALL·E (via Azure OpenAI)
 # Note: requires some of the variables above to be set
 #----------------
 # DALLE3_AZURE_API_VERSION=
 # DALLE2_AZURE_API_VERSION=
 # Flux
 #-----------------
 FLUX_API_BASE_URL=https://api.us1.bfl.ai
 # FLUX_API_BASE_URL = 'https://api.bfl.ml';
 # Get your API key at https://api.us1.bfl.ai/auth/profile
 # FLUX_API_KEY=
 # Google
 #-----------------
 GOOGLE_SEARCH_API_KEY=
 GOOGLE_CSE_ID=
 # Stable Diffusion
 #-----------------
 SD_WEBUI_URL=http://host.docker.internal:7860
 # Tavily
 #-----------------
 TAVILY_API_KEY=
 # Traversaal
 #-----------------
 TRAVERSAAL_API_KEY=
 # WolframAlpha
 #-----------------
 WOLFRAM_APP_ID=
 # Zapier
 #-----------------
 ZAPIER_NLA_API_KEY=
 #==================================================#
 #                      Search                      #
 #==================================================#
 SEARCH=true
 MEILI_NO_ANALYTICS=true
 MEILI_HOST=http://0.0.0.0:7700
 MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
 # Optional: Disable indexing, useful in a multi-node setup
 # where only one instance should perform an index sync.
 # MEILI_NO_SYNC=true
 #==================================================#
 #          Speech to Text & Text to Speech         #
 #==================================================#
 STT_API_KEY=
 TTS_API_KEY=
 #==================================================#
 #                        RAG                       #
 #==================================================#
 # More info: https://www.librechat.ai/docs/configuration/rag_api
 # RAG_OPENAI_BASEURL=
 # RAG_OPENAI_API_KEY=
 # RAG_USE_FULL_CONTEXT=
 # EMBEDDINGS_PROVIDER=openai
 # EMBEDDINGS_MODEL=text-embedding-3-small
 #===================================================#
 #                    User System                    #
 #===================================================#
 #========================#
 # Moderation             #
 #========================#
 OPENAI_MODERATION=false
 OPENAI_MODERATION_API_KEY=
 # OPENAI_MODERATION_REVERSE_PROXY=
 BAN_VIOLATIONS=true
 BAN_DURATION=1000 * 60 * 60 * 2
 BAN_INTERVAL=20
 LOGIN_VIOLATION_SCORE=1
 REGISTRATION_VIOLATION_SCORE=1
 CONCURRENT_VIOLATION_SCORE=1
 MESSAGE_VIOLATION_SCORE=1
 NON_BROWSER_VIOLATION_SCORE=20
 TTS_VIOLATION_SCORE=0
 STT_VIOLATION_SCORE=0
 FORK_VIOLATION_SCORE=0
 IMPORT_VIOLATION_SCORE=0
 FILE_UPLOAD_VIOLATION_SCORE=0
 LOGIN_MAX=7
 LOGIN_WINDOW=5
 REGISTER_MAX=5
 REGISTER_WINDOW=60
 LIMIT_CONCURRENT_MESSAGES=true
 CONCURRENT_MESSAGE_MAX=2
 LIMIT_MESSAGE_IP=true
 MESSAGE_IP_MAX=40
 MESSAGE_IP_WINDOW=1
 LIMIT_MESSAGE_USER=false
 MESSAGE_USER_MAX=40
 MESSAGE_USER_WINDOW=1
 ILLEGAL_MODEL_REQ_SCORE=5
 #========================#
 # Balance                #
 #========================#
 # CHECK_BALANCE=false
 # START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
 #========================#
 # Registration and Login #
 #========================#
 ALLOW_EMAIL_LOGIN=true
 ALLOW_REGISTRATION=true
 ALLOW_SOCIAL_LOGIN=false
 ALLOW_SOCIAL_REGISTRATION=false
 ALLOW_PASSWORD_RESET=false
 # ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
 ALLOW_UNVERIFIED_EMAIL_LOGIN=true
 SESSION_EXPIRY=1000 * 60 * 15
 REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
 JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
 JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
 # Discord
 DISCORD_CLIENT_ID=
 DISCORD_CLIENT_SECRET=
 DISCORD_CALLBACK_URL=/oauth/discord/callback
 # Facebook
 FACEBOOK_CLIENT_ID=
 FACEBOOK_CLIENT_SECRET=
 FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
 # GitHub
 GITHUB_CLIENT_ID=
 GITHUB_CLIENT_SECRET=
 GITHUB_CALLBACK_URL=/oauth/github/callback
 # GitHub Enterprise
 # GITHUB_ENTERPRISE_BASE_URL=
 # GITHUB_ENTERPRISE_USER_AGENT=
 # Google
 GOOGLE_CLIENT_ID=
 GOOGLE_CLIENT_SECRET=
 GOOGLE_CALLBACK_URL=/oauth/google/callback
 # Apple
 APPLE_CLIENT_ID=
 APPLE_TEAM_ID=
 APPLE_KEY_ID=
 APPLE_PRIVATE_KEY_PATH=
 APPLE_CALLBACK_URL=/oauth/apple/callback
 # OpenID
 OPENID_CLIENT_ID=
 OPENID_CLIENT_SECRET=
 OPENID_ISSUER=
 OPENID_SESSION_SECRET=
 OPENID_SCOPE="openid profile email"
 OPENID_CALLBACK_URL=/oauth/openid/callback
 OPENID_REQUIRED_ROLE=
 OPENID_REQUIRED_ROLE_TOKEN_KIND=
 OPENID_REQUIRED_ROLE_PARAMETER_PATH=
 OPENID_ADMIN_ROLE=
 OPENID_ADMIN_ROLE_PARAMETER_PATH=
 OPENID_ADMIN_ROLE_TOKEN_KIND=
 # Set to determine which user info property returned from OpenID Provider to store as the User's username
 OPENID_USERNAME_CLAIM=
 # Set to determine which user info property returned from OpenID Provider to store as the User's name
 OPENID_NAME_CLAIM=
 # Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
 # When not set, defaults to: email -> preferred_username -> upn
 OPENID_EMAIL_CLAIM=
 # Optional audience parameter for OpenID authorization requests
 OPENID_AUDIENCE=
 OPENID_BUTTON_LABEL=
 OPENID_IMAGE_URL=
 # Set to true to automatically redirect to the OpenID provider when a user visits the login page
 # This will bypass the login form completely for users, only use this if OpenID is your only authentication method
 OPENID_AUTO_REDIRECT=false
 # Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
 OPENID_USE_PKCE=false
 #Set to true to reuse openid tokens for authentication management instead of using the mongodb session and the custom refresh token.
 OPENID_REUSE_TOKENS=
 #By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
 #If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
 #Default is true.
 OPENID_JWKS_URL_CACHE_ENABLED=
 OPENID_JWKS_URL_CACHE_TIME= # 600000 ms eq to 10 minutes leave empty to disable caching
 #Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
 OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
 OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example for Scope Needed for Microsoft Graph API
 # Set to true to use the OpenID Connect end session endpoint for logout
 OPENID_USE_END_SESSION_ENDPOINT=
 # URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
 OPENID_POST_LOGOUT_REDIRECT_URI=
 # Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
 OPENID_MAX_LOGOUT_URL_LENGTH=
 #========================#
 # SharePoint Integration #
 #========================#
 # Requires Entra ID (OpenID) authentication to be configured
 # Enable SharePoint file picker in chat and agent panels
 # ENABLE_SHAREPOINT_FILEPICKER=true
 # SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
 # SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
 # Microsoft Graph API And SharePoint scopes for file picker
 # SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
 # SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
 #========================#
 # SAML
 # Note: If OpenID is enabled, SAML authentication will be automatically disabled.
 SAML_ENTRY_POINT=
 SAML_ISSUER=
 SAML_CERT=
 SAML_CALLBACK_URL=/oauth/saml/callback
 SAML_SESSION_SECRET=
 # Attribute mappings (optional)
 SAML_EMAIL_CLAIM=
 SAML_USERNAME_CLAIM=
 SAML_GIVEN_NAME_CLAIM=
 SAML_FAMILY_NAME_CLAIM=
 SAML_PICTURE_CLAIM=
 SAML_NAME_CLAIM=
 # Login button settings (optional)
 SAML_BUTTON_LABEL=
 SAML_IMAGE_URL=
 # Whether the SAML Response should be signed.
 # - If "true", the entire `SAML Response` will be signed.
 # - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
 # SAML_USE_AUTHN_RESPONSE_SIGNED=
 #===============================================#
 # Microsoft Graph API / Entra ID Integration  #
 #===============================================#
 # Enable Entra ID people search integration in permissions/sharing system
 # When enabled, the people picker will search both local database and Entra ID
 USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
 # When enabled, Entra ID group owners will be considered members of the group
 ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
 # Microsoft Graph API scopes needed for people/group search
 # Default scopes provide access to user profiles and group memberships
 OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
 # LDAP
 LDAP_URL=
 LDAP_BIND_DN=
 LDAP_BIND_CREDENTIALS=
 LDAP_USER_SEARCH_BASE=
 #LDAP_SEARCH_FILTER="mail="
 LDAP_CA_CERT_PATH=
 # LDAP_TLS_REJECT_UNAUTHORIZED=
 # LDAP_STARTTLS=
 # LDAP_LOGIN_USES_USERNAME=true
 # LDAP_ID=
 # LDAP_USERNAME=
 # LDAP_EMAIL=
 # LDAP_FULL_NAME=
 #========================#
 # Email Password Reset   #
 #========================#
 EMAIL_SERVICE=
 EMAIL_HOST=
 EMAIL_PORT=25
 EMAIL_ENCRYPTION=
 EMAIL_ENCRYPTION_HOSTNAME=
 EMAIL_ALLOW_SELFSIGNED=
 # Leave both empty for SMTP servers that do not require authentication
 EMAIL_USERNAME=
 EMAIL_PASSWORD=
 EMAIL_FROM_NAME=
 EMAIL_FROM=noreply@librechat.ai
 #========================#
 #      Mailgun API       #
 #========================#
 # MAILGUN_API_KEY=your-mailgun-api-key
 # MAILGUN_DOMAIN=mg.yourdomain.com
 # EMAIL_FROM=noreply@yourdomain.com
 # EMAIL_FROM_NAME="LibreChat"
 # # Optional: For EU region
 # MAILGUN_HOST=https://api.eu.mailgun.net
 #========================#
 # Firebase CDN           #
 #========================#
 FIREBASE_API_KEY=
 FIREBASE_AUTH_DOMAIN=
 FIREBASE_PROJECT_ID=
 FIREBASE_STORAGE_BUCKET=
 FIREBASE_MESSAGING_SENDER_ID=
 FIREBASE_APP_ID=
 #========================#
 # S3 AWS Bucket          #
 #========================#
 AWS_ENDPOINT_URL=
 AWS_ACCESS_KEY_ID=
 AWS_SECRET_ACCESS_KEY=
 AWS_REGION=
 AWS_BUCKET_NAME=
 # Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
 # that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
 # AWS_FORCE_PATH_STYLE=false
 #========================#
 # Azure Blob Storage     #
 #========================#
 AZURE_STORAGE_CONNECTION_STRING=
 AZURE_STORAGE_PUBLIC_ACCESS=false
 AZURE_CONTAINER_NAME=files
 #========================#
 # Shared Links           #
 #========================#
 ALLOW_SHARED_LINKS=true
 # Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
 ALLOW_SHARED_LINKS_PUBLIC=false
 #==============================#
 # Static File Cache Control    #
 #==============================#
 # Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
 # NODE_ENV must be set to production for these to take effect
 # STATIC_CACHE_MAX_AGE=172800
 # STATIC_CACHE_S_MAX_AGE=86400
 # If you have another service in front of your LibreChat doing compression, disable express based compression here
 # DISABLE_COMPRESSION=true
 # If you have gzipped versions of uploaded images in the same folder, this will enable gzip scanning and serving of these images
 # Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
 # ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
 #===================================================#
 #                        UI                         #
 #===================================================#
 APP_TITLE=LibreChat
 # CUSTOM_FOOTER="My custom footer"
 HELP_AND_FAQ_URL=https://librechat.ai
 # SHOW_BIRTHDAY_ICON=true
 # Google tag manager id
 #ANALYTICS_GTM_ID=user provided google tag manager id
 # limit conversation file imports to a certain number of bytes in size to avoid the container
 # maxing out memory limitations by unremarking this line and supplying a file size in bytes
 # such as the below example of 250 mib
 # CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
 #===============#
 # REDIS Options #
 #===============#
 # Enable Redis for caching and session storage
 # USE_REDIS=true
 # Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
 # Set to false to use in-memory storage for streams while keeping Redis for other caches
 # USE_REDIS_STREAMS=true
 # Single Redis instance
 # REDIS_URI=redis://127.0.0.1:6379
 # Redis cluster (multiple nodes)
 # REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
 # Redis with TLS/SSL encryption and CA certificate
 # REDIS_URI=rediss://127.0.0.1:6380
 # REDIS_CA=/path/to/ca-cert.pem
 # Elasticache may need to use an alternate dnsLookup for TLS connections.  see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
 # Enable alternative dnsLookup for redis
 # REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
 # Redis authentication (if required)
 # REDIS_USERNAME=your_redis_username
 # REDIS_PASSWORD=your_redis_password
 # Redis key prefix configuration
 # Use environment variable name for dynamic prefix (recommended for cloud deployments)
 # REDIS_KEY_PREFIX_VAR=K_REVISION
 # Or use static prefix directly
 # REDIS_KEY_PREFIX=librechat
 # Redis connection limits
 # REDIS_MAX_LISTENERS=40
 # Redis ping interval in seconds (0 = disabled, >0 = enabled)
 # When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
 # When unset or 0, no pinging is performed (recommended for most use cases)
 # REDIS_PING_INTERVAL=300
 # Force specific cache namespaces to use in-memory storage even when Redis is enabled
 # Comma-separated list of CacheKeys
 # Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
 # Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
 # FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
 # Leader Election Configuration (for multi-instance deployments with Redis)
 # Duration in seconds that the leader lease is valid before it expires (default: 25)
 # LEADER_LEASE_DURATION=25
 # Interval in seconds at which the leader renews its lease (default: 10)
 # LEADER_RENEW_INTERVAL=10
 # Maximum number of retry attempts when renewing the lease fails (default: 3)
 # LEADER_RENEW_ATTEMPTS=3
 # Delay in seconds between retry attempts when renewing the lease (default: 0.5)
 # LEADER_RENEW_RETRY_DELAY=0.5
 #==================================================#
 #                      Others                      #
 #==================================================#
 #   You should leave the following commented out   #
 # NODE_ENV=
 # E2E_USER_EMAIL=
 # E2E_USER_PASSWORD=
 #=====================================================#
 #                  Cache Headers                      #
 #=====================================================#
 #   Headers that control caching of the index.html    #
 #   Default configuration prevents caching to ensure  #
 #   users always get the latest version. Customize    #
 #   only if you understand caching implications.      #
 # INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
 # INDEX_PRAGMA=no-cache
 # INDEX_EXPIRES=0
 # no-cache: Forces validation with server before using cached version
 # no-store: Prevents storing the response entirely
 # must-revalidate: Prevents using stale content when offline
 #=====================================================#
 #                  OpenWeather                        #
 #=====================================================#
 OPENWEATHER_API_KEY=
 #====================================#
 # LibreChat Code Interpreter API     #
 #====================================#
 # https://code.librechat.ai
 # LIBRECHAT_CODE_API_KEY=your-key
 #======================#
 # Web Search           #
 #======================#
 # Note: All of the following variable names can be customized.
 # Omit values to allow user to provide them.
 # For more information on configuration values, see:
 # https://librechat.ai/docs/features/web_search
 # Search Provider (Required)
 # SERPER_API_KEY=your_serper_api_key
 # Scraper (Required)
 # FIRECRAWL_API_KEY=your_firecrawl_api_key
 # Optional: Custom Firecrawl API URL
 # FIRECRAWL_API_URL=your_firecrawl_api_url
 # Reranker (Required)
 # JINA_API_KEY=your_jina_api_key
 # or
 # COHERE_API_KEY=your_cohere_api_key
 #======================#
 # MCP Configuration    #
 #======================#
 # Treat 401/403 responses as OAuth requirement when no oauth metadata found
 # MCP_OAUTH_ON_AUTH_ERROR=true
 # Timeout for OAuth detection requests in milliseconds
 # MCP_OAUTH_DETECTION_TIMEOUT=5000
 # Cache connection status checks for this many milliseconds to avoid expensive verification
 # MCP_CONNECTION_CHECK_TTL=60000
 # Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
 # When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
 # MCP_SKIP_CODE_CHALLENGE_CHECK=false
 # Circuit breaker: max connect/disconnect cycles before tripping (per server)
 # MCP_CB_MAX_CYCLES=7
 # Circuit breaker: sliding window (ms) for counting cycles
 # MCP_CB_CYCLE_WINDOW_MS=45000
 # Circuit breaker: cooldown (ms) after the cycle breaker trips
 # MCP_CB_CYCLE_COOLDOWN_MS=15000
 # Circuit breaker: max consecutive failed connection rounds before backoff
 # MCP_CB_MAX_FAILED_ROUNDS=3
 # Circuit breaker: sliding window (ms) for counting failed rounds
 # MCP_CB_FAILED_WINDOW_MS=120000
 # Circuit breaker: base backoff (ms) after failed round threshold is reached
 # MCP_CB_BASE_BACKOFF_MS=30000
 # Circuit breaker: max backoff cap (ms) for exponential backoff
 # MCP_CB_MAX_BACKOFF_MS=300000
@@ -0,0 +1,8 @@
 # Auto-generated from pyproject.toml — do not edit manually
 ALFRED_VERSION=0.1.7
 PYTHON_VERSION=3.14.3
 IMAGE_NAME=alfred_media_organizer
 SERVICE_NAME=alfred
 LIBRECHAT_VERSION=v0.8.4
 RAG_VERSION=v0.7.3
 UV_VERSION=0.11.6
@@ -34,6 +34,9 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Generate build variables
        run: python scripts/generate_build_vars.py
      - name: Load config from Makefile
        id: config
        run: make -s _ci-dump-config >> $GITHUB_OUTPUT
@@ -55,13 +55,24 @@ coverage.xml
 Thumbs.db
 # Secrets
-.env
+.env.secrets
 # Backup files
 *.backup
 *.bak
 env_backup/
 # Application data dir
 data/*
 # Application logs
 logs/*
 # Documentation folder
 docs/
 # .md files (project-level Markdown is brol-y; allow-list the ones we track)
 *.md
 !CHANGELOG.md
 #
@@ -0,0 +1,261 @@
 # Changelog
 All notable changes to Alfred are documented here.
 The format is loosely based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 Alfred is not yet on SemVer — entries are grouped by **dated work blocks** instead
 of release numbers. Granularity targets behavioral or API-visible changes; refer
 to `git log` for commit-level detail.
 Sections used per block: **Added** / **Changed** / **Deprecated** / **Removed** /
 **Fixed** / **Internal** (for tech-debt and refactor noise that doesn't affect
 callers).
 ---
 ## [Unreleased]
 ### Added
 - **Real-world release fixtures** under `tests/fixtures/releases/{easy,shitty,path_of_pain}/`,
  each documenting an expected `ParsedRelease` plus the future `routing`
  (library / torrents / seed_hardlinks) for the upcoming `organize_media`
  refactor. EASY bucket seeded with 5 cases (movie, single-episode, season
  pack, movie + noise, YTS bracket-heavy). SHITTY bucket seeded with 15
  anti-regression cases covering: 3-level INTEGRALE hierarchy (Angel),
  French custom titles (Buffy, La Nuit au Musée, Chérie j'ai agrandi),
  multi-episode chain `S14E09E10E11` (Archer, captures E11 loss),
  lowercase `s01e01` (Notre Planète), `NxNN` with ` - ` separators
  (Vinyl, captures dash artifact), title-with-year-suffix (Deutschland.83),
  season-range `S01-06` (Tatortreiniger, captures movie misclassification),
  bare folder name (Jurassic Park,
  media_type=unknown), apostrophe-in-name (Honey Don't, captures full AI-path
  degeneration), SUBS-tag movie (Hook), space separators (Predator Badlands,
  captures group=UNKNOWN), subs-only release (Westworld S04).
  PATH OF PAIN bucket seeded with 10 worst-case fixtures covering:
  UTF-8 wide pipe yt-dlp slug (Khruangbin), 3-show franchise box-set
  with double season range and parens-wrapped tech (Deutschland 83-86-89,
  captures `group=S03` misdetection), accented chars in title (Chérie
  BéBé with VFF), 8-word stand-up comedy title (Jimmy Carr), site-tag
  prefix + XviD (OxTorrent), episode title + air-date silently lost
  (Prodiges), full-chaos apostrophe + spaces + Blu-ray dash + 1080i +
  multi-word audio codec (The Prodigy, full AI-path degeneration),
  yt-dlp YouTube ID glued to year (Sleaford Mods), bilingual `[FR-EN]`
  tag mistaken for group (Super Mario Bros), COMPLETE + S01-S07 range +
  REPACK + HEVC (Gilmore Girls, the well-behaved exception).
  Parametrized over `tests/domain/test_release_fixtures.py` for anti-regression.
 - **`NxNN` alt season/episode form supported** by `parse_release`. Releases like
  `Show.1x05.720p.HDTV.x264-GRP` and `Show.2x07x08.1080p.WEB.x265-GRP` (multi-ep
  alt form) now parse as TV shows.
 - **`alfred/knowledge/release/separators.yaml`** declares the token separators
  used by the release-name tokenizer (`.`, ` `, `[`, `]`, `(`, `)`, `_`). New
  conventions can be added without code changes. The canonical `.` is always
  present even if missing from YAML.
 ### Changed
 - **`parse_release` tokenizer is now data-driven**: it splits on any character
  listed in `separators.yaml` (regex character class) instead of `name.split(".")`.
  This makes YTS-style releases (`The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]`),
  space-separated names (`Inception 2010 1080p BluRay x264-GROUP`), and
  underscore-separated names parse correctly via the direct path — no more
  fallback through sanitization.
 - **`parse_release` flow simplified**: site-tag extraction always runs first
  (so `parse_path == "sanitized"` now reliably indicates a stripped `[tag]`),
  then well-formedness is checked only against truly forbidden chars
  (anything not in the configured separator set).
 - **ISO 639-2/B is now the canonical language code project-wide** (was a mix of
  639-1 and 639-2/T):
  - `SubtitlePreferences.languages` default is now `["fre", "eng"]` (was
    `["fr", "en"]`). Old LTM files are not auto-migrated — delete
    `data/memory/ltm.json` to regenerate with the new defaults.
  - Subtitle output filenames are now `{iso639_2b}.srt` (e.g. `fre.srt`,
    `fre.sdh.srt`). Existing `fr.srt` files are still **read** correctly
    (recognized as French via alias) but new files are written canonically.
  - `Language` value object docstring corrected: it has always stored 639-2/B
    (matching what ffprobe emits), not 639-2/T as previously documented.
 - **`MovieService.validate_movie_file` minimum size is now configurable** via
  `settings.min_movie_size_bytes` (default unchanged: 100 MB). Constructor
  accepts an optional `min_movie_size_bytes` override for tests.
 - **`SubtitleKnowledgeBase` delegates language lookup to `LanguageRegistry`**
  rather than duplicating tokens. `subtitles.yaml` now only declares
  subtitle-specific tokens (e.g. `vostfr`, `vf`, `vff`) under a new
  `language_tokens` section.
 ### Removed
 - **`alfred/domain/tv_shows/services.py`** and **`alfred/domain/movies/services.py`**
  deleted entirely. They held fossil parsers (`parse_episode_filename`,
  `extract_movie_metadata`, …) with zero production callers — superseded by
  `parse_release` as the single source of truth for release-name parsing.
  Associated tests (`tests/domain/test_movies.py`, `tests/domain/test_tv_shows_service.py`)
  removed as well.
 - `_sanitize` and `_normalize` helpers in `alfred/domain/release/services.py` —
  the new tokenizer makes them redundant.
 - `_LANG_KEYWORDS`, `_SDH_TOKENS`, `_FORCED_TOKENS`, `SUBTITLE_EXTENSIONS`
  hardcoded dicts in `alfred/domain/subtitles/scanner.py` — all knowledge now
  lives in YAML (CLAUDE.md compliance).
 - `_MIN_MOVIE_SIZE_BYTES` module-level constant in
  `alfred/domain/movies/services.py` — replaced by the new setting.
 - Top-level `languages:` block in `subtitles.yaml` — superseded by
  `language_tokens:` (subtitle-specific only) since iso_languages.yaml is the
  canonical source.
 ### Fixed
 - **`hi` token no longer marks a subtitle as SDH** (it conflicted with the
  ISO 639-1 alias for Hindi). SDH is now detected only via `sdh`, `cc`, and
  `hearing` tokens.
 - `SubtitleKnowledgeBase` default rules used `"fra"` while
  `iso_languages.yaml` exposes French as `"fre"` — preferred languages
  defaults now match the canonical form.
 ### Internal
 - **`to_dot_folder_name(title)` helper** in
  `alfred/domain/shared/value_objects.py` — extracts the
  `re.sub(r"[^\w\s\.\-]", "", title).replace(" ", ".")` pattern that was
  duplicated between `MovieTitle.normalized()` and `TVShow.get_folder_name()`.
 - **`ParsedRelease.languages` uses `field(default_factory=list)`** instead of
  a manual `__post_init__` that assigned `[]` via `object.__setattr__`.
 - **`file_extensions.yaml` splits subtitle sidecars (`.srt`, `.sub`, `.idx`,
  `.ass`, `.ssa`) into a dedicated `subtitle:` category** instead of lumping
  them under `metadata:`. The `_METADATA_EXTENSIONS` set used by
  `detect_media_type` remains the union of both (same behavior — subtitles
  are still ignored when deciding the media type of a folder), but a new
  `load_subtitle_extensions()` loader is now available for the subtitles
  domain. Semantic clarity, no functional change.
 - **`tv_shows/entities.py` module docstring** now shows the aggregate
  ownership as an ASCII tree before the rule text — quicker visual scan
  of the DDD structure.
 - Removed backward-compat shims `_sanitise_for_fs` /
  `_strip_episode_from_normalised` from `domain/release/value_objects.py`
  (zero callers).
 - Cleaned ruff warnings across the codebase: `subprocess.run` calls now pass
  explicit `check=False` (PLW1510); lazy imports promoted to module top where
  there was no cycle (PLC0415 in `manage_subtitles.py`, `placer.py`,
  `qbittorrent/client.py`, `file_manager.py`); fixed module-level import
  ordering (E402) in `language_registry.py` and `subtitles/knowledge/loader.py`;
  removed unused locals (F841 / B007); replaced unnecessary set comprehension
  with `set()` in `release/knowledge.py` (C416).
 - Ruff config: ignore `PLR0911` / `PLR0912` (too-many-returns / too-many-branches)
  globally — noisy on parser mappers and orchestrator use-cases where early-return
  validation is essential complexity. Ignore `PLW0603` for the documented memory
  singleton (`infrastructure/persistence/context.py`).
 ---
 ## [2026-05-17] — TVShow & Movie aggregate refactor
 Multi-phase overhaul of the TV show domain into a real DDD aggregate, with
 matching parity work on `Movie`, a language knowledge system, and the
 `shared/media` restructure that supports both.
 ### Added
 - **Language knowledge system** (`alfred/knowledge/iso_languages.yaml` + 42
  languages including `und` for undetermined).
  - `Language` value object (frozen dataclass) with `iso`, `english_name`,
    `native_name`, `aliases`, and a `matches(raw)` cross-format helper.
  - `LanguageRegistry` loader (`alfred/domain/shared/knowledge/`) merging
    builtin + learned YAML. Not a singleton — the application layer
    instantiates it.
  - ISO 639-2/B is the canonical key; aliases cover 639-1, 639-2/T, English
    name, native name, and common spellings.
 - **`VideoTrack`** dataclass (`alfred/domain/shared/media/video.py`) with a
  `resolution` property using width-priority bucket detection (handles
  cinema/scope crops like 1920×960 → 1080p).
 - **`shared/media/matching.py`** — `track_lang_matches` helper shared by
  `Episode` and `Movie`. Implements the **"C+" contract** for language helpers:
  - `Language` query → cross-format match via `Language.matches()`
  - `str` query → case-insensitive direct comparison (no normalization)
 - **TVShow aggregate composition**:
  - `TVShow.seasons: dict[SeasonNumber, Season]`
  - `Season.episodes: dict[EpisodeNumber, Episode]`
  - `Season.expected_episodes` / `Season.aired_episodes` (split so collection
    state can compare "owned vs aired today" without confusing in-flight
    seasons with future ones)
 - **Aggregate methods on `TVShow`**:
  - `add_episode(ep)` — sole sanctioned mutation entry point (creates the
    season if missing)
  - `add_season(season)` — replaces a season wholesale
  - `collection_status()` → `CollectionStatus.{EMPTY, PARTIAL, COMPLETE}`
  - `is_complete_series()` — true iff `ENDED + COMPLETE`
  - `missing_episodes()` — flat list of all aired-but-not-owned
    `(season, episode)` pairs
 - **`CollectionStatus`** enum (orthogonal to `ShowStatus`).
 - **Episode track helpers** (`has_audio_in`, `has_subtitles_in`,
  `has_forced_subs`, `audio_languages`, `subtitle_languages`), driven by
  `Episode.audio_tracks` / `Episode.subtitle_tracks`.
 - **Movie aggregate parity** — `Movie` now carries `audio_tracks` /
  `subtitle_tracks` and exposes the same helpers as `Episode` (same C+
  contract).
 - **`CHANGELOG.md`** (this file).
 ### Changed
 - **`shared/media_info.py` exploded into `shared/media/{audio,video,subtitle,info,matching}.py`.**
  `MediaInfo` is now symmetric: every stream type is a `list[Track]`. Flat
  accessors (`width`, `height`, `video_codec`, `resolution`) remain as
  properties that read the first video track.
 - **`MediaInfo.duration_seconds` / `bitrate_kbps`** moved from `VideoTrack` to
  `MediaInfo` (file-level — they come from the ffprobe `format` block, not a
  stream). Files without a video stream now correctly expose duration.
 - **`ShowStatus.from_string`** extended to map TMDB strings (`Returning
  Series`, `In Production`, `Pilot`, `Planned`, `Canceled`, `Cancelled`).
  Comparison is whitespace-trimmed and case-insensitive.
 - **`Season` / `Episode`** dropped their `show_imdb_id` back-references. They
  are owned by `TVShow` and reached only through it.
 - **`TVShow.seasons_count` and `episode_count`** are now `@property` (computed
  from the dict) instead of stored ints.
 - **`TVShowService.parse_episode_from_filename`** rewritten in string
  operations (no regex). Supports `S01E05` / `s1e5` and `1x05` / `01x5` forms.
 - **`TVShowService.find_next_episode`** now drives off
  `show.missing_episodes()` instead of the hardcoded "max 50 episodes per
  season" heuristic.
 - **`TVShowService` constructor** no longer takes `season_repository` /
  `episode_repository` — the aggregate persists in one block via
  `TVShowRepository` only.
 - **`SubtitleTrack` in `alfred.domain.subtitles.entities` renamed to
  `SubtitleCandidate`.** Coexists with the `shared.media.SubtitleTrack`
  ffprobe-view dataclass (different bounded contexts, kept separate
  intentionally).
 - **`tv_shows/services.py` `_VIDEO_EXTENSIONS`** now loaded from
  `knowledge/release/file_extensions.yaml` via `load_video_extensions()`
  (single source of truth).
 - **`CLAUDE.md`** updated with three new policy sections:
  - "Tests" — small updates OK during normal work, no mass-update sprees
  - "Backwards-compatibility shims" — prefer clean migration over shims
  - "Regex" — not forbidden, use judgment when string ops would be fragile
 ### Removed
 - **Legacy `Season N Episode N` filename form** in
  `TVShowService.parse_episode_from_filename`. It never appears in the release
  names Alfred handles, and supporting it forced a regex.
 - **`SeasonRepository` and `EpisodeRepository`** — only the aggregate root has
  a repository (DDD rule: one repo per aggregate).
 - **`shared/media_info.py`** compatibility shim — callers updated.
 - **`SubtitleTrack` compatibility alias** in `subtitles.entities` — callers
  updated to `SubtitleCandidate`.
 ### Fixed
 - **`MediaInfo.duration_seconds` returns `None` on audio-only files** instead
  of crashing through `primary_video.duration_seconds` (see the duration/bitrate
  move under **Changed**).
 - **`MediaOrganizer`** (`infrastructure/filesystem/organizer.py`) no longer
  passes the removed `show_imdb_id` / `episode_count` kwargs when constructing
  a `Season` for folder-name generation.
 ### Internal
 - Test suite rewritten where the aggregate redesign broke fixtures:
  `tests/domain/test_tv_shows.py` (69 tests), `tests/domain/test_media_info.py`
  (rewritten for `VideoTrack`), `tests/application/test_enrich_from_probe.py`
  (helper added), `tests/infrastructure/test_filesystem_extras.py` (fixtures),
  `tests/domain/test_tv_shows_service.py` (find_next_episode driven by real
  aggregate state).
 - Subtitle services internal migration: `matcher.py`, `utils.py`, `placer.py`,
  `identifier.py` updated to import `SubtitleCandidate`.
 - Suite status at end of block: **1066 passed, 8 skipped, 0 failed**.
@@ -2,46 +2,36 @@
 # check=skip=InvalidDefaultArgInFrom
 ARG PYTHON_VERSION
-ARG PYTHON_VERSION_SHORT
+ARG UV_VERSION
-ARG RUNNER
+
 # Stage 0: uv binary (workaround — --from doesn't support ARG expansion)
 FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
 # ===========================================
 # Stage 1: Builder
 # ===========================================
 FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
 # Re-declare ARGs after FROM to make them available in this stage
 ARG RUNNER
 # STFU - No need - Write logs asap
 ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1
+    PYTHONUNBUFFERED=1 \
    UV_PROJECT_ENVIRONMENT=/venv
-# Install build dependencies (needs root)
+# Install build dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    build-essential \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    && rm -rf /var/lib/apt/lists/*
+    apt-get update \
    && apt-get install -y --no-install-recommends build-essential
-# Install runner globally (needs root) - Save cache for future
+# Install uv globally
-RUN --mount=type=cache,target=/root/.cache/pip \
+COPY --from=uv-bin /uv /usr/local/bin/uv
    pip install $RUNNER
 # Set working directory for dependency installation
 WORKDIR /tmp
-# Copy dependency files
+COPY pyproject.toml uv.lock Makefile ./
 COPY pyproject.toml poetry.lock* uv.lock* Makefile ./
-# Install dependencies as root (to avoid permission issues with system packages)
+# Install dependencies into /venv
-RUN --mount=type=cache,target=/root/.cache/pip \
+RUN --mount=type=cache,target=/root/.cache/uv uv sync
    --mount=type=cache,target=/root/.cache/pypoetry \
    --mount=type=cache,target=/root/.cache/uv \
    if [ "$RUNNER" = "poetry" ]; then \
        poetry config virtualenvs.create false && \
        poetry install --only main --no-root; \
    elif [ "$RUNNER" = "uv" ]; then \
        uv pip install --system -r pyproject.toml; \
    fi
 COPY scripts/ ./scripts/
 COPY .env.example ./
@@ -51,16 +41,7 @@ COPY .env.example ./
 # ===========================================
 FROM builder AS test
-ARG RUNNER
+RUN --mount=type=cache,target=/root/.cache/uv uv sync --group dev
 RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/pypoetry \
    --mount=type=cache,target=/root/.cache/uv \
    if [ "$RUNNER" = "poetry" ]; then \
        poetry install --no-root; \
    elif [ "$RUNNER" = "uv" ]; then \
        uv pip install --system -e .[dev]; \
    fi
 COPY alfred/ ./alfred
 COPY scripts ./scripts
@@ -71,51 +52,39 @@ COPY tests/  ./tests
 # ===========================================
 FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime
-ARG PYTHON_VERSION_SHORT
+ENV PYTHONDONTWRITEBYTECODE=1 \
-
+    PYTHONUNBUFFERED=1 \
 # TODO: Do we still need all of these keys?
 ENV LLM_PROVIDER=deepseek \
    MEMORY_STORAGE_DIR=/data/memory \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/home/appuser \
-    PYTHONUNBUFFERED=1
+    PATH="/venv/bin:$PATH"
-# Install runtime dependencies (needs root)
+# Install runtime dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    ca-certificates \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    && rm -rf /var/lib/apt/lists/* \
+    apt-get update \
-    && apt-get clean
+    && apt-get install -y --no-install-recommends ca-certificates
 # Create non-root user
 RUN useradd -m -u 1000 -s /bin/bash appuser
-# Create data directories (needs root for /data)
+# Create data directories
 RUN mkdir -p /data /logs \
    && chown -R appuser:appuser /data /logs
 # Switch to non-root user
 USER appuser
 # Set working directory (owned by appuser)
 WORKDIR /home/appuser
-# Copy Python packages from builder stage
+# Copy venv from builder stage
-COPY --from=builder /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages
+COPY --from=builder /venv /venv
 COPY --from=builder /usr/local/bin /usr/local/bin
-# Copy application code (already owned by appuser)
+# Copy application code
 COPY --chown=appuser:appuser alfred/ ./alfred
 COPY --chown=appuser:appuser scripts/ ./scripts
 COPY --chown=appuser:appuser .env.example ./
 COPY --chown=appuser:appuser pyproject.toml ./
 # Create volumes for persistent data
 VOLUME ["/data", "/logs"]
 # Expose port
 EXPOSE 8000
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1
@@ -1,43 +1,46 @@
 .DEFAULT_GOAL := help
 # --- Load Config from pyproject.toml ---
 export
 -include .env.make
 # --- Profiles management ---
 # Usage: make up p=rag,meili
-p ?= core
+p ?= full
 PROFILES_PARAM	:= COMPOSE_PROFILES=$(p)
 # --- Commands ---
-DOCKER_COMPOSE	:= docker compose
+DOCKER_COMPOSE	:= docker compose \
-DOCKER_BUILD	:= docker build --no-cache \
+					--env-file .env.alfred \
 					--env-file .env.secrets \
 					--env-file .env.make
 DOCKER_BUILD	:= DOCKER_BUILDKIT=1 docker build \
 					--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
-					--build-arg PYTHON_VERSION_SHORT=$(PYTHON_VERSION_SHORT) \
+					--build-arg UV_VERSION=$(UV_VERSION)
 					--build-arg RUNNER=$(RUNNER)
 # --- Phony ---
-.PHONY: .env up down restart logs ps shell build build-test install update \
+.PHONY: bootstrap up down restart logs ps shell build build-test install \
-	install-hooks test coverage lint format clean major minor patch help
+	update install-hooks test coverage lint format clean major minor patch help
 # --- Setup ---
-.env .env.make:
+.env.alfred .env.librechat .env.secrets .env.make:
 	@echo "Initializing environment..."
-	@python scripts/bootstrap.py \
+	@uv run python scripts/bootstrap.py \
 		&& echo "✓ Environment ready" \
 		|| (echo "✗ Environment setup failed" && exit 1)
-bootstrap: .env .env.make
+bootstrap: .env.alfred .env.librechat .env.secrets .env.make
 # --- Docker ---
-up: .env
+up: .env.alfred .env.secrets
-	@echo "Starting containers with profiles: [$(p)]..."
+	@echo "Starting containers with profiles: [full]..."
 	@$(PROFILES_PARAM) $(DOCKER_COMPOSE) up -d --remove-orphans \
 		&& echo "✓ Containers started" \
 		|| (echo "✗ Failed to start containers" && exit 1)
 down:
 	@echo "Stopping containers..."
-	@$(DOCKER_COMPOSE) down \
+	@$(PROFILES_PARAM) $(DOCKER_COMPOSE) down \
 		&& echo "✓ Containers stopped" \
 		|| (echo "✗ Failed to stop containers" && exit 1)
@@ -74,45 +77,45 @@ build-test: .env.make
 # --- Dependencies ---
 install:
-	@echo "Installing dependencies with $(RUNNER)..."
+	@echo "Installing dependencies with uv..."
-	@$(RUNNER) install \
+	@uv install \
 		&& echo "✓ Dependencies installed" \
 		|| (echo "✗ Installation failed" && exit 1)
 install-hooks:
 	@echo "Installing pre-commit hooks..."
-	@$(RUNNER) run pre-commit install \
+	@uv run pre-commit install \
 		&& echo "✓ Hooks installed" \
 		|| (echo "✗ Hook installation failed" && exit 1)
 update:
-	@echo "Updating dependencies with $(RUNNER)..."
+	@echo "Updating dependencies with uv..."
-	@$(RUNNER) update \
+	@uv update \
 		&& echo "✓ Dependencies updated" \
 		|| (echo "✗ Update failed" && exit 1)
 # --- Quality ---
 test:
 	@echo "Running tests..."
-	@$(RUNNER) run pytest \
+	@uv run pytest \
 		&& echo "✓ Tests passed" \
 		|| (echo "✗ Tests failed" && exit 1)
 coverage:
 	@echo "Running tests with coverage..."
-	@$(RUNNER) run pytest --cov=. --cov-report=html --cov-report=term \
+	@uv run pytest --cov=. --cov-report=html --cov-report=term \
 		&& echo "✓ Coverage report generated" \
 		|| (echo "✗ Coverage failed" && exit 1)
 lint:
 	@echo "Linting code..."
-	@$(RUNNER) run ruff check --fix . \
+	@uv run ruff check --fix . \
 		&& echo "✓ Linting complete" \
 		|| (echo "✗ Linting failed" && exit 1)
 format:
 	@echo "Formatting code..."
-	@$(RUNNER) run ruff format . && $(RUNNER) run ruff check --fix . \
+	@uv run ruff format . && uv run ruff check --fix . \
 		&& echo "✓ Code formatted" \
 		|| (echo "✗ Formatting failed" && exit 1)
@@ -125,7 +128,7 @@ clean:
 # --- Versioning ---
 major minor patch: _check-main
 	@echo "Bumping $@ version..."
-	@$(RUNNER) run bump-my-version bump $@ \
+	@uv run bump-my-version bump $@ \
 		&& echo "✓ Version bumped" \
 		|| (echo "✗ Version bump failed" && exit 1)
@@ -138,8 +141,7 @@ major minor patch: _check-main
 _ci-dump-config:
 	@echo "image_name=$(IMAGE_NAME)"
 	@echo "python_version=$(PYTHON_VERSION)"
-	@echo "python_version_short=$(PYTHON_VERSION_SHORT)"
+	@echo "uv_version=$(UV_VERSION)"
 	@echo "runner=$(RUNNER)"
 	@echo "service_name=$(SERVICE_NAME)"
 _ci-run-tests:build-test
@@ -161,6 +163,9 @@ help:
 	@echo ""
 	@echo "Usage: make [target] [p=profile1,profile2]"
 	@echo ""
 	@echo "Setup:"
 	@echo "  bootstrap   Generate .env.alfred, .env.librechat, .env.secrets and .env.make"
 	@echo ""
 	@echo "Docker:"
 	@echo "  up          Start containers (default profile: core)"
 	@echo "              Example: make up p=rag,meili"
@@ -173,7 +178,7 @@ help:
 	@echo ""
 	@echo "Dev & Quality:"
 	@echo "  setup       Bootstrap .env and security keys"
-	@echo "  install     Install dependencies via $(RUNNER)"
+	@echo "  install     Install dependencies via uv"
 	@echo "  test        Run pytest suite"
 	@echo "  coverage    Run tests and generate HTML report"
 	@echo "  lint/format Quality and style checks"
@@ -1,89 +1,235 @@
-# Agent Media 🎬
+# Alfred Media Organizer 🎬
-An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly.
+An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface.
-## Features
+[![Python 3.14](https://img.shields.io/badge/python-3.14-blue.svg)](https://www.python.org/downloads/)
 [![uv](https://img.shields.io/badge/dependency%20manager-uv-purple)](https://github.com/astral-sh/uv)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
- 🤖 **Natural Language Interface**: Talk to your media library in plain language
+## ✨ Features
 - 🔍 **Smart Search**: Find movies and TV shows via TMDB
 - 📥 **Torrent Integration**: Search and download via qBittorrent
 - 🧠 **Contextual Memory**: Remembers your preferences and conversation history
 - 📁 **Auto-Organization**: Keeps your media library tidy
 - 🌐 **API Compatible**: OpenAI-compatible API for easy integration
-## Architecture
+- 🤖 **Natural Language Interface** — Talk to your media library in plain language
 - 🔍 **Smart Search** — Find movies and TV shows via TMDB with rich metadata
 - 📥 **Torrent Integration** — Search and download via qBittorrent
 - 🧠 **Contextual Memory** — Remembers your preferences and conversation history
 - 📁 **Auto-Organization** — Moves and renames media files, resolves destinations, handles subtitles
 - 🎞️ **Subtitle Pipeline** — Identifies, matches, and places subtitle tracks automatically
 - 🔄 **Workflow Engine** — YAML-defined multi-step workflows (e.g. `organize_media`)
 - 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client (LibreChat, OpenWebUI, etc.)
 - 🔒 **Secure by Default** — Auto-generated secrets and encrypted credentials
-Built with **Domain-Driven Design (DDD)** principles:
+## 🏗️ Architecture
 Built with **Domain-Driven Design (DDD)** principles for clean separation of concerns:
 ```
-agent_media/
+alfred/
 ├── agent/              # AI agent orchestration
 │   ├── llm/            # LLM clients (Ollama, DeepSeek)
 │   ├── tools/          # Tool implementations (api, filesystem, language)
 │   └── workflows/      # YAML-defined multi-step workflows
 ├── application/        # Use cases & DTOs
 │   ├── movies/         # Movie search
 │   ├── torrents/       # Torrent management
 │   └── filesystem/     # File operations (move, list, subtitles, seed links)
 ├── domain/             # Business logic & entities
 │   ├── media/          # Release parsing
 │   ├── movies/         # Movie entities
 │   ├── tv_shows/       # TV show entities & value objects
 │   ├── subtitles/      # Subtitle scanner, services, knowledge base
 │   └── shared/         # Common value objects (ImdbId, FilePath, FileSize)
 └── infrastructure/     # External services & persistence
    ├── api/            # External API clients (TMDB, qBittorrent, Knaben)
    ├── filesystem/     # File manager (hard-link based, path-traversal safe)
    ├── persistence/    # Three-tier memory (LTM/STM/Episodic) + JSON repositories
    └── subtitle/       # Subtitle infrastructure
 ```
-See [architecture_diagram.md](docs/architecture_diagram.md) for architectural details.
+### Key flows
-## Quick Start
+**Agent execution:** `agent.step(user_input)` → LLM call → if tool_calls, execute each via registry → loop until no tool calls or `max_tool_iterations` → return final response.
 **Media organization workflow:**
 1. `resolve_destination` — Determines target folder/filename from release name
 2. `move_media` — Hard-links file to library, deletes source
 3. `manage_subtitles` — Scans, classifies, and places subtitle tracks
 4. `create_seed_links` — Hard-links library file back to torrents/ for continued seeding
 **Memory tiers:**
 - **LTM** (`data/memory/ltm.json`) — Persisted config, media library, watchlist
 - **STM** — Conversation history (capped at `MAX_HISTORY_MESSAGES`)
 - **Episodic** — Transient search results, active downloads, recent errors
 ## 🚀 Quick Start
 ### Prerequisites
- Python 3.12+
+- **Python 3.14+**
- Poetry
+- **uv** (dependency manager)
- qBittorrent (optional, for downloads)
+- **Docker & Docker Compose** (recommended for full stack)
- API Keys:
+- **API Keys:**
-  - DeepSeek API key (or Ollama for local LLM)
+  - TMDB API key ([get one here](https://www.themoviedb.org/settings/api))
-  - TMDB API key
+  - Optional: DeepSeek or other LLM provider keys
 ### Installation
 ```bash
 # Clone the repository
-git clone https://github.com/your-username/agent-media.git
+git clone https://github.com/francwa/alfred_media_organizer.git
-cd agent-media
+cd alfred_media_organizer
 # Install dependencies
-poetry install
+make install
-# Copy environment template
+# Install pre-commit hooks
-cp .env.example .env
+make install-hooks
 # Bootstrap environment (generates .env with secure secrets)
 make bootstrap
 # Validate your .env against the schema
 make validate
 # Edit .env with your API keys
 nano .env
 ```
-### Configuration
+### Running with Docker (Recommended)
 Edit `.env`:
 ```bash
-# LLM Provider (deepseek or ollama)
+# Start all services (LibreChat + Alfred + MongoDB + Ollama)
-LLM_PROVIDER=deepseek
+make up
 DEEPSEEK_API_KEY=your-api-key-here
-# TMDB (for movie/TV show metadata)
+# Or start with specific profiles
-TMDB_API_KEY=your-tmdb-key-here
+make up p=rag,meili      # Include RAG and Meilisearch
 make up p=qbittorrent    # Include qBittorrent
 make up p=full           # Everything
-# qBittorrent (optional)
+# View logs
-QBITTORRENT_HOST=http://localhost:8080
+make logs
-QBITTORRENT_USERNAME=admin
+
-QBITTORRENT_PASSWORD=adminadmin
+# Stop all services
 make down
 ```
-### Run
+The web interface will be available at **http://localhost:3080**
 ### Running Locally (Development)
 ```bash
-# Start the API server
+uv run uvicorn alfred.app:app --reload --port 8000
 poetry run uvicorn app:app --reload
 # Or with Docker
 docker-compose up
 ```
-The API will be available at `http://localhost:8000`
+## ⚙️ Configuration
-## Usage
+### Settings system
 `settings.toml` is the single source of truth. The schema flows:
 ```
 settings.toml → settings_schema.py → settings_bootstrap.py → .env + .env.make → settings.py
 ```
 To add a setting: define it in `settings.toml`, run `make bootstrap`, then access via `settings.my_new_setting`.
 ```bash
 # First time setup
 make bootstrap
 # Validate existing .env against schema
 make validate
 # Re-run after settings.toml changes (existing secrets preserved)
 make bootstrap
 ```
 **Never commit `.env` or `.env.make`** — both are gitignored and auto-generated.
 ### Key settings (.env)
 ```bash
 # --- CORE ---
 MAX_HISTORY_MESSAGES=10
 MAX_TOOL_ITERATIONS=10
 # --- LLM ---
 DEFAULT_LLM_PROVIDER=local     # local (Ollama) | deepseek
 OLLAMA_BASE_URL=http://ollama:11434
 OLLAMA_MODEL=llama3.3:latest
 LLM_TEMPERATURE=0.2
 # --- API KEYS ---
 TMDB_API_KEY=your-tmdb-key     # Required for movie/show search
 DEEPSEEK_API_KEY=              # Optional
 # --- SECURITY (auto-generated) ---
 JWT_SECRET=<auto>
 CREDS_KEY=<auto>
 MONGO_PASSWORD=<auto>
 ```
 ## 🐳 Docker Services
 ### Docker Profiles
 | Profile | Extra services | Use case |
 |---------|---------------|----------|
 | (default) | — | LibreChat + Alfred + MongoDB + Ollama |
 | `meili` | Meilisearch | Fast full-text search |
 | `rag` | RAG API + VectorDB (PostgreSQL) | Document retrieval |
 | `qbittorrent` | qBittorrent | Torrent downloads |
 | `full` | All of the above | Complete setup |
 ```bash
 make up              # Start (default profile)
 make up p=full       # Start with all services
 make down            # Stop
 make restart         # Restart
 make logs            # Follow logs
 make ps              # Container status
 ```
 ## 🛠️ Available Tools
 | Tool | Description |
 |------|-------------|
 | `find_media_imdb_id` | Search for movies/TV shows on TMDB by title |
 | `find_torrent` | Search for torrents across multiple indexers |
 | `get_torrent_by_index` | Get detailed info about a specific result |
 | `add_torrent_by_index` | Download a torrent from search results |
 | `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly |
 | `resolve_destination` | Compute the target library path for a release |
 | `move_media` | Hard-link a file to its library destination |
 | `manage_subtitles` | Scan, classify, and place subtitle tracks |
 | `create_seed_links` | Prepare torrent folder so qBittorrent keeps seeding |
 | `learn` | Teach Alfred a new pattern (release group, naming convention) |
 | `set_path_for_folder` | Configure folder paths |
 | `list_folder` | List contents of a configured folder |
 | `set_language` | Set preferred language for the session |
 ## 💬 Usage Examples
 ### Via Web Interface (LibreChat)
 Navigate to **http://localhost:3080** and start chatting:
 ```
 You: Find Inception in 1080p
 Alfred: I found 3 torrents for Inception (2010):
        1. Inception.2010.1080p.BluRay.x264 (150 seeders) - 2.1 GB
        2. Inception.2010.1080p.WEB-DL.x265 (80 seeders) - 1.8 GB
        3. Inception.2010.1080p.REMUX (45 seeders) - 25 GB
 You: Download the first one
 Alfred: ✓ Added to qBittorrent! Download started.
 You: Organize the Breaking Bad S01 download
 Alfred: ✓ Resolved destination: /tv_shows/Breaking.Bad/Season 01/
        ✓ Moved 6 episode files
        ✓ Placed 6 subtitle tracks (fr, en)
        ✓ Seed links created in /torrents/
 ```
 ### Via API
@@ -91,310 +237,197 @@ The API will be available at `http://localhost:8000`
 # Health check
 curl http://localhost:8000/health
-# Chat with the agent
+# Chat (OpenAI-compatible)
 curl -X POST http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
-    "model": "agent-media",
+    "model": "alfred",
-    "messages": [
+    "messages": [{"role": "user", "content": "Find The Matrix 4K"}]
      {"role": "user", "content": "Find Inception 1080p"}
    ]
  }'
 # List models
 curl http://localhost:8000/v1/models
 # View memory state
 curl http://localhost:8000/memory/state
 ```
-### Via OpenWebUI
+Alfred is compatible with any OpenAI-compatible client. Point it at `http://localhost:8000/v1`, model `alfred`.
-Agent Media is compatible with [OpenWebUI](https://github.com/open-webui/open-webui):
+## 🧠 Memory System
-1. Add as OpenAI-compatible endpoint: `http://localhost:8000/v1`
+Alfred uses a three-tier memory system:
 2. Model name: `agent-media`
 3. Start chatting!
-### Example Conversations
+| Tier | Storage | Contents | Lifetime |
 |------|---------|----------|----------|
 | **LTM** | JSON file (`data/memory/ltm.json`) | Config, library, watchlist, learned patterns | Permanent |
 | **STM** | RAM | Conversation history (capped) | Session |
 | **Episodic** | RAM | Search results, active downloads, errors | Short-lived |
-```
+## 🧪 Development
 You: Find Inception in 1080p
 Agent: I found 3 torrents for Inception:
       1. Inception.2010.1080p.BluRay.x264 (150 seeders)
       2. Inception.2010.1080p.WEB-DL.x265 (80 seeders)
       3. Inception.2010.720p.BluRay (45 seeders)
 You: Download the first one
 Agent: Added to qBittorrent! Download started.
 You: List my downloads
 Agent: You have 1 active download:
       - Inception.2010.1080p.BluRay.x264 (45% complete)
 ```
 ## Available Tools
 The agent has access to these tools:
 | Tool | Description |
 |------|-------------|
 | `find_media_imdb_id` | Search for movies/TV shows on TMDB |
 | `find_torrents` | Search for torrents |
 | `get_torrent_by_index` | Get torrent details by index |
 | `add_torrent_by_index` | Download torrent by index |
 | `add_torrent_to_qbittorrent` | Add torrent via magnet link |
 | `set_path_for_folder` | Configure folder paths |
 | `list_folder` | List folder contents |
 ## Memory System
 Agent Media uses a three-tier memory system:
 ### Long-Term Memory (LTM)
 - **Persistent** (saved to JSON)
 - Configuration, preferences, media library
 - Survives restarts
 ### Short-Term Memory (STM)
 - **Session-based** (RAM only)
 - Conversation history, current workflow
 - Cleared on restart
 ### Episodic Memory
 - **Transient** (RAM only)
 - Search results, active downloads, recent errors
 - Cleared frequently
 ## Development
 ### Project Structure
 ```
 agent_media/
 ├── agent/
 │   ├── agent.py          # Main agent orchestrator
 │   ├── prompts.py        # System prompt builder
 │   ├── registry.py       # Tool registration
 │   ├── tools/            # Tool implementations
 │   └── llm/              # LLM clients (DeepSeek, Ollama)
 ├── application/
 │   ├── movies/           # Movie use cases
 │   ├── torrents/         # Torrent use cases
 │   └── filesystem/       # Filesystem use cases
 ├── domain/
 │   ├── movies/           # Movie entities & value objects
 │   ├── tv_shows/         # TV show entities
 │   ├── subtitles/        # Subtitle entities
 │   └── shared/           # Shared value objects
 ├── infrastructure/
 │   ├── api/              # External API clients
 │   │   ├── tmdb/         # TMDB client
 │   │   ├── knaben/       # Torrent search
 │   │   └── qbittorrent/  # qBittorrent client
 │   ├── filesystem/       # File operations
 │   └── persistence/      # Memory & repositories
 ├── tests/                # Test suite (~500 tests)
 └── docs/                 # Documentation
 ```
 ### Running Tests
 ```bash
-# Run all tests
+# Run full suite (parallel)
-poetry run pytest
+make test
-# Run with coverage
+# Run with coverage report
-poetry run pytest --cov
+make coverage
-# Run specific test file
+# Run a single file
-poetry run pytest tests/test_agent.py
+uv run pytest tests/test_agent.py -v
-# Run specific test
+# Run a single class
-poetry run pytest tests/test_agent.py::TestAgent::test_step
+uv run pytest tests/test_agent.py::TestAgentInit -v
 # Skip slow tests
 uv run pytest -m "not slow"
 ```
 ### Test coverage
 The suite covers:
 - **Agent loop** — tool execution, history, max iterations, error handling
 - **Tool registry** — OpenAI schema format, parameter extraction
 - **Prompts** — system prompt building, tool inclusion
 - **Memory** — LTM/STM/Episodic operations, persistence
 - **Filesystem tools** — path traversal security, folder listing
 - **File manager** — hard-link, move, seed links (real filesystem, no mocks)
 - **Application use cases** — `resolve_destination`, `create_seed_links`, `list_folder`, `move_media`
 - **Domain** — TV show/movie entities, shared value objects (`ImdbId`, `FilePath`, `FileSize`), subtitle scanner
 - **Repositories** — JSON-backed movie, TV show, subtitle repos
 - **Bootstrap** — secret generation, idempotency, URI construction
 - **Workflows** — YAML loading, structure validation
 - **Configuration** — boundary validation for all settings
 ### Code Quality
 ```bash
-# Linting
+make lint        # Ruff check --fix
-poetry run ruff check .
+make format      # Ruff format + check --fix
 # Formatting
 poetry run black .
 # Type checking (if mypy is installed)
 poetry run mypy .
 ```
 ### Adding a New Tool
-Quick example:
+1. Implement the function in `alfred/agent/tools/`:
 ```python
-# 1. Create the tool function in agent/tools/api.py
+# alfred/agent/tools/api.py
-def my_new_tool(param: str) -> Dict[str, Any]:
+def my_new_tool(param: str) -> dict[str, Any]:
-    """Tool description."""
+    """Short description shown to the LLM to decide when to call this tool."""
    memory = get_memory()
-    # Implementation
+    # ...
-    return {"status": "ok", "data": "result"}
+    return {"status": "ok", "data": result}
 # 2. Register in agent/registry.py
 Tool(
    name="my_new_tool",
    description="What this tool does",
    func=api_tools.my_new_tool,
    parameters={
        "type": "object",
        "properties": {
            "param": {"type": "string", "description": "Parameter description"},
        },
        "required": ["param"],
    },
 ),
 ```
-## Docker
+2. Register it in `alfred/agent/registry.py`:
-### Build
+```python
 tool_functions = [
    # ... existing tools ...
    api_tools.my_new_tool,
 ]
 ```
 The registry auto-generates the JSON schema from the function signature and docstring.
 ### Adding a Workflow
 Create a YAML file in `alfred/agent/workflows/`:
 ```yaml
 name: my_workflow
 description: What this workflow does
 steps:
  - tool: resolve_destination
    description: Find where the file should go
  - tool: move_media
    description: Move the file
 ```
 Workflows are loaded automatically at startup.
 ### Version Management
 ```bash
-docker build -t agent-media .
+# Must be on main branch
 make patch    # 0.1.7 → 0.1.8
 make minor    # 0.1.7 → 0.2.0
 make major    # 0.1.7 → 1.0.0
 ```
-### Run
+## 📚 API Reference
 ```bash
 docker run -p 8000:8000 \
  -e DEEPSEEK_API_KEY=your-key \
  -e TMDB_API_KEY=your-key \
  -v $(pwd)/memory_data:/app/memory_data \
  agent-media
 ```
 ### Docker Compose
 ```bash
 # Start all services (agent + qBittorrent)
 docker-compose up -d
 # View logs
 docker-compose logs -f
 # Stop
 docker-compose down
 ```
 ## API Documentation
 ### Endpoints
-#### `GET /health`
+| Method | Path | Description |
-Health check endpoint.
+|--------|------|-------------|
 | `GET` | `/health` | Health check |
 | `GET` | `/v1/models` | List models (OpenAI-compatible) |
 | `POST` | `/v1/chat/completions` | Chat (OpenAI-compatible, streaming supported) |
 | `GET` | `/memory/state` | Full memory dump (debug) |
 | `POST` | `/memory/clear-session` | Clear STM + Episodic |
 | `GET` | `/memory/episodic/search-results` | Current search results |
-**Response:**
+## 🔧 Troubleshooting
 ```json
 {
  "status": "healthy",
  "version": "0.2.0"
 }
 ```
 #### `GET /v1/models`
 List available models (OpenAI-compatible).
 #### `POST /v1/chat/completions`
 Chat with the agent (OpenAI-compatible).
 **Request:**
 ```json
 {
  "model": "agent-media",
  "messages": [
    {"role": "user", "content": "Find Inception"}
  ],
  "stream": false
 }
 ```
 **Response:**
 ```json
 {
  "id": "chatcmpl-xxx",
  "object": "chat.completion",
  "created": 1234567890,
  "model": "agent-media",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": "I found Inception (2010)..."
    },
    "finish_reason": "stop"
  }]
 }
 ```
 #### `GET /memory/state`
 View full memory state (debug).
 #### `POST /memory/clear-session`
 Clear session memories (STM + Episodic).
 ## Troubleshooting
 ### Agent doesn't respond
- Check API keys in `.env`
+
- Verify LLM provider is running (Ollama) or accessible (DeepSeek)
+1. Check API keys in `.env`
- Check logs: `docker-compose logs agent-media`
+2. Verify the LLM is running:
   ```bash
   docker logs alfred-ollama
   docker exec alfred-ollama ollama list
   ```
 3. Check Alfred logs: `docker logs alfred-core`
 ### qBittorrent connection failed
- Verify qBittorrent is running
+
- Check `QBITTORRENT_HOST` in `.env`
+1. Verify qBittorrent is running: `docker ps | grep qbittorrent`
- Ensure Web UI is enabled in qBittorrent settings
+2. Check credentials in `.env` (`QBITTORRENT_URL`, `QBITTORRENT_USERNAME`, `QBITTORRENT_PASSWORD`)
 ### Memory not persisting
- Check `memory_data/` directory exists and is writable
+
- Verify volume mounts in Docker
+1. Check `data/` directory is writable
 2. Verify volume mounts in `docker-compose.yaml`
 ### Bootstrap fails
 ```bash
 make validate    # Check what's wrong with .env
 make bootstrap   # Regenerate (preserves existing secrets)
 ```
 ### Tests failing
 - Run `poetry install` to ensure dependencies are up to date
 - Check logs for specific error messages
-## Contributing
+```bash
 uv run pytest tests/test_failing.py -v --tb=long
 ```
-Contributions are welcome!
+## 🤝 Contributing
 ### Development Workflow
 1. Fork the repository
-2. Create a feature branch: `git checkout -b feature/my-feature`
+2. Create a feature branch: `git checkout -b feat/my-feature`
-3. Make your changes
+3. Make your changes + add tests
-4. Run tests: `poetry run pytest`
+4. Run `make test && make lint && make format`
-5. Run linting: `poetry run ruff check . && poetry run black .`
+5. Commit with [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`, `infra:`
-6. Commit: `git commit -m "Add my feature"`
+6. Open a Pull Request
 7. Push: `git push origin feature/my-feature`
 8. Create a Pull Request
-## Documentation
+## 📄 License
- [Architecture Diagram](docs/architecture_diagram.md) - System architecture overview
+MIT License — see [LICENSE](LICENSE) file for details.
 - [Class Diagram](docs/class_diagram.md) - Class structure and relationships
 - [Component Diagram](docs/component_diagram.md) - Component interactions
 - [Sequence Diagram](docs/sequence_diagram.md) - Sequence flows
 - [Flowchart](docs/flowchart.md) - System flowcharts
-## License
+## 🙏 Acknowledgments
-MIT License - see [LICENSE](LICENSE) file for details.
+- [LibreChat](https://github.com/danny-avila/LibreChat) — Chat interface
-
+- [Ollama](https://ollama.ai/) — Local LLM runtime
-## Acknowledgments
+- [DeepSeek](https://www.deepseek.com/) — LLM provider
-
+- [TMDB](https://www.themoviedb.org/) — Movie & TV database
- [DeepSeek](https://www.deepseek.com/) - LLM provider
+- [qBittorrent](https://www.qbittorrent.org/) — Torrent client
- [TMDB](https://www.themoviedb.org/) - Movie database
+- [FastAPI](https://fastapi.tiangolo.com/) — Web framework
- [qBittorrent](https://www.qbittorrent.org/) - Torrent client
+- [uv](https://github.com/astral-sh/uv) — Fast Python package manager
 - [FastAPI](https://fastapi.tiangolo.com/) - Web framework
 ## Support
 - 📧 Email: francois.hodiaumont@gmail.com
 - 🐛 Issues: [GitHub Issues](https://github.com/your-username/agent-media/issues)
 - 💬 Discussions: [GitHub Discussions](https://github.com/your-username/agent-media/discussions)
 ---
-Made with ❤️ by Francwa
+<p align="center">Made with ❤️ by <a href="https://github.com/francwa">Francwa</a></p>
@@ -3,13 +3,16 @@
 import json
 import logging
 from collections.abc import AsyncGenerator
 from pathlib import Path
 from typing import Any
 from alfred.infrastructure.metadata import MetadataStore
 from alfred.infrastructure.persistence import get_memory
 from alfred.settings import settings
-from .prompts import PromptBuilder
+from .prompt import PromptBuilder
 from .registry import Tool, make_tools
 from .workflows import WorkflowLoader
 logger = logging.getLogger(__name__)
@@ -33,8 +36,8 @@ class Agent:
        self.settings = settings
        self.llm = llm
        self.tools: dict[str, Tool] = make_tools(settings)
-        self.prompt_builder = PromptBuilder(self.tools)
+        self.workflow_loader = WorkflowLoader()
-        self.settings = settings
+        self.prompt_builder = PromptBuilder(self.tools, self.workflow_loader)
        self.max_tool_iterations = max_tool_iterations
    def step(self, user_input: str) -> str:
@@ -139,7 +142,7 @@ class Agent:
        memory.save()
        return final_response
-    def _execute_tool_call(self, tool_call: dict[str, Any]) -> dict[str, Any]:
+    def _execute_tool_call(self, tool_call: dict[str, Any]) -> dict[str, Any]:  # noqa: PLR0911
        """
        Execute a single tool call.
@@ -168,29 +171,163 @@ class Agent:
                "available_tools": available,
            }
        # Defensive: reject calls to tools that are not currently in scope.
        visible = set(self.prompt_builder.visible_tool_names())
        if tool_name not in visible:
            return {
                "error": "tool_out_of_scope",
                "message": (
                    f"Tool '{tool_name}' is not available in the current "
                    "workflow scope. Call end_workflow first or start the "
                    "appropriate workflow."
                ),
                "available_tools": sorted(visible),
            }
        tool = self.tools[tool_name]
        memory = get_memory()
        # Cache lookup — for tools flagged cacheable, short-circuit on hit.
        cache_key_value = self._cache_key_for(tool, args)
        if cache_key_value is not None:
            cached = memory.stm.tool_results.get(tool_name, cache_key_value)
            if cached is not None:
                logger.info(f"Tool cache HIT: {tool_name}[{cache_key_value}]")
                self._post_tool_side_effects(tool_name, args, cached, from_cache=True)
                return {**cached, "_from_cache": True}
        # Execute tool
        try:
            result = tool.func(**args)
            return result
        except KeyboardInterrupt:
            # Don't catch KeyboardInterrupt - let it propagate
            raise
        except TypeError as e:
            # Bad arguments
            memory = get_memory()
            memory.episodic.add_error(tool_name, f"bad_args: {e}")
            return {"error": "bad_args", "message": str(e), "tool": tool_name}
        except Exception as e:
            # Other errors
            memory = get_memory()
            memory.episodic.add_error(tool_name, str(e))
            return {"error": "execution_failed", "message": str(e), "tool": tool_name}
        # Persist + side effects only on successful results.
        if isinstance(result, dict) and result.get("status") == "ok":
            if cache_key_value is not None:
                memory.stm.tool_results.put(tool_name, cache_key_value, result)
            self._post_tool_side_effects(tool_name, args, result, from_cache=False)
            memory.save()
        return result
    @staticmethod
    def _cache_key_for(tool: Tool, args: dict[str, Any]) -> str | None:
        """Return the cache key value for this call, or None if not cacheable."""
        if tool.cache_key is None:
            return None
        value = args.get(tool.cache_key)
        if value is None:
            return None
        return str(value)
    def _post_tool_side_effects(
        self,
        tool_name: str,
        args: dict[str, Any],
        result: dict[str, Any],
        *,
        from_cache: bool,
    ) -> None:
        """
        Apply the tool-agnostic follow-ups after a successful run or a cache hit.

        Currently:
          - Point release_focus at the path whenever a tool keyed on
            `source_path` runs.
          - On a fresh run, hand the result to `_maybe_update_alfred`
            (skipped on cache hits).
          - On a find_torrent cache hit, re-store the cached torrents as the
            episodic search results so get_torrent_by_index keeps pointing
            at the right list.
        """
        memory = get_memory()
        tool = self.tools.get(tool_name)
        # Release focus: only tools whose cache key is a path move the focus.
        key = None if tool is None else tool.cache_key
        if key in {"source_path"}:
            focused_path = args.get(key)
            if isinstance(focused_path, str) and focused_path:
                memory.stm.release_focus.focus(focused_path)
        if from_cache:
            # The cache short-circuited the real call, so the episodic search
            # results were not refreshed by the tool itself.
            if tool_name == "find_torrent":
                cached_torrents = result.get("torrents") or []
                searched_title = args.get("media_title") or ""
                memory.episodic.store_search_results(
                    query=searched_title,
                    results=cached_torrents,
                    search_type="torrent",
                )
        else:
            # Fresh result: let the inspector persistence hook see it.
            self._maybe_update_alfred(tool_name, args, result)
    def _maybe_update_alfred(
        self,
        tool_name: str,
        args: dict[str, Any],
        result: dict[str, Any],
    ) -> None:
        """
        Persist a successful inspector result into the release's
        `.alfred/metadata.yaml`. No-op when the release root can't be resolved.
        """
        if tool_name not in {"analyze_release", "probe_media", "find_media_imdb_id"}:
            return
        release_root = self._resolve_release_root(tool_name, args)
        if release_root is None:
            return
        try:
            store = MetadataStore(release_root)
            if tool_name == "analyze_release":
                store.update_parse(result)
            elif tool_name == "probe_media":
                store.update_probe(result)
            elif tool_name == "find_media_imdb_id":
                store.update_tmdb(result)
        except Exception as e:
            logger.warning(
                f"Failed to update .alfred for {tool_name} at {release_root}: {e}"
            )
    @staticmethod
    def _resolve_release_root(
        tool_name: str,
        args: dict[str, Any],
    ) -> Path | None:
        """
        Figure out which release folder owns this call.
        - analyze_release / probe_media: derived from source_path
          (folder kept as-is, file walked up to its parent).
        - find_media_imdb_id: follow the current release focus in STM.
        """
        if tool_name in {"analyze_release", "probe_media"}:
            raw = args.get("source_path")
            if not isinstance(raw, str) or not raw:
                return None
            path = Path(raw)
            return path if path.is_dir() else path.parent
        # find_media_imdb_id has no path arg — rely on release focus.
        focus = get_memory().stm.release_focus.current_release_path
        if not focus:
            return None
        path = Path(focus)
        return path if path.is_dir() else path.parent
    async def step_streaming(
        self, user_input: str, completion_id: str, created_ts: int, model: str
-    ) -> AsyncGenerator[dict[str, Any], None]:
+    ) -> AsyncGenerator[dict[str, Any]]:
        """
        Execute agent step with streaming support for LibreChat.
@@ -0,0 +1,79 @@
 """Expression loader — loads and merges the per-user expression YAML files."""
 import random
 from pathlib import Path
 import yaml
 # Directory searched for common.yaml and the per-user {username}.yaml files:
 # <parent of this module's directory>/knowledge/users.
 _USERS_DIR = Path(__file__).parent.parent / "knowledge" / "users"
 def _load_yaml(path: Path) -> dict:
    if not path.exists():
        return {}
    return yaml.safe_load(path.read_text(encoding="utf-8")) or {}
 def load_expressions(username: str | None) -> dict:
    """
    Load common.yaml and merge it with {username}.yaml.

    For each situation the user's phrases are appended after the common
    ones. Keys that are declared without a value in the YAML (parsed as
    None) are treated as missing instead of raising.

    Args:
        username: Name of the user file to merge; None or empty means only
            common.yaml is used.

    Returns:
        A dict with:
          - nickname: str (the user's nickname, falling back to username,
            then to "mec")
          - expressions: dict[situation -> list[str]], common situations
            first, followed by the user-only ones
    """
    common = _load_yaml(_USERS_DIR / "common.yaml")
    user_data = _load_yaml(_USERS_DIR / f"{username}.yaml") if username else {}
    # `or {}` / `or []` guard against keys written without a value
    # (e.g. `expressions:`), which YAML parses as None.
    common_exprs: dict[str, list] = common.get("expressions") or {}
    user_exprs: dict[str, list] = user_data.get("expressions") or {}
    # dict.fromkeys de-duplicates while keeping a stable order, unlike a set
    # union whose iteration order can differ from one run to the next.
    situations = dict.fromkeys([*common_exprs, *user_exprs])
    merged: dict[str, list] = {
        situation: [
            *(common_exprs.get(situation) or []),
            *(user_exprs.get(situation) or []),
        ]
        for situation in situations
    }
    nickname = (user_data.get("user") or {}).get("nickname") or username or "mec"
    return {
        "nickname": nickname,
        "expressions": merged,
    }
 def pick(expressions: dict, situation: str, nickname: str | None = None) -> str:
    """
    Pioche une expression aléatoire pour une situation donnée.
    Résout {user} avec le nickname si fourni.
    Retourne une string vide si la situation n'existe pas.
    """
    options = expressions.get("expressions", {}).get(situation, [])
    if not options:
        return ""
    chosen = random.choice(options)
    if nickname:
        chosen = chosen.replace("{user}", nickname)
    return chosen
 def build_expressions_context(username: str | None) -> dict:
    """
    Main entry point.

    Returns a dict with:
      - nickname: str
      - samples: dict[situation -> one resolved phrase] — a single phrase
        per situation
    """
    loaded = load_expressions(username)
    who = loaded["nickname"]
    samples = {}
    for situation in loaded["expressions"]:
        samples[situation] = pick(loaded, situation, who)
    return {"nickname": who, "samples": samples}
@@ -6,7 +6,8 @@ from typing import Any
 import requests
 from requests.exceptions import HTTPError, RequestException, Timeout
-from alfred.settings import Settings, settings
+from alfred.settings import Settings
 from alfred.settings import settings as default_settings
 from .exceptions import LLMAPIError, LLMConfigurationError
@@ -36,6 +37,7 @@ class DeepSeekClient:
        Raises:
            LLMConfigurationError: If API key is missing
        """
        self.settings = settings or default_settings
        self.api_key = api_key or self.settings.deepseek_api_key
        self.base_url = base_url or self.settings.deepseek_base_url
        self.model = model or self.settings.deepseek_model
@@ -96,7 +98,7 @@ class DeepSeekClient:
        payload = {
            "model": self.model,
            "messages": messages,
-            "temperature": settings.llm_temperature,
+            "temperature": self.settings.llm_temperature,
        }
        # Add tools if provided
@@ -7,6 +7,7 @@ import requests
 from requests.exceptions import HTTPError, RequestException, Timeout
 from alfred.settings import Settings
 from alfred.settings import settings as default_settings
 from .exceptions import LLMAPIError, LLMConfigurationError
@@ -46,11 +47,12 @@ class OllamaClient:
        Raises:
            LLMConfigurationError: If configuration is invalid
        """
-        self.base_url = base_url or settings.ollama_base_url
+        self.settings = settings or default_settings
-        self.model = model or settings.ollama_model
+        self.base_url = base_url or self.settings.ollama_base_url
-        self.timeout = timeout or settings.request_timeout
+        self.model = model or self.settings.ollama_model
        self.timeout = timeout or self.settings.request_timeout
        self.temperature = (
-            temperature if temperature is not None else settings.llm_temperature
+            temperature if temperature is not None else self.settings.llm_temperature
        )
        if not self.base_url:
@@ -1,101 +0,0 @@
 # agent/parameters.py
 from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Any
@dataclass
 class ParameterSchema:
    """
    Schema entry describing one parameter known to the agent.

    Despite the "required" wording, an entry can be optional: see ``required``.
    """
    key: str  # Name used to look the value up (e.g. memory_data.get(key))
    description: str  # Short human-readable summary of the parameter
    why_needed: str  # Explanation for the AI
    type: str  # "string", "number", "object", etc.
    validator: Callable[[Any], bool] | None = None  # Predicate returning True for acceptable values; None disables the check
    default: Any = None  # Default value declared for the parameter
    required: bool = True  # False marks the parameter as optional
 # Parameter schemas known to the agent. Despite the name, an entry may be
 # optional (see each entry's `required` flag).
 # NOTE(review): the `default` literals below ({} and []) are created once when
 # this module is imported, so every reader of `.default` shares the same
 # mutable object — confirm no caller mutates it.
 REQUIRED_PARAMETERS = [
    # Folder paths; flagged as required.
    ParameterSchema(
        key="config",
        description="Configuration object containing all folder paths",
        why_needed=(
            "This contains the paths to all important folders:\n"
            "- download_folder: Where downloaded files arrive before being organized\n"
            "- tvshow_folder: Where TV show files are organized and stored\n"
            "- movie_folder: Where movie files are organized and stored\n"
            "- torrent_folder: Where torrent structures are saved for the torrent client"
        ),
        type="object",
        validator=lambda x: isinstance(x, dict),
        required=True,
        default={},
    ),
    # Followed TV shows; flagged as optional.
    ParameterSchema(
        key="tv_shows",
        description="List of TV shows the user is following",
        why_needed=(
            "This tracks which TV shows you're following. "
            "Each show includes: IMDB ID, title, number of seasons, and status (ongoing or ended)."
        ),
        type="array",
        validator=lambda x: isinstance(x, list),
        required=False,
        default=[],
    ),
 ]
 def get_parameter_schema(key: str) -> ParameterSchema | None:
    """Return the first schema in REQUIRED_PARAMETERS whose key matches, else None."""
    return next(
        (schema for schema in REQUIRED_PARAMETERS if schema.key == key),
        None,
    )
 def get_missing_required_parameters(memory_data: dict) -> list[ParameterSchema]:
    """
    List the required schemas that have no usable value in ``memory_data``.

    A parameter counts as missing when its key is absent or maps to None.
    Optional schemas are never reported.
    """
    return [
        schema
        for schema in REQUIRED_PARAMETERS
        if schema.required and memory_data.get(schema.key) is None
    ]
 def format_parameters_for_prompt() -> str:
    """
    Render every schema in REQUIRED_PARAMETERS as a text block for the prompt.

    Each schema contributes four lines (key with REQUIRED/OPTIONAL status,
    description, rationale, type) under a single "REQUIRED PARAMETERS:" header.
    """
    out = ["REQUIRED PARAMETERS:"]
    for schema in REQUIRED_PARAMETERS:
        flag = "REQUIRED" if schema.required else "OPTIONAL"
        out.extend(
            (
                f"\n- {schema.key} ({flag}):",
                f"  Description: {schema.description}",
                f"  Why needed: {schema.why_needed}",
                f"  Type: {schema.type}",
            )
        )
    return "\n".join(out)
 def validate_parameter(key: str, value: Any) -> tuple[bool, str | None]:
    """
    Validate a parameter value against its schema.

    Keys without a schema, and schemas without a validator, are accepted.
    A validator that raises is reported as a validation error, not propagated.

    Returns:
        (is_valid, error_message) — error_message is None when valid.
    """
    schema = get_parameter_schema(key)
    check = schema.validator if schema else None
    if not check:
        # Unknown parameters (or schemas with no validator) are allowed
        return True, None

    try:
        # bool() stays inside the try so a failing truth test is caught too.
        accepted = bool(check(value))
    except Exception as exc:
        return False, f"Validation error for {key}: {str(exc)}"

    if accepted:
        return True, None
    return False, f"Validation failed for {key}"
@@ -0,0 +1,333 @@
 """Prompt builder for the agent system."""
 import json
 from typing import Any
 from alfred.infrastructure.persistence import get_memory
 from alfred.infrastructure.persistence.memory import MemoryRegistry
 from .expressions import build_expressions_context
 from .registry import Tool
 from .workflows import WorkflowLoader
 # Tools that are always available, regardless of workflow scope.
 # Kept small on purpose — this core set (the "noyau") is what the agent uses
 # to either answer trivially or pivot into a workflow.
 CORE_TOOLS: tuple[str, ...] = (
    "set_language",
    "set_path_for_folder",
    "list_folder",
    "read_release_metadata",
    "query_library",
    "start_workflow",
    "end_workflow",
 )
 class PromptBuilder:
    """Builds system prompts for the agent with memory context."""
    def __init__(
        self,
        tools: dict[str, Tool],
        workflow_loader: WorkflowLoader | None = None,
    ):
        """
        Args:
            tools: Registered tools; the mapping keys are used as tool names
                when filtering by scope.
            workflow_loader: Source of workflow definitions. A fresh
                ``WorkflowLoader()`` is created when omitted (or falsy).
        """
        self.tools = tools
        self.workflow_loader = (
            workflow_loader if workflow_loader else WorkflowLoader()
        )
        self._memory_registry = MemoryRegistry()
    def _active_workflow(self, memory) -> dict | None:
        """Return the YAML definition of the active workflow, or None."""
        current = memory.stm.workflow.current
        if current is None:
            return None
        return self.workflow_loader.get(current.get("name"))
    def visible_tool_names(self) -> list[str]:
        """
        List the registered tools that are in scope right now.

        - Idle (no workflow): only the CORE_TOOLS names. The LLM enters a
          workflow via start_workflow to access more tools.
        - Workflow active: CORE_TOOLS plus the names under the workflow's
          "tools" key.

        Names absent from ``self.tools`` are dropped; the result follows the
        iteration order of ``self.tools``.
        """
        memory = get_memory()
        in_scope = set(CORE_TOOLS)

        active = self._active_workflow(memory)
        if active is not None:
            in_scope.update(active.get("tools", []))

        # Only return tools that actually exist in the registry.
        return [name for name in self.tools if name in in_scope]
    def _format_identity(self, memory) -> str:
        """Build Alfred's identity and personality section."""
        username = memory.stm.get_entity("username")
        expr = build_expressions_context(username)
        nickname = expr["nickname"]
        samples = expr["samples"]
        # Format expressions as situational guidance for the LLM
        expr_lines = []
        situation_labels = {
            "greeting": "Salutation",
            "success": "Succès",
            "working": "En cours",
            "error": "Erreur",
            "unclear": "Demande floue",
            "warning": "Avertissement",
            "not_found": "Introuvable",
        }
        for situation, label in situation_labels.items():
            phrase = samples.get(situation, "")
            if phrase:
                expr_lines.append(f'  {label}: "{phrase}"')
        expressions_block = "\n".join(expr_lines)
        return f"""Tu t'appelles Alfred. Tu es un assistant d'organisation de médiathèque — direct, opérationnel, légèrement impertinent.
 PERSONNALITÉ:
 - Tu parles franglais : français avec des touches d'anglais quand ça colle mieux
 - Tu es sarcastique et n'as pas peur de te moquer si l'user fait une connerie
 - Tu envoies chier poliment (mais clairement) quand la demande est trop floue
 - Tu ne fais pas de blabla inutile. Si "ok c'est fait" suffit, c'est tout ce que tu dis
 - Tu peux jurer (putain, merde, con, ...) — c'est naturel, pas du remplissage
 - Jamais de "Great question!" ou de politesse creuse
 USER COURANT: {nickname}
 EXPRESSIONS À UTILISER (une par situation, naturellement intégrées dans ta réponse) :
 {expressions_block}"""
    def build_tools_spec(self) -> list[dict[str, Any]]:
        """Build the tool specification for the LLM API (scope-filtered)."""
        visible = set(self.visible_tool_names())
        tool_specs = []
        for tool in self.tools.values():
            if tool.name not in visible:
                continue
            spec = {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                },
            }
            tool_specs.append(spec)
        return tool_specs
    def _format_tools_description(self) -> str:
        """Format the currently-visible tools with description + params."""
        visible = set(self.visible_tool_names())
        visible_tools = [t for t in self.tools.values() if t.name in visible]
        if not visible_tools:
            return ""
        return "\n".join(
            f"- {tool.name}: {tool.description}\n"
            f"  Parameters: {json.dumps(tool.parameters, ensure_ascii=False)}"
            for tool in visible_tools
        )
    def _format_workflow_scope(self, memory) -> str:
        """Describe the current workflow scope so the LLM has a plan."""
        workflow = self._active_workflow(memory)
        if workflow is None:
            available = self.workflow_loader.names()
            if not available:
                return ""
            lines = ["WORKFLOW SCOPE: idle (broad catalog narrowed to core noyau)."]
            lines.append(
                "  Call start_workflow(workflow_name, params) to enter a scope."
            )
            lines.append("  Available workflows:")
            for name in available:
                wf = self.workflow_loader.get(name) or {}
                desc = (wf.get("description") or "").strip().splitlines()
                summary = desc[0] if desc else ""
                lines.append(f"    - {name}: {summary}")
            return "\n".join(lines)
        current = memory.stm.workflow.current or {}
        lines = [
            f"WORKFLOW SCOPE: active — {current.get('name')} "
            f"(stage: {current.get('stage')})",
        ]
        params = current.get("params")
        if params:
            lines.append(f"  Params: {params}")
        wf_desc = (workflow.get("description") or "").strip()
        if wf_desc:
            lines.append(f"  Goal: {wf_desc}")
        steps = workflow.get("steps", [])
        if steps:
            lines.append("  Steps:")
            for step in steps:
                step_id = step.get("id", "?")
                step_tool = step.get("tool") or (
                    "ask_user" if step.get("ask_user") else "—"
                )
                lines.append(f"    - {step_id} ({step_tool})")
        lines.append("  Call end_workflow(reason) when done, cancelled, or off-topic.")
        return "\n".join(lines)
    def _format_episodic_context(self, memory) -> str:
        """Format episodic memory context for the prompt."""
        lines = []
        if memory.episodic.last_search_results:
            results = memory.episodic.last_search_results
            result_list = results.get("results", [])
            lines.append(
                f"\nLAST SEARCH: '{results.get('query')}' ({len(result_list)} results)"
            )
            # Show first 5 results
            for i, result in enumerate(result_list[:5]):
                name = result.get("name", "Unknown")
                lines.append(f"  {i + 1}. {name}")
            if len(result_list) > 5:
                lines.append(f"  ... and {len(result_list) - 5} more")
        if memory.episodic.pending_question:
            question = memory.episodic.pending_question
            lines.append(f"\nPENDING QUESTION: {question.get('question')}")
            lines.append(f"  Type: {question.get('type')}")
            if question.get("options"):
                lines.append(f"  Options: {len(question.get('options'))}")
        if memory.episodic.active_downloads:
            lines.append(f"\nACTIVE DOWNLOADS: {len(memory.episodic.active_downloads)}")
            for dl in memory.episodic.active_downloads[:3]:
                lines.append(f"  - {dl.get('name')}: {dl.get('progress', 0)}%")
        if memory.episodic.recent_errors:
            lines.append("\nRECENT ERRORS (up to 3):")
            for error in memory.episodic.recent_errors[-3:]:
                lines.append(
                    f"  - Action '{error.get('action')}' failed: {error.get('error')}"
                )
        # Unread events
        unread = [e for e in memory.episodic.background_events if not e.get("read")]
        if unread:
            lines.append(f"\nUNREAD EVENTS: {len(unread)}")
            for event in unread[:3]:
                lines.append(f"  - {event.get('type')}: {event.get('data')}")
        return "\n".join(lines)
    def _format_stm_context(self, memory) -> str:
        """Format short-term memory context for the prompt."""
        lines = []
        if memory.stm.current_workflow:
            workflow = memory.stm.current_workflow
            lines.append(
                f"CURRENT WORKFLOW: {workflow.get('name')} (stage: {workflow.get('stage')})"
            )
            if workflow.get("params"):
                lines.append(f"  Params: {workflow.get('params')}")
        if memory.stm.current_topic:
            lines.append(f"CURRENT TOPIC: {memory.stm.current_topic}")
        if memory.stm.extracted_entities:
            lines.append("EXTRACTED ENTITIES:")
            for key, value in memory.stm.extracted_entities.items():
                lines.append(f"  - {key}: {value}")
        if memory.stm.language:
            lines.append(f"CONVERSATION LANGUAGE: {memory.stm.language}")
        return "\n".join(lines)
    def _format_memory_schema(self) -> str:
        """Describe available memory components so the agent knows what to read/write and when."""
        schema = self._memory_registry.schema()
        tier_labels = {
            "ltm": "LONG-TERM (persisted)",
            "stm": "SHORT-TERM (session)",
            "episodic": "EPISODIC (volatile)",
        }
        lines = ["MEMORY COMPONENTS:"]
        for tier, components in schema.items():
            if not components:
                continue
            lines.append(f"\n  [{tier_labels.get(tier, tier.upper())}]")
            for c in components:
                access = c.get("access", "read")
                lines.append(f"  {c['name']} ({access}): {c['description']}")
                for field_name, field_desc in c.get("fields", {}).items():
                    lines.append(f"    · {field_name}: {field_desc}")
        return "\n".join(lines)
    def _format_config_context(self, memory) -> str:
        """Format configuration context."""
        lines = ["CURRENT CONFIGURATION:"]
        folders = {
            **memory.ltm.workspace.as_dict(),
            **memory.ltm.library_paths.to_dict(),
        }
        if folders:
            for key, value in folders.items():
                lines.append(f"  - {key}: {value}")
        else:
            lines.append("  (no configuration set)")
        return "\n".join(lines)
    def build_system_prompt(self) -> str:
        """Build the complete system prompt."""
        memory = get_memory()
        # Identity + personality
        identity = self._format_identity(memory)
        # Language instruction
        language_instruction = (
            "Si la langue de l'user est différente de la langue courante en STM, "
            "appelle `set_language` en premier avant de répondre."
        )
        # Configuration
        config_section = self._format_config_context(memory)
        # STM context
        stm_context = self._format_stm_context(memory)
        # Episodic context
        episodic_context = self._format_episodic_context(memory)
        # Memory schema
        memory_schema = self._format_memory_schema()
        # Workflow scope (active workflow plan or list of options)
        workflow_section = self._format_workflow_scope(memory)
        # Available tools (already filtered by scope)
        tools_desc = self._format_tools_description()
        tools_section = f"\nOUTILS DISPONIBLES:\n{tools_desc}" if tools_desc else ""
        rules = """
 RÈGLES:
 - Utilise les outils pour accomplir les tâches, pas pour décorer
 - Si des résultats de recherche sont dispo en mémoire épisodique, référence-les par index
 - Confirme toujours avant une opération destructive (move, delete, overwrite)
 - Réponses courtes — si c'est fait, dis-le en une ligne
 - Si la demande est floue, demande un éclaircissement AVANT de lancer quoi que ce soit
 """
        sections = [
            identity,
            language_instruction,
            config_section,
            stm_context,
            episodic_context,
            memory_schema,
            workflow_section,
            tools_section,
            rules,
        ]
        return "\n\n".join(s for s in sections if s and s.strip())
@@ -1,180 +0,0 @@
 """Prompt builder for the agent system."""
 import json
 from typing import Any
 from alfred.infrastructure.persistence import get_memory
 from .registry import Tool
 class PromptBuilder:
    """Builds system prompts for the agent with memory context."""
    def __init__(self, tools: dict[str, Tool]):
        """Store the tool mapping; the spec and description builders iterate over its values."""
        self.tools = tools
    def build_tools_spec(self) -> list[dict[str, Any]]:
        """Build the tool specification for the LLM API."""
        tool_specs = []
        for tool in self.tools.values():
            spec = {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                },
            }
            tool_specs.append(spec)
        return tool_specs
    def _format_tools_description(self) -> str:
        """Format tools with their descriptions and parameters."""
        if not self.tools:
            return ""
        return "\n".join(
            f"- {tool.name}: {tool.description}\n"
            f"  Parameters: {json.dumps(tool.parameters, ensure_ascii=False)}"
            for tool in self.tools.values()
        )
    def _format_episodic_context(self, memory) -> str:
        """Format episodic memory context for the prompt."""
        lines = []
        if memory.episodic.last_search_results:
            results = memory.episodic.last_search_results
            result_list = results.get("results", [])
            lines.append(
                f"\nLAST SEARCH: '{results.get('query')}' ({len(result_list)} results)"
            )
            # Show first 5 results
            for i, result in enumerate(result_list[:5]):
                name = result.get("name", "Unknown")
                lines.append(f"  {i + 1}. {name}")
            if len(result_list) > 5:
                lines.append(f"  ... and {len(result_list) - 5} more")
        if memory.episodic.pending_question:
            question = memory.episodic.pending_question
            lines.append(f"\nPENDING QUESTION: {question.get('question')}")
            lines.append(f"  Type: {question.get('type')}")
            if question.get("options"):
                lines.append(f"  Options: {len(question.get('options'))}")
        if memory.episodic.active_downloads:
            lines.append(f"\nACTIVE DOWNLOADS: {len(memory.episodic.active_downloads)}")
            for dl in memory.episodic.active_downloads[:3]:
                lines.append(f"  - {dl.get('name')}: {dl.get('progress', 0)}%")
        if memory.episodic.recent_errors:
            lines.append("\nRECENT ERRORS (up to 3):")
            for error in memory.episodic.recent_errors[-3:]:
                lines.append(
                    f"  - Action '{error.get('action')}' failed: {error.get('error')}"
                )
        # Unread events
        unread = [e for e in memory.episodic.background_events if not e.get("read")]
        if unread:
            lines.append(f"\nUNREAD EVENTS: {len(unread)}")
            for event in unread[:3]:
                lines.append(f"  - {event.get('type')}: {event.get('data')}")
        return "\n".join(lines)
    def _format_stm_context(self, memory) -> str:
        """Format short-term memory context for the prompt."""
        lines = []
        if memory.stm.current_workflow:
            workflow = memory.stm.current_workflow
            lines.append(
                f"CURRENT WORKFLOW: {workflow.get('type')} (stage: {workflow.get('stage')})"
            )
            if workflow.get("target"):
                lines.append(f"  Target: {workflow.get('target')}")
        if memory.stm.current_topic:
            lines.append(f"CURRENT TOPIC: {memory.stm.current_topic}")
        if memory.stm.extracted_entities:
            lines.append("EXTRACTED ENTITIES:")
            for key, value in memory.stm.extracted_entities.items():
                lines.append(f"  - {key}: {value}")
        if memory.stm.language:
            lines.append(f"CONVERSATION LANGUAGE: {memory.stm.language}")
        return "\n".join(lines)
    def _format_config_context(self, memory) -> str:
        """Format configuration context."""
        lines = ["CURRENT CONFIGURATION:"]
        if memory.ltm.config:
            for key, value in memory.ltm.config.items():
                lines.append(f"  - {key}: {value}")
        else:
            lines.append("  (no configuration set)")
        return "\n".join(lines)
    def build_system_prompt(self) -> str:
        """Build the complete system prompt."""
        # Get memory once for all context formatting
        memory = get_memory()
        # Base instruction
        base = "You are a helpful AI assistant for managing a media library."
        # Language instruction
        language_instruction = (
            "Your first task is to determine the user's language from their message "
            "and use the `set_language` tool if it's different from the current one. "
            "After that, proceed to help the user."
        )
        # Available tools
        tools_desc = self._format_tools_description()
        tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
        # Configuration
        config_section = self._format_config_context(memory)
        if config_section:
            config_section = f"\n{config_section}"
        # STM context
        stm_context = self._format_stm_context(memory)
        if stm_context:
            stm_context = f"\n{stm_context}"
        # Episodic context
        episodic_context = self._format_episodic_context(memory)
        # Important rules
        rules = """
 IMPORTANT RULES:
 - Use tools to accomplish tasks
 - When search results are available, reference them by index (e.g., "add_torrent_by_index")
 - Always confirm actions with the user before executing destructive operations
 - Provide clear, concise responses
 """
        # Examples
        examples = """
 EXAMPLES:
 - User: "Find Inception" → Use find_media_imdb_id, then find_torrent
 - User: "download the 3rd one" → Use add_torrent_by_index with index=3
 - User: "List my downloads" → Use list_folder with folder_type="download"
 """
        return f"""{base}
 {language_instruction}
 {tools_section}
 {config_section}
 {stm_context}
 {episodic_context}
 {rules}
 {examples}
 """
@@ -1,4 +1,4 @@
-"""Tool registry - defines and registers all available tools for the agent."""
+"""Tool registry — defines and registers all available tools for the agent."""
 import inspect
 import logging
@@ -6,6 +6,9 @@ from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Any
 from .tools.spec import ToolSpec, ToolSpecError
 from .tools.spec_loader import load_tool_specs
 logger = logging.getLogger(__name__)
@@ -17,51 +20,63 @@ class Tool:
    description: str
    func: Callable[..., dict[str, Any]]
    parameters: dict[str, Any]
    cache_key: str | None = None  # Parameter name to use as STM cache key.
-def _create_tool_from_function(func: Callable) -> Tool:
+_PY_TYPE_TO_JSON = {
-    """
+    str: "string",
-    Create a Tool object from a function.
+    int: "integer",
-
+    float: "number",
-    Args:
+    bool: "boolean",
-        func: Function to convert to a tool
+    list: "array",
-
+    dict: "object",
    Returns:
        Tool object with metadata extracted from function
    """
    sig = inspect.signature(func)
    doc = inspect.getdoc(func)
    # Extract description from docstring (first line)
    description = doc.strip().split("\n")[0] if doc else func.__name__
    # Build JSON schema from function signature
    properties = {}
    required = []
    for param_name, param in sig.parameters.items():
        if param_name == "self":
            continue
        # Map Python types to JSON schema types
        param_type = "string"  # default
        if param.annotation != inspect.Parameter.empty:
            if param.annotation is str:
                param_type = "string"
            elif param.annotation is int:
                param_type = "integer"
            elif param.annotation is float:
                param_type = "number"
            elif param.annotation is bool:
                param_type = "boolean"
        properties[param_name] = {
            "type": param_type,
            "description": f"Parameter {param_name}",
 }
-        # Add to required if no default value
+
-        if param.default == inspect.Parameter.empty:
+def _json_type_for(annotation) -> str:
    """Map a Python type annotation to a JSON Schema 'type' string."""
    if annotation is inspect.Parameter.empty:
        return "string"
    # Strip Optional[X] / X | None to X.
    args = getattr(annotation, "__args__", None)
    if args:
        non_none = [a for a in args if a is not type(None)]
        if len(non_none) == 1:
            annotation = non_none[0]
    return _PY_TYPE_TO_JSON.get(annotation, "string")
 def _create_tool_from_function(func: Callable, spec: ToolSpec | None = None) -> Tool:
    """
    Create a Tool object from a function, optionally enriched with a spec.
    Types and required-ness always come from the Python signature (source of
    truth for the API contract). When a spec is provided, the description
    and per-parameter docs come from the YAML spec instead of the docstring.
    """
    sig = inspect.signature(func)
    sig_params = {name: p for name, p in sig.parameters.items() if name != "self"}
    if spec is not None:
        _validate_spec_matches_signature(func.__name__, sig_params, spec)
        description = spec.compile_description()
        param_descriptions = {
            name: spec.compile_parameter_description(name) for name in sig_params
        }
    else:
        doc = inspect.getdoc(func)
        description = doc.strip().split("\n")[0] if doc else func.__name__
        param_descriptions = {name: f"Parameter {name}" for name in sig_params}
    properties: dict[str, dict[str, Any]] = {}
    required: list[str] = []
    for param_name, param in sig_params.items():
        properties[param_name] = {
            "type": _json_type_for(param.annotation),
            "description": param_descriptions[param_name],
        }
        if param.default is inspect.Parameter.empty:
            required.append(param_name)
    parameters = {
@@ -70,11 +85,38 @@ def _create_tool_from_function(func: Callable) -> Tool:
        "required": required,
    }
    cache_key = spec.cache.key if spec is not None and spec.cache is not None else None
    return Tool(
        name=func.__name__,
        description=description,
        func=func,
        parameters=parameters,
        cache_key=cache_key,
    )
 def _validate_spec_matches_signature(
    func_name: str,
    sig_params: dict[str, inspect.Parameter],
    spec: ToolSpec,
 ) -> None:
    """
    Check that the spec documents exactly the parameters of the signature.

    Raises:
        ToolSpecError: If a signature parameter has no spec entry, or the spec
            documents a parameter the function does not take. Missing entries
            are reported first.
    """
    declared = set(sig_params)
    documented = set(spec.parameters)

    undocumented = declared - documented
    if undocumented:
        raise ToolSpecError(
            f"tool '{func_name}': spec is missing entries for parameter(s) "
            f"{sorted(undocumented)}"
        )

    unknown = documented - declared
    if unknown:
        raise ToolSpecError(
            f"tool '{func_name}': spec has entries for unknown parameter(s) "
            f"{sorted(unknown)} (not in function signature)"
        )
@@ -83,33 +125,54 @@ def make_tools(settings) -> dict[str, Tool]:
    Create and register all available tools.
    Args:
-        settings: Application settings instance
+        settings: Application settings instance.
    Returns:
-        Dictionary mapping tool names to Tool objects
+        Dictionary mapping tool names to Tool objects.
    """
    # Import tools here to avoid circular dependencies
    from .tools import api as api_tools  # noqa: PLC0415
    from .tools import filesystem as fs_tools  # noqa: PLC0415
    from .tools import language as lang_tools  # noqa: PLC0415
    from .tools import workflow as wf_tools  # noqa: PLC0415
    # List of all tool functions
    tool_functions = [
        fs_tools.set_path_for_folder,
        fs_tools.list_folder,
        fs_tools.read_release_metadata,
        fs_tools.query_library,
        fs_tools.analyze_release,
        fs_tools.probe_media,
        fs_tools.resolve_season_destination,
        fs_tools.resolve_episode_destination,
        fs_tools.resolve_movie_destination,
        fs_tools.resolve_series_destination,
        fs_tools.move_media,
        fs_tools.move_to_destination,
        fs_tools.manage_subtitles,
        fs_tools.create_seed_links,
        fs_tools.learn,
        api_tools.find_media_imdb_id,
        api_tools.find_torrent,
        api_tools.add_torrent_by_index,
        api_tools.add_torrent_to_qbittorrent,
        api_tools.get_torrent_by_index,
        lang_tools.set_language,
        wf_tools.start_workflow,
        wf_tools.end_workflow,
    ]
-    # Create Tool objects from functions
+    specs = load_tool_specs()
-    tools = {}
+
    tools: dict[str, Tool] = {}
    for func in tool_functions:
-        tool = _create_tool_from_function(func)
+        spec = specs.get(func.__name__)
        tool = _create_tool_from_function(func, spec=spec)
        tools[tool.name] = tool
-    logger.info(f"Registered {len(tools)} tools: {list(tools.keys())}")
+    with_spec = sum(1 for fn in tool_functions if fn.__name__ in specs)
    logger.info(
        f"Registered {len(tools)} tools "
        f"({with_spec} with YAML spec, {len(tools) - with_spec} doc-only): "
        f"{list(tools.keys())}"
    )
    return tools
@@ -14,15 +14,7 @@ logger = logging.getLogger(__name__)
 def find_media_imdb_id(media_title: str) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/find_media_imdb_id.yaml."""
    Find the IMDb ID for a given media title using TMDB API.
    Args:
        media_title: Title of the media to search for.
    Returns:
        Dict with IMDb ID and media info, or error details.
    """
    use_case = SearchMovieUseCase(tmdb_client)
    response = use_case.execute(media_title)
    result = response.to_dict()
@@ -45,18 +37,7 @@ def find_media_imdb_id(media_title: str) -> dict[str, Any]:
 def find_torrent(media_title: str) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/find_torrent.yaml."""
    Find torrents for a given media title using Knaben API.
    Results are stored in episodic memory so the user can reference them
    by index (e.g., "download the 3rd one").
    Args:
        media_title: Title of the media to search for.
    Returns:
        Dict with torrent list or error details.
    """
    logger.info(f"Searching torrents for: {media_title}")
    use_case = SearchTorrentsUseCase(knaben_client)
@@ -76,17 +57,7 @@ def find_torrent(media_title: str) -> dict[str, Any]:
 def get_torrent_by_index(index: int) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/get_torrent_by_index.yaml."""
    Get a torrent from the last search results by its index.
    Allows the user to reference results by number after a search.
    Args:
        index: 1-based index of the torrent in the search results.
    Returns:
        Dict with torrent data or error if not found.
    """
    logger.info(f"Getting torrent at index: {index}")
    memory = get_memory()
@@ -113,15 +84,7 @@ def get_torrent_by_index(index: int) -> dict[str, Any]:
 def add_torrent_to_qbittorrent(magnet_link: str) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/add_torrent_to_qbittorrent.yaml."""
    Add a torrent to qBittorrent using a magnet link.
    Args:
        magnet_link: Magnet link of the torrent to add.
    Returns:
        Dict with success status or error details.
    """
    logger.info("Adding torrent to qBittorrent")
    use_case = AddTorrentUseCase(qbittorrent_client)
@@ -157,17 +120,7 @@ def add_torrent_to_qbittorrent(magnet_link: str) -> dict[str, Any]:
 def add_torrent_by_index(index: int) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/add_torrent_by_index.yaml."""
    Add a torrent from the last search results by its index.
    Combines get_torrent_by_index and add_torrent_to_qbittorrent.
    Args:
        index: 1-based index of the torrent in the search results.
    Returns:
        Dict with success status or error details.
    """
    logger.info(f"Adding torrent by index: {index}")
    torrent_result = get_torrent_by_index(index)
@@ -1,40 +1,365 @@
 """Filesystem tools for folder management."""
 from pathlib import Path
 from typing import Any
-from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase
+import yaml
-from alfred.infrastructure.filesystem import FileManager
+
 import alfred as _alfred_pkg
 from alfred.application.filesystem import (
    CreateSeedLinksUseCase,
    ListFolderUseCase,
    ManageSubtitlesUseCase,
    MoveMediaUseCase,
    SetFolderPathUseCase,
 )
 from alfred.application.filesystem.detect_media_type import detect_media_type
 from alfred.application.filesystem.enrich_from_probe import enrich_from_probe
 from alfred.application.filesystem.resolve_destination import (
    resolve_episode_destination as _resolve_episode_destination,
 )
 from alfred.application.filesystem.resolve_destination import (
    resolve_movie_destination as _resolve_movie_destination,
 )
 from alfred.application.filesystem.resolve_destination import (
    resolve_season_destination as _resolve_season_destination,
 )
 from alfred.application.filesystem.resolve_destination import (
    resolve_series_destination as _resolve_series_destination,
 )
 from alfred.infrastructure.filesystem import FileManager, create_folder, move
 from alfred.infrastructure.filesystem.ffprobe import probe
 from alfred.infrastructure.filesystem.find_video import find_video_file
 from alfred.infrastructure.metadata import MetadataStore
 from alfred.infrastructure.persistence import get_memory
 _LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
 def move_media(source: str, destination: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/move_media.yaml."""
    # Build the use case around a fresh FileManager, run it, and hand back
    # the response in its dict form.
    response = MoveMediaUseCase(FileManager()).execute(source, destination)
    return response.to_dict()
 def move_to_destination(source: str, destination: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/move_to_destination.yaml."""
    # Create the destination's parent folder first; the move only happens
    # when that step reports status "ok", otherwise its result is returned.
    folder_result = create_folder(str(Path(destination).parent))
    if folder_result["status"] == "ok":
        return move(source, destination)
    return folder_result
 def resolve_season_destination(
    release_name: str,
    tmdb_title: str,
    tmdb_year: int,
    confirmed_folder: str | None = None,
 ) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/resolve_season_destination.yaml."""
    # Delegate to the application-layer resolver and serialise its result.
    resolution = _resolve_season_destination(
        release_name,
        tmdb_title,
        tmdb_year,
        confirmed_folder,
    )
    return resolution.to_dict()
 def resolve_episode_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
 ) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/resolve_episode_destination.yaml."""
    # Arguments are forwarded positionally, in the same order as received.
    forwarded = (
        release_name,
        source_file,
        tmdb_title,
        tmdb_year,
        tmdb_episode_title,
        confirmed_folder,
    )
    resolution = _resolve_episode_destination(*forwarded)
    return resolution.to_dict()
 def resolve_movie_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
 ) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/resolve_movie_destination.yaml."""
    # Delegate to the application-layer resolver and serialise its result.
    resolution = _resolve_movie_destination(
        release_name,
        source_file,
        tmdb_title,
        tmdb_year,
    )
    return resolution.to_dict()
 def resolve_series_destination(
    release_name: str,
    tmdb_title: str,
    tmdb_year: int,
    confirmed_folder: str | None = None,
 ) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/resolve_series_destination.yaml."""
    # Delegate to the application-layer resolver and serialise its result.
    resolution = _resolve_series_destination(
        release_name,
        tmdb_title,
        tmdb_year,
        confirmed_folder,
    )
    return resolution.to_dict()
 def create_seed_links(
    library_file: str, original_download_folder: str
 ) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/create_seed_links.yaml."""
    # Build the use case around a fresh FileManager, run it, and hand back
    # the response in its dict form.
    seeding = CreateSeedLinksUseCase(FileManager())
    response = seeding.execute(library_file, original_download_folder)
    return response.to_dict()
 def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/manage_subtitles.yaml."""
    # Build the use case around a fresh FileManager, run it, and hand back
    # the response in its dict form.
    response = ManageSubtitlesUseCase(FileManager()).execute(
        source_video, destination_video
    )
    return response.to_dict()
 def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/learn.yaml."""
    # Appends `values` to data[category][key]["tokens"] inside
    # <_LEARNED_ROOT>/subtitles_learned.yaml and rewrites the file through a
    # temporary sibling. Failures are reported as {"status": "error", ...}
    # dicts rather than raised.
    _VALID_PACKS = {"subtitles"}
    _VALID_CATEGORIES = {"languages", "types", "formats"}
    if pack not in _VALID_PACKS:
        return {
            "status": "error",
            "error": "unknown_pack",
            "message": f"Unknown pack '{pack}'. Valid: {sorted(_VALID_PACKS)}",
        }
    if category not in _VALID_CATEGORIES:
        return {
            "status": "error",
            "error": "unknown_category",
            "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}",
        }

    learned_path = _LEARNED_ROOT / "subtitles_learned.yaml"
    try:
        # The directory is needed for the write below; an unwritable location
        # is surfaced as a structured error instead of an unhandled OSError.
        _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        return {"status": "error", "error": "write_failed", "message": str(e)}

    data: dict = {}
    if learned_path.exists():
        try:
            with open(learned_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            return {"status": "error", "error": "read_failed", "message": str(e)}

    # A hand-edited file may hold null or non-mapping sections (e.g. a bare
    # "languages:" line). Null is treated as empty; any other unexpected
    # shape is reported rather than crashing on .get()/list concatenation.
    cat_data = (data.get(category) or {}) if isinstance(data, dict) else None
    entry = (cat_data.get(key) or {}) if isinstance(cat_data, dict) else None
    existing = (entry.get("tokens") or []) if isinstance(entry, dict) else None
    if not isinstance(existing, list):
        return {
            "status": "error",
            "error": "read_failed",
            "message": (
                f"{learned_path.name}: unexpected structure under "
                f"'{category}.{key}'"
            ),
        }
    data[category] = cat_data
    cat_data[key] = entry

    # Skip tokens already stored AND repeats within `values` itself, so that
    # added_count reflects what was really appended.
    new_tokens: list[str] = []
    for value in values:
        if value not in existing and value not in new_tokens:
            new_tokens.append(value)
    entry["tokens"] = existing + new_tokens

    tmp = learned_path.with_suffix(".yaml.tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(
                data, f, allow_unicode=True, default_flow_style=False, sort_keys=False
            )
        # Path.replace overwrites an existing target on every platform;
        # Path.rename raises on Windows when the destination already exists.
        tmp.replace(learned_path)
    except Exception as e:
        tmp.unlink(missing_ok=True)
        return {"status": "error", "error": "write_failed", "message": str(e)}
    return {
        "status": "ok",
        "pack": pack,
        "category": category,
        "key": key,
        "added_count": len(new_tokens),
        "tokens": entry["tokens"],
    }
 def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/set_path_for_folder.yaml."""
    Set a folder path in the configuration.
    Args:
        folder_name: Name of folder to set (download, tvshow, movie, torrent).
        path_value: Absolute path to the folder.
    Returns:
        Dict with status or error information.
    """
    file_manager = FileManager()
    use_case = SetFolderPathUseCase(file_manager)
    response = use_case.execute(folder_name, path_value)
    return response.to_dict()
 def analyze_release(release_name: str, source_path: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/analyze_release.yaml."""
    from alfred.domain.release.services import parse_release  # noqa: PLC0415

    release_root = Path(source_path)
    parsed = parse_release(release_name)
    parsed.media_type = detect_media_type(parsed, release_root)

    # ffprobe enrichment is only attempted for recognised media types, and
    # only counts as used when a video file was found and probing yielded data.
    probe_used = False
    if parsed.media_type not in ("unknown", "other"):
        video_file = find_video_file(release_root)
        media_info = probe(video_file) if video_file else None
        if media_info:
            enrich_from_probe(parsed, media_info)
            probe_used = True

    # Attributes copied verbatim from the parsed release, in output order.
    exported = (
        "media_type",
        "parse_path",
        "title",
        "year",
        "season",
        "episode",
        "episode_end",
        "quality",
        "source",
        "codec",
        "group",
        "languages",
        "audio_codec",
        "audio_channels",
        "bit_depth",
        "hdr_format",
        "edition",
        "site_tag",
        "is_season_pack",
    )
    report: dict[str, Any] = {"status": "ok"}
    for attr in exported:
        report[attr] = getattr(parsed, attr)
    report["probe_used"] = probe_used
    return report
 def probe_media(source_path: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/probe_media.yaml."""
    target = Path(source_path)
    if not target.exists():
        return {
            "status": "error",
            "error": "not_found",
            "message": f"{source_path} does not exist",
        }

    info = probe(target)
    if info is None:
        return {
            "status": "error",
            "error": "probe_failed",
            "message": "ffprobe failed to read the file",
        }

    # Flatten the probe result into plain dicts/lists, one section at a time.
    video = {
        "codec": info.video_codec,
        "resolution": info.resolution,
        "width": info.width,
        "height": info.height,
        "duration_seconds": info.duration_seconds,
        "bitrate_kbps": info.bitrate_kbps,
    }

    audio_attrs = (
        "index",
        "codec",
        "channels",
        "channel_layout",
        "language",
        "is_default",
    )
    audio_tracks = [
        {attr: getattr(track, attr) for attr in audio_attrs}
        for track in info.audio_tracks
    ]

    subtitle_attrs = ("index", "codec", "language", "is_default", "is_forced")
    subtitle_tracks = [
        {attr: getattr(track, attr) for attr in subtitle_attrs}
        for track in info.subtitle_tracks
    ]

    return {
        "status": "ok",
        "video": video,
        "audio_tracks": audio_tracks,
        "subtitle_tracks": subtitle_tracks,
        "audio_languages": info.audio_languages,
        "is_multi_audio": info.is_multi_audio,
    }
 def list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/list_folder.yaml."""
    List contents of a configured folder.
    Args:
        folder_type: Type of folder to list (download, tvshow, movie, torrent).
        path: Relative path within the folder (default: root).
    Returns:
        Dict with folder contents or error information.
    """
    file_manager = FileManager()
    use_case = ListFolderUseCase(file_manager)
    response = use_case.execute(folder_type, path)
    return response.to_dict()
 def read_release_metadata(release_path: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/read_release_metadata.yaml."""
    target = Path(release_path)
    if not target.exists():
        return {
            "status": "error",
            "error": "not_found",
            "message": f"{release_path} does not exist",
        }

    # Metadata is looked up at the release root: the folder itself, or the
    # parent folder when a file path was given.
    root = target.parent if not target.is_dir() else target
    store = MetadataStore(root)
    has_metadata = bool(store.exists())
    return {
        "status": "ok",
        "release_path": str(root),
        "has_metadata": has_metadata,
        # load() is only called when the store reports that it exists.
        "metadata": store.load() if has_metadata else {},
    }
 def query_library(name: str) -> dict[str, Any]:
    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/query_library.yaml."""
    # Matching is a case-insensitive substring test on folder names.
    needle = name.strip().lower()
    if needle == "":
        return {
            "status": "error",
            "error": "empty_name",
            "message": "name must be a non-empty string",
        }

    roots = get_memory().ltm.library_paths.to_dict() or {}
    if not roots:
        return {
            "status": "error",
            "error": "no_libraries",
            "message": "No library paths configured — call set_path_for_folder first.",
        }

    matches: list[dict[str, Any]] = []
    for collection, root in roots.items():
        library_root = Path(root)
        if not library_root.is_dir():
            # Configured roots that are not directories are skipped silently.
            continue
        # Only the immediate sub-folders of each root are inspected.
        for candidate in library_root.iterdir():
            if candidate.is_dir() and needle in candidate.name.lower():
                matches.append(
                    {
                        "collection": collection,
                        "name": candidate.name,
                        "path": str(candidate),
                        "has_metadata": MetadataStore(candidate).exists(),
                    }
                )

    return {
        "status": "ok",
        "query": name,
        "match_count": len(matches),
        "matches": matches,
    }
@@ -9,15 +9,7 @@ logger = logging.getLogger(__name__)
 def set_language(language: str) -> dict[str, Any]:
-    """
+    """Thin tool wrapper — semantics live in alfred/agent/tools/specs/set_language.yaml."""
    Set the conversation language.
    Args:
        language: Language code (e.g., 'en', 'fr', 'es', 'de')
    Returns:
        Status dictionary
    """
    try:
        memory = get_memory()
        memory.stm.set_language(language)
@@ -0,0 +1,221 @@
 """
 ToolSpec — semantic description of a tool, loaded from YAML.
 Each tool exposed to the agent has a matching YAML spec under
 alfred/agent/tools/specs/{tool_name}.yaml. The spec carries everything the
 LLM needs to decide *when* and *why* to call the tool — separated from the
 Python signature, which remains the source of truth for *how* (types,
 required-ness).
 The YAML structure is documented in the dataclasses below. Loading a spec
 validates its shape; missing required fields or wrongly-typed sections raise
 ToolSpecError (unknown extra keys are ignored).
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from pathlib import Path
 import yaml
 class ToolSpecError(ValueError):
    """Signals a YAML tool spec that is malformed or internally inconsistent."""
@dataclass(frozen=True)
class ParameterSpec:
    """What one tool parameter means, as written in the YAML spec."""

    description: str  # Short statement of what the value represents.
    why_needed: str  # Reason the tool needs it.
    example: str | None = None  # Optional sample value, stored as text.

    @classmethod
    def from_dict(cls, name: str, data: dict) -> ParameterSpec:
        """
        Build a ParameterSpec from the YAML mapping of parameter ``name``.

        Raises:
            ToolSpecError: if 'description' or 'why_needed' is missing or blank.
        """
        where = f"parameter '{name}'"
        for required in ("description", "why_needed"):
            _require(data, required, where)

        # Examples may be written as ints or other scalars in YAML; they are
        # normalised to stripped text, while an absent example stays None.
        raw_example = data.get("example")
        example = None if raw_example is None else str(raw_example).strip()

        return cls(
            description=str(data["description"]).strip(),
            why_needed=str(data["why_needed"]).strip(),
            example=example,
        )
@dataclass(frozen=True)
class ReturnsSpec:
    """One possible return shape of a tool (ok / needs_clarification / error / ...)."""

    description: str
    # Field name -> explanation; empty when the YAML lists no fields.
    fields: dict[str, str] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, key: str, data: dict) -> ReturnsSpec:
        """
        Build a ReturnsSpec from the YAML mapping found under ``returns.<key>``.

        Raises:
            ToolSpecError: if 'description' is missing or blank, or if
                           'fields' is present but not a mapping.
        """
        _require(data, "description", f"returns.{key}")

        raw_fields = data.get("fields") or {}
        if isinstance(raw_fields, dict):
            # Names and explanations are both coerced to text.
            cleaned = {
                str(fname): str(fdesc).strip() for fname, fdesc in raw_fields.items()
            }
            return cls(description=str(data["description"]).strip(), fields=cleaned)

        raise ToolSpecError(
            f"returns.{key}.fields must be a dict, got {type(raw_fields).__name__}"
        )
@dataclass(frozen=True)
class CacheSpec:
    """Marks a tool as cacheable in STM.tool_results, keyed by one of its parameters."""

    key: str  # Parameter name whose value serves as the cache key.

    @classmethod
    def from_dict(cls, data: dict) -> CacheSpec:
        """
        Build a CacheSpec from the YAML 'cache' mapping.

        Raises:
            ToolSpecError: if 'key' is missing or blank.
        """
        _require(data, "key", "cache")
        cache_key = str(data["key"]).strip()
        return cls(key=cache_key)
@dataclass(frozen=True)
class ToolSpec:
    """Complete semantic spec of a single tool, as loaded from its YAML file."""

    name: str
    summary: str  # One-line headline; first line of the compiled description.
    description: str  # Longer paragraph.
    when_to_use: str
    when_not_to_use: str | None
    next_steps: str | None
    parameters: dict[str, ParameterSpec]  # parameter name -> ParameterSpec
    returns: dict[str, ReturnsSpec]  # status key -> ReturnsSpec
    cache: CacheSpec | None = None  # Set only when the YAML has a 'cache' section.

    @classmethod
    def from_yaml_path(cls, path: Path) -> ToolSpec:
        """
        Read ``path`` as YAML and build a ToolSpec from it.

        Raises:
            ToolSpecError: if the document is not a mapping or fails
                           validation; the message is prefixed with the path.
        """
        with open(path, encoding="utf-8") as handle:
            document = yaml.safe_load(handle) or {}
        if not isinstance(document, dict):
            raise ToolSpecError(f"{path}: top-level must be a mapping")
        try:
            spec = cls.from_dict(document)
        except ToolSpecError as e:
            # Re-raise with the file path so the offending spec is identifiable.
            raise ToolSpecError(f"{path}: {e}") from e
        return spec

    @classmethod
    def from_dict(cls, data: dict) -> ToolSpec:
        """
        Validate a parsed YAML mapping and turn it into a ToolSpec.

        Raises:
            ToolSpecError: if a required top-level field is missing or blank,
                           if 'parameters', 'returns' or 'cache' is not a
                           mapping, or if cache.key names an undeclared
                           parameter.
        """
        for required in ("name", "summary", "description", "when_to_use"):
            _require(data, required, "spec")

        raw_params = data.get("parameters") or {}
        if not isinstance(raw_params, dict):
            raise ToolSpecError("parameters must be a mapping")
        parameters = {}
        for pname, pdata in raw_params.items():
            parameters[pname] = ParameterSpec.from_dict(pname, pdata or {})

        raw_returns = data.get("returns") or {}
        if not isinstance(raw_returns, dict):
            raise ToolSpecError("returns must be a mapping")
        returns = {}
        for rkey, rdata in raw_returns.items():
            returns[rkey] = ReturnsSpec.from_dict(rkey, rdata or {})

        raw_cache = data.get("cache")
        if not (raw_cache is None or isinstance(raw_cache, dict)):
            raise ToolSpecError("cache must be a mapping")
        # An empty 'cache' mapping is treated like an absent one.
        cache = CacheSpec.from_dict(raw_cache) if raw_cache else None

        # The cache key must refer to one of the declared parameters.
        if cache is not None and cache.key not in parameters:
            raise ToolSpecError(
                f"cache.key '{cache.key}' is not a declared parameter "
                f"(declared: {sorted(parameters)})"
            )

        return cls(
            name=str(data["name"]).strip(),
            summary=str(data["summary"]).strip(),
            description=str(data["description"]).strip(),
            when_to_use=str(data["when_to_use"]).strip(),
            when_not_to_use=_strip_or_none(data.get("when_not_to_use")),
            next_steps=_strip_or_none(data.get("next_steps")),
            parameters=parameters,
            returns=returns,
            cache=cache,
        )

    def compile_description(self) -> str:
        """
        Assemble the long, LLM-facing description text for this tool.

        Layout:
            <summary>
            <description>
            When to use:
              <when_to_use>
            When NOT to use:    (if present)
              <when_not_to_use>
            Next steps:         (if present)
              <next_steps>
            Returns:
              <status>: <description>
                · <field>: <desc>
        """
        lines = [
            self.summary,
            "",
            self.description,
            "",
            "When to use:",
            _indent(self.when_to_use),
        ]

        # Optional sections appear only when they carry text.
        optional_sections = (
            ("When NOT to use:", self.when_not_to_use),
            ("Next steps:", self.next_steps),
        )
        for heading, text in optional_sections:
            if text:
                lines.extend(["", heading, _indent(text)])

        if self.returns:
            lines.extend(["", "Returns:"])
            for status, ret in self.returns.items():
                lines.append(f"  {status}: {ret.description}")
                lines.extend(
                    f"    · {fname}: {fdesc}" for fname, fdesc in ret.fields.items()
                )

        return "\n".join(lines)

    def compile_parameter_description(self, name: str) -> str:
        """
        Assemble the JSON Schema 'description' text for parameter ``name``.

        Raises:
            ToolSpecError: if the spec declares no such parameter.
        """
        param = self.parameters.get(name)
        if param is None:
            raise ToolSpecError(f"tool '{self.name}': no spec for parameter '{name}'")

        pieces = [f"{param.description} (Why: {param.why_needed})"]
        if param.example:
            pieces.append(f"Example: {param.example}")
        return " ".join(pieces)
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 def _require(data: dict, key: str, where: str) -> None:
    """
    Ensure ``data[key]`` is present and, when it is a string, non-blank.

    Args:
        data: Mapping parsed from the YAML spec.
        key: Name of the required field.
        where: Human-readable location, used as the error-message prefix.

    Raises:
        ToolSpecError: if ``data`` is not a mapping, or if the field is
                       missing, None, or a whitespace-only string.
    """
    # A YAML entry written as a scalar or list where a mapping is expected
    # would otherwise fail on .get() with an AttributeError, bypassing the
    # ToolSpecError handling in the loader.
    if not isinstance(data, dict):
        raise ToolSpecError(
            f"{where}: expected a mapping, got {type(data).__name__}"
        )
    value = data.get(key)
    if value is None or (isinstance(value, str) and not value.strip()):
        raise ToolSpecError(f"{where}: missing required field '{key}'")
 def _strip_or_none(value) -> str | None:
    """Return ``value`` as stripped text, or None when it is None or blank."""
    stripped = "" if value is None else str(value).strip()
    return stripped if stripped else None
 def _indent(text: str, prefix: str = "  ") -> str:
    """Prepend ``prefix`` to every line of ``text`` (blank lines included)."""
    indented = []
    for line in text.splitlines():
        indented.append(prefix + line)
    return "\n".join(indented)
@@ -0,0 +1,53 @@
 """
 ToolSpecLoader — discover and load all YAML tool specs from a directory.
 Convention: one YAML file per tool, named exactly like the Python function
 that implements it (e.g. resolve_season_destination.yaml).
 """
 from __future__ import annotations
 import logging
 from pathlib import Path
 from .spec import ToolSpec, ToolSpecError
 logger = logging.getLogger(__name__)
 _DEFAULT_SPECS_DIR = Path(__file__).parent / "specs"
 def load_tool_specs(specs_dir: Path | None = None) -> dict[str, ToolSpec]:
    """
    Parse every *.yaml file directly under specs_dir into a {name -> ToolSpec} map.

    Args:
        specs_dir: Directory to scan. Falls back to the 'specs' folder next
                   to this module when omitted.

    Returns:
        Mapping from tool name to its parsed ToolSpec. Empty (with a warning
        logged) when the directory does not exist.

    Raises:
        ToolSpecError: if a spec is malformed, if a file's stem differs from
                       the 'name' inside it, or if a name occurs twice.
    """
    root = specs_dir if specs_dir else _DEFAULT_SPECS_DIR
    if not root.exists():
        logger.warning(f"Tool specs directory not found: {root}")
        return {}

    loaded: dict[str, ToolSpec] = {}
    # Sorted so files are processed in a stable order.
    for yaml_path in sorted(root.glob("*.yaml")):
        spec = ToolSpec.from_yaml_path(yaml_path)

        # Convention: the file is named exactly like the tool it describes.
        stem = yaml_path.stem
        if spec.name != stem:
            raise ToolSpecError(
                f"{yaml_path}: filename stem '{stem}' "
                f"does not match spec.name '{spec.name}'"
            )
        if spec.name in loaded:
            raise ToolSpecError(f"duplicate tool spec name: '{spec.name}'")
        loaded[spec.name] = spec

    logger.info(f"Loaded {len(loaded)} tool spec(s) from {root}")
    return loaded
@@ -0,0 +1,53 @@
 name: add_torrent_by_index
 summary: >
  Pick a torrent from the last find_torrent results by index and add
  it to qBittorrent in one call.
 description: |
  Convenience wrapper that combines get_torrent_by_index +
  add_torrent_to_qbittorrent. Looks up the torrent at the given
  1-based index, extracts its magnet link, and sends it to
  qBittorrent. The result mirrors add_torrent_to_qbittorrent's, with
  the chosen torrent's name appended on success.
 when_to_use: |
  The default action after find_torrent when the user picks a hit by
  number ("download the second one"). One call, two side effects:
  episodic memory updated + download started.
 when_not_to_use: |
  - When the user only wants to inspect, not download — use
    get_torrent_by_index.
  - When the magnet comes from outside the search results — use
    add_torrent_to_qbittorrent directly.
 next_steps: |
  - On status=ok: confirm the download started and end the workflow
    if not already ended.
  - On status=error (not_found): the index is out of range; show the
    available count from episodic memory.
  - On status=error (no_magnet): the search result was malformed —
    suggest re-running find_torrent.
 parameters:
  index:
    description: 1-based position of the torrent in the last find_torrent results.
    why_needed: |
      Identifies which torrent to add. Out-of-range indices return
      not_found.
    example: 3
 returns:
  ok:
    description: Torrent was added to qBittorrent.
    fields:
      status: "'ok'"
      message: Confirmation message.
      torrent_name: Name of the torrent that was added.
  error:
    description: Failed to add.
    fields:
      error: Short error code (not_found, no_magnet, ...).
      message: Human-readable explanation.
@@ -0,0 +1,48 @@
 name: add_torrent_to_qbittorrent
 summary: >
  Send a magnet link to qBittorrent and start the download.
 description: |
  Adds a torrent to qBittorrent using its WebUI API. On success, the
  download is also recorded in episodic memory as an active_download
  so the agent can track its progress later, the STM topic is set to
  "downloading", and the current workflow is ended (the user typically
  leaves the find-and-download scope at this point).
 when_to_use: |
  When the user provides a raw magnet link, or when chaining manually
  after get_torrent_by_index. For the common "user picked search hit
  N" case, prefer add_torrent_by_index — one call instead of two.
 when_not_to_use: |
  - For .torrent files (not supported by this tool — magnet only).
  - When qBittorrent is not configured / reachable — the call will
    fail and the user has to fix the config first.
 next_steps: |
  - On status=ok: the workflow is already ended; confirm to the user
    that the download has started.
  - On status=error: surface the message; common causes are auth
    failure or qBittorrent being unreachable.
 parameters:
  magnet_link:
    description: Magnet URI of the torrent to add (magnet:?xt=urn:btih:...).
    why_needed: |
      The actual payload sent to qBittorrent. Must be a full magnet
      URI, not a hash alone.
    example: "magnet:?xt=urn:btih:abc123..."
 returns:
  ok:
    description: Torrent accepted by qBittorrent.
    fields:
      status: "'ok'"
      message: Confirmation message.
  error:
    description: qBittorrent rejected the request or is unreachable.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,82 @@
 name: analyze_release
 summary: >
  One-shot analyzer that parses a release name, detects its media type
  from the folder layout, and enriches the result with ffprobe data.
 description: |
  Combines three steps in a single call so the agent gets a complete
  picture before routing:
    1. parse_release(release_name) — extracts title, year, season,
       episode, quality, source, codec, group, languages, audio info,
       HDR, edition, site tag.
    2. detect_media_type(parsed, path) — uses the on-disk layout
       (single file vs. folder, presence of S01 dirs, episode count)
       to choose: movie / tv_episode / tv_season / tv_complete /
       other / unknown.
    3. ffprobe enrichment — when the media type is recognised, runs
       ffprobe on the first video file found and fills in audio
       codec/channels, bit depth, HDR format. Sets probe_used=true.
 when_to_use: |
  As the very first step of any organize workflow, right after
  list_folder, on each release the user wants to handle. The output
  drives which resolve_*_destination to call next.
 when_not_to_use: |
  - When you only need codec/audio info on a specific video file:
    use probe_media (no parsing, no media-type detection).
  - For releases the user has already analyzed earlier in the same
    workflow — the parse is deterministic, no need to re-run.
 next_steps: |
  - media_type == movie       → resolve_movie_destination
  - media_type == tv_season   → resolve_season_destination
  - media_type == tv_episode  → resolve_episode_destination
  - media_type == tv_complete → resolve_series_destination
  - media_type in (other, unknown) → ask the user what to do; do not
    auto-route.
 cache:
  key: source_path
 parameters:
  release_name:
    description: Raw release folder or file name as it appears on disk.
    why_needed: |
      Source of all the parsed tokens (quality, codec, group, ...).
      Don't sanitise it — the parser relies on the exact spelling.
    example: Breaking.Bad.S01.1080p.BluRay.x265-GROUP
  source_path:
    description: Absolute path to the release folder or file on disk.
    why_needed: |
      Required for layout-based media-type detection and for ffprobe
      to find a video file inside the release.
    example: /downloads/Breaking.Bad.S01.1080p.BluRay.x265-GROUP
 returns:
  ok:
    description: Release analyzed.
    fields:
      status: "'ok'"
      media_type: "One of: movie, tv_episode, tv_season, tv_complete, other, unknown."
      parse_path: "Which parser branch was taken (debug)."
      title: Parsed title.
      year: Parsed year (int) or null.
      season: Season number (int) or null.
      episode: Episode number (int) or null.
      episode_end: Range end episode (multi-episode releases) or null.
      quality: Resolution token (e.g. 1080p, 2160p).
      source: Source token (BluRay, WEB-DL, ...).
      codec: Video codec token (x264, x265, ...).
      group: Release group name or null.
      languages: List of detected language tokens.
      audio_codec: Audio codec from ffprobe (when probe_used=true).
      audio_channels: Audio channel count from ffprobe.
      bit_depth: Bit depth from ffprobe.
      hdr_format: HDR format from ffprobe (HDR10, DV, ...) or null.
      edition: Edition tag (Extended, Director's Cut, ...) or null.
      site_tag: Source-site tag if present.
      is_season_pack: True when the folder contains a full season.
      probe_used: True when ffprobe successfully enriched the result.
@@ -0,0 +1,59 @@
 name: create_seed_links
 summary: >
  Recreate the original torrent folder structure with hard-links so
  qBittorrent can keep seeding after the library move.
 description: |
  Hard-links the library video file back into torrents/<original_folder_name>/
  and copies all remaining files from the original download folder
  (subtitles, .nfo, .jpg, .txt, …) so the torrent data is complete on
  disk. qBittorrent then sees the same content at the location it
  expects and can keep seeding without rehashing the whole torrent.
 when_to_use: |
  Only when the user has confirmed they want to keep seeding after a
  move. Call right after manage_subtitles (or right after move_media /
  move_to_destination if subtitle handling was skipped).
 when_not_to_use: |
  - When the user explicitly answered "no" to "keep seeding?".
  - When the download was not from a torrent (e.g. direct download).
  - Before the library file is in place — this tool reads it.
 next_steps: |
  - After success: optionally call qBittorrent to update the torrent's
    save path / force a recheck (not yet covered by a tool).
  - End the workflow.
 parameters:
  library_file:
    description: Absolute path to the video file now in the library.
    why_needed: |
      The source for the hard-link — same inode means qBittorrent sees
      identical bytes at the seeding path.
    example: /tv_shows/Oz.1997.1080p.WEBRip.x265-KONTRAST/Season 03/Oz.S03E01.mkv
  original_download_folder:
    description: Absolute path to the original download folder.
    why_needed: |
      Provides the folder name to recreate under torrents/ and the
      auxiliary files (subs, nfo, ...) to copy over.
    example: /downloads/Oz.S03.1080p.WEBRip.x265-KONTRAST
 returns:
  ok:
    description: Seeding folder rebuilt.
    fields:
      status: "'ok'"
      torrent_subfolder: Absolute path of the recreated folder under torrents/.
      linked_file: Absolute path of the hard-linked video.
      copied_files: List of auxiliary files that were copied.
      copied_count: Number of auxiliary files copied.
      skipped: List of files skipped (already present, unreadable, ...).
  error:
    description: Failed to rebuild the seeding folder.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,48 @@
 name: end_workflow
 summary: >
  Leave the current workflow scope and return to the broad-catalog mode.
 description: |
  Clears the active workflow from STM. After this call the visible tool
  catalog returns to the core tool set plus start_workflow, so the agent is
  ready to handle a different request.
 when_to_use: |
  - When all the workflow's steps have completed successfully.
  - When the user explicitly cancels the current task.
  - When the user changes subject mid-conversation and the active
    workflow is no longer relevant.
  - When an unrecoverable error makes continuing pointless — explain
    in 'reason'.
 when_not_to_use: |
  - Do not call when there is no active workflow — it will return an
    error. Just call start_workflow for the new request instead.
  - Do not call mid-step just to "free up tools"; finish the step
    or fail it explicitly first.
 next_steps: |
  - After ending, you can either call start_workflow for a new task or
    answer the user directly from the broad catalog.
 parameters:
  reason:
    description: Short reason for ending — completed, cancelled, changed_subject, error, ...
    why_needed: |
      Recorded in episodic memory for debugging and future audits. A
      structured short string is more useful than a long sentence.
    example: completed
 returns:
  ok:
    description: Workflow ended; the visible catalog is back to the broad core tool set.
    fields:
      workflow: Name of the workflow that just ended.
      reason: The reason that was passed in.
  error:
    description: Could not end — typically because nothing was active.
    fields:
      error: Short error code (no_active_workflow).
      message: Human-readable explanation.
@@ -0,0 +1,56 @@
 name: find_media_imdb_id
 summary: >
  Search TMDB for a media title and return its canonical title, year,
  IMDb id, and TMDB id.
 description: |
  Looks up a title on TMDB and returns the canonical metadata needed by
  the resolve_*_destination tools. On success, the result is also
  stashed in short-term memory under "last_media_search" so later steps
  in the workflow can read it without re-calling TMDB. The STM topic
  is set to "searching_media".
 when_to_use: |
  Right after analyze_release, before calling resolve_*_destination —
  the resolvers need the canonical title + year and refuse to guess
  them from the raw release name.
 when_not_to_use: |
  - When you already have the IMDb id in STM from an earlier step in
    the same workflow.
  - For torrent search — use find_torrent instead.
 next_steps: |
  - On status=ok: call the appropriate resolve_*_destination with
    tmdb_title and tmdb_year from the result.
  - On status=error (not_found): show the error and ask the user for
    a more precise title.
 cache:
  key: media_title
 parameters:
  media_title:
    description: Title to search for. Free-form — TMDB does the matching.
    why_needed: |
      Drives the TMDB query. Pass a sanitized version (no resolution
      tokens, no group name) for best results.
    example: Breaking Bad
 returns:
  ok:
    description: Match found.
    fields:
      status: "'ok'"
      title: Canonical title as returned by TMDB.
      year: Release year (movies) or first-air year (series).
      media_type: "'movie' or 'tv'."
      imdb_id: IMDb identifier (ttXXXXXXX) or null.
      tmdb_id: TMDB numeric id.
  error:
    description: No match or API failure.
    fields:
      error: Short error code (not_found, api_error, ...).
      message: Human-readable explanation.
@@ -0,0 +1,52 @@
 name: find_torrent
 summary: >
  Search Knaben for torrents matching a media title; cache results in
  episodic memory.
 description: |
  Queries the Knaben aggregator for up to 10 torrents matching the
  given title, then stores the result list in episodic memory under
  "last_search_results". The user can then refer to a torrent by
  1-based index ("download the 3rd one") via get_torrent_by_index or
  add_torrent_by_index. The STM topic is set to "selecting_torrent".
 when_to_use: |
  When the user wants to download something new — typically the first
  step of a "find + download" sub-task. The agent should usually
  pre-filter the title (canonical name + year) before searching for
  cleaner results.
 when_not_to_use: |
  - For TMDB metadata lookup — use find_media_imdb_id.
  - When a search was already performed in the same session and the
    user is just picking from the existing list.
 next_steps: |
  - Present the indexed results to the user.
  - Once chosen: call add_torrent_by_index(N) — that wraps
    get_torrent_by_index + add_torrent_to_qbittorrent.
 cache:
  key: media_title
 parameters:
  media_title:
    description: Title to search for on Knaben. Free-form.
    why_needed: |
      Drives the search query. Use the canonical title (from
      find_media_imdb_id) plus quality preferences for better hits.
    example: Inception 2010 1080p
 returns:
  ok:
    description: Search returned a list of torrents.
    fields:
      status: "'ok'"
      torrents: "List of {name, size, seeders, leechers, magnet, ...}, up to 10."
  error:
    description: Search failed.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,48 @@
 name: get_torrent_by_index
 summary: >
  Retrieve a torrent from the last find_torrent search by its 1-based
  index.
 description: |
  Reads episodic memory's last_search_results and returns the entry at
  the given 1-based position. Pure lookup — does not start a download.
  Fails when the search results are missing or the index is out of
  range.
 when_to_use: |
  When the user references a search hit by number ("show me the second
  one") but doesn't yet want to download — e.g. inspection, sharing
  the magnet, ...
 when_not_to_use: |
  - When the user wants to start downloading: use add_torrent_by_index
    instead (one call instead of two).
  - When no search has been performed yet — the result will be
    not_found.
 next_steps: |
  - Display the torrent to the user.
  - If they then say "add it", call add_torrent_to_qbittorrent with the
    magnet, or add_torrent_by_index with the same index.
 parameters:
  index:
    description: 1-based position in the last find_torrent result list.
    why_needed: |
      Maps to a specific torrent entry. Out-of-range values return an
      error, not a wraparound.
    example: 3
 returns:
  ok:
    description: Torrent found at that index.
    fields:
      status: "'ok'"
      torrent: "Full torrent dict (name, size, seeders, leechers, magnet, ...)."
  error:
    description: No torrent at that index.
    fields:
      error: Short error code (not_found).
      message: Human-readable explanation, e.g. "Search for torrents first."
@@ -0,0 +1,76 @@
 name: learn
 summary: >
  Teach Alfred a new token mapping and persist it to the learned
  knowledge pack so future scans recognise it.
 description: |
  Appends a new token (or list of tokens) to a key inside a knowledge
  pack and writes the result to `data/knowledge/<pack>_learned.yaml`.
  The change is persisted atomically (write-tmp + rename) so a crash
  cannot corrupt the file. Currently only the `subtitles` pack is
  supported.
 when_to_use: |
  When manage_subtitles returns needs_clarification with unresolved
  tokens, after confirming with the user what the tokens mean. Call
  once per (category, key) — multiple values can be added in a single
  call.
 when_not_to_use: |
  - Without explicit user confirmation of what the token means.
  - For knowledge that belongs in the static pack
    (alfred/knowledge/<pack>.yaml) — that's editor territory, not
    runtime learning.
 next_steps: |
  - After success: re-run the workflow step that triggered the
    clarification (typically manage_subtitles) so the new mapping is
    applied.
 parameters:
  pack:
    description: Knowledge pack name. Currently only "subtitles" is supported.
    why_needed: |
      Decides which `*_learned.yaml` file under data/knowledge/ gets
      written. The pack name is namespaced to avoid collisions across
      domains.
    example: subtitles
  category:
    description: Category within the pack — "languages", "types", or "formats".
    why_needed: |
      Different categories use different lookup tables at scan time.
      A wrong category silently has no effect.
    example: languages
  key:
    description: Canonical entry id — ISO 639-1 code, type name, format name.
    why_needed: |
      The destination bucket for the new tokens. Existing tokens under
      this key are kept; only new values are appended.
    example: es
  values:
    description: List of token spellings to add.
    why_needed: |
      Release groups use many spellings for the same language/type;
      pass them all in one call instead of multiple round-trips.
    example: '["spanish", "espanol", "spa"]'
 returns:
  ok:
    description: Mapping saved.
    fields:
      status: "'ok'"
      pack: Name of the pack that was written to.
      category: Category that was updated.
      key: Key that was updated.
      added_count: Number of values that were actually new (deduplicated).
      tokens: Full updated token list for that key.
  error:
    description: Save failed.
    fields:
      error: Short error code (unknown_pack, unknown_category, read_failed, write_failed).
      message: Human-readable explanation.
@@ -0,0 +1,63 @@
 name: list_folder
 summary: >
  List the contents of a configured folder, optionally below a
  relative subpath.
 description: |
  Reads a folder previously configured via set_path_for_folder and
  returns its entries (files + directories). A relative `path` lets you
  drill down without re-specifying the absolute root each time. Path
  traversal is rejected (no `..`, no absolute paths) so the agent
  cannot escape the configured root.
 when_to_use: |
  - At the start of an organize workflow to discover what's available
    in the download folder.
  - To browse a library collection ("what tv shows do I have?").
  - As a sanity check before any move to confirm the target exists.
 when_not_to_use: |
  - For folders that are not configured — call set_path_for_folder
    first.
  - To list arbitrary system paths — this tool is intentionally scoped
    to the known roots.
 next_steps: |
  - After listing the download folder: typically call analyze_release
    on a specific entry.
  - After listing a library folder: use the result to disambiguate a
    destination during resolve_*_destination.
 cache:
  key: path
 parameters:
  folder_type:
    description: Logical folder key (download, torrent, movie, tv_show, ...).
    why_needed: |
      Resolves to an absolute root through LTM. Must have been set via
      set_path_for_folder beforehand.
    example: download
  path:
    description: Relative subpath inside the root (default ".").
    why_needed: |
      Lets you drill into a subfolder without expanding the root. No
      ".." or absolute path is allowed.
    example: Breaking.Bad.S01.1080p.BluRay.x265-GROUP
 returns:
  ok:
    description: Listing returned.
    fields:
      status: "'ok'"
      folder_type: The key that was listed.
      path: The relative path that was listed.
      entries: List of {name, type, size?} for each entry.
  error:
    description: Could not list the folder.
    fields:
      error: Short error code (folder_not_configured, path_not_found, path_traversal, ...).
      message: Human-readable explanation.
@@ -0,0 +1,67 @@
 name: manage_subtitles
 summary: >
  Detect, filter, and place subtitle tracks next to a video that has just
  been organised into the library.
 description: |
  Scans the source video's surroundings for subtitle files
  (.srt, .ass, .ssa, .vtt, .sub), classifies them by language and type
  (standard / SDH / forced), filters by the user's SubtitlePreferences
  (languages, min size, keep_sdh, keep_forced), and hard-links the
  passing files next to the destination video using the convention
  `<lang>.<ext>`, `<lang>.sdh.<ext>`, `<lang>.forced.<ext>`.
  If no subtitles are found, returns status=ok with placed_count=0 — not
  an error.
 when_to_use: |
  Always after a successful move_media / move_to_destination, before
  closing the workflow. Pass the original source path (where subs live)
  and the new library path (where they should land).
 when_not_to_use: |
  - Do not call before the video itself has been moved — the destination
    must exist for hard-links to make sense.
  - Skip when the user explicitly asks not to handle subtitles.
 next_steps: |
  - On status=ok: continue with create_seed_links (if seeding) or end
    the workflow.
  - On status=needs_clarification: ask the user about the unresolved
    tokens, then optionally call learn() to teach the new mapping.
 parameters:
  source_video:
    description: Absolute path to the original video file (in the download folder).
    why_needed: |
      Subtitles typically live next to the source, either as siblings or
      in a Subs/ subfolder. The scanner walks from this path.
    example: /downloads/Oz.S03.1080p.WEBRip.x265-KONTRAST/Oz.S03E01.mkv
  destination_video:
    description: Absolute path to the video file in its library location.
    why_needed: |
      Subtitles are hard-linked next to this file so media players pick
      them up automatically.
    example: /tv_shows/Oz.1997.1080p.WEBRip.x265-KONTRAST/Season 03/Oz.S03E01.mkv
 returns:
  ok:
    description: Subtitles scanned (and possibly placed).
    fields:
      status: "'ok'"
      placed: List of {source, destination, filename} for each linked file.
      placed_count: Number of subtitle files placed.
      skipped_count: Number of subtitle files filtered out.
  needs_clarification:
    description: One or more tokens could not be classified.
    fields:
      unresolved: List of unrecognised tokens with their context.
      question: Human-readable question to relay to the user.
  error:
    description: Scan or placement failed.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,58 @@
 name: move_media
 summary: >
  Safely move a media file with copy + integrity check + delete source.
 description: |
  Copies the source file to the destination with an integrity check,
  then deletes the source. Slower than move_to_destination (which is a
  plain rename) but safer across filesystems where rename is not atomic
  or when you want a checksum verification.
 when_to_use: |
  Use to move a single file across filesystems or when paranoia about
  data integrity is justified — e.g. moving a finished download from a
  scratch disk to the main library array.
 when_not_to_use: |
  - For same-filesystem moves where speed matters: use move_to_destination
    (instant rename on ZFS/ext4 within the same dataset).
  - For folder-level moves of complete packs: use move_to_destination —
    move_media is a single-file operation.
 next_steps: |
  - After a successful move: call manage_subtitles to place any subtitle
    tracks, then create_seed_links if the user wants to keep seeding.
  - On error: surface the error code (file_not_found, destination_exists,
    integrity_check_failed) and ask the user how to proceed.
 parameters:
  source:
    description: Absolute path to the source video file.
    why_needed: |
      The file being moved. Typically lives under the downloads folder
      after a torrent completes.
    example: /downloads/Inception.2010.1080p.BluRay.x265-GROUP/movie.mkv
  destination:
    description: Absolute path of the destination file — must not already exist.
    why_needed: |
      Where the file lands in the library. Comes from a resolve_*_destination
      call so the naming convention is respected.
    example: /movies/Inception.2010.1080p.BluRay.x265-GROUP/Inception.2010.1080p.BluRay.x265-GROUP.mkv
 returns:
  ok:
    description: Move succeeded.
    fields:
      status: "'ok'"
      source: Absolute path of the source (now gone).
      destination: Absolute path of the destination (now in place).
      filename: Basename of the destination file.
      size: Size in bytes.
  error:
    description: Move failed.
    fields:
      error: Short error code (file_not_found, destination_exists, integrity_check_failed, ...).
      message: Human-readable explanation.
@@ -0,0 +1,55 @@
 name: move_to_destination
 summary: >
  Move a file or folder to a destination, creating parent directories as needed.
 description: |
  Performs an actual move on disk. Uses the system 'mv' command, so on the
  same filesystem (e.g. ZFS) this is an instant rename. Creates the parent
  directory of the destination if it doesn't exist yet, then moves. Returns
  before/after paths on success, or an error if the destination already
  exists or the source can't be moved.
 when_to_use: |
  Use after one of the resolve_*_destination tools returned status=ok, to
  perform the move it described. The 'source' and 'destination' arguments
  come directly from the resolved paths.
 when_not_to_use: |
  - Never move when status was not 'ok' (clarification still pending or
    error happened) — that would leave the library in a half-broken state.
  - Don't use this for the seed-link step; use create_seed_links for that.
 next_steps: |
  - After a successful move: call manage_subtitles to place any subtitle
    tracks, then create_seed_links if the user wants to keep seeding.
  - On error: surface the message; do not retry blindly — check whether
    the destination already exists or the source path is correct.
 parameters:
  source:
    description: Absolute path to the source file or folder to move.
    why_needed: |
      The thing being moved. Comes from the user's download folder or from
      a previous tool's output.
    example: /downloads/Oz.S03.1080p.WEBRip.x265-KONTRAST
  destination:
    description: Absolute path of the destination — must not already exist.
    why_needed: |
      Where to put the source. Comes from a resolve_*_destination call so
      that the path matches the library's naming convention.
    example: /tv_shows/Oz.1997.1080p.WEBRip.x265-KONTRAST/Oz.S03.1080p.WEBRip.x265-KONTRAST
 returns:
  ok:
    description: Move succeeded.
    fields:
      source: Absolute path of the source (now gone).
      destination: Absolute path of the destination (now in place).
  error:
    description: Move failed.
    fields:
      error: Short error code (source_not_found, destination_exists, mkdir_failed, move_failed).
      message: Human-readable explanation of what went wrong.
@@ -0,0 +1,56 @@
 name: probe_media
 summary: >
  Run ffprobe on a single video file and return its technical details.
 description: |
  Inspects a specific video file with ffprobe and returns codec,
  resolution, duration, bitrate, the list of audio tracks (with
  language and channel layout), and the list of embedded subtitle
  tracks. Independent of any release-name parsing — works on any file
  you can point at.
 when_to_use: |
  - To inspect a file's audio/subtitle tracks before deciding what to
    do (e.g. choose a default audio language).
  - To verify a video's resolution / codec when the release name is
    unreliable.
  - As a building block when analyze_release is overkill.
 when_not_to_use: |
  - For full release routing — analyze_release does parsing + media
    type detection + probe in one call.
  - On non-video files — ffprobe will return probe_failed.
 next_steps: |
  - The returned info typically feeds a user-facing decision (e.g.
    "this is 7.1 DTS, want to keep it?"); rarely chained directly to
    another tool.
 cache:
  key: source_path
 parameters:
  source_path:
    description: Absolute path to the video file to probe.
    why_needed: |
      ffprobe needs the exact file (not a folder). For releases use
      analyze_release; for a known file path, pass it here.
    example: /downloads/Inception.2010.1080p.BluRay.x265-GROUP/movie.mkv
 returns:
  ok:
    description: Probe succeeded.
    fields:
      status: "'ok'"
      video: "Dict with codec, resolution, width, height, duration_seconds, bitrate_kbps."
      audio_tracks: "List of {index, codec, channels, channel_layout, language, is_default}."
      subtitle_tracks: "List of {index, codec, language, is_default, is_forced}."
      audio_languages: List of language codes present in audio tracks.
      is_multi_audio: True when more than one audio language is present.
  error:
    description: Probe failed.
    fields:
      error: Short error code (not_found, probe_failed).
      message: Human-readable explanation.
@@ -0,0 +1,54 @@
 name: query_library
 summary: >
  Find release folders across all configured library roots whose name
  contains a substring (case-insensitive).
 description: |
  Scans every configured library root (movies, tv_shows, …) at depth 1
  and returns folders whose name contains the query. For each match,
  reports whether a `.alfred/metadata.yaml` exists — handy to spot
  releases that have not been inspected yet. Does not recurse into
  seasons / episodes; one entry per release folder.
 when_to_use: |
  - To answer "do I already have X?" without listing whole library
    roots one by one.
  - To pick the release_path to feed read_release_metadata or any
    inspector tool.
 when_not_to_use: |
  - To list the *whole* library — that scan should live behind a
    dedicated tool (not implemented yet).
  - To browse a single root — use list_folder instead, it's cheaper
    and doesn't open every library.
 next_steps: |
  - When one match is found: feed its path to read_release_metadata or
    analyze_release.
  - When several match: surface the indexed list to the user and ask
    which one they mean.
 parameters:
  name:
    description: Case-insensitive substring of the release name to look for.
    why_needed: |
      Library folders are named after the release (Title.Year.... or
      Title (Year)). A substring is enough to catch typical user
      phrasings ("foundation", "inception 2010").
    example: foundation
 returns:
  ok:
    description: Scan completed (possibly zero matches).
    fields:
      status: "'ok'"
      query: The query string as received.
      match_count: Number of matching folders.
      matches: "List of {collection, name, path, has_metadata}."
  error:
    description: Scan could not run.
    fields:
      error: Short error code (no_libraries, empty_name).
      message: Human-readable explanation.
@@ -0,0 +1,55 @@
 name: read_release_metadata
 summary: >
  Read the `.alfred/metadata.yaml` file for a release folder.
 description: |
  Returns whatever has been previously persisted by inspector tools
  (analyze_release, probe_media, find_media_imdb_id) and by the subtitle
  pipeline. Works for any folder — download or library — as long as the
  release has been touched at least once. Missing metadata is not an
  error: the tool returns `has_metadata=false` with an empty dict.
 when_to_use: |
  - Before re-running analyze_release / probe_media on a release you
    might have already seen — saves a full re-inspection.
  - To answer "what do we know about X?" without scanning.
  - To list which releases in a library have no `.alfred` yet (loop +
    `has_metadata`).
 when_not_to_use: |
  - To search a library by name — use query_library.
  - When you need a fresh probe/parse — call the inspector directly,
    the result will be persisted automatically.
 next_steps: |
  - If `has_metadata=false`, decide whether to inspect now
    (analyze_release / probe_media).
  - If `has_metadata=true`, read `metadata.parse`, `metadata.probe`,
    `metadata.tmdb` blocks before deciding next actions.
 cache:
  key: release_path
 parameters:
  release_path:
    description: Absolute path to the release folder (or any file inside it).
    why_needed: |
      The store lives at `<release_root>/.alfred/metadata.yaml`. A file
      path is auto-resolved to its parent folder.
    example: /mnt/library/tv_shows/Foundation.2021.1080p.WEBRip.x265-RARBG
 returns:
  ok:
    description: Release inspected (file may or may not exist).
    fields:
      status: "'ok'"
      release_path: Absolute path of the release folder.
      has_metadata: True if `.alfred/metadata.yaml` exists.
      metadata: Full content of the file, or empty dict.
  error:
    description: Path does not exist on disk.
    fields:
      error: Short error code (not_found).
      message: Human-readable explanation.
@@ -0,0 +1,93 @@
 name: resolve_episode_destination
 summary: >
  Compute destination paths for a single TV episode file (file move).
 description: |
  Resolves the target series folder, season subfolder, and full destination
  filename for a single-episode release. Returns paths only — does not move
  anything. If a series folder with a different name already exists, returns
  needs_clarification.
 when_to_use: |
  Use after analyze_release has identified the release as a single episode
  (media_type=tv_episode, season AND episode both set). TMDB must already be
  queried for the canonical title/year, and optionally the episode title.
 when_not_to_use: |
  - Season packs (folder containing many episodes): use resolve_season_destination.
  - Multi-season packs: use resolve_series_destination.
  - Movies: use resolve_movie_destination.
 next_steps: |
  - On status=ok: call move_to_destination with the source video file and
    destination=library_file.
  - On status=needs_clarification: present question/options to the user,
    then re-call with confirmed_folder set.
  - On status=error: surface the message; do not move.
 parameters:
  release_name:
    description: Raw release file name (with extension).
    why_needed: |
      Drives extraction of quality/source/codec/group, which become part of
      the destination filename so each file is self-describing.
    example: Oz.S03E01.1080p.WEBRip.x265-KONTRAST.mkv
  source_file:
    description: Absolute path to the source video file on disk.
    why_needed: |
      Used to read the source file extension (.mkv, .mp4, .avi…) for the
      destination filename — release names don't always carry the extension.
    example: /downloads/Oz.S03E01.1080p.WEBRip.x265-KONTRAST/file.mkv
  tmdb_title:
    description: Canonical show title from TMDB.
    why_needed: |
      Title prefix for both the series folder and the destination filename;
      ensures consistent naming across all episodes of the show.
    example: Oz
  tmdb_year:
    description: Show start year from TMDB.
    why_needed: |
      Disambiguates remakes/reboots sharing a title; year is part of the
      series folder identity.
    example: "1997"
  tmdb_episode_title:
    description: Episode title from TMDB. Optional.
    why_needed: |
      When present, the destination filename embeds the episode title for
      human-readability (e.g. Oz.S01E01.The.Routine...).
    example: The Routine
  confirmed_folder:
    description: Folder name the user picked after needs_clarification.
    why_needed: |
      Forces the use case to skip detection and use this exact folder name.
    example: Oz.1997.1080p.WEBRip.x265-KONTRAST
 returns:
  ok:
    description: Paths resolved; ready to move the episode file.
    fields:
      series_folder: Absolute path to the series root folder.
      season_folder: Absolute path to the season subfolder.
      library_file: Absolute path to the destination video file (move target).
      series_folder_name: Series folder name for display.
      season_folder_name: Season folder name for display.
      filename: Destination filename for display.
      is_new_series_folder: True if the series folder doesn't exist yet.
  needs_clarification:
    description: A folder exists with a different name; user must choose.
    fields:
      question: Human-readable question.
      options: List of folder names to pick from.
  error:
    description: Resolution failed.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,72 @@
 name: resolve_movie_destination
 summary: >
  Compute destination paths for a movie file (file move).
 description: |
  Resolves the target movie folder and full destination filename for a movie
  release. Returns paths only — does not move anything. Movies do not have
  the existing-folder disambiguation problem that TV shows have (each
  release lands in its own folder named after the canonical title + year +
  tech).
 when_to_use: |
  Use after analyze_release has identified the release as a movie
  (media_type=movie). TMDB must already be queried for the canonical title
  and release year.
 when_not_to_use: |
  - TV shows in any form: use resolve_season_destination /
    resolve_episode_destination / resolve_series_destination.
  - Documentaries when they're treated as series rather than standalone
    films: route them through the TV-show resolvers.
 next_steps: |
  - On status=ok: call move_to_destination with the source video file and
    destination=library_file.
  - On status=error: surface the message; do not move.
 parameters:
  release_name:
    description: Raw release folder or file name.
    why_needed: |
      Drives extraction of quality/source/codec/group/edition tokens, which
      become part of both the movie folder and filename so each release is
      self-describing on disk.
    example: Inception.2010.1080p.BluRay.x265-GROUP
  source_file:
    description: Absolute path to the source video file on disk.
    why_needed: |
      Used to read the file extension for the destination filename.
    example: /downloads/Inception.2010.1080p.BluRay.x265-GROUP/movie.mkv
  tmdb_title:
    description: Canonical movie title from TMDB.
    why_needed: |
      Title prefix for the destination folder/file; ensures the library
      uses the canonical title and not a sanitized release-name title.
    example: Inception
  tmdb_year:
    description: Movie release year from TMDB.
    why_needed: |
      Disambiguates remakes that share a title (Dune 1984 vs Dune 2021)
      and locks the folder identity in time.
    example: "2010"
 returns:
  ok:
    description: Paths resolved; ready to move.
    fields:
      movie_folder: Absolute path to the movie folder.
      library_file: Absolute path to the destination video file (move target).
      movie_folder_name: Folder name for display.
      filename: Destination filename for display.
      is_new_folder: True if the movie folder doesn't exist yet.
  error:
    description: Resolution failed.
    fields:
      error: Short error code (e.g. library_not_set).
      message: Human-readable explanation.
@@ -0,0 +1,84 @@
 name: resolve_season_destination
 summary: >
  Compute destination paths for a season pack (folder move) in the TV library.
 description: |
  Resolves the target series folder and season subfolder for a complete-season
  download. Returns the paths only — does not perform any move. If a series
  folder for this show already exists in the library with a different name
  (different group/quality/source), returns needs_clarification so the user
  can decide whether to merge into the existing folder or create a new one.
 when_to_use: |
  Use after analyze_release has identified the release as a season pack
  (media_type=tv_show, season set, episode unset). TMDB must already be
  queried so tmdb_title and tmdb_year are canonical values, not raw tokens
  from the release name.
 when_not_to_use: |
  - Single-episode files: use resolve_episode_destination instead.
  - Multi-season packs (S01-S05 etc.): use resolve_series_destination.
  - Movies: use resolve_movie_destination.
 next_steps: |
  - On status=ok: call move_to_destination with source=<download folder> and
    destination=season_folder.
  - On status=needs_clarification: present the question and options to the
    user, then re-call this tool with confirmed_folder set to the user's pick.
  - On status=error: surface the message to the user; do not move anything.
 parameters:
  release_name:
    description: Raw release folder name as it appears on disk.
    why_needed: |
      Drives extraction of quality/source/codec/group tokens — these are
      embedded in the target folder name (Title.Year.Quality.Source.Codec-GROUP)
      to make releases self-describing on the filesystem.
    example: Oz.S03.1080p.WEBRip.x265-KONTRAST
  tmdb_title:
    description: Canonical show title from TMDB.
    why_needed: |
      Builds the title prefix of the folder name. Must come from TMDB to
      avoid typos and variant spellings present in the raw release name.
    example: Oz
  tmdb_year:
    description: Show start year from TMDB.
    why_needed: |
      Disambiguates shows that share a title across decades (e.g. multiple
      remakes of "The Office") and locks the folder identity.
    example: "1997"
  confirmed_folder:
    description: |
      Folder name chosen by the user after a previous needs_clarification
      response.
    why_needed: |
      Short-circuits the existing-folder detection and forces the use case
      to use this exact folder name, even if it doesn't match the computed
      one.
    example: Oz.1997.1080p.WEBRip.x265-KONTRAST
 returns:
  ok:
    description: Paths resolved unambiguously; ready to move.
    fields:
      series_folder: Absolute path to the series root folder.
      season_folder: Absolute path to the season subfolder (move target).
      series_folder_name: Just the series folder name, for display.
      season_folder_name: Just the season folder name, for display.
      is_new_series_folder: True if the series folder doesn't exist yet.
  needs_clarification:
    description: A folder already exists with a different name; ask the user.
    fields:
      question: Human-readable question for the user.
      options: List of folder names the user can pick from.
  error:
    description: Resolution failed (config missing, invalid release name, etc.).
    fields:
      error: Short error code (e.g. library_not_set).
      message: Human-readable explanation.
@@ -0,0 +1,77 @@
 name: resolve_series_destination
 summary: >
  Compute the destination path for a complete multi-season series pack (folder move).
 description: |
  Resolves the target series folder for a pack that contains multiple seasons
  (e.g. S01-S05 in a single release). Returns only the series folder — the
  whole source folder is moved as-is into the library, no per-season
  restructuring. If a folder with a different name already exists for this
  show, returns needs_clarification.
 when_to_use: |
  Use after analyze_release has identified the release as a complete-series
  pack (media_type=tv_complete, or multi-season indicators). TMDB must
  already be queried for canonical title/year.
 when_not_to_use: |
  - Single-season packs: use resolve_season_destination.
  - Single episodes: use resolve_episode_destination.
  - Movies: use resolve_movie_destination.
 next_steps: |
  - On status=ok: call move_to_destination with source=<download folder> and
    destination=series_folder.
  - On status=needs_clarification: ask the user, re-call with
    confirmed_folder set.
  - On status=error: surface the message; do not move.
 parameters:
  release_name:
    description: Raw release folder name as it appears on disk.
    why_needed: |
      Drives extraction of quality/source/codec/group tokens for the target
      folder name, even though the multi-season structure inside is kept
      as-is.
    example: The.Wire.S01-S05.1080p.BluRay.x265-GROUP
  tmdb_title:
    description: Canonical show title from TMDB.
    why_needed: |
      Title prefix of the series folder; comes from TMDB to avoid raw
      release-name spellings.
    example: The Wire
  tmdb_year:
    description: Show start year from TMDB.
    why_needed: |
      Disambiguates shows that share a title across eras and locks the
      folder identity.
    example: "2002"
  confirmed_folder:
    description: Folder name chosen by the user after needs_clarification.
    why_needed: |
      Forces the use case to use this exact folder name and skip detection.
    example: The.Wire.2002.1080p.BluRay.x265-GROUP
 returns:
  ok:
    description: Path resolved; ready to move the pack.
    fields:
      series_folder: Absolute path to the destination series folder.
      series_folder_name: Folder name for display.
      is_new_series_folder: True if the folder doesn't exist yet.
  needs_clarification:
    description: A folder exists with a different name; ask the user.
    fields:
      question: Human-readable question.
      options: List of folder names to pick from.
  error:
    description: Resolution failed.
    fields:
      error: Short error code.
      message: Human-readable explanation.
@@ -0,0 +1,47 @@
 name: set_language
 summary: >
  Set the conversation language so all subsequent assistant messages
  match it.
 description: |
  Persists an ISO 639-1 language code in short-term memory under
  conversation.language. Read by the prompt builder and any tool that
  needs to localise output. Does not validate the code against an ISO
  list — the LLM is trusted to pass a sensible value.
 when_to_use: |
  As the very first call when the user writes in a language different
  from the current STM language. Doing it before answering avoids a
  mid-reply switch.
 when_not_to_use: |
  - On every turn — only when the language actually changes.
  - To pick a subtitle language — that lives in SubtitlePreferences,
    not the conversation language.
 next_steps: |
  - After success: continue the user's request in the newly set
    language.
 parameters:
  language:
    description: ISO 639-1 language code (en, fr, es, de, ...).
    why_needed: |
      Identifies the target language unambiguously across the UI and
      any localisation logic.
    example: fr
 returns:
  ok:
    description: Language saved.
    fields:
      status: "'ok'"
      message: Confirmation message.
      language: The language code that was saved.
  error:
    description: Could not save the language.
    fields:
      status: "'error'"
      error: Short error code or exception message.
@@ -0,0 +1,58 @@
 name: set_path_for_folder
 summary: >
  Configure where a known folder lives on disk (download, torrent, or
  any library collection).
 description: |
  Stores an absolute path in long-term memory under a folder key. Two
  classes of folders exist:
    - Workspace paths: "download", "torrent" — single-valued each, used
      by the organize workflows.
    - Library paths: any other key (e.g. "movie", "tv_show",
      "documentary") — these are the collections you organise into.
  The path must exist and be a directory; otherwise the call fails
  without changing memory.
 when_to_use: |
  On first run, or when the user moves a folder, or when introducing a
  new library collection (e.g. "set the documentaries folder to ...").
 when_not_to_use: |
  - For one-off listings — call list_folder directly; it needs no
    further configuration as long as the folder is already set.
  - To rename or delete an existing folder — this only sets paths.
 next_steps: |
  - After success: typical follow-ups are list_folder on the same key,
    or starting a workflow that needs the path.
 parameters:
  folder_name:
    description: Logical name of the folder (download, torrent, movie, tv_show, ...).
    why_needed: |
      The key the agent uses everywhere afterwards. "download" and
      "torrent" are reserved for workspace; anything else becomes a
      library collection.
    example: tv_show
  path_value:
    description: Absolute path to the folder on disk.
    why_needed: |
      Must exist and be readable. Stored verbatim in LTM — relative
      paths are rejected.
    example: /tank/library/tv_shows
 returns:
  ok:
    description: Path saved to long-term memory.
    fields:
      status: "'ok'"
      folder_name: The logical name that was set.
      path_value: The absolute path that was saved.
  error:
    description: Could not set the path.
    fields:
      error: Short error code (path_not_found, not_a_directory, invalid_path, ...).
      message: Human-readable explanation.
@@ -0,0 +1,64 @@
 name: start_workflow
 summary: >
  Enter a workflow scope — narrows the visible tool catalog and gives the
  agent a clear multi-step plan to follow.
 description: |
  Activates a named workflow defined in YAML under agent/workflows/.
  Once active, only the workflow's declared tools (plus the core noyau)
  are exposed to the LLM, which keeps the decision space small and
  focused. The returned plan (description + steps) is the script the
  agent should execute until end_workflow is called.
 when_to_use: |
  Use as the very first action whenever the user request maps to a
  known workflow (e.g. "organize Breaking Bad" → media.organize_media).
  Pass any parameters you already know (release name, target media,
  flags) in 'params' so later steps can read them from STM.
 when_not_to_use: |
  - Do not start a workflow for purely conversational replies or
    one-shot lookups that need a single tool call.
  - Do not start a new workflow while one is already active — call
    end_workflow first.
 next_steps: |
  - On status=ok: follow the returned 'steps' list, calling the tools
    in order. The visible tool catalog has already been narrowed.
  - On status=error (unknown_workflow): surface the available list to
    the user and ask which one they meant.
  - On status=error (workflow_already_active): either continue the
    active workflow or call end_workflow first.
 parameters:
  workflow_name:
    description: Fully-qualified name of the workflow to start (e.g. media.organize_media).
    why_needed: |
      Identifies which YAML definition to load. Names use the
      'domain.action' convention (media.*, mail.*, ...).
    example: media.organize_media
  params:
    description: Initial parameters to seed the workflow with (release name, target, flags).
    why_needed: |
      Later steps read these from STM instead of asking the user again.
      Pass whatever you already extracted from the user's message.
    example: '{"release_name": "Breaking.Bad.S01.1080p.BluRay.x265-GROUP", "keep_seeding": true}'
 returns:
  ok:
    description: Workflow activated; catalog has been narrowed.
    fields:
      workflow: Name of the activated workflow.
      description: Human-readable description of what the workflow does.
      steps: Ordered list of steps to execute.
      tools: Tools that are now visible (in addition to the core noyau).
  error:
    description: Could not activate the workflow.
    fields:
      error: Short error code (unknown_workflow, workflow_already_active).
      message: Human-readable explanation.
      available_workflows: List of valid workflow names (only on unknown_workflow).
      active_workflow: Name of the currently active workflow (only on workflow_already_active).
@@ -0,0 +1,86 @@
 """Workflow scoping tools — start_workflow / end_workflow meta-tools.
 These tools let the agent enter and leave a workflow scope. While a
 workflow is active, the PromptBuilder narrows the visible tool catalog
 to the noyau + the workflow's declared tools, so the LLM doesn't have
 to reason over the full set.
 """
 import logging
 from typing import Any
 from alfred.infrastructure.persistence import get_memory
 from ..workflows import WorkflowLoader
 logger = logging.getLogger(__name__)
 _loader_cache: list[WorkflowLoader] = []
 def _get_loader() -> WorkflowLoader:
     """Return the shared WorkflowLoader, creating it on first use.

     The instance is kept in the module-level ``_loader_cache`` list so that
     later calls reuse it instead of building a new loader.
     """
     if _loader_cache:
         return _loader_cache[0]
     instance = WorkflowLoader()
     _loader_cache.append(instance)
     return instance
 def start_workflow(workflow_name: str, params: dict) -> dict[str, Any]:
     """Activate a workflow scope; see specs/start_workflow.yaml for the spec.

     Args:
         workflow_name: Name of the workflow definition to look up.
         params: Initial parameters stored with the workflow in short-term
             memory; a falsy value is stored as an empty dict.

     Returns:
         A dict with "status" set to "ok" (plus the workflow's description,
         steps and tools) or to "error" with an "unknown_workflow" or
         "workflow_already_active" code.
     """
     catalog = _get_loader()
     definition = catalog.get(workflow_name)
     if definition is None:
         failure: dict[str, Any] = {"status": "error", "error": "unknown_workflow"}
         failure["message"] = f"Workflow '{workflow_name}' not found"
         failure["available_workflows"] = catalog.names()
         return failure

     memory = get_memory()
     active = memory.stm.workflow.current
     if active is not None:
         # Only one workflow may be active at a time; report which one it is.
         active_name = active.get("name")
         return {
             "status": "error",
             "error": "workflow_already_active",
             "message": (
                 f"Workflow '{active_name}' is already active. "
                 "Call end_workflow before starting a new one."
             ),
             "active_workflow": active_name,
         }

     memory.stm.start_workflow(workflow_name, params or {})
     memory.save()
     logger.info(f"start_workflow: '{workflow_name}' with params={params}")

     plan: dict[str, Any] = {"status": "ok", "workflow": workflow_name}
     plan["description"] = definition.get("description", "")
     plan["steps"] = definition.get("steps", [])
     plan["tools"] = definition.get("tools", [])
     return plan
 def end_workflow(reason: str) -> dict[str, Any]:
     """Leave the active workflow scope; see specs/end_workflow.yaml for the spec.

     Args:
         reason: Free-text reason, logged and echoed back in the result.

     Returns:
         A dict with "status" set to "ok" (plus the ended workflow's name and
         the reason) or to "error" with the "no_active_workflow" code.
     """
     memory = get_memory()
     active = memory.stm.workflow.current
     if active is not None:
         # Read the name before clearing the workflow from short-term memory.
         finished = active.get("name")
         memory.stm.end_workflow()
         memory.save()
         logger.info(f"end_workflow: '{finished}' reason={reason!r}")
         return {"status": "ok", "workflow": finished, "reason": reason}

     return {
         "status": "error",
         "error": "no_active_workflow",
         "message": "No workflow is currently active.",
     }
@@ -0,0 +1,3 @@
 from .loader import WorkflowLoader
 __all__ = ["WorkflowLoader"]
@@ -0,0 +1,52 @@
 """WorkflowLoader — autodiscovers and loads workflow YAML files.
 Scans the workflows/ directory for all .yaml files and exposes them
 as dicts. No manual registration needed — drop a new .yaml file and
 it will be picked up automatically.
 """
 import logging
 from pathlib import Path
 import yaml
 logger = logging.getLogger(__name__)
 _WORKFLOWS_DIR = Path(__file__).parent
 class WorkflowLoader:
    """
    Loads all workflow definitions from the workflows/ directory.
    Usage:
        loader = WorkflowLoader()
        all_workflows = loader.all()
        workflow = loader.get("media.organize_media")
    """
    def __init__(self):
        self._workflows: dict[str, dict] = {}
        self._load()
    def _load(self) -> None:
        for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
            try:
                data = yaml.safe_load(path.read_text(encoding="utf-8"))
                name = data.get("name") or path.stem
                self._workflows[name] = data
                logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
            except Exception as e:
                logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
    def all(self) -> dict[str, dict]:
        """Return all loaded workflows keyed by name."""
        return self._workflows
    def get(self, name: str) -> dict | None:
        """Return a specific workflow by name, or None if not found."""
        return self._workflows.get(name)
    def names(self) -> list[str]:
        """Return all available workflow names."""
        return list(self._workflows.keys())
@@ -0,0 +1,69 @@
 name: media.manage_subtitles
 description: >
  Place subtitle files alongside a video that has just been organised into the library.
  Detects the release pattern automatically, identifies and classifies all tracks,
  filters by user rules, and hard-links matching files to the destination.
  If any tracks are unrecognised, asks the user and optionally teaches Alfred.
 trigger:
  examples:
    - "handle subtitles for The X-Files S01E01"
    - "place the subs next to the file"
    - "subtitles are in the Subs/ folder"
    - "add subtitles"
 tools:
  - manage_subtitles
  - learn
 memory:
  SubtitlePreferences: read
  Workflow: read-write
 steps:
  - id: place_subtitles
    tool: manage_subtitles
    description: >
      Detect release pattern, identify and classify all subtitle tracks,
      filter by rules, hard-link matching files next to the destination video.
      Reads SubtitlePreferences from LTM for language/type/format filtering.
    params:
      source_video: "{source_video}"
      destination_video: "{destination_video}"
      imdb_id: "{imdb_id}"
      media_type: "{media_type}"
      release_group: "{release_group}"
      season: "{season}"
      episode: "{episode}"
    on_result:
      ok_placed_zero: skip               # no subtitles found — not an error
      needs_clarification: ask_user      # unrecognised tokens found
  - id: ask_user
    description: >
      Some tracks could not be classified. Show the user the unresolved tokens
      and ask if they want to teach Alfred what they mean.
      If yes → go to learn_tokens. If no → end workflow.
    ask_user:
      question: >
        I could not identify some tokens in the subtitle files: {unresolved}.
        Do you want to teach me what they mean?
      answers:
        # Keys are quoted: YAML 1.1 loaders read bare yes/no as booleans.
        "yes": { next_step: learn_tokens }
        "no":  { next_step: end }
  - id: learn_tokens
    tool: learn
    description: >
      Persist a new token mapping to the learned knowledge pack so Alfred
      recognises it in future scans without asking again.
    params:
      pack: "subtitles"
      category: "{token_category}"   # "languages" or "types"
      key: "{token_key}"             # e.g. "es", "de"
      values: "{token_values}"       # e.g. ["spanish", "espanol"]
 subtitle_naming:
  standard:  "{lang}.{ext}"
  sdh:       "{lang}.sdh.{ext}"
  forced:    "{lang}.forced.{ext}"
@@ -0,0 +1,92 @@
 name: media.organize_media
 description: >
  Organise a downloaded series or movie into the media library.
  Triggered when the user asks to move/organize a specific title.
  Always moves the video file. Optionally creates seed links in the
  torrents folder so qBittorrent can keep seeding.
 trigger:
  examples:
    - "organize Breaking Bad"
    - "organise Severance season 2"
    - "move Inception to my library"
    - "organize Breaking Bad season 1, keep seeding"
 tools:
  - list_folder
  - analyze_release
  - probe_media
  - find_media_imdb_id
  - resolve_season_destination
  - resolve_episode_destination
  - resolve_movie_destination
  - resolve_series_destination
  - move_to_destination
  - manage_subtitles
  - create_seed_links
 memory:
  WorkspacePaths: read
  LibraryPaths: read
  Library: read-write
  Workflow: read-write
  Entities: read-write
 steps:
  - id: list_downloads
    tool: list_folder
    description: List the download folder to find the target files.
    params:
      folder_type: download
  - id: analyze
    tool: analyze_release
    description: >
      Parse the release name to detect media_type (movie / tv_season /
      tv_episode / tv_complete) and extract season/episode info.
  - id: identify_media
    tool: find_media_imdb_id
    description: Confirm canonical title and year via TMDB.
  - id: resolve_destination
    description: >
      Call the resolver that matches media_type from analyze_release:
        movie       → resolve_movie_destination
        tv_season   → resolve_season_destination
        tv_episode  → resolve_episode_destination
        tv_complete → resolve_series_destination
      If the resolver returns needs_clarification, ask the user and
      re-call with confirmed_folder.
  - id: move_file
    tool: move_to_destination
    description: >
      Move the video file/folder to the destination returned by the
      resolver above.
  - id: handle_subtitles
    tool: manage_subtitles
    description: >
      Place subtitle files alongside the video in the library.
      Pass the original source path and the new library destination path.
    on_missing: skip
  - id: ask_seeding
    ask_user:
      question: "Do you want to keep seeding this torrent?"
      answers:
        "yes": { next_step: create_seed_links }
        "no":  { next_step: end }
  - id: create_seed_links
    tool: create_seed_links
    description: >
      Hard-link the library video file back into torrents/<original_folder>/
      and copy all remaining files from the original download folder
      (subs, nfo, jpg, …) so the torrent stays complete for seeding.
 naming_convention:
  # Resolved by domain entities (Movie, Episode) — not hardcoded here
  tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
  movie: "{title} ({year})/{title}.{year}.{ext}"
@@ -29,7 +29,7 @@ app = FastAPI(
    version="0.2.0",
 )
-memory_path = Path(settings.data_storage) / "memory"
+memory_path = Path(settings.data_storage_dir) / "memory"
 init_memory(storage_dir=str(memory_path))
 logger.info(f"Memory context initialized (path: {memory_path})")
@@ -37,6 +37,21 @@ logger.info(f"Memory context initialized (path: {memory_path})")
 llm_provider = settings.default_llm_provider.lower()
 class _UnconfiguredLLM:
    """Placeholder LLM used when no provider could be configured at import time.
    Importing the FastAPI app must not fail just because credentials are
    absent (e.g. during test collection). Any actual call surfaces a clear
    503 error at request time via the handlers below.
    """
    def __init__(self, reason: str):
        self.reason = reason
    def complete(self, *args, **kwargs):
        raise LLMAPIError(f"LLM is not configured: {self.reason}")
 try:
    if llm_provider == "local":
        logger.info("Using local Ollama LLM")
@@ -49,8 +64,11 @@ try:
    else:
        raise ValueError(f"Unknown LLM provider: {llm_provider}")
 except LLMConfigurationError as e:
    # Degrade gracefully: keep the app importable so tests can patch agent.step
    # and so missing credentials surface as a 503 at the endpoint, not as an
    # import error.
    logger.error(f"Failed to initialize LLM: {e}")
-    raise
+    llm = _UnconfiguredLLM(str(e))
 # Initialize agent
 agent = Agent(
@@ -1,12 +1,47 @@
 """Filesystem use cases."""
-from .dto import ListFolderResponse, SetFolderPathResponse
+from .create_seed_links import CreateSeedLinksUseCase
 from .dto import (
    CreateSeedLinksResponse,
    ListFolderResponse,
    ManageSubtitlesResponse,
    MoveMediaResponse,
    PlacedSubtitle,
    SetFolderPathResponse,
 )
 from .list_folder import ListFolderUseCase
 from .manage_subtitles import ManageSubtitlesUseCase
 from .move_media import MoveMediaUseCase
 from .resolve_destination import (
    ResolvedEpisodeDestination,
    ResolvedMovieDestination,
    ResolvedSeasonDestination,
    ResolvedSeriesDestination,
    resolve_episode_destination,
    resolve_movie_destination,
    resolve_season_destination,
    resolve_series_destination,
 )
 from .set_folder_path import SetFolderPathUseCase
 __all__ = [
    "SetFolderPathUseCase",
    "ListFolderUseCase",
    "CreateSeedLinksUseCase",
    "MoveMediaUseCase",
    "ManageSubtitlesUseCase",
    "ResolvedSeasonDestination",
    "ResolvedEpisodeDestination",
    "ResolvedMovieDestination",
    "ResolvedSeriesDestination",
    "resolve_season_destination",
    "resolve_episode_destination",
    "resolve_movie_destination",
    "resolve_series_destination",
    "SetFolderPathResponse",
    "ListFolderResponse",
    "CreateSeedLinksResponse",
    "MoveMediaResponse",
    "ManageSubtitlesResponse",
    "PlacedSubtitle",
 ]
@@ -0,0 +1,54 @@
 """CreateSeedLinksUseCase — prepares a torrent folder for continued seeding."""
 import logging
 from alfred.infrastructure.filesystem import FileManager
 from alfred.infrastructure.persistence import get_memory
 from .dto import CreateSeedLinksResponse
 logger = logging.getLogger(__name__)
 class CreateSeedLinksUseCase:
     """
     Prepares a torrent subfolder so qBittorrent can keep seeding after a move.

     The filesystem work is delegated to ``FileManager.create_seed_links``;
     this use case reads the configured torrent folder from long-term memory
     and converts the manager's result dict into a CreateSeedLinksResponse.
     """

     def __init__(self, file_manager: FileManager):
         self.file_manager = file_manager

     def execute(
         self, library_file: str, original_download_folder: str
     ) -> CreateSeedLinksResponse:
         """Create the seed links, or return an error response.

         Args:
             library_file: Path of the video file in the library.
             original_download_folder: Folder the release was downloaded to.

         Returns:
             A response with status "ok" and the details reported by the file
             manager, or status "error" with an error code and message.
         """
         torrent_folder = get_memory().ltm.workspace.torrent
         if not torrent_folder:
             return CreateSeedLinksResponse(
                 status="error",
                 error="torrent_folder_not_set",
                 message="Torrent folder is not configured. Use set_path_for_folder to set it.",
             )

         outcome = self.file_manager.create_seed_links(
             library_file, original_download_folder, torrent_folder
         )

         # Anything other than an explicit "ok" is reported as an error.
         if outcome.get("status") != "ok":
             return CreateSeedLinksResponse(
                 status="error",
                 error=outcome.get("error"),
                 message=outcome.get("message"),
             )

         return CreateSeedLinksResponse(
             status="ok",
             torrent_subfolder=outcome.get("torrent_subfolder"),
             linked_file=outcome.get("linked_file"),
             copied_files=outcome.get("copied_files"),
             copied_count=outcome.get("copied_count", 0),
             skipped=outcome.get("skipped"),
         )
@@ -0,0 +1,69 @@
 """
 detect_media_type — filesystem-based media type refinement.
 Enriches a ParsedRelease.media_type with evidence from the actual source path
 (file or folder). Called after parse_release() to produce a final classification.
 Classification logic:
  1. If source_path is a file — check its extension directly.
  2. If source_path is a folder — collect the extensions of the files directly
     inside it (first level only; subfolders are not scanned).
  3. Decision:
     - Any non_video extension AND no video extension → "other"
     - Any video extension → keep parsed media_type ("movie" | "tv_show" | "unknown")
     - No conclusive extension found → keep parsed media_type as-is
     - Mixed (video + non_video) → "unknown"
 """
 from __future__ import annotations
 from pathlib import Path
 from alfred.domain.release.value_objects import (
    _METADATA_EXTENSIONS,
    _NON_VIDEO_EXTENSIONS,
    _VIDEO_EXTENSIONS,
    ParsedRelease,
 )
 def detect_media_type(parsed: ParsedRelease, source_path: Path) -> str:
     """
     Return a refined media_type string for the given source_path.

     ``parsed`` is never mutated; the caller decides whether to store the
     returned value back on the ParsedRelease.

     Outcomes:
       - video and non-video extensions both present → "unknown"
       - only non-video extensions present → "other"
       - otherwise → ``parsed.media_type`` unchanged
     """
     # Metadata extensions (.nfo, .srt, …) are always present alongside
     # releases, so they are removed before deciding anything.
     conclusive = _collect_extensions(source_path) - _METADATA_EXTENSIONS

     has_video = bool(conclusive & _VIDEO_EXTENSIONS)
     has_non_video = bool(conclusive & _NON_VIDEO_EXTENSIONS)

     if has_non_video:
         return "unknown" if has_video else "other"

     # Video only, or nothing conclusive: trust the token-level inference.
     return parsed.media_type
 def _collect_extensions(path: Path) -> set[str]:
    """Return the set of lowercase extensions found at path (file or folder)."""
    if not path.exists():
        return set()
    if path.is_file():
        return {path.suffix.lower()}
    # Folder — scan first level only
    exts: set[str] = set()
    for child in path.iterdir():
        if child.is_file():
            exts.add(child.suffix.lower())
    return exts
@@ -1,8 +1,58 @@
 """Filesystem application DTOs."""
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass
class CopyMediaResponse:
    """Result of copying a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Serialise to a plain dict.

        When ``error`` is truthy only status/error/message are emitted;
        otherwise the copy details are emitted.
        """
        payload: dict = {"status": self.status}
        if self.error:
            payload["error"] = self.error
            payload["message"] = self.message
            return payload
        for attr in ("source", "destination", "filename", "size"):
            payload[attr] = getattr(self, attr)
        return payload
@dataclass
class MoveMediaResponse:
    """Result of moving a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Serialise to a plain dict.

        A truthy ``error`` selects the error shape (status/error/message);
        otherwise the move details are returned.
        """
        if not self.error:
            return {
                "status": self.status,
                "source": self.source,
                "destination": self.destination,
                "filename": self.filename,
                "size": self.size,
            }
        return {
            "status": self.status,
            "error": self.error,
            "message": self.message,
        }
@dataclass
 class SetFolderPathResponse:
    """Response from setting a folder path."""
@@ -29,6 +79,108 @@ class SetFolderPathResponse:
        return result
@dataclass
class PlacedSubtitle:
    """A single subtitle file that was successfully placed."""

    source: str
    destination: str
    filename: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict, in declaration order."""
        return {
            key: getattr(self, key)
            for key in ("source", "destination", "filename")
        }
@dataclass
class UnresolvedTrack:
    """Subtitle track that needs agent clarification before it can be placed."""

    raw_tokens: list[str]
    file_path: str | None = None
    file_size_kb: float | None = None
    reason: str = ""  # "unknown_language" | "low_confidence"

    def to_dict(self) -> dict:
        """Return all four fields as a plain dict (values are not copied)."""
        field_names = ("raw_tokens", "file_path", "file_size_kb", "reason")
        return {name: getattr(self, name) for name in field_names}
@dataclass
class AvailableSubtitle:
    """A subtitle track available on an embedded media item."""

    language: str  # ISO 639-2 code
    subtitle_type: str  # "standard" | "sdh" | "forced" | "unknown"

    def to_dict(self) -> dict:
        """Return the track as a dict; ``subtitle_type`` is emitted as "type"."""
        return dict(language=self.language, type=self.subtitle_type)
@dataclass
class ManageSubtitlesResponse:
    """Result of the manage_subtitles use case, serialisable via ``to_dict``."""

    status: str  # "ok" | "needs_clarification" | "error"
    video_path: str | None = None
    placed: list[PlacedSubtitle] | None = None
    skipped_count: int = 0
    unresolved: list[UnresolvedTrack] | None = None
    available: list[AvailableSubtitle] | None = None  # embedded tracks summary
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise to a plain dict.

        A truthy ``error`` yields only status/error/message. Otherwise the
        placement summary is emitted; ``unresolved`` (with its count) and
        ``available`` are added only when non-empty.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        placed_items = self.placed or []
        payload = {
            "status": self.status,
            "video_path": self.video_path,
            "placed": [item.to_dict() for item in placed_items],
            "placed_count": len(placed_items),
            "skipped_count": self.skipped_count,
        }
        if self.unresolved:
            payload["unresolved"] = [track.to_dict() for track in self.unresolved]
            payload["unresolved_count"] = len(self.unresolved)
        if self.available:
            payload["available"] = [sub.to_dict() for sub in self.available]
        return payload
@dataclass
class CreateSeedLinksResponse:
    """Result of creating seed links for a torrent, serialisable via ``to_dict``."""

    status: str
    torrent_subfolder: str | None = None
    linked_file: str | None = None
    copied_files: list[str] | None = None
    copied_count: int = 0
    skipped: list[str] | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise to a plain dict.

        A truthy ``error`` yields only status/error/message; otherwise the
        success shape is emitted, with missing lists rendered as ``[]``.
        """
        payload: dict = {"status": self.status}
        if self.error:
            payload["error"] = self.error
            payload["message"] = self.message
            return payload
        payload["torrent_subfolder"] = self.torrent_subfolder
        payload["linked_file"] = self.linked_file
        payload["copied_files"] = self.copied_files or []
        payload["copied_count"] = self.copied_count
        payload["skipped"] = self.skipped or []
        return payload
@dataclass
 class ListFolderResponse:
    """Response from listing a folder."""
@@ -0,0 +1,82 @@
 """enrich_from_probe — fill missing ParsedRelease fields from MediaInfo."""
 from __future__ import annotations
 from alfred.domain.release.value_objects import ParsedRelease
 from alfred.domain.shared.media import MediaInfo
 # Map ffprobe video codec names (lower-case) to scene-style codec tokens.
 # Names missing from the map are upper-cased by the caller as a fallback.
 _VIDEO_CODEC_MAP = {
     "hevc": "x265",
     "h264": "x264",
     "h265": "x265",
     "av1": "AV1",
     "vp9": "VP9",
     "mpeg4": "XviD",
 }
 # Map ffprobe audio codec names (lower-case) to scene-style tokens.
 _AUDIO_CODEC_MAP = {
     "eac3": "EAC3",
     "ac3": "AC3",
     "dts": "DTS",
     "truehd": "TrueHD",
     "aac": "AAC",
     "flac": "FLAC",
     "opus": "OPUS",
     "mp3": "MP3",
     # ffprobe reports little-endian PCM as "pcm_s16le" / "pcm_s24le".
     "pcm_s16le": "PCM",
     "pcm_s24le": "PCM",
     # Truncated spellings kept so any existing lookups keep working.
     "pcm_s16l": "PCM",
     "pcm_s24l": "PCM",
 }
 # Map channel count to standard layout string.
 _CHANNEL_MAP = {
     8: "7.1",
     6: "5.1",
     2: "2.0",
     1: "1.0",
 }
 def enrich_from_probe(parsed: ParsedRelease, info: MediaInfo) -> None:
     """
     Fill None fields in parsed using data from ffprobe MediaInfo.

     Only overwrites fields that are currently None — token-level values
     from the release name always take priority. Languages are merged
     rather than overwritten. Mutates parsed in place and returns None.

     Args:
         parsed: Release parsed from its name; updated in place.
         info: Probe result supplying resolution, codecs, audio tracks and
             audio languages.
     """
     if parsed.quality is None and info.resolution:
         parsed.quality = info.resolution

     if parsed.codec is None and info.video_codec:
         probed_codec = info.video_codec
         # Unknown codecs fall back to the upper-cased ffprobe name.
         parsed.codec = _VIDEO_CODEC_MAP.get(probed_codec.lower(), probed_codec.upper())

     # bit_depth is intentionally left alone: ffprobe exposes it via pix_fmt,
     # which is not part of MediaInfo yet.

     # Audio — use the default track, fallback to first
     audio_tracks = info.audio_tracks
     default_track = next((t for t in audio_tracks if t.is_default), None)
     track = default_track or (audio_tracks[0] if audio_tracks else None)
     if track:
         if parsed.audio_codec is None and track.codec:
             parsed.audio_codec = _AUDIO_CODEC_MAP.get(
                 track.codec.lower(), track.codec.upper()
             )
         if parsed.audio_channels is None and track.channels:
             parsed.audio_channels = _CHANNEL_MAP.get(
                 track.channels, f"{track.channels}ch"
             )

     # Languages — merge ffprobe languages with token-level ones.
     # Comparison is case-insensitive and the seen-set is updated as we go,
     # so duplicate probe languages (or a second call) never append twice.
     if info.audio_languages:
         seen = {existing.upper() for existing in parsed.languages}
         for lang in info.audio_languages:
             key = lang.upper()
             # "und" = undetermined, not useful
             if lang.lower() == "und" or key in seen:
                 continue
             seen.add(key)
             parsed.languages.append(lang)
@@ -0,0 +1,296 @@
 """ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
 import logging
 from pathlib import Path
 from alfred.domain.shared.value_objects import ImdbId
 from alfred.domain.subtitles.entities import SubtitleCandidate
 from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
 from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
 from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
 from alfred.domain.subtitles.services.matcher import SubtitleMatcher
 from alfred.domain.subtitles.services.pattern_detector import PatternDetector
 from alfred.domain.subtitles.services.placer import (
    PlacedTrack,
    SubtitlePlacer,
    _build_dest_name,
 )
 from alfred.domain.subtitles.services.utils import available_subtitles
 from alfred.domain.subtitles.value_objects import ScanStrategy
 from alfred.infrastructure.persistence.context import get_memory
 from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
 from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
 from .dto import (
    AvailableSubtitle,
    ManageSubtitlesResponse,
    PlacedSubtitle,
    UnresolvedTrack,
 )
 logger = logging.getLogger(__name__)
 def _infer_library_root(dest_video: Path, media_type: str) -> Path:
     """
     Infer the media library root folder from the destination video path.

     TV show:  video → Season 01 → The X-Files   (two parents up)
     Movie:    video → Inception (2010)           (one parent up)

     Any media_type other than "tv_show" is treated like a movie.
     """
     hops = 2 if media_type == "tv_show" else 1
     root = dest_video
     for _ in range(hops):
         root = root.parent
     return root
 def _to_imdb_id(raw: str | None) -> ImdbId | None:
     """
     Convert a raw string into an ImdbId, best-effort.

     Returns None when raw is empty/None or when ImdbId construction raises.
     """
     imdb_id = None
     if raw:
         try:
             imdb_id = ImdbId(raw)
         except Exception:
             # Any construction failure is treated as "no id".
             imdb_id = None
     return imdb_id
 class ManageSubtitlesUseCase:
     """
     Full subtitle pipeline:
     1. Load knowledge base
     2. Detect (or confirm) the release pattern
     3. Identify all tracks (ffprobe + filesystem scan)
     4. Load + resolve rules for this media
     5. Match tracks against rules
     6. If any tracks are unresolved → return needs_clarification (don't place yet)
     7. Place matched tracks via hard-link
     8. Persist to .alfred/metadata.yaml
     The use case is stateless — all dependencies are instantiated inline.
     """

     def execute(
         self,
         source_video: str,
         destination_video: str,
         imdb_id: str | None = None,
         media_type: str = "tv_show",
         release_group: str | None = None,
         season: int | None = None,
         episode: int | None = None,
         confirmed_pattern_id: str | None = None,
         dry_run: bool = False,
     ) -> ManageSubtitlesResponse:
         """
         Run the subtitle pipeline for one video.

         Args:
             source_video: Path of the video inside the release.
             destination_video: Path of the video inside the library.
             imdb_id: Optional raw IMDb id; invalid values are ignored.
             media_type: "tv_show" or anything else (treated as a movie when
                 inferring the library root).
             release_group: Optional release group, forwarded to the
                 identifier, the rule repository and the history entry.
             season: Season number recorded in the placement history.
             episode: Episode number recorded in the placement history.
             confirmed_pattern_id: Explicit pattern id overriding detection.
             dry_run: When True, report what would be placed without placing
                 or persisting anything.

         Returns:
             ManageSubtitlesResponse with status "ok", "needs_clarification"
             or "error".
         """
         source_path = Path(source_video)
         dest_path = Path(destination_video)
         # Only fail when neither the file nor its folder exists; a missing
         # file with an existing parent folder is allowed to proceed.
         if not source_path.exists() and not source_path.parent.exists():
             return ManageSubtitlesResponse(
                 status="error",
                 error="source_not_found",
                 message=f"Source video not found: {source_video}",
             )
         kb = SubtitleKnowledgeBase(KnowledgeLoader())
         library_root = _infer_library_root(dest_path, media_type)
         store = SubtitleMetadataStore(library_root)
         repo = RuleSetRepository(library_root)

         # --- Pattern resolution ---
         pattern = self._resolve_pattern(
             kb, store, source_path, confirmed_pattern_id, release_group
         )
         if pattern is None:
             return ManageSubtitlesResponse(
                 status="error",
                 error="pattern_not_found",
                 message="Could not determine subtitle pattern for this release.",
             )

         # --- Identify ---
         media_id = _to_imdb_id(imdb_id)
         identifier = SubtitleIdentifier(kb)
         metadata = identifier.identify(
             video_path=source_path,
             pattern=pattern,
             media_id=media_id,
             media_type=media_type,
             release_group=release_group,
         )
         if metadata.total_count == 0:
             logger.info(
                 f"ManageSubtitles: no subtitle tracks found for {source_path.name}"
             )
             return ManageSubtitlesResponse(
                 status="ok",
                 video_path=destination_video,
                 placed=[],
                 skipped_count=0,
             )

         # --- Embedded short-circuit ---
         if pattern.scan_strategy == ScanStrategy.EMBEDDED:
             logger.info("ManageSubtitles: embedded pattern — skipping matcher")
             available = [
                 AvailableSubtitle(
                     language=t.language.code if t.language else "?",
                     subtitle_type=t.subtitle_type.value,
                 )
                 for t in available_subtitles(metadata.embedded_tracks)
             ]
             return ManageSubtitlesResponse(
                 status="ok",
                 video_path=destination_video,
                 placed=[],
                 skipped_count=0,
                 available=available,
             )

         # --- Match (external only) ---
         subtitle_prefs = None
         try:
             memory = get_memory()
             subtitle_prefs = memory.ltm.subtitle_preferences
         except Exception:
             # Best-effort: preferences are optional, but record why they
             # could not be loaded instead of swallowing the failure silently.
             logger.debug(
                 "ManageSubtitles: could not load subtitle preferences — "
                 "continuing without them",
                 exc_info=True,
             )
         rules = repo.load(release_group, subtitle_prefs).resolve()
         matcher = SubtitleMatcher()
         matched, unresolved = matcher.match(metadata.external_tracks, rules)
         if unresolved:
             logger.info(
                 f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
             )
             return ManageSubtitlesResponse(
                 status="needs_clarification",
                 video_path=destination_video,
                 placed=[],
                 unresolved=[_to_unresolved_dto(t) for t in unresolved],
             )
         if not matched:
             return ManageSubtitlesResponse(
                 status="ok",
                 video_path=destination_video,
                 placed=[],
                 skipped_count=metadata.total_count,
             )

         # --- Dry run: skip placement ---
         if dry_run:
             placed_dtos = []
             for t in matched:
                 # Tracks without a file on disk cannot be placed.
                 if not t.file_path:
                     continue
                 try:
                     filename = _build_dest_name(t, dest_path.stem)
                 except ValueError:
                     # No destination name could be built for this track.
                     continue
                 placed_dtos.append(
                     PlacedSubtitle(
                         source=str(t.file_path),
                         destination=str(dest_path.parent / filename),
                         filename=filename,
                     )
                 )
             return ManageSubtitlesResponse(
                 status="ok",
                 video_path=destination_video,
                 placed=placed_dtos,
                 skipped_count=0,
             )

         # --- Place ---
         placer = SubtitlePlacer()
         place_result = placer.place(matched, dest_path)

         # --- Persist ---
         if place_result.placed:
             pairs = _pair_placed_with_tracks(place_result.placed, matched)
             store.append_history(pairs, season, episode, release_group)
         placed_dtos = [
             PlacedSubtitle(
                 source=str(p.source),
                 destination=str(p.destination),
                 filename=p.filename,
             )
             for p in place_result.placed
         ]
         return ManageSubtitlesResponse(
             status="ok",
             video_path=destination_video,
             placed=placed_dtos,
             skipped_count=place_result.skipped_count,
         )

     def _resolve_pattern(
         self,
         kb: SubtitleKnowledgeBase,
         store: SubtitleMetadataStore,
         source_path: Path,
         confirmed_pattern_id: str | None,
         release_group: str | None,
     ):
         """
         Pick the subtitle pattern to use, in priority order:
         explicit override → pattern stored in metadata → auto-detection
         (confidence >= 0.6) → the "adjacent" pattern.

         release_group is accepted but not used by this method.
         Returns whatever ``kb.pattern`` / the detector yields; the caller
         treats None as "pattern not found".
         """
         # 1. Explicit override from caller
         if confirmed_pattern_id:
             p = kb.pattern(confirmed_pattern_id)
             if p:
                 return p
             logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

         # 2. Previously confirmed in metadata store
         stored_id = store.confirmed_pattern()
         if stored_id:
             p = kb.pattern(stored_id)
             if p:
                 logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                 return p

         # 3. Auto-detect
         release_root = source_path.parent
         detector = PatternDetector(kb)
         result = detector.detect(release_root, source_path)
         if result["detected"] and result["confidence"] >= 0.6:
             logger.info(
                 f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                 f"(confidence={result['confidence']:.2f})"
             )
             return result["detected"]

         # 4. Fallback — adjacent (safest default)
         logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
         return kb.pattern("adjacent")
 def _to_unresolved_dto(
     track: SubtitleCandidate, min_confidence: float = 0.7
 ) -> UnresolvedTrack:
     """
     Build the UnresolvedTrack DTO for a track the matcher could not resolve.

     The reason is "unknown_language" when the track has no language and
     "low_confidence" otherwise. ``min_confidence`` is accepted but not used.
     """
     if track.language is None:
         why = "unknown_language"
     else:
         why = "low_confidence"
     path_text = str(track.file_path) if track.file_path else None
     return UnresolvedTrack(
         raw_tokens=track.raw_tokens,
         file_path=path_text,
         file_size_kb=track.file_size_kb,
         reason=why,
     )
 def _pair_placed_with_tracks(
     placed: list[PlacedTrack],
     tracks: list[SubtitleCandidate],
 ) -> list[tuple[PlacedTrack, SubtitleCandidate]]:
     """
     Pair each PlacedTrack with its originating SubtitleCandidate by source path.

     When the source path matches no candidate, falls back to the candidate
     at the same position; if that position is out of range the first
     candidate is used. With no candidates at all, nothing is paired.

     Args:
         placed: Tracks reported as placed.
         tracks: Candidates that were handed to the placer.

     Returns:
         List of (placed, candidate) tuples, in ``placed`` order.
     """
     track_by_path = {t.file_path: t for t in tracks if t.file_path}
     pairs = []
     for index, item in enumerate(placed):
         track = track_by_path.get(item.source)
         if track is None and tracks:
             # Positional fallback: same index when available, else first.
             track = tracks[index] if index < len(tracks) else tracks[0]
         if track:
             pairs.append((item, track))
     return pairs
@@ -0,0 +1,43 @@
 """Move media use case."""
 import logging
 from alfred.infrastructure.filesystem import FileManager
 from .dto import MoveMediaResponse
 logger = logging.getLogger(__name__)
 class MoveMediaUseCase:
     """Use case that moves a media file by delegating to a FileManager."""

     def __init__(self, file_manager: FileManager):
         """Keep the file manager that performs the actual move."""
         self.file_manager = file_manager

     def execute(self, source: str, destination: str) -> MoveMediaResponse:
         """
         Move a media file from source to destination.

         Args:
             source: Absolute path to the source file.
             destination: Absolute path to the destination file.

         Returns:
             MoveMediaResponse: status "ok" with the moved file's details when
             the file manager reports "ok", otherwise status "error" with the
             reported error and message.
         """
         outcome = self.file_manager.move_file(source, destination)
         if outcome.get("status") != "ok":
             return MoveMediaResponse(
                 status="error",
                 error=outcome.get("error"),
                 message=outcome.get("message"),
             )
         details = {
             key: outcome.get(key)
             for key in ("source", "destination", "filename", "size")
         }
         return MoveMediaResponse(status="ok", **details)
@@ -0,0 +1,411 @@
 """
 Destination resolution — compute library paths for releases.
 Four distinct use cases, one per release type:
 - resolve_season_destination    : season pack (folder move)
 - resolve_episode_destination   : single episode (file move)
 - resolve_movie_destination     : movie (file move)
 - resolve_series_destination    : complete series multi-season pack (folder move)
 Each returns a dedicated DTO with only the fields that make sense for that type.
 """
 from __future__ import annotations
 import logging
 import re
 from dataclasses import dataclass
 from pathlib import Path
 from alfred.domain.release import parse_release
 from alfred.infrastructure.persistence import get_memory
 logger = logging.getLogger(__name__)
 # Characters that must not appear in a single file/folder name. "/" is
 # included because it is a path separator: left in place, a title such as
 # "Face/Off" would silently turn into a nested path.
 _WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\/]')
 def _sanitize(text: str) -> str:
     """Return text with every filename-forbidden character removed."""
     return _WIN_FORBIDDEN.sub("", text)
 def _find_existing_tvshow_folders(
     tv_root: Path, tmdb_title: str, tmdb_year: int
 ) -> list[str]:
     """
     List the directories of tv_root whose name starts with "<Title>.<year>".

     The title is sanitized and its spaces become dots; the comparison is
     case-insensitive. Returns a sorted list of folder names, or an empty
     list when tv_root does not exist.
     """
     if not tv_root.exists():
         return []
     dotted_title = _sanitize(tmdb_title).replace(" ", ".")
     wanted_prefix = f"{dotted_title}.{tmdb_year}".lower()
     matches = []
     for child in tv_root.iterdir():
         if child.is_dir() and child.name.lower().startswith(wanted_prefix):
             matches.append(child.name)
     matches.sort()
     return matches
 def _get_tv_root() -> Path | None:
     """Return the "tv_show" library path from memory as a Path, or None if unset."""
     configured = get_memory().ltm.library_paths.get("tv_show")
     if not configured:
         return None
     return Path(configured)
 # ---------------------------------------------------------------------------
 # Internal sentinel + series-folder resolver (shared by the 3 TV use cases)
 # ---------------------------------------------------------------------------
@dataclass
class _Clarification:
    """Private marker returned when the user has to choose before resolution can finish."""

    # Question to put to the user.
    question: str
    # Candidate answers offered with the question.
    options: list[str]
 def _resolve_series_folder(
     tv_root: Path,
     tmdb_title: str,
     tmdb_year: int,
     computed_name: str,
     confirmed_folder: str | None,
 ) -> tuple[str, bool] | _Clarification:
     """
     Decide which series folder a release belongs in.

     Resolution order:
       1. A confirmed folder always wins; it is "new" when it does not exist
          under tv_root.
       2. No existing folder matches title + year → use the computed name (new).
       3. Exactly one existing folder, identical to the computed name → reuse it.
       4. Anything else → ask the user, offering the existing folders plus the
          computed name when it is not already among them.

     Returns:
         (folder_name, is_new) if resolved unambiguously,
         _Clarification(question, options) if the caller must ask the user.
     """
     if confirmed_folder:
         already_there = (tv_root / confirmed_folder).exists()
         return confirmed_folder, not already_there

     candidates = _find_existing_tvshow_folders(tv_root, tmdb_title, tmdb_year)
     if not candidates:
         return computed_name, True
     if len(candidates) == 1 and candidates[0] == computed_name:
         return candidates[0], False

     if computed_name in candidates:
         choices = candidates + []
     else:
         choices = candidates + [computed_name]
     prompt = (
         f"Un dossier série existe déjà pour '{tmdb_title}' "
         f"mais son nom diffère du nom calculé ({computed_name}). "
         f"Lequel utiliser ?"
     )
     return _Clarification(question=prompt, options=choices)
 # ---------------------------------------------------------------------------
 # DTOs
 # ---------------------------------------------------------------------------
@dataclass
class _ResolvedDestinationBase:
    """
    Common part of every destination-resolution DTO.

    Carries the status flag plus the fields that only matter in the non-ok
    states. Subclasses contribute their ok-state fields and a to_dict() that
    defers to _base_dict() for the error / needs_clarification shapes.
    """

    status: str  # "ok" | "needs_clarification" | "error"
    # Used when status == "needs_clarification".
    question: str | None = None
    options: list[str] | None = None
    # Used when status == "error".
    error: str | None = None
    message: str | None = None

    def _base_dict(self) -> dict | None:
        """Build the dict for the two non-ok statuses; None for any other status."""
        if self.status == "needs_clarification":
            return {
                "status": self.status,
                "question": self.question,
                "options": self.options or [],
            }
        if self.status == "error":
            return {"status": self.status, "error": self.error, "message": self.message}
        return None
@dataclass
class ResolvedSeasonDestination(_ResolvedDestinationBase):
    """Destination of a season pack: folders only, since the pack moves as a folder."""

    series_folder: str | None = None
    season_folder: str | None = None
    series_folder_name: str | None = None
    season_folder_name: str | None = None
    is_new_series_folder: bool = False

    def to_dict(self) -> dict:
        """Serialise; non-ok statuses use the shared error/clarification shape."""
        non_ok = self._base_dict()
        if non_ok:
            return non_ok
        return {
            "status": self.status,
            "series_folder": self.series_folder,
            "season_folder": self.season_folder,
            "series_folder_name": self.series_folder_name,
            "season_folder_name": self.season_folder_name,
            "is_new_series_folder": self.is_new_series_folder,
        }
@dataclass
class ResolvedEpisodeDestination(_ResolvedDestinationBase):
    """Destination of a single episode: folders plus the target file."""

    series_folder: str | None = None
    season_folder: str | None = None
    library_file: str | None = None  # full path to destination .mkv
    series_folder_name: str | None = None
    season_folder_name: str | None = None
    filename: str | None = None
    is_new_series_folder: bool = False

    def to_dict(self) -> dict:
        """Serialise; non-ok statuses use the shared error/clarification shape."""
        non_ok = self._base_dict()
        if non_ok:
            return non_ok
        return {
            "status": self.status,
            "series_folder": self.series_folder,
            "season_folder": self.season_folder,
            "library_file": self.library_file,
            "series_folder_name": self.series_folder_name,
            "season_folder_name": self.season_folder_name,
            "filename": self.filename,
            "is_new_series_folder": self.is_new_series_folder,
        }
@dataclass
class ResolvedMovieDestination(_ResolvedDestinationBase):
    """Destination of a movie: its folder plus the target file."""

    movie_folder: str | None = None
    library_file: str | None = None
    movie_folder_name: str | None = None
    filename: str | None = None
    is_new_folder: bool = False

    def to_dict(self) -> dict:
        """Serialise; non-ok statuses use the shared error/clarification shape."""
        non_ok = self._base_dict()
        if non_ok:
            return non_ok
        return {
            "status": self.status,
            "movie_folder": self.movie_folder,
            "library_file": self.library_file,
            "movie_folder_name": self.movie_folder_name,
            "filename": self.filename,
            "is_new_folder": self.is_new_folder,
        }
@dataclass
class ResolvedSeriesDestination(_ResolvedDestinationBase):
    """Destination of a complete multi-season pack: the series folder only."""

    series_folder: str | None = None
    series_folder_name: str | None = None
    is_new_series_folder: bool = False

    def to_dict(self) -> dict:
        """Serialise; non-ok statuses use the shared error/clarification shape."""
        non_ok = self._base_dict()
        if non_ok:
            return non_ok
        return {
            "status": self.status,
            "series_folder": self.series_folder,
            "series_folder_name": self.series_folder_name,
            "is_new_series_folder": self.is_new_series_folder,
        }
 # ---------------------------------------------------------------------------
 # Use cases
 # ---------------------------------------------------------------------------
 def resolve_season_destination(
     release_name: str,
     tmdb_title: str,
     tmdb_year: int,
     confirmed_folder: str | None = None,
 ) -> ResolvedSeasonDestination:
     """
     Work out where a season pack belongs in the TV library.

     Produces the series folder and the season folder only — the source
     folder is moved whole into the season folder, so no file path is built.
     Returns an error DTO when the TV library path is not configured, and a
     needs_clarification DTO when the series folder is ambiguous.
     """
     library_root = _get_tv_root()
     if not library_root:
         return ResolvedSeasonDestination(
             status="error",
             error="library_not_set",
             message="TV show library path is not configured.",
         )

     release = parse_release(release_name)
     default_series_name = _sanitize(release.show_folder_name(tmdb_title, tmdb_year))
     outcome = _resolve_series_folder(
         library_root, tmdb_title, tmdb_year, default_series_name, confirmed_folder
     )
     if isinstance(outcome, _Clarification):
         return ResolvedSeasonDestination(
             status="needs_clarification",
             question=outcome.question,
             options=outcome.options,
         )

     series_name, is_new = outcome
     season_name = release.season_folder_name()
     series_dir = library_root / series_name
     season_dir = series_dir / season_name
     return ResolvedSeasonDestination(
         status="ok",
         series_folder=str(series_dir),
         season_folder=str(season_dir),
         series_folder_name=series_name,
         season_folder_name=season_name,
         is_new_series_folder=is_new,
     )
 def resolve_episode_destination(
     release_name: str,
     source_file: str,
     tmdb_title: str,
     tmdb_year: int,
     tmdb_episode_title: str | None = None,
     confirmed_folder: str | None = None,
 ) -> ResolvedEpisodeDestination:
     """
     Work out where a single episode file belongs in the TV library.

     Produces the series folder, the season folder and the full destination
     file path; the destination keeps the source file's extension.
     Returns an error DTO when the TV library path is not configured, and a
     needs_clarification DTO when the series folder is ambiguous.
     """
     library_root = _get_tv_root()
     if not library_root:
         return ResolvedEpisodeDestination(
             status="error",
             error="library_not_set",
             message="TV show library path is not configured.",
         )

     release = parse_release(release_name)
     extension = Path(source_file).suffix
     default_series_name = _sanitize(release.show_folder_name(tmdb_title, tmdb_year))
     outcome = _resolve_series_folder(
         library_root, tmdb_title, tmdb_year, default_series_name, confirmed_folder
     )
     if isinstance(outcome, _Clarification):
         return ResolvedEpisodeDestination(
             status="needs_clarification",
             question=outcome.question,
             options=outcome.options,
         )

     series_name, is_new = outcome
     season_name = release.season_folder_name()
     episode_name = _sanitize(release.episode_filename(tmdb_episode_title, extension))
     series_dir = library_root / series_name
     season_dir = series_dir / season_name
     target_file = season_dir / episode_name
     return ResolvedEpisodeDestination(
         status="ok",
         series_folder=str(series_dir),
         season_folder=str(season_dir),
         library_file=str(target_file),
         series_folder_name=series_name,
         season_folder_name=season_name,
         filename=episode_name,
         is_new_series_folder=is_new,
     )
 def resolve_movie_destination(
     release_name: str,
     source_file: str,
     tmdb_title: str,
     tmdb_year: int,
 ) -> ResolvedMovieDestination:
     """
     Work out where a movie file belongs in the movie library.

     Produces the movie folder and the full destination file path; the
     destination keeps the source file's extension. Returns an error DTO when
     the movie library path is not configured.
     """
     configured_root = get_memory().ltm.library_paths.get("movie")
     if not configured_root:
         return ResolvedMovieDestination(
             status="error",
             error="library_not_set",
             message="Movie library path is not configured.",
         )

     release = parse_release(release_name)
     extension = Path(source_file).suffix
     movie_dir_name = _sanitize(release.movie_folder_name(tmdb_title, tmdb_year))
     movie_file_name = _sanitize(release.movie_filename(tmdb_title, tmdb_year, extension))
     movie_dir = Path(configured_root) / movie_dir_name
     target_file = movie_dir / movie_file_name
     folder_missing = not movie_dir.exists()
     return ResolvedMovieDestination(
         status="ok",
         movie_folder=str(movie_dir),
         library_file=str(target_file),
         movie_folder_name=movie_dir_name,
         filename=movie_file_name,
         is_new_folder=folder_missing,
     )
 def resolve_series_destination(
     release_name: str,
     tmdb_title: str,
     tmdb_year: int,
     confirmed_folder: str | None = None,
 ) -> ResolvedSeriesDestination:
     """
     Work out where a complete multi-season pack belongs in the TV library.

     Produces only the series folder — the whole pack lands directly in it.
     Returns an error DTO when the TV library path is not configured, and a
     needs_clarification DTO when the series folder is ambiguous.
     """
     library_root = _get_tv_root()
     if not library_root:
         return ResolvedSeriesDestination(
             status="error",
             error="library_not_set",
             message="TV show library path is not configured.",
         )

     release = parse_release(release_name)
     default_series_name = _sanitize(release.show_folder_name(tmdb_title, tmdb_year))
     outcome = _resolve_series_folder(
         library_root, tmdb_title, tmdb_year, default_series_name, confirmed_folder
     )
     if isinstance(outcome, _Clarification):
         return ResolvedSeriesDestination(
             status="needs_clarification",
             question=outcome.question,
             options=outcome.options,
         )

     series_name, is_new = outcome
     series_dir = library_root / series_name
     return ResolvedSeriesDestination(
         status="ok",
         series_folder=str(series_dir),
         series_folder_name=series_name,
         is_new_series_folder=is_new,
     )
@@ -2,7 +2,6 @@
 from .entities import Movie
 from .exceptions import InvalidMovieData, MovieNotFound
 from .services import MovieService
 from .value_objects import MovieTitle, Quality, ReleaseYear
 __all__ = [
@@ -12,5 +11,4 @@ __all__ = [
    "Quality",
    "MovieNotFound",
    "InvalidMovieData",
    "MovieService",
 ]
@@ -3,16 +3,23 @@
 from dataclasses import dataclass, field
 from datetime import datetime
-from ..shared.value_objects import FilePath, FileSize, ImdbId
+from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
 from ..shared.value_objects import FilePath, FileSize, ImdbId, Language
 from .value_objects import MovieTitle, Quality, ReleaseYear
@dataclass
 class Movie:
    """
-    Movie entity representing a movie in the media library.
+    Movie aggregate root for the movies domain.
-    This is the main aggregate root for the movies domain.
+    Carries file metadata (path, size) and the tracks discovered by the
    ffprobe + subtitle scan pipeline. The track lists may be empty when the
    movie is known but not yet scanned, or when no file is downloaded.
    Track helpers follow the same "C+" contract as ``Episode``: pass a
    ``Language`` for cross-format matching, or a ``str`` for case-insensitive
    direct comparison.
    """
    imdb_id: ImdbId
@@ -23,6 +30,8 @@ class Movie:
    file_size: FileSize | None = None
    tmdb_id: int | None = None
    added_at: datetime = field(default_factory=datetime.now)
    audio_tracks: list[AudioTrack] = field(default_factory=list)
    subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
    def __post_init__(self):
        """Validate movie entity."""
@@ -52,6 +61,42 @@ class Movie:
        """Check if the movie is downloaded (has a file)."""
        return self.has_file()
    # ── Audio helpers ──────────────────────────────────────────────────────
    def has_audio_in(self, lang: str | Language) -> bool:
        """Tell whether any audio track matches the given language."""
        for track in self.audio_tracks:
            if track_lang_matches(track.language, lang):
                return True
        return False
    def audio_languages(self) -> list[str]:
        """List each distinct audio language once, in first-seen track order."""
        # A dict keeps insertion order and drops repeats in one pass.
        ordered: dict[str, None] = {}
        for track in self.audio_tracks:
            if track.language:
                ordered.setdefault(track.language, None)
        return list(ordered)
    # ── Subtitle helpers ───────────────────────────────────────────────────
    def has_subtitles_in(self, lang: str | Language) -> bool:
        """Tell whether any subtitle track matches the given language."""
        for track in self.subtitle_tracks:
            if track_lang_matches(track.language, lang):
                return True
        return False
    def has_forced_subs(self) -> bool:
        """Tell whether any subtitle track carries the forced flag."""
        for track in self.subtitle_tracks:
            if track.is_forced:
                return True
        return False
    def subtitle_languages(self) -> list[str]:
        """List each distinct subtitle language once, in first-seen track order."""
        # A dict keeps insertion order and drops repeats in one pass.
        ordered: dict[str, None] = {}
        for track in self.subtitle_tracks:
            if track.language:
                ordered.setdefault(track.language, None)
        return list(ordered)
    def get_folder_name(self) -> str:
        """
        Get the folder name for this movie.
@@ -1,192 +0,0 @@
 """Movie domain services - Business logic."""
 import logging
 import re
 from ..shared.value_objects import FilePath, ImdbId
 from .entities import Movie
 from .exceptions import MovieAlreadyExists, MovieNotFound
 from .repositories import MovieRepository
 from .value_objects import Quality
 logger = logging.getLogger(__name__)
 class MovieService:
    """
    Domain service for movie-related business logic.
    This service contains business rules that don't naturally fit
    within a single entity: library CRUD guarded by existence checks, plus
    filename heuristics (quality, year) and on-disk file validation.
    """
    # Year candidates, tried in priority order. The trailing delimiter of the
    # dotted / spaced forms is a lookahead so that two adjacent candidates
    # ("Movie.1080.2010.mkv") can both be inspected.
    _YEAR_PATTERNS = (
        re.compile(r"\((\d{4})\)"),  # (2010)
        re.compile(r"\.(\d{4})(?=\.)"),  # .2010.
        re.compile(r"\s(\d{4})(?=\s)"),  # 2010
    )
    # Extensions accepted by validate_movie_file (compared lowercased).
    _VALID_EXTENSIONS = frozenset(
        {".mkv", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm"}
    )
    # Files smaller than this are rejected by validate_movie_file.
    _MIN_MOVIE_SIZE_BYTES = 100 * 1024 * 1024  # 100 MB
    def __init__(self, repository: MovieRepository):
        """
        Initialize movie service.
        Args:
            repository: Movie repository for persistence
        """
        self.repository = repository
    def add_movie(self, movie: Movie) -> None:
        """
        Add a new movie to the library.
        Args:
            movie: Movie entity to add
        Raises:
            MovieAlreadyExists: If movie with same IMDb ID already exists
        """
        if self.repository.exists(movie.imdb_id):
            raise MovieAlreadyExists(
                f"Movie with IMDb ID {movie.imdb_id} already exists"
            )
        self.repository.save(movie)
        logger.info(f"Added movie: {movie.title.value} ({movie.imdb_id})")
    def get_movie(self, imdb_id: ImdbId) -> Movie:
        """
        Get a movie by IMDb ID.
        Args:
            imdb_id: IMDb ID of the movie
        Returns:
            Movie entity
        Raises:
            MovieNotFound: If movie not found
        """
        movie = self.repository.find_by_imdb_id(imdb_id)
        if not movie:
            raise MovieNotFound(f"Movie with IMDb ID {imdb_id} not found")
        return movie
    def get_all_movies(self) -> list[Movie]:
        """
        Get all movies in the library.
        Returns:
            List of all movies
        """
        return self.repository.find_all()
    def update_movie(self, movie: Movie) -> None:
        """
        Update an existing movie.
        Args:
            movie: Movie entity with updated data
        Raises:
            MovieNotFound: If movie doesn't exist
        """
        if not self.repository.exists(movie.imdb_id):
            raise MovieNotFound(f"Movie with IMDb ID {movie.imdb_id} not found")
        self.repository.save(movie)
        logger.info(f"Updated movie: {movie.title.value} ({movie.imdb_id})")
    def remove_movie(self, imdb_id: ImdbId) -> None:
        """
        Remove a movie from the library.
        Args:
            imdb_id: IMDb ID of the movie to remove
        Raises:
            MovieNotFound: If movie not found
        """
        # The repository's delete() result is used as the "was it there" signal.
        if not self.repository.delete(imdb_id):
            raise MovieNotFound(f"Movie with IMDb ID {imdb_id} not found")
        logger.info(f"Removed movie with IMDb ID: {imdb_id}")
    def detect_quality_from_filename(self, filename: str) -> Quality:
        """
        Detect video quality from filename.
        Matching is a case-insensitive substring test, checked from the
        highest resolution down.
        Args:
            filename: Filename to analyze
        Returns:
            Detected quality or UNKNOWN
        """
        filename_lower = filename.lower()
        # Check for quality indicators
        if "2160p" in filename_lower or "4k" in filename_lower:
            return Quality.UHD_4K
        elif "1080p" in filename_lower:
            return Quality.FULL_HD
        elif "720p" in filename_lower:
            return Quality.HD
        elif "480p" in filename_lower:
            return Quality.SD
        return Quality.UNKNOWN
    def extract_year_from_filename(self, filename: str) -> int | None:
        """
        Extract release year from filename.
        Candidates are 4-digit numbers in parentheses, between dots, or
        between whitespace (examples: "Movie (2010)", "Movie.2010.1080p").
        Patterns are tried in that order and, within a pattern, candidates
        are scanned left to right; the first one in the range 1888-2100 wins.
        Args:
            filename: Filename to analyze
        Returns:
            Year if found, None otherwise
        """
        for pattern in self._YEAR_PATTERNS:
            # Inspect every candidate, not just the first: an out-of-range
            # number such as ".1080." must not hide a valid year after it.
            for match in pattern.finditer(filename):
                year = int(match.group(1))
                # Validate year is reasonable
                if 1888 <= year <= 2100:
                    return year
        return None
    def validate_movie_file(self, file_path: FilePath) -> bool:
        """
        Validate that a file is a valid movie file.
        The path must exist, be a regular file, carry a known video extension
        and be at least 100 MB. Each rejection is logged as a warning.
        Args:
            file_path: Path to the file
        Returns:
            True if valid movie file, False otherwise
        """
        if not file_path.exists():
            logger.warning(f"File does not exist: {file_path}")
            return False
        if not file_path.is_file():
            logger.warning(f"Path is not a file: {file_path}")
            return False
        # Check file extension
        suffix = file_path.value.suffix
        if suffix.lower() not in self._VALID_EXTENSIONS:
            logger.warning(f"Invalid file extension: {suffix}")
            return False
        # Check file size (should be at least 100 MB for a movie); stat once
        # so the logged size is the one that was actually compared.
        size = file_path.value.stat().st_size
        if size < self._MIN_MOVIE_SIZE_BYTES:
            logger.warning(f"File too small to be a movie: {size} bytes")
            return False
        return True
@@ -1,10 +1,10 @@
 """Movie domain value objects."""
 import re
 from dataclasses import dataclass
 from enum import Enum
 from ..shared.exceptions import ValidationError
 from ..shared.value_objects import to_dot_folder_name
 class Quality(Enum):
@@ -17,7 +17,7 @@ class Quality(Enum):
    UNKNOWN = "unknown"
    @classmethod
-    def from_string(cls, quality_str: str) -> "Quality":
+    def from_string(cls, quality_str: str) -> Quality:
        """
        Parse quality from string.
@@ -67,11 +67,7 @@ class MovieTitle:
        Removes special characters and replaces spaces with dots.
        """
-        # Remove special characters except spaces, dots, and hyphens
+        return to_dot_folder_name(self.value)
        cleaned = re.sub(r"[^\w\s\.\-]", "", self.value)
        # Replace spaces with dots
        normalized = cleaned.replace(" ", ".")
        return normalized
    def __str__(self) -> str:
        return self.value
@@ -0,0 +1,6 @@
 """Release domain — release name parsing and naming conventions."""
 from .services import parse_release
 from .value_objects import ParsedRelease
 __all__ = ["ParsedRelease", "parse_release"]
@@ -0,0 +1,140 @@
 """Release knowledge loader.
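 Three knowledge layers (lowest → highest priority):
  1. Builtin   — alfred/knowledge/release/
  2. Sites     — alfred/knowledge/release/sites/*.yaml (all trackers)
  3. Learned   — data/knowledge/release/ (user additions via the learn tool)
 Builtin and Learned files are merged per filename; Site files are merged
 together separately and consulted only by the loaders that need site tokens.
 Lists are extended additively, scalars from higher layers win.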
 """
 from pathlib import Path
 import yaml
 import alfred as _alfred_pkg
 _BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge" / "release"
 _SITES_ROOT = _BUILTIN_ROOT / "sites"
 _LEARNED_ROOT = (
    Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge" / "release"
 )
 def _merge(base: dict, overlay: dict) -> dict:
    """Merge overlay into base — lists are extended, scalars from overlay win."""
    result = dict(base)
    for key, val in overlay.items():
        if key in result and isinstance(result[key], list) and isinstance(val, list):
            result[key] = result[key] + [v for v in val if v not in result[key]]
        else:
            result[key] = val
    return result
 def _read(path: Path) -> dict:
    """Parse the YAML file at ``path``.

    Returns ``{}`` when the file does not exist or parses to a falsy value
    (for example an empty document).
    """
    try:
        with open(path, encoding="utf-8") as handle:
            parsed = yaml.safe_load(handle)
    except FileNotFoundError:
        return {}
    return parsed or {}
 def _load(filename: str) -> dict:
    """Load ``filename`` from the builtin root, overlaid with the learned root."""
    builtin = _read(_BUILTIN_ROOT / filename)
    learned = _read(_LEARNED_ROOT / filename)
    return _merge(builtin, learned)
 def _load_sites() -> dict:
    """Fold every ``*.yaml`` file under the sites root into one dict.

    Files are merged in sorted path order, so later files win on scalars.
    """
    combined: dict = {}
    site_files = sorted(_SITES_ROOT.glob("*.yaml"))
    for path in site_files:
        combined = _merge(combined, _read(path))
    return combined
 def load_resolutions() -> set[str]:
    """Return the ``resolutions`` list of resolutions.yaml as a set (empty if absent)."""
    return set(_load("resolutions.yaml").get("resolutions", []))
 def load_sources() -> set[str]:
    """Return the ``sources`` list of sources.yaml as a set (empty if absent)."""
    return set(_load("sources.yaml").get("sources", []))
 def load_codecs() -> set[str]:
    """Return the ``codecs`` list of codecs.yaml as a set (empty if absent)."""
    return set(_load("codecs.yaml").get("codecs", []))
 def load_win_forbidden_chars() -> list[str]:
    """Return the ``win_forbidden_chars`` list of filesystem.yaml (empty if absent)."""
    return _load("filesystem.yaml").get("win_forbidden_chars", [])
 def load_video_extensions() -> set[str]:
    """Return the ``video`` category of file_extensions.yaml as a set."""
    return set(_load("file_extensions.yaml").get("video", []))
 def load_non_video_extensions() -> set[str]:
    """Return the ``non_video`` category of file_extensions.yaml as a set."""
    return set(_load("file_extensions.yaml").get("non_video", []))
 def load_metadata_extensions() -> set[str]:
    """Return the ``metadata`` category of file_extensions.yaml as a set."""
    return set(_load("file_extensions.yaml").get("metadata", []))
 def load_subtitle_extensions() -> set[str]:
    """Return the ``subtitle`` category of file_extensions.yaml as a set."""
    return set(_load("file_extensions.yaml").get("subtitle", []))
 def load_forbidden_chars() -> set[str]:
    """Return the ``forbidden_chars`` list of release_format.yaml as a set."""
    return set(_load("release_format.yaml").get("forbidden_chars", []))
 def load_language_tokens() -> set[str]:
    """Return every known language token, uppercased.

    Combines the ``tokens`` list of languages.yaml with the ``languages``
    list gathered from the site files.
    """
    builtin = _load("languages.yaml").get("tokens", [])
    site_specific = _load_sites().get("languages", [])
    return {token.upper() for token in (*builtin, *site_specific)}
 def load_audio() -> dict:
    """Return the whole merged content of audio.yaml as a dict."""
    return _load("audio.yaml")
 def load_video() -> dict:
    """Return the whole merged content of video.yaml as a dict."""
    return _load("video.yaml")
 def load_editions() -> dict:
    """Return editions.yaml with site-specific edition tokens appended.

    Site tokens (``editions.tokens`` in the site files) that are not already
    listed are added to the end of the ``tokens`` list.
    """
    editions = _load("editions.yaml")
    extra = _load_sites().get("editions", {}).get("tokens", [])
    if not extra:
        return editions
    known = editions.get("tokens", [])
    editions["tokens"] = known + [tok for tok in extra if tok not in known]
    return editions
 def load_sources_extra() -> set[str]:
    """Additional source tokens from site files (the ``sources`` list, as a set)."""
    return set(_load_sites().get("sources", []))
 def load_hdr_extra() -> set[str]:
    """Additional HDR tokens from site files (the ``hdr`` list, uppercased)."""
    return {t.upper() for t in _load_sites().get("hdr", [])}
 def load_media_type_tokens() -> dict:
    """Site-specific media type tokens (doc, concert, collection, integrale).

    Returns the ``media_type_tokens`` mapping from the merged site files,
    or an empty dict when no site file defines it.
    """
    return _load_sites().get("media_type_tokens", {})
 def load_separators() -> list[str]:
    """Return the token separators used by the release name tokenizer.

    The canonical "." is prepended when the YAML list lacks it, so that a
    misconfigured file cannot break the parser entirely.
    """
    configured = _load("separators.yaml").get("separators", []) or []
    return configured if "." in configured else [".", *configured]
@@ -0,0 +1,506 @@
 """Release domain — parsing service."""
 from __future__ import annotations
 import re
 from .knowledge import load_separators
 from .value_objects import (
    _AUDIO,
    _CODECS,
    _EDITIONS,
    _FORBIDDEN_CHARS,
    _HDR_EXTRA,
    _LANGUAGE_TOKENS,
    _MEDIA_TYPE_TOKENS,
    _RESOLUTIONS,
    _SOURCES,
    _VIDEO_META,
    ParsedRelease,
 )
 def _tokenize(name: str) -> list[str]:
    """Break a release name into tokens on runs of configured separators.

    Empty fragments (leading, trailing or doubled separators) are discarded.
    """
    separators = "".join(load_separators())
    splitter = re.compile("[" + re.escape(separators) + "]+")
    return [piece for piece in splitter.split(name) if piece]
 def parse_release(name: str) -> ParsedRelease:
    """
    Parse a release name and return a ParsedRelease.
    Flow:
      1. Strip a leading/trailing [site.tag] if present (sets parse_path="sanitized").
      2. Check the remainder for truly forbidden chars (anything not in the
         configured separators list). If any remain → media_type="unknown",
         parse_path="ai", and the LLM handles it.
      3. Tokenize using the configured separators (".", " ", "[", "]", "(", ")", "_", ...)
         and run token-level matchers (season/episode, tech, languages, audio,
         video, edition, title, year).
    On both paths ``raw`` is the input exactly as received, while
    ``normalised`` is the name with the site tag removed.
    """
    parse_path = "direct"
    # Always try to extract a bracket-enclosed site tag first.
    clean, site_tag = _strip_site_tag(name)
    if site_tag is not None:
        parse_path = "sanitized"
    if not _is_well_formed(clean):
        return ParsedRelease(
            raw=name,
            normalised=clean,
            title=clean,
            year=None,
            season=None,
            episode=None,
            episode_end=None,
            quality=None,
            source=None,
            codec=None,
            group="UNKNOWN",
            tech_string="",
            media_type="unknown",
            site_tag=site_tag,
            parse_path="ai",
        )
    # Work on the cleaned name from here on; `name` stays untouched so that
    # `raw` still carries the original input (site tag included).
    tokens = _tokenize(clean)
    season, episode, episode_end = _extract_season_episode(tokens)
    quality, source, codec, group, tech_tokens = _extract_tech(tokens)
    languages, lang_tokens = _extract_languages(tokens)
    audio_codec, audio_channels, audio_tokens = _extract_audio(tokens)
    bit_depth, hdr_format, video_tokens = _extract_video_meta(tokens)
    edition, edition_tokens = _extract_edition(tokens)
    # Every token claimed by a matcher acts as a stop marker for the title.
    title = _extract_title(
        tokens,
        tech_tokens | lang_tokens | audio_tokens | video_tokens | edition_tokens,
    )
    year = _extract_year(tokens, title)
    media_type = _infer_media_type(
        season, quality, source, codec, year, edition, tokens
    )
    tech_parts = [p for p in [quality, source, codec] if p]
    tech_string = ".".join(tech_parts)
    return ParsedRelease(
        raw=name,
        normalised=clean,
        title=title,
        year=year,
        season=season,
        episode=episode,
        episode_end=episode_end,
        quality=quality,
        source=source,
        codec=codec,
        group=group,
        tech_string=tech_string,
        media_type=media_type,
        site_tag=site_tag,
        parse_path=parse_path,
        languages=languages,
        audio_codec=audio_codec,
        audio_channels=audio_channels,
        bit_depth=bit_depth,
        hdr_format=hdr_format,
        edition=edition,
    )
 def _infer_media_type(
    season: int | None,
    quality: str | None,
    source: str | None,
    codec: str | None,
    year: int | None,
    edition: str | None,
    tokens: list[str],
 ) -> str:
    """
    Classify a release from token-level evidence only (no filesystem access).
    Rules, first match wins:
    - documentary  : a configured "doc" token is present
    - concert      : a configured "concert" token is present
    - tv_show      : a season was parsed
    - tv_complete  : no season, and either the edition is
                     COMPLETE/INTEGRALE/COLLECTION or a configured
                     "integrale" token is present
    - movie        : no season, and at least one of quality, source, codec
                     or year is set
    - unknown      : none of the above
    Token comparison is case-insensitive.
    """
    present = {tok.upper() for tok in tokens}
    vocab = {
        kind: {t.upper() for t in _MEDIA_TYPE_TOKENS.get(kind, [])}
        for kind in ("doc", "concert", "integrale")
    }
    if present & vocab["doc"]:
        return "documentary"
    if present & vocab["concert"]:
        return "concert"
    if season is not None:
        return "tv_show"
    complete_edition = edition in {"COMPLETE", "INTEGRALE", "COLLECTION"}
    if complete_edition or present & vocab["integrale"]:
        return "tv_complete"
    if quality or source or codec or year:
        return "movie"
    return "unknown"
 def _is_well_formed(name: str) -> bool:
    """Tell whether ``name`` is free of forbidden characters.

    Forbidden characters that are also configured as token separators are
    tolerated, because the tokenizer splits on them. Any other forbidden
    character found in ``name`` makes it malformed.
    """
    separators = set(load_separators())
    for char in _FORBIDDEN_CHARS:
        if char in separators:
            continue
        if char in name:
            return False
    return True
 def _strip_site_tag(name: str) -> tuple[str, str | None]:
    """
    Strip a site watermark tag from the release name and return (clean_name, tag).
    Handles two positions:
    - Prefix:  "[ OxTorrent.vc ] The.Title.S01..."
    - Suffix:  "The.Title.S01...-NTb[TGx]"
    Anything between [...] is treated as a site tag.
    Returns (original_name, None) if no tag found.
    """
    s = name.strip()
    if s.startswith("["):
        close = s.find("]")
        if close != -1:
            tag = s[1:close].strip()
            remainder = s[close + 1 :].strip()
            if tag and remainder:
                return remainder, tag
    if s.endswith("]"):
        open_bracket = s.rfind("[")
        if open_bracket != -1:
            tag = s[open_bracket + 1 : -1].strip()
            remainder = s[:open_bracket].strip()
            if tag and remainder:
                return remainder, tag
    return s, None
 def _parse_season_episode(tok: str) -> tuple[int, int | None, int | None] | None:
    """
    Parse a single token as a season/episode marker.
    Handles:
      - SxxExx / SxxExxExx / Sxx        (canonical scene form)
      - NxNN / NxNNxNN                  (alt form: 1x05, 12x07x08)
    Returns (season, episode, episode_end) or None if not a season token.
    """
    upper = tok.upper()
    # SxxExx form
    if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
        season = int(upper[1:3])
        rest = upper[3:]
        if not rest:
            return season, None, None
        episodes: list[int] = []
        while rest.startswith("E") and len(rest) >= 3 and rest[1:3].isdigit():
            episodes.append(int(rest[1:3]))
            rest = rest[3:]
        if not episodes:
            return None  # malformed token like "S03XYZ"
        return season, episodes[0], episodes[1] if len(episodes) >= 2 else None
    # NxNN form — split on "X" (uppercased), all parts must be digits
    if "X" in upper:
        parts = upper.split("X")
        if len(parts) >= 2 and all(p.isdigit() and p for p in parts):
            season = int(parts[0])
            episode = int(parts[1])
            episode_end = int(parts[2]) if len(parts) >= 3 else None
            return season, episode, episode_end
    return None
 def _extract_season_episode(
    tokens: list[str],
 ) -> tuple[int | None, int | None, int | None]:
    for tok in tokens:
        parsed = _parse_season_episode(tok)
        if parsed is not None:
            return parsed
    return None, None, None
 def _extract_tech(
    tokens: list[str],
 ) -> tuple[str | None, str | None, str | None, str, set[str]]:
    """
    Extract quality, source, codec, group from tokens.
    Returns (quality, source, codec, group, tech_token_set).
    quality/source/codec keep the token's original casing; when several
    tokens match the same category the last one wins. ``group`` is
    "UNKNOWN" when no group could be found. ``tech_token_set`` holds every
    token that was recognised as a tech marker.
    Group extraction strategy (in priority order):
    1. Token where prefix is a known codec: x265-GROUP
    2. Rightmost token with a dash that isn't a known source
    """
    quality: str | None = None
    source: str | None = None
    codec: str | None = None
    group = "UNKNOWN"
    tech_tokens: set[str] = set()
    for tok in tokens:
        # Knowledge sets are matched against the lowercased token.
        tl = tok.lower()
        if tl in _RESOLUTIONS:
            quality = tok
            tech_tokens.add(tok)
            continue
        if tl in _SOURCES:
            source = tok
            tech_tokens.add(tok)
            continue
        if "-" in tok:
            # Split on the LAST dash: everything before it is the candidate
            # codec/source, everything after it the candidate group.
            parts = tok.rsplit("-", 1)
            # codec-GROUP (highest priority for group)
            if parts[0].lower() in _CODECS:
                codec = parts[0]
                group = parts[1] if parts[1] else "UNKNOWN"
                tech_tokens.add(tok)
                continue
            # source with dash: Web-DL, WEB-DL, etc.
            # Also matches when the token with its dashes removed is a source.
            if parts[0].lower() in _SOURCES or tok.lower().replace("-", "") in _SOURCES:
                source = tok
                tech_tokens.add(tok)
                continue
        # Reached for dash-less tokens and for dashed tokens that matched
        # neither dashed form above.
        if tl in _CODECS:
            codec = tok
            tech_tokens.add(tok)
    # Fallback: rightmost token with a dash that isn't a known source
    if group == "UNKNOWN":
        for tok in reversed(tokens):
            if "-" in tok:
                parts = tok.rsplit("-", 1)
                tl = tok.lower()
                if tl in _SOURCES or tok.lower().replace("-", "") in _SOURCES:
                    continue
                # A trailing dash (empty suffix) is not a group; keep looking.
                if parts[1]:
                    group = parts[1]
                    break
    return quality, source, codec, group, tech_tokens
 def _is_year_token(tok: str) -> bool:
    """Return True if tok is a 4-digit year between 1900 and 2099."""
    return len(tok) == 4 and tok.isdigit() and 1900 <= int(tok) <= 2099
 def _extract_title(tokens: list[str], tech_tokens: set[str]) -> str:
    """Extract the title portion: everything before the first season/year/tech token."""
    title_parts = []
    for tok in tokens:
        if _parse_season_episode(tok) is not None:
            break
        if _is_year_token(tok):
            break
        if tok in tech_tokens or tok.lower() in _RESOLUTIONS | _SOURCES | _CODECS:
            break
        if "-" in tok and any(p.lower() in _CODECS | _SOURCES for p in tok.split("-")):
            break
        title_parts.append(tok)
    return ".".join(title_parts) if title_parts else tokens[0]
 def _extract_year(tokens: list[str], title: str) -> int | None:
    """Extract a 4-digit year from tokens (only after the title)."""
    title_len = len(title.split("."))
    for tok in tokens[title_len:]:
        if _is_year_token(tok):
            return int(tok)
    return None
 # ---------------------------------------------------------------------------
 # Sequence matcher
 # ---------------------------------------------------------------------------
 def _match_sequences(
    tokens: list[str],
    sequences: list[dict],
    key: str,
 ) -> tuple[str | None, set[str]]:
    """
    Try to match multi-token sequences against consecutive tokens.
    Returns (matched_value, set_of_matched_tokens) or (None, empty_set).
    Sequences must be ordered most-specific first in the YAML.
    """
    upper_tokens = [t.upper() for t in tokens]
    for seq in sequences:
        seq_upper = [s.upper() for s in seq["tokens"]]
        n = len(seq_upper)
        for i in range(len(upper_tokens) - n + 1):
            if upper_tokens[i : i + n] == seq_upper:
                matched = set(tokens[i : i + n])
                return seq[key], matched
    return None, set()
 # ---------------------------------------------------------------------------
 # Language extraction
 # ---------------------------------------------------------------------------
 def _extract_languages(tokens: list[str]) -> tuple[list[str], set[str]]:
    """Extract language tokens. Returns (languages, matched_token_set)."""
    languages = []
    lang_tokens: set[str] = set()
    for tok in tokens:
        if tok.upper() in _LANGUAGE_TOKENS:
            languages.append(tok.upper())
            lang_tokens.add(tok)
    return languages, lang_tokens
 # ---------------------------------------------------------------------------
 # Audio extraction
 # ---------------------------------------------------------------------------
 def _extract_audio(
    tokens: list[str],
 ) -> tuple[str | None, str | None, set[str]]:
    """
    Extract audio codec and channel layout.
    Returns (audio_codec, audio_channels, matched_token_set).
    Sequences are tried first (DTS.HD.MA, TrueHD.Atmos, …), then single tokens.
    A codec found through a sequence is reported with the sequence's
    configured ``codec`` value; a single-token codec keeps the token's
    original casing. Only the first codec and the first channel layout
    found are kept.
    """
    audio_codec: str | None = None
    audio_channels: str | None = None
    audio_tokens: set[str] = set()
    # Codecs are compared uppercased; channel layouts are compared verbatim.
    known_codecs = {c.upper() for c in _AUDIO.get("codecs", [])}
    known_channels = set(_AUDIO.get("channels", []))
    # Try multi-token sequences first
    matched_codec, matched_set = _match_sequences(
        tokens, _AUDIO.get("sequences", []), "codec"
    )
    if matched_codec:
        audio_codec = matched_codec
        audio_tokens |= matched_set
    # Channel layouts like "5.1" or "7.1" arrive as two tokens when "." is a
    # separator — detect them as consecutive pairs "X" + "Y" where "X.Y" is a
    # known channel.
    # The second token may have a "-GROUP" suffix (e.g. "1-KTH" → strip it).
    for i in range(len(tokens) - 1):
        second = tokens[i + 1].split("-")[0]
        candidate = f"{tokens[i]}.{second}"
        if candidate in known_channels and audio_channels is None:
            audio_channels = candidate
            audio_tokens.add(tokens[i])
            audio_tokens.add(tokens[i + 1])
    # Single-token pass: skip anything already claimed above.
    for tok in tokens:
        if tok in audio_tokens:
            continue
        if tok.upper() in known_codecs and audio_codec is None:
            audio_codec = tok
            audio_tokens.add(tok)
        elif tok in known_channels and audio_channels is None:
            audio_channels = tok
            audio_tokens.add(tok)
    return audio_codec, audio_channels, audio_tokens
 # ---------------------------------------------------------------------------
 # Video metadata extraction (bit depth, HDR)
 # ---------------------------------------------------------------------------
 def _extract_video_meta(
    tokens: list[str],
 ) -> tuple[str | None, str | None, set[str]]:
    """
    Extract bit depth and HDR format.
    Multi-token HDR sequences take precedence over single tokens. A
    single-token HDR format is reported uppercased, a bit depth lowercased;
    only the first of each is kept.
    Returns (bit_depth, hdr_format, matched_token_set).
    """
    hdr_vocab = {h.upper() for h in _VIDEO_META.get("hdr", [])} | _HDR_EXTRA
    depth_vocab = {d.lower() for d in _VIDEO_META.get("bit_depth", [])}
    # Try HDR sequences first
    sequence_hit, sequence_tokens = _match_sequences(
        tokens, _VIDEO_META.get("sequences", []), "hdr"
    )
    hdr: str | None = sequence_hit if sequence_hit else None
    consumed: set[str] = set(sequence_tokens) if sequence_hit else set()
    depth: str | None = None
    for token in tokens:
        if token in consumed:
            continue
        if token.upper() in hdr_vocab and hdr is None:
            hdr = token.upper()
            consumed.add(token)
        elif token.lower() in depth_vocab and depth is None:
            depth = token.lower()
            consumed.add(token)
    return depth, hdr, consumed
 # ---------------------------------------------------------------------------
 # Edition extraction
 # ---------------------------------------------------------------------------
 def _extract_edition(tokens: list[str]) -> tuple[str | None, set[str]]:
    """
    Extract release edition (UNRATED, EXTENDED, DIRECTORS.CUT, …).
    Multi-token sequences are tried first; otherwise the first token found
    in the configured edition tokens is reported, uppercased.
    Returns (edition, matched_token_set).
    """
    vocabulary = {t.upper() for t in _EDITIONS.get("tokens", [])}
    # Try multi-token sequences first
    sequence_edition, sequence_tokens = _match_sequences(
        tokens, _EDITIONS.get("sequences", []), "edition"
    )
    if sequence_edition:
        return sequence_edition, sequence_tokens
    single = next((tok for tok in tokens if tok.upper() in vocabulary), None)
    if single is None:
        return None, set()
    return single.upper(), {single}
@@ -0,0 +1,165 @@
 """Release domain — value objects and token sets."""
 from __future__ import annotations
 from dataclasses import dataclass, field
 from .knowledge import (
    load_audio,
    load_codecs,
    load_editions,
    load_forbidden_chars,
    load_hdr_extra,
    load_language_tokens,
    load_media_type_tokens,
    load_metadata_extensions,
    load_non_video_extensions,
    load_resolutions,
    load_sources,
    load_sources_extra,
    load_subtitle_extensions,
    load_video,
    load_video_extensions,
    load_win_forbidden_chars,
 )
 # Token sets — loaded once at import time from alfred/knowledge/release/
 _RESOLUTIONS: set[str] = load_resolutions()
 _SOURCES: set[str] = load_sources() | load_sources_extra()
 _CODECS: set[str] = load_codecs()
 _VIDEO_EXTENSIONS: set[str] = load_video_extensions()
 _NON_VIDEO_EXTENSIONS: set[str] = load_non_video_extensions()
 _SUBTITLE_EXTENSIONS: set[str] = load_subtitle_extensions()
 # Both metadata and subtitle extensions are ignored when deciding the media
 # type of a folder — neither is a conclusive signal for movie/tv/other.
 _METADATA_EXTENSIONS: set[str] = load_metadata_extensions() | _SUBTITLE_EXTENSIONS
 _FORBIDDEN_CHARS: set[str] = load_forbidden_chars()
 _LANGUAGE_TOKENS: set[str] = load_language_tokens()
 _AUDIO: dict = load_audio()
 _VIDEO_META: dict = load_video()
 _EDITIONS: dict = load_editions()
 _HDR_EXTRA: set[str] = load_hdr_extra()
 _MEDIA_TYPE_TOKENS: dict = load_media_type_tokens()
 # Translation table for stripping Windows-forbidden characters
 _WIN_FORBIDDEN_TABLE = str.maketrans("", "", "".join(load_win_forbidden_chars()))
 def _sanitize_for_fs(text: str) -> str:
    """Remove Windows-forbidden characters from a string.

    The characters are deleted (not replaced) using the module-level
    translation table built from ``load_win_forbidden_chars()``.
    """
    return text.translate(_WIN_FORBIDDEN_TABLE)
 def _strip_episode_from_normalized(normalized: str) -> str:
    """
    Remove all episode parts (Exx) from a normalized release name, keeping Sxx.
    Oz.S03E01.1080p...             → Oz.S03.1080p...
    Archer.S14E09E10E11.1080p...   → Archer.S14.1080p...
    """
    tokens = normalized.split(".")
    result = []
    for tok in tokens:
        upper = tok.upper()
        # Token is SxxExx... — keep only the Sxx part
        if len(upper) >= 3 and upper[0] == "S" and upper[1:3].isdigit():
            result.append(tok[:3])  # "S" + two digits
        else:
            result.append(tok)
    return ".".join(result)
@dataclass
class ParsedRelease:
    """Structured representation of a parsed release name.

    Besides holding the parsed fields, the class builds the folder and file
    names used when organising a release on disk.
    """
    raw: str  # original release name (untouched)
    normalised: str  # release name as handed to the tokenizer (site tag removed)
    title: str  # show/movie title (dots, no year/season/tech)
    year: int | None  # year token found after the title, if any
    season: int | None  # season number (None for movies)
    episode: int | None  # first episode number (None if season-pack)
    episode_end: int | None  # last episode for multi-ep (None otherwise)
    quality: str | None  # 1080p, 2160p, …
    source: str | None  # WEBRip, BluRay, …
    codec: str | None  # x265, HEVC, …
    group: str  # release group, "UNKNOWN" if missing
    tech_string: str  # quality.source.codec joined with dots
    # "movie" | "tv_show" | "tv_complete" | "documentary" | "concert"
    # | "other" | "unknown"
    media_type: str = "unknown"
    # site watermark stripped from name, e.g. "TGx", "OxTorrent.vc"
    site_tag: str | None = None
    parse_path: str = "direct"  # "direct" | "sanitized" | "ai"
    languages: list[str] = field(default_factory=list)  # ["MULTI", "VFF"], ["FRENCH"], …
    audio_codec: str | None = None  # "DTS-HD.MA", "DDP", "EAC3", …
    audio_channels: str | None = None  # "5.1", "7.1", "2.0", …
    bit_depth: str | None = None  # "10bit", "8bit", …
    hdr_format: str | None = None  # "DV", "HDR10", "DV.HDR10", …
    edition: str | None = None  # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
    @property
    def is_season_pack(self) -> bool:
        """True when a season was parsed but no episode number."""
        return self.season is not None and self.episode is None
    def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
        """
        Build the series root folder name.
        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Oz.1997.1080p.WEBRip.x265-KONTRAST
        The title is stripped of Windows-forbidden characters and its spaces
        become dots; "Unknown" stands in for an empty tech string.
        """
        title_part = _sanitize_for_fs(tmdb_title).replace(" ", ".")
        tech = self.tech_string or "Unknown"
        return f"{title_part}.{tmdb_year}.{tech}-{self.group}"
    def season_folder_name(self) -> str:
        """
        Build the season subfolder name = normalized release name (no episode).
        Example: Oz.S03.1080p.WEBRip.x265-KONTRAST
        For a single-episode release we still strip the episode token so the
        folder can hold the whole season.
        """
        return _strip_episode_from_normalized(self.normalised)
    def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str:
        """
        Build the episode filename.
        Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
        Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv
        If tmdb_episode_title is None or empty, the episode title segment is
        omitted. Likewise the SxxExx segment is omitted when neither season
        nor episode is known, so no empty ".." segment is produced.
        ``ext`` may be given with or without its leading dot.
        """
        segments = [_sanitize_for_fs(self.title)]
        season_part = f"S{self.season:02d}" if self.season is not None else ""
        episode_part = f"E{self.episode:02d}" if self.episode is not None else ""
        marker = season_part + episode_part
        if marker:
            segments.append(marker)
        if tmdb_episode_title:
            segments.append(_sanitize_for_fs(tmdb_episode_title).replace(" ", "."))
        tech = self.tech_string or "Unknown"
        segments.append(f"{tech}-{self.group}")
        ext_clean = ext.lstrip(".")
        return f"{'.'.join(segments)}.{ext_clean}"
    def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
        """
        Build the movie folder name.
        Format: {Title}.{Year}.{Tech}-{Group}
        Example: Inception.2010.1080p.BluRay.x265-GROUP
        """
        return self.show_folder_name(tmdb_title, tmdb_year)
    def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str:
        """
        Build the movie filename (same as folder name + extension).
        Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv
        ``ext`` may be given with or without its leading dot.
        """
        ext_clean = ext.lstrip(".")
        return f"{self.movie_folder_name(tmdb_title, tmdb_year)}.{ext_clean}"
@@ -1,7 +1,7 @@
 """Shared kernel - Common domain concepts used across subdomains."""
 from .exceptions import DomainException, ValidationError
-from .value_objects import FilePath, FileSize, ImdbId
+from .value_objects import FilePath, FileSize, ImdbId, Language
 __all__ = [
    "DomainException",
@@ -9,4 +9,5 @@ __all__ = [
    "ImdbId",
    "FilePath",
    "FileSize",
    "Language",
 ]
@@ -0,0 +1,5 @@
 """Shared knowledge loaders (cross-domain)."""
 from .language_registry import LanguageRegistry
 __all__ = ["LanguageRegistry"]
@@ -0,0 +1,129 @@
 """LanguageRegistry — loads and queries the canonical language table from YAML.
 Builtin entries live in ``alfred/knowledge/iso_languages.yaml`` (versioned).
 Learned entries can be added to ``data/knowledge/iso_languages_learned.yaml``
 (gitignored, instance-local) and are merged additively — they extend builtin
 languages or add new ones, never remove builtin entries.
 """
 import logging
 from pathlib import Path
 import yaml
 import alfred as _alfred_pkg
 from ..value_objects import Language
 logger = logging.getLogger(__name__)
 _BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge"
 _LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
 def _load_yaml(path: Path) -> dict:
    """
    Load a YAML file and return its top-level mapping.
    Loading is best-effort: a missing file yields ``{}`` silently, and any
    other failure (unreadable file, invalid YAML) is logged as a warning and
    also yields ``{}``. A document whose root is not a mapping (a list or a
    bare scalar) is rejected the same way, because callers call ``.get()`` on
    the result straight away.
    """
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        return {}
    except Exception as e:
        logger.warning(f"LanguageRegistry: could not load {path}: {e}")
        return {}
    if not data:
        # Empty file or empty document.
        return {}
    if not isinstance(data, dict):
        logger.warning(
            f"LanguageRegistry: ignoring {path}: expected a mapping at the top "
            f"level, got {type(data).__name__}"
        )
        return {}
    return data
 def _merge_language_entries(base: dict, override: dict) -> dict:
    """
    Merge learned language entries into builtin entries.
    For each language iso, aliases lists are extended (deduped, order preserved);
    scalar fields in override win over base.
    """
    result = dict(base)
    for iso, override_entry in override.items():
        if iso not in result:
            result[iso] = override_entry
            continue
        merged = dict(result[iso])
        for key, val in override_entry.items():
            if key == "aliases" and isinstance(val, list):
                existing = merged.get("aliases", []) or []
                merged["aliases"] = existing + [v for v in val if v not in existing]
            else:
                merged[key] = val
        result[iso] = merged
    return result
 class LanguageRegistry:
    """
    In-memory index over the canonical language table.
    The table is the builtin ``iso_languages.yaml`` merged with the optional
    ``iso_languages_learned.yaml``; both are read once, at construction.
    Usage::
        registry = LanguageRegistry()
        fr = registry.from_iso("fre")            # canonical code (639-2/B per Language)
        fr2 = registry.from_any("French")        # → same Language as `fr`
        fr3 = registry.from_any("fr")            # → same Language
        fr4 = registry.from_any("vostfr")        # → None (vostfr is subtitle-specific,
                                                 #   lives in subtitles knowledge)
    """
    def __init__(self) -> None:
        # Canonical code → Language, in load order.
        self._by_iso: dict[str, Language] = {}
        # Every known spelling (code, names, aliases) → Language.
        self._lookup: dict[str, Language] = {}
        self._load()
    def _load(self) -> None:
        """Read both YAML tables, merge them and fill the two indexes."""
        builtin_doc = _load_yaml(_BUILTIN_ROOT / "iso_languages.yaml")
        builtin = builtin_doc.get("languages", {}) or {}
        learned_doc = _load_yaml(_LEARNED_ROOT / "iso_languages_learned.yaml")
        learned = learned_doc.get("languages", {}) or {}
        for iso, entry in _merge_language_entries(builtin, learned).items():
            language = Language(
                iso=iso,
                english_name=entry.get("english_name", iso),
                native_name=entry.get("native_name", iso),
                aliases=tuple(entry.get("aliases", []) or []),
            )
            self._by_iso[language.iso] = language
            # Flat table backing from_any; a key registered later replaces an
            # earlier registration of the same key.
            lookup_keys = [
                language.iso,
                language.english_name.lower(),
                language.native_name.lower(),
                *language.aliases,
            ]
            for key in lookup_keys:
                self._lookup[key] = language
        logger.info(f"LanguageRegistry: {len(self._by_iso)} languages loaded")
    def from_iso(self, code: str) -> Language | None:
        """Return the Language stored under canonical ``code``, else None.
        The code is lowercased and stripped first; non-string input gives None.
        """
        if isinstance(code, str):
            return self._by_iso.get(code.lower().strip())
        return None
    def from_any(self, raw: str) -> Language | None:
        """
        Return the Language known under ``raw``, else None.
        ``raw`` may be the canonical code, the english or native name, or any
        registered alias; it is lowercased and stripped before the lookup.
        Non-string input gives None.
        """
        if isinstance(raw, str):
            return self._lookup.get(raw.lower().strip())
        return None
    def all(self) -> list[Language]:
        """Return a new list of every loaded language, in load order."""
        return [*self._by_iso.values()]
    def __contains__(self, raw: str) -> bool:
        """True when ``from_any(raw)`` finds a language."""
        found = self.from_any(raw)
        return found is not None
    def __len__(self) -> int:
        """Number of distinct languages loaded."""
        return len(self._by_iso)
@@ -0,0 +1,19 @@
 """Media — file-level track types (video/audio/subtitle) and MediaInfo container.
 These are the **container-view** dataclasses, populated from ffprobe output and
 used across the project to describe the content of a media file.
 """
 from .audio import AudioTrack
 from .info import MediaInfo
 from .matching import track_lang_matches
 from .subtitle import SubtitleTrack
 from .video import VideoTrack
 __all__ = [
    "AudioTrack",
    "MediaInfo",
    "SubtitleTrack",
    "VideoTrack",
    "track_lang_matches",
 ]
@@ -0,0 +1,17 @@
 """AudioTrack — a single audio stream as reported by ffprobe."""
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass
 class AudioTrack:
    """A single audio track as reported by ffprobe.
    Plain data holder: every descriptive field except ``index`` may be None.
    """
    # Position of the stream — presumably ffprobe's stream index; confirm
    # against the code that builds these objects.
    index: int
    codec: str | None  # aac, ac3, eac3, dts, truehd, flac, …
    channels: int | None  # 2, 6 (5.1), 8 (7.1), …
    channel_layout: str | None  # stereo, 5.1, 7.1, …
    language: str | None  # ISO 639-2: fre, eng, und, …
    # Whether the track is flagged as the default one; False unless set.
    is_default: bool = False
@@ -0,0 +1,76 @@
 """MediaInfo — assembles video, audio and subtitle tracks for a media file."""
 from __future__ import annotations
 from dataclasses import dataclass, field
 from .audio import AudioTrack
 from .subtitle import SubtitleTrack
 from .video import VideoTrack
@dataclass
 class MediaInfo:
    """
    File-level media metadata extracted by ffprobe.
    Each stream type is held as a list of typed track objects. The flat
    accessors (``width``, ``height``, ``video_codec``, ``resolution``) are kept
    for backwards compatibility and read from the first video track, returning
    None when the file has no video track.
    """
    video_tracks: list[VideoTrack] = field(default_factory=list)
    audio_tracks: list[AudioTrack] = field(default_factory=list)
    subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
    # File-level values (ffprobe ``format`` block, not tied to one stream)
    duration_seconds: float | None = None
    bitrate_kbps: int | None = None
    # --- Video conveniences: all read the first video track ---
    @property
    def primary_video(self) -> VideoTrack | None:
        """First video track, or None when there is none."""
        if not self.video_tracks:
            return None
        return self.video_tracks[0]
    def _primary_video_attr(self, name: str):
        """Read attribute ``name`` from the primary video track (None if absent)."""
        track = self.primary_video
        return getattr(track, name) if track else None
    @property
    def width(self) -> int | None:
        """Width of the primary video track."""
        return self._primary_video_attr("width")
    @property
    def height(self) -> int | None:
        """Height of the primary video track."""
        return self._primary_video_attr("height")
    @property
    def video_codec(self) -> str | None:
        """Codec of the primary video track."""
        return self._primary_video_attr("codec")
    @property
    def resolution(self) -> str | None:
        """Resolution label of the primary video track."""
        return self._primary_video_attr("resolution")
    # --- Audio conveniences ---
    @property
    def audio_languages(self) -> list[str]:
        """Distinct audio languages (ISO 639-2) in first-seen order.
        Tracks whose language is None or empty are ignored.
        """
        tagged = (track.language for track in self.audio_tracks if track.language)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(tagged))
    @property
    def is_multi_audio(self) -> bool:
        """True if more than one audio language is present."""
        return len(self.audio_languages) >= 2
@@ -0,0 +1,33 @@
 """Language-matching helper shared by media-bearing entities.
 Both ``Episode`` and ``Movie`` carry ``audio_tracks`` / ``subtitle_tracks`` and
 need to answer "do I have audio in language X?". The matching contract is the
 same in both cases — keep it in one place.
 """
 from __future__ import annotations
 from ..value_objects import Language
 def track_lang_matches(track_lang: str | None, query: str | Language) -> bool:
    """
    Match a track's language string against a query (contract "C+").
      * ``Language`` query → matches if the track string is any known
        representation of that Language (delegates to ``Language.matches``).
        Powerful, cross-format mode.
      * ``str`` query → case-insensitive direct comparison against
        ``track_lang``. Simple, no normalization, no registry lookup.
    Callers needing cross-format resolution (``"fr"`` ↔ ``"fre"`` ↔
    ``"french"``) should resolve their string through a ``LanguageRegistry``
    once and pass the resulting ``Language``.
    """
    if track_lang is None:
        return False
    if isinstance(query, Language):
        return query.matches(track_lang)
    if isinstance(query, str):
        return track_lang.lower().strip() == query.lower().strip()
    return False
@@ -0,0 +1,25 @@
 """SubtitleTrack — a single embedded subtitle stream as reported by ffprobe.
 This is the **container-view** representation (ffprobe output) used uniformly
 across the project to describe a subtitle stream embedded in a media file.
 Not to be confused with ``alfred.domain.subtitles.entities.SubtitleCandidate``
 which models a subtitle being **scanned/matched** (with confidence, raw tokens,
 file path, etc.). The two coexist by design — they describe the same real-world
 concept seen from two different bounded contexts.
 """
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass
 class SubtitleTrack:
    """A single embedded subtitle track as reported by ffprobe.
    Plain data holder: ``codec`` and ``language`` may be None.
    """
    # Position of the stream — presumably ffprobe's stream index; confirm
    # against the code that builds these objects.
    index: int
    codec: str | None  # subrip, ass, hdmv_pgs_subtitle, …
    language: str | None  # ISO 639-2: fre, eng, und, …
    # Both flags are False unless set by whoever builds the track.
    is_default: bool = False
    is_forced: bool = False
@@ -0,0 +1,62 @@
 """VideoTrack — a single video stream as reported by ffprobe."""
 from __future__ import annotations
 from dataclasses import dataclass
@dataclass
 class VideoTrack:
    """A single video track as reported by ffprobe.
    Most media files carry exactly one video track, but several are possible
    (alternate camera angles, attached thumbnails reported as still-image
    streams, …), which is why MediaInfo stores a list[VideoTrack].
    """
    index: int
    codec: str | None  # h264, hevc, av1, …
    width: int | None
    height: int | None
    is_default: bool = False
    @property
    def resolution(self) -> str | None:
        """
        Best-effort resolution label: 2160p, 1080p, 720p, …
        The width decides whenever it is known, so widescreen/cinema crops are
        classified correctly (1920×960 scope → 1080p, not 720p). The height is
        only used when the width is unavailable. Returns None when both are
        missing. Below the smallest tier the label is ``{height}p``, or
        ``{width}w`` when only a width is available.
        """
        width, height = self.width, self.height
        if width is None and height is None:
            return None
        if width is not None:
            # (minimum width, label), largest tier first.
            width_tiers = (
                (3840, "2160p"),
                (1920, "1080p"),
                (1280, "720p"),
                (720, "576p"),
                (640, "480p"),
            )
            for minimum, label in width_tiers:
                if width >= minimum:
                    return label
            return f"{height}p" if height else f"{width}w"
        # Width unknown: classify on the height alone.
        height_tiers = (
            (2160, "2160p"),
            (1080, "1080p"),
            (720, "720p"),
            (576, "576p"),
            (480, "480p"),
        )
        for minimum, label in height_tiers:
            if height >= minimum:
                return label
        return f"{height}p"
@@ -131,3 +131,97 @@ class FileSize:
    def __repr__(self) -> str:
        return f"FileSize({self.bytes})"
@dataclass(frozen=True)
 class Language:
    """
    Canonical language value object.
    ``iso`` is the primary identifier: the 3-letter ISO 639-2/B (bibliographic)
    code such as "fre", "eng" or "ger" — the form ffprobe emits and the one
    used project-wide. Every other spelling (ISO 639-1 code, ISO 639-2/T
    variant, english/native names, common spellings) belongs in ``aliases`` and
    is consulted by ``matches()``.
    Two Language objects are equal, and hash alike, as soon as their ``iso``
    codes are equal; names and aliases play no part.
    """
    iso: str
    english_name: str
    native_name: str
    aliases: tuple[str, ...] = ()
    def __post_init__(self):
        """Validate ``iso`` and store normalized ``iso`` / ``aliases``.
        Raises:
            ValidationError: if ``iso`` is not a non-empty 3-character string.
        """
        iso = self.iso
        if not isinstance(iso, str) or not iso:
            raise ValidationError(
                f"Language.iso must be a non-empty string, got {self.iso!r}"
            )
        if len(iso) != 3:
            raise ValidationError(
                f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
            )
        # Plain attribute assignment is unavailable on a frozen dataclass,
        # hence object.__setattr__.
        object.__setattr__(self, "iso", iso.lower())
        # Aliases: keep strings only, lowercase + strip, drop empties, and
        # de-duplicate while keeping first-seen order.
        cleaned = (
            alias.lower().strip() for alias in self.aliases if isinstance(alias, str)
        )
        unique = dict.fromkeys(alias for alias in cleaned if alias)
        object.__setattr__(self, "aliases", tuple(unique))
    def matches(self, raw: str) -> bool:
        """
        Tell whether ``raw`` is a known representation of this language.
        ``raw`` is lowercased and stripped, then compared with the canonical
        ``iso`` code, the english name, the native name and every alias.
        Non-string or blank input never matches.
        """
        if not isinstance(raw, str):
            return False
        needle = raw.lower().strip()
        if not needle:
            return False
        return (
            needle == self.iso
            or needle == self.english_name.lower()
            or needle == self.native_name.lower()
            or needle in self.aliases
        )
    def __eq__(self, other: object) -> bool:
        if isinstance(other, Language):
            return self.iso == other.iso
        return NotImplemented
    def __hash__(self) -> int:
        return hash(self.iso)
    def __str__(self) -> str:
        return self.iso
    def __repr__(self) -> str:
        return f"Language({self.iso!r}, {self.english_name!r})"
 # Whitelist for dot-separated folder/filename forms. Word characters
 # (alphanumerics and underscore), whitespace, literal dots and hyphens are
 # kept; the pattern matches every other character so it can be removed.
 _FS_SAFE_CHARS = re.compile(r"[^\w\s\.\-]")
 def to_dot_folder_name(title: str) -> str:
    """Strip filesystem-unsafe characters from ``title`` and turn spaces into dots.
    Examples: ``"Breaking Bad"`` → ``Breaking.Bad`` and
    ``"Spider-Man: No Way Home"`` → ``Spider-Man.No.Way.Home`` (the colon is
    dropped, the hyphen is kept).
    Each space becomes exactly one dot, so a removed character that sat between
    two spaces leaves a double dot (``"Tom & Jerry"`` → ``Tom..Jerry``).
    Whitespace other than the space character is passed through unchanged.
    """
    safe_title = _FS_SAFE_CHARS.sub("", title)
    return safe_title.replace(" ", ".")
@@ -1,14 +1,37 @@
-"""Subtitles domain - Business logic for subtitle management (shared across movies and TV shows)."""
+"""Subtitles domain — subtitle identification, classification and placement."""
-from .entities import Subtitle
+from .aggregates import SubtitleRuleSet
 from .entities import MediaSubtitleMetadata, SubtitleCandidate
 from .exceptions import SubtitleNotFound
-from .services import SubtitleService
+from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase
-from .value_objects import Language, SubtitleFormat
+from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher
 from .value_objects import (
    RuleScope,
    ScanStrategy,
    SubtitleFormat,
    SubtitleLanguage,
    SubtitleMatchingRules,
    SubtitlePattern,
    SubtitleType,
    TypeDetectionMethod,
 )
 __all__ = [
-    "Subtitle",
+    "SubtitleCandidate",
-    "Language",
+    "MediaSubtitleMetadata",
    "SubtitleRuleSet",
    "SubtitleKnowledgeBase",
    "KnowledgeLoader",
    "SubtitleIdentifier",
    "SubtitleMatcher",
    "PatternDetector",
    "SubtitleFormat",
    "SubtitleLanguage",
    "SubtitlePattern",
    "SubtitleType",
    "ScanStrategy",
    "TypeDetectionMethod",
    "SubtitleMatchingRules",
    "RuleScope",
    "SubtitleNotFound",
    "SubtitleService",
 ]
@@ -0,0 +1,95 @@
 """Subtitle domain aggregates."""
 from dataclasses import dataclass, field
 from typing import Any
 from ..shared.value_objects import ImdbId
 from .knowledge.base import SubtitleKnowledgeBase
 from .value_objects import RuleScope, SubtitleMatchingRules
 def DEFAULT_RULES() -> SubtitleMatchingRules:
    """Return the default matching rules (the ``defaults`` section of subtitles.yaml).
    A fresh SubtitleKnowledgeBase is built on every call and its
    ``default_rules()`` are returned.
    """
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base.default_rules()
@dataclass
 class SubtitleRuleSet:
    """
    Subtitle selection rules for one scope level, with inheritance.
    A rule set stores only its own deltas; a delta left at None means
    "inherit from parent". Resolution order:
    global → release_group → show/movie → season → episode.
    A rule set may also be pinned to one media item (``pinned_to``), which
    bypasses the scope hierarchy for that item.
    """
    scope: RuleScope
    parent: SubtitleRuleSet | None = None
    pinned_to: ImdbId | None = None
    # Delta fields (None = inherit)
    _languages: list[str] | None = field(default=None, repr=False)
    _formats: list[str] | None = field(default=None, repr=False)
    _types: list[str] | None = field(default=None, repr=False)
    _format_priority: list[str] | None = field(default=None, repr=False)
    _min_confidence: float | None = field(default=None, repr=False)
    def resolve(self) -> SubtitleMatchingRules:
        """
        Compute the effective rules for this scope.
        The parent chain is resolved first (DEFAULT_RULES when there is no
        parent), then this level's deltas are applied on top. List deltas are
        applied with ``or``, so an empty list inherits just like None does;
        ``min_confidence`` inherits only when it is None.
        """
        if self.parent:
            inherited = self.parent.resolve()
        else:
            inherited = DEFAULT_RULES()
        if self._min_confidence is None:
            min_confidence = inherited.min_confidence
        else:
            min_confidence = self._min_confidence
        return SubtitleMatchingRules(
            preferred_languages=self._languages or inherited.preferred_languages,
            preferred_formats=self._formats or inherited.preferred_formats,
            allowed_types=self._types or inherited.allowed_types,
            format_priority=self._format_priority or inherited.format_priority,
            min_confidence=min_confidence,
        )
    def override(
        self,
        languages: list[str] | None = None,
        formats: list[str] | None = None,
        types: list[str] | None = None,
        format_priority: list[str] | None = None,
        min_confidence: float | None = None,
    ) -> None:
        """Record delta overrides at this scope level.
        Arguments left at None are ignored, so an existing delta cannot be
        cleared through this method.
        """
        requested = {
            "_languages": languages,
            "_formats": formats,
            "_types": types,
            "_format_priority": format_priority,
            "_min_confidence": min_confidence,
        }
        for attribute, value in requested.items():
            if value is not None:
                setattr(self, attribute, value)
    def to_dict(self) -> dict:
        """Serialize the scope and the deltas that are set (for rules.yaml)."""
        candidates = (
            ("languages", self._languages),
            ("formats", self._formats),
            ("types", self._types),
            ("format_priority", self._format_priority),
            ("min_confidence", self._min_confidence),
        )
        delta: dict[str, Any] = {
            key: value for key, value in candidates if value is not None
        }
        scope = {"level": self.scope.level, "identifier": self.scope.identifier}
        return {"scope": scope, "override": delta}
    @classmethod
    def global_default(cls) -> SubtitleRuleSet:
        """Build an empty rule set at the ``global`` scope level."""
        global_scope = RuleScope(level="global")
        return cls(scope=global_scope)
@@ -1,96 +1,103 @@
 """Subtitle domain entities."""
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
-from ..shared.value_objects import FilePath, ImdbId
+from ..shared.value_objects import ImdbId
-from .value_objects import Language, SubtitleFormat, TimingOffset
+from .value_objects import (
    SubtitleFormat,
    SubtitleLanguage,
    SubtitleType,
 )
@dataclass
-class Subtitle:
+class SubtitleCandidate:
    """
-    Subtitle entity representing a subtitle file.
+    A subtitle being scanned and matched — either an external file or an embedded stream.
-    Can be associated with either a movie or a TV show episode.
+    Unlike ``alfred.domain.shared.media.SubtitleTrack`` (the pure container-view
    populated from ffprobe), a SubtitleCandidate carries the **flow state** of the
    subtitle matching pipeline: language/format are typed value objects that may
    be ``None`` while classification is in progress, ``confidence`` reflects how
    certain we are, and ``raw_tokens`` holds the filename fragments still under
    analysis. State evolves: unknown → resolved after user clarification.
    """
-    media_imdb_id: ImdbId
+    # Classification (may be None if not yet resolved)
-    language: Language
+    language: SubtitleLanguage | None
-    format: SubtitleFormat
+    format: SubtitleFormat | None
-    file_path: FilePath
+    subtitle_type: SubtitleType = SubtitleType.UNKNOWN
-    # Optional: for TV shows
+    # Source
-    season_number: int | None = None
+    is_embedded: bool = False
-    episode_number: int | None = None
+    file_path: Path | None = None  # None if embedded
    file_size_kb: float | None = None
    entry_count: int | None = None  # number of subtitle cues in the file
-    # Subtitle metadata
+    # Matching state
-    timing_offset: TimingOffset = TimingOffset(0)
+    confidence: float = 0.0  # 0.0 → 1.0, not applicable for embedded
-    hearing_impaired: bool = False
+    raw_tokens: list[str] = field(
-    forced: bool = False  # Forced subtitles (for foreign language parts)
+        default_factory=list
    )  # tokens extracted from filename
-    # Source information
+    def is_resolved(self) -> bool:
-    source: str | None = None  # e.g., "OpenSubtitles", "Subscene"
+        return self.language is not None
    uploader: str | None = None
    download_count: int | None = None
    rating: float | None = None
-    def __post_init__(self):
+    @property
-        """Validate subtitle entity."""
+    def destination_name(self) -> str:
-        # Ensure ImdbId is actually an ImdbId instance
+        """
-        if not isinstance(self.media_imdb_id, ImdbId):
+        Compute the output filename per naming convention:
-            if isinstance(self.media_imdb_id, str):
+            {lang}.{ext}
-                object.__setattr__(self, "media_imdb_id", ImdbId(self.media_imdb_id))
+            {lang}.sdh.{ext}
-
+            {lang}.forced.{ext}
-        # Ensure Language is actually a Language instance
+        """
-        if not isinstance(self.language, Language):
+        if not self.language or not self.format:
-            if isinstance(self.language, str):
+            raise ValueError(
-                object.__setattr__(self, "language", Language.from_code(self.language))
+                "Cannot compute destination_name: language or format missing"
        # Ensure SubtitleFormat is actually a SubtitleFormat instance
        if not isinstance(self.format, SubtitleFormat):
            if isinstance(self.format, str):
                object.__setattr__(
                    self, "format", SubtitleFormat.from_extension(self.format)
            )
-
+        ext = self.format.extensions[0].lstrip(".")
-        # Ensure FilePath is actually a FilePath instance
+        parts = [self.language.code]
-        if not isinstance(self.file_path, FilePath):
+        if self.subtitle_type == SubtitleType.SDH:
-            object.__setattr__(self, "file_path", FilePath(self.file_path))
+            parts.append("sdh")
-
+        elif self.subtitle_type == SubtitleType.FORCED:
    def is_for_movie(self) -> bool:
        """Check if this subtitle is for a movie."""
        return self.season_number is None and self.episode_number is None
    def is_for_episode(self) -> bool:
        """Check if this subtitle is for a TV show episode."""
        return self.season_number is not None and self.episode_number is not None
    def get_filename(self) -> str:
        """
        Get the suggested filename for this subtitle.
        Format for movies: "Movie.Title.{lang}.{format}"
        Format for episodes: "S01E05.{lang}.{format}"
        """
        if self.is_for_episode():
            base = f"S{self.season_number:02d}E{self.episode_number:02d}"
        else:
            # For movies, use the file path stem
            base = self.file_path.value.stem
        parts = [base, self.language.value]
        if self.hearing_impaired:
            parts.append("hi")
        if self.forced:
            parts.append("forced")
-
+        return ".".join(parts) + "." + ext
        return f"{'.'.join(parts)}.{self.format.value}"
    def __str__(self) -> str:
        if self.is_for_episode():
            return f"Subtitle S{self.season_number:02d}E{self.episode_number:02d} ({self.language.value})"
        return f"Subtitle ({self.language.value})"
    def __repr__(self) -> str:
-        return f"Subtitle(media={self.media_imdb_id}, lang={self.language.value})"
+        lang = self.language.code if self.language else "?"
        fmt = self.format.id if self.format else "?"
        src = (
            "embedded"
            if self.is_embedded
            else str(self.file_path.name if self.file_path else "?")
        )
        return f"SubtitleCandidate({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
@dataclass
 class MediaSubtitleMetadata:
    """
    Everything currently known about the subtitles of one media item.
    Filled in by the identifier service (ffprobe + filesystem scan).
    """
    media_id: ImdbId | None
    media_type: str  # "movie" | "tv_show"
    embedded_tracks: list[SubtitleCandidate] = field(default_factory=list)
    external_tracks: list[SubtitleCandidate] = field(default_factory=list)
    release_group: str | None = None
    detected_pattern_id: str | None = None  # pattern id from knowledge base
    pattern_confirmed: bool = False
    @property
    def all_tracks(self) -> list[SubtitleCandidate]:
        """Embedded tracks followed by external tracks, as a new list."""
        return [*self.embedded_tracks, *self.external_tracks]
    @property
    def total_count(self) -> int:
        """Number of embedded plus external tracks."""
        groups = (self.embedded_tracks, self.external_tracks)
        return sum(len(group) for group in groups)
    @property
    def unresolved_tracks(self) -> list[SubtitleCandidate]:
        """External tracks whose language is still None (embedded ones are not considered)."""
        pending = []
        for track in self.external_tracks:
            if track.language is None:
                pending.append(track)
        return pending
@@ -0,0 +1,4 @@
 from .base import SubtitleKnowledgeBase
 from .loader import KnowledgeLoader
 __all__ = ["SubtitleKnowledgeBase", "KnowledgeLoader"]
@@ -0,0 +1,183 @@
 """SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
 import logging
 from ...shared.knowledge.language_registry import LanguageRegistry
 from ..value_objects import (
    ScanStrategy,
    SubtitleFormat,
    SubtitleLanguage,
    SubtitleMatchingRules,
    SubtitlePattern,
    SubtitleType,
    TypeDetectionMethod,
 )
 from .loader import KnowledgeLoader
 logger = logging.getLogger(__name__)
 class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).
    Built from KnowledgeLoader — call kb.reload() to pick up newly learned entries
    without restarting.
    """
    def __init__(
        self,
        loader: KnowledgeLoader | None = None,
        language_registry: LanguageRegistry | None = None,
    ):
        """Store the collaborators (defaults are constructed when omitted) and build all tables.
        Args:
            loader: Source of the subtitle YAML data; a new KnowledgeLoader when None.
            language_registry: Canonical language table; a new LanguageRegistry when None.
        """
        self._loader = loader or KnowledgeLoader()
        self._language_registry = language_registry or LanguageRegistry()
        self._build()
    def _build(self) -> None:  # noqa: PLR0912 — straight-line YAML projection
        """(Re)create every lookup table from the loader and the language registry.
        Each table is replaced by a brand-new dict, so calling this again
        discards whatever was built before.
        """
        data = self._loader.subtitles()
        # Formats: one SubtitleFormat per id found under ``formats``.
        self._formats: dict[str, SubtitleFormat] = {}
        for fid, fdata in data.get("formats", {}).items():
            self._formats[fid] = SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )
        # Languages are sourced primarily from the canonical LanguageRegistry
        # (alfred/knowledge/iso_languages.yaml — ISO 639-2/B). Subtitle-specific
        # tokens (VOSTFR, VF, VFF…) are merged on top from subtitles.yaml's
        # ``language_tokens`` section.
        subtitle_extras: dict[str, list[str]] = {
            code: list(tokens or [])
            for code, tokens in (data.get("language_tokens", {}) or {}).items()
        }
        self._languages: dict[str, SubtitleLanguage] = {}
        # Reverse index token → language code. Plain assignment: when two
        # languages share a token, the one processed last wins.
        self._lang_token_map: dict[str, str] = {}
        for language in self._language_registry.all():
            # Token order: iso code, english name, native name, aliases, then
            # the subtitle-specific extras — each added only if not yet present.
            tokens: list[str] = [language.iso, language.english_name.lower()]
            if language.native_name.lower() not in tokens:
                tokens.append(language.native_name.lower())
            for alias in language.aliases:
                if alias not in tokens:
                    tokens.append(alias)
            for extra in subtitle_extras.get(language.iso, []):
                if extra.lower() not in tokens:
                    tokens.append(extra.lower())
            self._languages[language.iso] = SubtitleLanguage(
                code=language.iso,
                tokens=tokens,
            )
            for token in tokens:
                self._lang_token_map[token.lower()] = language.iso
        # Subtitle-specific tokens for languages NOT in the canonical registry
        # are still honored: register them as a minimal SubtitleLanguage.
        for code, extras in subtitle_extras.items():
            if code in self._languages:
                continue
            tokens = [code] + [e.lower() for e in extras]
            self._languages[code] = SubtitleLanguage(code=code, tokens=tokens)
            for token in tokens:
                self._lang_token_map[token.lower()] = code
        # Build reverse token → type map
        # NOTE(review): SubtitleType(type_id) is not guarded here, unlike the
        # pattern loop below — presumably an unknown type id raises; confirm.
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in data.get("types", {}).items():
            stype = SubtitleType(type_id)
            for token in tdata.get("tokens", []):
                self._type_token_map[token.lower()] = stype
        # Default matching rules; the literals below are the fallbacks used
        # when a key is missing from the ``defaults`` section.
        d = data.get("defaults", {})
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=d.get("languages", ["fre", "eng"]),
            preferred_formats=d.get("formats", ["srt"]),
            allowed_types=d.get("types", ["standard", "forced"]),
            format_priority=d.get("format_priority", ["srt", "ass"]),
            min_confidence=d.get("min_confidence", 0.7),
        )
        # Patterns: an entry that raises ValueError while being built is
        # logged and skipped instead of aborting the whole build.
        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in self._loader.patterns().items():
            try:
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(pdata.get("scan_strategy", "adjacent")),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        pdata.get("type_detection", {}).get("method", "token_in_name")
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(f"SubtitleKnowledgeBase: skipping pattern '{pid}': {e}")
    def reload(self) -> None:
        """Rebuild every table from a newly constructed default KnowledgeLoader.
        The current loader is replaced — including one that was injected
        through ``__init__`` — while the language registry instance is reused
        as is.
        """
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")
    # --- Defaults ---
    def default_rules(self) -> SubtitleMatchingRules:
        """Return the default matching rules built from the ``defaults`` section."""
        return self._default_rules
    # --- Formats ---
    def formats(self) -> dict[str, SubtitleFormat]:
        """Return the internal format table keyed by format id (not a copy)."""
        return self._formats
    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose ``matches_extension(ext)`` is true, else None."""
        for fmt in self._formats.values():
            if fmt.matches_extension(ext):
                return fmt
        return None
    def known_extensions(self) -> set[str]:
        """Return the union of the extensions of every known format."""
        exts = set()
        for fmt in self._formats.values():
            exts.update(fmt.extensions)
        return exts
    # --- Languages ---
    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return the internal language table keyed by language code (not a copy)."""
        return self._languages
    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language registered for ``token`` (case-insensitive), else None."""
        code = self._lang_token_map.get(token.lower())
        return self._languages.get(code) if code else None
    def is_known_lang_token(self, token: str) -> bool:
        """True when ``token`` (case-insensitive) is a registered language token."""
        return token.lower() in self._lang_token_map
    # --- Types ---
    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type registered for ``token`` (case-insensitive), else None."""
        return self._type_token_map.get(token.lower())
    def is_known_type_token(self, token: str) -> bool:
        """True when ``token`` (case-insensitive) is a registered type token."""
        return token.lower() in self._type_token_map
    # --- Patterns ---
    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return the internal pattern table keyed by pattern id (not a copy)."""
        return self._patterns
    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with this id, else None."""
        return self._patterns.get(pattern_id)
    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """Return the known patterns of a release group.
        Ids listed under the group's ``known_patterns`` that are not in the
        pattern table are ignored; an unknown group gives an empty list.
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]
@@ -0,0 +1,135 @@
 """KnowledgeLoader — autodiscovers and merges builtin + learned YAML knowledge packs."""
 import logging
 from pathlib import Path
 import yaml
 import alfred as _alfred_pkg
 logger = logging.getLogger(__name__)
 # Builtin knowledge — anchored on the alfred package itself, not on this file's depth
 _BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge"
 # Learned knowledge — local to this instance, gitignored
 _LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
 def _load_yaml(path: Path) -> dict:
    """Load a YAML mapping from ``path``; best effort, never raises.

    Returns ``{}`` when:
      - the file does not exist (silently);
      - the file is empty or its document is falsy;
      - the file cannot be read or parsed (logged as a warning);
      - the top-level node is not a mapping (logged as a warning) — callers
        index the result with ``.get``, so a list or scalar must not leak out.
    """
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing pack is not an error: answer with an empty mapping, no log.
        return {}
    except Exception as e:
        logger.warning(f"KnowledgeLoader: could not load {path}: {e}")
        return {}
    if not data:
        return {}
    if not isinstance(data, dict):
        logger.warning(
            f"KnowledgeLoader: could not load {path}: "
            f"top-level YAML node is {type(data).__name__}, expected a mapping"
        )
        return {}
    return data
 def _merge(base: dict, override: dict) -> dict:
    """
    Deep merge override into base.
    Lists are extended (not replaced) — learned tokens are additive.
    Scalar values in override win over base.
    """
    result = dict(base)
    for key, val in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(val, dict):
            result[key] = _merge(result[key], val)
        elif key in result and isinstance(result[key], list) and isinstance(val, list):
            # Extend list, deduplicate, preserve order
            combined = result[key] + [v for v in val if v not in result[key]]
            result[key] = combined
        else:
            result[key] = val
    return result
 class KnowledgeLoader:
    """
    Loads subtitle knowledge from YAML files.
    Builtin packs live in alfred/knowledge/ (versioned).
    Learned packs live in data/knowledge/ (gitignored, instance-local).
    Learned entries are merged additively — they can only add tokens/patterns,
    never remove builtin ones.
    Usage:
        loader = KnowledgeLoader()
        subtitles = loader.subtitles()   # merged subtitles.yaml
        patterns = loader.patterns()     # all patterns, keyed by id
        groups = loader.release_groups() # all release groups, keyed by name
    """
    def __init__(self):
        self._cache: dict[str, dict] = {}
        self._load()
    def _load(self) -> None:
        # Main subtitles knowledge
        builtin = _load_yaml(_BUILTIN_ROOT / "subtitles.yaml")
        learned = _load_yaml(_LEARNED_ROOT / "subtitles_learned.yaml")
        self._cache["subtitles"] = _merge(builtin, learned)
        # Patterns
        self._cache["patterns"] = {}
        for path in sorted((_BUILTIN_ROOT / "patterns").glob("*.yaml")):
            data = _load_yaml(path)
            pid = data.get("id", path.stem)
            self._cache["patterns"][pid] = data
        for path in sorted((_LEARNED_ROOT / "patterns").glob("*.yaml")):
            data = _load_yaml(path)
            pid = data.get("id", path.stem)
            if pid in self._cache["patterns"]:
                self._cache["patterns"][pid] = _merge(
                    self._cache["patterns"][pid], data
                )
            else:
                self._cache["patterns"][pid] = data
                logger.info(f"KnowledgeLoader: learned new pattern '{pid}'")
        # Release groups
        self._cache["release_groups"] = {}
        for path in sorted((_BUILTIN_ROOT / "release_groups").glob("*.yaml")):
            data = _load_yaml(path)
            name = data.get("name", path.stem)
            self._cache["release_groups"][name] = data
        for path in sorted((_LEARNED_ROOT / "release_groups").glob("*.yaml")):
            data = _load_yaml(path)
            name = data.get("name", path.stem)
            if name in self._cache["release_groups"]:
                self._cache["release_groups"][name] = _merge(
                    self._cache["release_groups"][name], data
                )
            else:
                self._cache["release_groups"][name] = data
                logger.info(f"KnowledgeLoader: learned new release group '{name}'")
        logger.info(
            f"KnowledgeLoader: {len(self._cache['patterns'])} patterns, "
            f"{len(self._cache['release_groups'])} release groups loaded"
        )
    def subtitles(self) -> dict:
        return self._cache["subtitles"]
    def patterns(self) -> dict[str, dict]:
        return self._cache["patterns"]
    def pattern(self, pattern_id: str) -> dict | None:
        return self._cache["patterns"].get(pattern_id)
    def release_groups(self) -> dict[str, dict]:
        return self._cache["release_groups"]
    def release_group(self, name: str) -> dict | None:
        """Case-insensitive lookup."""
        name_lower = name.lower()
        for key, val in self._cache["release_groups"].items():
            if key.lower() == name_lower:
                return val
        return None
@@ -1,60 +0,0 @@
 """Subtitle repository interfaces (abstract)."""
 from abc import ABC, abstractmethod
 from ..shared.value_objects import ImdbId
 from .entities import Subtitle
 from .value_objects import Language
 class SubtitleRepository(ABC):
    """
    Persistence contract for subtitles.

    Purely abstract: infrastructure-level repositories subclass this and
    provide the three operations below.
    """

    @abstractmethod
    def save(self, subtitle: Subtitle) -> None:
        """
        Persist a subtitle.

        Args:
            subtitle: Subtitle entity to store
        """
        ...

    @abstractmethod
    def find_by_media(
        self,
        media_imdb_id: ImdbId,
        language: Language | None = None,
        season: int | None = None,
        episode: int | None = None,
    ) -> list[Subtitle]:
        """
        Look up the subtitles attached to a media item.

        Args:
            media_imdb_id: IMDb ID of the media
            language: Restrict results to this language when given
            season: Season number, for TV shows
            episode: Episode number, for TV shows

        Returns:
            The subtitles matching the criteria
        """
        ...

    @abstractmethod
    def delete(self, subtitle: Subtitle) -> bool:
        """
        Remove a subtitle from the repository.

        Args:
            subtitle: Subtitle to remove

        Returns:
            True if it was deleted, False if it was not found
        """
        ...
@@ -0,0 +1,207 @@
 """SubtitleScanner — inspects local subtitle files and filters them per user preferences.
 Given a video file path, the scanner:
  1. Looks for subtitle files in the same directory as the video.
  2. Optionally also inspects a Subs/ subfolder adjacent to the video.
  3. Classifies each file (language, SDH, forced) from its filename, delegating
     all token knowledge to SubtitleKnowledgeBase (which itself merges
     LanguageRegistry + subtitle-specific tokens from subtitles.yaml).
  4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh,
     keep_forced).
  5. Returns a list of SubtitleCandidate — one per file that passes the filter,
     with the destination filename already computed.
 Filename classification heuristics
 -----------------------------------
 We parse the stem of each subtitle file looking for known patterns:
  fre.srt             → lang=fre, sdh=False, forced=False
  fre.sdh.srt         → lang=fre, sdh=True
  fre.forced.srt      → lang=fre, forced=True
  Breaking.Bad.S01E01.French.srt  → lang=fre (alias match via LanguageRegistry)
  Breaking.Bad.S01E01.VOSTFR.srt  → lang=fre (subtitle-specific token)
 ISO 639-2/B codes are used throughout (matching the project-wide canonical form
 from iso_languages.yaml — what ffprobe emits).
 Output naming convention (matches SubtitlePreferences docstring):
  {lang}.srt
  {lang}.sdh.srt
  {lang}.forced.srt
 """
 import logging
 import re
 from dataclasses import dataclass
 from pathlib import Path
 from .knowledge.base import SubtitleKnowledgeBase
 from .value_objects import SubtitleType
 logger = logging.getLogger(__name__)
 _TOKEN_SPLIT = re.compile(r"[\.\s_\-]+")
@dataclass
class SubtitleCandidate:
    """A subtitle file that survived filtering and is ready to be placed."""

    source_path: Path
    language: str  # ISO 639-2/B code, e.g. "fre"
    is_sdh: bool
    is_forced: bool
    extension: str  # e.g. ".srt"

    @property
    def destination_name(self) -> str:
        """
        Destination filename, following the naming convention:
            {lang}.srt
            {lang}.sdh.srt
            {lang}.forced.srt

        SDH takes precedence: a file flagged both SDH and forced is named
        ``{lang}.sdh.{ext}``.
        """
        if self.is_sdh:
            qualifier = ".sdh"
        elif self.is_forced:
            qualifier = ".forced"
        else:
            qualifier = ""
        bare_ext = self.extension.lstrip(".")
        return f"{self.language}{qualifier}.{bare_ext}"
 # Module-level KB instance — built lazily on first use to avoid loading YAML at import.
 _KB: SubtitleKnowledgeBase | None = None
 def _kb() -> SubtitleKnowledgeBase:
    """Return the module-wide SubtitleKnowledgeBase, creating it on the first call."""
    global _KB  # noqa: PLW0603 — intentional lazy module-level cache
    if _KB is not None:
        return _KB
    _KB = SubtitleKnowledgeBase()
    return _KB
 def _classify(path: Path) -> tuple[str | None, bool, bool]:
    """
    Derive (language_code, is_sdh, is_forced) from a subtitle filename.

    The lowercased stem is split on dots, whitespace, underscores and hyphens.
    The first token recognised as a language fixes ``language_code`` (its
    ISO 639-2/B canonical code, e.g. ``"fre"``); every other token is checked
    against the subtitle-type tokens to raise the SDH / forced flags.

    Returns (None, False, False) if the language cannot be determined and no
    type token is present.
    """
    kb = _kb()
    language: str | None = None
    is_sdh = False
    is_forced = False
    for token in filter(None, _TOKEN_SPLIT.split(path.stem.lower())):
        if language is None:
            resolved = kb.language_for_token(token)
            if resolved is not None:
                # The token is consumed as the language; do not also test it as a type.
                language = resolved.code
                continue
        stype = kb.type_for_token(token)
        is_sdh = is_sdh or stype is SubtitleType.SDH
        is_forced = is_forced or stype is SubtitleType.FORCED
    return language, is_sdh, is_forced
 class SubtitleScanner:
    """
    Finds the subtitle files sitting beside a video and keeps the ones that
    satisfy the configured preferences (languages, minimum size, SDH, forced).

    Usage:
        scanner = SubtitleScanner(languages, min_size_kb, keep_sdh, keep_forced)
        candidates = scanner.scan(video_path)
        # Each candidate has .source_path and .destination_name
    """

    def __init__(
        self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool
    ):
        self._kb = _kb()
        # Extensions are compared lowercased against path.suffix.lower().
        self._subtitle_extensions = {e.lower() for e in self._kb.known_extensions()}
        self.languages = [lang.lower() for lang in languages]
        self.min_size_kb = min_size_kb
        self.keep_sdh = keep_sdh
        self.keep_forced = keep_forced

    def scan(self, video_path: Path) -> list[SubtitleCandidate]:
        """
        Collect every subtitle candidate next to the video that passes the filter.

        Two locations are inspected, in this order:
          - the video's own directory (flat siblings)
          - its Subs/ subfolder, when there is one
        Entries of each directory are visited in sorted order.
        """
        video_dir = video_path.parent
        search_dirs = [video_dir]
        subs_dir = video_dir / "Subs"
        if subs_dir.is_dir():
            search_dirs.append(subs_dir)
            logger.debug(f"SubtitleScanner: found Subs/ folder at {subs_dir}")

        candidates: list[SubtitleCandidate] = []
        for directory in search_dirs:
            for entry in sorted(directory.iterdir()):
                is_subtitle_file = (
                    entry.is_file()
                    and entry.suffix.lower() in self._subtitle_extensions
                )
                if not is_subtitle_file:
                    continue
                kept = self._evaluate(entry)
                if kept is not None:
                    candidates.append(kept)

        logger.info(
            f"SubtitleScanner: {len(candidates)} candidate(s) found for {video_path.name}"
        )
        return candidates

    def _evaluate(self, path: Path) -> SubtitleCandidate | None:
        """Run one subtitle file through every filter; ``None`` means it is dropped.

        Checks run in order: size, language known, language wanted, SDH, forced.
        The first failing check is logged at DEBUG level.
        """
        size_kb = path.stat().st_size / 1024
        if size_kb < self.min_size_kb:
            logger.debug(
                f"SubtitleScanner: skip {path.name} (too small: {size_kb:.1f} KB)"
            )
            return None

        language, is_sdh, is_forced = _classify(path)
        if language is None:
            logger.debug(f"SubtitleScanner: skip {path.name} (language unknown)")
        elif language not in self.languages:
            logger.debug(
                f"SubtitleScanner: skip {path.name} (language '{language}' not in prefs)"
            )
        elif is_sdh and not self.keep_sdh:
            logger.debug(f"SubtitleScanner: skip {path.name} (SDH not wanted)")
        elif is_forced and not self.keep_forced:
            logger.debug(f"SubtitleScanner: skip {path.name} (forced not wanted)")
        else:
            return SubtitleCandidate(
                source_path=path,
                language=language,
                is_sdh=is_sdh,
                is_forced=is_forced,
                extension=path.suffix.lower(),
            )
        return None
@@ -1,149 +0,0 @@
 """Subtitle domain services - Business logic."""
 import logging
 from ..shared.value_objects import FilePath, ImdbId
 from .entities import Subtitle
 from .exceptions import SubtitleNotFound
 from .repositories import SubtitleRepository
 from .value_objects import Language, SubtitleFormat
 logger = logging.getLogger(__name__)
 class SubtitleService:
    """
    Business logic around subtitles.

    The service is SHARED by the movies and TV shows domains: both go through
    it to manage their subtitles.
    """

    def __init__(self, repository: SubtitleRepository):
        """
        Create the service.

        Args:
            repository: Repository used for subtitle persistence
        """
        self.repository = repository

    def add_subtitle(self, subtitle: Subtitle) -> None:
        """
        Store a subtitle in the library.

        Args:
            subtitle: Subtitle entity to add
        """
        self.repository.save(subtitle)
        logger.info(
            f"Added subtitle: {subtitle.language.value} for {subtitle.media_imdb_id}"
        )

    def find_subtitles_for_movie(
        self, imdb_id: ImdbId, languages: list[Language] | None = None
    ) -> list[Subtitle]:
        """
        Look up the subtitles of a movie.

        Args:
            imdb_id: IMDb ID of the movie
            languages: When given (and non-empty), query each language in turn

        Returns:
            Matching subtitles, grouped in the order of ``languages``
        """
        if not languages:
            return self.repository.find_by_media(imdb_id)
        return [
            subtitle
            for lang in languages
            for subtitle in self.repository.find_by_media(imdb_id, language=lang)
        ]

    def find_subtitles_for_episode(
        self,
        imdb_id: ImdbId,
        season: int,
        episode: int,
        languages: list[Language] | None = None,
    ) -> list[Subtitle]:
        """
        Look up the subtitles of a TV show episode.

        Args:
            imdb_id: IMDb ID of the TV show
            season: Season number
            episode: Episode number
            languages: When given (and non-empty), query each language in turn

        Returns:
            Matching subtitles, grouped in the order of ``languages``
        """
        if not languages:
            return self.repository.find_by_media(
                imdb_id, season=season, episode=episode
            )
        return [
            subtitle
            for lang in languages
            for subtitle in self.repository.find_by_media(
                imdb_id, language=lang, season=season, episode=episode
            )
        ]

    def remove_subtitle(self, subtitle: Subtitle) -> None:
        """
        Delete a subtitle from the library.

        Args:
            subtitle: Subtitle to remove

        Raises:
            SubtitleNotFound: If the repository reports nothing was deleted
        """
        deleted = self.repository.delete(subtitle)
        if not deleted:
            raise SubtitleNotFound(f"Subtitle not found: {subtitle}")
        logger.info(f"Removed subtitle: {subtitle}")

    def detect_format_from_file(self, file_path: FilePath) -> SubtitleFormat:
        """
        Work out the subtitle format from the file's extension.

        Args:
            file_path: Path to subtitle file

        Returns:
            Format resolved by ``SubtitleFormat.from_extension``
        """
        return SubtitleFormat.from_extension(file_path.value.suffix)

    def validate_subtitle_file(self, file_path: FilePath) -> bool:
        """
        Check that a path points to an existing file with a recognised subtitle format.

        Args:
            file_path: Path to the file

        Returns:
            True if valid subtitle file, False otherwise (the reason is logged)
        """
        if not file_path.exists():
            logger.warning(f"File does not exist: {file_path}")
            return False
        if not file_path.is_file():
            logger.warning(f"Path is not a file: {file_path}")
            return False
        # The extension decides: any failure to resolve a format means "invalid".
        try:
            self.detect_format_from_file(file_path)
        except Exception as e:
            logger.warning(f"Invalid subtitle format: {e}")
            return False
        return True
@@ -0,0 +1,13 @@
 from .identifier import SubtitleIdentifier
 from .matcher import SubtitleMatcher
 from .pattern_detector import PatternDetector
 from .placer import PlacedTrack, PlaceResult, SubtitlePlacer
 __all__ = [
    "SubtitleIdentifier",
    "SubtitleMatcher",
    "PatternDetector",
    "SubtitlePlacer",
    "PlacedTrack",
    "PlaceResult",
 ]
@@ -0,0 +1,348 @@
 """SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
 import json
 import logging
 import re
 import subprocess
 from pathlib import Path
 from ...shared.value_objects import ImdbId
 from ..entities import MediaSubtitleMetadata, SubtitleCandidate
 from ..knowledge.base import SubtitleKnowledgeBase
 from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType
 logger = logging.getLogger(__name__)
 def _tokenize(name: str) -> list[str]:
    """Split a filename stem into lowercase tokens, stripping parentheses."""
    # Strip parenthesized qualifiers like (simplified), (canada), (brazil)
    name = re.sub(r"\([^)]*\)", "", name)
    return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]
 def _tokenize_suffix(stem: str, episode_stem: str) -> list[str]:
    """
    Tokenize only what follows the episode stem in a subtitle filename.

    Used for the episode_subfolder pattern, where files are named
    {episode_stem}.{lang_tokens}. The prefix comparison is case-insensitive.
    When the stem does not start with episode_stem, or nothing tokenizable
    follows it, the whole stem is tokenized instead.
    """
    prefix = episode_stem.lower()
    trailing = _tokenize(stem[len(prefix) :]) if stem.lower().startswith(prefix) else []
    return trailing or _tokenize(stem)
 def _count_entries(path: Path) -> int:
    """Return the entry count of an SRT file by finding the last cue number."""
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            lines = f.read().splitlines()
        for line in reversed(lines):
            if line.strip().isdigit():
                return int(line.strip())
        return 0
    except Exception:
        return 0
 class SubtitleIdentifier:
    """
    Finds all subtitle tracks for a given video file using a known pattern,
    then attempts to classify each track (language, type, format).

    Returns a MediaSubtitleMetadata with embedded + external tracks.
    External tracks with unknown language or low confidence are left as-is —
    the caller (use case) decides whether to ask the user for clarification.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def identify(
        self,
        video_path: Path,
        pattern: SubtitlePattern,
        media_id: ImdbId | None,
        media_type: str,
        release_group: str | None = None,
    ) -> MediaSubtitleMetadata:
        """Scan ``video_path`` according to ``pattern`` and return the collected tracks.

        External files are looked up for every strategy except EMBEDDED;
        embedded tracks are probed in all cases.
        """
        metadata = MediaSubtitleMetadata(
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
            detected_pattern_id=pattern.id,
        )
        if pattern.scan_strategy != ScanStrategy.EMBEDDED:
            metadata.external_tracks = self._scan_external(video_path, pattern)
        # Always also check for embedded tracks
        metadata.embedded_tracks = self._scan_embedded(video_path)
        return metadata

    # ------------------------------------------------------------------
    # Embedded tracks — ffprobe
    # ------------------------------------------------------------------
    def _scan_embedded(self, video_path: Path) -> list[SubtitleCandidate]:
        """Probe the container with ffprobe; one candidate per subtitle stream.

        Best effort: returns ``[]`` when the video does not exist, ffprobe
        cannot be launched, it exceeds the 30 s timeout, or its output is not
        valid JSON.
        """
        if not video_path.exists():
            return []
        try:
            result = subprocess.run(
                [
                    "ffprobe",
                    "-v",
                    "quiet",
                    "-print_format",
                    "json",
                    "-show_streams",
                    "-select_streams",
                    "s",
                    str(video_path),
                ],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
            data = json.loads(result.stdout)
        except (
            subprocess.TimeoutExpired,
            json.JSONDecodeError,
            # OSError covers a missing ffprobe binary (FileNotFoundError) as well
            # as one that is present but cannot be executed (PermissionError).
            OSError,
        ) as e:
            logger.debug(
                f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}"
            )
            return []

        tracks = []
        for stream in data.get("streams", []):
            tags = stream.get("tags", {})
            disposition = stream.get("disposition", {})
            lang_code = tags.get("language", "")
            lang = self.kb.language_for_token(lang_code) if lang_code else None
            # hearing_impaired takes precedence over forced when both are set.
            if disposition.get("hearing_impaired"):
                stype = SubtitleType.SDH
            elif disposition.get("forced"):
                stype = SubtitleType.FORCED
            else:
                stype = SubtitleType.STANDARD
            tracks.append(
                SubtitleCandidate(
                    language=lang,
                    format=None,
                    subtitle_type=stype,
                    is_embedded=True,
                    raw_tokens=[lang_code] if lang_code else [],
                )
            )
        logger.debug(
            f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}"
        )
        return tracks

    # ------------------------------------------------------------------
    # External tracks — filesystem scan per pattern strategy
    # ------------------------------------------------------------------
    def _scan_external(
        self, video_path: Path, pattern: SubtitlePattern
    ) -> list[SubtitleCandidate]:
        """Locate external subtitle files per the pattern's strategy and classify them.

        The root folder defaults to "Subs" when the pattern does not name one.
        Strategies other than ADJACENT, FLAT and EPISODE_SUBFOLDER yield ``[]``.
        """
        strategy = pattern.scan_strategy
        episode_stem: str | None = None
        if strategy == ScanStrategy.ADJACENT:
            candidates = self._find_adjacent(video_path)
        elif strategy == ScanStrategy.FLAT:
            candidates = self._find_flat(video_path, pattern.root_folder or "Subs")
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            candidates, episode_stem = self._find_episode_subfolder(
                video_path, pattern.root_folder or "Subs"
            )
        else:
            return []
        return self._classify_files(candidates, pattern, episode_stem=episode_stem)

    def _subtitle_files(self, directory: Path) -> list[Path]:
        """Return the regular files of ``directory``, sorted, that have a known subtitle extension."""
        # Ask the knowledge base once per directory, not once per entry.
        known = self.kb.known_extensions()
        return [
            p
            for p in sorted(directory.iterdir())
            if p.is_file() and p.suffix.lower() in known
        ]

    def _find_adjacent(self, video_path: Path) -> list[Path]:
        """Subtitle files in the video's directory whose stem differs from the video's stem."""
        return [
            p
            for p in self._subtitle_files(video_path.parent)
            if p.stem != video_path.stem
        ]

    def _find_flat(self, video_path: Path, root_folder: str) -> list[Path]:
        """Subtitle files directly inside ``root_folder``.

        The folder is looked for next to the video first, then one level up.
        """
        subs_dir = video_path.parent / root_folder
        if not subs_dir.is_dir():
            # Also look at release root (one level up)
            subs_dir = video_path.parent.parent / root_folder
        if not subs_dir.is_dir():
            return []
        return self._subtitle_files(subs_dir)

    def _find_episode_subfolder(
        self, video_path: Path, root_folder: str
    ) -> tuple[list[Path], str]:
        """
        Look for Subs/{episode_stem}/*.srt

        Checks two locations:
          1. Adjacent to the video: video_path.parent / root_folder / video_path.stem
          2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem

        The first location that actually contains subtitle files wins.
        Returns (files, episode_stem) so the classifier can strip the prefix.
        """
        episode_stem = video_path.stem
        for base in (video_path.parent, video_path.parent.parent):
            subs_dir = base / root_folder / episode_stem
            if not subs_dir.is_dir():
                continue
            files = self._subtitle_files(subs_dir)
            if files:
                logger.debug(
                    f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}"
                )
                return files, episode_stem
        return [], episode_stem

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------
    def _classify_files(
        self,
        paths: list[Path],
        pattern: SubtitlePattern,
        episode_stem: str | None = None,
    ) -> list[SubtitleCandidate]:
        """Classify each file, then apply size/count disambiguation if the pattern asks for it."""
        tracks = [
            self._classify_single(path, episode_stem=episode_stem) for path in paths
        ]
        # Post-process: if multiple tracks share same language but type is ambiguous,
        # apply size_and_count disambiguation
        if pattern.type_detection.value == "size_and_count":
            tracks = self._disambiguate_by_size(tracks)
        return tracks

    def _classify_single(
        self, path: Path, episode_stem: str | None = None
    ) -> SubtitleCandidate:
        """Build a candidate for one external file from its name, size and cue count.

        Tokens come from the part of the stem after ``episode_stem`` when one is
        given, otherwise from the whole stem. When several language (or type)
        tokens are present, the last one wins.
        """
        fmt = self.kb.format_for_extension(path.suffix)
        tokens = (
            _tokenize_suffix(path.stem, episode_stem)
            if episode_stem
            else _tokenize(path.stem)
        )

        language = None
        subtitle_type = SubtitleType.UNKNOWN
        unknown_tokens = []
        matched_tokens = 0
        for token in tokens:
            if self.kb.is_known_lang_token(token):
                language = self.kb.language_for_token(token)
                matched_tokens += 1
            elif self.kb.is_known_type_token(token):
                subtitle_type = self.kb.type_for_token(token) or subtitle_type
                matched_tokens += 1
            elif token.isdigit():
                pass  # numeric prefix — ignore
            elif len(token) > 1:
                unknown_tokens.append(token)

        # Confidence: proportion of meaningful tokens (non-numeric, longer than
        # one character) that were recognized; 0.5 when there are none at all.
        meaningful = [t for t in tokens if not t.isdigit() and len(t) > 1]
        confidence = matched_tokens / len(meaningful) if meaningful else 0.5

        if unknown_tokens:
            logger.debug(
                f"SubtitleIdentifier: unknown tokens in '{path.name}': {unknown_tokens}"
            )

        exists = path.exists()
        size_kb = path.stat().st_size / 1024 if exists else None
        entry_count = _count_entries(path) if exists else None
        return SubtitleCandidate(
            language=language,
            format=fmt,
            subtitle_type=subtitle_type,
            is_embedded=False,
            file_path=path,
            file_size_kb=size_kb,
            entry_count=entry_count,
            confidence=confidence,
            raw_tokens=tokens,
        )

    def _disambiguate_by_size(
        self, tracks: list[SubtitleCandidate]
    ) -> list[SubtitleCandidate]:
        """
        Assign types by relative entry count within each language group.

        Tracks are grouped by language code (tracks without a language form one
        group of their own). A group with a single track is left untouched.
        Otherwise the group is sorted by entry_count and:
          - 2 tracks: the smaller is STANDARD, the larger is SDH;
          - 3+ tracks: the smallest is FORCED, the largest is SDH, the rest
            are STANDARD.

        The returned list is ordered group by group, not in input order.
        Only applied when type_detection = size_and_count.

        NOTE(review): types are overwritten for every track of a multi-track
        group, including ones already identified from a filename token —
        confirm this is intended.
        """
        # Group by language code
        lang_groups: dict[str, list[SubtitleCandidate]] = {}
        for track in tracks:
            key = track.language.code if track.language else "__unknown__"
            lang_groups.setdefault(key, []).append(track)

        result = []
        for group in lang_groups.values():
            if len(group) == 1:
                result.extend(group)
                continue
            # Sort by entry_count ascending (None treated as 0)
            sorted_group = sorted(group, key=lambda t: t.entry_count or 0)
            if len(sorted_group) == 2:
                # smaller = standard, larger = sdh
                self._set_type(sorted_group[0], SubtitleType.STANDARD)
                self._set_type(sorted_group[1], SubtitleType.SDH)
            elif len(sorted_group) >= 3:
                # smallest = forced, middle = standard, largest = sdh
                self._set_type(sorted_group[0], SubtitleType.FORCED)
                for t in sorted_group[1:-1]:
                    self._set_type(t, SubtitleType.STANDARD)
                self._set_type(sorted_group[-1], SubtitleType.SDH)
            result.extend(sorted_group)
        return result

    def _set_type(self, track: SubtitleCandidate, stype: SubtitleType) -> None:
        """Mutate track type in-place."""
        track.subtitle_type = stype
@@ -0,0 +1,120 @@
 """SubtitleMatcher — filters tracks against resolved rules."""
 import logging
 from ..entities import SubtitleCandidate
 from ..value_objects import SubtitleMatchingRules
 logger = logging.getLogger(__name__)
 class SubtitleMatcher:
    """
    Filters a list of SubtitleCandidate against effective SubtitleMatchingRules.
    Returns matched tracks (pass all filters, confidence >= min_confidence)
    and unresolved tracks (need user clarification).
    Conflict resolution: when two tracks share the same language + type,
    format_priority decides which one to keep.
    """
    def match(
        self,
        tracks: list[SubtitleCandidate],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleCandidate], list[SubtitleCandidate]]:
        """
        Returns (matched, unresolved).
        """
        matched: list[SubtitleCandidate] = []
        unresolved: list[SubtitleCandidate] = []
        for track in tracks:
            if track.is_embedded:
                continue
            if track.language is None or track.confidence < rules.min_confidence:
                unresolved.append(track)
                continue
            if not self._passes_filters(track, rules):
                logger.debug(f"SubtitleMatcher: filtered out {track}")
                continue
            matched.append(track)
        matched = self._resolve_conflicts(matched, rules)
        logger.info(
            f"SubtitleMatcher: {len(matched)} matched, {len(unresolved)} unresolved"
        )
        return matched, unresolved
    def _passes_filters(
        self, track: SubtitleCandidate, rules: SubtitleMatchingRules
    ) -> bool:
        # Language filter
        if rules.preferred_languages:
            if not track.language:
                return False
            if track.language.code not in rules.preferred_languages:
                return False
        # Format filter (only for external files)
        if rules.preferred_formats and not track.is_embedded:
            if not track.format:
                return False
            if track.format.id not in rules.preferred_formats:
                return False
        # Type filter
        if rules.allowed_types:
            if track.subtitle_type.value not in rules.allowed_types:
                return False
        return True
    def _resolve_conflicts(
        self,
        tracks: list[SubtitleCandidate],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleCandidate]:
        """
        When multiple tracks have same language + type, keep only the best one
        according to format_priority. If no format_priority applies, keep the first.
        """
        seen: dict[tuple, SubtitleCandidate] = {}
        for track in tracks:
            lang = track.language.code if track.language else None
            stype = track.subtitle_type.value
            key = (lang, stype)
            if key not in seen:
                seen[key] = track
            else:
                existing = seen[key]
                if self._prefer(track, existing, rules.format_priority):
                    logger.debug(
                        f"SubtitleMatcher: conflict {key} — "
                        f"preferring {track.format.id if track.format else 'embedded'} "
                        f"over {existing.format.id if existing.format else 'embedded'}"
                    )
                    seen[key] = track
        return list(seen.values())
    def _prefer(
        self,
        candidate: SubtitleCandidate,
        existing: SubtitleCandidate,
        format_priority: list[str],
    ) -> bool:
        """Return True if candidate is preferable to existing."""
        if not format_priority:
            return False
        c_fmt = candidate.format.id if candidate.format else ""
        e_fmt = existing.format.id if existing.format else ""
        c_rank = format_priority.index(c_fmt) if c_fmt in format_priority else 999
        e_rank = format_priority.index(e_fmt) if e_fmt in format_priority else 999
        return c_rank < e_rank
@@ -0,0 +1,226 @@
 """PatternDetector — discovers the subtitle structure of a release folder."""
 import json
 import logging
 import subprocess
 from pathlib import Path
 from ..knowledge.base import SubtitleKnowledgeBase
 from ..value_objects import ScanStrategy, SubtitlePattern
 logger = logging.getLogger(__name__)
 class PatternDetector:
    """
    Inspects a release folder and returns the best matching known pattern,
    plus a confidence score and a description of what was found.
    Used for "pattern discovery" — when we don't yet know which pattern
    a release follows. The result is proposed to the user for confirmation.
    """
    def __init__(self, kb: SubtitleKnowledgeBase):
        """Store the knowledge base used for extensions, language tokens and patterns."""
        # The detector reads kb.known_extensions(), kb.is_known_lang_token()
        # and kb.patterns(); it keeps no other state.
        self.kb = kb
    def detect(self, release_root: Path, sample_video: Path) -> dict:
        """
        Analyse the release folder and return:
        {
            "detected": SubtitlePattern | None,
            "confidence": float,
            "description": str,         # human-readable description of what was found
            "candidate_pattern_ids": list[str],
        }
        """
        findings = self._inspect(release_root, sample_video)
        best, confidence = self._match_pattern(findings)
        return {
            "detected": best,
            "confidence": confidence,
            "description": self._describe(findings),
            "candidate_pattern_ids": [best.id] if best else [],
            "raw_findings": findings,
        }
    def _has_embedded_subtitles(self, video_path: Path) -> bool:
        """Run ffprobe to check whether the video has embedded subtitle streams."""
        try:
            result = subprocess.run(
                [
                    "ffprobe",
                    "-v",
                    "quiet",
                    "-print_format",
                    "json",
                    "-show_streams",
                    "-select_streams",
                    "s",
                    str(video_path),
                ],
                capture_output=True,
                text=True,
                timeout=30,
                check=False,
            )
            data = json.loads(result.stdout)
            return len(data.get("streams", [])) > 0
        except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
            return False
    def _inspect(self, release_root: Path, sample_video: Path) -> dict:
        """Gather structural facts about the release."""
        known_exts = self.kb.known_extensions()
        findings: dict = {
            "has_subs_folder": False,
            "subs_strategy": None,  # "flat" | "episode_subfolder"
            "subs_root": None,
            "adjacent_subs": False,
            "has_embedded": self._has_embedded_subtitles(sample_video),
            "files_per_episode": 0,
            "has_lang_tokens": False,
            "has_numeric_prefix": False,
        }
        # Check for Subs/ folder — adjacent or at release root
        for subs_candidate in [
            sample_video.parent / "Subs",
            release_root / "Subs",
        ]:
            if subs_candidate.is_dir():
                findings["has_subs_folder"] = True
                findings["subs_root"] = str(subs_candidate)
                # Is it flat or episode_subfolder?
                children = list(subs_candidate.iterdir())
                sub_files = [
                    c
                    for c in children
                    if c.is_file() and c.suffix.lower() in known_exts
                ]
                sub_dirs = [c for c in children if c.is_dir()]
                if sub_dirs and not sub_files:
                    findings["subs_strategy"] = "episode_subfolder"
                    # Count files in a sample subfolder
                    sample_sub = sub_dirs[0]
                    sample_files = [
                        f
                        for f in sample_sub.iterdir()
                        if f.is_file() and f.suffix.lower() in known_exts
                    ]
                    findings["files_per_episode"] = len(sample_files)
                    # Check naming conventions
                    for f in sample_files:
                        stem = f.stem
                        parts = stem.split("_")
                        if parts[0].isdigit():
                            findings["has_numeric_prefix"] = True
                        if any(
                            self.kb.is_known_lang_token(t.lower())
                            for t in stem.replace("_", ".").split(".")
                        ):
                            findings["has_lang_tokens"] = True
                else:
                    findings["subs_strategy"] = "flat"
                    findings["files_per_episode"] = len(sub_files)
                    for f in sub_files:
                        if any(
                            self.kb.is_known_lang_token(t.lower())
                            for t in f.stem.replace("_", ".").split(".")
                        ):
                            findings["has_lang_tokens"] = True
                break
        # Check adjacent subs (next to the video)
        if not findings["has_subs_folder"]:
            adjacent = [
                p
                for p in sample_video.parent.iterdir()
                if p.is_file() and p.suffix.lower() in known_exts
            ]
            if adjacent:
                findings["adjacent_subs"] = True
                findings["files_per_episode"] = len(adjacent)
        return findings
    def _match_pattern(self, findings: dict) -> tuple[SubtitlePattern | None, float]:
        """Score all known patterns against the findings."""
        scores: list[tuple[float, SubtitlePattern]] = []
        for pattern in self.kb.patterns().values():
            score = self._score(pattern, findings)
            scores.append((score, pattern))
        if not scores:
            return None, 0.0
        scores.sort(key=lambda x: x[0], reverse=True)
        best_score, best_pattern = scores[0]
        if best_score < 0.4:
            return None, best_score
        return best_pattern, best_score
    def _score(self, pattern: SubtitlePattern, findings: dict) -> float:
        """
        Return a 0.0–1.0 match score for this pattern against the findings.

        The score is earned points divided by the points available for the
        pattern's scan strategy:

        * EMBEDDED: 1 point for embedded tracks; when there is neither a Subs
          folder nor adjacent files, an extra 0.5 is added to both the earned
          and the available points.
        * EPISODE_SUBFOLDER: 1 point for a Subs folder, 2 for the
          "episode_subfolder" layout (out of 3).
        * FLAT: 1 point for a Subs folder, 1 for the "flat" layout (out of 2).
        * ADJACENT: 1 point for adjacent files, 1 for the absence of a Subs
          folder (out of 2).

        Any other strategy scores 0.0.
        """
        strategy = pattern.scan_strategy
        subs_folder = findings.get("has_subs_folder")
        adjacent = findings.get("adjacent_subs")
        layout = findings.get("subs_strategy")

        earned = 0.0
        possible = 0.0

        if strategy == ScanStrategy.EMBEDDED:
            possible += 1
            if findings.get("has_embedded"):
                earned += 1.0
            if not (subs_folder or adjacent):
                earned += 0.5
                possible += 0.5
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            possible += 3
            if subs_folder:
                earned += 1.0
            if layout == "episode_subfolder":
                earned += 2.0
        elif strategy == ScanStrategy.FLAT:
            possible += 2
            if subs_folder:
                earned += 1.0
            if layout == "flat":
                earned += 1.0
        elif strategy == ScanStrategy.ADJACENT:
            possible += 2
            if adjacent:
                earned += 1.0
            if not subs_folder:
                earned += 1.0

        if possible > 0:
            return earned / possible
        return 0.0
    def _describe(self, findings: dict) -> str:
        parts = []
        if findings.get("has_subs_folder"):
            strategy = findings.get("subs_strategy", "?")
            n = findings.get("files_per_episode", 0)
            parts.append(f"Subs/ folder found ({strategy}), ~{n} file(s) per episode")
            if findings.get("has_numeric_prefix"):
                parts.append("files have numeric prefix (e.g. 2_English.srt)")
            if findings.get("has_lang_tokens"):
                parts.append("language tokens found in filenames")
        elif findings.get("adjacent_subs"):
            parts.append("subtitle files adjacent to video")
        else:
            parts.append("no external subtitle files found")
        if findings.get("has_embedded"):
            parts.append("embedded tracks detected (ffprobe)")
        return " — ".join(parts) if parts else "nothing found"
@@ -0,0 +1,116 @@
 """SubtitlePlacer — hard-links matched subtitle tracks next to the destination video."""
 import logging
 import os
 from dataclasses import dataclass
 from pathlib import Path
 from ..entities import SubtitleCandidate
 from ..value_objects import SubtitleType
 logger = logging.getLogger(__name__)
 def _build_dest_name(track: SubtitleCandidate, video_stem: str) -> str:
    """
    Build the destination filename for a subtitle track.

    Format: {video_stem}.{lang}.{ext}
            {video_stem}.{lang}.sdh.{ext}
            {video_stem}.{lang}.forced.{ext}

    The extension is the first entry of ``track.format.extensions`` with any
    leading dot removed. Only SDH and FORCED tracks get a type marker.

    Raises:
        ValueError: if the track has no language, no format, or its format
            declares no extension. ``SubtitlePlacer.place`` only handles
            ValueError, so every "cannot name this track" case must use it.
    """
    if not track.language or not track.format:
        raise ValueError("Cannot compute destination name: language or format missing")
    if not track.format.extensions:
        # Previously an IndexError escaped from extensions[0] here.
        raise ValueError("Cannot compute destination name: format has no extension")

    ext = track.format.extensions[0].lstrip(".")
    parts = [video_stem, track.language.code]
    if track.subtitle_type == SubtitleType.SDH:
        parts.append("sdh")
    elif track.subtitle_type == SubtitleType.FORCED:
        parts.append("forced")
    return ".".join(parts) + "." + ext
@dataclass
 class PlacedTrack:
    """Record of one subtitle file that was successfully hard-linked into place."""
    source: Path  # subtitle file the hard link was created from
    destination: Path  # full path of the created link (destination dir / filename)
    filename: str  # name of the created link, as produced by _build_dest_name
@dataclass
 class PlaceResult:
    """Outcome of a placement run: the tracks that were linked and those that were not."""
    placed: list[PlacedTrack]  # one entry per hard link created
    skipped: list[tuple[SubtitleCandidate, str]]  # (track, reason)
    @property
    def placed_count(self) -> int:
        """Number of tracks that were placed."""
        return len(self.placed)
    @property
    def skipped_count(self) -> int:
        """Number of tracks that were skipped, whatever the reason."""
        return len(self.skipped)
 class SubtitlePlacer:
    """
    Hard-links matched SubtitleCandidate files next to a destination video.
    Uses the same hard-link strategy as FileManager.copy_file:
    instant, no data duplication, qBittorrent keeps seeding.
    Embedded tracks are skipped — nothing to place on disk.
    """
    def place(
        self,
        tracks: list[SubtitleCandidate],
        destination_video: Path,
    ) -> PlaceResult:
        """
        Hard-link each track next to ``destination_video`` under its computed name.

        A track is skipped, with a reason, when it is embedded, its source file
        is missing, no destination name can be built, the destination already
        exists, or the link call fails with an OSError. Skips never abort the
        run; the remaining tracks are still processed.

        Returns:
            A PlaceResult listing the links created and the skipped tracks.
        """
        target_dir = destination_video.parent
        linked: list[PlacedTrack] = []
        rejected: list[tuple[SubtitleCandidate, str]] = []

        for candidate in tracks:
            if candidate.is_embedded:
                logger.debug(
                    f"SubtitlePlacer: skip embedded track ({candidate.language})"
                )
                rejected.append((candidate, "embedded — no file to place"))
                continue

            if not candidate.file_path or not candidate.file_path.exists():
                rejected.append((candidate, "source file not found"))
                continue

            try:
                link_name = _build_dest_name(candidate, destination_video.stem)
            except ValueError as naming_error:
                rejected.append((candidate, str(naming_error)))
                continue

            link_path = target_dir / link_name
            if link_path.exists():
                # Never overwrite: an existing file wins over the new link.
                logger.debug(f"SubtitlePlacer: skip {link_name} — already exists")
                rejected.append((candidate, "destination already exists"))
                continue

            try:
                os.link(candidate.file_path, link_path)
                record = PlacedTrack(
                    source=candidate.file_path,
                    destination=link_path,
                    filename=link_name,
                )
                linked.append(record)
                logger.info(f"SubtitlePlacer: placed {link_name}")
            except OSError as link_error:
                logger.warning(
                    f"SubtitlePlacer: failed to place {link_name}: {link_error}"
                )
                rejected.append((candidate, str(link_error)))

        logger.info(
            f"SubtitlePlacer: {len(linked)} placed, {len(rejected)} skipped "
            f"for {destination_video.name}"
        )
        return PlaceResult(placed=linked, skipped=rejected)
@@ -0,0 +1,21 @@
 """Subtitle service utilities."""
 from ..entities import SubtitleCandidate
 def available_subtitles(tracks: list[SubtitleCandidate]) -> list[SubtitleCandidate]:
    """
    Return one track per distinct (language code, subtitle type) combination.

    The first track seen for a combination is the one kept, and the result
    preserves that first-seen order. Tracks without a language are grouped
    under a ``None`` language. User preferences play no part here, so e.g.
    eng, eng.sdh and fra all show up as separate entries.
    """
    first_by_key: dict[tuple, SubtitleCandidate] = {}
    for candidate in tracks:
        code = candidate.language.code if candidate.language else None
        first_by_key.setdefault((code, candidate.subtitle_type), candidate)
    return list(first_by_key.values())
@@ -1,91 +1,91 @@
 """Subtitle domain value objects."""
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
-from ..shared.exceptions import ValidationError
+
 class ScanStrategy(Enum):
    """How to locate subtitle files for a given release."""
    # Each member's value is a plain lowercase string; the trailing comment
    # shows the on-disk layout the strategy stands for.
    ADJACENT = "adjacent"  # .srt next to the video
    FLAT = "flat"  # Subs/*.srt
    EPISODE_SUBFOLDER = "episode_subfolder"  # Subs/{episode_name}/*.srt
    EMBEDDED = "embedded"  # tracks inside the video container
-class Language(Enum):
+class TypeDetectionMethod(Enum):
-    """Supported subtitle languages."""
+    """How to differentiate standard / SDH / forced when tokens are ambiguous."""
-    ENGLISH = "en"
+    TOKEN_IN_NAME = "token_in_name"
-    FRENCH = "fr"
+    SIZE_AND_COUNT = "size_and_count"
-
+    FFPROBE_METADATA = "ffprobe_metadata"
    @classmethod
    def from_code(cls, code: str) -> "Language":
        """
        Get language from ISO 639-1 code.
        Args:
            code: Two-letter language code
        Returns:
            Language enum value
        Raises:
            ValidationError: If code is not supported
        """
        code_lower = code.lower()
        for lang in cls:
            if lang.value == code_lower:
                return lang
        raise ValidationError(f"Unsupported language code: {code}")
-class SubtitleFormat(Enum):
+class SubtitleType(Enum):
-    """Supported subtitle formats."""
+    STANDARD = "standard"
-
+    SDH = "sdh"
-    SRT = "srt"  # SubRip
+    FORCED = "forced"
-    ASS = "ass"  # Advanced SubStation Alpha
+    UNKNOWN = "unknown"
    SSA = "ssa"  # SubStation Alpha
    VTT = "vtt"  # WebVTT
    SUB = "sub"  # MicroDVD
    @classmethod
    def from_extension(cls, extension: str) -> "SubtitleFormat":
        """
        Get format from file extension.
        Args:
            extension: File extension (with or without dot)
        Returns:
            SubtitleFormat enum value
        Raises:
            ValidationError: If extension is not supported
        """
        ext = extension.lower().lstrip(".")
        for fmt in cls:
            if fmt.value == ext:
                return fmt
        raise ValidationError(f"Unsupported subtitle format: {extension}")
@dataclass(frozen=True)
-class TimingOffset:
+class SubtitleFormat:
-    """
+    """A known subtitle file format."""
    Value object representing subtitle timing offset in milliseconds.
-    Used for synchronizing subtitles with video.
+    id: str
    extensions: list[str]
    description: str = ""
    def matches_extension(self, ext: str) -> bool:
        return ext.lower() in [e.lower() for e in self.extensions]
@dataclass(frozen=True)
 class SubtitleLanguage:
    """A known subtitle language with its recognition tokens."""
    code: str  # ISO 639-1
    tokens: list[str]  # lowercase
    def matches_token(self, token: str) -> bool:
        """Return True if ``token``, lowercased, is one of this language's tokens."""
        # Only the incoming token is lowercased; ``tokens`` is compared as stored,
        # so it must already be lowercase for matches to succeed.
        return token.lower() in self.tokens
@dataclass(frozen=True)
 class SubtitlePattern:
    """
    A known structural pattern for how a release group organises subtitle files.
    Patterns are loaded from alfred/knowledge/patterns/*.yaml and are
    independent of any specific release group — multiple groups can share
    the same pattern.
    """
-    milliseconds: int
+    id: str
    description: str
    scan_strategy: ScanStrategy
    root_folder: str | None  # e.g. "Subs", None for adjacent/embedded
    type_detection: TypeDetectionMethod
    version: str = "1.0"
    def __post_init__(self):
        """Validate timing offset."""
        if not isinstance(self.milliseconds, int):
            raise ValidationError(
                f"Timing offset must be an integer, got {type(self.milliseconds)}"
            )
-    def to_seconds(self) -> float:
+@dataclass(frozen=True)
-        """Convert to seconds."""
+class SubtitleMatchingRules:
-        return self.milliseconds / 1000.0
+    """
    Effective rules after scope resolution (global → group → show → season → episode).
    Only stores actual values — None means "inherited, not overridden at this level".
    """
-    def __str__(self) -> str:
+    preferred_languages: list[str] = field(default_factory=list)  # ISO 639-1 codes
-        return f"{self.milliseconds}ms"
+    preferred_formats: list[str] = field(default_factory=list)  # format ids
    allowed_types: list[str] = field(default_factory=list)  # SubtitleType ids
    format_priority: list[str] = field(default_factory=list)  # ordered format ids
    min_confidence: float = 0.7
-    def __repr__(self) -> str:
+
-        return f"TimingOffset({self.milliseconds})"
+@dataclass(frozen=True)
 class RuleScope:
    """
    At which level a rule set applies.

    ``level`` names the kind of scope and ``identifier`` pins the concrete
    target within it; ``identifier`` defaults to None.
    """
    level: str  # "global" | "release_group" | "movie" | "show" | "season" | "episode"
    identifier: str | None = None  # imdb_id, group name, "S01", "S01E03"…
@@ -2,18 +2,22 @@
 from .entities import Episode, Season, TVShow
 from .exceptions import InvalidEpisode, SeasonNotFound, TVShowNotFound
-from .services import TVShowService
+from .value_objects import (
-from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
+    CollectionStatus,
    EpisodeNumber,
    SeasonNumber,
    ShowStatus,
 )
 __all__ = [
    "TVShow",
    "Season",
    "Episode",
    "ShowStatus",
    "CollectionStatus",
    "SeasonNumber",
    "EpisodeNumber",
    "TVShowNotFound",
    "InvalidEpisode",
    "SeasonNotFound",
    "TVShowService",
 ]
@@ -1,126 +1,270 @@
-"""TV Show domain entities."""
+"""TV Show domain entities.
 This module implements the TVShow aggregate following DDD principles.
 Aggregate ownership::
    TVShow                            ← aggregate root (the repo returns this)
    └── seasons: dict[SeasonNumber, Season]
        └── Season
            └── episodes: dict[EpisodeNumber, Episode]
                └── Episode           ← file metadata + audio/subtitle tracks
 Rules:
 * ``TVShow`` is the aggregate **root** — the only entity exposed by the
  repository.
 * ``Season`` is owned by TVShow. ``Episode`` is owned by Season.
 * Children do not back-reference the root (no ``show_imdb_id`` on
  Season/Episode): they are only ever reached *through* TVShow.
 * Mutation invariants are enforced through aggregate-root methods such as
  ``TVShow.add_episode()`` — never reach into ``show.seasons[...].episodes``
  to mutate without going through the root, otherwise invariants are not
  guaranteed.
 """
 from __future__ import annotations
 import re
 from dataclasses import dataclass, field
 from datetime import datetime
-from ..shared.value_objects import FilePath, FileSize, ImdbId
+from ..shared.media import AudioTrack, SubtitleTrack, track_lang_matches
-from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
+from ..shared.value_objects import (
    FilePath,
    FileSize,
    ImdbId,
    Language,
    to_dot_folder_name,
 )
 from .value_objects import (
    CollectionStatus,
    EpisodeNumber,
    SeasonNumber,
    ShowStatus,
 )
 # ════════════════════════════════════════════════════════════════════════════
 # Episode
 # ════════════════════════════════════════════════════════════════════════════
@dataclass
-class TVShow:
+class Episode:
    """
-    TV Show entity representing a TV show in the media library.
+    A single episode of a TV show — leaf of the TVShow aggregate.
-    This is the main aggregate root for the TV shows domain.
+    Carries the file metadata (path, size) and the discovered tracks
-    Migrated from agent/models/tv_show.py
+    (audio + subtitle). Track lists are populated by the ffprobe + subtitle
    scan pipeline; they may be empty when the episode is known but not yet
    scanned, or when no file is downloaded yet.
    """
-    imdb_id: ImdbId
+    season_number: SeasonNumber
    episode_number: EpisodeNumber
    title: str
-    seasons_count: int
+    file_path: FilePath | None = None
-    status: ShowStatus
+    file_size: FileSize | None = None
-    tmdb_id: int | None = None
+    audio_tracks: list[AudioTrack] = field(default_factory=list)
-    first_air_date: str | None = None
+    subtitle_tracks: list[SubtitleTrack] = field(default_factory=list)
    added_at: datetime = field(default_factory=datetime.now)
-    def __post_init__(self):
+    def __post_init__(self) -> None:
-        """Validate TV show entity."""
+        # Coerce numbers if raw ints were passed
-        # Ensure ImdbId is actually an ImdbId instance
+        if not isinstance(self.season_number, SeasonNumber):
-        if not isinstance(self.imdb_id, ImdbId):
+            if isinstance(self.season_number, int):
-            if isinstance(self.imdb_id, str):
+                self.season_number = SeasonNumber(self.season_number)
-                object.__setattr__(self, "imdb_id", ImdbId(self.imdb_id))
+        if not isinstance(self.episode_number, EpisodeNumber):
-            else:
+            if isinstance(self.episode_number, int):
-                raise ValueError(
+                self.episode_number = EpisodeNumber(self.episode_number)
                    f"imdb_id must be ImdbId or str, got {type(self.imdb_id)}"
                )
-        # Ensure ShowStatus is actually a ShowStatus instance
+    # ── File presence ──────────────────────────────────────────────────────
        if not isinstance(self.status, ShowStatus):
            if isinstance(self.status, str):
                object.__setattr__(self, "status", ShowStatus.from_string(self.status))
            else:
                raise ValueError(
                    f"status must be ShowStatus or str, got {type(self.status)}"
                )
-        # Validate seasons_count
+    def has_file(self) -> bool:
-        if not isinstance(self.seasons_count, int) or self.seasons_count < 0:
+        """True if a file path is set and the file actually exists on disk."""
-            raise ValueError(
+        return self.file_path is not None and self.file_path.exists()
                f"seasons_count must be a non-negative integer, got {self.seasons_count}"
            )
-    def is_ongoing(self) -> bool:
+    def is_downloaded(self) -> bool:
-        """Check if the show is still ongoing."""
+        """Alias of ``has_file()`` — reads better in collection-status contexts."""
-        return self.status == ShowStatus.ONGOING
+        return self.has_file()
-    def is_ended(self) -> bool:
+    # ── Audio helpers ──────────────────────────────────────────────────────
        """Check if the show has ended."""
        return self.status == ShowStatus.ENDED
-    def get_folder_name(self) -> str:
+    def has_audio_in(self, lang: str | Language) -> bool:
-        """
+        """True if at least one audio track is in the given language."""
-        Get the folder name for this TV show.
+        return any(track_lang_matches(t.language, lang) for t in self.audio_tracks)
-        Format: "Title"
+    def audio_languages(self) -> list[str]:
-        Example: "Breaking.Bad"
+        """Unique audio languages across all tracks, in track order."""
-        """
+        seen: set[str] = set()
-        # Remove special characters and replace spaces with dots
+        result: list[str] = []
-        cleaned = re.sub(r"[^\w\s\.\-]", "", self.title)
+        for t in self.audio_tracks:
-        return cleaned.replace(" ", ".")
+            if t.language and t.language not in seen:
                seen.add(t.language)
                result.append(t.language)
        return result
    # ── Subtitle helpers ───────────────────────────────────────────────────
    def has_subtitles_in(self, lang: str | Language) -> bool:
        """True if at least one subtitle track is in the given language.

        Each track's ``language`` is compared to ``lang`` through
        ``track_lang_matches``; an episode without subtitle tracks yields False.
        """
        return any(track_lang_matches(t.language, lang) for t in self.subtitle_tracks)
    def has_forced_subs(self) -> bool:
        """True if at least one subtitle track is flagged as forced.

        Relies on each track's ``is_forced`` attribute; an episode without
        subtitle tracks yields False.
        """
        return any(t.is_forced for t in self.subtitle_tracks)
    def subtitle_languages(self) -> list[str]:
        """Distinct subtitle languages, ordered by first appearance in the track list.

        Tracks whose ``language`` is empty or None are ignored.
        """
        # dict.fromkeys drops repeats while keeping insertion order.
        return list(
            dict.fromkeys(
                track.language for track in self.subtitle_tracks if track.language
            )
        )
    # ── Naming ─────────────────────────────────────────────────────────────
    def get_filename(self) -> str:
        """Suggested filename: ``S01E05.Pilot``.

        Season and episode numbers are zero-padded to two digits. Characters in
        the title other than word characters, whitespace and hyphens are
        dropped, then each space becomes a dot.
        """
        episode_code = (
            f"S{self.season_number.value:02d}E{self.episode_number.value:02d}"
        )
        title_part = re.sub(r"[^\w\s\-]", "", self.title).replace(" ", ".")
        return f"{episode_code}.{title_part}"
    def __str__(self) -> str:
-        return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
+        return f"S{self.season_number.value:02d}E{self.episode_number.value:02d} - {self.title}"
    def __repr__(self) -> str:
-        return f"TVShow(imdb_id={self.imdb_id}, title='{self.title}')"
+        return (
            f"Episode(S{self.season_number.value:02d}E{self.episode_number.value:02d})"
        )
 # ════════════════════════════════════════════════════════════════════════════
 # Season
 # ════════════════════════════════════════════════════════════════════════════
@dataclass
 class Season:
    """
-    Season entity representing a season of a TV show.
+    A season of a TV show — owned by ``TVShow``.
    Owns its episodes via the ``episodes`` dict keyed by ``EpisodeNumber``.
    Two TMDB-sourced counts shape the collection logic:
    * ``expected_episodes`` — total episodes planned for the season
      (``None`` if unknown).
    * ``aired_episodes`` — episodes **already aired** as of the latest TMDB
      refresh. ``None`` falls back to ``expected_episodes`` (best-effort).
    The split matters: ``is_complete()`` checks owned against aired, so a season
    in the middle of broadcasting can be "complete" today and become "partial"
    later when new episodes air — that is correct behavior.
    """
    show_imdb_id: ImdbId
    season_number: SeasonNumber
-    episode_count: int
+    episodes: dict[EpisodeNumber, Episode] = field(default_factory=dict)
    expected_episodes: int | None = None
    aired_episodes: int | None = None
    name: str | None = None
    overview: str | None = None
    air_date: str | None = None
    poster_path: str | None = None
-    def __post_init__(self):
+    def __post_init__(self) -> None:
        """Validate season entity."""
        # Ensure ImdbId is actually an ImdbId instance
        if not isinstance(self.show_imdb_id, ImdbId):
            if isinstance(self.show_imdb_id, str):
                object.__setattr__(self, "show_imdb_id", ImdbId(self.show_imdb_id))
        # Ensure SeasonNumber is actually a SeasonNumber instance
        if not isinstance(self.season_number, SeasonNumber):
            if isinstance(self.season_number, int):
-                object.__setattr__(
+                self.season_number = SeasonNumber(self.season_number)
-                    self, "season_number", SeasonNumber(self.season_number)
+
        if self.expected_episodes is not None and self.expected_episodes < 0:
            raise ValueError(
                f"expected_episodes must be >= 0, got {self.expected_episodes}"
            )
        if self.aired_episodes is not None and self.aired_episodes < 0:
            raise ValueError(f"aired_episodes must be >= 0, got {self.aired_episodes}")
        if (
            self.expected_episodes is not None
            and self.aired_episodes is not None
            and self.aired_episodes > self.expected_episodes
        ):
            raise ValueError(
                f"aired_episodes ({self.aired_episodes}) cannot exceed "
                f"expected_episodes ({self.expected_episodes})"
            )
-        # Validate episode_count
+    # ── Properties ─────────────────────────────────────────────────────────
-        if not isinstance(self.episode_count, int) or self.episode_count < 0:
+
-            raise ValueError(
+    @property
-                f"episode_count must be a non-negative integer, got {self.episode_count}"
+    def episode_count(self) -> int:
        """Number of episodes currently owned in this season."""
        return len(self.episodes)
    # ── Collection state ───────────────────────────────────────────────────
    def _effective_aired(self) -> int | None:
        """``aired_episodes`` if set, else fall back to ``expected_episodes``."""
        return (
            self.aired_episodes
            if self.aired_episodes is not None
            else self.expected_episodes
        )
    def is_complete(self) -> bool:
        """
        Tell whether every aired episode is owned.

        An unknown aired count yields ``False`` (conservative: completeness
        cannot be claimed without a target). A count of zero is trivially
        complete because nothing has aired yet.
        """
        target = self._effective_aired()
        if target is None:
            return False
        return target == 0 or len(self.episodes) >= target
    def is_fully_aired(self) -> bool:
        """Tell whether the aired count has reached the planned count.

        Returns ``False`` when either count is unknown.
        """
        planned, aired = self.expected_episodes, self.aired_episodes
        if planned is None or aired is None:
            return False
        return aired >= planned
    def missing_episodes(self) -> list[EpisodeNumber]:
        """
        Episode numbers that have aired but are not owned, in ascending order.

        Only numbers ``1..aired`` are considered, so episodes that have not
        aired yet are never reported. With an unknown or non-positive aired
        count the result is an empty list — there is no target to compare
        against.
        """
        target = self._effective_aired()
        if target is None or target <= 0:
            return []
        owned = {number.value for number in self.episodes}
        # Set difference gives the gaps; sorting restores ascending order.
        gaps = sorted(set(range(1, target + 1)) - owned)
        return [EpisodeNumber(n) for n in gaps]
    # ── Mutation (called through the aggregate root) ───────────────────────
    def add_episode(self, episode: Episode) -> None:
        """
        Store ``episode`` under its episode number, overwriting any episode
        already registered with that number (no conflict detection here).

        Raises:
            ValueError: if the episode's season number differs from this
                season's number.
        """
        if episode.season_number == self.season_number:
            self.episodes[episode.episode_number] = episode
            return
        raise ValueError(
            f"Episode season ({episode.season_number}) does not match season "
            f"({self.season_number})"
        )
    # ── Naming ─────────────────────────────────────────────────────────────
    def is_special(self) -> bool:
        """True if this is the specials season (delegates to ``season_number.is_special()``)."""
        return self.season_number.is_special()
    def get_folder_name(self) -> str:
-        """
+        """``Season 01`` or ``Specials`` for season 0."""
        Get the folder name for this season.
        Format: "Season 01" or "Specials" for season 0
        """
        if self.is_special():
            return "Specials"
        return f"Season {self.season_number.value:02d}"
@@ -131,74 +275,168 @@ class Season:
        return f"Season {self.season_number.value}"
    def __repr__(self) -> str:
-        return f"Season(show={self.show_imdb_id}, number={self.season_number.value})"
+        return (
            f"Season(number={self.season_number.value}, episodes={len(self.episodes)})"
        )
 # ════════════════════════════════════════════════════════════════════════════
 # TVShow — aggregate root
 # ════════════════════════════════════════════════════════════════════════════
@dataclass
-class Episode:
+class TVShow:
    """
-    Episode entity representing an episode of a TV show.
+    Aggregate root for the TV shows domain.
    Owns its seasons via the ``seasons`` dict keyed by ``SeasonNumber``.
    All mutations (adding episodes, creating seasons) MUST go through the
    methods on this class — that is how invariants are preserved.
    Two axes describe the show, kept deliberately orthogonal:
    * ``status`` (``ShowStatus``) — production state (TMDB-sourced).
    * ``collection_status()`` — what the user owns vs what has aired today.
    A third axis (upcoming/scheduled) will be added later as a separate flag
    when scheduling support is introduced; for now we make no claim about
    future episodes.
    """
-    show_imdb_id: ImdbId
+    imdb_id: ImdbId
    season_number: SeasonNumber
    episode_number: EpisodeNumber
    title: str
-    file_path: FilePath | None = None
+    status: ShowStatus
-    file_size: FileSize | None = None
+    seasons: dict[SeasonNumber, Season] = field(default_factory=dict)
-    overview: str | None = None
+    expected_seasons: int | None = None
-    air_date: str | None = None
+    tmdb_id: int | None = None
    still_path: str | None = None
    vote_average: float | None = None
    runtime: int | None = None  # in minutes
-    def __post_init__(self):
+    def __post_init__(self) -> None:
-        """Validate episode entity."""
+        if not isinstance(self.imdb_id, ImdbId):
-        # Ensure ImdbId is actually an ImdbId instance
+            if isinstance(self.imdb_id, str):
-        if not isinstance(self.show_imdb_id, ImdbId):
+                self.imdb_id = ImdbId(self.imdb_id)
-            if isinstance(self.show_imdb_id, str):
+            else:
-                object.__setattr__(self, "show_imdb_id", ImdbId(self.show_imdb_id))
+                raise ValueError(
-
+                    f"imdb_id must be ImdbId or str, got {type(self.imdb_id)}"
        # Ensure SeasonNumber is actually a SeasonNumber instance
        if not isinstance(self.season_number, SeasonNumber):
            if isinstance(self.season_number, int):
                object.__setattr__(
                    self, "season_number", SeasonNumber(self.season_number)
                )
-        # Ensure EpisodeNumber is actually an EpisodeNumber instance
+        if not isinstance(self.status, ShowStatus):
-        if not isinstance(self.episode_number, EpisodeNumber):
+            if isinstance(self.status, str):
-            if isinstance(self.episode_number, int):
+                self.status = ShowStatus.from_string(self.status)
-                object.__setattr__(
+            else:
-                    self, "episode_number", EpisodeNumber(self.episode_number)
+                raise ValueError(
                    f"status must be ShowStatus or str, got {type(self.status)}"
                )
-    def has_file(self) -> bool:
+        if self.expected_seasons is not None and self.expected_seasons < 0:
-        """Check if the episode has an associated file."""
+            raise ValueError(
-        return self.file_path is not None and self.file_path.exists()
+                f"expected_seasons must be >= 0, got {self.expected_seasons}"
            )
-    def is_downloaded(self) -> bool:
+    # ── Production-state queries ───────────────────────────────────────────
        """Check if the episode is downloaded."""
        return self.has_file()
-    def get_filename(self) -> str:
+    def is_ongoing(self) -> bool:
        return self.status == ShowStatus.ONGOING
    def is_ended(self) -> bool:
        """True if the show's production status is ``ShowStatus.ENDED``."""
        return self.status == ShowStatus.ENDED
    # ── Properties ─────────────────────────────────────────────────────────
    @property
    def seasons_count(self) -> int:
        """Number of entries in ``self.seasons`` — a season counts even if it holds no episodes."""
        return len(self.seasons)
    @property
    def episode_count(self) -> int:
        """Total episodes owned: the sum of ``episode_count`` over every season in ``self.seasons``."""
        return sum(s.episode_count for s in self.seasons.values())
    # ── Mutation — the sole entry point for adding content ─────────────────
    def add_episode(self, episode: Episode) -> None:
        """
-        Get the suggested filename for this episode.
+        Add an episode to the appropriate season, creating the season if needed.
-        Format: "S01E01 - Episode Title.ext"
+        This is the **only** sanctioned way to add content to the aggregate —
-        Example: "S01E05 - Pilot.mkv"
+        it preserves the invariant that an episode is always reachable through
        ``show.seasons[s].episodes[e]``.
        """
-        season_str = f"S{self.season_number.value:02d}"
+        season = self.seasons.get(episode.season_number)
-        episode_str = f"E{self.episode_number.value:02d}"
+        if season is None:
            season = Season(season_number=episode.season_number)
            self.seasons[episode.season_number] = season
        season.add_episode(episode)
-        # Clean title for filename
+    def add_season(self, season: Season) -> None:
-        clean_title = re.sub(r"[^\w\s\-]", "", self.title)
+        """
-        clean_title = clean_title.replace(" ", ".")
+        Attach a (possibly already populated) Season to the show.
-        return f"{season_str}{episode_str}.{clean_title}"
+        Replaces any existing season with the same number.
        """
        self.seasons[season.season_number] = season
    # ── Collection state ───────────────────────────────────────────────────
    def collection_status(self) -> CollectionStatus:
        """
        Summarise what the user owns for this show.

        * ``EMPTY``    — no episode is owned at all
        * ``PARTIAL``  — some known season is not complete, or fewer seasons
          are present than ``expected_seasons`` announces
        * ``COMPLETE`` — every known season is complete and no season is
          missing

        A season whose aired count is unknown never reports itself complete,
        so such a season makes a non-empty show ``PARTIAL``.
        """
        if self.episode_count == 0:
            return CollectionStatus.EMPTY

        every_season_complete = all(
            season.is_complete() for season in self.seasons.values()
        )
        # A season that is absent altogether may have aired episodes, so a
        # shortfall against expected_seasons rules out COMPLETE.
        seasons_absent = (
            self.expected_seasons is not None
            and len(self.seasons) < self.expected_seasons
        )
        if every_season_complete and not seasons_absent:
            return CollectionStatus.COMPLETE
        return CollectionStatus.PARTIAL
    def is_complete_series(self) -> bool:
        """
        True only when the show has ENDED **and** the collection is COMPLETE.

        The collection is not inspected at all if the show has not ended.
        """
        if not self.is_ended():
            return False
        return self.collection_status() == CollectionStatus.COMPLETE
    def missing_episodes(self) -> list[tuple[SeasonNumber, EpisodeNumber]]:
        """Every aired-but-not-owned ``(season, episode)`` pair, seasons in ascending order."""
        by_season_number = sorted(
            self.seasons.items(), key=lambda entry: entry[0].value
        )
        return [
            (number, missing)
            for number, season in by_season_number
            for missing in season.missing_episodes()
        ]
    # ── Naming ─────────────────────────────────────────────────────────────
    def get_folder_name(self) -> str:
        """Dot-separated folder name for the show (e.g. ``Breaking.Bad``).

        The conversion of ``self.title`` is delegated to ``to_dot_folder_name``.
        """
        return to_dot_folder_name(self.title)
    def __str__(self) -> str:
-        return f"S{self.season_number.value:02d}E{self.episode_number.value:02d} - {self.title}"
+        return f"{self.title} ({self.status.value}, {self.seasons_count} seasons)"
    def __repr__(self) -> str:
-        return f"Episode(show={self.show_imdb_id}, S{self.season_number.value:02d}E{self.episode_number.value:02d})"
+        return f"TVShow(imdb_id={self.imdb_id}, title='{self.title}')"
@@ -1,126 +1,40 @@
-"""TV Show repository interfaces (abstract)."""
+"""TV Show repository interface.
 A single repository for the aggregate root only — Season and Episode are
 **inside** the TVShow aggregate and are never persisted independently. The
 aggregate is always loaded and saved as a whole.
 """
 from abc import ABC, abstractmethod
 from ..shared.value_objects import ImdbId
-from .entities import Episode, Season, TVShow
+from .entities import TVShow
 from .value_objects import EpisodeNumber, SeasonNumber
 class TVShowRepository(ABC):
    """
-    Abstract repository for TV show persistence.
+    Abstract repository for the TVShow aggregate.
-    This defines the interface that infrastructure implementations must follow.
+    Implementations are responsible for persisting the full aggregate graph
    (TVShow + all its Seasons + all their Episodes) atomically.
    """
    @abstractmethod
    def save(self, show: TVShow) -> None:
-        """
+        """Persist the full TVShow aggregate."""
        Save a TV show to the repository.
        Args:
            show: TVShow entity to save
        """
        pass
    @abstractmethod
    def find_by_imdb_id(self, imdb_id: ImdbId) -> TVShow | None:
-        """
+        """Load the full TVShow aggregate by IMDb ID, or None if absent."""
        Find a TV show by its IMDb ID.
        Args:
            imdb_id: IMDb ID to search for
        Returns:
            TVShow if found, None otherwise
        """
        pass
    @abstractmethod
    def find_all(self) -> list[TVShow]:
-        """
+        """Load all TVShow aggregates."""
        Get all TV shows in the repository.
        Returns:
            List of all TV shows
        """
        pass
    @abstractmethod
    def delete(self, imdb_id: ImdbId) -> bool:
-        """
+        """Remove the aggregate. Returns True if it existed and was deleted."""
        Delete a TV show from the repository.
        Args:
            imdb_id: IMDb ID of the show to delete
        Returns:
            True if deleted, False if not found
        """
        pass
    @abstractmethod
    def exists(self, imdb_id: ImdbId) -> bool:
-        """
+        """True if the aggregate exists in the store."""
        Check if a TV show exists in the repository.
        Args:
            imdb_id: IMDb ID to check
        Returns:
            True if exists, False otherwise
        """
        pass
 class SeasonRepository(ABC):
    """Abstract repository for season persistence.

    Declares the interface only; every method is abstract and must be
    provided by a concrete implementation.
    """
    @abstractmethod
    def save(self, season: Season) -> None:
        """Save a season.

        Args:
            season: Season entity to persist
        """
        pass
    @abstractmethod
    def find_by_show_and_number(
        self, show_imdb_id: ImdbId, season_number: SeasonNumber
    ) -> Season | None:
        """Find a season by show and season number.

        Args:
            show_imdb_id: IMDb ID of the owning show
            season_number: Number of the season to look up

        Returns:
            The matching Season, or None (presumably when no such season is
            stored — implementations define the exact condition)
        """
        pass
    @abstractmethod
    def find_all_by_show(self, show_imdb_id: ImdbId) -> list[Season]:
        """Get all seasons for a show.

        Args:
            show_imdb_id: IMDb ID of the owning show

        Returns:
            List of the show's seasons
        """
        pass
 class EpisodeRepository(ABC):
    """Abstract repository for episode persistence.

    Declares the interface only; every method is abstract and must be
    provided by a concrete implementation.
    """
    @abstractmethod
    def save(self, episode: Episode) -> None:
        """Save an episode.

        Args:
            episode: Episode entity to persist
        """
        pass
    @abstractmethod
    def find_by_show_season_episode(
        self,
        show_imdb_id: ImdbId,
        season_number: SeasonNumber,
        episode_number: EpisodeNumber,
    ) -> Episode | None:
        """Find an episode by show, season, and episode number.

        Args:
            show_imdb_id: IMDb ID of the owning show
            season_number: Season the episode belongs to
            episode_number: Number of the episode within that season

        Returns:
            The matching Episode, or None (presumably when no such episode is
            stored — implementations define the exact condition)
        """
        pass
    @abstractmethod
    def find_all_by_season(
        self, show_imdb_id: ImdbId, season_number: SeasonNumber
    ) -> list[Episode]:
        """Get all episodes for a season.

        Args:
            show_imdb_id: IMDb ID of the owning show
            season_number: Season whose episodes are requested

        Returns:
            List of the season's episodes
        """
        pass
    @abstractmethod
    def find_all_by_show(self, show_imdb_id: ImdbId) -> list[Episode]:
        """Get all episodes for a show.

        Args:
            show_imdb_id: IMDb ID of the owning show

        Returns:
            List of the show's episodes
        """
        pass
@@ -1,234 +0,0 @@
 """TV Show domain services - Business logic."""
 import logging
 import re
 from ..shared.value_objects import ImdbId
 from .entities import TVShow
 from .exceptions import (
    TVShowAlreadyExists,
    TVShowNotFound,
 )
 from .repositories import EpisodeRepository, SeasonRepository, TVShowRepository
 logger = logging.getLogger(__name__)
 class TVShowService:
    """
    Domain service for TV show-related business logic.
    This service contains business rules that don't naturally fit
    within a single entity.
    """
    def __init__(
        self,
        show_repository: TVShowRepository,
        season_repository: SeasonRepository | None = None,
        episode_repository: EpisodeRepository | None = None,
    ):
        """
        Initialize TV show service.

        The repositories are stored as-is on the instance; no validation or
        I/O happens here.

        Args:
            show_repository: TV show repository for persistence
            season_repository: Optional season repository (defaults to None)
            episode_repository: Optional episode repository (defaults to None)
        """
        self.show_repository = show_repository
        self.season_repository = season_repository
        self.episode_repository = episode_repository
    def track_show(self, show: TVShow) -> None:
        """
        Register a show for tracking.

        The show is saved only if its IMDb ID is not already present in the
        show repository.

        Args:
            show: TVShow entity to track

        Raises:
            TVShowAlreadyExists: If a show with the same IMDb ID is already stored
        """
        already_tracked = self.show_repository.exists(show.imdb_id)
        if already_tracked:
            raise TVShowAlreadyExists(
                f"TV show with IMDb ID {show.imdb_id} is already tracked"
            )

        self.show_repository.save(show)
        logger.info(f"Started tracking TV show: {show.title} ({show.imdb_id})")
    def get_show(self, imdb_id: ImdbId) -> TVShow:
        """
        Fetch a tracked show by IMDb ID.

        Args:
            imdb_id: IMDb ID of the show

        Returns:
            The TVShow returned by the show repository

        Raises:
            TVShowNotFound: If the repository yields no (truthy) show
        """
        found = self.show_repository.find_by_imdb_id(imdb_id)
        if found:
            return found
        raise TVShowNotFound(f"TV show with IMDb ID {imdb_id} not found")
    def get_all_shows(self) -> list[TVShow]:
        """
        Get all tracked TV shows.

        Thin pass-through to ``show_repository.find_all()``.

        Returns:
            List of all TV shows
        """
        return self.show_repository.find_all()
    def get_ongoing_shows(self) -> list[TVShow]:
        """
        Get all ongoing TV shows.
        Returns:
            List of ongoing TV shows
        """
        all_shows = self.show_repository.find_all()
        return [show for show in all_shows if show.is_ongoing()]
    def get_ended_shows(self) -> list[TVShow]:
        """
        Get all ended TV shows.
        Returns:
            List of ended TV shows
        """
        all_shows = self.show_repository.find_all()
        return [show for show in all_shows if show.is_ended()]
    def update_show(self, show: TVShow) -> None:
        """
        Persist changes to a show that is already tracked.

        Args:
            show: TVShow entity with updated data

        Raises:
            TVShowNotFound: If no show with that IMDb ID exists in the repository
        """
        is_tracked = self.show_repository.exists(show.imdb_id)
        if not is_tracked:
            raise TVShowNotFound(f"TV show with IMDb ID {show.imdb_id} not found")

        self.show_repository.save(show)
        logger.info(f"Updated TV show: {show.title} ({show.imdb_id})")
    def untrack_show(self, imdb_id: ImdbId) -> None:
        """
        Remove a show from tracking.

        Args:
            imdb_id: IMDb ID of the show to untrack

        Raises:
            TVShowNotFound: If the repository reports that nothing was deleted
        """
        was_deleted = self.show_repository.delete(imdb_id)
        if not was_deleted:
            raise TVShowNotFound(f"TV show with IMDb ID {imdb_id} not found")

        logger.info(f"Stopped tracking TV show with IMDb ID: {imdb_id}")
    def parse_episode_from_filename(self, filename: str) -> tuple[int, int] | None:
        """
        Parse season and episode numbers from filename.
        Supports formats:
        - S01E05
        - 1x05
        - Season 1 Episode 5
        Args:
            filename: Filename to parse
        Returns:
            Tuple of (season, episode) if found, None otherwise
        """
        filename_lower = filename.lower()
        # Pattern 1: S01E05
        pattern1 = r"s(\d{1,2})e(\d{1,2})"
        match = re.search(pattern1, filename_lower)
        if match:
            return (int(match.group(1)), int(match.group(2)))
        # Pattern 2: 1x05
        pattern2 = r"(\d{1,2})x(\d{1,2})"
        match = re.search(pattern2, filename_lower)
        if match:
            return (int(match.group(1)), int(match.group(2)))
        # Pattern 3: Season 1 Episode 5
        pattern3 = r"season\s*(\d{1,2})\s*episode\s*(\d{1,2})"
        match = re.search(pattern3, filename_lower)
        if match:
            return (int(match.group(1)), int(match.group(2)))
        return None
    def validate_episode_file(self, filename: str) -> bool:
        """
        Decide whether ``filename`` looks like an episode video file.

        Two conditions must hold: the text from the last dot onward
        (lower-cased) is a known video extension, and
        ``parse_episode_from_filename`` can extract season/episode info.
        Each failure is logged as a warning.

        Args:
            filename: Filename to validate

        Returns:
            True if valid episode file, False otherwise
        """
        valid_extensions = {".mkv", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm"}

        # Everything from the last dot onward; empty when there is no dot.
        _, dot, suffix = filename.rpartition(".")
        extension = f".{suffix}".lower() if dot else ""
        if extension not in valid_extensions:
            logger.warning(f"Invalid file extension: {extension}")
            return False

        if not self.parse_episode_from_filename(filename):
            logger.warning(f"Could not parse episode info from filename: {filename}")
            return False

        return True
    def find_next_episode(
        self, show: TVShow, last_season: int, last_episode: int
    ) -> tuple[int, int] | None:
        """
        Find the next episode to download for a show.
        Args:
            show: TVShow entity
            last_season: Last downloaded season number
            last_episode: Last downloaded episode number
        Returns:
            Tuple of (season, episode) for next episode, or None if show is complete
        """
        # If show has ended and we've watched all seasons, no next episode
        if show.is_ended() and last_season >= show.seasons_count:
            return None
        # Simple logic: next episode in same season, or first episode of next season
        # This could be enhanced with actual episode counts per season
        next_episode = last_episode + 1
        next_season = last_season
        # Assume max 50 episodes per season (could be improved with actual data)
        if next_episode > 50:
            next_season += 1
            next_episode = 1
        # Don't go beyond known seasons
        if next_season > show.seasons_count:
            return None
        return (next_season, next_episode)
@@ -1,5 +1,7 @@
 """TV Show domain value objects."""
 from __future__ import annotations
 from dataclasses import dataclass
 from enum import Enum
@@ -7,28 +9,48 @@ from ..shared.exceptions import ValidationError
 class ShowStatus(Enum):
-    """Status of a TV show - whether it's still airing or has ended."""
+    """
    Production status of a TV show (real-world, source of truth = TMDB).
    Describes the **production** state of the show, independently of what
    the user owns. Orthogonal to ``CollectionStatus``.
    """
    ONGOING = "ongoing"
    ENDED = "ended"
    UNKNOWN = "unknown"
    @classmethod
-    def from_string(cls, status_str: str) -> "ShowStatus":
+    def from_string(cls, status_str: str) -> ShowStatus:
        """
-        Parse status from string.
+        Parse a production status string into a ShowStatus.
-        Args:
+        Accepts our internal vocabulary ("ongoing", "ended") as well as the
-            status_str: Status string (e.g., "ongoing", "ended")
+        statuses returned by TMDB ("Returning Series", "In Production",
        "Pilot", "Ended", "Canceled"). The mapping is intentionally binary:
-        Returns:
+          * ONGOING — any state where new episodes may still ship
-            ShowStatus enum value
+          * ENDED   — production has stopped (naturally or cancelled)
          * UNKNOWN — anything else / unrecognized
        Comparison is case-insensitive and whitespace-trimmed.
        """
        if not status_str:
            return cls.UNKNOWN
        key = status_str.strip().lower()
        status_map = {
            # Internal
            "ongoing": cls.ONGOING,
            "ended": cls.ENDED,
            # TMDB
            "returning series": cls.ONGOING,
            "in production": cls.ONGOING,
            "pilot": cls.ONGOING,
            "planned": cls.ONGOING,
            "canceled": cls.ENDED,
            "cancelled": cls.ENDED,
        }
-        return status_map.get(status_str.lower(), cls.UNKNOWN)
+        return status_map.get(key, cls.UNKNOWN)
@dataclass(frozen=True)
@@ -70,6 +92,23 @@ class SeasonNumber:
        return self.value
 class CollectionStatus(Enum):
    """
    State of the user's **collection** for a TV show (orthogonal to ShowStatus).
    Compares possessed episodes against episodes **already aired** — never
    against announced/upcoming ones. A returning show with all aired episodes
    owned is ``COMPLETE``, not ``PARTIAL``, even if more seasons are upcoming.
    Future scheduling info (upcoming seasons, next airing date) will live on
    the TVShow aggregate as separate flags, not in this enum.
    """
    EMPTY = "empty"  # no episode is owned
    PARTIAL = "partial"  # at least one aired episode is missing
    COMPLETE = "complete"  # every episode aired to date is owned
@dataclass(frozen=True)
 class EpisodeNumber:
    """
@@ -1,6 +1,7 @@
 """qBittorrent Web API client."""
 import logging
 from pathlib import Path
 from typing import Any
 import requests
@@ -48,9 +49,9 @@ class QBittorrentClient:
        """
        cfg = config or settings
-        self.host = host or "http://192.168.178.47:30024"
+        self.host = host or cfg.qbittorrent_url
-        self.username = username or "admin"
+        self.username = username or cfg.qbittorrent_username
-        self.password = password or "adminadmin"
+        self.password = password or cfg.qbittorrent_password
        self.timeout = timeout or cfg.request_timeout
        self.session = requests.Session()
@@ -336,6 +337,88 @@ class QBittorrentClient:
            logger.error(f"Failed to resume torrent: {e}")
            raise
    def find_by_name(self, name: str) -> TorrentInfo | None:
        """
        Look up a torrent by its release folder name.

        Three strategies are applied in order, each over the whole torrent
        list, and the first hit wins:
        1. Exact name match (torrent.name == name)
        2. Case-insensitive name match
        3. Last component of save_path equals the name, case-insensitively
           (folder moved but name intact)

        Args:
            name: Release folder name (e.g. "Foundation.2021.S01.1080p...")

        Returns:
            TorrentInfo if found, None otherwise
        """
        torrents = self.get_torrents()

        def exact(torrent) -> bool:
            return torrent.name == name

        def case_insensitive(torrent) -> bool:
            return torrent.name.lower() == name.lower()

        def save_path_tail(torrent) -> bool:
            if not torrent.save_path:
                return False
            return Path(torrent.save_path).name.lower() == name.lower()

        # A stricter strategy is tried on every torrent before a looser one.
        for strategy in (exact, case_insensitive, save_path_tail):
            for torrent in torrents:
                if strategy(torrent):
                    return torrent
        return None
    def set_location(self, torrent_hash: str, location: str) -> bool:
        """
        Move a torrent to a new save path via ``/api/v2/torrents/setLocation``.

        Logs in first if the client is not authenticated yet.

        Args:
            torrent_hash: Hash of the torrent
            location: New save path (must exist on the server)

        Returns:
            True if location changed successfully

        Raises:
            QBittorrentAPIError: Re-raised after being logged when the request fails
        """
        if not self._authenticated:
            self.login()

        payload = {"hashes": torrent_hash, "location": location}
        try:
            self._make_request("POST", "/api/v2/torrents/setLocation", data=payload)
        except QBittorrentAPIError as e:
            logger.error(f"Failed to set location for {torrent_hash}: {e}")
            raise
        logger.info(f"Set location for {torrent_hash} → {location}")
        return True
    def recheck(self, torrent_hash: str) -> bool:
        """
        Trigger a forced recheck (hash verification) via ``/api/v2/torrents/recheck``.

        Logs in first if the client is not authenticated yet.

        Args:
            torrent_hash: Hash of the torrent

        Returns:
            True if recheck triggered successfully

        Raises:
            QBittorrentAPIError: Re-raised after being logged when the request fails
        """
        if not self._authenticated:
            self.login()

        payload = {"hashes": torrent_hash}
        try:
            self._make_request("POST", "/api/v2/torrents/recheck", data=payload)
        except QBittorrentAPIError as e:
            logger.error(f"Failed to recheck {torrent_hash}: {e}")
            raise
        logger.info(f"Recheck triggered for {torrent_hash}")
        return True
    def get_torrent_properties(self, torrent_hash: str) -> dict[str, Any]:
        """
        Get detailed properties of a torrent.
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1,3 @@`
							`from .loader import WorkflowLoader`

							`__all__ = ["WorkflowLoader"]`