10 Commits

Author SHA1 Message Date
francwa 249c5de76a feat: major architectural refactor
- Refactor memory system (episodic/STM/LTM with components)
- Implement complete subtitle domain (scanner, matcher, placer)
- Add YAML workflow infrastructure
- Externalize knowledge base (patterns, release groups)
- Add comprehensive testing suite
- Create manual testing CLIs
2026-05-11 21:55:06 +02:00
francwa 62b5d0b998 Settings + fix startup 2026-04-30 12:41:42 +02:00
francwa 610dee365c mess: UV + settings KISS + fixes 2026-04-24 18:10:55 +02:00
francwa 58408d0dbe fix: fixed vectordb loneliness 2026-01-06 04:39:42 +01:00
francwa 2f1ac3c758 infra: simplified mongodb healthcheck 2026-01-06 04:36:52 +01:00
francwa d3b69f7459 feat: enabled logging for alfred(-core) 2026-01-06 04:33:59 +01:00
francwa 50c8204fa0 infra: added granular logging configuration for mongodb 2026-01-06 02:50:49 +01:00
francwa 507fe0f40e chore: updated dependencies 2026-01-06 02:41:18 +01:00
francwa b7b40eada1 fix: set proper database name for mongodb 2026-01-06 02:33:35 +01:00
francwa 9765386405 feat: named docker image to avoid docker picking the wrong one 2026-01-06 02:19:00 +01:00
124 changed files with 16617 additions and 3181 deletions
+77
View File
@@ -0,0 +1,77 @@
# --- IMPORTANT ---
# Settings are split across multiple files for clarity.
# Files (loaded in this order, last wins):
# .env.alfred — app config and service addresses (safe to commit)
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
# .env.make — build metadata synced from pyproject.toml (safe to commit)
#
# To customize: edit .env.alfred for config, .env.secrets for secrets.
# --- Alfred ---
MAX_HISTORY_MESSAGES=10
MAX_TOOL_ITERATIONS=10
REQUEST_TIMEOUT=30
# LLM Settings
LLM_TEMPERATURE=0.2
# Persistence
DATA_STORAGE_DIR=data
# Network
HOST=0.0.0.0
PORT=3080
# --- DATABASES ---
# Passwords and connection URIs are auto-generated in .env.secrets.
# Edit host/port/user/dbname here if needed.
# MongoDB (Application Data)
MONGO_HOST=mongodb
MONGO_PORT=27017
MONGO_USER=alfred
MONGO_DB_NAME=alfred
# PostgreSQL (Vector Database / RAG)
POSTGRES_HOST=vectordb
POSTGRES_PORT=5432
POSTGRES_USER=alfred
POSTGRES_DB_NAME=alfred
# --- EXTERNAL SERVICES ---
# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
# → TMDB_API_KEY goes in .env.secrets
TMDB_BASE_URL=https://api.themoviedb.org/3
# qBittorrent
# → QBITTORRENT_PASSWORD goes in .env.secrets
QBITTORRENT_URL=http://qbittorrent:16140
QBITTORRENT_USERNAME=admin
QBITTORRENT_PORT=16140
# Meilisearch
# → MEILI_MASTER_KEY goes in .env.secrets
# MEILI_ENABLED=false # KEY DOESN'T EXIST => SEARCH IS THE PROPER KEY
SEARCH=false
MEILI_NO_ANALYTICS=true
MEILI_HOST=http://meilisearch:7700
# --- LLM CONFIGURATION ---
# Providers: local, openai, anthropic, deepseek, google, kimi
# → API keys go in .env.secrets
DEFAULT_LLM_PROVIDER=local
# Local LLM (Ollama)
#OLLAMA_BASE_URL=http://ollama:11434
#OLLAMA_MODEL=llama3.3:latest
OLLAMA_BASE_URL=http://10.0.0.11:11434
OLLAMA_MODEL=glm-4.7-flash:latest
# --- RAG ENGINE ---
RAG_ENABLED=TRUE
RAG_API_URL=http://rag_api:8000
RAG_API_PORT=8000
EMBEDDINGS_PROVIDER=ollama
EMBEDDINGS_MODEL=nomic-embed-text
+23 -43
View File
@@ -1,3 +1,13 @@
# --- IMPORTANT ---
# Settings are split across multiple files for clarity.
# Files (loaded in this order, last wins):
# .env.alfred — app config and service addresses (safe to commit)
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
# .env.make — build metadata synced from pyproject.toml (safe to commit)
#
# To customize: edit .env.alfred for config, .env.secrets for secrets.
# --- Alfred ---
MAX_HISTORY_MESSAGES=10 MAX_HISTORY_MESSAGES=10
MAX_TOOL_ITERATIONS=10 MAX_TOOL_ITERATIONS=10
REQUEST_TIMEOUT=30 REQUEST_TIMEOUT=30
@@ -8,84 +18,54 @@ LLM_TEMPERATURE=0.2
# Persistence # Persistence
DATA_STORAGE_DIR=data DATA_STORAGE_DIR=data
# Network configuration # Network
HOST=0.0.0.0 HOST=0.0.0.0
PORT=3080 PORT=3080
# Build informations (Synced with pyproject.toml via bootstrap) # --- DATABASES ---
ALFRED_VERSION= # Passwords and connection URIs are auto-generated in .env.secrets.
IMAGE_NAME= # Edit host/port/user/dbname here if needed.
LIBRECHAT_VERSION=
PYTHON_VERSION=
PYTHON_VERSION_SHORT=
RAG_VERSION=
RUNNER=
SERVICE_NAME=
# --- SECURITY KEYS (CRITICAL) ---
# These are used for session tokens and encrypting sensitive data in MongoDB.
# If you lose these, you lose access to encrypted stored credentials.
JWT_SECRET=
JWT_REFRESH_SECRET=
CREDS_KEY=
CREDS_IV=
# --- DATABASES (AUTO-SECURED) ---
# Alfred uses MongoDB for application state and PostgreSQL for Vector RAG.
# Passwords will be generated as 24-character secure tokens if left blank.
# MongoDB (Application Data) # MongoDB (Application Data)
MONGO_URI=
MONGO_HOST=mongodb MONGO_HOST=mongodb
MONGO_PORT=27017 MONGO_PORT=27017
MONGO_USER=alfred MONGO_USER=alfred
MONGO_PASSWORD= MONGO_DB_NAME=LibreChat
MONGO_DB_NAME=alfred
# PostgreSQL (Vector Database / RAG) # PostgreSQL (Vector Database / RAG)
POSTGRES_URI=
POSTGRES_HOST=vectordb POSTGRES_HOST=vectordb
POSTGRES_PORT=5432 POSTGRES_PORT=5432
POSTGRES_USER=alfred POSTGRES_USER=alfred
POSTGRES_PASSWORD=
POSTGRES_DB_NAME=alfred POSTGRES_DB_NAME=alfred
# --- EXTERNAL SERVICES --- # --- EXTERNAL SERVICES ---
# Media Metadata (Required)
# Get your key at https://www.themoviedb.org/ # TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
TMDB_API_KEY= # → TMDB_API_KEY goes in .env.secrets
TMDB_BASE_URL=https://api.themoviedb.org/3 TMDB_BASE_URL=https://api.themoviedb.org/3
# qBittorrent integration # qBittorrent
# → QBITTORRENT_PASSWORD goes in .env.secrets
QBITTORRENT_URL=http://qbittorrent:16140 QBITTORRENT_URL=http://qbittorrent:16140
QBITTORRENT_USERNAME=admin QBITTORRENT_USERNAME=admin
QBITTORRENT_PASSWORD=
QBITTORRENT_PORT=16140 QBITTORRENT_PORT=16140
# Meilisearch # Meilisearch
# → MEILI_MASTER_KEY goes in .env.secrets
MEILI_ENABLED=FALSE MEILI_ENABLED=FALSE
MEILI_NO_ANALYTICS=TRUE MEILI_NO_ANALYTICS=TRUE
MEILI_HOST=http://meilisearch:7700 MEILI_HOST=http://meilisearch:7700
MEILI_MASTER_KEY=
# --- LLM CONFIGURATION --- # --- LLM CONFIGURATION ---
# Providers: 'local', 'openai', 'anthropic', 'deepseek', 'google', 'kimi' # Providers: local, openai, anthropic, deepseek, google, kimi
# → API keys go in .env.secrets
DEFAULT_LLM_PROVIDER=local DEFAULT_LLM_PROVIDER=local
# Local LLM (Ollama) # Local LLM (Ollama)
OLLAMA_BASE_URL=http://ollama:11434 OLLAMA_BASE_URL=http://ollama:11434
OLLAMA_MODEL=llama3.3:latest OLLAMA_MODEL=llama3.3:latest
# --- API KEYS (OPTIONAL) ---
# Fill only the ones you intend to use.
ANTHROPIC_API_KEY=
DEEPSEEK_API_KEY=
GOOGLE_API_KEY=
KIMI_API_KEY=
OPENAI_API_KEY=
# --- RAG ENGINE --- # --- RAG ENGINE ---
# Enable/Disable the Retrieval Augmented Generation system
RAG_ENABLED=TRUE RAG_ENABLED=TRUE
RAG_API_URL=http://rag_api:8000 RAG_API_URL=http://rag_api:8000
RAG_API_PORT=8000 RAG_API_PORT=8000
+878
View File
@@ -0,0 +1,878 @@
#=====================================================================#
# LibreChat Configuration #
#=====================================================================#
# Please refer to the reference documentation for assistance #
# with configuring your LibreChat environment. #
# #
# https://www.librechat.ai/docs/configuration/dotenv #
#=====================================================================#
#==================================================#
# Server Configuration #
#==================================================#
HOST=localhost
PORT=3080
MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
# The maximum number of connections in the connection pool.
MONGO_MAX_POOL_SIZE=
# The minimum number of connections in the connection pool.
MONGO_MIN_POOL_SIZE=
# The maximum number of connections that may be in the process of being established concurrently by the connection pool.
MONGO_MAX_CONNECTING=
# The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
MONGO_MAX_IDLE_TIME_MS=
# The maximum time in milliseconds that a thread can wait for a connection to become available.
MONGO_WAIT_QUEUE_TIMEOUT_MS=
# Set to false to disable automatic index creation for all models associated with this connection.
MONGO_AUTO_INDEX=
# Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
MONGO_AUTO_CREATE=
DOMAIN_CLIENT=http://localhost:3080
DOMAIN_SERVER=http://localhost:3080
NO_INDEX=true
# Use the address that is at most n number of hops away from the Express application.
# req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
# A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
# Defaulted to 1.
TRUST_PROXY=1
# Minimum password length for user authentication
# Default: 8
# Note: When using LDAP authentication, you may want to set this to 1
# to bypass local password validation, as LDAP servers handle their own
# password policies.
# MIN_PASSWORD_LENGTH=8
# When enabled, the app will continue running after encountering uncaught exceptions
# instead of exiting the process. Not recommended for production unless necessary.
# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
#===============#
# JSON Logging #
#===============#
# Use when processing console logs in cloud deployments such as GCP/AWS
CONSOLE_JSON=false
#===============#
# Debug Logging #
#===============#
DEBUG_LOGGING=true
DEBUG_CONSOLE=false
# Set to true to enable agent debug logging
AGENT_DEBUG_LOGGING=false
# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
# MEM_DIAG=true
#=============#
# Permissions #
#=============#
# UID=1000
# GID=1000
#==============#
# Node Options #
#==============#
# NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
# This variable is used as a build argument for Docker or CI/CD workflows,
# and is NOT used by Node.js to set the heap size at runtime.
# To configure Node.js memory, use NODE_OPTIONS, e.g.:
# NODE_OPTIONS="--max-old-space-size=6144"
# See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
NODE_MAX_OLD_SPACE_SIZE=6144
#===============#
# Configuration #
#===============#
# Use an absolute path, a relative path, or a URL
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
#==================#
# Langfuse Tracing #
#==================#
# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
# LANGFUSE_PUBLIC_KEY=
# LANGFUSE_SECRET_KEY=
# LANGFUSE_BASE_URL=
#===================================================#
# Endpoints #
#===================================================#
# ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
PROXY=
#===================================#
# Known Endpoints - librechat.yaml #
#===================================#
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
# ANYSCALE_API_KEY=
# APIPIE_API_KEY=
# COHERE_API_KEY=
# DEEPSEEK_API_KEY=
# DATABRICKS_API_KEY=
# FIREWORKS_API_KEY=
# GROQ_API_KEY=
# HUGGINGFACE_TOKEN=
# MISTRAL_API_KEY=
# OPENROUTER_KEY=
# PERPLEXITY_API_KEY=
# SHUTTLEAI_API_KEY=
# TOGETHERAI_API_KEY=
# UNIFY_API_KEY=
# XAI_API_KEY=
#============#
# Anthropic #
#============#
ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
# ANTHROPIC_REVERSE_PROXY=
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
# ANTHROPIC_USE_VERTEX=
# ANTHROPIC_VERTEX_REGION=us-east5
#============#
# Azure #
#============#
# Note: these variables are DEPRECATED
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
# AZURE_API_KEY= # Deprecated
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_VERSION= # Deprecated
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
#=================#
# AWS Bedrock #
#=================#
# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
# BEDROCK_AWS_SESSION_TOKEN=someSessionToken
# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
# Notes on specific models:
# The following models are not supported because they do not support streaming:
# ai21.j2-mid-v1
# The following models are not supported because they do not support conversation history:
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
#============#
# Google #
#============#
GOOGLE_KEY=user_provided
# GOOGLE_REVERSE_PROXY=
# Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
# GOOGLE_AUTH_HEADER=true
# Gemini API (AI Studio)
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
# Vertex AI
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
# Google Cloud region for Vertex AI (used by both chat and image generation)
# GOOGLE_LOC=us-central1
# Alternative region env var for Gemini Image Generation
# GOOGLE_CLOUD_LOCATION=global
# Vertex AI Service Account Configuration
# Path to your Google Cloud service account JSON file
# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
# Google Safety Settings
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
#
# For Vertex AI:
# To use the BLOCK_NONE setting, you need either:
# (a) Access through an allowlist via your Google account team, or
# (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
#
# For Gemini API (AI Studio):
# BLOCK_NONE is available by default, no special account requirements.
#
# Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
#
# GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
#========================#
# Gemini Image Generation #
#========================#
# Gemini Image Generation Tool (for Agents)
# Supports multiple authentication methods in priority order:
# 1. User-provided API key (via GUI)
# 2. GEMINI_API_KEY env var (admin-configured)
# 3. GOOGLE_KEY env var (shared with Google chat endpoint)
# 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
# Option A: Use dedicated Gemini API key for image generation
# GEMINI_API_KEY=your-gemini-api-key
# Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
#============#
# OpenAI #
#============#
OPENAI_API_KEY=user_provided
# OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
DEBUG_OPENAI=false
# TITLE_CONVO=false
# OPENAI_TITLE_MODEL=gpt-4o-mini
# OPENAI_SUMMARIZE=true
# OPENAI_SUMMARY_MODEL=gpt-4o-mini
# OPENAI_FORCE_PROMPT=true
# OPENAI_REVERSE_PROXY=
# OPENAI_ORGANIZATION=
#====================#
# Assistants API #
#====================#
ASSISTANTS_API_KEY=user_provided
# ASSISTANTS_BASE_URL=
# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
#==========================#
# Azure Assistants API #
#==========================#
# Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
# The models for Azure Assistants are also determined by your Azure OpenAI configuration.
# More info, including how to enable use of Assistants with Azure here:
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
# Azure AI Search
#-----------------
AZURE_AI_SEARCH_SERVICE_ENDPOINT=
AZURE_AI_SEARCH_INDEX_NAME=
AZURE_AI_SEARCH_API_KEY=
AZURE_AI_SEARCH_API_VERSION=
AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
# OpenAI Image Tools Customization
#----------------
# IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
# IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
# IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
# IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
# IMAGE_GEN_OAI_DESCRIPTION=
# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
# DALL·E
#----------------
# DALLE_API_KEY=
# DALLE3_API_KEY=
# DALLE2_API_KEY=
# DALLE3_SYSTEM_PROMPT=
# DALLE2_SYSTEM_PROMPT=
# DALLE_REVERSE_PROXY=
# DALLE3_BASEURL=
# DALLE2_BASEURL=
# DALL·E (via Azure OpenAI)
# Note: requires some of the variables above to be set
#----------------
# DALLE3_AZURE_API_VERSION=
# DALLE2_AZURE_API_VERSION=
# Flux
#-----------------
FLUX_API_BASE_URL=https://api.us1.bfl.ai
# FLUX_API_BASE_URL = 'https://api.bfl.ml';
# Get your API key at https://api.us1.bfl.ai/auth/profile
# FLUX_API_KEY=
# Google
#-----------------
GOOGLE_SEARCH_API_KEY=
GOOGLE_CSE_ID=
# Stable Diffusion
#-----------------
SD_WEBUI_URL=http://host.docker.internal:7860
# Tavily
#-----------------
TAVILY_API_KEY=
# Traversaal
#-----------------
TRAVERSAAL_API_KEY=
# WolframAlpha
#-----------------
WOLFRAM_APP_ID=
# Zapier
#-----------------
ZAPIER_NLA_API_KEY=
#==================================================#
# Search #
#==================================================#
SEARCH=true
MEILI_NO_ANALYTICS=true
MEILI_HOST=http://0.0.0.0:7700
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
# Optional: Disable indexing, useful in a multi-node setup
# where only one instance should perform an index sync.
# MEILI_NO_SYNC=true
#==================================================#
# Speech to Text & Text to Speech #
#==================================================#
STT_API_KEY=
TTS_API_KEY=
#==================================================#
# RAG #
#==================================================#
# More info: https://www.librechat.ai/docs/configuration/rag_api
# RAG_OPENAI_BASEURL=
# RAG_OPENAI_API_KEY=
# RAG_USE_FULL_CONTEXT=
# EMBEDDINGS_PROVIDER=openai
# EMBEDDINGS_MODEL=text-embedding-3-small
#===================================================#
# User System #
#===================================================#
#========================#
# Moderation #
#========================#
OPENAI_MODERATION=false
OPENAI_MODERATION_API_KEY=
# OPENAI_MODERATION_REVERSE_PROXY=
BAN_VIOLATIONS=true
BAN_DURATION=1000 * 60 * 60 * 2
BAN_INTERVAL=20
LOGIN_VIOLATION_SCORE=1
REGISTRATION_VIOLATION_SCORE=1
CONCURRENT_VIOLATION_SCORE=1
MESSAGE_VIOLATION_SCORE=1
NON_BROWSER_VIOLATION_SCORE=20
TTS_VIOLATION_SCORE=0
STT_VIOLATION_SCORE=0
FORK_VIOLATION_SCORE=0
IMPORT_VIOLATION_SCORE=0
FILE_UPLOAD_VIOLATION_SCORE=0
LOGIN_MAX=7
LOGIN_WINDOW=5
REGISTER_MAX=5
REGISTER_WINDOW=60
LIMIT_CONCURRENT_MESSAGES=true
CONCURRENT_MESSAGE_MAX=2
LIMIT_MESSAGE_IP=true
MESSAGE_IP_MAX=40
MESSAGE_IP_WINDOW=1
LIMIT_MESSAGE_USER=false
MESSAGE_USER_MAX=40
MESSAGE_USER_WINDOW=1
ILLEGAL_MODEL_REQ_SCORE=5
#========================#
# Balance #
#========================#
# CHECK_BALANCE=false
# START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
#========================#
# Registration and Login #
#========================#
ALLOW_EMAIL_LOGIN=true
ALLOW_REGISTRATION=true
ALLOW_SOCIAL_LOGIN=false
ALLOW_SOCIAL_REGISTRATION=false
ALLOW_PASSWORD_RESET=false
# ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
ALLOW_UNVERIFIED_EMAIL_LOGIN=true
SESSION_EXPIRY=1000 * 60 * 15
REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
# Discord
DISCORD_CLIENT_ID=
DISCORD_CLIENT_SECRET=
DISCORD_CALLBACK_URL=/oauth/discord/callback
# Facebook
FACEBOOK_CLIENT_ID=
FACEBOOK_CLIENT_SECRET=
FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
# GitHub
GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
GITHUB_CALLBACK_URL=/oauth/github/callback
# GitHub Enterprise
# GITHUB_ENTERPRISE_BASE_URL=
# GITHUB_ENTERPRISE_USER_AGENT=
# Google
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=
GOOGLE_CALLBACK_URL=/oauth/google/callback
# Apple
APPLE_CLIENT_ID=
APPLE_TEAM_ID=
APPLE_KEY_ID=
APPLE_PRIVATE_KEY_PATH=
APPLE_CALLBACK_URL=/oauth/apple/callback
# OpenID
OPENID_CLIENT_ID=
OPENID_CLIENT_SECRET=
OPENID_ISSUER=
OPENID_SESSION_SECRET=
OPENID_SCOPE="openid profile email"
OPENID_CALLBACK_URL=/oauth/openid/callback
OPENID_REQUIRED_ROLE=
OPENID_REQUIRED_ROLE_TOKEN_KIND=
OPENID_REQUIRED_ROLE_PARAMETER_PATH=
OPENID_ADMIN_ROLE=
OPENID_ADMIN_ROLE_PARAMETER_PATH=
OPENID_ADMIN_ROLE_TOKEN_KIND=
# Set to determine which user info property returned from OpenID Provider to store as the User's username
OPENID_USERNAME_CLAIM=
# Set to determine which user info property returned from OpenID Provider to store as the User's name
OPENID_NAME_CLAIM=
# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
# When not set, defaults to: email -> preferred_username -> upn
OPENID_EMAIL_CLAIM=
# Optional audience parameter for OpenID authorization requests
OPENID_AUDIENCE=
OPENID_BUTTON_LABEL=
OPENID_IMAGE_URL=
# Set to true to automatically redirect to the OpenID provider when a user visits the login page
# This will bypass the login form completely for users, only use this if OpenID is your only authentication method
OPENID_AUTO_REDIRECT=false
# Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
OPENID_USE_PKCE=false
#Set to true to reuse openid tokens for authentication management instead of using the mongodb session and the custom refresh token.
OPENID_REUSE_TOKENS=
#By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
#If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
#Default is true.
OPENID_JWKS_URL_CACHE_ENABLED=
OPENID_JWKS_URL_CACHE_TIME= # 600000 ms eq to 10 minutes leave empty to disable caching
#Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example for Scope Needed for Microsoft Graph API
# Set to true to use the OpenID Connect end session endpoint for logout
OPENID_USE_END_SESSION_ENDPOINT=
# URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
OPENID_POST_LOGOUT_REDIRECT_URI=
# Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
OPENID_MAX_LOGOUT_URL_LENGTH=
#========================#
# SharePoint Integration #
#========================#
# Requires Entra ID (OpenID) authentication to be configured
# Enable SharePoint file picker in chat and agent panels
# ENABLE_SHAREPOINT_FILEPICKER=true
# SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
# SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
# Microsoft Graph API And SharePoint scopes for file picker
# SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
# SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
#========================#
# SAML
# Note: If OpenID is enabled, SAML authentication will be automatically disabled.
SAML_ENTRY_POINT=
SAML_ISSUER=
SAML_CERT=
SAML_CALLBACK_URL=/oauth/saml/callback
SAML_SESSION_SECRET=
# Attribute mappings (optional)
SAML_EMAIL_CLAIM=
SAML_USERNAME_CLAIM=
SAML_GIVEN_NAME_CLAIM=
SAML_FAMILY_NAME_CLAIM=
SAML_PICTURE_CLAIM=
SAML_NAME_CLAIM=
# Login button settings (optional)
SAML_BUTTON_LABEL=
SAML_IMAGE_URL=
# Whether the SAML Response should be signed.
# - If "true", the entire `SAML Response` will be signed.
# - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
# SAML_USE_AUTHN_RESPONSE_SIGNED=
#===============================================#
# Microsoft Graph API / Entra ID Integration #
#===============================================#
# Enable Entra ID people search integration in permissions/sharing system
# When enabled, the people picker will search both local database and Entra ID
USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
# When enabled, Entra ID group owners will be considered members of the group
ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
# Microsoft Graph API scopes needed for people/group search
# Default scopes provide access to user profiles and group memberships
OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
# LDAP
LDAP_URL=
LDAP_BIND_DN=
LDAP_BIND_CREDENTIALS=
LDAP_USER_SEARCH_BASE=
#LDAP_SEARCH_FILTER="mail="
LDAP_CA_CERT_PATH=
# LDAP_TLS_REJECT_UNAUTHORIZED=
# LDAP_STARTTLS=
# LDAP_LOGIN_USES_USERNAME=true
# LDAP_ID=
# LDAP_USERNAME=
# LDAP_EMAIL=
# LDAP_FULL_NAME=
#========================#
# Email Password Reset #
#========================#
EMAIL_SERVICE=
EMAIL_HOST=
EMAIL_PORT=25
EMAIL_ENCRYPTION=
EMAIL_ENCRYPTION_HOSTNAME=
EMAIL_ALLOW_SELFSIGNED=
# Leave both empty for SMTP servers that do not require authentication
EMAIL_USERNAME=
EMAIL_PASSWORD=
EMAIL_FROM_NAME=
EMAIL_FROM=noreply@librechat.ai
#========================#
# Mailgun API #
#========================#
# MAILGUN_API_KEY=your-mailgun-api-key
# MAILGUN_DOMAIN=mg.yourdomain.com
# EMAIL_FROM=noreply@yourdomain.com
# EMAIL_FROM_NAME="LibreChat"
# # Optional: For EU region
# MAILGUN_HOST=https://api.eu.mailgun.net
#========================#
# Firebase CDN #
#========================#
FIREBASE_API_KEY=
FIREBASE_AUTH_DOMAIN=
FIREBASE_PROJECT_ID=
FIREBASE_STORAGE_BUCKET=
FIREBASE_MESSAGING_SENDER_ID=
FIREBASE_APP_ID=
#========================#
# S3 AWS Bucket #
#========================#
AWS_ENDPOINT_URL=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=
AWS_BUCKET_NAME=
# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
# AWS_FORCE_PATH_STYLE=false
#========================#
# Azure Blob Storage #
#========================#
AZURE_STORAGE_CONNECTION_STRING=
AZURE_STORAGE_PUBLIC_ACCESS=false
AZURE_CONTAINER_NAME=files
#========================#
# Shared Links #
#========================#
ALLOW_SHARED_LINKS=true
# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
ALLOW_SHARED_LINKS_PUBLIC=false
#==============================#
# Static File Cache Control #
#==============================#
# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
# NODE_ENV must be set to production for these to take effect
# STATIC_CACHE_MAX_AGE=172800
# STATIC_CACHE_S_MAX_AGE=86400
# If you have another service in front of your LibreChat doing compression, disable express based compression here
# DISABLE_COMPRESSION=true
# If you have gzipped versions of uploaded images in the same folder, this will enable gzip scan and serving of these images
# Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
# ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
#===================================================#
# UI #
#===================================================#
APP_TITLE=LibreChat
# CUSTOM_FOOTER="My custom footer"
HELP_AND_FAQ_URL=https://librechat.ai
# SHOW_BIRTHDAY_ICON=true
# Google tag manager id
#ANALYTICS_GTM_ID=user provided google tag manager id
# Limit conversation file imports to a maximum size in bytes, to keep the container
# from exhausting its memory. Uncomment the line below and supply a file size in bytes;
# the example value is 250 MiB.
# CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
#===============#
# REDIS Options #
#===============#
# Enable Redis for caching and session storage
# USE_REDIS=true
# Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
# Set to false to use in-memory storage for streams while keeping Redis for other caches
# USE_REDIS_STREAMS=true
# Single Redis instance
# REDIS_URI=redis://127.0.0.1:6379
# Redis cluster (multiple nodes)
# REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
# Redis with TLS/SSL encryption and CA certificate
# REDIS_URI=rediss://127.0.0.1:6380
# REDIS_CA=/path/to/ca-cert.pem
# Elasticache may need to use an alternate dnsLookup for TLS connections. see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
# Enable alternative dnsLookup for redis
# REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
# Redis authentication (if required)
# REDIS_USERNAME=your_redis_username
# REDIS_PASSWORD=your_redis_password
# Redis key prefix configuration
# Use environment variable name for dynamic prefix (recommended for cloud deployments)
# REDIS_KEY_PREFIX_VAR=K_REVISION
# Or use static prefix directly
# REDIS_KEY_PREFIX=librechat
# Redis connection limits
# REDIS_MAX_LISTENERS=40
# Redis ping interval in seconds (0 = disabled, >0 = enabled)
# When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
# When unset or 0, no pinging is performed (recommended for most use cases)
# REDIS_PING_INTERVAL=300
# Force specific cache namespaces to use in-memory storage even when Redis is enabled
# Comma-separated list of CacheKeys
# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
# Leader Election Configuration (for multi-instance deployments with Redis)
# Duration in seconds that the leader lease is valid before it expires (default: 25)
# LEADER_LEASE_DURATION=25
# Interval in seconds at which the leader renews its lease (default: 10)
# LEADER_RENEW_INTERVAL=10
# Maximum number of retry attempts when renewing the lease fails (default: 3)
# LEADER_RENEW_ATTEMPTS=3
# Delay in seconds between retry attempts when renewing the lease (default: 0.5)
# LEADER_RENEW_RETRY_DELAY=0.5
#==================================================#
# Others #
#==================================================#
# You should leave the following commented out #
# NODE_ENV=
# E2E_USER_EMAIL=
# E2E_USER_PASSWORD=
#=====================================================#
# Cache Headers #
#=====================================================#
# Headers that control caching of the index.html #
# Default configuration prevents caching to ensure #
# users always get the latest version. Customize #
# only if you understand caching implications. #
# INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
# INDEX_PRAGMA=no-cache
# INDEX_EXPIRES=0
# no-cache: Forces validation with server before using cached version
# no-store: Prevents storing the response entirely
# must-revalidate: Prevents using stale content when offline
#=====================================================#
# OpenWeather #
#=====================================================#
OPENWEATHER_API_KEY=
#====================================#
# LibreChat Code Interpreter API #
#====================================#
# https://code.librechat.ai
# LIBRECHAT_CODE_API_KEY=your-key
#======================#
# Web Search #
#======================#
# Note: All of the following variable names can be customized.
# Omit values to allow user to provide them.
# For more information on configuration values, see:
# https://librechat.ai/docs/features/web_search
# Search Provider (Required)
# SERPER_API_KEY=your_serper_api_key
# Scraper (Required)
# FIRECRAWL_API_KEY=your_firecrawl_api_key
# Optional: Custom Firecrawl API URL
# FIRECRAWL_API_URL=your_firecrawl_api_url
# Reranker (Required)
# JINA_API_KEY=your_jina_api_key
# or
# COHERE_API_KEY=your_cohere_api_key
#======================#
# MCP Configuration #
#======================#
# Treat 401/403 responses as OAuth requirement when no oauth metadata found
# MCP_OAUTH_ON_AUTH_ERROR=true
# Timeout for OAuth detection requests in milliseconds
# MCP_OAUTH_DETECTION_TIMEOUT=5000
# Cache connection status checks for this many milliseconds to avoid expensive verification
# MCP_CONNECTION_CHECK_TTL=60000
# Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
# When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
# MCP_SKIP_CODE_CHALLENGE_CHECK=false
# Circuit breaker: max connect/disconnect cycles before tripping (per server)
# MCP_CB_MAX_CYCLES=7
# Circuit breaker: sliding window (ms) for counting cycles
# MCP_CB_CYCLE_WINDOW_MS=45000
# Circuit breaker: cooldown (ms) after the cycle breaker trips
# MCP_CB_CYCLE_COOLDOWN_MS=15000
# Circuit breaker: max consecutive failed connection rounds before backoff
# MCP_CB_MAX_FAILED_ROUNDS=3
# Circuit breaker: sliding window (ms) for counting failed rounds
# MCP_CB_FAILED_WINDOW_MS=120000
# Circuit breaker: base backoff (ms) after failed round threshold is reached
# MCP_CB_BASE_BACKOFF_MS=30000
# Circuit breaker: max backoff cap (ms) for exponential backoff
# MCP_CB_MAX_BACKOFF_MS=300000
+8
View File
@@ -0,0 +1,8 @@
# Auto-generated from pyproject.toml — do not edit manually
ALFRED_VERSION=0.1.7
PYTHON_VERSION=3.14.3
IMAGE_NAME=alfred_media_organizer
SERVICE_NAME=alfred
LIBRECHAT_VERSION=v0.8.4
RAG_VERSION=v0.7.3
UV_VERSION=0.11.6
+9 -1
View File
@@ -55,7 +55,7 @@ coverage.xml
Thumbs.db Thumbs.db
# Secrets # Secrets
.env .env.secrets
# Backup files # Backup files
*.backup *.backup
@@ -65,3 +65,11 @@ data/*
# Application logs # Application logs
logs/* logs/*
# Documentation folder
docs/
# .md files
*.md
#
+30 -61
View File
@@ -2,46 +2,36 @@
# check=skip=InvalidDefaultArgInFrom # check=skip=InvalidDefaultArgInFrom
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG PYTHON_VERSION_SHORT ARG UV_VERSION
ARG RUNNER
# Stage 0: uv binary (workaround — --from doesn't support ARG expansion)
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
# =========================================== # ===========================================
# Stage 1: Builder # Stage 1: Builder
# =========================================== # ===========================================
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
# Re-declare ARGs after FROM to make them available in this stage
ARG RUNNER
# STFU - No need - Write logs asap
ENV DEBIAN_FRONTEND=noninteractive \ ENV DEBIAN_FRONTEND=noninteractive \
PYTHONDONTWRITEBYTECODE=1 \ PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 PYTHONUNBUFFERED=1 \
UV_PROJECT_ENVIRONMENT=/venv
# Install build dependencies (needs root) # Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
build-essential \ --mount=type=cache,target=/var/lib/apt,sharing=locked \
&& rm -rf /var/lib/apt/lists/* apt-get update \
&& apt-get install -y --no-install-recommends build-essential
# Install runner globally (needs root) - Save cache for future # Install uv globally
RUN --mount=type=cache,target=/root/.cache/pip \ COPY --from=uv-bin /uv /usr/local/bin/uv
pip install $RUNNER
# Set working directory for dependency installation
WORKDIR /tmp WORKDIR /tmp
# Copy dependency files COPY pyproject.toml uv.lock Makefile ./
COPY pyproject.toml poetry.lock* uv.lock* Makefile ./
# Install dependencies as root (to avoid permission issues with system packages) # Install dependencies into /venv
RUN --mount=type=cache,target=/root/.cache/pip \ RUN --mount=type=cache,target=/root/.cache/uv uv sync
--mount=type=cache,target=/root/.cache/pypoetry \
--mount=type=cache,target=/root/.cache/uv \
if [ "$RUNNER" = "poetry" ]; then \
poetry config virtualenvs.create false && \
poetry install --only main --no-root; \
elif [ "$RUNNER" = "uv" ]; then \
uv pip install --system -r pyproject.toml; \
fi
COPY scripts/ ./scripts/ COPY scripts/ ./scripts/
COPY .env.example ./ COPY .env.example ./
@@ -51,16 +41,7 @@ COPY .env.example ./
# =========================================== # ===========================================
FROM builder AS test FROM builder AS test
ARG RUNNER RUN --mount=type=cache,target=/root/.cache/uv uv sync --group dev
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/pypoetry \
--mount=type=cache,target=/root/.cache/uv \
if [ "$RUNNER" = "poetry" ]; then \
poetry install --no-root; \
elif [ "$RUNNER" = "uv" ]; then \
uv pip install --system -e .[dev]; \
fi
COPY alfred/ ./alfred COPY alfred/ ./alfred
COPY scripts ./scripts COPY scripts ./scripts
@@ -71,51 +52,39 @@ COPY tests/ ./tests
# =========================================== # ===========================================
FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime
ARG PYTHON_VERSION_SHORT ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
# TODO: Do we still need all of these keys?
ENV LLM_PROVIDER=deepseek \
MEMORY_STORAGE_DIR=/data/memory \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONPATH=/home/appuser \ PYTHONPATH=/home/appuser \
PYTHONUNBUFFERED=1 PATH="/venv/bin:$PATH"
# Install runtime dependencies (needs root) # Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
ca-certificates \ --mount=type=cache,target=/var/lib/apt,sharing=locked \
&& rm -rf /var/lib/apt/lists/* \ apt-get update \
&& apt-get clean && apt-get install -y --no-install-recommends ca-certificates
# Create non-root user # Create non-root user
RUN useradd -m -u 1000 -s /bin/bash appuser RUN useradd -m -u 1000 -s /bin/bash appuser
# Create data directories (needs root for /data) # Create data directories
RUN mkdir -p /data /logs \ RUN mkdir -p /data /logs \
&& chown -R appuser:appuser /data /logs && chown -R appuser:appuser /data /logs
# Switch to non-root user
USER appuser USER appuser
# Set working directory (owned by appuser)
WORKDIR /home/appuser WORKDIR /home/appuser
# Copy Python packages from builder stage # Copy venv from builder stage
COPY --from=builder /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages COPY --from=builder /venv /venv
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy application code (already owned by appuser) # Copy application code
COPY --chown=appuser:appuser alfred/ ./alfred COPY --chown=appuser:appuser alfred/ ./alfred
COPY --chown=appuser:appuser scripts/ ./scripts COPY --chown=appuser:appuser scripts/ ./scripts
COPY --chown=appuser:appuser .env.example ./ COPY --chown=appuser:appuser .env.example ./
COPY --chown=appuser:appuser pyproject.toml ./ COPY --chown=appuser:appuser pyproject.toml ./
# Create volumes for persistent data
VOLUME ["/data", "/logs"] VOLUME ["/data", "/logs"]
# Expose port
EXPOSE 8000 EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1 CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1
+27 -22
View File
@@ -1,6 +1,7 @@
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
# --- Load Config from pyproject.toml --- # --- Load Config from pyproject.toml ---
export
-include .env.make -include .env.make
# --- Profiles management --- # --- Profiles management ---
@@ -9,27 +10,29 @@ p ?= full
PROFILES_PARAM := COMPOSE_PROFILES=$(p) PROFILES_PARAM := COMPOSE_PROFILES=$(p)
# --- Commands --- # --- Commands ---
DOCKER_COMPOSE := docker compose DOCKER_COMPOSE := docker compose \
DOCKER_BUILD := docker build --no-cache \ --env-file .env.alfred \
--env-file .env.secrets \
--env-file .env.make
DOCKER_BUILD := DOCKER_BUILDKIT=1 docker build \
--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \ --build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
--build-arg PYTHON_VERSION_SHORT=$(PYTHON_VERSION_SHORT) \ --build-arg UV_VERSION=$(UV_VERSION)
--build-arg RUNNER=$(RUNNER)
# --- Phony --- # --- Phony ---
.PHONY: .env bootstrap up down restart logs ps shell build build-test install \ .PHONY: bootstrap up down restart logs ps shell build build-test install \
update install-hooks test coverage lint format clean major minor patch help update install-hooks test coverage lint format clean major minor patch help
# --- Setup --- # --- Setup ---
.env .env.make: .env.alfred .env.librechat .env.secrets .env.make:
@echo "Initializing environment..." @echo "Initializing environment..."
@python scripts/bootstrap.py \ @uv run python scripts/bootstrap.py \
&& echo "✓ Environment ready" \ && echo "✓ Environment ready" \
|| (echo "✗ Environment setup failed" && exit 1) || (echo "✗ Environment setup failed" && exit 1)
bootstrap: .env .env.make bootstrap: .env.alfred .env.librechat .env.secrets .env.make
# --- Docker --- # --- Docker ---
up: .env up: .env.alfred .env.secrets
@echo "Starting containers with profiles: [full]..." @echo "Starting containers with profiles: [full]..."
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) up -d --remove-orphans \ @$(PROFILES_PARAM) $(DOCKER_COMPOSE) up -d --remove-orphans \
&& echo "✓ Containers started" \ && echo "✓ Containers started" \
@@ -74,45 +77,45 @@ build-test: .env.make
# --- Dependencies --- # --- Dependencies ---
install: install:
@echo "Installing dependencies with $(RUNNER)..." @echo "Installing dependencies with uv..."
@$(RUNNER) install \ @uv install \
&& echo "✓ Dependencies installed" \ && echo "✓ Dependencies installed" \
|| (echo "✗ Installation failed" && exit 1) || (echo "✗ Installation failed" && exit 1)
install-hooks: install-hooks:
@echo "Installing pre-commit hooks..." @echo "Installing pre-commit hooks..."
@$(RUNNER) run pre-commit install \ @uv run pre-commit install \
&& echo "✓ Hooks installed" \ && echo "✓ Hooks installed" \
|| (echo "✗ Hook installation failed" && exit 1) || (echo "✗ Hook installation failed" && exit 1)
update: update:
@echo "Updating dependencies with $(RUNNER)..." @echo "Updating dependencies with uv..."
@$(RUNNER) update \ @uv update \
&& echo "✓ Dependencies updated" \ && echo "✓ Dependencies updated" \
|| (echo "✗ Update failed" && exit 1) || (echo "✗ Update failed" && exit 1)
# --- Quality --- # --- Quality ---
test: test:
@echo "Running tests..." @echo "Running tests..."
@$(RUNNER) run pytest \ @uv run pytest \
&& echo "✓ Tests passed" \ && echo "✓ Tests passed" \
|| (echo "✗ Tests failed" && exit 1) || (echo "✗ Tests failed" && exit 1)
coverage: coverage:
@echo "Running tests with coverage..." @echo "Running tests with coverage..."
@$(RUNNER) run pytest --cov=. --cov-report=html --cov-report=term \ @uv run pytest --cov=. --cov-report=html --cov-report=term \
&& echo "✓ Coverage report generated" \ && echo "✓ Coverage report generated" \
|| (echo "✗ Coverage failed" && exit 1) || (echo "✗ Coverage failed" && exit 1)
lint: lint:
@echo "Linting code..." @echo "Linting code..."
@$(RUNNER) run ruff check --fix . \ @uv run ruff check --fix . \
&& echo "✓ Linting complete" \ && echo "✓ Linting complete" \
|| (echo "✗ Linting failed" && exit 1) || (echo "✗ Linting failed" && exit 1)
format: format:
@echo "Formatting code..." @echo "Formatting code..."
@$(RUNNER) run ruff format . && $(RUNNER) run ruff check --fix . \ @uv run ruff format . && uv run ruff check --fix . \
&& echo "✓ Code formatted" \ && echo "✓ Code formatted" \
|| (echo "✗ Formatting failed" && exit 1) || (echo "✗ Formatting failed" && exit 1)
@@ -125,7 +128,7 @@ clean:
# --- Versioning --- # --- Versioning ---
major minor patch: _check-main major minor patch: _check-main
@echo "Bumping $@ version..." @echo "Bumping $@ version..."
@$(RUNNER) run bump-my-version bump $@ \ @uv run bump-my-version bump $@ \
&& echo "✓ Version bumped" \ && echo "✓ Version bumped" \
|| (echo "✗ Version bump failed" && exit 1) || (echo "✗ Version bump failed" && exit 1)
@@ -138,8 +141,7 @@ major minor patch: _check-main
_ci-dump-config: _ci-dump-config:
@echo "image_name=$(IMAGE_NAME)" @echo "image_name=$(IMAGE_NAME)"
@echo "python_version=$(PYTHON_VERSION)" @echo "python_version=$(PYTHON_VERSION)"
@echo "python_version_short=$(PYTHON_VERSION_SHORT)" @echo "uv_version=$(UV_VERSION)"
@echo "runner=$(RUNNER)"
@echo "service_name=$(SERVICE_NAME)" @echo "service_name=$(SERVICE_NAME)"
_ci-run-tests:build-test _ci-run-tests:build-test
@@ -161,6 +163,9 @@ help:
@echo "" @echo ""
@echo "Usage: make [target] [p=profile1,profile2]" @echo "Usage: make [target] [p=profile1,profile2]"
@echo "" @echo ""
@echo "Setup:"
@echo " bootstrap Generate .env.alfred, .env.librechat, .env.secrets and .env.make"
@echo ""
@echo "Docker:" @echo "Docker:"
@echo " up Start containers (default profile: core)" @echo " up Start containers (default profile: core)"
@echo " Example: make up p=rag,meili" @echo " Example: make up p=rag,meili"
@@ -173,7 +178,7 @@ help:
@echo "" @echo ""
@echo "Dev & Quality:" @echo "Dev & Quality:"
@echo " setup Bootstrap .env and security keys" @echo " setup Bootstrap .env and security keys"
@echo " install Install dependencies via $(RUNNER)" @echo " install Install dependencies via uv"
@echo " test Run pytest suite" @echo " test Run pytest suite"
@echo " coverage Run tests and generate HTML report" @echo " coverage Run tests and generate HTML report"
@echo " lint/format Quality and style checks" @echo " lint/format Quality and style checks"
+177 -339
View File
@@ -3,7 +3,7 @@
An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface. An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface.
[![Python 3.14](https://img.shields.io/badge/python-3.14-blue.svg)](https://www.python.org/downloads/) [![Python 3.14](https://img.shields.io/badge/python-3.14-blue.svg)](https://www.python.org/downloads/)
[![Poetry](https://img.shields.io/badge/dependency%20manager-poetry-blue)](https://python-poetry.org/) [![uv](https://img.shields.io/badge/dependency%20manager-uv-purple)](https://github.com/astral-sh/uv)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff) [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
@@ -13,9 +13,10 @@ An AI-powered agent for managing your local media library with natural language.
- 🔍 **Smart Search** — Find movies and TV shows via TMDB with rich metadata - 🔍 **Smart Search** — Find movies and TV shows via TMDB with rich metadata
- 📥 **Torrent Integration** — Search and download via qBittorrent - 📥 **Torrent Integration** — Search and download via qBittorrent
- 🧠 **Contextual Memory** — Remembers your preferences and conversation history - 🧠 **Contextual Memory** — Remembers your preferences and conversation history
- 📁 **Auto-Organization** — Keeps your media library tidy and well-structured - 📁 **Auto-Organization** — Moves and renames media files, resolves destinations, handles subtitles
- 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client - 🎞️ **Subtitle Pipeline** — Identifies, matches, and places subtitle tracks automatically
- 🖥️ **LibreChat Frontend** — Beautiful web UI included out of the box - 🔄 **Workflow Engine** — YAML-defined multi-step workflows (e.g. `organize_media`)
- 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client (LibreChat, OpenWebUI, etc.)
- 🔒 **Secure by Default** — Auto-generated secrets and encrypted credentials - 🔒 **Secure by Default** — Auto-generated secrets and encrypted credentials
## 🏗️ Architecture ## 🏗️ Architecture
@@ -26,33 +27,50 @@ Built with **Domain-Driven Design (DDD)** principles for clean separation of con
alfred/ alfred/
├── agent/ # AI agent orchestration ├── agent/ # AI agent orchestration
│ ├── llm/ # LLM clients (Ollama, DeepSeek) │ ├── llm/ # LLM clients (Ollama, DeepSeek)
│ └── tools/ # Tool implementations │ ├── tools/ # Tool implementations (api, filesystem, language)
│ └── workflows/ # YAML-defined multi-step workflows
├── application/ # Use cases & DTOs ├── application/ # Use cases & DTOs
│ ├── movies/ # Movie search use cases │ ├── movies/ # Movie search
│ ├── torrents/ # Torrent management │ ├── torrents/ # Torrent management
│ └── filesystem/ # File operations │ └── filesystem/ # File operations (move, list, subtitles, seed links)
├── domain/ # Business logic & entities ├── domain/ # Business logic & entities
│ ├── media/ # Release parsing
│ ├── movies/ # Movie entities │ ├── movies/ # Movie entities
│ ├── tv_shows/ # TV show entities │ ├── tv_shows/ # TV show entities & value objects
│ └── subtitles/ # Subtitle entities │ ├── subtitles/ # Subtitle scanner, services, knowledge base
│ └── shared/ # Common value objects (ImdbId, FilePath, FileSize)
└── infrastructure/ # External services & persistence └── infrastructure/ # External services & persistence
├── api/ # External API clients (TMDB, qBittorrent) ├── api/ # External API clients (TMDB, qBittorrent, Knaben)
├── filesystem/ # File system operations ├── filesystem/ # File manager (hard-link based, path-traversal safe)
└── persistence/ # Memory & repositories ├── persistence/ # Three-tier memory (LTM/STM/Episodic) + JSON repositories
└── subtitle/ # Subtitle infrastructure
``` ```
See [docs/architecture_diagram.md](docs/architecture_diagram.md) for detailed architectural diagrams. ### Key flows
**Agent execution:** `agent.step(user_input)` → LLM call → if tool_calls, execute each via registry → loop until no tool calls or `max_tool_iterations` → return final response.
**Media organization workflow:**
1. `resolve_destination` — Determines target folder/filename from release name
2. `move_media` — Hard-links file to library, deletes source
3. `manage_subtitles` — Scans, classifies, and places subtitle tracks
4. `create_seed_links` — Hard-links library file back to torrents/ for continued seeding
**Memory tiers:**
- **LTM** (`data/memory/ltm.json`) — Persisted config, media library, watchlist
- **STM** — Conversation history (capped at `MAX_HISTORY_MESSAGES`)
- **Episodic** — Transient search results, active downloads, recent errors
## 🚀 Quick Start ## 🚀 Quick Start
### Prerequisites ### Prerequisites
- **Python 3.14+** (required) - **Python 3.14+**
- **Poetry** (dependency manager) - **uv** (dependency manager)
- **Docker & Docker Compose** (recommended for full stack) - **Docker & Docker Compose** (recommended for full stack)
- **API Keys:** - **API Keys:**
- TMDB API key ([get one here](https://www.themoviedb.org/settings/api)) - TMDB API key ([get one here](https://www.themoviedb.org/settings/api))
- Optional: DeepSeek, OpenAI, Anthropic, or other LLM provider keys - Optional: DeepSeek or other LLM provider keys
### Installation ### Installation
@@ -64,9 +82,15 @@ cd alfred_media_organizer
# Install dependencies # Install dependencies
make install make install
# Install pre-commit hooks
make install-hooks
# Bootstrap environment (generates .env with secure secrets) # Bootstrap environment (generates .env with secure secrets)
make bootstrap make bootstrap
# Validate your .env against the schema
make validate
# Edit .env with your API keys # Edit .env with your API keys
nano .env nano .env
``` ```
@@ -94,162 +118,95 @@ The web interface will be available at **http://localhost:3080**
### Running Locally (Development) ### Running Locally (Development)
```bash ```bash
# Install dependencies uv run uvicorn alfred.app:app --reload --port 8000
poetry install
# Start the API server
poetry run uvicorn alfred.app:app --reload --port 8000
``` ```
## ⚙️ Configuration ## ⚙️ Configuration
### Environment Bootstrap ### Settings system
Alfred uses a smart bootstrap system that: `settings.toml` is the single source of truth. The schema flows:
1. **Generates secure secrets** automatically (JWT tokens, database passwords, encryption keys) ```
2. **Syncs build variables** from `pyproject.toml` (versions, image names) settings.toml → settings_schema.py → settings_bootstrap.py → .env + .env.make → settings.py
3. **Preserves existing secrets** when re-running (never overwrites your API keys) ```
4. **Computes database URIs** automatically from individual components
To add a setting: define it in `settings.toml`, run `make bootstrap`, then access via `settings.my_new_setting`.
```bash ```bash
# First time setup # First time setup
make bootstrap make bootstrap
# Re-run after updating pyproject.toml (secrets are preserved) # Validate existing .env against schema
make validate
# Re-run after settings.toml changes (existing secrets preserved)
make bootstrap make bootstrap
``` ```
### Configuration File (.env) **Never commit `.env` or `.env.make`** — both are gitignored and auto-generated.
The `.env` file is generated from `.env.example` with secure defaults: ### Key settings (.env)
```bash ```bash
# --- CORE SETTINGS --- # --- CORE ---
HOST=0.0.0.0
PORT=3080
MAX_HISTORY_MESSAGES=10 MAX_HISTORY_MESSAGES=10
MAX_TOOL_ITERATIONS=10 MAX_TOOL_ITERATIONS=10
# --- LLM CONFIGURATION --- # --- LLM ---
# Providers: 'local' (Ollama), 'deepseek', 'openai', 'anthropic', 'google' DEFAULT_LLM_PROVIDER=local # local (Ollama) | deepseek
DEFAULT_LLM_PROVIDER=local
# Local LLM (Ollama - included in Docker stack)
OLLAMA_BASE_URL=http://ollama:11434 OLLAMA_BASE_URL=http://ollama:11434
OLLAMA_MODEL=llama3.3:latest OLLAMA_MODEL=llama3.3:latest
LLM_TEMPERATURE=0.2 LLM_TEMPERATURE=0.2
# --- API KEYS (fill only what you need) --- # --- API KEYS ---
TMDB_API_KEY=your-tmdb-key-here # Required for movie search TMDB_API_KEY=your-tmdb-key # Required for movie/show search
DEEPSEEK_API_KEY= # Optional DEEPSEEK_API_KEY= # Optional
OPENAI_API_KEY= # Optional
ANTHROPIC_API_KEY= # Optional
# --- SECURITY (auto-generated, don't modify) --- # --- SECURITY (auto-generated) ---
JWT_SECRET=<auto-generated> JWT_SECRET=<auto>
JWT_REFRESH_SECRET=<auto-generated> CREDS_KEY=<auto>
CREDS_KEY=<auto-generated> MONGO_PASSWORD=<auto>
CREDS_IV=<auto-generated>
# --- DATABASES (auto-generated passwords) ---
MONGO_PASSWORD=<auto-generated>
POSTGRES_PASSWORD=<auto-generated>
``` ```
### Security Keys
Security keys are defined in `pyproject.toml` and generated automatically:
```toml
[tool.alfred.security]
jwt_secret = "32:b64" # 32 bytes, base64 URL-safe
jwt_refresh_secret = "32:b64"
creds_key = "32:hex" # 32 bytes, hexadecimal (AES-256)
creds_iv = "16:hex" # 16 bytes, hexadecimal (AES IV)
mongo_password = "16:hex"
postgres_password = "16:hex"
```
**Formats:**
- `b64` — Base64 URL-safe (for JWT tokens)
- `hex` — Hexadecimal (for encryption keys, passwords)
## 🐳 Docker Services ## 🐳 Docker Services
### Service Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ alfred-net (bridge) │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ LibreChat │───▶│ Alfred │───▶│ MongoDB │ │
│ │ :3080 │ │ (core) │ │ :27017 │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │
│ │ ▼ │
│ │ ┌──────────────┐ │
│ │ │ Ollama │ │
│ │ │ (local) │ │
│ │ └──────────────┘ │
│ │ │
│ ┌──────┴───────────────────────────────────────────────┐ │
│ │ Optional Services (profiles) │ │
│ ├──────────────┬──────────────┬──────────────┬─────────┤ │
│ │ Meilisearch │ RAG API │ VectorDB │qBittor- │ │
│ │ :7700 │ :8000 │ :5432 │ rent │ │
│ │ [meili] │ [rag] │ [rag] │[qbit..] │ │
│ └──────────────┴──────────────┴──────────────┴─────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
```
### Docker Profiles ### Docker Profiles
| Profile | Services | Use Case | | Profile | Extra services | Use case |
|---------|----------|----------| |---------|---------------|----------|
| (default) | LibreChat, Alfred, MongoDB, Ollama | Basic setup | | (default) | — | LibreChat + Alfred + MongoDB + Ollama |
| `meili` | + Meilisearch | Fast search | | `meili` | Meilisearch | Fast full-text search |
| `rag` | + RAG API, VectorDB | Document retrieval | | `rag` | RAG API + VectorDB (PostgreSQL) | Document retrieval |
| `qbittorrent` | + qBittorrent | Torrent downloads | | `qbittorrent` | qBittorrent | Torrent downloads |
| `full` | All services | Complete setup | | `full` | All of the above | Complete setup |
```bash ```bash
# Start with specific profiles make up # Start (default profile)
make up p=rag,meili
make up p=full
```
### Docker Commands
```bash
make up # Start containers (default profile)
make up p=full # Start with all services make up p=full # Start with all services
make down # Stop all containers make down # Stop
make restart # Restart containers make restart # Restart
make logs # Follow logs make logs # Follow logs
make ps # Show container status make ps # Container status
make shell # Open bash in Alfred container
make build # Build production image
make build-test # Build test image
``` ```
## 🛠️ Available Tools ## 🛠️ Available Tools
The agent has access to these tools for interacting with your media library:
| Tool | Description | | Tool | Description |
|------|-------------| |------|-------------|
| `find_media_imdb_id` | Search for movies/TV shows on TMDB by title | | `find_media_imdb_id` | Search for movies/TV shows on TMDB by title |
| `find_torrent` | Search for torrents across multiple indexers | | `find_torrent` | Search for torrents across multiple indexers |
| `get_torrent_by_index` | Get detailed info about a specific torrent result | | `get_torrent_by_index` | Get detailed info about a specific result |
| `add_torrent_by_index` | Download a torrent by its index in search results | | `add_torrent_by_index` | Download a torrent from search results |
| `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly | | `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly |
| `set_path_for_folder` | Configure folder paths for media organization | | `resolve_destination` | Compute the target library path for a release |
| `list_folder` | List contents of a folder | | `move_media` | Hard-link a file to its library destination |
| `set_language` | Set preferred language for searches | | `manage_subtitles` | Scan, classify, and place subtitle tracks |
| `create_seed_links` | Prepare torrent folder so qBittorrent keeps seeding |
| `learn` | Teach Alfred a new pattern (release group, naming convention) |
| `set_path_for_folder` | Configure folder paths |
| `list_folder` | List contents of a configured folder |
| `set_language` | Set preferred language for the session |
## 💬 Usage Examples ## 💬 Usage Examples
@@ -266,11 +223,12 @@ Alfred: I found 3 torrents for Inception (2010):
You: Download the first one You: Download the first one
Alfred: ✓ Added to qBittorrent! Download started. Alfred: ✓ Added to qBittorrent! Download started.
Saving to: /downloads/Movies/Inception (2010)/
You: What's downloading right now? You: Organize the Breaking Bad S01 download
Alfred: You have 1 active download: Alfred: ✓ Resolved destination: /tv_shows/Breaking.Bad/Season 01/
- Inception.2010.1080p.BluRay.x264 (45% complete, ETA: 12 min) ✓ Moved 6 episode files
✓ Placed 6 subtitle tracks (fr, en)
✓ Seed links created in /torrents/
``` ```
### Via API ### Via API
@@ -279,219 +237,147 @@ Alfred: You have 1 active download:
# Health check # Health check
curl http://localhost:8000/health curl http://localhost:8000/health
# Chat with the agent (OpenAI-compatible) # Chat (OpenAI-compatible)
curl -X POST http://localhost:8000/v1/chat/completions \ curl -X POST http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"model": "alfred", "model": "alfred",
"messages": [ "messages": [{"role": "user", "content": "Find The Matrix 4K"}]
{"role": "user", "content": "Find The Matrix 4K"}
]
}' }'
# List available models # List models
curl http://localhost:8000/v1/models curl http://localhost:8000/v1/models
# View memory state (debug) # View memory state
curl http://localhost:8000/memory/state curl http://localhost:8000/memory/state
# Clear session memory
curl -X POST http://localhost:8000/memory/clear-session
``` ```
### Via OpenWebUI or Other Clients Alfred is compatible with any OpenAI-compatible client. Point it at `http://localhost:8000/v1`, model `alfred`.
Alfred is compatible with any OpenAI-compatible client:
1. Add as OpenAI-compatible endpoint: `http://localhost:8000/v1`
2. Model name: `alfred`
3. No API key required (or use any placeholder)
## 🧠 Memory System ## 🧠 Memory System
Alfred uses a three-tier memory system for context management: Alfred uses a three-tier memory system:
### Long-Term Memory (LTM) | Tier | Storage | Contents | Lifetime |
- **Persistent** — Saved to JSON files |------|---------|----------|----------|
- **Contents:** Configuration, user preferences, media library state | **LTM** | JSON file (`data/memory/ltm.json`) | Config, library, watchlist, learned patterns | Permanent |
- **Survives:** Application restarts | **STM** | RAM | Conversation history (capped) | Session |
| **Episodic** | RAM | Search results, active downloads, errors | Short-lived |
### Short-Term Memory (STM)
- **Session-based** — Stored in RAM
- **Contents:** Conversation history, current workflow state
- **Cleared:** On session end or restart
### Episodic Memory
- **Transient** — Stored in RAM
- **Contents:** Search results, active downloads, recent errors
- **Cleared:** Frequently, after task completion
## 🧪 Development ## 🧪 Development
### Project Setup
```bash
# Install all dependencies (including dev)
poetry install
# Install pre-commit hooks
make install-hooks
# Run the development server
poetry run uvicorn alfred.app:app --reload
```
### Running Tests ### Running Tests
```bash ```bash
# Run all tests (parallel execution) # Run full suite (parallel)
make test make test
# Run with coverage report # Run with coverage report
make coverage make coverage
# Run specific test file # Run a single file
poetry run pytest tests/test_agent.py -v uv run pytest tests/test_agent.py -v
# Run specific test # Run a single class
poetry run pytest tests/test_config_loader.py::TestBootstrapEnv -v uv run pytest tests/test_agent.py::TestAgentInit -v
# Skip slow tests
uv run pytest -m "not slow"
``` ```
### Test coverage
The suite covers:
- **Agent loop** — tool execution, history, max iterations, error handling
- **Tool registry** — OpenAI schema format, parameter extraction
- **Prompts** — system prompt building, tool inclusion
- **Memory** — LTM/STM/Episodic operations, persistence
- **Filesystem tools** — path traversal security, folder listing
- **File manager** — hard-link, move, seed links (real filesystem, no mocks)
- **Application use cases** — `resolve_destination`, `create_seed_links`, `list_folder`, `move_media`
- **Domain** — TV show/movie entities, shared value objects (`ImdbId`, `FilePath`, `FileSize`), subtitle scanner
- **Repositories** — JSON-backed movie, TV show, subtitle repos
- **Bootstrap** — secret generation, idempotency, URI construction
- **Workflows** — YAML loading, structure validation
- **Configuration** — boundary validation for all settings
### Code Quality ### Code Quality
```bash ```bash
# Lint and auto-fix make lint # Ruff check --fix
make lint make format # Ruff format + check --fix
# Format code
make format
# Clean build artifacts
make clean
``` ```
### Adding a New Tool ### Adding a New Tool
1. **Create the tool function** in `alfred/agent/tools/`: 1. Implement the function in `alfred/agent/tools/`:
```python ```python
# alfred/agent/tools/api.py # alfred/agent/tools/api.py
def my_new_tool(param: str) -> dict[str, Any]: def my_new_tool(param: str) -> dict[str, Any]:
""" """Short description shown to the LLM to decide when to call this tool."""
Short description of what this tool does.
This will be shown to the LLM to help it decide when to use this tool.
"""
memory = get_memory() memory = get_memory()
# ...
# Your implementation here return {"status": "ok", "data": result}
result = do_something(param)
return {
"status": "success",
"data": result
}
``` ```
2. **Register in the registry** (`alfred/agent/registry.py`): 2. Register it in `alfred/agent/registry.py`:
```python ```python
tool_functions = [ tool_functions = [
# ... existing tools ... # ... existing tools ...
api_tools.my_new_tool, # Add your tool here api_tools.my_new_tool,
] ]
``` ```
The tool will be automatically registered with its parameters extracted from the function signature. The registry auto-generates the JSON schema from the function signature and docstring.
### Adding a Workflow
Create a YAML file in `alfred/agent/workflows/`:
```yaml
name: my_workflow
description: What this workflow does
steps:
- tool: resolve_destination
description: Find where the file should go
- tool: move_media
description: Move the file
```
Workflows are loaded automatically at startup.
### Version Management ### Version Management
```bash ```bash
# Bump version (must be on main branch) # Must be on main branch
make patch # 0.1.7 -> 0.1.8 make patch # 0.1.7 → 0.1.8
make minor # 0.1.7 -> 0.2.0 make minor # 0.1.7 → 0.2.0
make major # 0.1.7 -> 1.0.0 make major # 0.1.7 → 1.0.0
``` ```
## 📚 API Reference ## 📚 API Reference
### Endpoints ### Endpoints
#### `GET /health` | Method | Path | Description |
Health check endpoint. |--------|------|-------------|
| `GET` | `/health` | Health check |
```json | `GET` | `/v1/models` | List models (OpenAI-compatible) |
{ | `POST` | `/v1/chat/completions` | Chat (OpenAI-compatible, streaming supported) |
"status": "healthy", | `GET` | `/memory/state` | Full memory dump (debug) |
"version": "0.1.7" | `POST` | `/memory/clear-session` | Clear STM + Episodic |
} | `GET` | `/memory/episodic/search-results` | Current search results |
```
#### `GET /v1/models`
List available models (OpenAI-compatible).
```json
{
"object": "list",
"data": [
{
"id": "alfred",
"object": "model",
"owned_by": "alfred"
}
]
}
```
#### `POST /v1/chat/completions`
Chat with the agent (OpenAI-compatible).
**Request:**
```json
{
"model": "alfred",
"messages": [
{"role": "user", "content": "Find Inception"}
],
"stream": false
}
```
**Response:**
```json
{
"id": "chatcmpl-xxx",
"object": "chat.completion",
"created": 1234567890,
"model": "alfred",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "I found Inception (2010)..."
},
"finish_reason": "stop"
}]
}
```
#### `GET /memory/state`
View full memory state (debug endpoint).
#### `POST /memory/clear-session`
Clear session memories (STM + Episodic).
## 🔧 Troubleshooting ## 🔧 Troubleshooting
### Agent doesn't respond ### Agent doesn't respond
1. Check API keys in `.env` 1. Check API keys in `.env`
2. Verify LLM provider is running: 2. Verify the LLM is running:
```bash ```bash
# For Ollama
docker logs alfred-ollama docker logs alfred-ollama
# Check if model is pulled
docker exec alfred-ollama ollama list docker exec alfred-ollama ollama list
``` ```
3. Check Alfred logs: `docker logs alfred-core` 3. Check Alfred logs: `docker logs alfred-core`
@@ -499,76 +385,34 @@ Clear session memories (STM + Episodic).
### qBittorrent connection failed ### qBittorrent connection failed
1. Verify qBittorrent is running: `docker ps | grep qbittorrent` 1. Verify qBittorrent is running: `docker ps | grep qbittorrent`
2. Check Web UI is enabled in qBittorrent settings 2. Check credentials in `.env` (`QBITTORRENT_URL`, `QBITTORRENT_USERNAME`, `QBITTORRENT_PASSWORD`)
3. Verify credentials in `.env`:
```bash
QBITTORRENT_URL=http://qbittorrent:16140
QBITTORRENT_USERNAME=admin
QBITTORRENT_PASSWORD=<check-your-env>
```
### Database connection issues
1. Check MongoDB is healthy: `docker logs alfred-mongodb`
2. Verify credentials match in `.env`
3. Try restarting: `make restart`
### Memory not persisting ### Memory not persisting
1. Check `data/` directory exists and is writable 1. Check `data/` directory is writable
2. Verify volume mounts in `docker-compose.yaml` 2. Verify volume mounts in `docker-compose.yaml`
3. Check file permissions: `ls -la data/`
### Bootstrap fails ### Bootstrap fails
1. Ensure `.env.example` exists ```bash
2. Check `pyproject.toml` has required sections: make validate # Check what's wrong with .env
```toml make bootstrap # Regenerate (preserves existing secrets)
[tool.alfred.settings]
[tool.alfred.security]
``` ```
3. Run manually: `python scripts/bootstrap.py`
### Tests failing ### Tests failing
1. Update dependencies: `poetry install`
2. Check Python version: `python --version` (needs 3.14+)
3. Run specific failing test with verbose output:
```bash ```bash
poetry run pytest tests/test_failing.py -v --tb=long uv run pytest tests/test_failing.py -v --tb=long
``` ```
## 🤝 Contributing ## 🤝 Contributing
Contributions are welcome! Please follow these steps: 1. Fork the repository
2. Create a feature branch: `git checkout -b feat/my-feature`
1. **Fork** the repository 3. Make your changes + add tests
2. **Create** a feature branch: `git checkout -b feature/my-feature` 4. Run `make test && make lint && make format`
3. **Make** your changes 5. Commit with [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`, `infra:`
4. **Run** tests: `make test` 6. Open a Pull Request
5. **Run** linting: `make lint && make format`
6. **Commit**: `git commit -m "feat: add my feature"`
7. **Push**: `git push origin feature/my-feature`
8. **Create** a Pull Request
### Commit Convention
We use [Conventional Commits](https://www.conventionalcommits.org/):
- `feat:` New feature
- `fix:` Bug fix
- `docs:` Documentation
- `refactor:` Code refactoring
- `test:` Adding tests
- `chore:` Maintenance
## 📖 Documentation
- [Architecture Diagram](docs/architecture_diagram.md) — System architecture overview
- [Class Diagram](docs/class_diagram.md) — Class structure and relationships
- [Component Diagram](docs/component_diagram.md) — Component interactions
- [Sequence Diagram](docs/sequence_diagram.md) — Sequence flows
- [Flowchart](docs/flowchart.md) — System flowcharts
## 📄 License ## 📄 License
@@ -576,19 +420,13 @@ MIT License — see [LICENSE](LICENSE) file for details.
## 🙏 Acknowledgments ## 🙏 Acknowledgments
- [LibreChat](https://github.com/danny-avila/LibreChat) — Beautiful chat interface - [LibreChat](https://github.com/danny-avila/LibreChat) — Chat interface
- [Ollama](https://ollama.ai/) — Local LLM runtime - [Ollama](https://ollama.ai/) — Local LLM runtime
- [DeepSeek](https://www.deepseek.com/) — LLM provider - [DeepSeek](https://www.deepseek.com/) — LLM provider
- [TMDB](https://www.themoviedb.org/) — Movie database - [TMDB](https://www.themoviedb.org/) — Movie & TV database
- [qBittorrent](https://www.qbittorrent.org/) — Torrent client - [qBittorrent](https://www.qbittorrent.org/) — Torrent client
- [FastAPI](https://fastapi.tiangolo.com/) — Web framework - [FastAPI](https://fastapi.tiangolo.com/) — Web framework
- [Pydantic](https://docs.pydantic.dev/) — Data validation - [uv](https://github.com/astral-sh/uv) — Fast Python package manager
## 📬 Support
- 📧 Email: francois.hodiaumont@gmail.com
- 🐛 Issues: [GitHub Issues](https://github.com/francwa/alfred_media_organizer/issues)
- 💬 Discussions: [GitHub Discussions](https://github.com/francwa/alfred_media_organizer/discussions)
--- ---
+28 -2
View File
@@ -4,6 +4,7 @@ import json
from typing import Any from typing import Any
from alfred.infrastructure.persistence import get_memory from alfred.infrastructure.persistence import get_memory
from alfred.infrastructure.persistence.memory import MemoryRegistry
from .registry import Tool from .registry import Tool
@@ -13,6 +14,7 @@ class PromptBuilder:
def __init__(self, tools: dict[str, Tool]): def __init__(self, tools: dict[str, Tool]):
self.tools = tools self.tools = tools
self._memory_registry = MemoryRegistry()
def build_tools_spec(self) -> list[dict[str, Any]]: def build_tools_spec(self) -> list[dict[str, Any]]:
"""Build the tool specification for the LLM API.""" """Build the tool specification for the LLM API."""
@@ -109,11 +111,30 @@ class PromptBuilder:
return "\n".join(lines) return "\n".join(lines)
def _format_memory_schema(self) -> str:
    """Render the memory registry schema as a prompt section.

    Reads ``self._memory_registry.schema()`` (a mapping of tier -> list of
    component dicts) and emits one heading per non-empty tier, one line per
    component (name, access mode, description) and one bullet per declared
    field. Components without an ``access`` entry are reported as ``read``.

    Returns:
        The section as a single newline-joined string.
    """
    headings = {
        "ltm": "LONG-TERM (persisted)",
        "stm": "SHORT-TERM (session)",
        "episodic": "EPISODIC (volatile)",
    }
    out = ["MEMORY COMPONENTS:"]
    for tier, components in self._memory_registry.schema().items():
        if components:
            # Tiers without a known label fall back to their upper-cased key.
            out.append(f"\n [{headings.get(tier, tier.upper())}]")
            for component in components:
                mode = component.get("access", "read")
                out.append(
                    f" {component['name']} ({mode}): {component['description']}"
                )
                out.extend(
                    f" · {field_name}: {field_desc}"
                    for field_name, field_desc in component.get("fields", {}).items()
                )
    return "\n".join(out)
def _format_config_context(self, memory) -> str: def _format_config_context(self, memory) -> str:
"""Format configuration context.""" """Format configuration context."""
lines = ["CURRENT CONFIGURATION:"] lines = ["CURRENT CONFIGURATION:"]
if memory.ltm.config: folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()}
for key, value in memory.ltm.config.items(): if folders:
for key, value in folders.items():
lines.append(f" - {key}: {value}") lines.append(f" - {key}: {value}")
else: else:
lines.append(" (no configuration set)") lines.append(" (no configuration set)")
@@ -138,6 +159,9 @@ class PromptBuilder:
tools_desc = self._format_tools_description() tools_desc = self._format_tools_description()
tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else "" tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
# Memory schema
memory_schema = self._format_memory_schema()
# Configuration # Configuration
config_section = self._format_config_context(memory) config_section = self._format_config_context(memory)
if config_section: if config_section:
@@ -172,6 +196,8 @@ EXAMPLES:
{language_instruction} {language_instruction}
{tools_section} {tools_section}
{memory_schema}
{config_section} {config_section}
{stm_context} {stm_context}
{episodic_context} {episodic_context}
+5
View File
@@ -97,6 +97,11 @@ def make_tools(settings) -> dict[str, Tool]:
tool_functions = [ tool_functions = [
fs_tools.set_path_for_folder, fs_tools.set_path_for_folder,
fs_tools.list_folder, fs_tools.list_folder,
fs_tools.resolve_destination,
fs_tools.move_media,
fs_tools.manage_subtitles,
fs_tools.create_seed_links,
fs_tools.learn,
api_tools.find_media_imdb_id, api_tools.find_media_imdb_id,
api_tools.find_torrent, api_tools.find_torrent,
api_tools.add_torrent_by_index, api_tools.add_torrent_by_index,
+191 -1
View File
@@ -1,10 +1,200 @@
"""Filesystem tools for folder management.""" """Filesystem tools for folder management."""
from pathlib import Path
from typing import Any from typing import Any
from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase import alfred as _alfred_pkg
import yaml
from alfred.application.filesystem import (
CreateSeedLinksUseCase,
ListFolderUseCase,
ManageSubtitlesUseCase,
MoveMediaUseCase,
ResolveDestinationUseCase,
SetFolderPathUseCase,
)
from alfred.infrastructure.filesystem import FileManager from alfred.infrastructure.filesystem import FileManager
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
def move_media(source: str, destination: str) -> dict[str, Any]:
    """
    Move a media file to a destination path.

    Copies the file safely first (with integrity check), then deletes the source.
    Use this to organise a downloaded file into the media library.

    Args:
        source: Absolute path to the source file.
        destination: Absolute path to the destination file (must not already exist).

    Returns:
        Dict with status, source, destination, filename, and size — or error details.
    """
    # The docstring above is surfaced to the LLM as the tool description,
    # so its wording is part of the tool's behaviour.
    response = MoveMediaUseCase(FileManager()).execute(source, destination)
    return response.to_dict()
def resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """
    Compute the destination path in the media library for a release.

    Call this before move_media to get the correct library path. Handles:
    - Parsing the release name (quality, codec, group, season/episode)
    - Looking up any existing series folder in the library
    - Applying group-conflict rules (asks user if ambiguous)
    - Building the full destination path with correct naming conventions

    Args:
        release_name: Raw release folder or file name
            (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST").
        source_file: Absolute path to the source video file (used for extension).
        tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz").
        tmdb_year: Release/start year from TMDB (e.g. 1997).
        tmdb_episode_title: Episode title from TMDB for single-episode releases
            (e.g. "The Routine"). Omit for season packs and movies.
        confirmed_folder: If a previous call returned needs_clarification, pass
            the user-chosen folder name here to proceed.

    Returns:
        On success: dict with status, library_file, series_folder, season_folder,
            series_folder_name, season_folder_name, filename,
            is_new_series_folder.
        On ambiguity: dict with status="needs_clarification", question, options.
        On error: dict with status="error", error, message.
    """
    # Every argument is forwarded by keyword; the use case owns all the logic.
    request = {
        "release_name": release_name,
        "source_file": source_file,
        "tmdb_title": tmdb_title,
        "tmdb_year": tmdb_year,
        "tmdb_episode_title": tmdb_episode_title,
        "confirmed_folder": confirmed_folder,
    }
    resolved = ResolveDestinationUseCase().execute(**request)
    return resolved.to_dict()
def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
    """
    Prepare a torrent subfolder so qBittorrent can keep seeding after a move.

    Hard-links the video file from the library into torrents/<original_folder_name>/,
    then copies all remaining files from the original download folder (subtitles,
    .nfo, .jpg, .txt, …) so the torrent data is complete.

    Call this after move_media when the user wants to keep seeding.

    Args:
        library_file: Absolute path to the video file now in the library.
        original_download_folder: Absolute path to the original download folder
            (may still contain subs, nfo, and other release files).

    Returns:
        Dict with status, torrent_subfolder, linked_file, copied_files,
        copied_count, skipped — or error details.
    """
    # Thin adapter: the use case does the work, the tool returns its dict form.
    response = CreateSeedLinksUseCase(FileManager()).execute(
        library_file, original_download_folder
    )
    return response.to_dict()
def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
    """
    Place subtitle files alongside an organised video file.

    Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source
    video, filters them according to the user's SubtitlePreferences (languages,
    min size, SDH, forced), and hard-links the passing files next to the
    destination video with the correct naming convention:
        fr.srt / fr.sdh.srt / fr.forced.srt / en.srt …

    Call this right after move_media or copy_media, passing the same source and
    destination paths. If no subtitles are found, returns ok with placed_count=0.

    Args:
        source_video: Absolute path to the original video file (in the download folder).
        destination_video: Absolute path to the placed video file (in the library).

    Returns:
        Dict with status, placed list (source, destination, filename), placed_count,
        skipped_count — or error details.
    """
    # Thin adapter: the use case does the work, the tool returns its dict form.
    response = ManageSubtitlesUseCase(FileManager()).execute(
        source_video, destination_video
    )
    return response.to_dict()
def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
    """
    Teach Alfred a new token mapping and persist it to the learned knowledge pack.

    Use this when a subtitle file contains an unrecognised token — after confirming
    with the user what the token means, call learn() to persist it so Alfred
    recognises it in future scans.

    Args:
        pack: Knowledge pack name. Currently only "subtitles" is supported.
        category: Category within the pack: "languages", "types", or "formats".
        key: The entry key — e.g. ISO 639-1 language code ("es"), type id ("sdh").
        values: List of tokens to add — e.g. ["spanish", "espanol", "spa"].

    Returns:
        Dict with status, added_count, and the updated token list — or a dict
        with status="error", an error code and a message. This function never
        raises for bad input or filesystem problems; it always reports them
        through the returned dict.
    """
    valid_packs = {"subtitles"}
    valid_categories = {"languages", "types", "formats"}

    def _error(code: str, message: str) -> dict[str, Any]:
        return {"status": "error", "error": code, "message": message}

    if pack not in valid_packs:
        return _error("unknown_pack", f"Unknown pack '{pack}'. Valid: {sorted(valid_packs)}")
    if category not in valid_categories:
        return _error(
            "unknown_category",
            f"Unknown category '{category}'. Valid: {sorted(valid_categories)}",
        )
    # A blank key would persist an entry nothing can ever look up.
    if not isinstance(key, str) or not key.strip():
        return _error("invalid_key", "key must be a non-empty string")

    # Resolves to "subtitles_learned.yaml" for the only pack accepted today.
    learned_path = _LEARNED_ROOT / f"{pack}_learned.yaml"

    data: Any = {}
    try:
        with open(learned_path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        data = {}  # First write for this pack: start from an empty document.
    except Exception as e:  # Tool boundary: report, never raise to the agent loop.
        return _error("read_failed", str(e))

    # Refuse to touch a file whose shape we do not understand rather than
    # crash on it or silently overwrite hand-edited content.
    if not isinstance(data, dict):
        return _error("invalid_format", f"{learned_path.name}: top level is not a mapping")
    cat_data = data.get(category)
    if cat_data is None:
        cat_data = data[category] = {}
    elif not isinstance(cat_data, dict):
        return _error("invalid_format", f"{learned_path.name}: '{category}' is not a mapping")
    entry = cat_data.get(key)
    if entry is None:
        entry = cat_data[key] = {"tokens": []}
    elif not isinstance(entry, dict):
        return _error(
            "invalid_format",
            f"{learned_path.name}: '{category}.{key}' is not a mapping",
        )
    existing = entry.get("tokens") or []  # tolerate an explicit `tokens: null`
    if not isinstance(existing, list):
        return _error(
            "invalid_format",
            f"{learned_path.name}: '{category}.{key}.tokens' is not a list",
        )

    # Order-preserving merge that also drops duplicates inside `values` itself.
    merged = list(existing)
    new_tokens: list[str] = []
    for token in values or []:
        if token not in merged:
            merged.append(token)
            new_tokens.append(token)
    entry["tokens"] = merged

    # Write to a sibling temp file, then swap it in, so a failed dump cannot
    # leave a truncated knowledge file behind.
    tmp = learned_path.with_suffix(".yaml.tmp")
    try:
        _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        # replace() overwrites an existing target on every platform;
        # rename() raises on Windows when the target already exists.
        tmp.replace(learned_path)
    except Exception as e:  # Tool boundary: report, never raise to the agent loop.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass  # Best-effort cleanup; the write failure is what gets reported.
        return _error("write_failed", str(e))

    return {
        "status": "ok",
        "pack": pack,
        "category": category,
        "key": key,
        "added_count": len(new_tokens),
        "tokens": entry["tokens"],
    }
def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]: def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
""" """
+3
View File
@@ -0,0 +1,3 @@
from .loader import WorkflowLoader
__all__ = ["WorkflowLoader"]
+52
View File
@@ -0,0 +1,52 @@
"""WorkflowLoader — autodiscovers and loads workflow YAML files.
Scans the workflows/ directory for all .yaml files and exposes them
as dicts. No manual registration needed — drop a new .yaml file and
it will be picked up automatically.
"""
import logging
from pathlib import Path
import yaml
logger = logging.getLogger(__name__)
_WORKFLOWS_DIR = Path(__file__).parent
class WorkflowLoader:
"""
Loads all workflow definitions from the workflows/ directory.
Usage:
loader = WorkflowLoader()
all_workflows = loader.all()
workflow = loader.get("organize_media")
"""
def __init__(self):
self._workflows: dict[str, dict] = {}
self._load()
def _load(self) -> None:
for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
try:
data = yaml.safe_load(path.read_text(encoding="utf-8"))
name = data.get("name") or path.stem
self._workflows[name] = data
logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
except Exception as e:
logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
def all(self) -> dict[str, dict]:
"""Return all loaded workflows keyed by name."""
return self._workflows
def get(self, name: str) -> dict | None:
"""Return a specific workflow by name, or None if not found."""
return self._workflows.get(name)
def names(self) -> list[str]:
"""Return all available workflow names."""
return list(self._workflows.keys())
@@ -0,0 +1,69 @@
name: manage_subtitles
description: >
Place subtitle files alongside a video that has just been organised into the library.
Detects the release pattern automatically, identifies and classifies all tracks,
filters by user rules, and hard-links matching files to the destination.
If any tracks are unrecognised, asks the user and optionally teaches Alfred.
trigger:
examples:
- "handle subtitles for The X-Files S01E01"
- "place the subs next to the file"
- "subtitles are in the Subs/ folder"
- "add subtitles"
tools:
- manage_subtitles
- learn
memory:
SubtitlePreferences: read
Workflow: read-write
steps:
- id: place_subtitles
tool: manage_subtitles
description: >
Detect release pattern, identify and classify all subtitle tracks,
filter by rules, hard-link matching files next to the destination video.
Reads SubtitlePreferences from LTM for language/type/format filtering.
params:
source_video: "{source_video}"
destination_video: "{destination_video}"
imdb_id: "{imdb_id}"
media_type: "{media_type}"
release_group: "{release_group}"
season: "{season}"
episode: "{episode}"
on_result:
ok_placed_zero: skip # no subtitles found — not an error
needs_clarification: ask_user # unrecognised tokens found
- id: ask_user
  description: >
    Some tracks could not be classified. Show the user the unresolved tokens
    and ask if they want to teach Alfred what they mean.
    If yes → go to learn_tokens. If no → end workflow.
  ask_user:
    question: >
      I could not identify some tokens in the subtitle files: {unresolved}.
      Do you want to teach me what they mean?
    answers:
      # Keys are quoted on purpose: YAML 1.1 loaders such as PyYAML resolve
      # bare yes/no to the booleans true/false instead of strings.
      "yes": { next_step: learn_tokens }
      "no": { next_step: end }
- id: learn_tokens
tool: learn
description: >
Persist a new token mapping to the learned knowledge pack so Alfred
recognises it in future scans without asking again.
params:
pack: "subtitles"
category: "{token_category}" # "languages" or "types"
key: "{token_key}" # e.g. "es", "de"
values: "{token_values}" # e.g. ["spanish", "espanol"]
subtitle_naming:
standard: "{lang}.{ext}"
sdh: "{lang}.sdh.{ext}"
forced: "{lang}.forced.{ext}"
@@ -0,0 +1,82 @@
name: organize_media
description: >
Organise a downloaded series or movie into the media library.
Triggered when the user asks to move/organize a specific title.
Always moves the video file. Optionally creates seed links in the
torrents folder so qBittorrent can keep seeding.
trigger:
examples:
- "organize Breaking Bad"
- "organise Severance season 2"
- "move Inception to my library"
- "organize Breaking Bad season 1, keep seeding"
tools:
- list_folder
- find_media_imdb_id
- resolve_destination
- move_media
- manage_subtitles
- create_seed_links
memory:
WorkspacePaths: read
LibraryPaths: read
Library: read-write
Workflow: read-write
Entities: read-write
steps:
- id: list_downloads
tool: list_folder
description: List the download folder to find the target files.
params:
folder_type: download
- id: identify_media
tool: find_media_imdb_id
description: Confirm title, type (series/movie), and metadata via TMDB.
- id: resolve_destination
tool: resolve_destination
description: >
Compute the correct destination path in the library.
Uses the release name + TMDB metadata to build folder and file names.
If multiple series folders exist for this title, returns
needs_clarification and the user must pick one (re-call with confirmed_folder).
- id: move_file
tool: move_media
description: >
Move the video file to library_file returned by resolve_destination.
- id: handle_subtitles
tool: manage_subtitles
description: >
Place subtitle files alongside the video in the library.
Pass the original source path and the new library destination path.
on_missing: skip
- id: ask_seeding
ask_user:
question: "Do you want to keep seeding this torrent?"
answers:
"yes": { next_step: create_seed_links }
"no": { next_step: update_library }
- id: create_seed_links
tool: create_seed_links
description: >
Hard-link the library video file back into torrents/<original_folder>/
and copy all remaining files from the original download folder
(subs, nfo, jpg, …) so the torrent stays complete for seeding.
- id: update_library
memory_write: Library
description: Add the entry to the LTM library after a successful move.
naming_convention:
# Resolved by domain entities (Movie, Episode) — not hardcoded here
tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
movie: "{title} ({year})/{title}.{year}.{ext}"
+1 -1
View File
@@ -29,7 +29,7 @@ app = FastAPI(
version="0.2.0", version="0.2.0",
) )
memory_path = Path(settings.data_storage) / "memory" memory_path = Path(settings.data_storage_dir) / "memory"
init_memory(storage_dir=str(memory_path)) init_memory(storage_dir=str(memory_path))
logger.info(f"Memory context initialized (path: {memory_path})") logger.info(f"Memory context initialized (path: {memory_path})")
+21 -1
View File
@@ -1,12 +1,32 @@
"""Filesystem use cases.""" """Filesystem use cases."""
from .dto import ListFolderResponse, SetFolderPathResponse from .create_seed_links import CreateSeedLinksUseCase
from .dto import (
CreateSeedLinksResponse,
ListFolderResponse,
ManageSubtitlesResponse,
MoveMediaResponse,
PlacedSubtitle,
SetFolderPathResponse,
)
from .list_folder import ListFolderUseCase from .list_folder import ListFolderUseCase
from .manage_subtitles import ManageSubtitlesUseCase
from .move_media import MoveMediaUseCase
from .resolve_destination import ResolveDestinationUseCase, ResolvedDestination
from .set_folder_path import SetFolderPathUseCase from .set_folder_path import SetFolderPathUseCase
__all__ = [ __all__ = [
"SetFolderPathUseCase", "SetFolderPathUseCase",
"ListFolderUseCase", "ListFolderUseCase",
"CreateSeedLinksUseCase",
"MoveMediaUseCase",
"ManageSubtitlesUseCase",
"ResolveDestinationUseCase",
"ResolvedDestination",
"SetFolderPathResponse", "SetFolderPathResponse",
"ListFolderResponse", "ListFolderResponse",
"CreateSeedLinksResponse",
"MoveMediaResponse",
"ManageSubtitlesResponse",
"PlacedSubtitle",
] ]
@@ -0,0 +1,54 @@
"""CreateSeedLinksUseCase — prepares a torrent folder for continued seeding."""
import logging
from alfred.infrastructure.filesystem import FileManager
from alfred.infrastructure.persistence import get_memory
from .dto import CreateSeedLinksResponse
logger = logging.getLogger(__name__)
class CreateSeedLinksUseCase:
    """
    Use case that rebuilds a torrent subfolder so qBittorrent can keep seeding.

    The filesystem work is delegated to ``FileManager.create_seed_links``; this
    class supplies the configured torrent folder from long-term memory and
    converts the resulting dict into a ``CreateSeedLinksResponse``.
    """

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(
        self, library_file: str, original_download_folder: str
    ) -> CreateSeedLinksResponse:
        """Create the seed links for ``library_file`` and report the outcome.

        Args:
            library_file: Path of the video file, forwarded to the file manager.
            original_download_folder: Path of the original download folder,
                forwarded to the file manager.

        Returns:
            A response with status "ok" and the link/copy details, or status
            "error" when the torrent folder is not configured or the file
            manager reports anything other than "ok".
        """
        torrent_root = get_memory().ltm.workspace.torrent
        if not torrent_root:
            return CreateSeedLinksResponse(
                status="error",
                error="torrent_folder_not_set",
                message="Torrent folder is not configured. Use set_path_for_folder to set it.",
            )

        outcome = self.file_manager.create_seed_links(
            library_file, original_download_folder, torrent_root
        )

        # Anything other than an explicit "ok" is surfaced as an error.
        if outcome.get("status") != "ok":
            return CreateSeedLinksResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return CreateSeedLinksResponse(
            status="ok",
            torrent_subfolder=outcome.get("torrent_subfolder"),
            linked_file=outcome.get("linked_file"),
            copied_files=outcome.get("copied_files"),
            copied_count=outcome.get("copied_count", 0),
            skipped=outcome.get("skipped"),
        )
+149 -1
View File
@@ -1,6 +1,56 @@
"""Filesystem application DTOs.""" """Filesystem application DTOs."""
from dataclasses import dataclass from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class CopyMediaResponse:
"""Response from copying a media file."""
status: str
source: str | None = None
destination: str | None = None
filename: str | None = None
size: int | None = None
error: str | None = None
message: str | None = None
def to_dict(self) -> dict:
if self.error:
return {"status": self.status, "error": self.error, "message": self.message}
return {
"status": self.status,
"source": self.source,
"destination": self.destination,
"filename": self.filename,
"size": self.size,
}
@dataclass
class MoveMediaResponse:
"""Response from moving a media file."""
status: str
source: str | None = None
destination: str | None = None
filename: str | None = None
size: int | None = None
error: str | None = None
message: str | None = None
def to_dict(self) -> dict:
if self.error:
return {"status": self.status, "error": self.error, "message": self.message}
return {
"status": self.status,
"source": self.source,
"destination": self.destination,
"filename": self.filename,
"size": self.size,
}
@dataclass @dataclass
@@ -29,6 +79,104 @@ class SetFolderPathResponse:
return result return result
@dataclass
class PlacedSubtitle:
    """A single subtitle file that was placed next to its video."""

    source: str
    destination: str
    filename: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict."""
        return {
            name: getattr(self, name)
            for name in ("source", "destination", "filename")
        }
@dataclass
class UnresolvedTrack:
"""A subtitle track that needs agent clarification before placement."""
raw_tokens: list[str]
file_path: str | None = None
file_size_kb: float | None = None
reason: str = "" # "unknown_language" | "low_confidence"
def to_dict(self) -> dict:
return {
"raw_tokens": self.raw_tokens,
"file_path": self.file_path,
"file_size_kb": self.file_size_kb,
"reason": self.reason,
}
@dataclass
class AvailableSubtitle:
    """One subtitle track available on an embedded media item."""

    language: str  # ISO 639-2 code
    subtitle_type: str  # "standard" | "sdh" | "forced" | "unknown"

    def to_dict(self) -> dict:
        """Serialise; note that ``subtitle_type`` is exposed under the key "type"."""
        return dict(language=self.language, type=self.subtitle_type)
@dataclass
class ManageSubtitlesResponse:
    """Result of the manage_subtitles use case."""

    status: str  # "ok" | "needs_clarification" | "error"
    video_path: str | None = None
    placed: list[PlacedSubtitle] | None = None
    skipped_count: int = 0
    unresolved: list[UnresolvedTrack] | None = None
    available: list[AvailableSubtitle] | None = None  # embedded tracks summary
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response.

        A truthy ``error`` yields only status/error/message. Otherwise the
        payload always contains the placed list and counts; "unresolved"
        (with its count) and "available" are added only when non-empty.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        placed_items = self.placed or []
        payload = {
            "status": self.status,
            "video_path": self.video_path,
            "placed": [item.to_dict() for item in placed_items],
            "placed_count": len(placed_items),
            "skipped_count": self.skipped_count,
        }

        if self.unresolved:
            payload["unresolved"] = [track.to_dict() for track in self.unresolved]
            payload["unresolved_count"] = len(self.unresolved)

        if self.available:
            payload["available"] = [entry.to_dict() for entry in self.available]

        return payload
@dataclass
class CreateSeedLinksResponse:
"""Response from creating seed links for a torrent."""
status: str
torrent_subfolder: str | None = None
linked_file: str | None = None
copied_files: list[str] | None = None
copied_count: int = 0
skipped: list[str] | None = None
error: str | None = None
message: str | None = None
def to_dict(self) -> dict:
if self.error:
return {"status": self.status, "error": self.error, "message": self.message}
return {
"status": self.status,
"torrent_subfolder": self.torrent_subfolder,
"linked_file": self.linked_file,
"copied_files": self.copied_files or [],
"copied_count": self.copied_count,
"skipped": self.skipped or [],
}
@dataclass @dataclass
class ListFolderResponse: class ListFolderResponse:
"""Response from listing a folder.""" """Response from listing a folder."""
@@ -0,0 +1,258 @@
"""ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
import logging
from pathlib import Path
from alfred.domain.shared.value_objects import ImdbId
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
from alfred.domain.subtitles.services.placer import PlacedTrack, SubtitlePlacer
from alfred.domain.subtitles.services.utils import available_subtitles
from alfred.domain.subtitles.value_objects import ScanStrategy
from alfred.infrastructure.persistence.context import get_memory
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
from .dto import AvailableSubtitle, ManageSubtitlesResponse, PlacedSubtitle, UnresolvedTrack
logger = logging.getLogger(__name__)
def _infer_library_root(dest_video: Path, media_type: str) -> Path:
"""
Infer the media library root folder from the destination video path.
TV show: video → Season 01 → The X-Files (3 levels up)
Movie: video → Inception (2010) (1 level up)
"""
if media_type == "tv_show":
return dest_video.parent.parent
return dest_video.parent
def _to_imdb_id(raw: str | None) -> ImdbId | None:
    """
    Convert a raw string into an ImdbId, best-effort.

    Returns None when ``raw`` is empty/None or when constructing the
    ImdbId raises any exception.
    """
    if raw:
        try:
            imdb_id = ImdbId(raw)
        except Exception:
            # An invalid id is treated as "no id" rather than as a failure.
            imdb_id = None
        return imdb_id
    return None
class ManageSubtitlesUseCase:
    """
    Full subtitle pipeline:

      1. Load knowledge base
      2. Detect (or confirm) the release pattern
      3. Identify all tracks (ffprobe + filesystem scan)
      4. Load + resolve rules for this media
      5. Match tracks against rules
      6. If any tracks are unresolved → return needs_clarification (don't place yet)
      7. Place matched tracks via hard-link
      8. Persist to .alfred/metadata.yaml

    The use case is stateless — all dependencies are instantiated inline.
    """

    def execute(
        self,
        source_video: str,
        destination_video: str,
        imdb_id: str | None = None,
        media_type: str = "tv_show",
        release_group: str | None = None,
        season: int | None = None,
        episode: int | None = None,
        confirmed_pattern_id: str | None = None,
    ) -> ManageSubtitlesResponse:
        """
        Run the subtitle pipeline for one video.

        Args:
            source_video: Path of the video in the download folder; must exist.
            destination_video: Path of the video inside the library.
            imdb_id: Optional raw IMDb id; an invalid value is ignored.
            media_type: "tv_show" (default) or anything else for a movie;
                used to infer the library root from ``destination_video``.
            release_group: Release group name, forwarded to pattern
                detection, rule loading and history.
            season: Season number recorded in the placement history.
            episode: Episode number recorded in the placement history.
            confirmed_pattern_id: Pattern id that takes precedence over the
                stored / auto-detected pattern when it is known.

        Returns:
            ManageSubtitlesResponse with status "ok", "needs_clarification"
            (unresolved tracks, nothing placed) or "error".
        """
        source_path = Path(source_video)
        dest_path = Path(destination_video)

        if not source_path.exists():
            return ManageSubtitlesResponse(
                status="error",
                error="source_not_found",
                message=f"Source video not found: {source_video}",
            )

        kb = SubtitleKnowledgeBase(KnowledgeLoader())
        library_root = _infer_library_root(dest_path, media_type)
        store = SubtitleMetadataStore(library_root)
        repo = RuleSetRepository(library_root)

        # --- Pattern resolution ---
        pattern = self._resolve_pattern(
            kb, store, source_path, confirmed_pattern_id, release_group
        )
        if pattern is None:
            return ManageSubtitlesResponse(
                status="error",
                error="pattern_not_found",
                message="Could not determine subtitle pattern for this release.",
            )

        # --- Identify ---
        media_id = _to_imdb_id(imdb_id)
        identifier = SubtitleIdentifier(kb)
        metadata = identifier.identify(
            video_path=source_path,
            pattern=pattern,
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
        )

        if metadata.total_count == 0:
            logger.info(f"ManageSubtitles: no subtitle tracks found for {source_path.name}")
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
            )

        # --- Embedded short-circuit ---
        # Embedded tracks are only reported, never matched or placed.
        if pattern.scan_strategy == ScanStrategy.EMBEDDED:
            logger.info("ManageSubtitles: embedded pattern — skipping matcher")
            available = [
                AvailableSubtitle(
                    language=t.language.code if t.language else "?",
                    subtitle_type=t.subtitle_type.value,
                )
                for t in available_subtitles(metadata.embedded_tracks)
            ]
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
                available=available,
            )

        # --- Match (external only) ---
        subtitle_prefs = None
        try:
            memory = get_memory()
            subtitle_prefs = memory.ltm.subtitle_preferences
        except Exception:
            # Preferences are optional: keep going without them, but leave a
            # trace instead of swallowing the failure silently.
            logger.debug(
                "ManageSubtitles: subtitle preferences unavailable — continuing without them",
                exc_info=True,
            )

        rules = repo.load(release_group, subtitle_prefs).resolve()
        matcher = SubtitleMatcher()
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        if unresolved:
            logger.info(
                f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
            )
            return ManageSubtitlesResponse(
                status="needs_clarification",
                video_path=destination_video,
                placed=[],
                unresolved=[_to_unresolved_dto(t) for t in unresolved],
            )

        if not matched:
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=metadata.total_count,
            )

        # --- Place ---
        placer = SubtitlePlacer()
        place_result = placer.place(matched, dest_path)

        # --- Persist ---
        if place_result.placed:
            pairs = _pair_placed_with_tracks(place_result.placed, matched)
            store.append_history(pairs, season, episode, release_group)

        placed_dtos = [
            PlacedSubtitle(
                source=str(p.source),
                destination=str(p.destination),
                filename=p.filename,
            )
            for p in place_result.placed
        ]

        return ManageSubtitlesResponse(
            status="ok",
            video_path=destination_video,
            placed=placed_dtos,
            skipped_count=place_result.skipped_count,
        )

    def _resolve_pattern(
        self,
        kb: SubtitleKnowledgeBase,
        store: SubtitleMetadataStore,
        source_path: Path,
        confirmed_pattern_id: str | None,
        release_group: str | None,
    ):
        """
        Pick the subtitle pattern for this release.

        Order of precedence: explicit ``confirmed_pattern_id`` → pattern
        previously confirmed in the metadata store → auto-detection with
        confidence >= 0.6 → the "adjacent" pattern. The result is whatever
        ``kb.pattern(...)`` returns, so it may be falsy/None if even the
        fallback pattern is unknown to the knowledge base.

        ``release_group`` is accepted but not used by this method.
        """
        # 1. Explicit override from caller
        if confirmed_pattern_id:
            p = kb.pattern(confirmed_pattern_id)
            if p:
                return p
            logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

        # 2. Previously confirmed in metadata store
        stored_id = store.confirmed_pattern()
        if stored_id:
            p = kb.pattern(stored_id)
            if p:
                logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                return p

        # 3. Auto-detect
        release_root = source_path.parent
        detector = PatternDetector(kb)
        result = detector.detect(release_root, source_path)

        if result["detected"] and result["confidence"] >= 0.6:
            logger.info(
                f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                f"(confidence={result['confidence']:.2f})"
            )
            return result["detected"]

        # 4. Fallback — adjacent (safest default)
        logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
        return kb.pattern("adjacent")
def _to_unresolved_dto(track: SubtitleTrack, min_confidence: float = 0.7) -> UnresolvedTrack:
    """
    Convert an unresolved SubtitleTrack into its DTO.

    The reason is "unknown_language" when the track has no language and
    "low_confidence" otherwise. ``min_confidence`` is accepted but not used
    by the current implementation.
    """
    path = track.file_path
    return UnresolvedTrack(
        raw_tokens=track.raw_tokens,
        file_path=None if not path else str(path),
        file_size_kb=track.file_size_kb,
        reason="low_confidence" if track.language is not None else "unknown_language",
    )
def _pair_placed_with_tracks(
placed: list[PlacedTrack],
tracks: list[SubtitleTrack],
) -> list[tuple[PlacedTrack, SubtitleTrack]]:
"""
Pair each PlacedTrack with its originating SubtitleTrack by source path.
Falls back to positional matching if paths don't align.
"""
track_by_path = {t.file_path: t for t in tracks if t.file_path}
pairs = []
for p in placed:
track = track_by_path.get(p.source)
if track is None and tracks:
track = tracks[0] # positional fallback
if track:
pairs.append((p, track))
return pairs
@@ -0,0 +1,43 @@
"""Move media use case."""
import logging
from alfred.infrastructure.filesystem import FileManager
from .dto import MoveMediaResponse
logger = logging.getLogger(__name__)
class MoveMediaUseCase:
    """Use case for moving a media file to a destination (copy + delete source)."""

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(self, source: str, destination: str) -> MoveMediaResponse:
        """
        Move a media file from source to destination.

        The move itself is delegated to ``FileManager.move_file``; this
        method only translates its result dict into a response object.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            MoveMediaResponse with success or error information.
        """
        outcome = self.file_manager.move_file(source, destination)

        # Anything other than an explicit "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return MoveMediaResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return MoveMediaResponse(
            status="ok",
            source=outcome.get("source"),
            destination=outcome.get("destination"),
            filename=outcome.get("filename"),
            size=outcome.get("size"),
        )
@@ -0,0 +1,246 @@
"""
ResolveDestinationUseCase — compute the library destination path for a release.
Steps:
1. Parse the release name
2. Look up TMDB for title + year (+ episode title if single episode)
3. Scan the library for an existing series folder
4. Apply group-conflict rules
5. Return the computed paths (or needs_clarification if ambiguous)
"""
from __future__ import annotations
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from alfred.domain.media.release_parser import ParsedRelease, parse_release
from alfred.infrastructure.persistence import get_memory
logger = logging.getLogger(__name__)
# Characters forbidden on Windows filesystems (served via NFS)
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')
def _sanitise(text: str) -> str:
return _WIN_FORBIDDEN.sub("", text)
# ---------------------------------------------------------------------------
# DTOs
# ---------------------------------------------------------------------------
@dataclass
class ResolvedDestination:
"""All computed paths for a release, ready to hand to move_media."""
status: str # "ok" | "needs_clarification" | "error"
# Populated on "ok"
library_file: str | None = None # absolute path of the destination video file
series_folder: str | None = None # absolute path of the series root folder
season_folder: str | None = None # absolute path of the season subfolder
series_folder_name: str | None = None # just the folder name (for display)
season_folder_name: str | None = None
filename: str | None = None
is_new_series_folder: bool = False # True if we're creating the folder
# Populated on "needs_clarification"
question: str | None = None
options: list[str] | None = None # existing group folder names to pick from
# Populated on "error"
error: str | None = None
message: str | None = None
def to_dict(self) -> dict:
if self.status == "error":
return {"status": self.status, "error": self.error, "message": self.message}
if self.status == "needs_clarification":
return {
"status": self.status,
"question": self.question,
"options": self.options or [],
}
return {
"status": self.status,
"library_file": self.library_file,
"series_folder": self.series_folder,
"season_folder": self.season_folder,
"series_folder_name": self.series_folder_name,
"season_folder_name": self.season_folder_name,
"filename": self.filename,
"is_new_series_folder": self.is_new_series_folder,
}
# ---------------------------------------------------------------------------
# Use case
# ---------------------------------------------------------------------------
class ResolveDestinationUseCase:
    """
    Work out where in the library a media file should be placed.

    The caller provides:
      - release_name: the raw release folder/file name
      - source_file: path to the actual video file (only its extension is used)
      - tmdb_title: canonical title from TMDB
      - tmdb_year: release year from TMDB
      - tmdb_episode_title: episode title from TMDB (None for movies / season packs)
      - confirmed_folder: folder name chosen by the user after a previous
        needs_clarification answer; when set it is used as-is

    Returns a ResolvedDestination.
    """

    def execute(
        self,
        release_name: str,
        source_file: str,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None = None,
        confirmed_folder: str | None = None,
    ) -> ResolvedDestination:
        """Parse the release name and dispatch to the movie or TV-show resolver."""
        parsed = parse_release(release_name)
        ext = Path(source_file).suffix  # ".mkv"

        if not parsed.is_movie:
            return self._resolve_tvshow(
                parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder
            )
        return self._resolve_movie(parsed, tmdb_title, tmdb_year, ext)

    # ------------------------------------------------------------------
    # Movie
    # ------------------------------------------------------------------

    def _resolve_movie(
        self, parsed: ParsedRelease, tmdb_title: str, tmdb_year: int, ext: str
    ) -> ResolvedDestination:
        """
        Build <movies_root>/<folder>/<file> for a movie release.

        Returns an "error" result when no movie library path is configured.
        The movie folder is reported through the ``series_folder*`` fields.
        """
        movies_root = get_memory().ltm.library_paths.get("movie")
        if not movies_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="Movie library path is not configured.",
            )

        folder_name = _sanitise(parsed.movie_folder_name(tmdb_title, tmdb_year))
        filename = _sanitise(parsed.movie_filename(tmdb_title, tmdb_year, ext))

        movie_dir = Path(movies_root) / folder_name
        target_file = movie_dir / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(target_file),
            series_folder=str(movie_dir),
            series_folder_name=folder_name,
            filename=filename,
            is_new_series_folder=not movie_dir.exists(),
        )

    # ------------------------------------------------------------------
    # TV show
    # ------------------------------------------------------------------

    def _resolve_tvshow(
        self,
        parsed: ParsedRelease,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None,
        ext: str,
        confirmed_folder: str | None,
    ) -> ResolvedDestination:
        """
        Build <tv_root>/<series>/<season>/<file> for a TV release.

        Series folder selection: ``confirmed_folder`` if given; otherwise a
        new folder when the library has no match, the single existing match
        when there is exactly one, and a "needs_clarification" result when
        several folders match. Returns an "error" result when no TV library
        path is configured.
        """
        tv_root = get_memory().ltm.library_paths.get("tv_show")
        if not tv_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="TV show library path is not configured.",
            )

        tv_root_path = Path(tv_root)

        # --- Find existing series folders for this title ---
        candidates = _find_existing_series_folders(tv_root_path, tmdb_title, tmdb_year)

        # --- Determine series folder name ---
        if confirmed_folder:
            series_folder_name = confirmed_folder
            is_new = not (tv_root_path / confirmed_folder).exists()
        elif not candidates:
            # Nothing in the library yet — name the new folder after the release
            series_folder_name = _sanitise(parsed.show_folder_name(tmdb_title, tmdb_year))
            is_new = True
        elif len(candidates) == 1:
            # A single match is reused whatever its release group
            series_folder_name = candidates[0]
            is_new = False
        else:
            # Ambiguous — let the user choose
            return ResolvedDestination(
                status="needs_clarification",
                question=(
                    f"Multiple folders found for '{tmdb_title}' in your library. "
                    f"Which one should I use for this release ({parsed.group})?"
                ),
                options=candidates,
            )

        # --- Build paths ---
        season_folder_name = parsed.season_folder_name()
        if parsed.is_season_pack:
            raw_filename = parsed.season_folder_name() + ext
        else:
            raw_filename = parsed.episode_filename(tmdb_episode_title, ext)
        filename = _sanitise(raw_filename)

        series_path = tv_root_path / series_folder_name
        season_path = series_path / season_folder_name
        target_file = season_path / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(target_file),
            series_folder=str(series_path),
            season_folder=str(season_path),
            series_folder_name=series_folder_name,
            season_folder_name=season_folder_name,
            filename=filename,
            is_new_series_folder=is_new,
        )
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _find_existing_series_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]:
    """
    List the folders directly under ``tv_root`` that belong to a given show.

    A folder matches when its name starts, case-insensitively, with the
    sanitised title (spaces turned into dots) followed by ".<year>", e.g.
    "Oz.1997". Returns the matching names sorted, or an empty list when
    ``tv_root`` does not exist.
    """
    if not tv_root.exists():
        return []

    dotted_title = _sanitise(tmdb_title).replace(" ", ".")
    wanted_prefix = f"{dotted_title}.{tmdb_year}".lower()

    return sorted(
        child.name
        for child in tv_root.iterdir()
        if child.is_dir() and child.name.lower().startswith(wanted_prefix)
    )
+5
View File
@@ -0,0 +1,5 @@
"""Media domain — shared naming and release parsing."""
from .release_parser import ParsedRelease, parse_release
__all__ = ["ParsedRelease", "parse_release"]
+306
View File
@@ -0,0 +1,306 @@
"""
release_parser.py — Parse a release name into structured components.
Handles both dot-separated and space-separated release names:
Oz.S03.1080p.WEBRip.x265-KONTRAST
Oz S03 1080p WEBRip x265-KONTRAST
Inception.2010.1080p.BluRay.x265-GROUP
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
# Known quality tokens
_QUALITIES = {"2160p", "1080p", "720p", "480p", "576p", "4k", "8k"}
# Known source tokens (case-insensitive match)
_SOURCES = {
"bluray", "blu-ray", "bdrip", "brrip",
"webrip", "web-rip", "webdl", "web-dl", "web",
"hdtv", "hdrip", "dvdrip", "dvd", "vodrip",
"amzn", "nf", "dsnp", "hmax", "atvp",
}
# Known codec tokens
_CODECS = {
"x264", "x265", "h264", "h265", "hevc", "avc",
"xvid", "divx", "av1", "vp9",
"h.264", "h.265",
}
# Windows-forbidden characters (we strip these from display names)
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')
# Episode/season pattern: S01, S01E02, S01E02E03, 1x02, etc.
_SEASON_EP_RE = re.compile(
r"S(\d{1,2})(?:E(\d{2})(?:E(\d{2}))?)?",
re.IGNORECASE,
)
# Year pattern
_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
@dataclass
class ParsedRelease:
"""Structured representation of a parsed release name."""
raw: str # original release name (untouched)
normalised: str # dots instead of spaces
title: str # show/movie title (dots, no year/season/tech)
year: int | None # movie year or show start year (from TMDB)
season: int | None # season number (None for movies)
episode: int | None # first episode number (None if season-pack)
episode_end: int | None # last episode for multi-ep (None otherwise)
quality: str | None # 1080p, 2160p, …
source: str | None # WEBRip, BluRay, …
codec: str | None # x265, HEVC, …
group: str # release group, "UNKNOWN" if missing
tech_string: str # quality.source.codec joined with dots
# -------------------------------------------------------------------------
# Derived helpers
# -------------------------------------------------------------------------
@property
def is_movie(self) -> bool:
return self.season is None
@property
def is_season_pack(self) -> bool:
return self.season is not None and self.episode is None
def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
"""
Build the series root folder name.
Format: {Title}.{Year}.{Tech}-{Group}
Example: Oz.1997.1080p.WEBRip.x265-KONTRAST
"""
title_part = _sanitise_for_fs(tmdb_title).replace(" ", ".")
tech = self.tech_string or "Unknown"
return f"{title_part}.{tmdb_year}.{tech}-{self.group}"
def season_folder_name(self) -> str:
"""
Build the season subfolder name = normalised release name (no episode).
Example: Oz.S03.1080p.WEBRip.x265-KONTRAST
For a single-episode release we still strip the episode token so the
folder can hold the whole season.
"""
return _strip_episode_from_normalised(self.normalised)
def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str:
"""
Build the episode filename.
Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv
If tmdb_episode_title is None, omits the episode title segment.
"""
title_part = _sanitise_for_fs(self.title) # already dotted from normalised
s = f"S{self.season:02d}" if self.season is not None else ""
e = f"E{self.episode:02d}" if self.episode is not None else ""
se = s + e
ep_title = ""
if tmdb_episode_title:
ep_title = "." + _sanitise_for_fs(tmdb_episode_title).replace(" ", ".")
tech = self.tech_string or "Unknown"
ext_clean = ext.lstrip(".")
return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}"
def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
"""
Build the movie folder name.
Format: {Title}.{Year}.{Tech}-{Group}
Example: Inception.2010.1080p.BluRay.x265-GROUP
"""
return self.show_folder_name(tmdb_title, tmdb_year)
def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str:
"""
Build the movie filename (same as folder name + extension).
Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv
"""
ext_clean = ext.lstrip(".")
return f"{self.movie_folder_name(tmdb_title, tmdb_year)}.{ext_clean}"
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def parse_release(name: str) -> ParsedRelease:
    """
    Break a release name down into a ParsedRelease.

    Both dot-separated and space-separated names are accepted: the name is
    normalised to dots first and then split into tokens.
    """
    normalised = _normalise(name)
    tokens = normalised.split(".")

    season, episode, episode_end = _extract_season_episode(tokens)
    quality, source, codec, group, tech_tokens = _extract_tech(tokens)
    title = _extract_title(tokens, season, episode, tech_tokens)

    return ParsedRelease(
        raw=name,
        normalised=normalised,
        title=title,
        year=_extract_year(tokens, title),
        season=season,
        episode=episode,
        episode_end=episode_end,
        quality=quality,
        source=source,
        codec=codec,
        group=group,
        # Only the parts that were actually found end up in the tech string.
        tech_string=".".join(part for part in (quality, source, codec) if part),
    )
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _normalise(name: str) -> str:
"""Replace spaces with dots, collapse multiple dots."""
s = name.replace(" ", ".")
s = re.sub(r"\.{2,}", ".", s)
return s.strip(".")
def _sanitise_for_fs(text: str) -> str:
    """Return ``text`` with every character matched by ``_WIN_FORBIDDEN`` removed."""
    cleaned = _WIN_FORBIDDEN.sub("", text)
    return cleaned
def _extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]:
    """
    Find the first season/episode marker in the dot-joined tokens.

    Returns (season, episode, episode_end); each element is None when the
    corresponding part is absent, and all three are None without a marker.
    """
    found = _SEASON_EP_RE.search(".".join(tokens))
    if found is None:
        return None, None, None

    season, episode, episode_end = (
        int(number) if number else None for number in found.groups()
    )
    return season, episode, episode_end
def _extract_tech(
    tokens: list[str],
) -> tuple[str | None, str | None, str | None, str, set[str]]:
    """
    Extract quality, source, codec, group from tokens.

    Returns (quality, source, codec, group, tech_token_set).

    Each token is classified at most once per pass; when several tokens fall
    in the same category the last one wins, because each hit overwrites the
    previous value. ``group`` is "UNKNOWN" when no group could be found.
    ``tech_token_set`` holds the original-case tokens that were recognised.

    Group extraction strategy (in priority order):
      1. Token where prefix is a known codec: x265-GROUP
      2. Last token in the list that contains a dash (fallback for 10bit-GROUP, AAC5.1-GROUP, etc.)
    """
    quality: str | None = None
    source: str | None = None
    codec: str | None = None
    group = "UNKNOWN"
    tech_tokens: set[str] = set()

    for tok in tokens:
        tl = tok.lower()

        if tl in _QUALITIES:
            quality = tok
            tech_tokens.add(tok)
            continue

        if tl in _SOURCES:
            source = tok
            tech_tokens.add(tok)
            continue

        if "-" in tok:
            # Split on the LAST dash only, so "Web-DL-GROUP" style tokens keep
            # everything before the final dash together.
            parts = tok.rsplit("-", 1)
            # codec-GROUP (highest priority for group)
            if parts[0].lower() in _CODECS:
                codec = parts[0]
                group = parts[1] if parts[1] else "UNKNOWN"
                tech_tokens.add(tok)
                continue
            # source with dash: Web-DL, WEB-DL, etc.
            if parts[0].lower() in _SOURCES or tok.lower().replace("-", "") in _SOURCES:
                source = tok
                tech_tokens.add(tok)
                continue

        # Plain codec token without a group suffix (also reached by dashed
        # tokens that matched neither branch above).
        if tl in _CODECS:
            codec = tok
            tech_tokens.add(tok)

    # Fallback: if group still UNKNOWN, use the rightmost token with a dash
    # that isn't a known source (handles "10bit-Protozoan", "AAC5.1-YTS", etc.)
    if group == "UNKNOWN":
        for tok in reversed(tokens):
            if "-" in tok:
                parts = tok.rsplit("-", 1)
                tl = tok.lower()
                if tl in _SOURCES or tok.lower().replace("-", "") in _SOURCES:
                    continue
                if parts[1]:  # non-empty group part
                    group = parts[1]
                    break

    return quality, source, codec, group, tech_tokens
def _extract_title(tokens: list[str], season: int | None, episode: int | None, tech_tokens: set[str]) -> str:
    """
    Collect the leading tokens that make up the title.

    Collection stops at the first token that is a season marker, a year, a
    recognised tech token, or a dashed token with a codec/source part. The
    tokens are joined with dots; when nothing was collected the first token
    is returned instead. ``season`` and ``episode`` are accepted but not used.
    """
    known_tech = _QUALITIES | _SOURCES | _CODECS
    dashed_tech = _CODECS | _SOURCES

    collected: list[str] = []
    for token in tokens:
        ends_title = (
            _SEASON_EP_RE.match(token)
            or _YEAR_RE.fullmatch(token)
            or token in tech_tokens
            or token.lower() in known_tech
            # dashed token such as codec-GROUP
            or ("-" in token and any(piece.lower() in dashed_tech for piece in token.split("-")))
        )
        if ends_title:
            break
        collected.append(token)

    if not collected:
        return tokens[0]
    return ".".join(collected)
def _extract_year(tokens: list[str], title: str) -> int | None:
    """
    Return the first token after the title that is a 4-digit year, else None.

    The number of tokens to skip is the number of dot-separated parts in
    ``title``.
    """
    skip = len(title.split("."))
    return next(
        (int(token) for token in tokens[skip:] if _YEAR_RE.fullmatch(token)),
        None,
    )
def _strip_episode_from_normalised(normalised: str) -> str:
"""
Remove all episode parts (Exx) from a normalised release name, keeping Sxx.
Oz.S03E01.1080p... → Oz.S03.1080p...
Archer.S14E09E10E11.1080p... → Archer.S14.1080p...
"""
return re.sub(r"(S\d{2})(E\d{2})+", r"\1", normalised, flags=re.IGNORECASE)
+30 -7
View File
@@ -1,14 +1,37 @@
"""Subtitles domain - Business logic for subtitle management (shared across movies and TV shows).""" """Subtitles domain — subtitle identification, classification and placement."""
from .entities import Subtitle from .aggregates import SubtitleRuleSet
from .entities import MediaSubtitleMetadata, SubtitleTrack
from .exceptions import SubtitleNotFound from .exceptions import SubtitleNotFound
from .services import SubtitleService from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase
from .value_objects import Language, SubtitleFormat from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher
from .value_objects import (
RuleScope,
ScanStrategy,
SubtitleFormat,
SubtitleLanguage,
SubtitleMatchingRules,
SubtitlePattern,
SubtitleType,
TypeDetectionMethod,
)
__all__ = [ __all__ = [
"Subtitle", "SubtitleTrack",
"Language", "MediaSubtitleMetadata",
"SubtitleRuleSet",
"SubtitleKnowledgeBase",
"KnowledgeLoader",
"SubtitleIdentifier",
"SubtitleMatcher",
"PatternDetector",
"SubtitleFormat", "SubtitleFormat",
"SubtitleLanguage",
"SubtitlePattern",
"SubtitleType",
"ScanStrategy",
"TypeDetectionMethod",
"SubtitleMatchingRules",
"RuleScope",
"SubtitleNotFound", "SubtitleNotFound",
"SubtitleService",
] ]
+90
View File
@@ -0,0 +1,90 @@
"""Subtitle domain aggregates."""
from dataclasses import dataclass, field
from typing import Any
from ..shared.value_objects import ImdbId
from .knowledge.base import SubtitleKnowledgeBase
from .value_objects import RuleScope, SubtitleMatchingRules
def DEFAULT_RULES() -> SubtitleMatchingRules:
    """Return the default matching rules provided by a fresh SubtitleKnowledgeBase.

    Per the original note these come from the defaults section of
    subtitles.yaml. A new knowledge base is built on every call.
    """
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base.default_rules()
@dataclass
class SubtitleRuleSet:
    """
    Subtitle selection rules for one scope level, with inheritance.

    Only deltas are stored — None means "inherit from parent".
    Resolution order: global → release_group → show/movie → season → episode.

    A RuleSet can also be pinned to a specific media item (imdb_id),
    bypassing the scope hierarchy for that item.
    """

    scope: RuleScope
    parent: "SubtitleRuleSet | None" = None
    pinned_to: ImdbId | None = None

    # Deltas — None = inherit
    _languages: list[str] | None = field(default=None, repr=False)
    _formats: list[str] | None = field(default=None, repr=False)
    _types: list[str] | None = field(default=None, repr=False)
    _format_priority: list[str] | None = field(default=None, repr=False)
    _min_confidence: float | None = field(default=None, repr=False)

    def resolve(self) -> SubtitleMatchingRules:
        """
        Merge this level's deltas over the parent chain's effective rules.

        The top of the chain falls back to DEFAULT_RULES().
        """
        if self.parent:
            inherited = self.parent.resolve()
        else:
            inherited = DEFAULT_RULES()

        # NOTE(review): list deltas are merged with `or`, so an explicitly
        # empty list also inherits from the parent — confirm this is intended.
        if self._min_confidence is not None:
            min_confidence = self._min_confidence
        else:
            min_confidence = inherited.min_confidence

        return SubtitleMatchingRules(
            preferred_languages=self._languages or inherited.preferred_languages,
            preferred_formats=self._formats or inherited.preferred_formats,
            allowed_types=self._types or inherited.allowed_types,
            format_priority=self._format_priority or inherited.format_priority,
            min_confidence=min_confidence,
        )

    def override(
        self,
        languages: list[str] | None = None,
        formats: list[str] | None = None,
        types: list[str] | None = None,
        format_priority: list[str] | None = None,
        min_confidence: float | None = None,
    ) -> None:
        """Store the given deltas at this scope level; None arguments are left untouched."""
        updates = (
            ("_languages", languages),
            ("_formats", formats),
            ("_types", types),
            ("_format_priority", format_priority),
            ("_min_confidence", min_confidence),
        )
        for attribute, value in updates:
            if value is not None:
                setattr(self, attribute, value)

    def to_dict(self) -> dict:
        """Serialize deltas only (for persistence in rules.yaml)."""
        candidates = (
            ("languages", self._languages),
            ("formats", self._formats),
            ("types", self._types),
            ("format_priority", self._format_priority),
            ("min_confidence", self._min_confidence),
        )
        delta: dict[str, Any] = {
            key: value for key, value in candidates if value is not None
        }
        return {
            "scope": {"level": self.scope.level, "identifier": self.scope.identifier},
            "override": delta,
        }

    @classmethod
    def global_default(cls) -> "SubtitleRuleSet":
        """Create an empty rule set at the "global" scope level."""
        return cls(scope=RuleScope(level="global"))
+68 -77
View File
@@ -1,96 +1,87 @@
"""Subtitle domain entities.""" """Subtitle domain entities."""
from dataclasses import dataclass from dataclasses import dataclass, field
from pathlib import Path
from ..shared.value_objects import FilePath, ImdbId from ..shared.value_objects import ImdbId
from .value_objects import Language, SubtitleFormat, TimingOffset from .value_objects import SubtitleFormat, SubtitleLanguage, SubtitleMatchingRules, SubtitleType
@dataclass @dataclass
class Subtitle: class SubtitleTrack:
""" """
Subtitle entity representing a subtitle file. A single subtitle track — either an external file or an embedded stream.
Can be associated with either a movie or a TV show episode. State can evolve: unknown → resolved after user clarification.
confidence reflects how certain we are about language + type classification.
""" """
media_imdb_id: ImdbId # Classification (may be None if not yet resolved)
language: Language language: SubtitleLanguage | None
format: SubtitleFormat format: SubtitleFormat | None
file_path: FilePath subtitle_type: SubtitleType = SubtitleType.UNKNOWN
# Optional: for TV shows # Source
season_number: int | None = None is_embedded: bool = False
episode_number: int | None = None file_path: Path | None = None # None if embedded
file_size_kb: float | None = None
entry_count: int | None = None # number of subtitle cues in the file
# Subtitle metadata # Matching state
timing_offset: TimingOffset = TimingOffset(0) confidence: float = 0.0 # 0.0 → 1.0, not applicable for embedded
hearing_impaired: bool = False raw_tokens: list[str] = field(default_factory=list) # tokens extracted from filename
forced: bool = False # Forced subtitles (for foreign language parts)
# Source information def is_resolved(self) -> bool:
source: str | None = None # e.g., "OpenSubtitles", "Subscene" return self.language is not None
uploader: str | None = None
download_count: int | None = None
rating: float | None = None
def __post_init__(self): @property
"""Validate subtitle entity.""" def destination_name(self) -> str:
# Ensure ImdbId is actually an ImdbId instance
if not isinstance(self.media_imdb_id, ImdbId):
if isinstance(self.media_imdb_id, str):
object.__setattr__(self, "media_imdb_id", ImdbId(self.media_imdb_id))
# Ensure Language is actually a Language instance
if not isinstance(self.language, Language):
if isinstance(self.language, str):
object.__setattr__(self, "language", Language.from_code(self.language))
# Ensure SubtitleFormat is actually a SubtitleFormat instance
if not isinstance(self.format, SubtitleFormat):
if isinstance(self.format, str):
object.__setattr__(
self, "format", SubtitleFormat.from_extension(self.format)
)
# Ensure FilePath is actually a FilePath instance
if not isinstance(self.file_path, FilePath):
object.__setattr__(self, "file_path", FilePath(self.file_path))
def is_for_movie(self) -> bool:
"""Check if this subtitle is for a movie."""
return self.season_number is None and self.episode_number is None
def is_for_episode(self) -> bool:
"""Check if this subtitle is for a TV show episode."""
return self.season_number is not None and self.episode_number is not None
def get_filename(self) -> str:
""" """
Get the suggested filename for this subtitle. Compute the output filename per naming convention:
{lang}.{ext}
Format for movies: "Movie.Title.{lang}.{format}" {lang}.sdh.{ext}
Format for episodes: "S01E05.{lang}.{format}" {lang}.forced.{ext}
""" """
if self.is_for_episode(): if not self.language or not self.format:
base = f"S{self.season_number:02d}E{self.episode_number:02d}" raise ValueError("Cannot compute destination_name: language or format missing")
else: ext = self.format.extensions[0].lstrip(".")
# For movies, use the file path stem parts = [self.language.code]
base = self.file_path.value.stem if self.subtitle_type == SubtitleType.SDH:
parts.append("sdh")
parts = [base, self.language.value] elif self.subtitle_type == SubtitleType.FORCED:
if self.hearing_impaired:
parts.append("hi")
if self.forced:
parts.append("forced") parts.append("forced")
return ".".join(parts) + "." + ext
return f"{'.'.join(parts)}.{self.format.value}"
def __str__(self) -> str:
if self.is_for_episode():
return f"Subtitle S{self.season_number:02d}E{self.episode_number:02d} ({self.language.value})"
return f"Subtitle ({self.language.value})"
def __repr__(self) -> str: def __repr__(self) -> str:
return f"Subtitle(media={self.media_imdb_id}, lang={self.language.value})" lang = self.language.code if self.language else "?"
fmt = self.format.id if self.format else "?"
src = "embedded" if self.is_embedded else str(self.file_path.name if self.file_path else "?")
return f"SubtitleTrack({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
@dataclass
class MediaSubtitleMetadata:
    """
    Everything known about the subtitles of one media item.

    Filled in by the identifier service, which combines an ffprobe pass
    (embedded streams) with a filesystem scan (external files).
    """

    media_id: ImdbId | None
    media_type: str  # "movie" | "tv_show"
    embedded_tracks: list[SubtitleTrack] = field(default_factory=list)
    external_tracks: list[SubtitleTrack] = field(default_factory=list)
    release_group: str | None = None
    detected_pattern_id: str | None = None  # pattern id from knowledge base
    pattern_confirmed: bool = False

    @property
    def all_tracks(self) -> list[SubtitleTrack]:
        """Embedded tracks followed by external tracks, as a new list."""
        return [*self.embedded_tracks, *self.external_tracks]

    @property
    def total_count(self) -> int:
        """Number of embedded plus external tracks."""
        return sum(map(len, (self.embedded_tracks, self.external_tracks)))

    @property
    def unresolved_tracks(self) -> list[SubtitleTrack]:
        """External tracks whose language is still unknown."""
        pending: list[SubtitleTrack] = []
        for track in self.external_tracks:
            if track.language is None:
                pending.append(track)
        return pending
@@ -0,0 +1,4 @@
from .base import SubtitleKnowledgeBase
from .loader import KnowledgeLoader
__all__ = ["SubtitleKnowledgeBase", "KnowledgeLoader"]
+151
View File
@@ -0,0 +1,151 @@
"""SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
import logging
from functools import cached_property
from ..value_objects import (
ScanStrategy,
SubtitleFormat,
SubtitleLanguage,
SubtitleMatchingRules,
SubtitlePattern,
SubtitleType,
TypeDetectionMethod,
)
from .loader import KnowledgeLoader
logger = logging.getLogger(__name__)
class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).

    Built from KnowledgeLoader — call kb.reload() to pick up newly learned entries
    without restarting.
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        """Build the typed tables from *loader*, or from a default KnowledgeLoader."""
        self._loader = loader or KnowledgeLoader()
        self._build()

    def _build(self) -> None:
        """
        (Re)build every typed lookup table from the loader's raw dictionaries.

        Malformed entries are skipped with a warning rather than aborting the
        whole build: unknown type ids and invalid patterns both raise
        ValueError from their enum constructors. Sections or entries that are
        present but empty in YAML (parsed as None) are treated as empty mappings.
        """
        data = self._loader.subtitles()

        self._formats: dict[str, SubtitleFormat] = {}
        for fid, fdata in (data.get("formats") or {}).items():
            fdata = fdata or {}
            self._formats[fid] = SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )

        self._languages: dict[str, SubtitleLanguage] = {}
        for code, ldata in (data.get("languages") or {}).items():
            self._languages[code] = SubtitleLanguage(
                code=code,
                tokens=(ldata or {}).get("tokens", []),
            )

        # Build reverse token → language code map
        self._lang_token_map: dict[str, str] = {}
        for code, lang in self._languages.items():
            for token in lang.tokens:
                self._lang_token_map[token.lower()] = code

        # Build reverse token → type map
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in (data.get("types") or {}).items():
            try:
                stype = SubtitleType(type_id)
            except ValueError as e:
                # Same policy as for patterns below: one bad entry must not
                # take the whole knowledge base down.
                logger.warning(f"SubtitleKnowledgeBase: skipping type '{type_id}': {e}")
                continue
            for token in (tdata or {}).get("tokens", []):
                self._type_token_map[token.lower()] = stype

        d = data.get("defaults") or {}
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=d.get("languages", ["fra", "eng"]),
            preferred_formats=d.get("formats", ["srt"]),
            allowed_types=d.get("types", ["standard", "forced"]),
            format_priority=d.get("format_priority", ["srt", "ass"]),
            min_confidence=d.get("min_confidence", 0.7),
        )

        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in self._loader.patterns().items():
            pdata = pdata or {}
            try:
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(pdata.get("scan_strategy", "adjacent")),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        (pdata.get("type_detection") or {}).get("method", "token_in_name")
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(f"SubtitleKnowledgeBase: skipping pattern '{pid}': {e}")

    def reload(self) -> None:
        """
        Re-read the knowledge files and rebuild all tables.

        NOTE(review): this always switches to a fresh default KnowledgeLoader,
        even when a custom loader was passed to __init__ — confirm that is intended.
        """
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the matching rules built from the `defaults` section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return all known formats keyed by format id."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose matches_extension(ext) is true, else None."""
        for fmt in self._formats.values():
            if fmt.matches_extension(ext):
                return fmt
        return None

    def known_extensions(self) -> set[str]:
        """Return the union of the extensions of every known format (a new set)."""
        return {ext for fmt in self._formats.values() for ext in fmt.extensions}

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return all known languages keyed by language code."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language a filename/stream token maps to (case-insensitive)."""
        code = self._lang_token_map.get(token.lower())
        return self._languages.get(code) if code else None

    def is_known_lang_token(self, token: str) -> bool:
        """Tell whether *token* (case-insensitive) is a known language token."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type a token maps to (case-insensitive), else None."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Tell whether *token* (case-insensitive) is a known type token."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return all successfully parsed patterns keyed by pattern id."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with this id, or None if unknown."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """
        Return the patterns listed under `known_patterns` for a release group.

        Pattern ids that are not in the knowledge base are ignored; an unknown
        group yields an empty list.
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]
+131
View File
@@ -0,0 +1,131 @@
"""KnowledgeLoader — autodiscovers and merges builtin + learned YAML knowledge packs."""
import logging
from pathlib import Path
import yaml
logger = logging.getLogger(__name__)
import alfred as _alfred_pkg
# Builtin knowledge — anchored on the alfred package itself, not on this file's depth.
# Resolves to <alfred package dir>/knowledge.
_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge"
# Learned knowledge — local to this instance, gitignored.
# Resolves to <parent of the alfred package dir>/data/knowledge.
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
def _load_yaml(path: Path) -> dict:
try:
with open(path, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except FileNotFoundError:
return {}
except Exception as e:
logger.warning(f"KnowledgeLoader: could not load {path}: {e}")
return {}
def _merge(base: dict, override: dict) -> dict:
"""
Deep merge override into base.
Lists are extended (not replaced) — learned tokens are additive.
Scalar values in override win over base.
"""
result = dict(base)
for key, val in override.items():
if key in result and isinstance(result[key], dict) and isinstance(val, dict):
result[key] = _merge(result[key], val)
elif key in result and isinstance(result[key], list) and isinstance(val, list):
# Extend list, deduplicate, preserve order
combined = result[key] + [v for v in val if v not in result[key]]
result[key] = combined
else:
result[key] = val
return result
class KnowledgeLoader:
"""
Loads subtitle knowledge from YAML files.
Builtin packs live in alfred/knowledge/ (versioned).
Learned packs live in data/knowledge/ (gitignored, instance-local).
Learned entries are merged additively — they can only add tokens/patterns,
never remove builtin ones.
Usage:
loader = KnowledgeLoader()
subtitles = loader.subtitles() # merged subtitles.yaml
patterns = loader.patterns() # all patterns, keyed by id
groups = loader.release_groups() # all release groups, keyed by name
"""
def __init__(self):
self._cache: dict[str, dict] = {}
self._load()
def _load(self) -> None:
# Main subtitles knowledge
builtin = _load_yaml(_BUILTIN_ROOT / "subtitles.yaml")
learned = _load_yaml(_LEARNED_ROOT / "subtitles_learned.yaml")
self._cache["subtitles"] = _merge(builtin, learned)
# Patterns
self._cache["patterns"] = {}
for path in sorted((_BUILTIN_ROOT / "patterns").glob("*.yaml")):
data = _load_yaml(path)
pid = data.get("id", path.stem)
self._cache["patterns"][pid] = data
for path in sorted((_LEARNED_ROOT / "patterns").glob("*.yaml")):
data = _load_yaml(path)
pid = data.get("id", path.stem)
if pid in self._cache["patterns"]:
self._cache["patterns"][pid] = _merge(self._cache["patterns"][pid], data)
else:
self._cache["patterns"][pid] = data
logger.info(f"KnowledgeLoader: learned new pattern '{pid}'")
# Release groups
self._cache["release_groups"] = {}
for path in sorted((_BUILTIN_ROOT / "release_groups").glob("*.yaml")):
data = _load_yaml(path)
name = data.get("name", path.stem)
self._cache["release_groups"][name] = data
for path in sorted((_LEARNED_ROOT / "release_groups").glob("*.yaml")):
data = _load_yaml(path)
name = data.get("name", path.stem)
if name in self._cache["release_groups"]:
self._cache["release_groups"][name] = _merge(self._cache["release_groups"][name], data)
else:
self._cache["release_groups"][name] = data
logger.info(f"KnowledgeLoader: learned new release group '{name}'")
logger.info(
f"KnowledgeLoader: {len(self._cache['patterns'])} patterns, "
f"{len(self._cache['release_groups'])} release groups loaded"
)
def subtitles(self) -> dict:
return self._cache["subtitles"]
def patterns(self) -> dict[str, dict]:
return self._cache["patterns"]
def pattern(self, pattern_id: str) -> dict | None:
return self._cache["patterns"].get(pattern_id)
def release_groups(self) -> dict[str, dict]:
return self._cache["release_groups"]
def release_group(self, name: str) -> dict | None:
"""Case-insensitive lookup."""
name_lower = name.lower()
for key, val in self._cache["release_groups"].items():
if key.lower() == name_lower:
return val
return None
+221
View File
@@ -0,0 +1,221 @@
"""SubtitleScanner — inspects local subtitle files and filters them per user preferences.
Given a video file path, the scanner:
1. Looks for subtitle files in the same directory as the video.
2. Optionally also inspects a Subs/ subfolder adjacent to the video.
3. Classifies each file (language, SDH, forced) from its filename.
4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh, keep_forced).
5. Returns a list of SubtitleCandidate — one per file that passes the filter,
with the destination filename already computed.
Filename classification heuristics
-----------------------------------
We parse the stem of each subtitle file looking for known patterns:
fr.srt → lang=fr, sdh=False, forced=False
fr.sdh.srt → lang=fr, sdh=True
fr.hi.srt → lang=fr, sdh=True (hi = hearing-impaired, alias for sdh)
fr.forced.srt → lang=fr, forced=True
Breaking.Bad.S01E01.French.srt → lang=fr (keyword match)
Breaking.Bad.S01E01.VOSTFR.srt → lang=fr (VOSTFR = French forced/foreign subs)
Output naming convention (matches SubtitlePreferences docstring):
{lang}.srt
{lang}.sdh.srt
{lang}.forced.srt
"""
import logging
from dataclasses import dataclass, field
from pathlib import Path
logger = logging.getLogger(__name__)
# Subtitle file extensions we handle.
# Compared against path.suffix.lower(), hence lowercase with the leading dot.
SUBTITLE_EXTENSIONS = {".srt", ".ass", ".ssa", ".vtt", ".sub"}
# Language keyword map: lowercase token → ISO 639-1 code.
# Keys must be lowercase: _classify lowercases the stem before tokenizing it.
_LANG_KEYWORDS: dict[str, str] = {
# French
"fr": "fr",
"fra": "fr",
"french": "fr",
"francais": "fr",
"français": "fr",
"vf": "fr",
"vff": "fr",
"vostfr": "fr",
# English
"en": "en",
"eng": "en",
"english": "en",
# Spanish
"es": "es",
"spa": "es",
"spanish": "es",
"espanol": "es",
# German
"de": "de",
"deu": "de",
"ger": "de",
"german": "de",
# Italian
"it": "it",
"ita": "it",
"italian": "it",
# Portuguese
"pt": "pt",
"por": "pt",
"portuguese": "pt",
# Dutch
"nl": "nl",
"nld": "nl",
"dutch": "nl",
# Japanese
"ja": "ja",
"jpn": "ja",
"japanese": "ja",
}
# Tokens that indicate SDH / hearing-impaired.
# Matched per token, so "hearing" or "impaired" alone is enough to flag a file.
_SDH_TOKENS = {"sdh", "hi", "hearing", "impaired", "cc", "closedcaption"}
# Tokens that indicate forced subtitles
_FORCED_TOKENS = {"forced", "foreign"}
@dataclass
class SubtitleCandidate:
    """A subtitle file that survived filtering and can be moved into place."""

    source_path: Path
    language: str  # ISO 639-1 code, e.g. "fr"
    is_sdh: bool
    is_forced: bool
    extension: str  # e.g. ".srt"

    @property
    def destination_name(self) -> str:
        """
        Target filename following the naming convention:
            {lang}.{ext}
            {lang}.sdh.{ext}
            {lang}.forced.{ext}

        SDH takes precedence when a file is flagged both SDH and forced.
        """
        if self.is_sdh:
            marker = ".sdh"
        elif self.is_forced:
            marker = ".forced"
        else:
            marker = ""
        bare_ext = self.extension.lstrip(".")
        return f"{self.language}{marker}.{bare_ext}"
def _classify(path: Path) -> tuple[str | None, bool, bool]:
    """
    Derive (language_code, is_sdh, is_forced) from a subtitle filename.

    The lowercased stem is split on dots, whitespace, underscores and hyphens.
    When several tokens name a language, the last one wins. The language is
    None when no token is a known language keyword.
    """
    import re

    # Split on dots, spaces, underscores, hyphens
    pieces = re.split(r"[\.\s_\-]+", path.stem.lower())

    # Scanning from the end and stopping at the first hit is the same as
    # "last language token wins".
    language: str | None = next(
        (_LANG_KEYWORDS[piece] for piece in reversed(pieces) if piece in _LANG_KEYWORDS),
        None,
    )
    is_sdh = not _SDH_TOKENS.isdisjoint(pieces)
    is_forced = not _FORCED_TOKENS.isdisjoint(pieces)
    return language, is_sdh, is_forced
class SubtitleScanner:
    """
    Finds subtitle files next to a video and keeps those matching the given preferences.

    Usage:
        scanner = SubtitleScanner(
            languages=["fr", "en"], min_size_kb=2, keep_sdh=True, keep_forced=True
        )
        candidates = scanner.scan(video_path)
        # Each candidate has .source_path and .destination_name
    """

    def __init__(self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool):
        # Language codes are normalized to lowercase; _classify returns lowercase codes.
        self.languages = [code.lower() for code in languages]
        self.min_size_kb = min_size_kb
        self.keep_sdh = keep_sdh
        self.keep_forced = keep_forced

    def scan(self, video_path: Path) -> list[SubtitleCandidate]:
        """
        Collect every subtitle candidate around the video that passes the filters.

        Looks in:
          - the video's own directory (flat siblings)
          - a Subs/ subfolder of that directory, when it exists
        """
        video_dir = video_path.parent
        search_dirs = [video_dir]

        subs_dir = video_dir / "Subs"
        if subs_dir.is_dir():
            search_dirs.append(subs_dir)
            logger.debug(f"SubtitleScanner: found Subs/ folder at {subs_dir}")

        candidates: list[SubtitleCandidate] = []
        for directory in search_dirs:
            for entry in sorted(directory.iterdir()):
                if entry.is_file() and entry.suffix.lower() in SUBTITLE_EXTENSIONS:
                    candidate = self._evaluate(entry)
                    if candidate is not None:
                        candidates.append(candidate)

        logger.info(f"SubtitleScanner: {len(candidates)} candidate(s) found for {video_path.name}")
        return candidates

    def _evaluate(self, path: Path) -> SubtitleCandidate | None:
        """Run one file through size, language, SDH and forced filters; None means dropped."""
        # Size filter comes first so tiny files are never classified at all.
        size_kb = path.stat().st_size / 1024
        if size_kb < self.min_size_kb:
            logger.debug(f"SubtitleScanner: skip {path.name} (too small: {size_kb:.1f} KB)")
            return None

        language, is_sdh, is_forced = _classify(path)

        if language is None:
            reason = "language unknown"
        elif language not in self.languages:
            reason = f"language '{language}' not in prefs"
        elif is_sdh and not self.keep_sdh:
            reason = "SDH not wanted"
        elif is_forced and not self.keep_forced:
            reason = "forced not wanted"
        else:
            return SubtitleCandidate(
                source_path=path,
                language=language,
                is_sdh=is_sdh,
                is_forced=is_forced,
                extension=path.suffix.lower(),
            )

        logger.debug(f"SubtitleScanner: skip {path.name} ({reason})")
        return None
@@ -0,0 +1,13 @@
from .identifier import SubtitleIdentifier
from .matcher import SubtitleMatcher
from .pattern_detector import PatternDetector
from .placer import PlacedTrack, PlaceResult, SubtitlePlacer
__all__ = [
"SubtitleIdentifier",
"SubtitleMatcher",
"PatternDetector",
"SubtitlePlacer",
"PlacedTrack",
"PlaceResult",
]
@@ -0,0 +1,287 @@
"""SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
import logging
import re
import subprocess
import json
from pathlib import Path
from ...shared.value_objects import ImdbId
from ..entities import MediaSubtitleMetadata, SubtitleTrack
from ..knowledge.base import SubtitleKnowledgeBase
from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType
logger = logging.getLogger(__name__)
def _tokenize(name: str) -> list[str]:
"""Split a filename stem into lowercase tokens."""
return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]
def _count_entries(path: Path) -> int:
"""Return the entry count of an SRT file by finding the last cue number."""
try:
with open(path, encoding="utf-8", errors="replace") as f:
lines = f.read().splitlines()
for line in reversed(lines):
if line.strip().isdigit():
return int(line.strip())
return 0
except Exception:
return 0
class SubtitleIdentifier:
    """
    Finds all subtitle tracks for a given video file using a known pattern,
    then attempts to classify each track (language, type, format).

    Returns a MediaSubtitleMetadata with embedded + external tracks.
    External tracks with unknown language or low confidence are left as-is —
    the caller (use case) decides whether to ask the user for clarification.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def identify(
        self,
        video_path: Path,
        pattern: SubtitlePattern,
        media_id: ImdbId | None,
        media_type: str,
        release_group: str | None = None,
    ) -> MediaSubtitleMetadata:
        """
        Build the subtitle metadata for one video.

        External files are scanned according to the pattern's strategy unless
        that strategy is EMBEDDED. Embedded streams are always probed.
        """
        metadata = MediaSubtitleMetadata(
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
            detected_pattern_id=pattern.id,
        )

        if pattern.scan_strategy != ScanStrategy.EMBEDDED:
            metadata.external_tracks = self._scan_external(video_path, pattern)
        # Always also check for embedded tracks
        metadata.embedded_tracks = self._scan_embedded(video_path)

        return metadata

    # ------------------------------------------------------------------
    # Embedded tracks — ffprobe
    # ------------------------------------------------------------------

    def _scan_embedded(self, video_path: Path) -> list[SubtitleTrack]:
        """
        List the subtitle streams embedded in the video, using ffprobe.

        Best-effort: returns [] when the video does not exist, ffprobe cannot
        be started (missing, not executable, ...), times out, or prints
        something that is not JSON.
        """
        if not video_path.exists():
            return []

        try:
            result = subprocess.run(
                [
                    "ffprobe", "-v", "quiet",
                    "-print_format", "json",
                    "-show_streams",
                    "-select_streams", "s",
                    str(video_path),
                ],
                capture_output=True, text=True, timeout=30,
            )
            data = json.loads(result.stdout)
        except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as e:
            # OSError covers FileNotFoundError (ffprobe not installed) as well
            # as PermissionError and other launch failures.
            logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
            return []

        tracks = []
        for stream in data.get("streams", []):
            tags = stream.get("tags", {})
            disposition = stream.get("disposition", {})
            lang_code = tags.get("language", "")

            lang = self.kb.language_for_token(lang_code) if lang_code else None

            # hearing_impaired takes precedence over forced when both are set.
            if disposition.get("hearing_impaired"):
                stype = SubtitleType.SDH
            elif disposition.get("forced"):
                stype = SubtitleType.FORCED
            else:
                stype = SubtitleType.STANDARD

            tracks.append(SubtitleTrack(
                language=lang,
                format=None,
                subtitle_type=stype,
                is_embedded=True,
                raw_tokens=[lang_code] if lang_code else [],
            ))

        logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
        return tracks

    # ------------------------------------------------------------------
    # External tracks — filesystem scan per pattern strategy
    # ------------------------------------------------------------------

    def _scan_external(self, video_path: Path, pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Locate external subtitle files per the pattern's strategy and classify them."""
        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.ADJACENT:
            candidates = self._find_adjacent(video_path)
        elif strategy == ScanStrategy.FLAT:
            candidates = self._find_flat(video_path, pattern.root_folder or "Subs")
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            candidates = self._find_episode_subfolder(video_path, pattern.root_folder or "Subs")
        else:
            return []

        return self._classify_files(candidates, pattern)

    def _subtitle_files_in(self, directory: Path) -> list[Path]:
        """Return the files of *directory*, sorted, whose suffix is a known subtitle extension."""
        # Fetched once per directory: known_extensions() rebuilds its set on every call.
        known = self.kb.known_extensions()
        return [
            p for p in sorted(directory.iterdir())
            if p.is_file() and p.suffix.lower() in known
        ]

    def _find_adjacent(self, video_path: Path) -> list[Path]:
        """Subtitle files sitting in the same directory as the video."""
        # NOTE(review): files whose stem equals the video's stem (e.g. Movie.srt
        # next to Movie.mkv) are excluded here — confirm this is intended.
        return [
            p for p in self._subtitle_files_in(video_path.parent)
            if p.stem != video_path.stem
        ]

    def _find_flat(self, video_path: Path, root_folder: str) -> list[Path]:
        """Subtitle files directly inside `root_folder`, next to the video or one level up."""
        subs_dir = video_path.parent / root_folder
        if not subs_dir.is_dir():
            # Also look at release root (one level up)
            subs_dir = video_path.parent.parent / root_folder
        if not subs_dir.is_dir():
            return []
        return self._subtitle_files_in(subs_dir)

    def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
        """
        Look for Subs/{episode_stem}/*.srt

        Checks two locations:
          1. Adjacent to the video:  video_path.parent / root_folder / video_path.stem
          2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem

        The first location that contains at least one subtitle file wins.
        """
        episode_stem = video_path.stem
        candidates_dirs = [
            video_path.parent / root_folder / episode_stem,
            video_path.parent.parent / root_folder / episode_stem,
        ]
        for subs_dir in candidates_dirs:
            if subs_dir.is_dir():
                files = self._subtitle_files_in(subs_dir)
                if files:
                    logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
                    return files
        return []

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

    def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Classify each file, then apply size-based disambiguation if the pattern asks for it."""
        tracks = [self._classify_single(path) for path in paths]

        # Post-process: if multiple tracks share same language but type is ambiguous,
        # apply size_and_count disambiguation
        if pattern.type_detection.value == "size_and_count":
            tracks = self._disambiguate_by_size(tracks)

        return tracks

    def _classify_single(self, path: Path) -> SubtitleTrack:
        """
        Classify one external file from its name.

        Language and type come from known tokens in the stem (the last matching
        token wins). Confidence is the share of meaningful tokens (non-numeric,
        longer than one character) that were recognized, capped at 1.0; it is
        0.5 when the name has no meaningful token at all.
        """
        fmt = self.kb.format_for_extension(path.suffix)
        tokens = _tokenize(path.stem)

        language = None
        subtitle_type = SubtitleType.UNKNOWN
        unknown_tokens = []
        matched_tokens = 0

        for token in tokens:
            if self.kb.is_known_lang_token(token):
                language = self.kb.language_for_token(token)
                matched_tokens += 1
            elif self.kb.is_known_type_token(token):
                subtitle_type = self.kb.type_for_token(token) or subtitle_type
                matched_tokens += 1
            elif token.isdigit():
                pass  # numeric prefix — ignore
            elif len(token) > 1:
                unknown_tokens.append(token)

        # Confidence: proportion of meaningful tokens that were recognized
        meaningful = [t for t in tokens if not t.isdigit() and len(t) > 1]
        # Matched tokens are counted without the "meaningful" filter, so the
        # ratio is capped to stay inside the 0.0 → 1.0 range.
        confidence = min(1.0, matched_tokens / len(meaningful)) if meaningful else 0.5

        if unknown_tokens:
            logger.debug(
                f"SubtitleIdentifier: unknown tokens in '{path.name}': {unknown_tokens}"
            )

        # A single stat() instead of exists() + stat() avoids a race if the
        # file disappears in between.
        try:
            size_kb = path.stat().st_size / 1024
        except OSError:
            size_kb = None
        entry_count = _count_entries(path) if size_kb is not None else None

        return SubtitleTrack(
            language=language,
            format=fmt,
            subtitle_type=subtitle_type,
            is_embedded=False,
            file_path=path,
            file_size_kb=size_kb,
            entry_count=entry_count,
            confidence=confidence,
            raw_tokens=tokens,
        )

    def _disambiguate_by_size(self, tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
        """
        When multiple tracks share the same language and type is UNKNOWN/STANDARD,
        the one with the most entries (lines) is SDH, the smallest is FORCED if
        there are 3+, otherwise the smaller is STANDARD.

        Tracks whose type was already detected as something else (for example
        SDH or FORCED from a filename token) are never overwritten.
        Tracks without a language are grouped together under one key.
        Only applied when type_detection = size_and_count.

        Returns the tracks grouped by language; groups of two or more are
        ordered by ascending entry count.
        """
        ambiguous_types = (SubtitleType.UNKNOWN, SubtitleType.STANDARD)

        # Group by language code
        lang_groups: dict[str, list[SubtitleTrack]] = {}
        for track in tracks:
            key = track.language.code if track.language else "__unknown__"
            lang_groups.setdefault(key, []).append(track)

        result = []
        for group in lang_groups.values():
            if len(group) == 1:
                result.extend(group)
                continue

            # Sort by entry_count ascending (None treated as 0)
            sorted_group = sorted(group, key=lambda t: t.entry_count or 0)
            ambiguous = [t for t in sorted_group if t.subtitle_type in ambiguous_types]

            if len(ambiguous) == 2:
                # smaller = standard, larger = sdh
                self._set_type(ambiguous[0], SubtitleType.STANDARD)
                self._set_type(ambiguous[1], SubtitleType.SDH)
            elif len(ambiguous) >= 3:
                # smallest = forced, middle = standard, largest = sdh
                self._set_type(ambiguous[0], SubtitleType.FORCED)
                for t in ambiguous[1:-1]:
                    self._set_type(t, SubtitleType.STANDARD)
                self._set_type(ambiguous[-1], SubtitleType.SDH)

            result.extend(sorted_group)

        return result

    def _set_type(self, track: SubtitleTrack, stype: SubtitleType) -> None:
        """Mutate track type in-place."""
        track.subtitle_type = stype
+118
View File
@@ -0,0 +1,118 @@
"""SubtitleMatcher — filters tracks against resolved rules."""
import logging
from ..entities import SubtitleTrack
from ..value_objects import SubtitleMatchingRules, SubtitleType
logger = logging.getLogger(__name__)
class SubtitleMatcher:
    """
    Filters a list of SubtitleTrack against effective SubtitleMatchingRules.

    Returns matched tracks (pass all filters, confidence >= min_confidence)
    and unresolved tracks (need user clarification).

    Conflict resolution: when two tracks share the same language + type,
    format_priority decides which one to keep.
    """

    def match(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleTrack], list[SubtitleTrack]]:
        """
        Returns (matched, unresolved).

        Embedded tracks are ignored entirely. A track with no language, or with
        confidence below rules.min_confidence, goes to `unresolved` without
        being filtered. Remaining tracks that fail a filter are dropped.
        """
        matched: list[SubtitleTrack] = []
        unresolved: list[SubtitleTrack] = []

        for track in tracks:
            if track.is_embedded:
                continue

            if track.language is None or track.confidence < rules.min_confidence:
                unresolved.append(track)
                continue

            if not self._passes_filters(track, rules):
                logger.debug(f"SubtitleMatcher: filtered out {track}")
                continue

            matched.append(track)

        matched = self._resolve_conflicts(matched, rules)

        logger.info(
            f"SubtitleMatcher: {len(matched)} matched, {len(unresolved)} unresolved"
        )
        return matched, unresolved

    def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
        """Check language, format and type filters; an empty rule list disables that filter."""
        # Language filter
        if rules.preferred_languages:
            if not track.language:
                return False
            if track.language.code not in rules.preferred_languages:
                return False

        # Format filter (only for external files)
        if rules.preferred_formats and not track.is_embedded:
            if not track.format:
                return False
            if track.format.id not in rules.preferred_formats:
                return False

        # Type filter
        if rules.allowed_types:
            if track.subtitle_type.value not in rules.allowed_types:
                return False

        return True

    def _resolve_conflicts(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleTrack]:
        """
        When multiple tracks have same language + type, keep only the best one
        according to format_priority. If no format_priority applies, keep the first.
        """
        seen: dict[tuple, SubtitleTrack] = {}

        for track in tracks:
            lang = track.language.code if track.language else None
            stype = track.subtitle_type.value
            key = (lang, stype)

            if key not in seen:
                seen[key] = track
                continue

            existing = seen[key]
            if self._prefer(track, existing, rules.format_priority):
                logger.debug(
                    f"SubtitleMatcher: conflict {key}, "
                    f"preferring {track.format.id if track.format else 'embedded'} "
                    f"over {existing.format.id if existing.format else 'embedded'}"
                )
                seen[key] = track

        return list(seen.values())

    def _prefer(
        self,
        candidate: SubtitleTrack,
        existing: SubtitleTrack,
        format_priority: list[str],
    ) -> bool:
        """
        Return True if candidate is preferable to existing.

        A format listed earlier in format_priority wins. Formats that are not
        listed (or missing) rank after every listed one, and ties keep the
        existing track.
        """
        if not format_priority:
            return False

        # Rank used for formats absent from the priority list: one past the end.
        unranked = len(format_priority)

        def rank(track: SubtitleTrack) -> int:
            fmt_id = track.format.id if track.format else ""
            return format_priority.index(fmt_id) if fmt_id in format_priority else unranked

        return rank(candidate) < rank(existing)
@@ -0,0 +1,205 @@
"""PatternDetector — discovers the subtitle structure of a release folder."""
import json
import logging
import subprocess
from pathlib import Path
from ..knowledge.base import SubtitleKnowledgeBase
from ..value_objects import ScanStrategy, SubtitlePattern
logger = logging.getLogger(__name__)
class PatternDetector:
    """
    Inspects a release folder and returns the best matching known pattern,
    plus a confidence score and a description of what was found.

    Used for "pattern discovery" — when we don't yet know which pattern
    a release follows. The result is proposed to the user for confirmation.
    """

    # A best score below this threshold is reported as "no pattern detected".
    _MIN_MATCH_SCORE = 0.4

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def detect(self, release_root: Path, sample_video: Path) -> dict:
        """
        Analyse the release folder and return:
        {
            "detected": SubtitlePattern | None,
            "confidence": float,
            "description": str,  # human-readable description of what was found
            "candidate_pattern_ids": list[str],
            "raw_findings": dict,  # the structural facts gathered by _inspect
        }
        """
        findings = self._inspect(release_root, sample_video)
        best, confidence = self._match_pattern(findings)
        return {
            "detected": best,
            "confidence": confidence,
            "description": self._describe(findings),
            "candidate_pattern_ids": [best.id] if best else [],
            "raw_findings": findings,
        }

    def _has_embedded_subtitles(self, video_path: Path) -> bool:
        """
        Run ffprobe to check whether the video has embedded subtitle streams.

        Best effort: returns False when ffprobe cannot be launched, times
        out, or prints something that is not valid JSON.
        """
        try:
            result = subprocess.run(
                [
                    "ffprobe", "-v", "quiet",
                    "-print_format", "json",
                    "-show_streams",
                    "-select_streams", "s",
                    str(video_path),
                ],
                capture_output=True, text=True, timeout=30,
            )
            data = json.loads(result.stdout)
        except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
            # OSError covers a missing binary (FileNotFoundError) as well as
            # other launch failures such as a non-executable ffprobe.
            return False
        return len(data.get("streams", [])) > 0

    def _has_lang_token(self, stem: str) -> bool:
        """Return True if any '.'- or '_'-separated token of stem is a known language token."""
        return any(
            self.kb.is_known_lang_token(token.lower())
            for token in stem.replace("_", ".").split(".")
        )

    def _inspect(self, release_root: Path, sample_video: Path) -> dict:
        """Gather structural facts about the release."""
        known_exts = self.kb.known_extensions()
        findings: dict = {
            "has_subs_folder": False,
            "subs_strategy": None,  # "flat" | "episode_subfolder"
            "subs_root": None,
            "adjacent_subs": False,
            "has_embedded": self._has_embedded_subtitles(sample_video),
            "files_per_episode": 0,
            "has_lang_tokens": False,
            "has_numeric_prefix": False,
        }

        # Check for Subs/ folder — adjacent or at release root.
        # Candidates are in priority order; the first existing one wins.
        for subs_candidate in (
            sample_video.parent / "Subs",
            release_root / "Subs",
        ):
            if not subs_candidate.is_dir():
                continue

            findings["has_subs_folder"] = True
            findings["subs_root"] = str(subs_candidate)

            # Is it flat or episode_subfolder?
            children = list(subs_candidate.iterdir())
            sub_files = [
                c for c in children
                if c.is_file() and c.suffix.lower() in known_exts
            ]
            sub_dirs = [c for c in children if c.is_dir()]

            if sub_dirs and not sub_files:
                findings["subs_strategy"] = "episode_subfolder"
                # Count files in a sample subfolder
                sample_files = [
                    f for f in sub_dirs[0].iterdir()
                    if f.is_file() and f.suffix.lower() in known_exts
                ]
                findings["files_per_episode"] = len(sample_files)
                # Check naming conventions
                for f in sample_files:
                    if f.stem.split("_")[0].isdigit():
                        findings["has_numeric_prefix"] = True
                    if self._has_lang_token(f.stem):
                        findings["has_lang_tokens"] = True
            else:
                findings["subs_strategy"] = "flat"
                findings["files_per_episode"] = len(sub_files)
                if any(self._has_lang_token(f.stem) for f in sub_files):
                    findings["has_lang_tokens"] = True
            break

        # Check adjacent subs (next to the video)
        if not findings["has_subs_folder"]:
            adjacent = [
                p for p in sample_video.parent.iterdir()
                if p.is_file() and p.suffix.lower() in known_exts
            ]
            if adjacent:
                findings["adjacent_subs"] = True
                findings["files_per_episode"] = len(adjacent)

        return findings

    def _match_pattern(self, findings: dict) -> tuple[SubtitlePattern | None, float]:
        """
        Score all known patterns against the findings.

        Returns (pattern, score) for the highest-scoring pattern, or
        (None, score) when that score is below the match threshold, or
        (None, 0.0) when the knowledge base has no patterns.
        """
        scored = [
            (self._score(pattern, findings), pattern)
            for pattern in self.kb.patterns().values()
        ]
        if not scored:
            return None, 0.0

        # max() returns the first maximal entry, so ties go to the pattern
        # that comes first in the knowledge base.
        best_score, best_pattern = max(scored, key=lambda pair: pair[0])
        if best_score < self._MIN_MATCH_SCORE:
            return None, best_score
        return best_pattern, best_score

    def _score(self, pattern: SubtitlePattern, findings: dict) -> float:
        """Return a 0.0–1.0 match score for this pattern against the findings."""
        score = 0.0
        total = 0.0
        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.EMBEDDED:
            total += 1
            if findings.get("has_embedded"):
                score += 1.0
            # Absence of any external subtitles is extra evidence; it is
            # counted in both score and total so the ratio stays <= 1.0.
            if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
                score += 0.5
                total += 0.5
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            total += 3
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "episode_subfolder":
                score += 2.0
        elif strategy == ScanStrategy.FLAT:
            total += 2
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "flat":
                score += 1.0
        elif strategy == ScanStrategy.ADJACENT:
            total += 2
            if findings.get("adjacent_subs"):
                score += 1.0
            if not findings.get("has_subs_folder"):
                score += 1.0

        # Unknown strategies leave total at 0 and score as "no match".
        return score / total if total > 0 else 0.0

    def _describe(self, findings: dict) -> str:
        """Build a human-readable, '; '-separated summary of the findings."""
        parts = []
        if findings.get("has_subs_folder"):
            strategy = findings.get("subs_strategy") or "?"
            n = findings.get("files_per_episode", 0)
            parts.append(f"Subs/ folder found ({strategy}), ~{n} file(s) per episode")
            if findings.get("has_numeric_prefix"):
                parts.append("files have numeric prefix (e.g. 2_English.srt)")
            if findings.get("has_lang_tokens"):
                parts.append("language tokens found in filenames")
        elif findings.get("adjacent_subs"):
            parts.append("subtitle files adjacent to video")
        else:
            parts.append("no external subtitle files found")

        if findings.get("has_embedded"):
            parts.append("embedded tracks detected (ffprobe)")

        return "; ".join(parts) if parts else "nothing found"
@@ -0,0 +1,93 @@
"""SubtitlePlacer — hard-links matched subtitle tracks next to the destination video."""
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from ..entities import SubtitleTrack
logger = logging.getLogger(__name__)
@dataclass
class PlacedTrack:
    """Record of one subtitle file that was hard-linked into the destination folder."""

    source: Path  # subtitle file the hard link was created from
    destination: Path  # full path of the created hard link
    filename: str  # file name of the link inside the destination folder
@dataclass
class PlaceResult:
    """Outcome of a placement run: the tracks that were linked and those that were not."""

    placed: list[PlacedTrack]  # one entry per hard link created
    skipped: list[tuple[SubtitleTrack, str]]  # (track, reason)

    @property
    def placed_count(self) -> int:
        """Number of tracks that were placed."""
        return len(self.placed)

    @property
    def skipped_count(self) -> int:
        """Number of tracks that were skipped."""
        return len(self.skipped)
class SubtitlePlacer:
    """
    Hard-links matched SubtitleTrack files next to a destination video.

    Uses the same hard-link strategy as FileManager.copy_file:
    instant, no data duplication, qBittorrent keeps seeding.
    Embedded tracks are skipped — nothing to place on disk.
    """

    def place(
        self,
        tracks: list[SubtitleTrack],
        destination_video: Path,
    ) -> PlaceResult:
        """
        Link every placeable track into the folder of destination_video.

        A track is skipped, with a reason, when it is embedded, its source
        file is missing, its destination name cannot be computed, the
        destination already exists, or the hard link itself fails.

        Args:
            tracks: Tracks to place.
            destination_video: Video file whose parent folder receives the links.

        Returns:
            PlaceResult listing the placed links and the skipped tracks.
        """
        target_dir = destination_video.parent
        linked: list[PlacedTrack] = []
        rejected: list[tuple[SubtitleTrack, str]] = []

        for track in tracks:
            if track.is_embedded:
                logger.debug(f"SubtitlePlacer: skip embedded track ({track.language})")
                rejected.append((track, "embedded — no file to place"))
                continue

            source = track.file_path
            if not source or not source.exists():
                rejected.append((track, "source file not found"))
                continue

            # destination_name signals an unusable track with ValueError.
            try:
                name = track.destination_name
            except ValueError as exc:
                rejected.append((track, str(exc)))
                continue

            target = target_dir / name
            if target.exists():
                logger.debug(f"SubtitlePlacer: skip {name} — already exists")
                rejected.append((track, "destination already exists"))
                continue

            try:
                os.link(source, target)
            except OSError as exc:
                logger.warning(f"SubtitlePlacer: failed to place {name}: {exc}")
                rejected.append((track, str(exc)))
            else:
                linked.append(
                    PlacedTrack(source=source, destination=target, filename=name)
                )
                logger.info(f"SubtitlePlacer: placed {name}")

        logger.info(
            f"SubtitlePlacer: {len(linked)} placed, {len(rejected)} skipped "
            f"for {destination_video.name}"
        )
        return PlaceResult(placed=linked, skipped=rejected)
+21
View File
@@ -0,0 +1,21 @@
"""Subtitle service utilities."""
from ..entities import SubtitleTrack
def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
    """
    Return the distinct subtitle tracks available, deduped by (language, type).

    The first track seen for each (language code, subtitle type) pair is
    kept, and the result preserves the input order. Useful to display what
    is available for a media item regardless of user preferences — e.g.
    eng, eng.sdh, fra all show up as separate entries.
    """
    first_by_key: dict[tuple, SubtitleTrack] = {}
    for candidate in tracks:
        # Tracks without a language are grouped under a None language code.
        code = candidate.language.code if candidate.language else None
        # setdefault keeps the earliest track; dicts preserve insertion order.
        first_by_key.setdefault((code, candidate.subtitle_type), candidate)
    return list(first_by_key.values())
+72 -70
View File
@@ -1,91 +1,93 @@
"""Subtitle domain value objects.""" """Subtitle domain value objects."""
from dataclasses import dataclass from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from pathlib import Path
from ..shared.exceptions import ValidationError from typing import Any
class Language(Enum): class ScanStrategy(Enum):
"""Supported subtitle languages.""" """How to locate subtitle files for a given release."""
ENGLISH = "en" ADJACENT = "adjacent" # .srt next to the video
FRENCH = "fr" FLAT = "flat" # Subs/*.srt
EPISODE_SUBFOLDER = "episode_subfolder" # Subs/{episode_name}/*.srt
@classmethod EMBEDDED = "embedded" # tracks inside the video container
def from_code(cls, code: str) -> "Language":
"""
Get language from ISO 639-1 code.
Args:
code: Two-letter language code
Returns:
Language enum value
Raises:
ValidationError: If code is not supported
"""
code_lower = code.lower()
for lang in cls:
if lang.value == code_lower:
return lang
raise ValidationError(f"Unsupported language code: {code}")
class SubtitleFormat(Enum): class TypeDetectionMethod(Enum):
"""Supported subtitle formats.""" """How to differentiate standard / SDH / forced when tokens are ambiguous."""
SRT = "srt" # SubRip TOKEN_IN_NAME = "token_in_name"
ASS = "ass" # Advanced SubStation Alpha SIZE_AND_COUNT = "size_and_count"
SSA = "ssa" # SubStation Alpha FFPROBE_METADATA = "ffprobe_metadata"
VTT = "vtt" # WebVTT
SUB = "sub" # MicroDVD
@classmethod
def from_extension(cls, extension: str) -> "SubtitleFormat":
"""
Get format from file extension.
Args: class SubtitleType(Enum):
extension: File extension (with or without dot) STANDARD = "standard"
SDH = "sdh"
Returns: FORCED = "forced"
SubtitleFormat enum value UNKNOWN = "unknown"
Raises:
ValidationError: If extension is not supported
"""
ext = extension.lower().lstrip(".")
for fmt in cls:
if fmt.value == ext:
return fmt
raise ValidationError(f"Unsupported subtitle format: {extension}")
@dataclass(frozen=True) @dataclass(frozen=True)
class TimingOffset: class SubtitleFormat:
""" """A known subtitle file format."""
Value object representing subtitle timing offset in milliseconds.
Used for synchronizing subtitles with video. id: str
extensions: list[str]
description: str = ""
def matches_extension(self, ext: str) -> bool:
return ext.lower() in [e.lower() for e in self.extensions]
@dataclass(frozen=True)
class SubtitleLanguage:
"""A known subtitle language with its recognition tokens."""
code: str # ISO 639-1
tokens: list[str] # lowercase
def matches_token(self, token: str) -> bool:
return token.lower() in self.tokens
@dataclass(frozen=True)
class SubtitlePattern:
"""
A known structural pattern for how a release group organises subtitle files.
Patterns are loaded from alfred/knowledge/patterns/*.yaml and are
independent of any specific release group — multiple groups can share
the same pattern.
""" """
milliseconds: int id: str
description: str
scan_strategy: ScanStrategy
root_folder: str | None # e.g. "Subs", None for adjacent/embedded
type_detection: TypeDetectionMethod
version: str = "1.0"
def __post_init__(self):
"""Validate timing offset."""
if not isinstance(self.milliseconds, int):
raise ValidationError(
f"Timing offset must be an integer, got {type(self.milliseconds)}"
)
def to_seconds(self) -> float: @dataclass(frozen=True)
"""Convert to seconds.""" class SubtitleMatchingRules:
return self.milliseconds / 1000.0 """
Effective rules after scope resolution (global → group → show → season → episode).
Only stores actual values — None means "inherited, not overridden at this level".
"""
def __str__(self) -> str: preferred_languages: list[str] = field(default_factory=list) # ISO 639-1 codes
return f"{self.milliseconds}ms" preferred_formats: list[str] = field(default_factory=list) # format ids
allowed_types: list[str] = field(default_factory=list) # SubtitleType ids
format_priority: list[str] = field(default_factory=list) # ordered format ids
min_confidence: float = 0.7
def __repr__(self) -> str:
return f"TimingOffset({self.milliseconds})" @dataclass(frozen=True)
class RuleScope:
"""At which level a rule set applies."""
level: str # "global" | "release_group" | "movie" | "show" | "season" | "episode"
identifier: str | None = None # imdb_id, group name, "S01", "S01E03"…
-11
View File
@@ -2,7 +2,6 @@
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime
from ..shared.value_objects import FilePath, FileSize, ImdbId from ..shared.value_objects import FilePath, FileSize, ImdbId
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
@@ -22,8 +21,6 @@ class TVShow:
seasons_count: int seasons_count: int
status: ShowStatus status: ShowStatus
tmdb_id: int | None = None tmdb_id: int | None = None
first_air_date: str | None = None
added_at: datetime = field(default_factory=datetime.now)
def __post_init__(self): def __post_init__(self):
"""Validate TV show entity.""" """Validate TV show entity."""
@@ -87,9 +84,6 @@ class Season:
season_number: SeasonNumber season_number: SeasonNumber
episode_count: int episode_count: int
name: str | None = None name: str | None = None
overview: str | None = None
air_date: str | None = None
poster_path: str | None = None
def __post_init__(self): def __post_init__(self):
"""Validate season entity.""" """Validate season entity."""
@@ -146,11 +140,6 @@ class Episode:
title: str title: str
file_path: FilePath | None = None file_path: FilePath | None = None
file_size: FileSize | None = None file_size: FileSize | None = None
overview: str | None = None
air_date: str | None = None
still_path: str | None = None
vote_average: float | None = None
runtime: int | None = None # in minutes
def __post_init__(self): def __post_init__(self):
"""Validate episode entity.""" """Validate episode entity."""
+157 -151
View File
@@ -2,8 +2,7 @@
import logging import logging
import os import os
import shutil from collections import namedtuple
from enum import Enum
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -13,14 +12,11 @@ from .exceptions import PathTraversalError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
FileOperationResult = namedtuple("FileOperationResult", ["success", "error", "message"])
class FolderName(Enum):
"""Types of folders that can be managed."""
DOWNLOAD = "download" def _err(error: str, message: str) -> dict[str, Any]:
TVSHOW = "tvshow" return {"status": "error", "error": error, "message": message}
MOVIE = "movie"
TORRENT = "torrent"
class FileManager: class FileManager:
@@ -35,8 +31,6 @@ class FileManager:
""" """
Set a folder path in the configuration. Set a folder path in the configuration.
Validates that the path exists, is a directory, and is readable.
Args: Args:
folder_name: Name of folder (download, tvshow, movie, torrent). folder_name: Name of folder (download, tvshow, movie, torrent).
path_value: Absolute path to the folder. path_value: Absolute path to the folder.
@@ -45,53 +39,39 @@ class FileManager:
Dict with status or error information. Dict with status or error information.
""" """
try: try:
self._validate_folder_name(folder_name)
path_obj = Path(path_value).resolve() path_obj = Path(path_value).resolve()
if not path_obj.exists(): if not path_obj.exists():
logger.warning(f"Path does not exist: {path_value}") return _err("invalid_path", f"Path does not exist: {path_value}")
return {
"error": "invalid_path",
"message": f"Path does not exist: {path_value}",
}
if not path_obj.is_dir(): if not path_obj.is_dir():
logger.warning(f"Path is not a directory: {path_value}") return _err("invalid_path", f"Path is not a directory: {path_value}")
return {
"error": "invalid_path",
"message": f"Path is not a directory: {path_value}",
}
if not os.access(path_obj, os.R_OK): if not os.access(path_obj, os.R_OK):
logger.warning(f"Path is not readable: {path_value}") return _err("permission_denied", f"Path is not readable: {path_value}")
return {
"error": "permission_denied",
"message": f"Path is not readable: {path_value}",
}
memory = get_memory() memory = get_memory()
memory.ltm.set_config(f"{folder_name}_folder", str(path_obj)) # workspace folders have fixed attributes; library folders go in the dict
if folder_name in ("download", "torrent"):
setattr(memory.ltm.workspace, folder_name, str(path_obj))
else:
memory.ltm.library_paths.set(folder_name, str(path_obj))
memory.save() memory.save()
logger.info(f"Set {folder_name}_folder to: {path_obj}") logger.info(f"Set {folder_name} to: {path_obj}")
return {"status": "ok", "folder_name": folder_name, "path": str(path_obj)} return {"status": "ok", "folder_name": folder_name, "path": str(path_obj)}
except ValueError as e: except ValueError as e:
logger.error(f"Validation error: {e}") return _err("validation_failed", str(e))
return {"error": "validation_failed", "message": str(e)}
except Exception as e: except Exception as e:
logger.error(f"Unexpected error setting path: {e}", exc_info=True) logger.error(f"Unexpected error setting path: {e}", exc_info=True)
return {"error": "internal_error", "message": "Failed to set path"} return _err("internal_error", "Failed to set path")
def list_folder( # noqa: PLR0911 def list_folder(self, folder_type: str, path: str = ".") -> dict[str, Any]:
self, folder_type: str, path: str = "."
) -> dict[str, Any]:
""" """
List contents of a configured folder. List contents of a configured folder.
Includes security checks to prevent path traversal.
Args: Args:
folder_type: Type of folder (download, tvshow, movie, torrent). folder_type: Type of folder (download, tvshow, movie, torrent).
path: Relative path within the folder (default: root). path: Relative path within the folder (default: root).
@@ -100,43 +80,28 @@ class FileManager:
Dict with folder contents or error information. Dict with folder contents or error information.
""" """
try: try:
self._validate_folder_name(folder_type)
safe_path = self._sanitize_path(path) safe_path = self._sanitize_path(path)
memory = get_memory() memory = get_memory()
folder_key = f"{folder_type}_folder" if folder_type in ("download", "torrent"):
folder_path = memory.ltm.get_config(folder_key) folder_path = getattr(memory.ltm.workspace, folder_type, None)
else:
folder_path = memory.ltm.library_paths.get(folder_type)
if not folder_path: if not folder_path:
logger.warning(f"Folder not configured: {folder_type}") return _err("folder_not_set", f"{folder_type.capitalize()} folder not configured.")
return {
"error": "folder_not_set",
"message": f"{folder_type.capitalize()} folder not configured.",
}
root = Path(folder_path) root = Path(folder_path)
target = root / safe_path target = root / safe_path
if not self._is_safe_path(root, target): if not self._is_safe_path(root, target):
logger.warning(f"Path traversal attempt: {path}") return _err("forbidden", "Access denied: path outside allowed directory")
return {
"error": "forbidden",
"message": "Access denied: path outside allowed directory",
}
if not target.exists(): if not target.exists():
logger.warning(f"Path does not exist: {target}") return _err("not_found", f"Path does not exist: {safe_path}")
return {
"error": "not_found",
"message": f"Path does not exist: {safe_path}",
}
if not target.is_dir(): if not target.is_dir():
logger.warning(f"Path is not a directory: {target}") return _err("not_a_directory", f"Path is not a directory: {safe_path}")
return {
"error": "not_a_directory",
"message": f"Path is not a directory: {safe_path}",
}
try: try:
entries = [entry.name for entry in target.iterdir()] entries = [entry.name for entry in target.iterdir()]
@@ -149,35 +114,28 @@ class FileManager:
"count": len(entries), "count": len(entries),
} }
except PermissionError: except PermissionError:
logger.warning(f"Permission denied: {target}") return _err("permission_denied", f"Permission denied: {safe_path}")
return {
"error": "permission_denied",
"message": f"Permission denied: {safe_path}",
}
except PathTraversalError as e: except PathTraversalError as e:
logger.warning(f"Path traversal attempt: {e}") return _err("forbidden", str(e))
return {"error": "forbidden", "message": str(e)}
except ValueError as e: except ValueError as e:
logger.error(f"Validation error: {e}") return _err("validation_failed", str(e))
return {"error": "validation_failed", "message": str(e)}
except Exception as e: except Exception as e:
logger.error(f"Unexpected error listing folder: {e}", exc_info=True) logger.error(f"Unexpected error listing folder: {e}", exc_info=True)
return {"error": "internal_error", "message": "Failed to list folder"} return _err("internal_error", "Failed to list folder")
def move_file( # noqa: PLR0911 def copy_file(self, source: str, destination: str) -> dict[str, Any]:
self, source: str, destination: str
) -> dict[str, Any]:
""" """
Move a file from one location to another. Hard-link a file to a destination (instant, no data duplication).
Includes validation and verification after move. Both paths must be on the same filesystem. qBittorrent keeps seeding
the original inode unaffected.
Args: Args:
source: Source file path. source: Absolute path to the source file.
destination: Destination file path. destination: Absolute path to the destination file.
Returns: Returns:
Dict with status or error information. Dict with status or error information.
@@ -186,126 +144,174 @@ class FileManager:
source_path = Path(source).resolve() source_path = Path(source).resolve()
dest_path = Path(destination).resolve() dest_path = Path(destination).resolve()
logger.info(f"Moving file: {source_path} -> {dest_path}") logger.info(f"Hard-linking: {source_path} -> {dest_path}")
if not source_path.exists(): if not source_path.exists():
return { return _err("source_not_found", f"Source does not exist: {source}")
"error": "source_not_found",
"message": f"Source does not exist: {source}",
}
if not source_path.is_file(): if not source_path.is_file():
return { return _err("source_not_file", f"Source is not a file: {source}")
"error": "source_not_file",
"message": f"Source is not a file: {source}",
}
source_size = source_path.stat().st_size if not dest_path.parent.exists():
dest_parent = dest_path.parent return _err("destination_dir_not_found", f"Destination directory does not exist: {dest_path.parent}")
if not dest_parent.exists():
return {
"error": "destination_dir_not_found",
"message": f"Destination directory does not exist: {dest_parent}",
}
if dest_path.exists(): if dest_path.exists():
return { return _err("destination_exists", f"Destination already exists: {destination}")
"error": "destination_exists",
"message": f"Destination already exists: {destination}",
}
shutil.move(str(source_path), str(dest_path)) os.link(source_path, dest_path)
# Verify move logger.info(f"Hard link created: {dest_path.name}")
if not dest_path.exists():
return {
"error": "move_verification_failed",
"message": "File was not moved successfully",
}
dest_size = dest_path.stat().st_size
if dest_size != source_size:
return {
"error": "size_mismatch",
"message": "File size mismatch after move",
}
logger.info(f"File moved successfully: {dest_path.name}")
return { return {
"status": "ok", "status": "ok",
"source": str(source_path), "source": str(source_path),
"destination": str(dest_path), "destination": str(dest_path),
"filename": dest_path.name, "filename": dest_path.name,
"size": dest_size, "size": source_path.stat().st_size,
}
except OSError as e:
logger.error(f"Error creating hard link: {e}", exc_info=True)
return _err("link_failed", str(e))
def move_file(self, source: str, destination: str) -> dict[str, Any]:
"""
Move a file via hard link + source deletion.
Hard-links the file to the destination, then removes the source.
qBittorrent keeps seeding during the operation since the inode
is still referenced until the source is removed.
Args:
source: Absolute path to the source file.
destination: Absolute path to the destination file.
Returns:
Dict with status or error information.
"""
try:
source_path = Path(source).resolve()
link_result = self.copy_file(source, destination)
if link_result.get("status") != "ok":
return link_result
source_path.unlink()
logger.info(f"File moved: {source_path.name} -> {link_result['destination']}")
return {
"status": "ok",
"source": str(source_path),
"destination": link_result["destination"],
"filename": link_result["filename"],
"size": link_result["size"],
} }
except Exception as e: except Exception as e:
logger.error(f"Error moving file: {e}", exc_info=True) logger.error(f"Error moving file: {e}", exc_info=True)
return {"error": "move_failed", "message": str(e)} return _err("move_failed", str(e))
def _validate_folder_name(self, folder_name: str) -> bool: def create_seed_links(
self, library_file: str, original_download_folder: str, torrent_folder: str
) -> dict[str, Any]:
""" """
Validate folder name against allowed values. Prepare a torrent folder so qBittorrent can keep seeding after a move.
- Hard-links the moved video file from the library back into
torrents/<original_folder_name>/ (same inode, no data copy).
- Copies every other file from the original download folder
(.srt, .nfo, .jpg, .txt, …) into the same torrent subfolder,
preserving relative paths.
Args: Args:
folder_name: Name to validate. library_file: Absolute path to the video file in the library.
original_download_folder: Absolute path to the download folder
that contained the original release (may still have subs etc.).
torrent_folder: Absolute path to the root torrents/ directory.
Returns: Returns:
True if valid. Dict with status, linked_file, copied_files list, skipped list.
Raises:
ValueError: If folder name is invalid.
""" """
valid_names = [fn.value for fn in FolderName] try:
if folder_name not in valid_names: lib_path = Path(library_file).resolve()
raise ValueError( src_folder = Path(original_download_folder).resolve()
f"Invalid folder_name '{folder_name}'. " torrent_root = Path(torrent_folder).resolve()
f"Must be one of: {', '.join(valid_names)}"
) if not lib_path.exists():
return True return _err("library_file_not_found", f"Library file not found: {library_file}")
if not src_folder.exists():
return _err("source_folder_not_found", f"Download folder not found: {original_download_folder}")
if not torrent_root.exists():
return _err("torrent_folder_not_found", f"Torrent folder not found: {torrent_folder}")
dest_folder = torrent_root / src_folder.name
dest_folder.mkdir(parents=True, exist_ok=True)
# Hard-link the video file from library → torrent subfolder
link_dest = dest_folder / lib_path.name
if link_dest.exists():
return _err("destination_exists", f"Link already exists: {link_dest}")
os.link(lib_path, link_dest)
logger.info(f"Hard-linked for seeding: {lib_path.name}{dest_folder}")
# Copy everything else from the original download folder
copied: list[str] = []
skipped: list[str] = []
for item in src_folder.rglob("*"):
if not item.is_file():
continue
rel = item.relative_to(src_folder)
dest_item = dest_folder / rel
dest_item.parent.mkdir(parents=True, exist_ok=True)
if dest_item.exists():
skipped.append(str(rel))
continue
import shutil
shutil.copy2(item, dest_item)
copied.append(str(rel))
logger.debug(f"Copied for seeding: {rel}")
return {
"status": "ok",
"torrent_subfolder": str(dest_folder),
"linked_file": str(link_dest),
"copied_files": copied,
"copied_count": len(copied),
"skipped": skipped,
}
except OSError as e:
logger.error(f"create_seed_links failed: {e}", exc_info=True)
return _err("link_failed", str(e))
except Exception as e:
logger.error(f"create_seed_links unexpected error: {e}", exc_info=True)
return _err("internal_error", str(e))
def _sanitize_path(self, path: str) -> str: def _sanitize_path(self, path: str) -> str:
""" """
Sanitize path to prevent path traversal attacks. Sanitize a relative path to prevent path traversal attacks.
Args: Raises PathTraversalError if the path tries to escape the root.
path: Path to sanitize.
Returns:
Sanitized path.
Raises:
PathTraversalError: If path contains traversal attempts.
""" """
normalized = os.path.normpath(path) normalized = os.path.normpath(path)
# Reject absolute paths
if os.path.isabs(normalized): if os.path.isabs(normalized):
raise PathTraversalError("Absolute paths are not allowed") raise PathTraversalError("Absolute paths are not allowed")
# Reject parent directory references
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized: if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
raise PathTraversalError("Parent directory references not allowed") raise PathTraversalError("Parent directory references not allowed")
# Reject null bytes
if "\x00" in normalized: if "\x00" in normalized:
raise PathTraversalError("Null bytes in path not allowed") raise PathTraversalError("Null bytes in path not allowed")
return normalized return normalized
def _is_safe_path(self, base_path: Path, target_path: Path) -> bool: def _is_safe_path(self, base_path: Path, target_path: Path) -> bool:
""" """Return True if target_path is inside base_path (prevents traversal)."""
Check if target path is within base path.
Args:
base_path: The allowed base directory.
target_path: The path to check.
Returns:
True if target is within base, False otherwise.
"""
try: try:
base_resolved = base_path.resolve() target_path.resolve().relative_to(base_path.resolve())
target_resolved = target_path.resolve()
target_resolved.relative_to(base_resolved)
return True return True
except (ValueError, OSError): except (ValueError, OSError):
return False return False
+5 -12
View File
@@ -1,17 +1,10 @@
"""Persistence layer - Data storage implementations.""" """Persistence layer - Data storage implementations."""
from .context import ( from .context import get_memory, has_memory, init_memory, set_memory
get_memory, from .memory import Memory
has_memory, from .memory.episodic import EpisodicMemory
init_memory, from .memory.ltm import LongTermMemory
set_memory, from .memory.stm import ShortTermMemory
)
from .memory import (
EpisodicMemory,
LongTermMemory,
Memory,
ShortTermMemory,
)
__all__ = [ __all__ = [
"Memory", "Memory",
+21 -15
View File
@@ -1,28 +1,26 @@
""" """
Memory context using contextvars. Memory context — global singleton.
Provides thread-safe and async-safe access to the Memory instance Provides async-safe access to the Memory instance
without passing it explicitly through all function calls. without passing it explicitly through all function calls.
Usage: Usage:
# At application startup # At application startup
from alfred.infrastructure.persistence import init_memory, get_memory from alfred.infrastructure.persistence import init_memory, get_memory
init_memory("memory_data") init_memory("memory")
# Anywhere in the code # Anywhere in the code
memory = get_memory() memory = get_memory()
memory.ltm.set_config("key", "value") memory.ltm.set_config("key", "value")
""" """
from contextvars import ContextVar
from .memory import Memory from .memory import Memory
_memory_ctx: ContextVar[Memory | None] = ContextVar("memory", default=None) _memory: Memory | None = None
def init_memory(storage_dir: str = "memory_data") -> Memory: def init_memory(storage_dir: str = "memory") -> Memory:
""" """
Initialize the memory and set it in the context. Initialize the memory and set it in the context.
@@ -34,9 +32,9 @@ def init_memory(storage_dir: str = "memory_data") -> Memory:
Returns: Returns:
The initialized Memory instance. The initialized Memory instance.
""" """
memory = Memory(storage_dir=storage_dir) global _memory
_memory_ctx.set(memory) _memory = Memory(storage_dir=storage_dir)
return memory return _memory
def set_memory(memory: Memory) -> None: def set_memory(memory: Memory) -> None:
@@ -48,7 +46,8 @@ def set_memory(memory: Memory) -> None:
Args: Args:
memory: Memory instance to set. memory: Memory instance to set.
""" """
_memory_ctx.set(memory) global _memory
_memory = memory
def get_memory() -> Memory: def get_memory() -> Memory:
@@ -61,12 +60,11 @@ def get_memory() -> Memory:
Raises: Raises:
RuntimeError: If memory has not been initialized. RuntimeError: If memory has not been initialized.
""" """
memory = _memory_ctx.get() if _memory is None:
if memory is None:
raise RuntimeError( raise RuntimeError(
"Memory not initialized. Call init_memory() at application startup." "Memory not initialized. Call init_memory() at application startup."
) )
return memory return _memory
def has_memory() -> bool: def has_memory() -> bool:
@@ -76,4 +74,12 @@ def has_memory() -> bool:
Returns: Returns:
True if memory is available, False otherwise. True if memory is available, False otherwise.
""" """
return _memory_ctx.get() is not None return _memory is not None
def reset_memory() -> None:
    """Clear the module-level Memory singleton. For use in tests only.

    Rebinds the global ``_memory`` to ``None``.
    """
    global _memory
    _memory = None
@@ -119,10 +119,6 @@ class JsonSubtitleRepository(SubtitleRepository):
"timing_offset": subtitle.timing_offset.milliseconds, "timing_offset": subtitle.timing_offset.milliseconds,
"hearing_impaired": subtitle.hearing_impaired, "hearing_impaired": subtitle.hearing_impaired,
"forced": subtitle.forced, "forced": subtitle.forced,
"source": subtitle.source,
"uploader": subtitle.uploader,
"download_count": subtitle.download_count,
"rating": subtitle.rating,
} }
def _from_dict(self, data: dict[str, Any]) -> Subtitle: def _from_dict(self, data: dict[str, Any]) -> Subtitle:
@@ -137,8 +133,4 @@ class JsonSubtitleRepository(SubtitleRepository):
timing_offset=TimingOffset(data.get("timing_offset", 0)), timing_offset=TimingOffset(data.get("timing_offset", 0)),
hearing_impaired=data.get("hearing_impaired", False), hearing_impaired=data.get("hearing_impaired", False),
forced=data.get("forced", False), forced=data.get("forced", False),
source=data.get("source"),
uploader=data.get("uploader"),
download_count=data.get("download_count"),
rating=data.get("rating"),
) )
@@ -1,7 +1,6 @@
"""JSON-based TV show repository implementation.""" """JSON-based TV show repository implementation."""
import logging import logging
from datetime import datetime
from typing import Any from typing import Any
from alfred.domain.shared.value_objects import ImdbId from alfred.domain.shared.value_objects import ImdbId
@@ -115,8 +114,6 @@ class JsonTVShowRepository(TVShowRepository):
"seasons_count": show.seasons_count, "seasons_count": show.seasons_count,
"status": show.status.value, "status": show.status.value,
"tmdb_id": show.tmdb_id, "tmdb_id": show.tmdb_id,
"first_air_date": show.first_air_date,
"added_at": show.added_at.isoformat(),
} }
def _from_dict(self, data: dict[str, Any]) -> TVShow: def _from_dict(self, data: dict[str, Any]) -> TVShow:
@@ -127,10 +124,4 @@ class JsonTVShowRepository(TVShowRepository):
seasons_count=data["seasons_count"], seasons_count=data["seasons_count"],
status=ShowStatus.from_string(data["status"]), status=ShowStatus.from_string(data["status"]),
tmdb_id=data.get("tmdb_id"), tmdb_id=data.get("tmdb_id"),
first_air_date=data.get("first_air_date"),
added_at=(
datetime.fromisoformat(data["added_at"])
if data.get("added_at")
else datetime.now()
),
) )
-577
View File
@@ -1,577 +0,0 @@
"""
Memory - Unified management of 3 memory types.
Architecture:
- LTM (Long-Term Memory): Configuration, library, preferences - Persistent
- STM (Short-Term Memory): Conversation, current workflow - Volatile
- Episodic Memory: Search results, transient states - Very volatile
"""
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# =============================================================================
# LONG-TERM MEMORY (LTM) - Persistent
# =============================================================================
@dataclass
class LongTermMemory:
    """
    Long-term memory tier.

    Holds four plain containers:
    - config: key/value settings (folders, URLs)
    - preferences: user preferences (quality, languages, ...)
    - library: owned media, grouped by media type
    - following: followed shows (watchlist)
    """

    # Folder and service configuration
    config: dict[str, str] = field(default_factory=dict)

    # User preferences, pre-populated with defaults
    preferences: dict[str, Any] = field(
        default_factory=lambda: {
            "preferred_quality": "1080p",
            "preferred_languages": ["en", "fr"],
            "auto_organize": False,
            "naming_format": "{title}.{year}.{quality}",
        }
    )

    # Owned media, one list per media type
    library: dict[str, list[dict]] = field(
        default_factory=lambda: {"movies": [], "tv_shows": []}
    )

    # Followed shows (watchlist)
    following: list[dict] = field(default_factory=list)

    def get_config(self, key: str, default: Any = None) -> Any:
        """Return the config value stored under *key*, or *default*."""
        return self.config.get(key, default)

    def set_config(self, key: str, value: Any) -> None:
        """Store *value* under *key* in the config."""
        self.config[key] = value
        logger.debug(f"LTM: Set config {key}")

    def has_config(self, key: str) -> bool:
        """Return True when *key* is present with a non-None value."""
        return self.config.get(key) is not None

    def add_to_library(self, media_type: str, media: dict) -> None:
        """
        Append *media* to the *media_type* list unless its imdb_id is known.

        The list for an unknown media type is created on first use, and the
        passed dict is stamped in place with an ``added_at`` ISO timestamp.
        """
        bucket = self.library.setdefault(media_type, [])
        known_ids = [entry.get("imdb_id") for entry in bucket]
        if media.get("imdb_id") in known_ids:
            return

        media["added_at"] = datetime.now().isoformat()
        bucket.append(media)
        logger.info(f"LTM: Added {media.get('title')} to {media_type}")

    def get_library(self, media_type: str) -> list[dict]:
        """Return the list stored for *media_type* (empty list if absent)."""
        return self.library.get(media_type, [])

    def follow_show(self, show: dict) -> None:
        """
        Add *show* to the watchlist unless its imdb_id is already followed.

        The passed dict is stamped in place with a ``followed_at`` timestamp.
        """
        followed_ids = [entry.get("imdb_id") for entry in self.following]
        if show.get("imdb_id") in followed_ids:
            return

        show["followed_at"] = datetime.now().isoformat()
        self.following.append(show)
        logger.info(f"LTM: Now following {show.get('title')}")

    def to_dict(self) -> dict:
        """Return the four containers as a plain dict for serialization."""
        return {
            "config": self.config,
            "preferences": self.preferences,
            "library": self.library,
            "following": self.following,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LongTermMemory":
        """Build an instance from *data*, filling missing keys with defaults."""
        fallback_preferences = {
            "preferred_quality": "1080p",
            "preferred_languages": ["en", "fr"],
            "auto_organize": False,
            "naming_format": "{title}.{year}.{quality}",
        }
        fallback_library = {"movies": [], "tv_shows": []}
        return cls(
            config=data.get("config", {}),
            preferences=data.get("preferences", fallback_preferences),
            library=data.get("library", fallback_library),
            following=data.get("following", []),
        )
# =============================================================================
# SHORT-TERM MEMORY (STM) - Conversation
# =============================================================================
@dataclass
class ShortTermMemory:
"""
Short-term memory - Volatile and conversational.
Stores:
- Current conversation history
- Current workflow (what we're doing)
- Extracted entities from conversation
- Current discussion topic
"""
# Conversation message history
conversation_history: list[dict[str, str]] = field(default_factory=list)
# Current workflow
current_workflow: dict | None = None
# Extracted entities (title, year, requested quality, etc.)
extracted_entities: dict[str, Any] = field(default_factory=dict)
# Current conversation topic
current_topic: str | None = None
# Conversation language
language: str = "en"
# History message limit
max_history: int = 20
def add_message(self, role: str, content: str) -> None:
"""Add a message to history."""
self.conversation_history.append(
{"role": role, "content": content, "timestamp": datetime.now().isoformat()}
)
# Keep only the last N messages
if len(self.conversation_history) > self.max_history:
self.conversation_history = self.conversation_history[-self.max_history :]
logger.debug(f"STM: Added {role} message")
def get_recent_history(self, n: int = 10) -> list[dict]:
"""Get the last N messages."""
return self.conversation_history[-n:]
def start_workflow(self, workflow_type: str, target: dict) -> None:
"""Start a new workflow."""
self.current_workflow = {
"type": workflow_type,
"target": target,
"stage": "started",
"started_at": datetime.now().isoformat(),
}
logger.info(f"STM: Started workflow '{workflow_type}'")
def update_workflow_stage(self, stage: str) -> None:
"""Update the workflow stage."""
if self.current_workflow:
self.current_workflow["stage"] = stage
logger.debug(f"STM: Workflow stage -> {stage}")
def end_workflow(self) -> None:
"""End the current workflow."""
if self.current_workflow:
logger.info(f"STM: Ended workflow '{self.current_workflow.get('type')}'")
self.current_workflow = None
def set_entity(self, key: str, value: Any) -> None:
"""Store an extracted entity."""
self.extracted_entities[key] = value
logger.debug(f"STM: Set entity {key}={value}")
def get_entity(self, key: str, default: Any = None) -> Any:
"""Get an extracted entity."""
return self.extracted_entities.get(key, default)
def clear_entities(self) -> None:
"""Clear extracted entities."""
self.extracted_entities = {}
def set_topic(self, topic: str) -> None:
"""Set the current topic."""
self.current_topic = topic
logger.debug(f"STM: Topic -> {topic}")
def set_language(self, language: str) -> None:
"""Set the conversation language."""
self.language = language
logger.debug(f"STM: Language -> {language}")
def clear(self) -> None:
"""Reset short-term memory."""
self.conversation_history = []
self.current_workflow = None
self.extracted_entities = {}
self.current_topic = None
self.language = "en"
logger.info("STM: Cleared")
def to_dict(self) -> dict:
"""Convert to dictionary."""
return {
"conversation_history": self.conversation_history,
"current_workflow": self.current_workflow,
"extracted_entities": self.extracted_entities,
"current_topic": self.current_topic,
"language": self.language,
}
# =============================================================================
# EPISODIC MEMORY - Transient states
# =============================================================================
@dataclass
class EpisodicMemory:
"""
Episodic/sensory memory - Temporary and event-driven.
Stores:
- Last search results
- Active downloads
- Recent errors
- Pending questions awaiting user response
- Background events
"""
# Last search results
last_search_results: dict | None = None
# Active downloads
active_downloads: list[dict] = field(default_factory=list)
# Recent errors
recent_errors: list[dict] = field(default_factory=list)
# Pending question awaiting user response
pending_question: dict | None = None
# Background events (download complete, new files, etc.)
background_events: list[dict] = field(default_factory=list)
# Limits for errors/events kept
max_errors: int = 5
max_events: int = 10
def store_search_results(
self, query: str, results: list[dict], search_type: str = "torrent"
) -> None:
"""
Store search results with index.
Args:
query: The search query
results: List of results
search_type: Type of search (torrent, movie, tvshow)
"""
self.last_search_results = {
"query": query,
"type": search_type,
"timestamp": datetime.now().isoformat(),
"results": [{"index": i + 1, **r} for i, r in enumerate(results)],
}
logger.info(f"Episodic: Stored {len(results)} search results for '{query}'")
def get_result_by_index(self, index: int) -> dict | None:
"""
Get a result by its number (1-indexed).
Args:
index: Result number (1, 2, 3, ...)
Returns:
The result or None if not found
"""
if not self.last_search_results:
logger.warning("Episodic: No search results stored")
return None
for result in self.last_search_results.get("results", []):
if result.get("index") == index:
return result
logger.warning(f"Episodic: Result #{index} not found")
return None
def get_search_results(self) -> dict | None:
"""Get the last search results."""
return self.last_search_results
def clear_search_results(self) -> None:
"""Clear search results."""
self.last_search_results = None
def add_active_download(self, download: dict) -> None:
"""Add an active download."""
download["started_at"] = datetime.now().isoformat()
self.active_downloads.append(download)
logger.info(f"Episodic: Added download '{download.get('name')}'")
def update_download_progress(
self, task_id: str, progress: int, status: str = "downloading"
) -> None:
"""Update download progress."""
for dl in self.active_downloads:
if dl.get("task_id") == task_id:
dl["progress"] = progress
dl["status"] = status
dl["updated_at"] = datetime.now().isoformat()
break
def complete_download(self, task_id: str, file_path: str) -> dict | None:
"""Mark a download as complete and remove it."""
for i, dl in enumerate(self.active_downloads):
if dl.get("task_id") == task_id:
completed = self.active_downloads.pop(i)
completed["status"] = "completed"
completed["file_path"] = file_path
completed["completed_at"] = datetime.now().isoformat()
# Add a background event
self.add_background_event(
"download_complete",
{"name": completed.get("name"), "file_path": file_path},
)
logger.info(f"Episodic: Download completed '{completed.get('name')}'")
return completed
return None
def get_active_downloads(self) -> list[dict]:
"""Get active downloads."""
return self.active_downloads
def add_error(self, action: str, error: str, context: dict | None = None) -> None:
"""Record a recent error."""
self.recent_errors.append(
{
"timestamp": datetime.now().isoformat(),
"action": action,
"error": error,
"context": context or {},
}
)
# Keep only the last N errors
self.recent_errors = self.recent_errors[-self.max_errors :]
logger.warning(f"Episodic: Error in '{action}': {error}")
def get_recent_errors(self) -> list[dict]:
"""Get recent errors."""
return self.recent_errors
def set_pending_question(
self,
question: str,
options: list[dict],
context: dict,
question_type: str = "choice",
) -> None:
"""
Record a question awaiting user response.
Args:
question: The question asked
options: List of possible options
context: Question context
question_type: Type of question (choice, confirmation, input)
"""
self.pending_question = {
"type": question_type,
"question": question,
"options": options,
"context": context,
"timestamp": datetime.now().isoformat(),
}
logger.info(f"Episodic: Pending question set ({question_type})")
def get_pending_question(self) -> dict | None:
"""Get the pending question."""
return self.pending_question
def resolve_pending_question(self, answer_index: int | None = None) -> dict | None:
"""
Resolve the pending question and return the chosen option.
Args:
answer_index: Answer index (1-indexed) or None to cancel
Returns:
The chosen option or None
"""
if not self.pending_question:
return None
result = None
if answer_index is not None and self.pending_question.get("options"):
for opt in self.pending_question["options"]:
if opt.get("index") == answer_index:
result = opt
break
self.pending_question = None
logger.info("Episodic: Pending question resolved")
return result
def add_background_event(self, event_type: str, data: dict) -> None:
"""Add a background event."""
self.background_events.append(
{
"type": event_type,
"timestamp": datetime.now().isoformat(),
"data": data,
"read": False,
}
)
# Keep only the last N events
self.background_events = self.background_events[-self.max_events :]
logger.info(f"Episodic: Background event '{event_type}'")
def get_unread_events(self) -> list[dict]:
"""Get unread events and mark them as read."""
unread = [e for e in self.background_events if not e.get("read")]
for e in self.background_events:
e["read"] = True
return unread
def clear(self) -> None:
"""Reset episodic memory."""
self.last_search_results = None
self.active_downloads = []
self.recent_errors = []
self.pending_question = None
self.background_events = []
logger.info("Episodic: Cleared")
def to_dict(self) -> dict:
"""Convert to dictionary."""
return {
"last_search_results": self.last_search_results,
"active_downloads": self.active_downloads,
"recent_errors": self.recent_errors,
"pending_question": self.pending_question,
"background_events": self.background_events,
}
# =============================================================================
# MEMORY MANAGER - Unified manager
# =============================================================================
class Memory:
    """
    Single entry point bundling the 3 memory types (LTM, STM, episodic).

    Usage:
        memory = Memory("memory_data")
        memory.ltm.set_config("download_folder", "/path")
        memory.stm.add_message("user", "Hello")
        memory.episodic.store_search_results("query", results)
        memory.save()
    """

    def __init__(self, storage_dir: str = "memory_data"):
        """
        Create the storage directory if needed and set up the 3 memories.

        Args:
            storage_dir: Directory for persistent storage
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.ltm_file = self.storage_dir / "ltm.json"

        # LTM is read back from disk; STM and episodic always start empty
        self.ltm = self._load_ltm()
        self.stm = ShortTermMemory()
        self.episodic = EpisodicMemory()

        logger.info(f"Memory initialized (storage: {storage_dir})")

    def _load_ltm(self) -> LongTermMemory:
        """Read LTM from ``ltm_file``; fall back to a fresh instance."""
        if not self.ltm_file.exists():
            return LongTermMemory()

        try:
            raw = self.ltm_file.read_text(encoding="utf-8")
            data = json.loads(raw)
            logger.info("LTM loaded from file")
            return LongTermMemory.from_dict(data)
        except (OSError, json.JSONDecodeError) as e:
            # Unreadable or corrupt file: warn and start from defaults
            logger.warning(f"Could not load LTM: {e}")
            return LongTermMemory()

    def save(self) -> None:
        """
        Write LTM (the only persistent memory) to ``ltm_file`` as JSON.

        Raises:
            OSError: If the file cannot be written (logged, then re-raised).
        """
        try:
            serialized = json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False)
            self.ltm_file.write_text(serialized, encoding="utf-8")
            logger.debug("LTM saved to file")
        except OSError as e:
            logger.error(f"Failed to save LTM: {e}")
            raise

    def get_context_for_prompt(self) -> dict:
        """
        Generate context to include in the system prompt.

        Returns:
            Dictionary with relevant context from all 3 memories
        """
        last_search = self.episodic.last_search_results
        if last_search:
            search_summary = {
                "query": last_search.get("query"),
                "result_count": len(last_search.get("results", [])),
            }
        else:
            search_summary = {"query": None, "result_count": 0}

        unread = [e for e in self.episodic.background_events if not e.get("read")]

        return {
            "config": self.ltm.config,
            "preferences": self.ltm.preferences,
            "current_workflow": self.stm.current_workflow,
            "current_topic": self.stm.current_topic,
            "extracted_entities": self.stm.extracted_entities,
            "last_search": search_summary,
            "active_downloads_count": len(self.episodic.active_downloads),
            "pending_question": self.episodic.pending_question is not None,
            "unread_events": len(unread),
        }

    def get_full_state(self) -> dict:
        """Return the ``to_dict()`` dump of all 3 memories (for debug)."""
        return {
            "ltm": self.ltm.to_dict(),
            "stm": self.stm.to_dict(),
            "episodic": self.episodic.to_dict(),
        }

    def clear_session(self) -> None:
        """Clear session memories (STM + Episodic); LTM is untouched."""
        self.stm.clear()
        self.episodic.clear()
        logger.info("Session memories cleared")
@@ -0,0 +1,4 @@
from .base import Memory
from .registry import MemoryRegistry
__all__ = ["Memory", "MemoryRegistry"]
@@ -0,0 +1,90 @@
"""Memory — unified manager for the 3 memory tiers."""
import json
import logging
from pathlib import Path
from .episodic import EpisodicMemory
from .ltm import LongTermMemory
from .stm import ShortTermMemory
logger = logging.getLogger(__name__)
class Memory:
    """
    Single entry point bundling the 3 memory tiers (LTM, STM, episodic).

    Usage:
        memory = Memory("data/memory")
        memory.ltm.workspace.download = "/downloads"
        memory.stm.add_message("user", "Hello")
        memory.episodic.store_search_results("query", results)
        memory.save()
    """

    def __init__(self, storage_dir: str = "memory"):
        """
        Create the storage directory if needed and set up the 3 tiers.

        Args:
            storage_dir: Directory that holds ``ltm.json``.
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.ltm_file = self.storage_dir / "ltm.json"

        # LTM is read back from disk; STM and episodic always start empty
        self.ltm = self._load_ltm()
        self.stm = ShortTermMemory()
        self.episodic = EpisodicMemory()

        logger.info(f"Memory initialized (storage: {storage_dir})")

    def _load_ltm(self) -> LongTermMemory:
        """Read LTM from ``ltm_file``; fall back to a fresh instance."""
        if not self.ltm_file.exists():
            return LongTermMemory()

        try:
            raw = self.ltm_file.read_text(encoding="utf-8")
            data = json.loads(raw)
            logger.info("LTM loaded from file")
            return LongTermMemory.from_dict(data)
        except (OSError, json.JSONDecodeError) as e:
            # Unreadable or corrupt file: warn and start from defaults
            logger.warning(f"Could not load LTM: {e}")
            return LongTermMemory()

    def save(self) -> None:
        """
        Write LTM to ``ltm_file`` as JSON (STM and Episodic are volatile).

        Raises:
            OSError: If the file cannot be written (logged, then re-raised).
        """
        try:
            serialized = json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False)
            self.ltm_file.write_text(serialized, encoding="utf-8")
            logger.debug("LTM saved")
        except OSError as e:
            logger.error(f"Failed to save LTM: {e}")
            raise

    def get_context_for_prompt(self) -> dict:
        """Build the snapshot of relevant memory for the system prompt."""
        last_search = self.episodic.search_results.last
        if last_search:
            search_summary = {
                "query": last_search.get("query"),
                "result_count": len(last_search.get("results", [])),
            }
        else:
            search_summary = {"query": None, "result_count": 0}

        unread = [e for e in self.episodic.events.items if not e.get("read")]

        return {
            "workspace": self.ltm.workspace.as_dict(),
            "library_paths": self.ltm.library_paths.to_dict(),
            "preferences": self.ltm.preferences.to_dict(),
            "current_workflow": self.stm.workflow.to_dict(),
            "current_topic": self.stm.entities.topic,
            "extracted_entities": self.stm.entities.data,
            "last_search": search_summary,
            "active_downloads_count": len(self.episodic.downloads.active),
            "pending_question": self.episodic.pending_question is not None,
            "unread_events": len(unread),
        }

    def get_full_state(self) -> dict:
        """Return the ``to_dict()`` dump of all 3 tiers (debug/API)."""
        return {
            "ltm": self.ltm.to_dict(),
            "stm": self.stm.to_dict(),
            "episodic": self.episodic.to_dict(),
        }

    def clear_session(self) -> None:
        """Reset volatile memories (STM + Episodic); LTM is untouched."""
        self.stm.clear()
        self.episodic.clear()
        logger.info("Session memories cleared")
@@ -0,0 +1,3 @@
from .episodic import EpisodicMemory
__all__ = ["EpisodicMemory"]
@@ -0,0 +1,6 @@
from .downloads import Downloads
from .errors import Errors
from .events import Events
from .search_results import SearchResults
__all__ = ["SearchResults", "Downloads", "Events", "Errors"]
@@ -0,0 +1,56 @@
"""Downloads — active torrent downloads being tracked."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class Downloads:
active: list[dict] = field(default_factory=list)
def add(self, download: dict) -> None:
download["started_at"] = datetime.now().isoformat()
self.active.append(download)
logger.info(f"Downloads: Added '{download.get('name')}'")
def update_progress(self, task_id: str, progress: int, status: str = "downloading") -> None:
for dl in self.active:
if dl.get("task_id") == task_id:
dl["progress"] = progress
dl["status"] = status
dl["updated_at"] = datetime.now().isoformat()
break
def complete(self, task_id: str, file_path: str) -> dict | None:
for i, dl in enumerate(self.active):
if dl.get("task_id") == task_id:
completed = self.active.pop(i)
completed.update({"status": "completed", "file_path": file_path, "completed_at": datetime.now().isoformat()})
logger.info(f"Downloads: Completed '{completed.get('name')}'")
return completed
return None
def clear(self) -> None:
self.active = []
@classmethod
def describe(cls) -> dict:
return {
"name": "Downloads",
"tier": "episodic",
"access": "read-write",
"description": (
"Active torrent downloads currently in progress. "
"Read to report download status to the user. "
"Write to track new downloads or update progress."
),
"fields": {
"active": "List of active downloads. Each entry has task_id, name, progress, status, started_at.",
},
}
def to_dict(self) -> list:
return self.active
@@ -0,0 +1,46 @@
"""Errors — recent agent errors for context and debugging."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
MAX_ERRORS = 5
@dataclass
class Errors:
items: list[dict] = field(default_factory=list)
max_errors: int = MAX_ERRORS
def add(self, action: str, error: str, context: dict | None = None) -> None:
self.items.append({
"timestamp": datetime.now().isoformat(),
"action": action,
"error": error,
"context": context or {},
})
self.items = self.items[-self.max_errors:]
logger.warning(f"Errors: '{action}': {error}")
def clear(self) -> None:
self.items = []
@classmethod
def describe(cls) -> dict:
return {
"name": "Errors",
"tier": "episodic",
"access": "read",
"description": (
"Recent errors encountered during tool execution. "
"Read when something goes wrong to understand what failed and avoid repeating the same mistake."
),
"fields": {
"items": f"Last {MAX_ERRORS} errors. Each has timestamp, action, error message, context.",
},
}
def to_dict(self) -> list:
return self.items
@@ -0,0 +1,49 @@
"""Events — background events (download complete, new files, etc.)."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
MAX_EVENTS = 10


@dataclass
class Events:
    """Episodic component keeping a short rolling list of background events."""

    # Recorded events, oldest first
    items: list[dict] = field(default_factory=list)
    # How many trailing entries add() keeps
    max_events: int = MAX_EVENTS

    def add(self, event_type: str, data: dict) -> None:
        """Record an unread event and keep only the last ``max_events``."""
        entry = {
            "type": event_type,
            "timestamp": datetime.now().isoformat(),
            "data": data,
            "read": False,
        }
        self.items.append(entry)
        self.items = self.items[-self.max_events:]
        logger.info(f"Events: '{event_type}'")

    def get_unread(self) -> list[dict]:
        """Return the unread events, then flag every stored event as read."""
        pending = [event for event in self.items if not event.get("read")]
        for event in self.items:
            event["read"] = True
        return pending

    def clear(self) -> None:
        """Forget every recorded event."""
        self.items = []

    @classmethod
    def describe(cls) -> dict:
        """Return the static self-description of this component."""
        description = (
            "Background events that occurred during the session (download complete, file moved, etc.). "
            "Read unread events at the start of each turn to surface anything that happened in the background. "
            "Events are written automatically by other components."
        )
        fields = {
            "items": f"Last {MAX_EVENTS} events. Each has type, timestamp, data, read flag.",
        }
        return {
            "name": "Events",
            "tier": "episodic",
            "access": "read",
            "description": description,
            "fields": fields,
        }

    def to_dict(self) -> list:
        """Return the live ``items`` list (not a copy)."""
        return self.items
@@ -0,0 +1,52 @@
"""SearchResults — last torrent/media search results."""
import logging
from dataclasses import dataclass
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class SearchResults:
last: dict | None = None
def store(self, query: str, results: list[dict], search_type: str = "torrent") -> None:
self.last = {
"query": query,
"type": search_type,
"timestamp": datetime.now().isoformat(),
"results": [{"index": i + 1, **r} for i, r in enumerate(results)],
}
logger.info(f"SearchResults: Stored {len(results)} results for '{query}'")
def get_by_index(self, index: int) -> dict | None:
if not self.last:
return None
for result in self.last.get("results", []):
if result.get("index") == index:
return result
logger.warning(f"SearchResults: #{index} not found")
return None
def clear(self) -> None:
self.last = None
@classmethod
def describe(cls) -> dict:
return {
"name": "SearchResults",
"tier": "episodic",
"access": "read-write",
"description": (
"Last torrent or media search results. "
"Read to let the user pick a result by index without searching again. "
"Write after every search to store fresh results."
),
"fields": {
"last": "Dict with query, type, timestamp, and results list. Each result has an index (1-based) plus provider fields.",
},
}
def to_dict(self) -> dict | None:
return self.last
@@ -0,0 +1,126 @@
"""EpisodicMemory — transient event-driven memory, reset on restart."""
import logging
from dataclasses import dataclass, field
from .components import Downloads, Errors, Events, SearchResults
logger = logging.getLogger(__name__)
@dataclass
class EpisodicMemory:
    """
    Episodic memory — very short-lived, event-driven.

    Built from four components plus one plain field:
    - search_results: last torrent/media search
    - downloads: active downloads being tracked
    - events: background events (download complete, etc.)
    - errors: recent errors for context
    - pending_question: question awaiting a user answer (plain dict)
    """

    search_results: SearchResults = field(default_factory=SearchResults)
    downloads: Downloads = field(default_factory=Downloads)
    events: Events = field(default_factory=Events)
    errors: Errors = field(default_factory=Errors)
    # Pending question — not a component yet, kept simple
    pending_question: dict | None = None

    # --- Read-only proxies for backward compatibility ---

    @property
    def last_search_results(self) -> dict | None:
        """Proxy for ``search_results.last``."""
        return self.search_results.last

    @property
    def active_downloads(self) -> list[dict]:
        """Proxy for ``downloads.active``."""
        return self.downloads.active

    @property
    def background_events(self) -> list[dict]:
        """Proxy for ``events.items``."""
        return self.events.items

    @property
    def recent_errors(self) -> list[dict]:
        """Proxy for ``errors.items``."""
        return self.errors.items

    # --- Search results (forwarded to the SearchResults component) ---

    def store_search_results(self, query: str, results: list[dict], search_type: str = "torrent") -> None:
        """Forward to ``search_results.store``."""
        self.search_results.store(query, results, search_type)

    def get_result_by_index(self, index: int) -> dict | None:
        """Forward to ``search_results.get_by_index``."""
        return self.search_results.get_by_index(index)

    def get_search_results(self) -> dict | None:
        """Return ``search_results.last``."""
        return self.search_results.last

    def clear_search_results(self) -> None:
        """Forward to ``search_results.clear``."""
        self.search_results.clear()

    # --- Downloads (forwarded to the Downloads component) ---

    def add_active_download(self, download: dict) -> None:
        """Forward to ``downloads.add``."""
        self.downloads.add(download)

    def update_download_progress(self, task_id: str, progress: int, status: str = "downloading") -> None:
        """Forward to ``downloads.update_progress``."""
        self.downloads.update_progress(task_id, progress, status)

    def complete_download(self, task_id: str, file_path: str) -> dict | None:
        """Complete a download and, on success, emit a ``download_complete`` event."""
        completed = self.downloads.complete(task_id, file_path)
        if not completed:
            return completed
        payload = {"name": completed.get("name"), "file_path": file_path}
        self.events.add("download_complete", payload)
        return completed

    def get_active_downloads(self) -> list[dict]:
        """Return ``downloads.active``."""
        return self.downloads.active

    # --- Errors (forwarded to the Errors component) ---

    def add_error(self, action: str, error: str, context: dict | None = None) -> None:
        """Forward to ``errors.add``."""
        self.errors.add(action, error, context)

    def get_recent_errors(self) -> list[dict]:
        """Return ``errors.items``."""
        return self.errors.items

    # --- Pending question ---

    def set_pending_question(self, question: str, options: list[dict], context: dict, question_type: str = "choice") -> None:
        """Store the question awaiting a user answer, replacing any previous one."""
        self.pending_question = {
            "type": question_type,
            "question": question,
            "options": options,
            "context": context,
        }

    def get_pending_question(self) -> dict | None:
        """Return the pending question, or None."""
        return self.pending_question

    def resolve_pending_question(self, answer_index: int | None = None) -> dict | None:
        """
        Clear the pending question and return the option matching *answer_index*.

        Returns None when nothing is pending, when *answer_index* is None,
        or when no option carries that ``"index"``.
        """
        question = self.pending_question
        if not question:
            return None

        chosen = None
        if answer_index is not None:
            chosen = next(
                (opt for opt in question.get("options", []) if opt.get("index") == answer_index),
                None,
            )
        self.pending_question = None
        return chosen

    # --- Background events (forwarded to the Events component) ---

    def add_background_event(self, event_type: str, data: dict) -> None:
        """Forward to ``events.add``."""
        self.events.add(event_type, data)

    def get_unread_events(self) -> list[dict]:
        """Forward to ``events.get_unread``."""
        return self.events.get_unread()

    # --- Whole-memory operations ---

    def clear(self) -> None:
        """Clear all four components and drop the pending question."""
        self.search_results.clear()
        self.downloads.clear()
        self.events.clear()
        self.errors.clear()
        self.pending_question = None
        logger.info("Episodic: Cleared")

    def to_dict(self) -> dict:
        """Dump the memory under the legacy flat key names."""
        return {
            "last_search_results": self.search_results.to_dict(),
            "active_downloads": self.downloads.to_dict(),
            "recent_errors": self.errors.to_dict(),
            "pending_question": self.pending_question,
            "background_events": self.events.to_dict(),
        }
@@ -0,0 +1,3 @@
from .ltm import LongTermMemory
__all__ = ["LongTermMemory"]
@@ -0,0 +1,15 @@
from .following import Following
from .library import Library
from .library_paths import LibraryPaths
from .media_preferences import MediaPreferences
from .subtitle_preferences import SubtitlePreferences
from .workspace import WorkspacePaths
__all__ = [
"WorkspacePaths",
"LibraryPaths",
"MediaPreferences",
"SubtitlePreferences",
"Library",
"Following",
]
@@ -0,0 +1,43 @@
"""Following — watchlist of TV shows being followed."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class Following:
    """Watchlist of followed TV shows, kept as a list of plain dicts."""

    shows: list[dict] = field(default_factory=list)

    def add(self, show: dict) -> None:
        """Follow a show unless one with the same ``imdb_id`` is already listed.

        A newly followed show is stamped in place with a ``followed_at``
        ISO timestamp before it is appended.
        """
        imdb_id = show.get("imdb_id")
        if any(entry.get("imdb_id") == imdb_id for entry in self.shows):
            return
        show["followed_at"] = datetime.now().isoformat()
        self.shows.append(show)
        logger.info(f"Following: Now following {show.get('title')}")

    def to_dict(self) -> list:
        """Return the list of followed shows (the live list, not a copy)."""
        return self.shows

    @classmethod
    def describe(cls) -> dict:
        """Return the self-description of this component (name, tier, access, fields)."""
        summary = (
            "Watchlist of TV shows the user is actively following. "
            "Read to check if a show should be monitored for new episodes. "
            "Write (add) when the user explicitly asks to follow a show."
        )
        field_docs = {
            "shows": "List of followed shows. Each entry has imdb_id, title, followed_at.",
        }
        return {
            "name": "Following",
            "tier": "ltm",
            "access": "read-write",
            "description": summary,
            "fields": field_docs,
        }

    @classmethod
    def from_dict(cls, data: list) -> "Following":
        """Build a watchlist from a previously serialised list of shows."""
        return cls(shows=data)
@@ -0,0 +1,64 @@
"""Library — owned movies and TV shows."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class Library:
movies: list[dict] = field(default_factory=list)
tv_shows: list[dict] = field(default_factory=list)
def add(self, media_type: str, media: dict) -> None:
"""Add a media item, skipping duplicates by imdb_id."""
collection = self._collection(media_type)
if collection is None:
return
existing_ids = [m.get("imdb_id") for m in collection]
if media.get("imdb_id") not in existing_ids:
media["added_at"] = datetime.now().isoformat()
collection.append(media)
logger.info(f"Library: Added {media.get('title')} to {media_type}")
def get(self, media_type: str) -> list[dict]:
"""Get all items for a media type."""
return self._collection(media_type) or []
@classmethod
def describe(cls) -> dict:
return {
"name": "Library",
"tier": "ltm",
"access": "read-write",
"description": (
"Catalogue of media owned by the user. "
"Read to check if a title is already in the library before downloading. "
"Write (add) after successfully moving a media file to its destination."
),
"fields": {
"movies": "List of owned movies. Each entry has imdb_id, title, year, quality, file_path, added_at.",
"tv_shows": "List of owned TV shows. Each entry has imdb_id, title, seasons, added_at.",
},
}
def _collection(self, media_type: str) -> list[dict] | None:
if media_type == "movies":
return self.movies
if media_type == "tv_shows":
return self.tv_shows
logger.warning(f"Library: Unknown media type '{media_type}'")
return None
def to_dict(self) -> dict:
return {"movies": self.movies, "tv_shows": self.tv_shows}
@classmethod
def from_dict(cls, data: dict) -> "Library":
return cls(
movies=data.get("movies", []),
tv_shows=data.get("tv_shows", []),
)
@@ -0,0 +1,70 @@
"""LibraryPaths — user-defined media library folders.
Extensible: the user creates collections as needed (tv_shows, movies, music, games…).
Each collection name maps to its root folder on disk.
Set via /set_path, never modified by the agent autonomously.
Access: READ ONLY for the agent — used to resolve destination paths when organizing media.
"""
from dataclasses import dataclass, field
@dataclass
class LibraryPaths:
"""
User-defined media library folders.
folders is a free dict: {"tv_shows": "/media/tv", "movies": "/media/movies", ...}
Add new collections simply by setting a new key via /set_path.
Access: READ ONLY for the agent — set via /set_path only.
"""
folders: dict[str, str] = field(default_factory=dict)
def get(self, collection: str) -> str | None:
return self.folders.get(collection)
def set(self, collection: str, path: str) -> None:
self.folders[collection] = path
def to_dict(self) -> dict:
return self.folders
@classmethod
def describe(cls) -> dict:
return {
"name": "LibraryPaths",
"tier": "ltm",
"access": "read",
"description": (
"User-defined media library folders. "
"Read these paths to resolve where to move an organised media file. "
"Keys are collection names (tv_shows, movies, music, games…), values are root paths. "
"New collections are added by the user via /set_path — never by the agent."
),
"fields": {
"folders": "Dict of collection_name → absolute path. E.g. {'tv_shows': '/media/tv', 'movies': '/media/movies'}.",
},
}
@classmethod
def from_dict(cls, data: dict) -> "LibraryPaths":
# Migrate from old flat format (tvshow_folder, movie_folder)
folders = dict(data)
if not folders:
return cls()
migrated = {}
legacy_map = {
"tvshow_folder": "tv_shows",
"movie_folder": "movies",
}
for old_key, new_key in legacy_map.items():
if old_key in folders:
migrated[new_key] = folders.pop(old_key)
# Keep any already-migrated keys
migrated.update(folders)
return cls(folders=migrated)
@@ -0,0 +1,52 @@
"""MediaPreferences — user preferences for video quality and audio."""
from dataclasses import dataclass, field
@dataclass
class MediaPreferences:
    """User preferences for video quality, audio languages and auto-organisation."""

    quality: str = "1080p"
    audio_languages: list[str] = field(default_factory=lambda: ["fr", "en"])
    auto_organize: bool = False

    def to_dict(self) -> dict:
        """Serialise the three preference fields into a plain dict."""
        return dict(
            quality=self.quality,
            audio_languages=self.audio_languages,
            auto_organize=self.auto_organize,
        )

    @classmethod
    def describe(cls) -> dict:
        """Return the self-description of this component (name, tier, access, fields)."""
        summary = (
            "User preferences for video quality and audio. "
            "Use these when searching torrents or choosing a release to download. "
            "Never modify autonomously — only via explicit user command."
        )
        field_docs = {
            "quality": "Preferred video quality, e.g. '1080p', '4K', '720p'.",
            "audio_languages": (
                "Ordered list of preferred audio languages (ISO 639-1). "
                "First = most preferred."
            ),
            "auto_organize": "If True, organise files into the library automatically after download.",
        }
        return {
            "name": "MediaPreferences",
            "tier": "ltm",
            "access": "read",
            "description": summary,
            "fields": field_docs,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MediaPreferences":
        """Rebuild preferences from persisted data.

        Current keys take precedence; when one is missing or falsy the old
        key (``preferred_quality`` / ``preferred_languages``) is used, and
        finally the built-in default.
        """
        quality = data.get("quality") or data.get("preferred_quality", "1080p")
        languages = data.get("audio_languages") or data.get("preferred_languages", ["fr", "en"])
        auto = data.get("auto_organize", False)
        return cls(quality=quality, audio_languages=languages, auto_organize=auto)
@@ -0,0 +1,80 @@
"""SubtitlePreferences — user preferences for subtitle handling."""
from dataclasses import dataclass, field
@dataclass
class SubtitlePreferences:
    """
    User-level defaults for subtitle selection, applied globally as the
    base of the SubtitleRuleSet inheritance chain.

    These are the top-level defaults — individual shows/movies/release groups
    can override them via .alfred/rules.yaml.

    Naming convention used when placing subtitle files alongside a video:
        {lang}.srt         → standard track (e.g. fr.srt, en.srt)
        {lang}.sdh.srt     → SDH / hearing-impaired track
        {lang}.forced.srt  → forced track (foreign lines only)

    Fields mirror SubtitleRuleSet.override() parameters:
        - languages: ordered list of ISO 639-1 codes to keep (others ignored)
        - formats: list of subtitle formats to keep (e.g. ["srt", "ass"])
        - types: list of subtitle types to keep (e.g. ["standard", "forced", "sdh"])
    """

    languages: list[str] = field(default_factory=lambda: ["fr", "en"])
    formats: list[str] = field(default_factory=lambda: ["srt", "ass"])
    types: list[str] = field(default_factory=lambda: ["standard", "forced", "sdh"])

    def to_dict(self) -> dict:
        """Serialise the three preference lists into a plain dict."""
        return {
            "languages": self.languages,
            "formats": self.formats,
            "types": self.types,
        }

    @classmethod
    def describe(cls) -> dict:
        """Return the self-description of this component (name, tier, access, fields)."""
        return {
            "name": "SubtitlePreferences",
            "tier": "ltm",
            "access": "read",
            "description": (
                "User defaults for subtitle selection. Applied as global base rules; "
                "overridden per show/movie/release group via .alfred/rules.yaml. "
                "Never modify autonomously — only via explicit user command."
            ),
            "fields": {
                "languages": (
                    "Ordered list of subtitle languages to keep (ISO 639-1). "
                    "Others are ignored. First = most preferred."
                ),
                "formats": (
                    "List of subtitle formats to keep, e.g. ['srt', 'ass']. "
                    "Others are skipped."
                ),
                "types": (
                    "List of subtitle types to keep: 'standard', 'sdh', 'forced'. "
                    "Omit a type to drop those tracks globally."
                ),
            },
        }

    @classmethod
    def from_dict(cls, data: dict) -> "SubtitlePreferences":
        """Rebuild preferences from persisted data.

        Old fields (min_size_kb, link_subs_folder) are silently dropped.
        When ``types`` is absent, it is derived from the legacy
        ``keep_sdh`` / ``keep_forced`` flags (both default to True).
        """
        if "types" in data:
            types = data["types"]
        else:
            # Back-compat: keep_sdh / keep_forced → types list. Built in the
            # same order as the dataclass default so that from_dict({}) equals
            # SubtitlePreferences().
            types = ["standard"]
            if data.get("keep_forced", True):
                types.append("forced")
            if data.get("keep_sdh", True):
                types.append("sdh")
        return cls(
            languages=data.get("languages", ["fr", "en"]),
            formats=data.get("formats", ["srt", "ass"]),
            types=types,
        )
@@ -0,0 +1,57 @@
"""WorkspacePaths — fixed infrastructure folders.
Set once via /set_path, never modified by the agent.
These are operational paths (where files land), not the media library.
"""
from dataclasses import dataclass
@dataclass
class WorkspacePaths:
"""
Fixed infrastructure folders.
- download: where qBittorrent drops completed downloads
- torrent: where .torrent files are stored
Access: READ ONLY for the agent — set via /set_path only.
"""
download: str | None = None
torrent: str | None = None
def as_dict(self) -> dict[str, str]:
"""Return configured paths, skipping unset values."""
return {k: v for k, v in {
"download": self.download,
"torrent": self.torrent,
}.items() if v is not None}
def to_dict(self) -> dict:
return {"download": self.download, "torrent": self.torrent}
@classmethod
def describe(cls) -> dict:
return {
"name": "WorkspacePaths",
"tier": "ltm",
"access": "read",
"description": (
"Fixed infrastructure folders used during file operations. "
"Read these paths to know where to find downloaded files or .torrent files. "
"Never modify — set exclusively via /set_path."
),
"fields": {
"download": "Root folder where qBittorrent drops completed downloads.",
"torrent": "Folder where .torrent files are stored.",
},
}
@classmethod
def from_dict(cls, data: dict) -> "WorkspacePaths":
# Migrate from old flat format (download_folder, torrent_folder)
return cls(
download=data.get("download") or data.get("download_folder"),
torrent=data.get("torrent") or data.get("torrent_folder"),
)
@@ -0,0 +1,65 @@
"""LongTermMemory — persistent memory across sessions."""
import logging
from dataclasses import dataclass, field
from .components import (
Following,
Library,
LibraryPaths,
MediaPreferences,
SubtitlePreferences,
WorkspacePaths,
)
logger = logging.getLogger(__name__)
@dataclass
class LongTermMemory:
    """
    Long-term memory — persisted to disk, survives restarts.

    - workspace: fixed infrastructure paths (download, torrent) — READ ONLY for agent
    - library_paths: user-defined media folders (tv_shows, movies, …) — READ ONLY for agent
    - media_preferences: quality and audio language preferences
    - subtitle_preferences: subtitle selection and naming rules
    - library: owned media catalogue
    - following: watchlist
    """

    workspace: WorkspacePaths = field(default_factory=WorkspacePaths)
    library_paths: LibraryPaths = field(default_factory=LibraryPaths)
    media_preferences: MediaPreferences = field(default_factory=MediaPreferences)
    subtitle_preferences: SubtitlePreferences = field(default_factory=SubtitlePreferences)
    library: Library = field(default_factory=Library)
    following: Following = field(default_factory=Following)

    def to_dict(self) -> dict:
        """Serialise every component under its own key."""
        return {
            "workspace": self.workspace.to_dict(),
            "library_paths": self.library_paths.to_dict(),
            "media_preferences": self.media_preferences.to_dict(),
            "subtitle_preferences": self.subtitle_preferences.to_dict(),
            "library": self.library.to_dict(),
            "following": self.following.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LongTermMemory":
        """Rebuild long-term memory from persisted data, migrating old layouts.

        Migration rules:
        - workspace: falls back to the top-level dict (old flat format).
        - library_paths: falls back to the old ``paths`` key, then to the
          top-level dict, but only when no ``library_paths`` key exists.
        - media_preferences: falls back to the old ``preferences`` key.
        Sections stored as null are treated as empty.
        """
        # Migration: old flat format had paths at the top level
        workspace_data = data.get("workspace") or data

        library_paths_data = data.get("library_paths") or data.get("paths")
        if not library_paths_data:
            # An empty "library_paths" section is what to_dict() writes when
            # no folder is configured. It must stay empty: falling back to
            # the whole top-level dict would turn every other section into a
            # bogus "folder". Only the legacy flat layout (no such key) uses
            # the top-level dict.
            library_paths_data = {} if "library_paths" in data else data

        # migration: old key was "preferences"
        media_data = data.get("media_preferences") or data.get("preferences") or {}

        return cls(
            workspace=WorkspacePaths.from_dict(workspace_data),
            library_paths=LibraryPaths.from_dict(library_paths_data),
            media_preferences=MediaPreferences.from_dict(media_data),
            subtitle_preferences=SubtitlePreferences.from_dict(
                data.get("subtitle_preferences") or {}
            ),
            library=Library.from_dict(data.get("library") or {}),
            following=Following.from_dict(data.get("following") or []),
        )
@@ -0,0 +1,80 @@
"""MemoryRegistry — autodiscovers and describes all memory components.
Scans the components/ subfolder of each memory tier (ltm, stm, episodic),
imports every class that has a describe() classmethod, and exposes their
descriptions for use in the system prompt.
No manual registration needed — drop a new component file in the right
components/ folder and it will be picked up automatically.
"""
import importlib
import inspect
import logging
import pkgutil
from pathlib import Path
logger = logging.getLogger(__name__)
# Tier packages relative to this file's package
_TIER_PACKAGES = [
"alfred.infrastructure.persistence.memory.ltm.components",
"alfred.infrastructure.persistence.memory.stm.components",
"alfred.infrastructure.persistence.memory.episodic.components",
]
def _load_components(package_name: str) -> list[dict]:
    """Import all modules in a package and collect describe() results.

    Every class defined in a module of ``package_name`` (not merely imported
    into it) that has a ``describe`` attribute contributes the result of
    ``describe()``.

    Failures are logged and never raised. They are isolated per level: a
    package that cannot be loaded yields an empty list, a module that cannot
    be imported is skipped, and a failing ``describe()`` skips only that
    class. One broken component file therefore does not hide the others.
    """
    descriptions: list[dict] = []

    try:
        package = importlib.import_module(package_name)
        package_path = Path(package.__file__).parent
        module_infos = list(pkgutil.iter_modules([str(package_path)]))
    except Exception as e:
        logger.warning(f"MemoryRegistry: Could not load package {package_name}: {e}")
        return descriptions

    for module_info in module_infos:
        module_name = f"{package_name}.{module_info.name}"
        try:
            module = importlib.import_module(module_name)
            classes = inspect.getmembers(module, inspect.isclass)
        except Exception as e:
            logger.warning(f"MemoryRegistry: Could not import module {module_name}: {e}")
            continue

        for _, cls in classes:
            try:
                # Skip classes that were only imported into the module.
                if cls.__module__ == module.__name__ and hasattr(cls, "describe"):
                    descriptions.append(cls.describe())
            except Exception as e:
                logger.warning(f"MemoryRegistry: describe() failed on {cls.__name__}: {e}")

    return descriptions
class MemoryRegistry:
    """
    Collects the descriptions of all memory components at construction time.

    Usage:
        registry = MemoryRegistry()
        all_components = registry.all()           # flat list
        ltm_components = registry.by_tier("ltm")  # filtered by tier
        schema = registry.schema()                # grouped by tier, for the prompt
    """

    def __init__(self):
        # Tier packages are scanned in the order they are declared.
        self._components: list[dict] = [
            description
            for package in _TIER_PACKAGES
            for description in _load_components(package)
        ]
        logger.info(f"MemoryRegistry: Loaded {len(self._components)} components")

    def all(self) -> list[dict]:
        """Return every component description (the internal list itself)."""
        return self._components

    def by_tier(self, tier: str) -> list[dict]:
        """Return the descriptions whose ``tier`` equals the given one (ltm, stm, episodic)."""
        matching = []
        for component in self._components:
            if component.get("tier") == tier:
                matching.append(component)
        return matching

    def schema(self) -> dict[str, list[dict]]:
        """Return the descriptions grouped by tier.

        The three standard tiers are always present, even when empty.
        Descriptions without a ``tier`` key are grouped under ``"unknown"``.
        """
        grouped: dict[str, list[dict]] = {name: [] for name in ("ltm", "stm", "episodic")}
        for component in self._components:
            bucket = grouped.setdefault(component.get("tier", "unknown"), [])
            bucket.append(component)
        return grouped
@@ -0,0 +1,3 @@
from .stm import ShortTermMemory
__all__ = ["ShortTermMemory"]
@@ -0,0 +1,5 @@
from .conversation import Conversation
from .entities import Entities
from .workflow import Workflow
__all__ = ["Conversation", "Workflow", "Entities"]
@@ -0,0 +1,55 @@
"""Conversation — message history for the current session."""
import logging
from dataclasses import dataclass, field
from datetime import datetime
logger = logging.getLogger(__name__)
MAX_HISTORY = 20


@dataclass
class Conversation:
    """Message history and detected language of the current session."""

    messages: list[dict] = field(default_factory=list)
    max_history: int = MAX_HISTORY
    language: str = "en"

    def add(self, role: str, content: str) -> None:
        """Append a message, capping at max_history.

        Each message is stored with its role, content and an ISO timestamp.
        When the cap is exceeded only the newest ``max_history`` messages
        are kept.
        """
        self.messages.append({"role": role, "content": content, "timestamp": datetime.now().isoformat()})
        # NOTE(review): with max_history == 0 the slice below is [-0:], i.e.
        # the whole list, so nothing is trimmed — confirm whether 0 is meant
        # to mean "unlimited".
        if len(self.messages) > self.max_history:
            self.messages = self.messages[-self.max_history:]
        logger.debug(f"Conversation: Added {role} message")

    def recent(self, n: int = 10) -> list[dict]:
        """Return the last N messages (all of them if fewer exist).

        ``n <= 0`` yields an empty list. The explicit guard is needed because
        ``messages[-0:]`` is the whole list, not an empty one.
        """
        if n <= 0:
            return []
        return self.messages[-n:]

    def set_language(self, language: str) -> None:
        """Record the conversation language."""
        self.language = language
        logger.debug(f"Conversation: Language -> {language}")

    def clear(self) -> None:
        """Drop all messages and reset the language to ``"en"``."""
        self.messages = []
        self.language = "en"

    @classmethod
    def describe(cls) -> dict:
        """Return the self-description of this component (name, tier, access, fields)."""
        return {
            "name": "Conversation",
            "tier": "stm",
            "access": "read",
            "description": (
                "Current session message history and detected language. "
                "Read to maintain conversational context. "
                "Messages are managed automatically — never write directly."
            ),
            "fields": {
                "messages": f"Last {MAX_HISTORY} messages (role, content, timestamp).",
                "language": "Detected conversation language (ISO 639-1 code, e.g. 'fr', 'en').",
            },
        }

    def to_dict(self) -> dict:
        """Serialise the messages and the language (``max_history`` is not included)."""
        return {"messages": self.messages, "language": self.language}
@@ -0,0 +1,48 @@
"""Entities — extracted entities from the current conversation (title, year, quality, etc.)."""
import logging
from dataclasses import dataclass, field
from typing import Any
logger = logging.getLogger(__name__)
@dataclass
class Entities:
data: dict[str, Any] = field(default_factory=dict)
topic: str | None = None
def set(self, key: str, value: Any) -> None:
self.data[key] = value
logger.debug(f"Entities: {key}={value}")
def get(self, key: str, default: Any = None) -> Any:
return self.data.get(key, default)
def set_topic(self, topic: str) -> None:
self.topic = topic
logger.debug(f"Entities: Topic -> {topic}")
def clear(self) -> None:
self.data = {}
self.topic = None
@classmethod
def describe(cls) -> dict:
return {
"name": "Entities",
"tier": "stm",
"access": "read-write",
"description": (
"Entities and topic extracted from the current conversation. "
"Read to retrieve what the user is talking about (title, year, quality, etc.) without re-parsing the history. "
"Write when you identify a new entity or topic shift."
),
"fields": {
"data": "Key-value pairs of extracted entities. E.g. {'title': 'Breaking Bad', 'year': 2008, 'quality': '1080p'}.",
"topic": "Current conversation topic as a short string. E.g. 'media_search', 'organize_file'.",
},
}
def to_dict(self) -> dict:
return {"data": self.data, "topic": self.topic}
@@ -0,0 +1,53 @@
"""Workflow — tracks the current in-progress agent task."""
import logging
from dataclasses import dataclass
from datetime import datetime
logger = logging.getLogger(__name__)
@dataclass
class Workflow:
current: dict | None = None
def start(self, workflow_type: str, target: dict) -> None:
self.current = {
"type": workflow_type,
"target": target,
"stage": "started",
"started_at": datetime.now().isoformat(),
}
logger.info(f"Workflow: Started '{workflow_type}'")
def update_stage(self, stage: str) -> None:
if self.current:
self.current["stage"] = stage
logger.debug(f"Workflow: Stage -> {stage}")
def end(self) -> None:
if self.current:
logger.info(f"Workflow: Ended '{self.current.get('type')}'")
self.current = None
def clear(self) -> None:
self.current = None
@classmethod
def describe(cls) -> dict:
return {
"name": "Workflow",
"tier": "stm",
"access": "read-write",
"description": (
"Tracks the current in-progress multi-step task. "
"Read to know what you are currently doing and what stage you are at. "
"Write to start, advance, or end a workflow as you execute steps."
),
"fields": {
"current": "Active workflow dict with keys: type, target, stage, started_at. None if idle.",
},
}
def to_dict(self) -> dict | None:
return self.current
@@ -0,0 +1,91 @@
"""ShortTermMemory — volatile session memory, reset on restart."""
import logging
from dataclasses import dataclass, field
from .components import Conversation, Entities, Workflow
logger = logging.getLogger(__name__)
@dataclass
class ShortTermMemory:
    """
    Session-scoped memory built from three components.

    Composed of:
    - conversation: message history + language
    - workflow: current in-progress task
    - entities: extracted context (title, year, quality…)
    """

    conversation: Conversation = field(default_factory=Conversation)
    workflow: Workflow = field(default_factory=Workflow)
    entities: Entities = field(default_factory=Entities)

    # ------------------------------------------------------------------
    # Read-only proxies kept for backward compatibility with existing callers
    # ------------------------------------------------------------------

    @property
    def conversation_history(self) -> list[dict]:
        """Messages of the conversation component."""
        return self.conversation.messages

    @property
    def current_workflow(self) -> dict | None:
        """Active workflow dict, or ``None`` when idle."""
        return self.workflow.current

    @property
    def extracted_entities(self) -> dict:
        """Entity key-value pairs of the entities component."""
        return self.entities.data

    @property
    def current_topic(self) -> str | None:
        """Topic held by the entities component."""
        return self.entities.topic

    @property
    def language(self) -> str:
        """Language held by the conversation component."""
        return self.conversation.language

    # ------------------------------------------------------------------
    # Convenience methods forwarded to components
    # ------------------------------------------------------------------

    def add_message(self, role: str, content: str) -> None:
        """Forward to ``conversation.add``."""
        self.conversation.add(role, content)

    def get_recent_history(self, n: int = 10) -> list[dict]:
        """Forward to ``conversation.recent``."""
        return self.conversation.recent(n)

    def start_workflow(self, workflow_type: str, target: dict) -> None:
        """Forward to ``workflow.start``."""
        self.workflow.start(workflow_type, target)

    def update_workflow_stage(self, stage: str) -> None:
        """Forward to ``workflow.update_stage``."""
        self.workflow.update_stage(stage)

    def end_workflow(self) -> None:
        """Forward to ``workflow.end``."""
        self.workflow.end()

    def set_entity(self, key: str, value) -> None:
        """Forward to ``entities.set``."""
        self.entities.set(key, value)

    def get_entity(self, key: str, default=None):
        """Forward to ``entities.get``."""
        return self.entities.get(key, default)

    def clear_entities(self) -> None:
        """Forward to ``entities.clear``."""
        self.entities.clear()

    def set_topic(self, topic: str) -> None:
        """Forward to ``entities.set_topic``."""
        self.entities.set_topic(topic)

    def set_language(self, language: str) -> None:
        """Forward to ``conversation.set_language``."""
        self.conversation.set_language(language)

    def clear(self) -> None:
        """Clear the conversation, the workflow and the entities, in that order."""
        for component in (self.conversation, self.workflow, self.entities):
            component.clear()
        logger.info("STM: Cleared")

    def to_dict(self) -> dict:
        """Serialise the session state under the legacy flat key names."""
        talk, extracted = self.conversation, self.entities
        return {
            "conversation_history": talk.messages,
            "current_workflow": self.workflow.to_dict(),
            "extracted_entities": extracted.data,
            "current_topic": extracted.topic,
            "language": talk.language,
        }
@@ -0,0 +1,6 @@
"""Infrastructure adapters for subtitle persistence."""
from .metadata_store import SubtitleMetadataStore
from .rule_repository import RuleSetRepository
__all__ = ["SubtitleMetadataStore", "RuleSetRepository"]
@@ -0,0 +1,144 @@
"""SubtitleMetadataStore — reads/writes .alfred/metadata.yaml colocated with media."""
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import yaml
from alfred.domain.subtitles.entities import SubtitleTrack
from alfred.domain.subtitles.services.placer import PlacedTrack
logger = logging.getLogger(__name__)
class SubtitleMetadataStore:
    """
    Manages the .alfred/metadata.yaml file that lives inside the media library folder.

    For TV shows: /media/tv_shows/The X-Files/.alfred/metadata.yaml
    For movies:   /media/movies/Inception (2010)/.alfred/metadata.yaml

    The store never raises on a missing or unreadable file — it returns empty defaults.
    Writes go to a .tmp file first, which then replaces the target.
    """

    def __init__(self, library_root: Path):
        self._root = library_root
        self._alfred_dir = library_root / ".alfred"
        self._metadata_path = self._alfred_dir / "metadata.yaml"

    # ------------------------------------------------------------------
    # Load / Save
    # ------------------------------------------------------------------

    def load(self) -> dict:
        """Return the full metadata dict.

        Returns an empty dict if the file is absent, unreadable, or if its
        top-level YAML value is not a mapping (every caller uses dict
        methods on the result).
        """
        if not self._metadata_path.exists():
            return {}
        try:
            with open(self._metadata_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}")
            return {}
        if not isinstance(data, dict):
            logger.warning(
                f"MetadataStore: ignoring {self._metadata_path}: top-level YAML value is not a mapping"
            )
            return {}
        return data

    def save(self, data: dict) -> None:
        """Write metadata.yaml via a temporary file. Creates .alfred/ if needed.

        Raises:
            Exception: whatever the write or the replace raised; the error is
                logged and the temporary file removed before re-raising.
        """
        self._alfred_dir.mkdir(parents=True, exist_ok=True)
        tmp = self._metadata_path.with_suffix(".yaml.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
            # replace() overwrites an existing target on every platform;
            # rename() raises FileExistsError on Windows in that case.
            tmp.replace(self._metadata_path)
        except Exception as e:
            logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}")
            tmp.unlink(missing_ok=True)
            raise

    # ------------------------------------------------------------------
    # Pattern
    # ------------------------------------------------------------------

    def confirmed_pattern(self) -> str | None:
        """Return the stored pattern id if it has been confirmed, else None."""
        data = self.load()
        if data.get("pattern_confirmed"):
            return data.get("detected_pattern")
        return None

    def mark_pattern_confirmed(self, pattern_id: str, media_info: dict | None = None) -> None:
        """Persist detected_pattern + pattern_confirmed=true.

        When ``media_info`` is given, its media_type / imdb_id / title are
        stored only for keys that are not already present in the file.
        """
        data = self.load()
        data["detected_pattern"] = pattern_id
        data["pattern_confirmed"] = True
        if media_info:
            data.setdefault("media_type", media_info.get("media_type"))
            data.setdefault("imdb_id", media_info.get("imdb_id"))
            data.setdefault("title", media_info.get("title"))
        self.save(data)
        logger.info(f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}")

    # ------------------------------------------------------------------
    # Subtitle history
    # ------------------------------------------------------------------

    def append_history(
        self,
        placed_pairs: list[tuple[PlacedTrack, SubtitleTrack]],
        season: int | None = None,
        episode: int | None = None,
        release_group: str | None = None,
    ) -> None:
        """Append one history entry with all placed tracks.

        Does nothing when ``placed_pairs`` is empty. ``season`` and
        ``episode`` are stored only when not None. A release group seen for
        the first time is also added to the ``release_groups`` list.
        """
        if not placed_pairs:
            return

        data = self.load()
        history = data.setdefault("subtitle_history", [])

        tracks_data: list[dict[str, Any]] = []
        for placed, track in placed_pairs:
            # Infer type from destination filename parts (e.g. en.sdh.srt → sdh)
            parts = placed.filename.rsplit(".", 2)  # ["en", "sdh", "srt"] or ["en", "srt"]
            inferred_type = parts[1] if len(parts) == 3 else "standard"
            tracks_data.append({
                "language": track.language.code if track.language else "unknown",
                "type": inferred_type,
                "format": placed.destination.suffix.lstrip("."),
                "is_embedded": track.is_embedded,
                "source_file": placed.source.name,
                "placed_as": placed.filename,
                "confidence": round(track.confidence, 3),
            })

        entry: dict[str, Any] = {
            "placed_at": datetime.now(timezone.utc).isoformat(),
            "release_group": release_group,
            "tracks": tracks_data,
        }
        if season is not None:
            entry["season"] = season
        if episode is not None:
            entry["episode"] = episode

        history.append(entry)

        # Update release_groups list
        if release_group:
            groups = data.setdefault("release_groups", [])
            if release_group not in groups:
                groups.append(release_group)

        self.save(data)
        # Compare against None rather than by truthiness, as the entry above
        # does, so that season 0 / episode 0 are not labelled as a movie.
        if season is not None and episode is not None:
            label = "S%02dE%02d" % (season, episode)
        else:
            label = "movie"
        logger.info(
            f"MetadataStore: appended history "
            f"({label}) "
            f"{len(tracks_data)} track(s)"
        )

    def history(self) -> list[dict]:
        """Return the raw history list (empty if none was recorded)."""
        return self.load().get("subtitle_history", [])
@@ -0,0 +1,116 @@
"""RuleSetRepository — loads SubtitleRuleSet from .alfred/ YAML files."""
import logging
from pathlib import Path
from typing import TYPE_CHECKING
import yaml
from alfred.domain.subtitles.aggregates import SubtitleRuleSet
from alfred.domain.subtitles.value_objects import RuleScope
if TYPE_CHECKING:
from alfred.infrastructure.persistence.memory.ltm.components.subtitle_preferences import SubtitlePreferences
logger = logging.getLogger(__name__)
def _load_yaml(path: Path) -> dict:
    """Read a YAML file and return its top-level mapping.

    Returns an empty dict when the file does not exist, cannot be read or
    parsed (a warning is logged), is empty, or when its top-level value is
    not a mapping. Callers use ``.get`` on the result, so a list or scalar
    document must not be handed back.
    """
    if not path.exists():
        return {}
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except Exception as e:
        logger.warning(f"RuleSetRepository: could not read {path}: {e}")
        return {}
    if not isinstance(data, dict):
        logger.warning(f"RuleSetRepository: ignoring {path}: top-level YAML value is not a mapping")
        return {}
    return data
class RuleSetRepository:
    """
    Builds a fully chained SubtitleRuleSet by reading YAML from .alfred/.

    Inheritance chain:
        global (hardcoded defaults)
        └── release_group  (.alfred/release_groups/{GROUP}.yaml)
            └── local      (.alfred/rules.yaml)

    Rules are delta-only — None means "inherit from parent".
    The repository only creates intermediate nodes when the corresponding
    file exists and contains an override section.
    """

    def __init__(self, library_root: Path):
        self._alfred_dir = library_root / ".alfred"

    def load(
        self,
        release_group: str | None = None,
        subtitle_preferences: "SubtitlePreferences | None" = None,
    ) -> SubtitleRuleSet:
        """
        Build and return the resolved RuleSet chain.

        If subtitle_preferences is provided, it seeds the global base rule set
        from LTM (overriding the hardcoded DEFAULT_RULES).
        Returns global default if no overrides exist.
        """
        base = SubtitleRuleSet.global_default()
        if subtitle_preferences is not None:
            # NOTE(review): this mutates whatever global_default() returned —
            # confirm it hands out a fresh instance and not a shared singleton.
            base.override(
                languages=subtitle_preferences.languages,
                formats=subtitle_preferences.formats,
                types=subtitle_preferences.types,
            )

        current = base

        # Release group level
        if release_group:
            # NOTE(review): release_group goes into the file name unchanged —
            # confirm it cannot contain path separators.
            rg_path = self._alfred_dir / "release_groups" / f"{release_group}.yaml"
            # "or {}" also covers an explicit "override: null" in the file.
            rg_data = _load_yaml(rg_path).get("override") or {}
            if rg_data:
                rg_ruleset = SubtitleRuleSet(
                    scope=RuleScope(level="release_group", identifier=release_group),
                    parent=current,
                )
                rg_ruleset.override(**_filter_override(rg_data))
                current = rg_ruleset
                logger.debug(f"RuleSetRepository: loaded release_group override for '{release_group}'")

        # Local (show/movie) level
        local_data = _load_yaml(self._alfred_dir / "rules.yaml").get("override") or {}
        if local_data:
            local_ruleset = SubtitleRuleSet(
                scope=RuleScope(level="show"),
                parent=current,
            )
            local_ruleset.override(**_filter_override(local_data))
            current = local_ruleset
            logger.debug("RuleSetRepository: loaded local rules.yaml override")

        return current

    def save_local(self, delta: dict) -> None:
        """Write or update .alfred/rules.yaml with override delta.

        The delta is merged over the existing ``override`` section. The file
        is written to a temporary path first, which then replaces the target.

        Raises:
            Exception: whatever the write or the replace raised; the error is
                logged and the temporary file removed before re-raising.
        """
        self._alfred_dir.mkdir(parents=True, exist_ok=True)
        path = self._alfred_dir / "rules.yaml"
        existing = _load_yaml(path)
        # "or {}" guards against "override: null", where .get() returns None
        # despite the default and .update() would fail.
        existing_override = existing.get("override") or {}
        existing_override.update(delta)
        data = {"override": existing_override}

        tmp = path.with_suffix(".yaml.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
            # replace() overwrites an existing rules.yaml on every platform;
            # rename() raises FileExistsError on Windows in that case.
            tmp.replace(path)
            logger.info(f"RuleSetRepository: saved local rules to {path}")
        except Exception as e:
            logger.error(f"RuleSetRepository: could not write {path}: {e}")
            tmp.unlink(missing_ok=True)
            raise
def _filter_override(data: dict) -> dict:
    """Drop every entry whose key is not one of the override parameters
    (languages, formats, types, format_priority, min_confidence)."""
    accepted = ("languages", "formats", "types", "format_priority", "min_confidence")
    kept = {}
    for key, value in data.items():
        if key in accepted:
            kept[key] = value
    return kept
+13
View File
@@ -0,0 +1,13 @@
id: adjacent
version: "1.0"
description: >
Subtitle files sit directly alongside the video file, in the same directory.
Example: Show.S01E01.mkv + Show.S01E01.English.srt in the same folder.
scan_strategy: adjacent
root_folder: null
type_detection:
method: token_in_name
description: >
Type (standard/SDH/forced) is determined from tokens in the filename.
+14
View File
@@ -0,0 +1,14 @@
id: embedded
version: "1.0"
description: >
Subtitle tracks are embedded inside the video container (MKV, MP4).
Detected via ffprobe — no external files.
scan_strategy: embedded
root_folder: null
type_detection:
method: ffprobe_metadata
description: >
Language, type (SDH/forced) and format are read directly from the
container track metadata via ffprobe.
@@ -0,0 +1,16 @@
id: episode_subfolder
version: "1.0"
description: >
Subtitle files are in a Subs/ folder at the release root, with one subfolder
per episode named after the episode filename (without extension).
Example: Subs/Show.S01E01.BluRay.x265-RARBG/2_English.srt
scan_strategy: episode_subfolder
root_folder: "Subs"
type_detection:
method: size_and_count
description: >
When multiple files share the same detected language, differentiate
standard vs SDH by comparing file size and subtitle entry count.
Larger file (more entries) = SDH.
+14
View File
@@ -0,0 +1,14 @@
id: subs_flat
version: "1.0"
description: >
Subtitle files are directly in a Subs/ folder at the release root,
with no per-episode subfolder.
Example: Subs/Show.S01E01.English.srt
scan_strategy: flat
root_folder: "Subs"
type_detection:
method: token_in_name
description: >
Type (standard/SDH/forced) is determined from tokens in the filename.
@@ -0,0 +1,5 @@
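name: KONSTRAST  # NOTE(review): possibly a misspelling of the release group "KONTRAST" — confirm before relying on name matching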
known_patterns: ["episode_subfolder", "embedded"]
notes: >
Follows similar conventions to RARBG. Pattern varies per release — always
verify per season.
@@ -0,0 +1,2 @@
name: RARBG
known_patterns: ["episode_subfolder"]
+89
View File
@@ -0,0 +1,89 @@
name: subtitles
version: "1.0"
description: "Subtitle classification rules — formats, types, languages and their tokens"
defaults:
languages: ["fra", "eng"]
formats: ["srt"]
types: ["standard", "forced", "sdh"]
format_priority: ["srt", "ass"]
min_confidence: 0.7
formats:
srt:
extensions: [".srt"]
description: "SubRip — plain text, universal"
ass:
extensions: [".ass", ".ssa"]
description: "Advanced SubStation Alpha — with styles and positioning"
types:
standard:
tokens: []
description: "Normal subtitle track"
sdh:
tokens: ["sdh", "hi", "cc", "hearing"]
description: "Hearing-impaired — includes sound effects and speaker labels"
forced:
tokens: ["forced", "foreign"]
description: "Foreign lines only — e.g. alien speech in an otherwise English film"
languages:
fra:
tokens: ["fr", "fra", "french", "francais", "vf", "vff", "vostfr"]
eng:
tokens: ["en", "eng", "english"]
spa:
tokens: ["es", "spa", "spanish", "espanol", "español"]
deu:
tokens: ["de", "deu", "ger", "german", "deutsch"]
ita:
tokens: ["it", "ita", "italian", "italiano"]
por:
tokens: ["pt", "por", "portuguese", "portugues", "português"]
nld:
tokens: ["nl", "nld", "dut", "dutch", "nederlands"]
nor:
tokens: ["no", "nor", "norwegian", "norsk"]
swe:
tokens: ["sv", "swe", "swedish", "svenska"]
dan:
tokens: ["da", "dan", "danish", "dansk"]
fin:
tokens: ["fi", "fin", "finnish", "suomi"]
pol:
tokens: ["pl", "pol", "polish", "polski"]
ces:
tokens: ["cs", "ces", "cze", "czech"]
slk:
tokens: ["sk", "slk", "slo", "slovak"]
hun:
tokens: ["hu", "hun", "hungarian", "magyar"]
ron:
tokens: ["ro", "ron", "rum", "romanian", "romana", "română"]
bul:
tokens: ["bg", "bul", "bulgarian"]
hrv:
tokens: ["hr", "hrv", "croatian", "hrvatski"]
srp:
tokens: ["sr", "srp", "serbian", "srpski"]
slv:
tokens: ["sl", "slv", "slovenian", "slovensko"]
est:
tokens: ["et", "est", "estonian", "eesti"]
lav:
tokens: ["lv", "lav", "latvian", "latviesu"]
lit:
tokens: ["lt", "lit", "lithuanian", "lietuviu"]
mkd:
tokens: ["mk", "mkd", "mac", "macedonian"]
jpn:
tokens: ["ja", "jpn", "japanese"]
zho:
tokens: ["zh", "zho", "chi", "chinese"]
kor:
tokens: ["ko", "kor", "korean"]
ara:
tokens: ["ar", "ara", "arabic"]
tur:
tokens: ["tr", "tur", "turkish"]
+44 -156
View File
@@ -1,209 +1,97 @@
import secrets """
from pathlib import Path Application settings — Alfred only.
from typing import NamedTuple
import tomllib Only declares what Alfred's Python code actually consumes.
from pydantic import Field, computed_field, field_validator Everything else (.env.alfred, .env.secrets) is loaded by Docker Compose
for other services and ignored here via extra="ignore".
"""
from pathlib import Path
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
BASE_DIR = Path(__file__).resolve().parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
ENV_FILE_PATH = BASE_DIR / ".env"
toml_path = BASE_DIR / "pyproject.toml"
class ConfigurationError(Exception): class ConfigurationError(Exception):
"""Raised when configuration is invalid.""" """Raised when configuration is invalid."""
pass
class ProjectVersions(NamedTuple):
"""
Immutable structure for project versions.
Forces explicit naming and prevents accidental swaps.
"""
librechat: str
rag: str
alfred: str
def get_versions_from_toml() -> ProjectVersions:
"""
Reads versioning information from pyproject.toml.
Returns the default value if the file or key is missing.
"""
if not toml_path.exists():
raise FileNotFoundError(f"pyproject.toml not found: {toml_path}")
with open(toml_path, "rb") as f:
data = tomllib.load(f)
try:
return ProjectVersions(
librechat=data["tool"]["alfred"]["settings"]["librechat_version"],
rag=data["tool"]["alfred"]["settings"]["rag_version"],
alfred=data["tool"]["poetry"]["version"],
)
except KeyError as e:
raise KeyError(f"Error: Missing key {e} in pyproject.toml") from e
# Load versions once
VERSIONS: ProjectVersions = get_versions_from_toml()
class Settings(BaseSettings): class Settings(BaseSettings):
model_config = SettingsConfigDict( model_config = SettingsConfigDict(
env_file=ENV_FILE_PATH, env_file=[BASE_DIR / ".env.alfred", BASE_DIR / ".env.secrets", BASE_DIR / ".env.make"],
env_file_encoding="utf-8", env_file_encoding="utf-8",
extra="ignore", extra="ignore",
case_sensitive=False, case_sensitive=False,
) )
# --- GENERAL SETTINGS ---
host: str = "0.0.0.0"
port: int = 3080
debug_logging: bool = False
debug_console: bool = False
data_storage: str = "data"
librechat_version: str = Field(VERSIONS.librechat, description="Librechat version")
rag_version: str = Field(VERSIONS.rag, description="RAG engine version")
alfred_version: str = Field(VERSIONS.alfred, description="Alfred version")
# --- CONTEXT SETTINGS --- # --- APP ---
max_history_messages: int = 10 max_history_messages: int = 10
max_tool_iterations: int = 10 max_tool_iterations: int = 10
request_timeout: int = 30 request_timeout: int = 30
llm_temperature: float = 0.2
data_storage_dir: str = "data"
# TODO: Finish # --- BUILD ---
alfred_version: str | None = None
# --- LLM ---
default_llm_provider: str = "local"
ollama_base_url: str = "http://ollama:11434"
ollama_model: str = "llama3.3:latest"
deepseek_base_url: str = "https://api.deepseek.com" deepseek_base_url: str = "https://api.deepseek.com"
deepseek_model: str = "deepseek-chat" deepseek_model: str = "deepseek-chat"
# --- API KEYS --- # --- API KEYS ---
anthropic_api_key: str | None = Field(None, description="Claude API key") tmdb_api_key: str | None = None
deepseek_api_key: str | None = Field(None, description="Deepseek API key")
google_api_key: str | None = Field(None, description="Gemini API key")
kimi_api_key: str | None = Field(None, description="Kimi API key")
openai_api_key: str | None = Field(None, description="ChatGPT API key")
# --- SECURITY KEYS ---
# Generated automatically if not in .env to ensure "Secure by Default"
jwt_secret: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
jwt_refresh_secret: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
# We keep these for encryption of keys in MongoDB (AES-256 Hex format)
creds_key: str = Field(default_factory=lambda: secrets.token_hex(32))
creds_iv: str = Field(default_factory=lambda: secrets.token_hex(16))
# --- SERVICES ---
qbittorrent_url: str = "http://qbittorrent:16140"
qbittorrent_username: str = "admin"
qbittorrent_password: str = Field(default_factory=lambda: secrets.token_urlsafe(16))
mongo_host: str = "mongodb"
mongo_user: str = "alfred"
mongo_password: str = Field(
default_factory=lambda: secrets.token_urlsafe(24), repr=False, exclude=True
)
mongo_port: int = 27017
mongo_db_name: str = "alfred"
@computed_field(repr=False)
@property
def mongo_uri(self) -> str:
return (
f"mongodb://{self.mongo_user}:{self.mongo_password}"
f"@{self.mongo_host}:{self.mongo_port}/{self.mongo_db_name}"
f"?authSource=admin"
)
postgres_host: str = "vectordb"
postgres_user: str = "alfred"
postgres_password: str = Field(
default_factory=lambda: secrets.token_urlsafe(24), repr=False, exclude=True
)
postgres_port: int = 5432
postgres_db_name: str = "alfred"
@computed_field(repr=False)
@property
def postgres_uri(self) -> str:
return (
f"postgresql://{self.postgres_user}:{self.postgres_password}"
f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db_name}"
)
tmdb_api_key: str | None = Field(None, description="The Movie Database API key")
tmdb_base_url: str = "https://api.themoviedb.org/3" tmdb_base_url: str = "https://api.themoviedb.org/3"
deepseek_api_key: str | None = None
# --- LLM PICKER & CONFIG --- openai_api_key: str | None = None
# Providers: 'local', 'deepseek', ... anthropic_api_key: str | None = None
default_llm_provider: str = "local" google_api_key: str | None = None
ollama_base_url: str = "http://ollama:11434" kimi_api_key: str | None = None
# Models: ...
ollama_model: str = "llama3.3:latest"
llm_temperature: float = 0.2
# --- RAG ENGINE ---
rag_enabled: bool = True # TODO: Handle False
rag_api_url: str = "http://rag_api:8000"
embeddings_provider: str = "ollama"
# Models: ...
embeddings_model: str = "nomic-embed-text"
# --- MEILISEARCH ---
meili_enabled: bool = Field(True, description="Enable meili")
meili_no_analytics: bool = True
meili_host: str = "http://meilisearch:7700"
meili_master_key: str = Field(
default_factory=lambda: secrets.token_urlsafe(32),
description="Master key for Meilisearch",
repr=False,
)
# --- VALIDATORS --- # --- VALIDATORS ---
@field_validator("llm_temperature") @field_validator("llm_temperature")
@classmethod @classmethod
def validate_temperature(cls, v: float) -> float: def validate_temperature(cls, v: float) -> float:
if not 0.0 <= v <= 2.0: if not 0.0 <= v <= 2.0:
raise ConfigurationError( raise ConfigurationError(f"Temperature must be between 0.0 and 2.0, got {v}")
f"Temperature must be between 0.0 and 2.0, got {v}"
)
return v return v
@field_validator("max_tool_iterations") @field_validator("max_tool_iterations")
@classmethod @classmethod
def validate_max_iterations(cls, v: int) -> int: def validate_max_iterations(cls, v: int) -> int:
if not 1 <= v <= 20: if not 1 <= v <= 20:
raise ConfigurationError( raise ConfigurationError(f"max_tool_iterations must be between 1 and 20, got {v}")
f"max_tool_iterations must be between 1 and 50, got {v}"
)
return v return v
@field_validator("request_timeout") @field_validator("request_timeout")
@classmethod @classmethod
def validate_timeout(cls, v: int) -> int: def validate_timeout(cls, v: int) -> int:
if not 1 <= v <= 300: if not 1 <= v <= 300:
raise ConfigurationError( raise ConfigurationError(f"request_timeout must be between 1 and 300 seconds, got {v}")
f"request_timeout must be between 1 and 300 seconds, got {v}"
)
return v return v
@field_validator("deepseek_base_url", "tmdb_base_url") # --- HELPERS ---
@classmethod def is_tmdb_configured(self) -> bool:
def validate_url(cls, v: str, info) -> str:
if not v.startswith(("http://", "https://")):
raise ConfigurationError(f"Invalid {info.field_name}")
return v
def is_tmdb_configured(self):
return bool(self.tmdb_api_key) return bool(self.tmdb_api_key)
def is_deepseek_configured(self): def is_deepseek_configured(self) -> bool:
return bool(self.deepseek_api_key) return bool(self.deepseek_api_key)
def dump_safe(self): def is_openai_configured(self) -> bool:
return self.model_dump(exclude_none=False) return bool(self.openai_api_key)
def is_anthropic_configured(self) -> bool:
return bool(self.anthropic_api_key)
def is_google_configured(self) -> bool:
return bool(self.google_api_key)
def is_kimi_configured(self) -> bool:
return bool(self.kimi_api_key)
settings = Settings() settings = Settings()
+45 -22
View File
@@ -8,8 +8,7 @@ services:
target: builder target: builder
args: args:
PYTHON_VERSION: ${PYTHON_VERSION} PYTHON_VERSION: ${PYTHON_VERSION}
PYTHON_VERSION_SHORT: ${PYTHON_VERSION_SHORT} UV_VERSION: ${UV_VERSION}
RUNNER: ${RUNNER}
command: python scripts/bootstrap.py command: python scripts/bootstrap.py
networks: networks:
- alfred-net - alfred-net
@@ -17,24 +16,30 @@ services:
# --- MAIN APPLICATION --- # --- MAIN APPLICATION ---
alfred: alfred:
container_name: alfred-core container_name: alfred-core
image: alfred_media_organizer:latest
build: build:
context: . context: .
args: args:
PYTHON_VERSION: ${PYTHON_VERSION} PYTHON_VERSION: ${PYTHON_VERSION}
PYTHON_VERSION_SHORT: ${PYTHON_VERSION_SHORT} UV_VERSION: ${UV_VERSION}
RUNNER: ${RUNNER}
depends_on: depends_on:
alfred-init: alfred-init:
condition: service_completed_successfully condition: service_completed_successfully
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true
- path: .env.make
required: true required: true
volumes: volumes:
- ./data:/data - ./data:/data
- ./logs:/logs - ./logs:/logs
# TODO: Hot reload (comment out in production) # TODO: Hot reload (comment out in production)
#- ./alfred:/home/appuser/alfred - ./alfred:/home/appuser/alfred
command: >
sh -c "python -u -m uvicorn alfred.app:app --host 0.0.0.0 --port 8000 --reload 2>&1 | tee -a /logs/alfred.log"
networks: networks:
- alfred-net - alfred-net
@@ -49,7 +54,11 @@ services:
condition: service_healthy condition: service_healthy
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.librechat
required: true
- path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
environment: environment:
# Remap value name # Remap value name
@@ -75,21 +84,23 @@ services:
alfred-init: alfred-init:
condition: service_completed_successfully condition: service_completed_successfully
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
environment: environment:
# Remap value name
- MONGO_INITDB_ROOT_USERNAME=${MONGO_USER} - MONGO_INITDB_ROOT_USERNAME=${MONGO_USER}
- MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD} - MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}
# Fix MongoDB + Linux kernel >= 6.19
- GLIBC_TUNABLES=glibc.cpu.hwcaps=-SHSTK
ports: ports:
- "${MONGO_PORT}:${MONGO_PORT}" - "${MONGO_PORT}:${MONGO_PORT}"
volumes: volumes:
- ./data/mongo:/data/db - ./data/mongodb:/data/db
command: mongod --quiet --setParameter logComponentVerbosity='{"network":{"verbosity":0}}' - ./mongod.conf:/etc/mongod.conf:ro
command: ["mongod", "--config", "/etc/mongod.conf"]
healthcheck: healthcheck:
test: | test: bash -c "echo > /dev/tcp/localhost/27017"
mongosh --quiet --eval "db.adminCommand('ping')" || \
mongosh --quiet -u "${MONGO_USER}" -p "${MONGO_PASSWORD}" --authenticationDatabase admin --eval "db.adminCommand('ping')"
interval: 10s interval: 10s
timeout: 5s timeout: 5s
retries: 5 retries: 5
@@ -105,7 +116,9 @@ services:
condition: service_completed_successfully condition: service_completed_successfully
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
volumes: volumes:
- ./data/ollama:/root/.ollama - ./data/ollama:/root/.ollama
@@ -122,7 +135,9 @@ services:
condition: service_completed_successfully condition: service_completed_successfully
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
volumes: volumes:
- ./data/meilisearch:/meili_data - ./data/meilisearch:/meili_data
@@ -141,7 +156,9 @@ services:
condition: service_healthy condition: service_healthy
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
ports: ports:
- "${RAG_API_PORT}:${RAG_API_PORT}" - "${RAG_API_PORT}:${RAG_API_PORT}"
@@ -154,13 +171,15 @@ services:
# --- DATABASE #2 - Vector RAG (Optional) --- # --- DATABASE #2 - Vector RAG (Optional) ---
vectordb: vectordb:
container_name: alfred-vectordb container_name: alfred-vectordb
image: pgvector/pgvector:0.8.2-pg16-bookworm image: pgvector/pgvector:0.8.0-pg16-bookworm
depends_on: depends_on:
alfred-init: alfred-init:
condition: service_completed_successfully condition: service_completed_successfully
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
ports: ports:
- "${POSTGRES_PORT}:${POSTGRES_PORT}" - "${POSTGRES_PORT}:${POSTGRES_PORT}"
@@ -168,12 +187,14 @@ services:
- ./data/vectordb:/var/lib/postgresql/data - ./data/vectordb:/var/lib/postgresql/data
profiles: ["rag", "full"] profiles: ["rag", "full"]
healthcheck: healthcheck:
test: [ "CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-alfred} -d ${POSTGRES_DB_NAME:-alfred}" ] test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER:-alfred} -d $${POSTGRES_DB_NAME:-alfred}" ]
interval: 5s interval: 5s
timeout: 5s timeout: 5s
retries: 5 retries: 5
networks: networks:
- alfred-net alfred-net:
aliases:
- db
# --- QBITTORRENT (Optional) --- # --- QBITTORRENT (Optional) ---
qbittorrent: qbittorrent:
@@ -184,7 +205,9 @@ services:
condition: service_completed_successfully condition: service_completed_successfully
restart: unless-stopped restart: unless-stopped
env_file: env_file:
- path: .env - path: .env.alfred
required: true
- path: .env.secrets
required: true required: true
environment: environment:
- PUID=1000 - PUID=1000
+878
View File
@@ -0,0 +1,878 @@
#=====================================================================#
# LibreChat Configuration #
#=====================================================================#
# Please refer to the reference documentation for assistance #
# with configuring your LibreChat environment. #
# #
# https://www.librechat.ai/docs/configuration/dotenv #
#=====================================================================#
#==================================================#
# Server Configuration #
#==================================================#
HOST=localhost
PORT=3080
MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
# The maximum number of connections in the connection pool.
MONGO_MAX_POOL_SIZE=
# The minimum number of connections in the connection pool.
MONGO_MIN_POOL_SIZE=
# The maximum number of connections that may be in the process of being established concurrently by the connection pool.
MONGO_MAX_CONNECTING=
# The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
MONGO_MAX_IDLE_TIME_MS=
# The maximum time in milliseconds that a thread can wait for a connection to become available.
MONGO_WAIT_QUEUE_TIMEOUT_MS=
# Set to false to disable automatic index creation for all models associated with this connection.
MONGO_AUTO_INDEX=
# Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
MONGO_AUTO_CREATE=
DOMAIN_CLIENT=http://localhost:3080
DOMAIN_SERVER=http://localhost:3080
NO_INDEX=true
# Use the address that is at most n number of hops away from the Express application.
# req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
# A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
# Defaulted to 1.
TRUST_PROXY=1
# Minimum password length for user authentication
# Default: 8
# Note: When using LDAP authentication, you may want to set this to 1
# to bypass local password validation, as LDAP servers handle their own
# password policies.
# MIN_PASSWORD_LENGTH=8
# When enabled, the app will continue running after encountering uncaught exceptions
# instead of exiting the process. Not recommended for production unless necessary.
# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
#===============#
# JSON Logging #
#===============#
# Use when process console logs in cloud deployment like GCP/AWS
CONSOLE_JSON=false
#===============#
# Debug Logging #
#===============#
DEBUG_LOGGING=true
DEBUG_CONSOLE=false
# Set to true to enable agent debug logging
AGENT_DEBUG_LOGGING=false
# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
# MEM_DIAG=true
#=============#
# Permissions #
#=============#
# UID=1000
# GID=1000
#==============#
# Node Options #
#==============#
# NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
# This variable is used as a build argument for Docker or CI/CD workflows,
# and is NOT used by Node.js to set the heap size at runtime.
# To configure Node.js memory, use NODE_OPTIONS, e.g.:
# NODE_OPTIONS="--max-old-space-size=6144"
# See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
NODE_MAX_OLD_SPACE_SIZE=6144
#===============#
# Configuration #
#===============#
# Use an absolute path, a relative path, or a URL
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
#==================#
# Langfuse Tracing #
#==================#
# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
# LANGFUSE_PUBLIC_KEY=
# LANGFUSE_SECRET_KEY=
# LANGFUSE_BASE_URL=
#===================================================#
# Endpoints #
#===================================================#
# ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
PROXY=
#===================================#
# Known Endpoints - librechat.yaml #
#===================================#
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
# ANYSCALE_API_KEY=
# APIPIE_API_KEY=
# COHERE_API_KEY=
# DEEPSEEK_API_KEY=
# DATABRICKS_API_KEY=
# FIREWORKS_API_KEY=
# GROQ_API_KEY=
# HUGGINGFACE_TOKEN=
# MISTRAL_API_KEY=
# OPENROUTER_KEY=
# PERPLEXITY_API_KEY=
# SHUTTLEAI_API_KEY=
# TOGETHERAI_API_KEY=
# UNIFY_API_KEY=
# XAI_API_KEY=
#============#
# Anthropic #
#============#
ANTHROPIC_API_KEY=user_provided
# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
# ANTHROPIC_REVERSE_PROXY=
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
# ANTHROPIC_USE_VERTEX=
# ANTHROPIC_VERTEX_REGION=us-east5
#============#
# Azure #
#============#
# Note: these variables are DEPRECATED
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
# AZURE_API_KEY= # Deprecated
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_VERSION= # Deprecated
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
#=================#
# AWS Bedrock #
#=================#
# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
# BEDROCK_AWS_SESSION_TOKEN=someSessionToken
# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
# Notes on specific models:
# The following models are not supported because they do not support streaming:
# ai21.j2-mid-v1
# The following models are not supported because they do not support conversation history:
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
#============#
# Google #
#============#
GOOGLE_KEY=user_provided
# GOOGLE_REVERSE_PROXY=
# Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
# GOOGLE_AUTH_HEADER=true
# Gemini API (AI Studio)
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
# Vertex AI
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
# Google Cloud region for Vertex AI (used by both chat and image generation)
# GOOGLE_LOC=us-central1
# Alternative region env var for Gemini Image Generation
# GOOGLE_CLOUD_LOCATION=global
# Vertex AI Service Account Configuration
# Path to your Google Cloud service account JSON file
# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
# Google Safety Settings
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
#
# For Vertex AI:
# To use the BLOCK_NONE setting, you need either:
# (a) Access through an allowlist via your Google account team, or
# (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
#
# For Gemini API (AI Studio):
# BLOCK_NONE is available by default, no special account requirements.
#
# Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
#
# GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
# GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
#========================#
# Gemini Image Generation #
#========================#
# Gemini Image Generation Tool (for Agents)
# Supports multiple authentication methods in priority order:
# 1. User-provided API key (via GUI)
# 2. GEMINI_API_KEY env var (admin-configured)
# 3. GOOGLE_KEY env var (shared with Google chat endpoint)
# 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
# Option A: Use dedicated Gemini API key for image generation
# GEMINI_API_KEY=your-gemini-api-key
# Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
#============#
# OpenAI #
#============#
OPENAI_API_KEY=user_provided
# OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
DEBUG_OPENAI=false
# TITLE_CONVO=false
# OPENAI_TITLE_MODEL=gpt-4o-mini
# OPENAI_SUMMARIZE=true
# OPENAI_SUMMARY_MODEL=gpt-4o-mini
# OPENAI_FORCE_PROMPT=true
# OPENAI_REVERSE_PROXY=
# OPENAI_ORGANIZATION=
#====================#
# Assistants API #
#====================#
ASSISTANTS_API_KEY=user_provided
# ASSISTANTS_BASE_URL=
# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
#==========================#
# Azure Assistants API #
#==========================#
# Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
# The models for Azure Assistants are also determined by your Azure OpenAI configuration.
# More info, including how to enable use of Assistants with Azure here:
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
# Azure AI Search
#-----------------
AZURE_AI_SEARCH_SERVICE_ENDPOINT=
AZURE_AI_SEARCH_INDEX_NAME=
AZURE_AI_SEARCH_API_KEY=
AZURE_AI_SEARCH_API_VERSION=
AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
# OpenAI Image Tools Customization
#----------------
# IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
# IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
# IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
# IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
# IMAGE_GEN_OAI_DESCRIPTION=
# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
# DALL·E
#----------------
# DALLE_API_KEY=
# DALLE3_API_KEY=
# DALLE2_API_KEY=
# DALLE3_SYSTEM_PROMPT=
# DALLE2_SYSTEM_PROMPT=
# DALLE_REVERSE_PROXY=
# DALLE3_BASEURL=
# DALLE2_BASEURL=
# DALL·E (via Azure OpenAI)
# Note: requires some of the variables above to be set
#----------------
# DALLE3_AZURE_API_VERSION=
# DALLE2_AZURE_API_VERSION=
# Flux
#-----------------
FLUX_API_BASE_URL=https://api.us1.bfl.ai
# FLUX_API_BASE_URL = 'https://api.bfl.ml';
# Get your API key at https://api.us1.bfl.ai/auth/profile
# FLUX_API_KEY=
# Google
#-----------------
GOOGLE_SEARCH_API_KEY=
GOOGLE_CSE_ID=
# Stable Diffusion
#-----------------
SD_WEBUI_URL=http://host.docker.internal:7860
# Tavily
#-----------------
TAVILY_API_KEY=
# Traversaal
#-----------------
TRAVERSAAL_API_KEY=
# WolframAlpha
#-----------------
WOLFRAM_APP_ID=
# Zapier
#-----------------
ZAPIER_NLA_API_KEY=
#==================================================#
# Search #
#==================================================#
SEARCH=true
MEILI_NO_ANALYTICS=true
MEILI_HOST=http://0.0.0.0:7700
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
# Optional: Disable indexing, useful in a multi-node setup
# where only one instance should perform an index sync.
# MEILI_NO_SYNC=true
#==================================================#
# Speech to Text & Text to Speech #
#==================================================#
STT_API_KEY=
TTS_API_KEY=
#==================================================#
# RAG #
#==================================================#
# More info: https://www.librechat.ai/docs/configuration/rag_api
# RAG_OPENAI_BASEURL=
# RAG_OPENAI_API_KEY=
# RAG_USE_FULL_CONTEXT=
# EMBEDDINGS_PROVIDER=openai
# EMBEDDINGS_MODEL=text-embedding-3-small
#===================================================#
# User System #
#===================================================#
#========================#
# Moderation #
#========================#
OPENAI_MODERATION=false
OPENAI_MODERATION_API_KEY=
# OPENAI_MODERATION_REVERSE_PROXY=
BAN_VIOLATIONS=true
BAN_DURATION=1000 * 60 * 60 * 2
BAN_INTERVAL=20
LOGIN_VIOLATION_SCORE=1
REGISTRATION_VIOLATION_SCORE=1
CONCURRENT_VIOLATION_SCORE=1
MESSAGE_VIOLATION_SCORE=1
NON_BROWSER_VIOLATION_SCORE=20
TTS_VIOLATION_SCORE=0
STT_VIOLATION_SCORE=0
FORK_VIOLATION_SCORE=0
IMPORT_VIOLATION_SCORE=0
FILE_UPLOAD_VIOLATION_SCORE=0
LOGIN_MAX=7
LOGIN_WINDOW=5
REGISTER_MAX=5
REGISTER_WINDOW=60
LIMIT_CONCURRENT_MESSAGES=true
CONCURRENT_MESSAGE_MAX=2
LIMIT_MESSAGE_IP=true
MESSAGE_IP_MAX=40
MESSAGE_IP_WINDOW=1
LIMIT_MESSAGE_USER=false
MESSAGE_USER_MAX=40
MESSAGE_USER_WINDOW=1
ILLEGAL_MODEL_REQ_SCORE=5
#========================#
# Balance #
#========================#
# CHECK_BALANCE=false
# START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
#========================#
# Registration and Login #
#========================#
ALLOW_EMAIL_LOGIN=true
ALLOW_REGISTRATION=true
ALLOW_SOCIAL_LOGIN=false
ALLOW_SOCIAL_REGISTRATION=false
ALLOW_PASSWORD_RESET=false
# ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
ALLOW_UNVERIFIED_EMAIL_LOGIN=true
SESSION_EXPIRY=1000 * 60 * 15
REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
# Discord
DISCORD_CLIENT_ID=
DISCORD_CLIENT_SECRET=
DISCORD_CALLBACK_URL=/oauth/discord/callback
# Facebook
FACEBOOK_CLIENT_ID=
FACEBOOK_CLIENT_SECRET=
FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
# GitHub
GITHUB_CLIENT_ID=
GITHUB_CLIENT_SECRET=
GITHUB_CALLBACK_URL=/oauth/github/callback
# GitHub Enterprise
# GITHUB_ENTERPRISE_BASE_URL=
# GITHUB_ENTERPRISE_USER_AGENT=
# Google
GOOGLE_CLIENT_ID=
GOOGLE_CLIENT_SECRET=
GOOGLE_CALLBACK_URL=/oauth/google/callback
# Apple
APPLE_CLIENT_ID=
APPLE_TEAM_ID=
APPLE_KEY_ID=
APPLE_PRIVATE_KEY_PATH=
APPLE_CALLBACK_URL=/oauth/apple/callback
# OpenID
OPENID_CLIENT_ID=
OPENID_CLIENT_SECRET=
OPENID_ISSUER=
OPENID_SESSION_SECRET=
OPENID_SCOPE="openid profile email"
OPENID_CALLBACK_URL=/oauth/openid/callback
OPENID_REQUIRED_ROLE=
OPENID_REQUIRED_ROLE_TOKEN_KIND=
OPENID_REQUIRED_ROLE_PARAMETER_PATH=
OPENID_ADMIN_ROLE=
OPENID_ADMIN_ROLE_PARAMETER_PATH=
OPENID_ADMIN_ROLE_TOKEN_KIND=
# Set to determine which user info property returned from OpenID Provider to store as the User's username
OPENID_USERNAME_CLAIM=
# Set to determine which user info property returned from OpenID Provider to store as the User's name
OPENID_NAME_CLAIM=
# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
# When not set, defaults to: email -> preferred_username -> upn
OPENID_EMAIL_CLAIM=
# Optional audience parameter for OpenID authorization requests
OPENID_AUDIENCE=
OPENID_BUTTON_LABEL=
OPENID_IMAGE_URL=
# Set to true to automatically redirect to the OpenID provider when a user visits the login page
# This will bypass the login form completely for users, only use this if OpenID is your only authentication method
OPENID_AUTO_REDIRECT=false
# Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
OPENID_USE_PKCE=false
#Set to true to reuse openid tokens for authentication management instead of using the mongodb session and the custom refresh token.
OPENID_REUSE_TOKENS=
#By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
#If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
#Default is true.
OPENID_JWKS_URL_CACHE_ENABLED=
OPENID_JWKS_URL_CACHE_TIME= # 600000 ms eq to 10 minutes leave empty to disable caching
#Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example for Scope Needed for Microsoft Graph API
# Set to true to use the OpenID Connect end session endpoint for logout
OPENID_USE_END_SESSION_ENDPOINT=
# URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
OPENID_POST_LOGOUT_REDIRECT_URI=
# Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
OPENID_MAX_LOGOUT_URL_LENGTH=
#========================#
# SharePoint Integration #
#========================#
# Requires Entra ID (OpenID) authentication to be configured
# Enable SharePoint file picker in chat and agent panels
# ENABLE_SHAREPOINT_FILEPICKER=true
# SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
# SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
# Microsoft Graph API And SharePoint scopes for file picker
# SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
# SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
#========================#
# SAML
# Note: If OpenID is enabled, SAML authentication will be automatically disabled.
SAML_ENTRY_POINT=
SAML_ISSUER=
SAML_CERT=
SAML_CALLBACK_URL=/oauth/saml/callback
SAML_SESSION_SECRET=
# Attribute mappings (optional)
SAML_EMAIL_CLAIM=
SAML_USERNAME_CLAIM=
SAML_GIVEN_NAME_CLAIM=
SAML_FAMILY_NAME_CLAIM=
SAML_PICTURE_CLAIM=
SAML_NAME_CLAIM=
# Login button settings (optional)
SAML_BUTTON_LABEL=
SAML_IMAGE_URL=
# Whether the SAML Response should be signed.
# - If "true", the entire `SAML Response` will be signed.
# - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
# SAML_USE_AUTHN_RESPONSE_SIGNED=
#===============================================#
# Microsoft Graph API / Entra ID Integration #
#===============================================#
# Enable Entra ID people search integration in permissions/sharing system
# When enabled, the people picker will search both local database and Entra ID
USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
# When enabled, Entra ID group owners will be considered as members of the group
ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
# Microsoft Graph API scopes needed for people/group search
# Default scopes provide access to user profiles and group memberships
OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
# LDAP
LDAP_URL=
LDAP_BIND_DN=
LDAP_BIND_CREDENTIALS=
LDAP_USER_SEARCH_BASE=
#LDAP_SEARCH_FILTER="mail="
LDAP_CA_CERT_PATH=
# LDAP_TLS_REJECT_UNAUTHORIZED=
# LDAP_STARTTLS=
# LDAP_LOGIN_USES_USERNAME=true
# LDAP_ID=
# LDAP_USERNAME=
# LDAP_EMAIL=
# LDAP_FULL_NAME=
#========================#
# Email Password Reset #
#========================#
EMAIL_SERVICE=
EMAIL_HOST=
EMAIL_PORT=25
EMAIL_ENCRYPTION=
EMAIL_ENCRYPTION_HOSTNAME=
EMAIL_ALLOW_SELFSIGNED=
# Leave both empty for SMTP servers that do not require authentication
EMAIL_USERNAME=
EMAIL_PASSWORD=
EMAIL_FROM_NAME=
EMAIL_FROM=noreply@librechat.ai
#========================#
# Mailgun API #
#========================#
# MAILGUN_API_KEY=your-mailgun-api-key
# MAILGUN_DOMAIN=mg.yourdomain.com
# EMAIL_FROM=noreply@yourdomain.com
# EMAIL_FROM_NAME="LibreChat"
# # Optional: For EU region
# MAILGUN_HOST=https://api.eu.mailgun.net
#========================#
# Firebase CDN #
#========================#
FIREBASE_API_KEY=
FIREBASE_AUTH_DOMAIN=
FIREBASE_PROJECT_ID=
FIREBASE_STORAGE_BUCKET=
FIREBASE_MESSAGING_SENDER_ID=
FIREBASE_APP_ID=
#========================#
# S3 AWS Bucket #
#========================#
AWS_ENDPOINT_URL=
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=
AWS_BUCKET_NAME=
# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
# AWS_FORCE_PATH_STYLE=false
#========================#
# Azure Blob Storage #
#========================#
AZURE_STORAGE_CONNECTION_STRING=
AZURE_STORAGE_PUBLIC_ACCESS=false
AZURE_CONTAINER_NAME=files
#========================#
# Shared Links #
#========================#
ALLOW_SHARED_LINKS=true
# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
ALLOW_SHARED_LINKS_PUBLIC=false
#==============================#
# Static File Cache Control #
#==============================#
# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
# NODE_ENV must be set to production for these to take effect
# STATIC_CACHE_MAX_AGE=172800
# STATIC_CACHE_S_MAX_AGE=86400
# If you have another service in front of your LibreChat doing compression, disable express based compression here
# DISABLE_COMPRESSION=true
# If you have gzipped versions of uploaded images in the same folder, this will enable gzip scan and serving of these images
# Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
# ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
#===================================================#
# UI #
#===================================================#
APP_TITLE=LibreChat
# CUSTOM_FOOTER="My custom footer"
HELP_AND_FAQ_URL=https://librechat.ai
# SHOW_BIRTHDAY_ICON=true
# Google tag manager id
#ANALYTICS_GTM_ID=user provided google tag manager id
# Limit conversation file imports to a certain number of bytes in size to avoid the container
# maxing out memory limitations by uncommenting the line below and supplying a file size in bytes,
# such as the example below of 250 MiB
# CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
#===============#
# REDIS Options #
#===============#
# Enable Redis for caching and session storage
# USE_REDIS=true
# Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
# Set to false to use in-memory storage for streams while keeping Redis for other caches
# USE_REDIS_STREAMS=true
# Single Redis instance
# REDIS_URI=redis://127.0.0.1:6379
# Redis cluster (multiple nodes)
# REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
# Redis with TLS/SSL encryption and CA certificate
# REDIS_URI=rediss://127.0.0.1:6380
# REDIS_CA=/path/to/ca-cert.pem
# Elasticache may need to use an alternate dnsLookup for TLS connections. see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
# Enable alternative dnsLookup for redis
# REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
# Redis authentication (if required)
# REDIS_USERNAME=your_redis_username
# REDIS_PASSWORD=your_redis_password
# Redis key prefix configuration
# Use environment variable name for dynamic prefix (recommended for cloud deployments)
# REDIS_KEY_PREFIX_VAR=K_REVISION
# Or use static prefix directly
# REDIS_KEY_PREFIX=librechat
# Redis connection limits
# REDIS_MAX_LISTENERS=40
# Redis ping interval in seconds (0 = disabled, >0 = enabled)
# When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
# When unset or 0, no pinging is performed (recommended for most use cases)
# REDIS_PING_INTERVAL=300
# Force specific cache namespaces to use in-memory storage even when Redis is enabled
# Comma-separated list of CacheKeys
# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
# Leader Election Configuration (for multi-instance deployments with Redis)
# Duration in seconds that the leader lease is valid before it expires (default: 25)
# LEADER_LEASE_DURATION=25
# Interval in seconds at which the leader renews its lease (default: 10)
# LEADER_RENEW_INTERVAL=10
# Maximum number of retry attempts when renewing the lease fails (default: 3)
# LEADER_RENEW_ATTEMPTS=3
# Delay in seconds between retry attempts when renewing the lease (default: 0.5)
# LEADER_RENEW_RETRY_DELAY=0.5
#==================================================#
# Others #
#==================================================#
# You should leave the following commented out #
# NODE_ENV=
# E2E_USER_EMAIL=
# E2E_USER_PASSWORD=
#=====================================================#
# Cache Headers #
#=====================================================#
# Headers that control caching of the index.html #
# Default configuration prevents caching to ensure #
# users always get the latest version. Customize #
# only if you understand caching implications. #
# INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
# INDEX_PRAGMA=no-cache
# INDEX_EXPIRES=0
# no-cache: Forces validation with server before using cached version
# no-store: Prevents storing the response entirely
# must-revalidate: Prevents using stale content when offline
#=====================================================#
# OpenWeather #
#=====================================================#
OPENWEATHER_API_KEY=
#====================================#
# LibreChat Code Interpreter API #
#====================================#
# https://code.librechat.ai
# LIBRECHAT_CODE_API_KEY=your-key
#======================#
# Web Search #
#======================#
# Note: All of the following variable names can be customized.
# Omit values to allow user to provide them.
# For more information on configuration values, see:
# https://librechat.ai/docs/features/web_search
# Search Provider (Required)
# SERPER_API_KEY=your_serper_api_key
# Scraper (Required)
# FIRECRAWL_API_KEY=your_firecrawl_api_key
# Optional: Custom Firecrawl API URL
# FIRECRAWL_API_URL=your_firecrawl_api_url
# Reranker (Required)
# JINA_API_KEY=your_jina_api_key
# or
# COHERE_API_KEY=your_cohere_api_key
#======================#
# MCP Configuration #
#======================#
# Treat 401/403 responses as OAuth requirement when no oauth metadata found
# MCP_OAUTH_ON_AUTH_ERROR=true
# Timeout for OAuth detection requests in milliseconds
# MCP_OAUTH_DETECTION_TIMEOUT=5000
# Cache connection status checks for this many milliseconds to avoid expensive verification
# MCP_CONNECTION_CHECK_TTL=60000
# Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
# When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
# MCP_SKIP_CODE_CHALLENGE_CHECK=false
# Circuit breaker: max connect/disconnect cycles before tripping (per server)
# MCP_CB_MAX_CYCLES=7
# Circuit breaker: sliding window (ms) for counting cycles
# MCP_CB_CYCLE_WINDOW_MS=45000
# Circuit breaker: cooldown (ms) after the cycle breaker trips
# MCP_CB_CYCLE_COOLDOWN_MS=15000
# Circuit breaker: max consecutive failed connection rounds before backoff
# MCP_CB_MAX_FAILED_ROUNDS=3
# Circuit breaker: sliding window (ms) for counting failed rounds
# MCP_CB_FAILED_WINDOW_MS=120000
# Circuit breaker: base backoff (ms) after failed round threshold is reached
# MCP_CB_BASE_BACKOFF_MS=30000
# Circuit breaker: max backoff cap (ms) for exponential backoff
# MCP_CB_MAX_BACKOFF_MS=300000
+2 -4
View File
@@ -6,9 +6,7 @@ cache: true
endpoints: endpoints:
anthropic: anthropic:
apiKey: "${ANTHROPIC_API_KEY}" apiKey: "${ANTHROPIC_API_KEY}"
models: models: ["claude-sonnet-4-5", "claude-haiku-4-5", "claude-opus-4-5"]
default: ["claude-sonnet-4-5", "claude-haiku-4-5", "claude-opus-4-5"]
fetch: false
titleConvo: true titleConvo: true
titleModel: "claude-haiku-4-5" titleModel: "claude-haiku-4-5"
modelDisplayLabel: "Claude AI" modelDisplayLabel: "Claude AI"
@@ -72,7 +70,7 @@ endpoints:
apiKey: "dummy_key" apiKey: "dummy_key"
baseURL: "http://alfred:8000/v1" baseURL: "http://alfred:8000/v1"
models: models:
default: ["local-deepseek-agent"] default: ["glm-4.7-flash:latest"]
fetch: false fetch: false
titleConvo: false titleConvo: false
titleModel: "current_model" titleModel: "current_model"
+45
View File
@@ -0,0 +1,45 @@
# MongoDB Configuration File
# Network settings
net:
port: 27017
bindIp: 0.0.0.0
# Storage settings
storage:
dbPath: /data/db
# System log settings
systemLog:
destination: file
path: /dev/stdout
logAppend: true
verbosity: 0
quiet: true
component:
accessControl:
verbosity: -1
command:
verbosity: 0
control:
verbosity: 0
ftdc:
verbosity: 0
geo:
verbosity: 0
index:
verbosity: 0
network:
verbosity: 0
query:
verbosity: 0
replication:
verbosity: 0
sharding:
verbosity: 0
storage:
verbosity: 0
write:
verbosity: 0
transaction:
verbosity: 0
Generated
-1221
View File
File diff suppressed because it is too large Load Diff
+51 -34
View File
@@ -1,50 +1,67 @@
[tool.poetry] [project]
name = "alfred" name = "alfred"
version = "0.1.7" version = "0.1.7"
description = "AI agent for managing a local media library" description = "AI agent for managing a local media library"
authors = ["Francwa <francois.hodiaumont@gmail.com>"] authors = ["Francwa <francois.hodiaumont@gmail.com>"]
readme = "README.md" readme = "README.md"
package-mode = false requires-python = "==3.14.3"
dependencies = [
"python-dotenv~=1.0.0",
"requests~=2.32.5",
"fastapi~=0.127.1",
"pydantic~=2.12.4",
"uvicorn~=0.40.0",
"httpx~=0.28.1",
"pydantic-settings~=2.12.0",
"click~=8.1",
]
[tool.alfred.settings] [tool.alfred]
image_name = "alfred_media_organizer" image_name = "alfred_media_organizer"
librechat_version = "v0.8.1" librechat_version = "v0.8.4"
rag_version = "v0.7.0" rag_version = "v0.7.3"
runner = "poetry"
service_name = "alfred" service_name = "alfred"
uv_version = "0.11.6"
[tool.alfred.security] [tool.alfred.secrets]
jwt_secret = "32:b64" JWT_SECRET = "32:hex"
jwt_refresh_secret = "32:b64" JWT_REFRESH_SECRET = "32:hex"
creds_key = "32:b64" CREDS_KEY = "32:hex"
creds_iv = "16:b64" CREDS_IV = "16:hex"
meili_master_key = "32:b64" MEILI_MASTER_KEY = "32:b64"
mongo_password = "16:hex" MONGO_PASSWORD = "16:hex"
postgres_password = "16:hex" POSTGRES_PASSWORD = "16:hex"
qbittorrent_password = "16:hex" QBITTORRENT_PASSWORD = "16:hex"
[tool.poetry.dependencies] [tool.alfred.config.pattern]
python = "==3.14.2" type = "multi"
python-dotenv = "^1.0.0" patterns = [
requests = "^2.32.5" "^#[=\\-*#]{3,}#?\\s*$",
fastapi = "^0.127.1" "^#\\s+(.+?)\\s+#\\s*$",
pydantic = "^2.12.4" "^#[=\\-*#]{3,}#?\\s*$",
uvicorn = "^0.40.0" ]
pytest-xdist = "^3.8.0"
httpx = "^0.28.1" [tool.alfred.config]
pydantic-settings = "^2.12.0" extra_fields = []
[tool.uv]
package = false
[dependency-groups]
dev = [
"pytest~=8.0.0",
"pytest-cov~=4.1.0",
"pytest-asyncio~=0.23.0",
"pytest-xdist~=3.8.0",
"ruff~=0.14.7",
"pre-commit~=4.5.1",
"bump-my-version~=1.2.5",
]
[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
pytest-cov = "^4.1.0"
pytest-asyncio = "^0.23.0"
ruff = "^0.14.7"
pre-commit = "^4.5.1"
bump-my-version = "^1.2.5"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["hatchling"]
build-backend = "poetry.core.masonry.api" build-backend = "hatchling.build"
[tool.pytest.ini_options] [tool.pytest.ini_options]
# Chemins où pytest cherche les tests # Chemins où pytest cherche les tests
+4693
View File
File diff suppressed because it is too large Load Diff
+154 -209
View File
@@ -1,239 +1,184 @@
#!/usr/bin/env python3
"""Bootstrap script - generates .env.alfred, .env.librechat, .env.secrets and .env.make."""
import re
import secrets import secrets
import sys
from pathlib import Path from pathlib import Path
import tomllib import tomllib
from config_loader import load_build_config, write_env_make
BASE_DIR = Path(__file__).resolve().parent.parent
def load_secrets_spec(toml_data: dict) -> dict[str, tuple[int, str]]:
    """Load the secrets spec from pyproject.toml ``[tool.alfred.secrets]``.

    Each entry of the table maps a secret name to a ``"<size>:<format>"``
    rule, for example ``JWT_SECRET = "32:hex"``.

    Args:
        toml_data: Parsed content of pyproject.toml.

    Returns:
        Mapping of secret name to ``(size, format)``. Empty when the
        ``[tool.alfred.secrets]`` table is absent.

    Raises:
        ValueError: If a rule is not of the form ``"<int>:<format>"``.
            The message names the offending key and rule.
    """
    raw = toml_data.get("tool", {}).get("alfred", {}).get("secrets", {})
    result: dict[str, tuple[int, str]] = {}
    for key, rule in raw.items():
        try:
            size_str, fmt = rule.split(":")
            size = int(size_str)
        except (AttributeError, ValueError) as err:
            # Covers non-string rules, a missing/extra ':' and a non-integer size.
            raise ValueError(
                f"Invalid secret spec for {key}: {rule!r} (expected '<size>:<format>')"
            ) from err
        if not fmt:
            raise ValueError(
                f"Invalid secret spec for {key}: {rule!r} (expected '<size>:<format>')"
            )
        result[key] = (size, fmt)
    return result
def generate_secret(rule: str) -> str: def generate_secret(size: int, fmt: str) -> str:
""" match fmt:
Generates a cryptographically secure secret based on a spec string. case "hex":
Example specs: '32:b64', '16:hex'.
"""
chunks: list[str] = rule.split(":")
size: int = int(chunks[0])
tech: str = chunks[1]
if tech == "b64":
return secrets.token_urlsafe(size)
elif tech == "hex":
return secrets.token_hex(size) return secrets.token_hex(size)
case "b64":
return secrets.token_urlsafe(size)
case _:
raise ValueError(f"Unknown format: {fmt}")
def load_env_file(path: Path) -> dict[str, str]:
    """Parse ``KEY=value`` pairs out of an env file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Keys and values are stripped of surrounding whitespace; only
    the first ``=`` on a line separates key from value. A missing file
    yields an empty dict, and a repeated key keeps its last value.
    """
    if not path.exists():
        return {}

    pairs: dict[str, str] = {}
    for raw_line in path.read_text().splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, val = entry.partition("=")
        pairs[name.strip()] = val.strip()
    return pairs
def copy_example_if_missing(src: Path, dst: Path, label: str) -> None:
    """Create *dst* from the example file *src* unless *dst* already exists.

    An existing *dst* is never overwritten. When *src* is missing, nothing
    is written and a message using *label* is printed instead.
    """
    if dst.exists():
        print(f"{dst.name} already exists, skipping")
    elif not src.exists():
        print(f"{label} example not found at {src} — skipping (add it manually)")
    else:
        template = src.read_text()
        dst.write_text(template)
        print(f" + {dst.name} created from {src.name}")
def generate_secrets_file(path: Path, secrets_spec: dict[str, tuple[int, str]]) -> None:
"""Generate .env.secrets with missing secrets, never overwrite existing ones."""
existing = load_env_file(path)
lines = list(path.read_text().splitlines()) if path.exists() else [
"# Auto-generated secrets — DO NOT COMMIT",
"# Run 'make bootstrap' to generate missing secrets",
"",
]
added = []
for key, (size, fmt) in secrets_spec.items():
if key not in existing:
value = generate_secret(size, fmt)
lines.append(f"{key}={value}")
added.append(key)
path.write_text("\n".join(lines) + "\n")
if added:
print(f" + Generated: {', '.join(added)}")
else: else:
raise ValueError(f"Invalid security format: {tech}") print(" ↻ All secrets already exist, nothing generated")
def extract_python_version(version_string: str) -> tuple[str, str]: def extract_python_version(version_string: str) -> tuple[str, str]:
""" clean = re.sub(r"^[=^~><]+", "", version_string.strip())
Extract Python version from poetry dependency string. parts = clean.split(".")
Examples:
"==3.14.2" -> ("3.14.2", "3.14")
"^3.14.2" -> ("3.14.2", "3.14")
"~3.14.2" -> ("3.14.2", "3.14")
"3.14.2" -> ("3.14.2", "3.14")
"""
import re # noqa: PLC0415
# Remove poetry version operators (==, ^, ~, >=, etc.)
clean_version = re.sub(r"^[=^~><]+", "", version_string.strip())
# Extract version parts
parts = clean_version.split(".")
if len(parts) >= 2: if len(parts) >= 2:
full_version = clean_version return clean, f"{parts[0]}.{parts[1]}"
short_version = f"{parts[0]}.{parts[1]}" raise ValueError(f"Invalid Python version: {version_string}")
return full_version, short_version
def build_uris(env_alfred: Path, env_secrets: Path) -> None:
"""Build MONGO_URI and POSTGRES_URI from components and append them to .env.secrets."""
env = {**load_env_file(env_alfred), **load_env_file(env_secrets)}
existing = load_env_file(env_secrets)
computed = {
"MONGO_URI": (
f"mongodb://{env['MONGO_USER']}:{env['MONGO_PASSWORD']}"
f"@{env['MONGO_HOST']}:{env['MONGO_PORT']}/{env['MONGO_DB_NAME']}"
f"?authSource=admin"
),
"POSTGRES_URI": (
f"postgresql://{env['POSTGRES_USER']}:{env['POSTGRES_PASSWORD']}"
f"@{env['POSTGRES_HOST']}:{env['POSTGRES_PORT']}/{env['POSTGRES_DB_NAME']}"
),
}
content = env_secrets.read_text()
added = []
for key, value in computed.items():
if key in existing:
content = re.sub(rf"^{key}=.*$", f"{key}={value}", content, flags=re.MULTILINE)
else: else:
raise ValueError(f"Invalid Python version format: {version_string}") content = content.rstrip("\n") + f"\n{key}={value}\n"
added.append(key)
env_secrets.write_text(content)
if added:
print(f" + Computed: {', '.join(added)}")
else:
print(" ↻ URIs updated")
# TODO: Refactor def write_env_make(toml_data: dict) -> None:
def bootstrap(): # noqa: PLR0912, PLR0915 """Write .env.make from pyproject.toml."""
""" project = toml_data["project"]
Initializes the .env file by merging .env.example with generated secrets alfred = toml_data["tool"]["alfred"]
and build variables from pyproject.toml.
Also generates .env.make for Makefile.
ALWAYS preserves existing secrets! python_full, python_short = extract_python_version(project["requires-python"])
"""
base_dir = Path(__file__).resolve().parent.parent
env_path = base_dir / ".env"
example_path = base_dir / ".env.example" lines = [
if not example_path.exists(): "# Auto-generated from pyproject.toml — do not edit manually",
print(f"{example_path.name} not found.") f"ALFRED_VERSION={project['version']}",
return f"PYTHON_VERSION={python_full}",
f"IMAGE_NAME={alfred['image_name']}",
f"SERVICE_NAME={alfred['service_name']}",
f"LIBRECHAT_VERSION={alfred['librechat_version']}",
f"RAG_VERSION={alfred['rag_version']}",
f"UV_VERSION={alfred['uv_version']}",
]
toml_path = base_dir / "pyproject.toml" env_make_path = BASE_DIR / ".env.make"
env_make_path.write_text("\n".join(lines) + "\n")
print(f" + {env_make_path.name} written")
def main() -> int:
print("🚀 Starting bootstrap...")
toml_path = BASE_DIR / "pyproject.toml"
if not toml_path.exists(): if not toml_path.exists():
print(f"{toml_path.name} not found.") print(f"pyproject.toml not found: {toml_path}")
return return 1
# ALWAYS load existing .env if it exists
existing_env = {}
if env_path.exists():
print("🔄 Reading existing .env...")
with open(env_path) as f:
for line in f:
if "=" in line and not line.strip().startswith("#"):
key, value = line.split("=", 1)
existing_env[key.strip()] = value.strip()
print(f" Found {len(existing_env)} existing keys")
print("🔧 Updating .env file (keeping secrets)...")
else:
print("🔧 Initializing: Creating secure .env file...")
# Load data from pyproject.toml
with open(toml_path, "rb") as f: with open(toml_path, "rb") as f:
data = tomllib.load(f) toml_data = tomllib.load(f)
security_keys = data["tool"]["alfred"]["security"]
settings_keys = data["tool"]["alfred"]["settings"]
dependencies = data["tool"]["poetry"]["dependencies"]
alfred_version = data["tool"]["poetry"]["version"]
# Normalize TOML keys to UPPER_CASE for .env format (done once) print("\n📄 Env files:")
security_keys_upper = {k.upper(): v for k, v in security_keys.items()} copy_example_if_missing(
settings_keys_upper = {k.upper(): v for k, v in settings_keys.items()} src=BASE_DIR / ".env.example",
dst=BASE_DIR / ".env.alfred",
# Extract Python version label="Alfred",
python_version_full, python_version_short = extract_python_version( )
dependencies["python"] copy_example_if_missing(
src=BASE_DIR / "librechat" / ".env.example",
dst=BASE_DIR / ".env.librechat",
label="LibreChat",
) )
# Read .env.example secrets_spec = load_secrets_spec(toml_data)
with open(example_path) as f: print("\n🔐 Secrets:")
example_lines = f.readlines() generate_secrets_file(BASE_DIR / ".env.secrets", secrets_spec)
new_lines = [] print("\n🔗 URIs:")
# Process each line from .env.example build_uris(BASE_DIR / ".env.alfred", BASE_DIR / ".env.secrets")
for raw_line in example_lines:
line = raw_line.strip()
if line and not line.startswith("#") and "=" in line: print("\n🔧 Build config:")
key, value = line.split("=", 1) write_env_make(toml_data)
key = key.strip()
# Check if key exists in current .env (update mode) print("\n✅ Bootstrap complete!")
if key in existing_env: return 0
# Keep existing value for secrets
if key in security_keys_upper:
new_lines.append(f"{key}={existing_env[key]}\n")
print(f" ↻ Kept existing {key}")
# Update build vars from pyproject.toml
elif key in settings_keys_upper:
new_value = settings_keys_upper[key]
if existing_env[key] != new_value:
new_lines.append(f"{key}={new_value}\n")
print(f" ↻ Updated {key}: {existing_env[key]}{new_value}")
else:
new_lines.append(f"{key}={existing_env[key]}\n")
print(f" ↻ Kept {key}={existing_env[key]}")
# Update Python versions
elif key == "PYTHON_VERSION":
if existing_env[key] != python_version_full:
new_lines.append(f"{key}={python_version_full}\n")
print(
f" ↻ Updated Python: {existing_env[key]}{python_version_full}"
)
else:
new_lines.append(f"{key}={existing_env[key]}\n")
print(f" ↻ Kept Python: {existing_env[key]}")
elif key == "PYTHON_VERSION_SHORT":
if existing_env[key] != python_version_short:
new_lines.append(f"{key}={python_version_short}\n")
print(
f" ↻ Updated Python (short): {existing_env[key]}{python_version_short}"
)
else:
new_lines.append(f"{key}={existing_env[key]}\n")
print(f" ↻ Kept Python (short): {existing_env[key]}")
elif key == "ALFRED_VERSION":
if existing_env.get(key) != alfred_version:
new_lines.append(f"{key}={alfred_version}\n")
print(
f" ↻ Updated Alfred version: {existing_env.get(key, 'N/A')}{alfred_version}"
)
else:
new_lines.append(f"{key}={alfred_version}\n")
print(f" ↻ Kept Alfred version: {alfred_version}")
# Keep other existing values
else:
new_lines.append(f"{key}={existing_env[key]}\n")
# Key doesn't exist, generate/add it
elif key in security_keys_upper:
rule = security_keys_upper[key]
secret = generate_secret(rule)
new_lines.append(f"{key}={secret}\n")
print(f" + Secret generated for {key} ({rule})")
elif key in settings_keys_upper:
value = settings_keys_upper[key]
new_lines.append(f"{key}={value}\n")
print(f" + Setting added: {key}={value}")
elif key == "PYTHON_VERSION":
new_lines.append(f"{key}={python_version_full}\n")
print(f" + Python version: {python_version_full}")
elif key == "PYTHON_VERSION_SHORT":
new_lines.append(f"{key}={python_version_short}\n")
print(f" + Python version (short): {python_version_short}")
elif key == "ALFRED_VERSION":
new_lines.append(f"{key}={alfred_version}\n")
print(f" + Alfred version: {alfred_version}")
else:
new_lines.append(raw_line)
else:
# Keep comments and empty lines
new_lines.append(raw_line)
# Compute database URIs from the generated values
final_env = {}
for line in new_lines:
if "=" in line and not line.strip().startswith("#"):
key, value = line.split("=", 1)
final_env[key.strip()] = value.strip()
# Compute MONGO_URI
if "MONGO_USER" in final_env and "MONGO_PASSWORD" in final_env:
mongo_uri = (
f"mongodb://{final_env.get('MONGO_USER', 'alfred')}:"
f"{final_env.get('MONGO_PASSWORD', '')}@"
f"{final_env.get('MONGO_HOST', 'mongodb')}:"
f"{final_env.get('MONGO_PORT', '27017')}/"
f"{final_env.get('MONGO_DB_NAME', 'alfred')}?authSource=admin"
)
# Update MONGO_URI in new_lines
for i, line in enumerate(new_lines):
if line.startswith("MONGO_URI="):
new_lines[i] = f"MONGO_URI={mongo_uri}\n"
print(" ✓ Computed MONGO_URI")
break
# Compute POSTGRES_URI
if "POSTGRES_USER" in final_env and "POSTGRES_PASSWORD" in final_env:
postgres_uri = (
f"postgresql://{final_env.get('POSTGRES_USER', 'alfred')}:"
f"{final_env.get('POSTGRES_PASSWORD', '')}@"
f"{final_env.get('POSTGRES_HOST', 'vectordb')}:"
f"{final_env.get('POSTGRES_PORT', '5432')}/"
f"{final_env.get('POSTGRES_DB_NAME', 'alfred')}"
)
# Update POSTGRES_URI in new_lines
for i, line in enumerate(new_lines):
if line.startswith("POSTGRES_URI="):
new_lines[i] = f"POSTGRES_URI={postgres_uri}\n"
print(" ✓ Computed POSTGRES_URI")
break
# Write .env file
with open(env_path, "w", encoding="utf-8") as f:
f.writelines(new_lines)
print(f"\n{env_path.name} generated successfully.")
# Generate .env.make for Makefile using shared config loader
config = load_build_config(base_dir)
write_env_make(config, base_dir)
print("✅ .env.make generated for Makefile.")
print("\n⚠️ Reminder: Please manually add your API keys to the .env file.")
if __name__ == "__main__": if __name__ == "__main__":
bootstrap() sys.exit(main())
+30 -36
View File
@@ -1,4 +1,4 @@
"""Shared configuration loader for bootstrap and CI.""" """Shared configuration loader — reads build config from pyproject.toml."""
import re import re
from pathlib import Path from pathlib import Path
@@ -13,30 +13,24 @@ class BuildConfig(NamedTuple):
alfred_version: str alfred_version: str
python_version: str python_version: str
python_version_short: str python_version_short: str
runner: str
image_name: str image_name: str
service_name: str service_name: str
librechat_version: str librechat_version: str
rag_version: str rag_version: str
uv_version: str
def extract_python_version(version_string: str) -> tuple[str, str]: def extract_python_version(version_string: str) -> tuple[str, str]:
""" """
Extract Python version from poetry dependency string. Extract Python version from uv dependency string.
Examples: Examples:
"==3.14.2" -> ("3.14.2", "3.14") "==3.14.2" -> ("3.14.2", "3.14")
"^3.14.2" -> ("3.14.2", "3.14") "^3.14.2" -> ("3.14.2", "3.14")
"~3.14.2" -> ("3.14.2", "3.14")
"3.14.2" -> ("3.14.2", "3.14")
""" """
clean_version = re.sub(r"^[=^~><]+", "", version_string.strip()) clean = re.sub(r"^[=^~><]+", "", version_string.strip())
parts = clean_version.split(".") parts = clean.split(".")
if len(parts) >= 2: if len(parts) >= 2:
full_version = clean_version return clean, f"{parts[0]}.{parts[1]}"
short_version = f"{parts[0]}.{parts[1]}"
return full_version, short_version
else:
raise ValueError(f"Invalid Python version format: {version_string}") raise ValueError(f"Invalid Python version format: {version_string}")
@@ -51,23 +45,21 @@ def load_build_config(base_dir: Path | None = None) -> BuildConfig:
with open(toml_path, "rb") as f: with open(toml_path, "rb") as f:
data = tomllib.load(f) data = tomllib.load(f)
settings_keys = data["tool"]["alfred"]["settings"]
dependencies = data["tool"]["poetry"]["dependencies"]
alfred_version = data["tool"]["poetry"]["version"]
python_version_full, python_version_short = extract_python_version( project = data["project"]
dependencies["python"] alfred = data["tool"]["alfred"]
)
python_full, python_short = extract_python_version(project["requires-python"])
return BuildConfig( return BuildConfig(
alfred_version=alfred_version, alfred_version=project["version"],
python_version=python_version_full, python_version=python_full,
python_version_short=python_version_short, python_version_short=python_short,
runner=settings_keys["runner"], image_name=alfred["image_name"],
image_name=settings_keys["image_name"], service_name=alfred["service_name"],
service_name=settings_keys["service_name"], librechat_version=alfred["librechat_version"],
librechat_version=settings_keys["librechat_version"], rag_version=alfred["rag_version"],
rag_version=settings_keys["rag_version"], uv_version=alfred["uv_version"],
) )
@@ -76,14 +68,16 @@ def write_env_make(config: BuildConfig, base_dir: Path | None = None) -> None:
if base_dir is None: if base_dir is None:
base_dir = Path(__file__).resolve().parent.parent base_dir = Path(__file__).resolve().parent.parent
lines = [
"# Auto-generated from pyproject.toml — do not edit manually",
f"export ALFRED_VERSION={config.alfred_version}",
f"export PYTHON_VERSION={config.python_version}",
f"export IMAGE_NAME={config.image_name}",
f"export SERVICE_NAME={config.service_name}",
f"export LIBRECHAT_VERSION={config.librechat_version}",
f"export RAG_VERSION={config.rag_version}",
f"export UV_VERSION={config.uv_version}",
]
env_make_path = base_dir / ".env.make" env_make_path = base_dir / ".env.make"
with open(env_make_path, "w", encoding="utf-8") as f: env_make_path.write_text("\n".join(lines) + "\n")
f.write("# Auto-generated from pyproject.toml\n")
f.write(f"export ALFRED_VERSION={config.alfred_version}\n")
f.write(f"export PYTHON_VERSION={config.python_version}\n")
f.write(f"export PYTHON_VERSION_SHORT={config.python_version_short}\n")
f.write(f"export RUNNER={config.runner}\n")
f.write(f"export IMAGE_NAME={config.image_name}\n")
f.write(f"export SERVICE_NAME={config.service_name}\n")
f.write(f"export LIBRECHAT_VERSION={config.librechat_version}\n")
f.write(f"export RAG_VERSION={config.rag_version}\n")
-22
View File
@@ -1,22 +0,0 @@
#!/usr/bin/env python3
"""Generate .env.make for CI/CD without generating secrets."""
import sys
from config_loader import load_build_config, write_env_make
def main():
    """Generate .env.make from pyproject.toml.

    Returns:
        0 when the file was written, 1 when any exception was raised
        while loading the build config or writing the file.
    """
    try:
        write_env_make(load_build_config())
        print("✅ .env.make generated successfully.")
    except Exception as exc:
        # Top-level boundary of the script: report and turn into an exit code.
        print(f"❌ Failed to generate .env.make: {exc}")
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())
+528
View File
@@ -0,0 +1,528 @@
#!/usr/bin/env python3
"""
scan_subtitles.py — CLI pour tester le pipeline de scan de sous-titres Alfred.
Usage:
uv run testing/subtitles/scan_subtitles.py <season_folder> [options]
Options:
--release-group RARBG Groupe de release (optionnel — active les known patterns)
--pattern adjacent Forcer un pattern (adjacent|flat|episode_subfolder|embedded)
--video FILE Fichier vidéo de référence (défaut: premier .mkv/.mp4 trouvé)
--verbose Détails sur chaque token analysé
--no-color Désactive la colorisation
Exemples:
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01"
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01" --release-group RARBG
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01" --pattern episode_subfolder --verbose
"""
import argparse
import sys
import textwrap
from pathlib import Path
# Ajoute la racine du projet au path (testing/subtitles/ → ../../)
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(_PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(_PROJECT_ROOT))
# ---------------------------------------------------------------------------
# Colorisation simple (pas de dépendance externe)
# ---------------------------------------------------------------------------
USE_COLOR = True
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
def c(text: str, *codes: str) -> str:
    """Wrap *text* in the given ANSI codes followed by RESET.

    When the module-level USE_COLOR flag is falsy, *text* is returned untouched.
    """
    if USE_COLOR:
        return "".join((*codes, text, RESET))
    return text
def section(title: str) -> None:
    """Print a blank line, then *title* framed by two dim rule lines."""
    width = 70
    # NOTE(review): the rule glyph is an empty string in this copy of the file,
    # so the rule lines carry only colour codes — confirm against the original.
    rule = c("" * width, DIM)
    print()
    print(rule)
    print(c(f" {title}", BOLD, CYAN))
    print(rule)
def ok(msg: str) -> None:
    """Print *msg* preceded by the bold green status prefix."""
    prefix = c("", GREEN, BOLD)
    print(prefix + msg)
def warn(msg: str) -> None:
    """Print *msg* preceded by the bold yellow status prefix."""
    prefix = c("", YELLOW, BOLD)
    print(prefix + msg)
def err(msg: str) -> None:
    """Print *msg* preceded by the bold red status prefix."""
    prefix = c("", RED, BOLD)
    print(prefix + msg)
def info(msg: str, indent: int = 2) -> None:
    """Print *msg* shifted right by *indent* spaces."""
    padding = " " * indent
    print("".join((padding, msg)))
def kv(key: str, value: str, indent: int = 4) -> None:
    """Print a ``key: value`` line with a bold key, shifted right by *indent* spaces."""
    padding = " " * indent
    label = c(f"{key}: ", BOLD)
    print(padding + label + value)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
VIDEO_EXTS = {".mkv", ".mp4", ".avi", ".mov", ".ts", ".m2ts"}
def find_videos(folder: Path) -> list[Path]:
    """Return the sorted video files located directly inside *folder*.

    A file counts as a video when its lower-cased suffix is in VIDEO_EXTS.
    Sub-directories are not descended into.
    """
    videos: list[Path] = []
    for entry in folder.iterdir():
        if entry.is_file() and entry.suffix.lower() in VIDEO_EXTS:
            videos.append(entry)
    videos.sort()
    return videos
def confidence_bar(conf: float, width: int = 20) -> str:
    """Render *conf* as a coloured bar followed by a bold percentage.

    The colour is green from 0.8 upward, yellow from 0.5 upward, red below.
    """
    filled = int(conf * width)
    bar = "" * filled + "" * (width - filled)

    # First threshold reached wins; red is the fallback.
    color = RED
    for threshold, tone in ((0.8, GREEN), (0.5, YELLOW)):
        if conf >= threshold:
            color = tone
            break

    percentage = c(f" {conf:.0%}", BOLD)
    return c(bar, color) + percentage
def track_summary(track, verbose: bool = False) -> None:
    """Print a multi-line description of one subtitle track.

    Args:
        track: Object exposing ``language``, ``format``, ``subtitle_type``,
            ``is_embedded``, ``file_path``, ``confidence``, ``entry_count``,
            ``file_size_kb``, ``raw_tokens``, ``is_resolved()`` and
            ``destination_name``.
        verbose: When true, also print the track's raw tokens (if any).
    """
    if track.language:
        lang = track.language.code
    else:
        lang = c("?", RED)

    if track.format:
        fmt = track.format.id
    else:
        fmt = c("?", RED)

    typ = track.subtitle_type.value

    if track.is_embedded:
        src = "embedded"
    elif track.file_path:
        src = track.file_path.name
    else:
        src = "?"

    # Colour used for the subtitle type
    type_colors = {
        "standard": GREEN,
        "sdh": YELLOW,
        "forced": BLUE,
        "unknown": RED,
    }
    typ_str = c(typ, type_colors.get(typ, RESET))

    # Only external tracks without a language are flagged as unresolved.
    unresolved = not track.is_embedded and track.language is None
    clarif = c(" [langue inconnue]", RED, BOLD) if unresolved else ""

    print(f" {c(src, BOLD)}")
    print(f" lang={c(lang, CYAN)} type={typ_str} format={fmt}")

    if track.is_embedded:
        conf_str = c("n/a (embedded)", DIM)
    else:
        conf_str = confidence_bar(track.confidence)
    print(f" confidence={conf_str}{clarif}")

    if track.entry_count is not None:
        if track.file_size_kb:
            print(f" entries={track.entry_count} size={track.file_size_kb:.1f} KB")
        else:
            print(f" entries={track.entry_count}")

    if verbose and track.raw_tokens:
        print(f" tokens={track.raw_tokens}")

    if track.is_resolved() and track.language and track.format:
        try:
            dest = track.destination_name
            print(f"{c(dest, GREEN, BOLD)}")
        except ValueError:
            # No destination name could be produced: nothing is printed.
            pass
# ---------------------------------------------------------------------------
# Étapes du pipeline
# ---------------------------------------------------------------------------
def step_load_kb() -> "SubtitleKnowledgeBase":
    """Step 1: build the subtitle knowledge base and print what it contains.

    Returns:
        The ``SubtitleKnowledgeBase`` built from a default ``KnowledgeLoader``.
    """
    from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
    from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader

    section("ÉTAPE 1 — Chargement de la base de connaissances")

    knowledge = SubtitleKnowledgeBase(KnowledgeLoader())
    known_formats = knowledge.formats()
    known_languages = knowledge.languages()
    known_patterns = knowledge.patterns()

    ok(f"{len(known_formats)} format(s) connu(s): {', '.join(known_formats.keys())}")
    ok(f"{len(known_languages)} langue(s) connue(s): {', '.join(known_languages.keys())}")
    ok(f"{len(known_patterns)} pattern(s) connu(s): {', '.join(known_patterns.keys())}")

    # Total number of language tokens across every known language.
    total_tokens = 0
    for language in known_languages.values():
        total_tokens += len(language.tokens)
    info(c(f"{total_tokens} tokens de langue au total", DIM), indent=4)

    return knowledge
def step_detect_pattern(
    kb: "SubtitleKnowledgeBase",
    season_folder: Path,
    sample_video: Path,
    release_group: str | None,
    forced_pattern: str | None,
) -> "SubtitlePattern":
    """Step 2: decide which release pattern to use and print the reasoning.

    Priority: forced pattern, then patterns known for the release group,
    then auto-detection, then the ``adjacent`` pattern as a fallback.

    Args:
        kb: Knowledge base used to look patterns up.
        season_folder: Folder handed to the detector.
        sample_video: Reference video handed to the detector.
        release_group: Optional release group name.
        forced_pattern: Optional pattern id that bypasses every other source.

    Returns:
        The selected pattern.

    Raises:
        SystemExit: When the forced pattern is unknown, or when the
            ``adjacent`` fallback is missing from the knowledge base.
    """
    from alfred.domain.subtitles.services.pattern_detector import PatternDetector

    section("ÉTAPE 2 — Détection du pattern de release")

    # 1) Explicitly forced pattern
    if forced_pattern:
        forced = kb.pattern(forced_pattern)
        if not forced:
            err(f"Pattern inconnu: '{forced_pattern}'")
            print(f" Patterns disponibles: {', '.join(kb.patterns().keys())}")
            sys.exit(1)
        ok(f"Pattern forcé: {c(forced_pattern, CYAN, BOLD)}")
        return forced

    # 2) Patterns already known for this release group
    if release_group:
        candidates = kb.patterns_for_group(release_group)
        if not candidates:
            warn(f"Groupe '{release_group}' inconnu — lancement de la détection auto")
        else:
            kv("Release group", release_group)
            ok(f"Pattern(s) connu(s) pour {release_group}: {', '.join(p.id for p in candidates)}")
            chosen = candidates[0]
            kv("Pattern sélectionné", c(chosen.id, CYAN, BOLD))
            return chosen

    # 3) Auto-detection
    kv("Dossier analysé", str(season_folder))
    kv("Vidéo de référence", sample_video.name)
    outcome = PatternDetector(kb).detect(season_folder, sample_video)

    observations = outcome.get("raw_findings", {})
    info(c("Observations:", BOLD), indent=4)
    for name, value in observations.items():
        # Hide findings that are False, None or 0.
        if value in (False, None, 0):
            continue
        info(f" {name}: {c(str(value), CYAN)}", indent=4)

    detected = outcome.get("detected")
    confidence = outcome.get("confidence", 0.0)
    description = outcome.get("description", "")
    print()
    info(c(f'Description: "{description}"', DIM), indent=4)
    print(f" Confiance: {confidence_bar(confidence)}")

    if not detected:
        # 4) Fallback
        warn("Aucun pattern détecté avec confiance suffisante — fallback: adjacent")
        fallback = kb.pattern("adjacent")
        if not fallback:
            err("Pattern 'adjacent' introuvable dans la KB !")
            sys.exit(1)
        return fallback

    ok(f"Pattern détecté: {c(detected.id, CYAN, BOLD)}")
    kv("Stratégie de scan", detected.scan_strategy.value)
    kv("Détection de type", detected.type_detection.value)
    if detected.root_folder:
        kv("Dossier racine", detected.root_folder)
    return detected
def step_identify_tracks(
    kb: "SubtitleKnowledgeBase",
    sample_video: Path,
    pattern: "SubtitlePattern",
    release_group: str | None,
    verbose: bool,
) -> "MediaSubtitleMetadata":
    """Step 3: identify the subtitle tracks of *sample_video* and print them.

    Args:
        kb: Knowledge base handed to ``SubtitleIdentifier``.
        sample_video: Video whose tracks are identified.
        pattern: Release pattern to apply.
        release_group: Optional release group forwarded to the identifier.
        verbose: Forwarded to ``track_summary``.

    Returns:
        The metadata object returned by ``SubtitleIdentifier.identify``.
    """
    from alfred.domain.subtitles.services.identifier import SubtitleIdentifier

    section("ÉTAPE 3 — Identification des pistes")
    kv("Vidéo", sample_video.name)
    kv("Pattern", pattern.id)

    metadata = SubtitleIdentifier(kb).identify(
        video_path=sample_video,
        pattern=pattern,
        media_id=None,
        media_type="tv_show",
        release_group=release_group,
    )

    embedded_count = len(metadata.embedded_tracks)
    external_count = len(metadata.external_tracks)
    unresolved_count = len(metadata.unresolved_tracks)

    print()
    ok(f"{external_count} piste(s) externe(s) trouvée(s)")
    if embedded_count:
        ok(f"{embedded_count} piste(s) embarquée(s) (ffprobe)")
    if unresolved_count:
        warn(f"{unresolved_count} piste(s) externe(s) sans langue reconnue")

    # External tracks first, then embedded ones; empty groups are not printed.
    if metadata.external_tracks:
        print()
        info(c("Pistes externes:", BOLD))
        for external in metadata.external_tracks:
            track_summary(external, verbose)

    if metadata.embedded_tracks:
        print()
        info(c("Pistes embarquées:", BOLD))
        for embedded in metadata.embedded_tracks:
            track_summary(embedded, verbose)

    return metadata
def step_apply_rules(
    metadata: "MediaSubtitleMetadata",
    release_group: str | None,
) -> tuple["SubtitleMatchingRules | None", list, list]:
    """Step 4: apply the default matching rules to the identified tracks.

    Args:
        metadata: Result of the identification step.
        release_group: Accepted for signature symmetry; not read by this step.

    Returns:
        ``(rules, matched, unresolved)``. For the embedded case the matcher is
        bypassed and the result is ``(None, available_tracks, [])``.
    """
    from alfred.domain.subtitles.aggregates import DEFAULT_RULES
    from alfred.domain.subtitles.services.matcher import SubtitleMatcher
    from alfred.domain.subtitles.services.utils import available_subtitles
    from alfred.domain.subtitles.value_objects import ScanStrategy

    section("ÉTAPE 4 — Application des règles")

    # Embedded case: no matcher, the available tracks are listed directly.
    if metadata.detected_pattern_id == ScanStrategy.EMBEDDED.value:
        info(c("Pattern embedded — le matcher est court-circuité", DIM), indent=4)
        available = available_subtitles(metadata.embedded_tracks)
        ok(f"{len(available)} piste(s) disponible(s)")
        return None, available, []

    rules = DEFAULT_RULES()
    summary = (
        ("Langues préférées", rules.preferred_languages),
        ("Formats préférés", rules.preferred_formats),
        ("Types autorisés", rules.allowed_types),
        ("Confiance min", rules.min_confidence),
    )
    for label, value in summary:
        kv(label, str(value))
    info(c("(règles globales par défaut — pas de .alfred/ en mode scan)", DIM), indent=4)

    matched, unresolved = SubtitleMatcher().match(metadata.external_tracks, rules)

    print()
    ok(f"{len(matched)} piste(s) retenue(s)")
    if unresolved:
        warn(f"{len(unresolved)} piste(s) écartée(s) ou non résolue(s)")
    return rules, matched, unresolved
def step_show_results(
    matched: list,
    unresolved: list,
    is_embedded: bool,
    verbose: bool,
) -> None:
    """Print the final report: retained tracks, then discarded/unresolved ones.

    Args:
        matched: Tracks that were retained.
        unresolved: Tracks that were discarded or need clarification.
        is_embedded: True when the tracks are embedded (no source/destination
            file names are shown in that case).
        verbose: When true, append the raw tokens of unresolved tracks.
    """
    section("RÉSULTAT FINAL")

    if not matched:
        warn("Aucune piste retenue.")
    else:
        if is_embedded:
            label = "piste(s) disponible(s)"
        else:
            label = "piste(s) qui seraient placées"
        ok(f"{len(matched)} {label}:")

        for track in matched:
            lang = track.language.code if track.language else "?"
            typ = track.subtitle_type.value
            if is_embedded:
                print(f" {c(lang, CYAN)} {c(typ, GREEN)}")
                continue
            try:
                dest = track.destination_name
                src = track.file_path.name if track.file_path else "?"
                print(f" {c(src, DIM)}{c(dest, GREEN, BOLD)}")
            except ValueError:
                warn(f" Piste incomplète (lang ou format manquant): {track}")

    if unresolved:
        print()
        warn(f"{len(unresolved)} piste(s) écartées ou à clarifier:")
        for track in unresolved:
            src = track.file_path.name if track.file_path else "?"
            if track.language is None:
                reason = "langue inconnue"
            else:
                reason = "confiance insuffisante"
            line = f" {c(src, DIM)} ({reason})"
            if verbose and track.raw_tokens:
                line += c(f" tokens: {track.raw_tokens}", YELLOW)
            print(line)

    print()
# ---------------------------------------------------------------------------
# Scan multi-épisodes (résumé)
# ---------------------------------------------------------------------------
def scan_season(
    kb: "SubtitleKnowledgeBase",
    pattern: "SubtitlePattern",
    season_folder: Path,
    release_group: str | None,
    verbose: bool,
) -> None:
    """Run identification + matching on every video of *season_folder*.

    Prints one summary line per video: a status icon, the file name, the
    destination names of the retained tracks and the unresolved count.

    Args:
        kb: Knowledge base handed to ``SubtitleIdentifier``.
        pattern: Release pattern applied to every episode.
        season_folder: Folder whose direct video files are scanned.
        release_group: Optional release group forwarded to the identifier.
        verbose: Accepted for signature symmetry; not read by this function.
    """
    from alfred.domain.subtitles.aggregates import DEFAULT_RULES
    from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
    from alfred.domain.subtitles.services.matcher import SubtitleMatcher

    episodes = find_videos(season_folder)
    section(f"SCAN COMPLET DE LA SAISON ({len(episodes)} épisode(s))")
    if not episodes:
        warn("Aucun fichier vidéo trouvé dans ce dossier.")
        return

    identifier = SubtitleIdentifier(kb)
    matcher = SubtitleMatcher()
    rules = DEFAULT_RULES()

    # Column width: longest file name plus two characters of padding.
    col_w = max(len(episode.name) for episode in episodes) + 2

    for video in episodes:
        metadata = identifier.identify(
            video_path=video,
            pattern=pattern,
            media_id=None,
            media_type="tv_show",
            release_group=release_group,
        )
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        placed_names = []
        for track in matched:
            try:
                placed_names.append(track.destination_name)
            except ValueError:
                # Tracks without a computable destination name are left out.
                pass

        if placed_names:
            status_icon = c("", GREEN, BOLD)
            names_color = GREEN
        else:
            status_icon = c("", RED, BOLD)
            names_color = DIM
        names_text = c(", ".join(placed_names) or "", names_color)

        warn_icon = ""
        if unresolved:
            warn_icon = c(f" [{len(unresolved)} non-résolue(s)]", YELLOW)

        print(f" {status_icon} {video.name:{col_w}} {names_text}{warn_icon}")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def parse_args() -> argparse.Namespace:
    """Parse the command line of the subtitle scanner.

    The module docstring is reused (dedented) as the help epilog.

    Returns:
        Namespace with ``season_folder``, ``release_group``, ``pattern``,
        ``video``, ``verbose``, ``no_color`` and ``season_scan``.
    """
    parser = argparse.ArgumentParser(
        description="Scanner de sous-titres Alfred — pipeline de diagnostic",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(__doc__ or ""),
    )
    parser.add_argument("season_folder", help="Dossier de la saison (ou du film)")
    parser.add_argument(
        "--release-group", "-g", metavar="GROUP",
        # Example group name spelled "KONTRAST" (was misspelled "KONSTRAST").
        help="Groupe de release (ex: RARBG, KONTRAST)",
    )
    parser.add_argument(
        "--pattern", "-p", metavar="PATTERN",
        help="Forcer un pattern (adjacent|flat|episode_subfolder|embedded)",
    )
    # NOTE: "-v" is the short form of --video, not of --verbose.
    parser.add_argument(
        "--video", "-v", metavar="FILE",
        help="Fichier vidéo de référence (défaut: premier trouvé)",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="Affiche les tokens bruts par piste",
    )
    parser.add_argument(
        "--no-color", action="store_true",
        help="Désactive la colorisation ANSI",
    )
    parser.add_argument(
        "--season-scan", action="store_true",
        help="Après le diagnostic, scanner tous les épisodes de la saison",
    )
    return parser.parse_args()
def main() -> None:
    """Entry point: run the diagnostic pipeline on one folder.

    Exits with status 1 when the folder is not a directory, when the
    ``--video`` file does not exist, or when no video file can be found.
    """
    global USE_COLOR

    args = parse_args()
    # Colour is switched off on request or when stdout is not a terminal.
    if args.no_color or not sys.stdout.isatty():
        USE_COLOR = False

    season_folder = Path(args.season_folder).expanduser().resolve()
    if not season_folder.is_dir():
        print(f"Erreur: '{season_folder}' n'est pas un dossier.", file=sys.stderr)
        sys.exit(1)

    banner_rule = c("" * 70, BOLD)
    print()
    print(banner_rule)
    print(c(" Alfred — Subtitle Scanner", BOLD, MAGENTA))
    print(banner_rule)
    kv("Dossier", str(season_folder), indent=2)

    # Pick the reference video
    if args.video:
        sample_video = Path(args.video).expanduser().resolve()
        if not sample_video.exists():
            print(f"Erreur: '{sample_video}' introuvable.", file=sys.stderr)
            sys.exit(1)
    else:
        videos = find_videos(season_folder)
        if not videos:
            # Look one level down (release-root layout); first sub-folder
            # that contains videos wins.
            per_subfolder = (
                find_videos(sub) for sub in season_folder.iterdir() if sub.is_dir()
            )
            videos = next((found for found in per_subfolder if found), [])
        if not videos:
            print("Erreur: aucun fichier vidéo trouvé dans ce dossier.", file=sys.stderr)
            sys.exit(1)
        sample_video = videos[0]

    kv("Vidéo de référence", sample_video.name, indent=2)

    # ---- Pipeline ----
    release_group = args.release_group
    kb = step_load_kb()
    pattern = step_detect_pattern(
        kb=kb,
        season_folder=season_folder,
        sample_video=sample_video,
        release_group=release_group,
        forced_pattern=args.pattern,
    )
    metadata = step_identify_tracks(
        kb=kb,
        sample_video=sample_video,
        pattern=pattern,
        release_group=release_group,
        verbose=args.verbose,
    )
    rules, matched, unresolved = step_apply_rules(
        metadata=metadata,
        release_group=release_group,
    )
    # step_apply_rules returns rules=None only for the embedded case.
    step_show_results(
        matched=matched,
        unresolved=unresolved,
        is_embedded=rules is None,
        verbose=args.verbose,
    )

    if args.season_scan:
        scan_season(
            kb=kb,
            pattern=pattern,
            season_folder=season_folder,
            release_group=release_group,
            verbose=args.verbose,
        )

    print(c("" * 70, BOLD))
    print()
if __name__ == "__main__":
main()
+479
View File
@@ -0,0 +1,479 @@
#!/usr/bin/env python3
"""
run_workflow.py — Simulate an Alfred workflow step by step (dry-run or live).
Usage:
uv run testing/workflows/run_workflow.py organize_media [options]
Options:
--dry-run Print what each step would do without executing tools (default).
--live Actually execute the tools (uses real filesystem + memory).
--source PATH Source video file (download folder).
--dest PATH Destination video file (library path).
--download-folder P Original download folder (for create_seed_links).
--imdb-id ID IMDb ID for identify_media step (tt1234567).
--seed Answer "yes" to the seeding question.
--no-color Disable ANSI colours.
Examples:
uv run testing/workflows/run_workflow.py organize_media --dry-run \\
--source "/downloads/Breaking.Bad.S01E01.mkv" \\
--dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv"
uv run testing/workflows/run_workflow.py organize_media --live \\
--source "/downloads/BB/Breaking.Bad.S01E01.mkv" \\
--dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv" \\
--download-folder "/downloads/BB" --seed
"""
import argparse
import sys
import textwrap
from pathlib import Path
from typing import Any
# Project root on sys.path
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(_PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(_PROJECT_ROOT))
# ---------------------------------------------------------------------------
# Colours
# ---------------------------------------------------------------------------
USE_COLOR = True
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
def c(text: str, *codes: str) -> str:
    """Wrap ``str(text)`` in the given ANSI codes followed by RESET.

    When the module-level USE_COLOR flag is falsy, *text* is returned as-is.
    """
    if USE_COLOR:
        return "".join((*codes, str(text), RESET))
    return text
def section(title: str) -> None:
    """Print a blank line, then *title* framed by two dim rule lines."""
    rule = c("" * 70, DIM)
    print()
    print(rule)
    print(c(f" {title}", BOLD, CYAN))
    print(rule)
def ok(msg: str) -> None:
    """Print *msg* preceded by the bold green status prefix."""
    prefix = c("", GREEN, BOLD)
    print(prefix + msg)
def warn(msg: str) -> None:
    """Print *msg* preceded by the bold yellow status prefix."""
    prefix = c("", YELLOW, BOLD)
    print(prefix + msg)
def err(msg: str) -> None:
    """Print *msg* preceded by the bold red status prefix."""
    prefix = c("", RED, BOLD)
    print(prefix + msg)
def info(msg: str) -> None:
    """Print *msg* preceded by a single space."""
    print(" {}".format(msg))
def kv(key: str, val: str) -> None:
    """Print a ``key: val`` line with the key (and its colon) in bold."""
    label = c(key + ':', BOLD)
    print(f" {label} {val}")
# ---------------------------------------------------------------------------
# Dry-run tool stubs
# ---------------------------------------------------------------------------
def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
return {
"status": "ok",
"folder_type": folder_type,
"path": path,
"entries": ["[dry-run — no real listing]"],
"count": 1,
}
def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]:
return {
"status": "ok",
"imdb_id": kwargs.get("imdb_id") or "tt0000000",
"title": "Dry Run Show",
"type": "tv_show",
"year": 2024,
}
def _dry_resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """Dry-run stand-in for ``resolve_destination``.

    Parses *release_name* with ``parse_release`` and builds library paths
    under the hard-coded ``/movies`` or ``/tv`` roots, without touching the
    filesystem.

    Args:
        release_name: Release name handed to ``parse_release``.
        source_file: Source file; only its suffix is used.
        tmdb_title: Title used for folder/file naming.
        tmdb_year: Year used for folder/file naming.
        tmdb_episode_title: Episode title used for single-episode file names.
        confirmed_folder: Show folder name that overrides the computed one.

    Returns:
        A result dict with ``status``, ``library_file``, folder paths and
        names, ``filename`` and ``is_new_series_folder``.
    """
    from alfred.domain.media.release_parser import parse_release

    parsed = parse_release(release_name)
    ext = Path(source_file).suffix

    # --- TV branch ---
    if not parsed.is_movie:
        season_folder = parsed.season_folder_name()
        show_folder = confirmed_folder or parsed.show_folder_name(tmdb_title, tmdb_year)
        if parsed.is_season_pack:
            # Season packs reuse the season folder name as file name.
            fname = season_folder + ext
        else:
            fname = parsed.episode_filename(tmdb_episode_title, ext)
        series_path = f"/tv/{show_folder}"
        season_path = f"/tv/{show_folder}/{season_folder}"
        return {
            "status": "ok",
            "library_file": f"/tv/{show_folder}/{season_folder}/{fname}",
            "series_folder": series_path,
            "season_folder": season_path,
            "series_folder_name": show_folder,
            "season_folder_name": season_folder,
            "filename": fname,
            "is_new_series_folder": confirmed_folder is None,
        }

    # --- Movie branch ---
    folder = parsed.movie_folder_name(tmdb_title, tmdb_year)
    fname = parsed.movie_filename(tmdb_title, tmdb_year, ext)
    movie_path = f"/movies/{folder}"
    return {
        "status": "ok",
        "library_file": f"/movies/{folder}/{fname}",
        "series_folder": movie_path,
        "series_folder_name": folder,
        "season_folder": None,
        "season_folder_name": None,
        "filename": fname,
        "is_new_series_folder": True,
    }
def _dry_move_media(source: str, destination: str) -> dict[str, Any]:
return {
"status": "ok",
"source": source,
"destination": destination,
"filename": Path(destination).name,
"size": 0,
}
def _dry_manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
return {
"status": "ok",
"video_path": destination_video,
"placed": [],
"placed_count": 0,
"skipped_count": 0,
}
def _dry_create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
return {
"status": "ok",
"torrent_subfolder": f"/torrents/{Path(original_download_folder).name}",
"linked_file": f"/torrents/{Path(original_download_folder).name}/{Path(library_file).name}",
"copied_files": ["[dry-run — no real copy]"],
"copied_count": 1,
"skipped": [],
}
DRY_RUN_TOOLS: dict[str, Any] = {
"list_folder": _dry_list_folder,
"find_media_imdb_id": _dry_find_media_imdb_id,
"resolve_destination": _dry_resolve_destination,
"move_media": _dry_move_media,
"manage_subtitles": _dry_manage_subtitles,
"create_seed_links": _dry_create_seed_links,
}
# ---------------------------------------------------------------------------
# Live tools
# ---------------------------------------------------------------------------
def _load_live_tools() -> dict[str, Any]:
    """Import the real tool functions and return them keyed by tool name.

    ``find_media_imdb_id`` is optional: when the api tools cannot be
    imported, a stub returning a ``not_available`` error dict is used.

    NOTE(review): no ``resolve_destination`` entry is registered here, unlike
    the dry-run registry — confirm whether that is intended for live runs.
    """
    from alfred.agent.tools.filesystem import (
        create_seed_links,
        list_folder,
        manage_subtitles,
        move_media,
    )

    # find_media_imdb_id lives in the api tools
    try:
        from alfred.agent.tools.api import find_media_imdb_id
    except ImportError:
        def find_media_imdb_id(**kwargs):  # type: ignore[misc]
            return {"status": "error", "error": "not_available", "message": "api tools not loaded"}

    registry: dict[str, Any] = {}
    registry["list_folder"] = list_folder
    registry["find_media_imdb_id"] = find_media_imdb_id
    registry["move_media"] = move_media
    registry["manage_subtitles"] = manage_subtitles
    registry["create_seed_links"] = create_seed_links
    return registry
# ---------------------------------------------------------------------------
# Workflow runner
# ---------------------------------------------------------------------------
class WorkflowRunner:
    """Walk through a workflow definition step by step and print each outcome.

    Three step shapes are handled: ``ask_user`` (answered from the ``--seed``
    flag), ``memory_write`` (announced only) and tool steps (looked up in the
    tool registry and called with kwargs built from the step, the CLI
    arguments and earlier results).
    """

    def __init__(self, workflow: dict, tools: dict[str, Any], live: bool, args: argparse.Namespace):
        """Store the workflow definition, the tool registry and the CLI args.

        Args:
            workflow: Parsed workflow definition (``name``, ``description``,
                ``tools``, ``steps``).
            tools: Mapping of tool name to callable.
            live: True when real tools are executed, False for dry-run.
            args: Parsed command-line arguments.
        """
        self.workflow = workflow
        self.tools = tools
        self.live = live
        self.args = args
        self.context: dict[str, Any] = {}  # step results accumulate here
        self.step_results: list[dict] = []

    @staticmethod
    def _answer_key(key: Any) -> str:
        """Return the textual form of an ``ask_user`` answer key.

        PyYAML parses bare ``yes``/``no`` as booleans, so boolean keys are
        mapped back to "yes"/"no"; a plain ``str()`` would give "True"/"False"
        and never match the simulated answer. Other keys go through ``str``.
        """
        if isinstance(key, bool):
            return "yes" if key else "no"
        return str(key)

    def run(self) -> None:
        """Print the header, run every step in order, then print a summary."""
        name = self.workflow.get("name", "?")
        desc = self.workflow.get("description", "").strip()
        mode = c("LIVE", RED, BOLD) if self.live else c("DRY-RUN", YELLOW, BOLD)

        print()
        print(c("" * 70, BOLD))
        print(c(f" Alfred — Workflow Simulator [{mode}]", BOLD, MAGENTA))
        print(c("" * 70, BOLD))
        kv("Workflow", c(name, CYAN, BOLD))
        kv("Description", desc)
        kv("Tools allowed", ", ".join(self.workflow.get("tools", [])))

        for step in self.workflow.get("steps", []):
            self._run_step(step)

        section("SIMULATION TERMINÉE")
        # Only memory_write and tool steps are recorded in step_results.
        ok(f"{len(self.step_results)} step(s) exécuté(s)")
        errors = [r for r in self.step_results if r.get("result", {}).get("status") == "error"]
        if errors:
            warn(f"{len(errors)} step(s) en erreur")
            for r in errors:
                err(f" {r['id']}: {r['result'].get('error')}{r['result'].get('message')}")

        print()
        print(c("" * 70, BOLD))
        print()

    def _run_step(self, step: dict) -> None:
        """Execute (or simulate) one workflow step and record its result.

        Args:
            step: Step definition; its shape is decided by the presence of
                the ``ask_user``, ``memory_write`` or ``tool`` key.
        """
        step_id = step.get("id", "?")

        # --- ask_user step ---
        if "ask_user" in step:
            section(f"STEP [{step_id}] — ask_user")
            q = step["ask_user"].get("question", "")
            answers = step["ask_user"].get("answers", {})
            # Normalise keys so boolean yes/no keys match the textual answer.
            answers_str = {self._answer_key(k): v for k, v in answers.items()}
            info(c(f'Question: "{q}"', BOLD))
            info(f"Réponses possibles: {', '.join(answers_str)}")

            answer = "yes" if self.args.seed else "no"
            # `or {}` also covers an answer declared with an empty value.
            next_step = (answers_str.get(answer) or {}).get("next_step", "update_library")
            ok(f"Réponse simulée: {c(answer, CYAN)} → next: {c(next_step, CYAN)}")

            self.context["seeding"] = (answer == "yes")
            self.context["ask_seeding_answer"] = answer
            self.context["next_after_ask"] = next_step
            # If "no", skip create_seed_links
            if answer == "no":
                self.context["skip_create_seed_links"] = True
            return

        # --- memory_write step ---
        if "memory_write" in step:
            section(f"STEP [{step_id}] — memory_write ({step['memory_write']})")
            if self.live:
                warn("memory_write: pas encore implémenté dans le simulator live")
            else:
                ok("(dry-run) Library entry would be written to LTM")
            self.step_results.append({"id": step_id, "result": {"status": "ok"}})
            return

        # --- tool step ---
        tool_name = step.get("tool")
        if not tool_name:
            warn(f"Step '{step_id}' has no tool or ask_user — skipped")
            return

        # Skip create_seed_links if user said no to seeding
        if tool_name == "create_seed_links" and self.context.get("skip_create_seed_links"):
            section(f"STEP [{step_id}] — {tool_name}")
            warn("Skipped (user chose not to seed)")
            return

        section(f"STEP [{step_id}] — {c(tool_name, CYAN, BOLD)}")
        desc = step.get("description", "").strip()
        if desc:
            info(c(desc, DIM))

        kwargs = self._build_kwargs(tool_name, step)
        for k, v in kwargs.items():
            kv(k, str(v))

        if tool_name not in self.tools:
            err(f"Tool '{tool_name}' not found in tool registry")
            self.step_results.append({"id": step_id, "result": {"status": "error", "error": "unknown_tool"}})
            return

        try:
            result = self.tools[tool_name](**kwargs)
        except Exception as e:
            # A failing tool must not abort the simulation: record and go on.
            err(f"Tool raised an exception: {e}")
            self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}})
            return

        self._print_result(result)
        # Results are stored under the step id so later steps can read them.
        self.context[step_id] = result
        self.step_results.append({"id": step_id, "result": result})

    def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]:
        """Build tool kwargs from step params + CLI args + previous context.

        Args:
            tool_name: Name of the tool about to be called.
            step: Step definition; its ``params`` mapping is the starting point.

        Returns:
            The keyword arguments to pass to the tool.
        """
        # Start from step-level params (static defaults from YAML)
        kwargs: dict[str, Any] = dict(step.get("params") or {})
        a = self.args

        if tool_name == "list_folder":
            kwargs.setdefault("folder_type", "download")

        elif tool_name == "find_media_imdb_id":
            if a.imdb_id:
                kwargs["imdb_id"] = a.imdb_id

        elif tool_name == "resolve_destination":
            if a.release:
                kwargs["release_name"] = a.release
            elif a.source:
                # Without --release, the source's parent folder name is used.
                kwargs.setdefault("release_name", Path(a.source).parent.name)
            if a.source:
                kwargs["source_file"] = a.source
            if a.tmdb_title:
                kwargs["tmdb_title"] = a.tmdb_title
            if a.tmdb_year:
                kwargs["tmdb_year"] = a.tmdb_year
            if a.episode_title:
                kwargs["tmdb_episode_title"] = a.episode_title

        elif tool_name == "move_media":
            # If resolve_destination ran, use its library_file as destination
            resolved = self.context.get("resolve_destination", {})
            if a.source:
                kwargs["source"] = a.source
            dest = a.dest or resolved.get("library_file")
            if dest:
                kwargs["destination"] = dest

        elif tool_name == "manage_subtitles":
            resolved = self.context.get("resolve_destination", {})
            if a.source:
                kwargs["source_video"] = a.source
            dest = a.dest or resolved.get("library_file")
            if dest:
                kwargs["destination_video"] = dest

        elif tool_name == "create_seed_links":
            resolved = self.context.get("resolve_destination", {})
            library_file = a.dest or resolved.get("library_file")
            if library_file:
                kwargs["library_file"] = library_file
            if a.download_folder:
                kwargs["original_download_folder"] = a.download_folder
            elif a.source:
                kwargs.setdefault("original_download_folder", str(Path(a.source).parent))

        return kwargs

    def _print_result(self, result: dict) -> None:
        """Print a tool result: status line first, then the remaining fields.

        For any status other than ``ok`` / ``needs_clarification`` only the
        error line is printed.
        """
        status = result.get("status", "?")
        if status == "ok":
            ok(f"status={c('ok', GREEN)}")
        elif status == "needs_clarification":
            warn(f"status={c('needs_clarification', YELLOW)}")
        else:
            err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}")
            return

        # Pretty-print notable fields
        skip = {"status", "error", "message"}
        for k, v in result.items():
            if k in skip:
                continue
            if isinstance(v, list):
                if v:
                    info(c(f"{k}:", BOLD))
                    # Lists are truncated to their first ten items.
                    for item in v[:10]:
                        info(f"{item}")
                    if len(v) > 10:
                        info(c(f" … and {len(v) - 10} more", DIM))
                else:
                    info(f"{c(k + ':', BOLD)} (empty)")
            else:
                kv(k, str(v))
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def parse_args() -> argparse.Namespace:
    """Parse the command line of the workflow simulator.

    The module docstring is reused (dedented) as the help epilog.

    Returns:
        Namespace holding the workflow name and every option declared below.
    """
    parser = argparse.ArgumentParser(
        description="Alfred workflow simulator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(__doc__ or ""),
    )
    add = parser.add_argument

    add("workflow", help="Workflow name (e.g. organize_media)")

    # Execution mode. --dry-run defaults to True; main() clears it for --live.
    add("--dry-run", dest="dry_run", action="store_true", default=True,
        help="Simulate steps without executing tools (default)")
    add("--live", action="store_true",
        help="Actually execute tools against the real filesystem")

    # Paths
    add("--source", metavar="PATH",
        help="Source video file (in download folder)")
    add("--dest", metavar="PATH",
        help="Destination video file (in library, overrides resolve_destination)")
    add("--download-folder", metavar="PATH",
        help="Original download folder (for create_seed_links)")

    # Media identification
    add("--imdb-id", metavar="ID",
        help="IMDb ID for identify_media (tt1234567)")
    add("--release", metavar="NAME",
        help="Release name (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)")
    add("--tmdb-title", metavar="TITLE",
        help="Canonical title from TMDB (e.g. 'Oz')")
    add("--tmdb-year", metavar="YEAR", type=int,
        help="Start/release year from TMDB (e.g. 1997)")
    add("--episode-title", metavar="TITLE",
        help="Episode title from TMDB for single-episode releases")

    # Simulation behaviour / output
    add("--seed", action="store_true",
        help='Answer "yes" to the seeding question')
    add("--no-color", action="store_true")

    return parser.parse_args()
def main() -> None:
    """Entry point: load the requested workflow and run it.

    Exits with status 1 when the workflow is unknown or when the live tools
    cannot be loaded.
    """
    global USE_COLOR

    args = parse_args()
    # Colour is switched off on request or when stdout is not a terminal.
    if args.no_color or not sys.stdout.isatty():
        USE_COLOR = False

    # --dry-run is always True after parsing, so it is simply the opposite of --live.
    args.dry_run = not args.live

    # Load workflow
    from alfred.agent.workflows.loader import WorkflowLoader

    loader = WorkflowLoader()
    workflow = loader.get(args.workflow)
    if not workflow:
        print(f"Erreur: workflow '{args.workflow}' introuvable.", file=sys.stderr)
        print(f"Disponibles: {', '.join(loader.names())}", file=sys.stderr)
        sys.exit(1)

    # Load tools
    if not args.live:
        tools = DRY_RUN_TOOLS
    else:
        try:
            tools = _load_live_tools()
        except Exception as exc:
            print(f"Erreur chargement des tools live: {exc}", file=sys.stderr)
            sys.exit(1)

    WorkflowRunner(workflow, tools, live=args.live, args=args).run()
if __name__ == "__main__":
main()
View File
+208
View File
@@ -0,0 +1,208 @@
"""
Tests for alfred.agent.registry — tool registration and JSON schema generation.
"""
import pytest
from alfred.agent.registry import Tool, _create_tool_from_function, make_tools
from alfred.settings import settings
# ---------------------------------------------------------------------------
# _create_tool_from_function
# ---------------------------------------------------------------------------
class TestCreateToolFromFunction:
    """Checks on the Tool built by ``_create_tool_from_function``."""

    def test_name_from_function(self):
        """The tool name is the function's name."""
        def my_tool(x: str) -> dict:
            """Does something."""
            return {}

        built = _create_tool_from_function(my_tool)
        assert built.name == "my_tool"

    def test_description_from_docstring_first_line(self):
        """Only the first docstring line becomes the description."""
        def my_tool(x: str) -> dict:
            """First line description.

            More details here.
            """
            return {}

        built = _create_tool_from_function(my_tool)
        assert built.description == "First line description."

    def test_description_fallback_to_name(self):
        """Without a docstring the description is the function name."""
        def no_doc(x: str) -> dict:
            return {}

        built = _create_tool_from_function(no_doc)
        assert built.description == "no_doc"

    def test_required_params_without_default(self):
        """Parameters without a default are listed as required."""
        def tool(a: str, b: int) -> dict:
            """Tool."""
            return {}

        required = _create_tool_from_function(tool).parameters["required"]
        assert "a" in required
        assert "b" in required

    def test_optional_params_not_required(self):
        """Parameters with a default are not required."""
        def tool(a: str, b: str = "default") -> dict:
            """Tool."""
            return {}

        required = _create_tool_from_function(tool).parameters["required"]
        assert "a" in required
        assert "b" not in required

    def test_none_default_not_required(self):
        """A ``None`` default also makes the parameter optional."""
        def tool(a: str, b: str | None = None) -> dict:
            """Tool."""
            return {}

        built = _create_tool_from_function(tool)
        assert "b" not in built.parameters["required"]

    def test_type_mapping_str(self):
        """``str`` maps to the JSON type "string"."""
        def tool(x: str) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "string"

    def test_type_mapping_int(self):
        """``int`` maps to the JSON type "integer"."""
        def tool(x: int) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "integer"

    def test_type_mapping_float(self):
        """``float`` maps to the JSON type "number"."""
        def tool(x: float) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "number"

    def test_type_mapping_bool(self):
        """``bool`` maps to the JSON type "boolean"."""
        def tool(x: bool) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "boolean"

    def test_unknown_type_defaults_to_string(self):
        """An unmapped annotation (here ``list``) falls back to "string"."""
        def tool(x: list) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "string"

    def test_no_annotation_defaults_to_string(self):
        """A parameter without annotation falls back to "string"."""
        def tool(x) -> dict:
            """T."""
            return {}

        props = _create_tool_from_function(tool).parameters["properties"]
        assert props["x"]["type"] == "string"

    def test_self_param_excluded(self):
        """``self`` of a bound method does not appear in the properties."""
        class MyClass:
            def tool(self, x: str) -> dict:
                """T."""
                return {}

        built = _create_tool_from_function(MyClass().tool)
        assert "self" not in built.parameters["properties"]

    def test_parameters_schema_structure(self):
        """The schema is an object with ``properties`` and ``required``."""
        def tool(a: str, b: int = 0) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["type"] == "object"
        assert "properties" in schema
        assert "required" in schema

    def test_func_stored_on_tool(self):
        """The original callable stays reachable through ``Tool.func``."""
        def tool(x: str) -> dict:
            """T."""
            return {"x": x}

        built = _create_tool_from_function(tool)
        assert built.func("hello") == {"x": "hello"}
# ---------------------------------------------------------------------------
# make_tools
# ---------------------------------------------------------------------------
class TestMakeTools:
    """Checks the tool registry returned by ``make_tools(settings)``."""

    def test_returns_dict(self):
        """The registry is a plain ``dict``."""
        registry = make_tools(settings)
        assert isinstance(registry, dict)

    def test_all_expected_tools_present(self):
        """Every tool name listed below is a key of the registry."""
        registry = make_tools(settings)
        expected = {
            "set_path_for_folder",
            "list_folder",
            "resolve_destination",
            "move_media",
            "manage_subtitles",
            "create_seed_links",
            "learn",
            "find_media_imdb_id",
            "find_torrent",
            "add_torrent_by_index",
            "add_torrent_to_qbittorrent",
            "get_torrent_by_index",
            "set_language",
        }
        registered = registry.keys()
        assert expected.issubset(registered)

    def test_each_tool_is_tool_instance(self):
        """Every registry value is a ``Tool``."""
        for name, entry in make_tools(settings).items():
            assert isinstance(entry, Tool), f"{name} is not a Tool instance"

    def test_each_tool_has_callable_func(self):
        """Every registry value exposes a callable ``func``."""
        for name, entry in make_tools(settings).items():
            assert callable(entry.func), f"{name}.func is not callable"

    def test_tool_name_matches_key(self):
        """Each tool's ``name`` equals the key it is stored under."""
        for key, entry in make_tools(settings).items():
            assert tool_name_of(entry) == key if False else entry.name == key

    def test_resolve_destination_schema(self):
        """``resolve_destination`` requires the release/TMDB args only."""
        resolve_tool = make_tools(settings)["resolve_destination"]
        props = resolve_tool.parameters["properties"]
        required = resolve_tool.parameters["required"]

        # Required args
        for arg in ("release_name", "source_file", "tmdb_title", "tmdb_year"):
            assert arg in required

        # Optional args not required
        for arg in ("tmdb_episode_title", "confirmed_folder"):
            assert arg not in required

        # tmdb_year is int
        assert props["tmdb_year"]["type"] == "integer"

    def test_move_media_schema(self):
        """``move_media`` requires both ``source`` and ``destination``."""
        required = make_tools(settings)["move_media"].parameters["required"]
        assert "source" in required
        assert "destination" in required

    def test_create_seed_links_schema(self):
        """``create_seed_links`` requires the library file and download folder."""
        required = make_tools(settings)["create_seed_links"].parameters["required"]
        assert "library_file" in required
        assert "original_download_folder" in required

    def test_no_duplicate_tools(self):
        """No two registry entries share the same ``name`` attribute."""
        registry = make_tools(settings)
        # dict keys are unique by definition, but verify no name conflicts
        names = [entry.name for entry in registry.values()]
        assert len(set(names)) == len(names)
View File
+41
View File
@@ -0,0 +1,41 @@
"""Fixtures for application-layer tests."""
import shutil
import tempfile
from pathlib import Path
import pytest
from alfred.infrastructure.persistence import Memory, set_memory
@pytest.fixture
def app_temp(tmp_path):
    """Create the working folders under ``tmp_path`` and return ``tmp_path``.

    The folders created are ``downloads``, ``movies``, ``tv_shows`` and
    ``torrents``, in that order.
    """
    for folder_name in ("downloads", "movies", "tv_shows", "torrents"):
        (tmp_path / folder_name).mkdir()
    return tmp_path
@pytest.fixture
def memory_configured(app_temp, tmp_path):
    """Yield a fresh ``Memory`` configured with workspace and library paths.

    Replaces the broken memory_with_config from root conftest for these tests.

    The memory is persisted in its own temporary storage directory, registered
    through ``set_memory`` and pointed at the folders created by ``app_temp``:
    ``downloads`` / ``torrents`` for the workspace and ``movies`` / ``tv_shows``
    for the ``movie`` / ``tv_show`` library paths.

    Yields:
        The configured ``Memory`` instance, after ``save()`` has been called.
    """
    # ``tmp_path`` is requested but not used directly; it is kept so the
    # fixture signature stays unchanged.
    storage = tempfile.mkdtemp()
    try:
        mem = Memory(storage_dir=storage)
        set_memory(mem)
        mem.ltm.workspace.download = str(app_temp / "downloads")
        mem.ltm.workspace.torrent = str(app_temp / "torrents")
        mem.ltm.library_paths.set("movie", str(app_temp / "movies"))
        mem.ltm.library_paths.set("tv_show", str(app_temp / "tv_shows"))
        mem.save()
        yield mem
    finally:
        # Runs on normal teardown and also when setup fails before the yield,
        # so the storage directory is never left behind.
        shutil.rmtree(storage, ignore_errors=True)
+117
View File
@@ -0,0 +1,117 @@
"""
Tests for alfred.application.filesystem.create_seed_links.CreateSeedLinksUseCase
"""
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from alfred.application.filesystem.create_seed_links import CreateSeedLinksUseCase
from alfred.infrastructure.filesystem.file_manager import FileManager
@pytest.fixture
def fm():
    """Provide a ``FileManager`` constructed with no arguments."""
    file_manager = FileManager()
    return file_manager
@pytest.fixture
def use_case(fm):
    """Provide a ``CreateSeedLinksUseCase`` wired to the ``fm`` fixture."""
    seed_links_use_case = CreateSeedLinksUseCase(fm)
    return seed_links_use_case
@pytest.fixture
def seed_env(tmp_path_factory):
    """Build a post-move file layout in its own temporary directory.

    The layout (independent of ``app_temp``) contains:
    - a library video file, written with placeholder bytes, inside a
      series/season folder under ``tv``
    - a download folder holding two text files and a ``Subs`` subfolder
      with one ``.srt`` file
    - an empty ``torrents`` root folder

    Returns:
        A tuple ``(library_video, download_folder, torrents_root)`` of paths.
    """
    root = tmp_path_factory.mktemp("seed_env")

    season_dir = (
        root
        / "tv"
        / "Oz.1997.1080p.WEBRip.x265-KONTRAST"
        / "Oz.S01.1080p.WEBRip.x265-KONTRAST"
    )
    season_dir.mkdir(parents=True)
    library_video = season_dir / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4"
    library_video.write_bytes(b"video")

    download_dir = root / "downloads" / "Oz.S01.1080p.WEBRip.x265-KONTRAST"
    download_dir.mkdir(parents=True)
    (download_dir / "KONTRAST.txt").write_text("release notes")
    (download_dir / "[TGx]info.txt").write_text("tgx")

    subtitle_dir = download_dir / "Subs" / "Oz.S01E01.1080p.WEBRip.x265-KONTRAST"
    subtitle_dir.mkdir(parents=True)
    subtitle_file = subtitle_dir / "2_eng,English [CC][SDH].srt"
    subtitle_file.write_text("1\n00:00:01 --> 00:00:02\nHello\n")

    torrent_root = root / "torrents"
    torrent_root.mkdir()

    return library_video, download_dir, torrent_root
# ---------------------------------------------------------------------------
# Happy path
# ---------------------------------------------------------------------------
class TestCreateSeedLinksHappyPath:
    """Successful runs of ``CreateSeedLinksUseCase.execute``."""

    @staticmethod
    def _point_torrent_folder_at(torrents):
        """Store ``torrents`` as the workspace torrent folder and save memory."""
        from alfred.infrastructure.persistence import get_memory

        current_memory = get_memory()
        current_memory.ltm.workspace.torrent = str(torrents)
        current_memory.save()

    def test_ok_when_torrent_folder_configured(self, use_case, seed_env, memory_configured):
        """With a torrent folder set, the result is ``ok`` and fully populated."""
        library_video, download_dir, torrent_root = seed_env
        self._point_torrent_folder_at(torrent_root)

        outcome = use_case.execute(str(library_video), str(download_dir))

        assert outcome.status == "ok"
        assert outcome.torrent_subfolder is not None
        assert outcome.linked_file is not None
        assert outcome.copied_count > 0

    def test_to_dict_ok(self, use_case, seed_env, memory_configured):
        """``to_dict`` of a successful result exposes the expected keys."""
        library_video, download_dir, torrent_root = seed_env
        self._point_torrent_folder_at(torrent_root)

        outcome = use_case.execute(str(library_video), str(download_dir))
        payload = outcome.to_dict()

        assert payload["status"] == "ok"
        assert "torrent_subfolder" in payload
        assert "copied_files" in payload
        assert isinstance(payload["copied_files"], list)
# ---------------------------------------------------------------------------
# Error: torrent folder not configured
# ---------------------------------------------------------------------------
class TestCreateSeedLinksErrors:
    """Error outcomes of ``CreateSeedLinksUseCase.execute``."""

    def test_error_when_torrent_not_configured(self, use_case, seed_env, memory):
        """Without a torrent folder the result is ``torrent_folder_not_set``."""
        lib_video, dl, _ = seed_env
        result = use_case.execute(str(lib_video), str(dl))
        assert result.status == "error"
        assert result.error == "torrent_folder_not_set"
        assert result.message is not None

    def test_to_dict_error(self, use_case, seed_env, memory):
        """``to_dict`` of an error result carries ``error`` and ``message``."""
        lib_video, dl, _ = seed_env
        d = use_case.execute(str(lib_video), str(dl)).to_dict()
        assert d["status"] == "error"
        assert "error" in d
        assert "message" in d

    def test_error_delegates_to_file_manager(self, memory_configured):
        """FileManager errors are propagated correctly."""
        # torrent already configured by memory_configured fixture
        # library_file does not exist → should propagate error from FileManager
        uc = CreateSeedLinksUseCase(FileManager())
        result = uc.execute("/nonexistent/lib.mkv", "/nonexistent/dl")
        assert result.status == "error"
@@ -0,0 +1,179 @@
"""Tests for ListFolderUseCase and MoveMediaUseCase."""
import pytest
from unittest.mock import MagicMock
from alfred.application.filesystem.list_folder import ListFolderUseCase
from alfred.application.filesystem.move_media import MoveMediaUseCase
# ---------------------------------------------------------------------------
# ListFolderUseCase
# ---------------------------------------------------------------------------
class TestListFolderUseCase:
    """``ListFolderUseCase`` exercised against a mocked file manager."""

    def _use_case(self, fm_result):
        """Return a use case whose file manager's ``list_folder`` yields ``fm_result``."""
        file_manager = MagicMock()
        file_manager.list_folder.return_value = fm_result
        return ListFolderUseCase(file_manager)

    def test_success_returns_response(self):
        """Every field of an ``ok`` result is copied onto the response."""
        listing = {
            "status": "ok",
            "folder_type": "download",
            "path": ".",
            "entries": ["movie.mkv", "show/"],
            "count": 2,
        }
        response = self._use_case(listing).execute("download")
        assert response.status == "ok"
        assert response.folder_type == "download"
        assert response.path == "."
        assert response.entries == ["movie.mkv", "show/"]
        assert response.count == 2

    def test_error_propagates(self):
        """Error code and message are passed through to the response."""
        failure = {
            "status": "error",
            "error": "folder_not_set",
            "message": "Download folder not configured.",
        }
        response = self._use_case(failure).execute("download")
        assert response.status == "error"
        assert response.error == "folder_not_set"
        assert response.message == "Download folder not configured."

    def test_delegates_folder_type_and_path(self):
        """Folder type and path are forwarded unchanged to ``list_folder``."""
        file_manager = MagicMock()
        file_manager.list_folder.return_value = {
            "status": "ok",
            "folder_type": "tv_show",
            "path": "Breaking Bad",
            "entries": [],
            "count": 0,
        }
        ListFolderUseCase(file_manager).execute("tv_show", "Breaking Bad")
        file_manager.list_folder.assert_called_once_with("tv_show", "Breaking Bad")

    def test_default_path_is_dot(self):
        """Omitting the path makes the use case pass ``"."``."""
        file_manager = MagicMock()
        file_manager.list_folder.return_value = {
            "status": "ok",
            "folder_type": "download",
            "path": ".",
            "entries": [],
            "count": 0,
        }
        ListFolderUseCase(file_manager).execute("download")
        file_manager.list_folder.assert_called_once_with("download", ".")

    def test_success_response_has_no_error(self):
        """A successful response leaves ``error`` as ``None``."""
        listing = {
            "status": "ok",
            "folder_type": "movie",
            "path": ".",
            "entries": [],
            "count": 0,
        }
        response = self._use_case(listing).execute("movie")
        assert response.error is None

    def test_error_response_has_no_entries(self):
        """An error response leaves ``entries`` and ``count`` as ``None``."""
        failure = {
            "status": "error",
            "error": "not_found",
            "message": "Path does not exist",
        }
        response = self._use_case(failure).execute("download", "some/path")
        assert response.entries is None
        assert response.count is None
# ---------------------------------------------------------------------------
# MoveMediaUseCase
# ---------------------------------------------------------------------------
class TestMoveMediaUseCase:
    """``MoveMediaUseCase`` exercised against a mocked file manager."""

    def _use_case(self, fm_result):
        """Return a use case whose file manager's ``move_file`` yields ``fm_result``."""
        file_manager = MagicMock()
        file_manager.move_file.return_value = fm_result
        return MoveMediaUseCase(file_manager)

    def test_success_returns_response(self, tmp_path):
        """Every field of an ``ok`` result is copied onto the response."""
        source_path = str(tmp_path / "src.mkv")
        target_path = str(tmp_path / "dst.mkv")
        moved = {
            "status": "ok",
            "source": source_path,
            "destination": target_path,
            "filename": "dst.mkv",
            "size": 1024,
        }
        response = self._use_case(moved).execute(source_path, target_path)
        assert response.status == "ok"
        assert response.source == source_path
        assert response.destination == target_path
        assert response.filename == "dst.mkv"
        assert response.size == 1024

    def test_error_propagates(self, tmp_path):
        """The error code from the file manager reaches the response."""
        failure = {
            "status": "error",
            "error": "source_not_found",
            "message": "Source does not exist: /ghost.mkv",
        }
        target_path = str(tmp_path / "dst.mkv")
        response = self._use_case(failure).execute("/ghost.mkv", target_path)
        assert response.status == "error"
        assert response.error == "source_not_found"

    def test_delegates_to_file_manager(self, tmp_path):
        """Source and destination are forwarded unchanged to ``move_file``."""
        source_path = "/downloads/movie.mkv"
        target_path = "/movies/Movie.2024/movie.mkv"
        file_manager = MagicMock()
        file_manager.move_file.return_value = {
            "status": "ok",
            "source": source_path,
            "destination": target_path,
            "filename": "movie.mkv",
            "size": 1,
        }
        MoveMediaUseCase(file_manager).execute(source_path, target_path)
        file_manager.move_file.assert_called_once_with(source_path, target_path)

    def test_error_response_has_no_paths(self):
        """An error response leaves the path-related fields as ``None``."""
        failure = {
            "status": "error",
            "error": "destination_exists",
            "message": "File already exists",
        }
        response = self._use_case(failure).execute("/src.mkv", "/dst.mkv")
        assert response.source is None
        assert response.destination is None
        assert response.filename is None

    def test_to_dict_success(self, tmp_path):
        """``to_dict`` of a successful response keeps status, filename and size."""
        source_path = "/downloads/movie.mkv"
        target_path = "/movies/movie.mkv"
        moved = {
            "status": "ok",
            "source": source_path,
            "destination": target_path,
            "filename": "movie.mkv",
            "size": 2048,
        }
        payload = self._use_case(moved).execute(source_path, target_path).to_dict()
        assert payload["status"] == "ok"
        assert payload["filename"] == "movie.mkv"
        assert payload["size"] == 2048

    def test_to_dict_error(self):
        """``to_dict`` of an error response carries ``error`` and ``message``."""
        failure = {
            "status": "error",
            "error": "link_failed",
            "message": "Cross-device link not permitted",
        }
        payload = self._use_case(failure).execute("/src.mkv", "/dst.mkv").to_dict()
        assert payload["status"] == "error"
        assert "error" in payload
        assert "message" in payload
@@ -0,0 +1,315 @@
"""
Tests for alfred.application.filesystem.resolve_destination
Uses a real temp filesystem + a real Memory instance (via conftest fixtures).
No network calls — TMDB data is passed in directly.
"""
from pathlib import Path
import pytest
from alfred.application.filesystem.resolve_destination import (
ResolveDestinationUseCase,
_find_existing_series_folders,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _use_case():
    """Return a new ``ResolveDestinationUseCase`` built with no arguments."""
    resolver = ResolveDestinationUseCase()
    return resolver
# ---------------------------------------------------------------------------
# Movies
# ---------------------------------------------------------------------------
class TestResolveMovie:
    """Destination resolution for movie releases."""

    def test_basic_movie(self, memory_configured):
        """A movie resolves to a title/year/tech folder and has no season folder."""
        request = {
            "release_name": "Another.Round.2020.1080p.BluRay.x264-YTS",
            "source_file": "/downloads/Another.Round.2020.1080p.BluRay.x264-YTS/Another.Round.2020.1080p.BluRay.x264-YTS.mp4",
            "tmdb_title": "Another Round",
            "tmdb_year": 2020,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert "Another.Round.2020" in resolved.series_folder_name
        assert "1080p.BluRay.x264-YTS" in resolved.series_folder_name
        assert resolved.filename.endswith(".mp4")
        assert resolved.season_folder is None

    def test_movie_library_file_path_is_inside_series_folder(self, memory_configured):
        """The library file path starts with the series folder path."""
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert resolved.library_file.startswith(resolved.series_folder)

    def test_movie_library_not_set(self, memory):
        """Without library paths the result is the ``library_not_set`` error."""
        # memory has no library paths configured
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "error"
        assert resolved.error == "library_not_set"

    def test_movie_folder_marked_new(self, memory_configured):
        """When no folder exists yet, ``is_new_series_folder`` is ``True``."""
        # No existing folder → is_new_series_folder = True
        request = {
            "release_name": "Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS",
            "source_file": "/downloads/Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS.mp4",
            "tmdb_title": "Godzilla Minus One",
            "tmdb_year": 2023,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert resolved.is_new_series_folder is True

    def test_movie_sanitises_forbidden_chars_in_title(self, memory_configured):
        """A colon in the TMDB title does not reach the folder name."""
        request = {
            "release_name": "Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST.mkv",
            "tmdb_title": "Alien: Earth",
            "tmdb_year": 2024,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert ":" not in resolved.series_folder_name

    def test_to_dict_ok(self, memory_configured):
        """``to_dict`` of a successful result exposes the expected keys."""
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        payload = _use_case().execute(**request).to_dict()
        assert payload["status"] == "ok"
        assert "library_file" in payload
        assert "series_folder_name" in payload
# ---------------------------------------------------------------------------
# TV shows — no existing folder
# ---------------------------------------------------------------------------
class TestResolveTVShowNewFolder:
    """Destination resolution for TV releases when no series folder exists."""

    def test_oz_s01_creates_new_folder(self, memory_configured):
        """A season pack produces new series and season folder names."""
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert resolved.is_new_series_folder is True
        assert resolved.series_folder_name == "Oz.1997.1080p.WEBRip.x265-KONTRAST"
        assert resolved.season_folder_name == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_tv_library_not_set(self, memory):
        """Without library paths the result is the ``library_not_set`` error."""
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "error"
        assert resolved.error == "library_not_set"

    def test_single_episode_filename(self, memory_configured):
        """A single episode's filename has the episode code, title and extension."""
        request = {
            "release_name": "Fallout.2024.S02E01.1080p.x265-ELiTE",
            "source_file": "/downloads/Fallout.2024.S02E01.1080p.x265-ELiTE.mkv",
            "tmdb_title": "Fallout",
            "tmdb_year": 2024,
            "tmdb_episode_title": "The Beginning",
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        assert "S02E01" in resolved.filename
        assert "The.Beginning" in resolved.filename
        assert resolved.filename.endswith(".mkv")

    def test_season_pack_filename_is_folder_name_plus_ext(self, memory_configured):
        """For a season pack the filename is the season folder name plus extension."""
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)
        assert resolved.status == "ok"
        # Season pack: filename = season_folder_name + ext
        expected_filename = resolved.season_folder_name + ".mp4"
        assert resolved.filename == expected_filename

    def test_library_file_is_inside_season_folder(self, memory_configured):
        """Library file, season folder and series folder paths nest as prefixes."""
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)
        assert resolved.library_file.startswith(resolved.season_folder)
        assert resolved.season_folder.startswith(resolved.series_folder)
# ---------------------------------------------------------------------------
# TV shows — existing folder matching
# ---------------------------------------------------------------------------
class TestResolveTVShowExistingFolder:
    """Destination resolution for TV releases when series folders already exist."""

    def _make_series_folder(self, tv_root, name):
        """Create a series folder in the tv library.

        Args:
            tv_root: Path of the TV library root.
            name: Folder name to create directly under ``tv_root``.

        Returns:
            The path of the created (or already existing) folder.
        """
        path = tv_root / name
        path.mkdir(parents=True, exist_ok=True)
        return path

    @staticmethod
    def _tv_root():
        """Return the ``tv_show`` library path from the current memory as a ``Path``."""
        from alfred.infrastructure.persistence import get_memory

        return Path(get_memory().ltm.library_paths.get("tv_show"))

    def test_uses_existing_single_folder(self, memory_configured, app_temp):
        """When exactly one folder matches title+year, use it regardless of group."""
        self._make_series_folder(self._tv_root(), "Oz.1997.1080p.WEBRip.x265-RARBG")

        result = _use_case().execute(
            release_name="Oz.S02.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S02E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        assert result.status == "ok"
        assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-RARBG"
        assert result.is_new_series_folder is False

    def test_needs_clarification_on_multiple_folders(self, memory_configured, app_temp):
        """When multiple folders match, return needs_clarification with options."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
        )

        assert result.status == "needs_clarification"
        assert result.question is not None
        assert len(result.options) == 2
        assert "Slow.Horses.2022.1080p.WEBRip.x265-RARBG" in result.options
        assert "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST" in result.options

    def test_confirmed_folder_bypasses_detection(self, memory_configured, app_temp):
        """confirmed_folder skips the folder search."""
        chosen = "Slow.Horses.2022.1080p.WEBRip.x265-RARBG"
        self._make_series_folder(self._tv_root(), chosen)

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
            confirmed_folder=chosen,
        )

        assert result.status == "ok"
        assert result.series_folder_name == chosen

    def test_to_dict_needs_clarification(self, memory_configured, app_temp):
        """``to_dict`` of a clarification result carries a question and an options list."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Oz.S03.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S03E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        d = result.to_dict()
        assert d["status"] == "needs_clarification"
        assert "question" in d
        assert isinstance(d["options"], list)

    def test_to_dict_error(self, memory):
        """``to_dict`` of an error result carries ``error`` and ``message``."""
        result = _use_case().execute(
            release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S01E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        d = result.to_dict()
        assert d["status"] == "error"
        assert "error" in d
        assert "message" in d
# ---------------------------------------------------------------------------
# _find_existing_series_folders
# ---------------------------------------------------------------------------
class TestFindExistingSeriesFolders:
    """Direct checks of the ``_find_existing_series_folders`` helper."""

    def test_empty_library(self, tmp_path):
        """An empty library root yields an empty list."""
        found = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert found == []

    def test_nonexistent_root(self, tmp_path):
        """A root path that does not exist yields an empty list."""
        missing_root = tmp_path / "nope"
        found = _find_existing_series_folders(missing_root, "Oz", 1997)
        assert found == []

    def test_single_match(self, tmp_path):
        """One matching folder is returned by name."""
        folder_name = "Oz.1997.1080p.WEBRip.x265-KONTRAST"
        (tmp_path / folder_name).mkdir()
        found = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert found == ["Oz.1997.1080p.WEBRip.x265-KONTRAST"]

    def test_multiple_matches(self, tmp_path):
        """Two matching folders are both returned, in sorted order."""
        for folder_name in (
            "Oz.1997.1080p.WEBRip.x265-KONTRAST",
            "Oz.1997.1080p.WEBRip.x265-RARBG",
        ):
            (tmp_path / folder_name).mkdir()
        found = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(found) == 2
        assert found == sorted(found)  # sorted

    def test_no_match_different_year(self, tmp_path):
        """A folder with the same title but another year is not matched."""
        (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()
        found = _find_existing_series_folders(tmp_path, "Oz", 2000)
        assert found == []

    def test_no_match_different_title(self, tmp_path):
        """A folder for a different title is not matched."""
        (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()
        found = _find_existing_series_folders(tmp_path, "Breaking Bad", 2008)
        assert found == []

    def test_ignores_files_not_dirs(self, tmp_path):
        """A regular file with a matching prefix is not counted."""
        (tmp_path / "Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()
        (tmp_path / "Oz.1997.some.file.txt").touch()
        found = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(found) == 1

    def test_case_insensitive_prefix(self, tmp_path):
        """A folder whose title differs only in letter case still matches."""
        # Folder stored with mixed case
        (tmp_path / "OZ.1997.1080p.WEBRip.x265-KONTRAST").mkdir()
        found = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert len(found) == 1

    def test_title_with_special_chars_sanitised(self, tmp_path):
        """A title containing a colon matches the dotted, colon-free folder."""
        # "Star Wars: Andor" → sanitised (colon removed) + spaces→dots → "Star.Wars.Andor.2022"
        (tmp_path / "Star.Wars.Andor.2022.1080p.WEBRip.x265-GROUP").mkdir()
        found = _find_existing_series_folders(tmp_path, "Star Wars: Andor", 2022)
        assert len(found) == 1

Some files were not shown because too many files have changed in this diff Show More