Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| de02bdea06 | |||
| 62b5d0b998 | |||
| 610dee365c | |||
| 58408d0dbe | |||
| 2f1ac3c758 | |||
| d3b69f7459 | |||
| 50c8204fa0 | |||
| 507fe0f40e | |||
| b7b40eada1 | |||
| 9765386405 |
+77
@@ -0,0 +1,77 @@
|
||||
# --- IMPORTANT ---
|
||||
# Settings are split across multiple files for clarity.
|
||||
# Files (loaded in this order, last wins):
|
||||
# .env.alfred — app config and service addresses (safe to commit)
|
||||
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
|
||||
# .env.make — build metadata synced from pyproject.toml (safe to commit)
|
||||
#
|
||||
# To customize: edit .env.alfred for config, .env.secrets for secrets.
|
||||
|
||||
# --- Alfred ---
|
||||
MAX_HISTORY_MESSAGES=10
|
||||
MAX_TOOL_ITERATIONS=10
|
||||
REQUEST_TIMEOUT=30
|
||||
|
||||
# LLM Settings
|
||||
LLM_TEMPERATURE=0.2
|
||||
|
||||
# Persistence
|
||||
DATA_STORAGE_DIR=data
|
||||
|
||||
# Network
|
||||
HOST=0.0.0.0
|
||||
PORT=3080
|
||||
|
||||
# --- DATABASES ---
|
||||
# Passwords and connection URIs are auto-generated in .env.secrets.
|
||||
# Edit host/port/user/dbname here if needed.
|
||||
|
||||
# MongoDB (Application Data)
|
||||
MONGO_HOST=mongodb
|
||||
MONGO_PORT=27017
|
||||
MONGO_USER=alfred
|
||||
MONGO_DB_NAME=alfred
|
||||
|
||||
# PostgreSQL (Vector Database / RAG)
|
||||
POSTGRES_HOST=vectordb
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=alfred
|
||||
POSTGRES_DB_NAME=alfred
|
||||
|
||||
# --- EXTERNAL SERVICES ---
|
||||
|
||||
# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
|
||||
# → TMDB_API_KEY goes in .env.secrets
|
||||
TMDB_BASE_URL=https://api.themoviedb.org/3
|
||||
|
||||
# qBittorrent
|
||||
# → QBITTORRENT_PASSWORD goes in .env.secrets
|
||||
QBITTORRENT_URL=http://qbittorrent:16140
|
||||
QBITTORRENT_USERNAME=admin
|
||||
QBITTORRENT_PORT=16140
|
||||
|
||||
# Meilisearch
|
||||
# → MEILI_MASTER_KEY goes in .env.secrets
|
||||
# MEILI_ENABLED=false # KEY DOESN'T EXIST => SEARCH IS THE PROPER KEY
|
||||
SEARCH=false
|
||||
MEILI_NO_ANALYTICS=true
|
||||
MEILI_HOST=http://meilisearch:7700
|
||||
|
||||
# --- LLM CONFIGURATION ---
|
||||
# Providers: local, openai, anthropic, deepseek, google, kimi
|
||||
# → API keys go in .env.secrets
|
||||
DEFAULT_LLM_PROVIDER=local
|
||||
|
||||
# Local LLM (Ollama)
|
||||
#OLLAMA_BASE_URL=http://ollama:11434
|
||||
#OLLAMA_MODEL=llama3.3:latest
|
||||
|
||||
OLLAMA_BASE_URL=http://10.0.0.11:11434
|
||||
OLLAMA_MODEL=glm-4.7-flash:latest
|
||||
|
||||
# --- RAG ENGINE ---
|
||||
RAG_ENABLED=TRUE
|
||||
RAG_API_URL=http://rag_api:8000
|
||||
RAG_API_PORT=8000
|
||||
EMBEDDINGS_PROVIDER=ollama
|
||||
EMBEDDINGS_MODEL=nomic-embed-text
|
||||
+23
-43
@@ -1,3 +1,13 @@
|
||||
# --- IMPORTANT ---
|
||||
# Settings are split across multiple files for clarity.
|
||||
# Files (loaded in this order, last wins):
|
||||
# .env.alfred — app config and service addresses (safe to commit)
|
||||
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
|
||||
# .env.make — build metadata synced from pyproject.toml (safe to commit)
|
||||
#
|
||||
# To customize: edit .env.alfred for config, .env.secrets for secrets.
|
||||
|
||||
# --- Alfred ---
|
||||
MAX_HISTORY_MESSAGES=10
|
||||
MAX_TOOL_ITERATIONS=10
|
||||
REQUEST_TIMEOUT=30
|
||||
@@ -8,84 +18,54 @@ LLM_TEMPERATURE=0.2
|
||||
# Persistence
|
||||
DATA_STORAGE_DIR=data
|
||||
|
||||
# Network configuration
|
||||
# Network
|
||||
HOST=0.0.0.0
|
||||
PORT=3080
|
||||
|
||||
# Build information (synced with pyproject.toml via bootstrap)
|
||||
ALFRED_VERSION=
|
||||
IMAGE_NAME=
|
||||
LIBRECHAT_VERSION=
|
||||
PYTHON_VERSION=
|
||||
PYTHON_VERSION_SHORT=
|
||||
RAG_VERSION=
|
||||
RUNNER=
|
||||
SERVICE_NAME=
|
||||
|
||||
# --- SECURITY KEYS (CRITICAL) ---
|
||||
# These are used for session tokens and encrypting sensitive data in MongoDB.
|
||||
# If you lose these, you lose access to encrypted stored credentials.
|
||||
JWT_SECRET=
|
||||
JWT_REFRESH_SECRET=
|
||||
CREDS_KEY=
|
||||
CREDS_IV=
|
||||
|
||||
# --- DATABASES (AUTO-SECURED) ---
|
||||
# Alfred uses MongoDB for application state and PostgreSQL for Vector RAG.
|
||||
# Passwords will be generated as 24-character secure tokens if left blank.
|
||||
# --- DATABASES ---
|
||||
# Passwords and connection URIs are auto-generated in .env.secrets.
|
||||
# Edit host/port/user/dbname here if needed.
|
||||
|
||||
# MongoDB (Application Data)
|
||||
MONGO_URI=
|
||||
MONGO_HOST=mongodb
|
||||
MONGO_PORT=27017
|
||||
MONGO_USER=alfred
|
||||
MONGO_PASSWORD=
|
||||
MONGO_DB_NAME=alfred
|
||||
MONGO_DB_NAME=LibreChat
|
||||
|
||||
# PostgreSQL (Vector Database / RAG)
|
||||
POSTGRES_URI=
|
||||
POSTGRES_HOST=vectordb
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=alfred
|
||||
POSTGRES_PASSWORD=
|
||||
POSTGRES_DB_NAME=alfred
|
||||
|
||||
# --- EXTERNAL SERVICES ---
|
||||
# Media Metadata (Required)
|
||||
# Get your key at https://www.themoviedb.org/
|
||||
TMDB_API_KEY=
|
||||
|
||||
# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
|
||||
# → TMDB_API_KEY goes in .env.secrets
|
||||
TMDB_BASE_URL=https://api.themoviedb.org/3
|
||||
|
||||
# qBittorrent integration
|
||||
# qBittorrent
|
||||
# → QBITTORRENT_PASSWORD goes in .env.secrets
|
||||
QBITTORRENT_URL=http://qbittorrent:16140
|
||||
QBITTORRENT_USERNAME=admin
|
||||
QBITTORRENT_PASSWORD=
|
||||
QBITTORRENT_PORT=16140
|
||||
|
||||
# Meilisearch
|
||||
# → MEILI_MASTER_KEY goes in .env.secrets
|
||||
MEILI_ENABLED=FALSE
|
||||
MEILI_NO_ANALYTICS=TRUE
|
||||
MEILI_HOST=http://meilisearch:7700
|
||||
MEILI_MASTER_KEY=
|
||||
|
||||
# --- LLM CONFIGURATION ---
|
||||
# Providers: 'local', 'openai', 'anthropic', 'deepseek', 'google', 'kimi'
|
||||
# Providers: local, openai, anthropic, deepseek, google, kimi
|
||||
# → API keys go in .env.secrets
|
||||
DEFAULT_LLM_PROVIDER=local
|
||||
|
||||
# Local LLM (Ollama)
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
OLLAMA_MODEL=llama3.3:latest
|
||||
|
||||
# --- API KEYS (OPTIONAL) ---
|
||||
# Fill only the ones you intend to use.
|
||||
ANTHROPIC_API_KEY=
|
||||
DEEPSEEK_API_KEY=
|
||||
GOOGLE_API_KEY=
|
||||
KIMI_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
|
||||
# --- RAG ENGINE ---
|
||||
# Enable/Disable the Retrieval Augmented Generation system
|
||||
RAG_ENABLED=TRUE
|
||||
RAG_API_URL=http://rag_api:8000
|
||||
RAG_API_PORT=8000
|
||||
|
||||
+878
@@ -0,0 +1,878 @@
|
||||
#=====================================================================#
|
||||
# LibreChat Configuration #
|
||||
#=====================================================================#
|
||||
# Please refer to the reference documentation for assistance #
|
||||
# with configuring your LibreChat environment. #
|
||||
# #
|
||||
# https://www.librechat.ai/docs/configuration/dotenv #
|
||||
#=====================================================================#
|
||||
|
||||
#==================================================#
|
||||
# Server Configuration #
|
||||
#==================================================#
|
||||
|
||||
HOST=localhost
|
||||
PORT=3080
|
||||
|
||||
MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
|
||||
# The maximum number of connections in the connection pool.
|
||||
MONGO_MAX_POOL_SIZE=
|
||||
# The minimum number of connections in the connection pool.
|
||||
MONGO_MIN_POOL_SIZE=
|
||||
# The maximum number of connections that may be in the process of being established concurrently by the connection pool.
|
||||
MONGO_MAX_CONNECTING=
|
||||
# The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
|
||||
MONGO_MAX_IDLE_TIME_MS=
|
||||
# The maximum time in milliseconds that a thread can wait for a connection to become available.
|
||||
MONGO_WAIT_QUEUE_TIMEOUT_MS=
|
||||
# Set to false to disable automatic index creation for all models associated with this connection.
|
||||
MONGO_AUTO_INDEX=
|
||||
# Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
|
||||
MONGO_AUTO_CREATE=
|
||||
|
||||
DOMAIN_CLIENT=http://localhost:3080
|
||||
DOMAIN_SERVER=http://localhost:3080
|
||||
|
||||
NO_INDEX=true
|
||||
# Use the address that is at most n number of hops away from the Express application.
|
||||
# req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
|
||||
# A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
|
||||
# Defaulted to 1.
|
||||
TRUST_PROXY=1
|
||||
|
||||
# Minimum password length for user authentication
|
||||
# Default: 8
|
||||
# Note: When using LDAP authentication, you may want to set this to 1
|
||||
# to bypass local password validation, as LDAP servers handle their own
|
||||
# password policies.
|
||||
# MIN_PASSWORD_LENGTH=8
|
||||
|
||||
# When enabled, the app will continue running after encountering uncaught exceptions
|
||||
# instead of exiting the process. Not recommended for production unless necessary.
|
||||
# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
|
||||
|
||||
#===============#
|
||||
# JSON Logging #
|
||||
#===============#
|
||||
|
||||
# Use when processing console logs in cloud deployments such as GCP/AWS
|
||||
CONSOLE_JSON=false
|
||||
|
||||
#===============#
|
||||
# Debug Logging #
|
||||
#===============#
|
||||
|
||||
DEBUG_LOGGING=true
|
||||
DEBUG_CONSOLE=false
|
||||
# Set to true to enable agent debug logging
|
||||
AGENT_DEBUG_LOGGING=false
|
||||
|
||||
# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
|
||||
# MEM_DIAG=true
|
||||
|
||||
#=============#
|
||||
# Permissions #
|
||||
#=============#
|
||||
|
||||
# UID=1000
|
||||
# GID=1000
|
||||
|
||||
#==============#
|
||||
# Node Options #
|
||||
#==============#
|
||||
|
||||
# NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
|
||||
# This variable is used as a build argument for Docker or CI/CD workflows,
|
||||
# and is NOT used by Node.js to set the heap size at runtime.
|
||||
# To configure Node.js memory, use NODE_OPTIONS, e.g.:
|
||||
# NODE_OPTIONS="--max-old-space-size=6144"
|
||||
# See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
|
||||
NODE_MAX_OLD_SPACE_SIZE=6144
|
||||
|
||||
#===============#
|
||||
# Configuration #
|
||||
#===============#
|
||||
# Use an absolute path, a relative path, or a URL
|
||||
|
||||
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
|
||||
|
||||
#==================#
|
||||
# Langfuse Tracing #
|
||||
#==================#
|
||||
|
||||
# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
|
||||
|
||||
# LANGFUSE_PUBLIC_KEY=
|
||||
# LANGFUSE_SECRET_KEY=
|
||||
# LANGFUSE_BASE_URL=
|
||||
|
||||
#===================================================#
|
||||
# Endpoints #
|
||||
#===================================================#
|
||||
|
||||
# ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
|
||||
|
||||
PROXY=
|
||||
|
||||
#===================================#
|
||||
# Known Endpoints - librechat.yaml #
|
||||
#===================================#
|
||||
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
|
||||
|
||||
# ANYSCALE_API_KEY=
|
||||
# APIPIE_API_KEY=
|
||||
# COHERE_API_KEY=
|
||||
# DEEPSEEK_API_KEY=
|
||||
# DATABRICKS_API_KEY=
|
||||
# FIREWORKS_API_KEY=
|
||||
# GROQ_API_KEY=
|
||||
# HUGGINGFACE_TOKEN=
|
||||
# MISTRAL_API_KEY=
|
||||
# OPENROUTER_KEY=
|
||||
# PERPLEXITY_API_KEY=
|
||||
# SHUTTLEAI_API_KEY=
|
||||
# TOGETHERAI_API_KEY=
|
||||
# UNIFY_API_KEY=
|
||||
# XAI_API_KEY=
|
||||
|
||||
#============#
|
||||
# Anthropic #
|
||||
#============#
|
||||
|
||||
ANTHROPIC_API_KEY=user_provided
|
||||
# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
|
||||
# ANTHROPIC_REVERSE_PROXY=
|
||||
|
||||
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
|
||||
# ANTHROPIC_USE_VERTEX=
|
||||
# ANTHROPIC_VERTEX_REGION=us-east5
|
||||
|
||||
#============#
|
||||
# Azure #
|
||||
#============#
|
||||
|
||||
# Note: these variables are DEPRECATED
|
||||
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
|
||||
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
|
||||
|
||||
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
|
||||
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
|
||||
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
|
||||
# AZURE_API_KEY= # Deprecated
|
||||
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_VERSION= # Deprecated
|
||||
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
|
||||
|
||||
#=================#
|
||||
# AWS Bedrock #
|
||||
#=================#
|
||||
|
||||
# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
|
||||
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
|
||||
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
|
||||
# BEDROCK_AWS_SESSION_TOKEN=someSessionToken
|
||||
|
||||
# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
|
||||
# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
|
||||
# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
|
||||
|
||||
# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
|
||||
|
||||
# Notes on specific models:
|
||||
# The following models are not supported because they do not support streaming:
|
||||
# ai21.j2-mid-v1
|
||||
|
||||
# The following models are not supported because they do not support conversation history:
|
||||
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
|
||||
|
||||
#============#
|
||||
# Google #
|
||||
#============#
|
||||
|
||||
GOOGLE_KEY=user_provided
|
||||
|
||||
# GOOGLE_REVERSE_PROXY=
|
||||
# Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
|
||||
# GOOGLE_AUTH_HEADER=true
|
||||
|
||||
# Gemini API (AI Studio)
|
||||
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
|
||||
|
||||
# Vertex AI
|
||||
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
|
||||
|
||||
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
|
||||
|
||||
# Google Cloud region for Vertex AI (used by both chat and image generation)
|
||||
# GOOGLE_LOC=us-central1
|
||||
|
||||
# Alternative region env var for Gemini Image Generation
|
||||
# GOOGLE_CLOUD_LOCATION=global
|
||||
|
||||
# Vertex AI Service Account Configuration
|
||||
# Path to your Google Cloud service account JSON file
|
||||
# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
|
||||
|
||||
# Google Safety Settings
|
||||
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
|
||||
#
|
||||
# For Vertex AI:
|
||||
# To use the BLOCK_NONE setting, you need either:
|
||||
# (a) Access through an allowlist via your Google account team, or
|
||||
# (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
|
||||
#
|
||||
# For Gemini API (AI Studio):
|
||||
# BLOCK_NONE is available by default, no special account requirements.
|
||||
#
|
||||
# Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
|
||||
#
|
||||
# GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
|
||||
|
||||
#========================#
|
||||
# Gemini Image Generation #
|
||||
#========================#
|
||||
|
||||
# Gemini Image Generation Tool (for Agents)
|
||||
# Supports multiple authentication methods in priority order:
|
||||
# 1. User-provided API key (via GUI)
|
||||
# 2. GEMINI_API_KEY env var (admin-configured)
|
||||
# 3. GOOGLE_KEY env var (shared with Google chat endpoint)
|
||||
# 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
|
||||
|
||||
# Option A: Use dedicated Gemini API key for image generation
|
||||
# GEMINI_API_KEY=your-gemini-api-key
|
||||
|
||||
# Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
|
||||
# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
|
||||
|
||||
#============#
|
||||
# OpenAI #
|
||||
#============#
|
||||
|
||||
OPENAI_API_KEY=user_provided
|
||||
# OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
|
||||
|
||||
DEBUG_OPENAI=false
|
||||
|
||||
# TITLE_CONVO=false
|
||||
# OPENAI_TITLE_MODEL=gpt-4o-mini
|
||||
|
||||
# OPENAI_SUMMARIZE=true
|
||||
# OPENAI_SUMMARY_MODEL=gpt-4o-mini
|
||||
|
||||
# OPENAI_FORCE_PROMPT=true
|
||||
|
||||
# OPENAI_REVERSE_PROXY=
|
||||
|
||||
# OPENAI_ORGANIZATION=
|
||||
|
||||
#====================#
|
||||
# Assistants API #
|
||||
#====================#
|
||||
|
||||
ASSISTANTS_API_KEY=user_provided
|
||||
# ASSISTANTS_BASE_URL=
|
||||
# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
|
||||
|
||||
#==========================#
|
||||
# Azure Assistants API #
|
||||
#==========================#
|
||||
|
||||
# Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
|
||||
# The models for Azure Assistants are also determined by your Azure OpenAI configuration.
|
||||
|
||||
# More info, including how to enable use of Assistants with Azure here:
|
||||
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
|
||||
|
||||
CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
|
||||
CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
|
||||
|
||||
# Azure AI Search
|
||||
#-----------------
|
||||
AZURE_AI_SEARCH_SERVICE_ENDPOINT=
|
||||
AZURE_AI_SEARCH_INDEX_NAME=
|
||||
AZURE_AI_SEARCH_API_KEY=
|
||||
|
||||
AZURE_AI_SEARCH_API_VERSION=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
|
||||
|
||||
# OpenAI Image Tools Customization
|
||||
#----------------
|
||||
# IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
|
||||
# IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
|
||||
# IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
|
||||
# IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
|
||||
# IMAGE_GEN_OAI_DESCRIPTION=
|
||||
# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
|
||||
# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
|
||||
# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
|
||||
# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
|
||||
# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
|
||||
|
||||
# DALL·E
|
||||
#----------------
|
||||
# DALLE_API_KEY=
|
||||
# DALLE3_API_KEY=
|
||||
# DALLE2_API_KEY=
|
||||
# DALLE3_SYSTEM_PROMPT=
|
||||
# DALLE2_SYSTEM_PROMPT=
|
||||
# DALLE_REVERSE_PROXY=
|
||||
# DALLE3_BASEURL=
|
||||
# DALLE2_BASEURL=
|
||||
|
||||
# DALL·E (via Azure OpenAI)
|
||||
# Note: requires some of the variables above to be set
|
||||
#----------------
|
||||
# DALLE3_AZURE_API_VERSION=
|
||||
# DALLE2_AZURE_API_VERSION=
|
||||
|
||||
# Flux
|
||||
#-----------------
|
||||
FLUX_API_BASE_URL=https://api.us1.bfl.ai
|
||||
# FLUX_API_BASE_URL = 'https://api.bfl.ml';
|
||||
|
||||
# Get your API key at https://api.us1.bfl.ai/auth/profile
|
||||
# FLUX_API_KEY=
|
||||
|
||||
# Google
|
||||
#-----------------
|
||||
GOOGLE_SEARCH_API_KEY=
|
||||
GOOGLE_CSE_ID=
|
||||
|
||||
# Stable Diffusion
|
||||
#-----------------
|
||||
SD_WEBUI_URL=http://host.docker.internal:7860
|
||||
|
||||
# Tavily
|
||||
#-----------------
|
||||
TAVILY_API_KEY=
|
||||
|
||||
# Traversaal
|
||||
#-----------------
|
||||
TRAVERSAAL_API_KEY=
|
||||
|
||||
# WolframAlpha
|
||||
#-----------------
|
||||
WOLFRAM_APP_ID=
|
||||
|
||||
# Zapier
|
||||
#-----------------
|
||||
ZAPIER_NLA_API_KEY=
|
||||
|
||||
#==================================================#
|
||||
# Search #
|
||||
#==================================================#
|
||||
|
||||
SEARCH=true
|
||||
MEILI_NO_ANALYTICS=true
|
||||
MEILI_HOST=http://0.0.0.0:7700
|
||||
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
|
||||
|
||||
# Optional: Disable indexing, useful in a multi-node setup
|
||||
# where only one instance should perform an index sync.
|
||||
# MEILI_NO_SYNC=true
|
||||
|
||||
#==================================================#
|
||||
# Speech to Text & Text to Speech #
|
||||
#==================================================#
|
||||
|
||||
STT_API_KEY=
|
||||
TTS_API_KEY=
|
||||
|
||||
#==================================================#
|
||||
# RAG #
|
||||
#==================================================#
|
||||
# More info: https://www.librechat.ai/docs/configuration/rag_api
|
||||
|
||||
# RAG_OPENAI_BASEURL=
|
||||
# RAG_OPENAI_API_KEY=
|
||||
# RAG_USE_FULL_CONTEXT=
|
||||
# EMBEDDINGS_PROVIDER=openai
|
||||
# EMBEDDINGS_MODEL=text-embedding-3-small
|
||||
|
||||
#===================================================#
|
||||
# User System #
|
||||
#===================================================#
|
||||
|
||||
#========================#
|
||||
# Moderation #
|
||||
#========================#
|
||||
|
||||
OPENAI_MODERATION=false
|
||||
OPENAI_MODERATION_API_KEY=
|
||||
# OPENAI_MODERATION_REVERSE_PROXY=
|
||||
|
||||
BAN_VIOLATIONS=true
|
||||
BAN_DURATION=1000 * 60 * 60 * 2
|
||||
BAN_INTERVAL=20
|
||||
|
||||
LOGIN_VIOLATION_SCORE=1
|
||||
REGISTRATION_VIOLATION_SCORE=1
|
||||
CONCURRENT_VIOLATION_SCORE=1
|
||||
MESSAGE_VIOLATION_SCORE=1
|
||||
NON_BROWSER_VIOLATION_SCORE=20
|
||||
TTS_VIOLATION_SCORE=0
|
||||
STT_VIOLATION_SCORE=0
|
||||
FORK_VIOLATION_SCORE=0
|
||||
IMPORT_VIOLATION_SCORE=0
|
||||
FILE_UPLOAD_VIOLATION_SCORE=0
|
||||
|
||||
LOGIN_MAX=7
|
||||
LOGIN_WINDOW=5
|
||||
REGISTER_MAX=5
|
||||
REGISTER_WINDOW=60
|
||||
|
||||
LIMIT_CONCURRENT_MESSAGES=true
|
||||
CONCURRENT_MESSAGE_MAX=2
|
||||
|
||||
LIMIT_MESSAGE_IP=true
|
||||
MESSAGE_IP_MAX=40
|
||||
MESSAGE_IP_WINDOW=1
|
||||
|
||||
LIMIT_MESSAGE_USER=false
|
||||
MESSAGE_USER_MAX=40
|
||||
MESSAGE_USER_WINDOW=1
|
||||
|
||||
ILLEGAL_MODEL_REQ_SCORE=5
|
||||
|
||||
#========================#
|
||||
# Balance #
|
||||
#========================#
|
||||
|
||||
# CHECK_BALANCE=false
|
||||
# START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
|
||||
|
||||
#========================#
|
||||
# Registration and Login #
|
||||
#========================#
|
||||
|
||||
ALLOW_EMAIL_LOGIN=true
|
||||
ALLOW_REGISTRATION=true
|
||||
ALLOW_SOCIAL_LOGIN=false
|
||||
ALLOW_SOCIAL_REGISTRATION=false
|
||||
ALLOW_PASSWORD_RESET=false
|
||||
# ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
|
||||
ALLOW_UNVERIFIED_EMAIL_LOGIN=true
|
||||
|
||||
SESSION_EXPIRY=1000 * 60 * 15
|
||||
REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
|
||||
|
||||
JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
|
||||
JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
|
||||
|
||||
# Discord
|
||||
DISCORD_CLIENT_ID=
|
||||
DISCORD_CLIENT_SECRET=
|
||||
DISCORD_CALLBACK_URL=/oauth/discord/callback
|
||||
|
||||
# Facebook
|
||||
FACEBOOK_CLIENT_ID=
|
||||
FACEBOOK_CLIENT_SECRET=
|
||||
FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
|
||||
|
||||
# GitHub
|
||||
GITHUB_CLIENT_ID=
|
||||
GITHUB_CLIENT_SECRET=
|
||||
GITHUB_CALLBACK_URL=/oauth/github/callback
|
||||
# GitHub Enterprise
|
||||
# GITHUB_ENTERPRISE_BASE_URL=
|
||||
# GITHUB_ENTERPRISE_USER_AGENT=
|
||||
|
||||
# Google
|
||||
GOOGLE_CLIENT_ID=
|
||||
GOOGLE_CLIENT_SECRET=
|
||||
GOOGLE_CALLBACK_URL=/oauth/google/callback
|
||||
|
||||
# Apple
|
||||
APPLE_CLIENT_ID=
|
||||
APPLE_TEAM_ID=
|
||||
APPLE_KEY_ID=
|
||||
APPLE_PRIVATE_KEY_PATH=
|
||||
APPLE_CALLBACK_URL=/oauth/apple/callback
|
||||
|
||||
# OpenID
|
||||
OPENID_CLIENT_ID=
|
||||
OPENID_CLIENT_SECRET=
|
||||
OPENID_ISSUER=
|
||||
OPENID_SESSION_SECRET=
|
||||
OPENID_SCOPE="openid profile email"
|
||||
OPENID_CALLBACK_URL=/oauth/openid/callback
|
||||
OPENID_REQUIRED_ROLE=
|
||||
OPENID_REQUIRED_ROLE_TOKEN_KIND=
|
||||
OPENID_REQUIRED_ROLE_PARAMETER_PATH=
|
||||
OPENID_ADMIN_ROLE=
|
||||
OPENID_ADMIN_ROLE_PARAMETER_PATH=
|
||||
OPENID_ADMIN_ROLE_TOKEN_KIND=
|
||||
# Set to determine which user info property returned from OpenID Provider to store as the User's username
|
||||
OPENID_USERNAME_CLAIM=
|
||||
# Set to determine which user info property returned from OpenID Provider to store as the User's name
|
||||
OPENID_NAME_CLAIM=
|
||||
# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
|
||||
# When not set, defaults to: email -> preferred_username -> upn
|
||||
OPENID_EMAIL_CLAIM=
|
||||
# Optional audience parameter for OpenID authorization requests
|
||||
OPENID_AUDIENCE=
|
||||
|
||||
OPENID_BUTTON_LABEL=
|
||||
OPENID_IMAGE_URL=
|
||||
# Set to true to automatically redirect to the OpenID provider when a user visits the login page
|
||||
# This will bypass the login form completely for users, only use this if OpenID is your only authentication method
|
||||
OPENID_AUTO_REDIRECT=false
|
||||
# Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
|
||||
OPENID_USE_PKCE=false
|
||||
#Set to true to reuse openid tokens for authentication management instead of using the mongodb session and the custom refresh token.
|
||||
OPENID_REUSE_TOKENS=
|
||||
#By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
|
||||
#If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
|
||||
#Default is true.
|
||||
OPENID_JWKS_URL_CACHE_ENABLED=
|
||||
OPENID_JWKS_URL_CACHE_TIME= # 600000 ms eq to 10 minutes leave empty to disable caching
|
||||
#Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
|
||||
OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
|
||||
OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example for Scope Needed for Microsoft Graph API
|
||||
# Set to true to use the OpenID Connect end session endpoint for logout
|
||||
OPENID_USE_END_SESSION_ENDPOINT=
|
||||
# URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
|
||||
OPENID_POST_LOGOUT_REDIRECT_URI=
|
||||
# Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
|
||||
OPENID_MAX_LOGOUT_URL_LENGTH=
|
||||
|
||||
#========================#
|
||||
# SharePoint Integration #
|
||||
#========================#
|
||||
# Requires Entra ID (OpenID) authentication to be configured
|
||||
|
||||
# Enable SharePoint file picker in chat and agent panels
|
||||
# ENABLE_SHAREPOINT_FILEPICKER=true
|
||||
|
||||
# SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
|
||||
# SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
|
||||
|
||||
# Microsoft Graph API And SharePoint scopes for file picker
|
||||
# SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
|
||||
# SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
|
||||
#========================#
|
||||
|
||||
# SAML
|
||||
# Note: If OpenID is enabled, SAML authentication will be automatically disabled.
|
||||
SAML_ENTRY_POINT=
|
||||
SAML_ISSUER=
|
||||
SAML_CERT=
|
||||
SAML_CALLBACK_URL=/oauth/saml/callback
|
||||
SAML_SESSION_SECRET=
|
||||
|
||||
# Attribute mappings (optional)
|
||||
SAML_EMAIL_CLAIM=
|
||||
SAML_USERNAME_CLAIM=
|
||||
SAML_GIVEN_NAME_CLAIM=
|
||||
SAML_FAMILY_NAME_CLAIM=
|
||||
SAML_PICTURE_CLAIM=
|
||||
SAML_NAME_CLAIM=
|
||||
|
||||
# Login button settings (optional)
|
||||
SAML_BUTTON_LABEL=
|
||||
SAML_IMAGE_URL=
|
||||
|
||||
# Whether the SAML Response should be signed.
|
||||
# - If "true", the entire `SAML Response` will be signed.
|
||||
# - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
|
||||
# SAML_USE_AUTHN_RESPONSE_SIGNED=
|
||||
|
||||
|
||||
#===============================================#
|
||||
# Microsoft Graph API / Entra ID Integration #
|
||||
#===============================================#
|
||||
|
||||
# Enable Entra ID people search integration in permissions/sharing system
|
||||
# When enabled, the people picker will search both local database and Entra ID
|
||||
USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
|
||||
|
||||
# When enabled, Entra ID group owners will be considered members of the group
|
||||
ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
|
||||
|
||||
# Microsoft Graph API scopes needed for people/group search
|
||||
# Default scopes provide access to user profiles and group memberships
|
||||
OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
|
||||
|
||||
# LDAP
|
||||
LDAP_URL=
|
||||
LDAP_BIND_DN=
|
||||
LDAP_BIND_CREDENTIALS=
|
||||
LDAP_USER_SEARCH_BASE=
|
||||
#LDAP_SEARCH_FILTER="mail="
|
||||
LDAP_CA_CERT_PATH=
|
||||
# LDAP_TLS_REJECT_UNAUTHORIZED=
|
||||
# LDAP_STARTTLS=
|
||||
# LDAP_LOGIN_USES_USERNAME=true
|
||||
# LDAP_ID=
|
||||
# LDAP_USERNAME=
|
||||
# LDAP_EMAIL=
|
||||
# LDAP_FULL_NAME=
|
||||
|
||||
#========================#
|
||||
# Email Password Reset #
|
||||
#========================#
|
||||
|
||||
EMAIL_SERVICE=
|
||||
EMAIL_HOST=
|
||||
EMAIL_PORT=25
|
||||
EMAIL_ENCRYPTION=
|
||||
EMAIL_ENCRYPTION_HOSTNAME=
|
||||
EMAIL_ALLOW_SELFSIGNED=
|
||||
# Leave both empty for SMTP servers that do not require authentication
|
||||
EMAIL_USERNAME=
|
||||
EMAIL_PASSWORD=
|
||||
EMAIL_FROM_NAME=
|
||||
EMAIL_FROM=noreply@librechat.ai
|
||||
|
||||
#========================#
|
||||
# Mailgun API #
|
||||
#========================#
|
||||
|
||||
# MAILGUN_API_KEY=your-mailgun-api-key
|
||||
# MAILGUN_DOMAIN=mg.yourdomain.com
|
||||
# EMAIL_FROM=noreply@yourdomain.com
|
||||
# EMAIL_FROM_NAME="LibreChat"
|
||||
|
||||
# # Optional: For EU region
|
||||
# MAILGUN_HOST=https://api.eu.mailgun.net
|
||||
|
||||
#========================#
|
||||
# Firebase CDN #
|
||||
#========================#
|
||||
|
||||
FIREBASE_API_KEY=
|
||||
FIREBASE_AUTH_DOMAIN=
|
||||
FIREBASE_PROJECT_ID=
|
||||
FIREBASE_STORAGE_BUCKET=
|
||||
FIREBASE_MESSAGING_SENDER_ID=
|
||||
FIREBASE_APP_ID=
|
||||
|
||||
#========================#
|
||||
# S3 AWS Bucket #
|
||||
#========================#
|
||||
|
||||
AWS_ENDPOINT_URL=
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_REGION=
|
||||
AWS_BUCKET_NAME=
|
||||
# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
|
||||
# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
|
||||
# AWS_FORCE_PATH_STYLE=false
|
||||
|
||||
#========================#
|
||||
# Azure Blob Storage #
|
||||
#========================#
|
||||
|
||||
AZURE_STORAGE_CONNECTION_STRING=
|
||||
AZURE_STORAGE_PUBLIC_ACCESS=false
|
||||
AZURE_CONTAINER_NAME=files
|
||||
|
||||
#========================#
|
||||
# Shared Links #
|
||||
#========================#
|
||||
|
||||
ALLOW_SHARED_LINKS=true
|
||||
# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
|
||||
ALLOW_SHARED_LINKS_PUBLIC=false
|
||||
|
||||
#==============================#
|
||||
# Static File Cache Control #
|
||||
#==============================#
|
||||
|
||||
# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
|
||||
# NODE_ENV must be set to production for these to take effect
|
||||
# STATIC_CACHE_MAX_AGE=172800
|
||||
# STATIC_CACHE_S_MAX_AGE=86400
|
||||
|
||||
# If you have another service in front of LibreChat doing compression, disable Express-based compression here
|
||||
# DISABLE_COMPRESSION=true
|
||||
|
||||
# If you have gzipped versions of uploaded images in the same folder, this will enable gzip scan and serving of these images
|
||||
# Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
|
||||
# ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
|
||||
|
||||
#===================================================#
|
||||
# UI #
|
||||
#===================================================#
|
||||
|
||||
APP_TITLE=LibreChat
|
||||
# CUSTOM_FOOTER="My custom footer"
|
||||
HELP_AND_FAQ_URL=https://librechat.ai
|
||||
|
||||
# SHOW_BIRTHDAY_ICON=true
|
||||
|
||||
# Google tag manager id
|
||||
#ANALYTICS_GTM_ID=user provided google tag manager id
|
||||
|
||||
# limit conversation file imports to a certain number of bytes in size to avoid the container
|
||||
# maxing out memory limitations by uncommenting this line and supplying a file size in bytes
|
||||
# such as the example below of 250 MiB
|
||||
# CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
|
||||
|
||||
|
||||
#===============#
|
||||
# REDIS Options #
|
||||
#===============#
|
||||
|
||||
# Enable Redis for caching and session storage
|
||||
# USE_REDIS=true
|
||||
# Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
|
||||
# Set to false to use in-memory storage for streams while keeping Redis for other caches
|
||||
# USE_REDIS_STREAMS=true
|
||||
|
||||
# Single Redis instance
|
||||
# REDIS_URI=redis://127.0.0.1:6379
|
||||
|
||||
# Redis cluster (multiple nodes)
|
||||
# REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
|
||||
|
||||
# Redis with TLS/SSL encryption and CA certificate
|
||||
# REDIS_URI=rediss://127.0.0.1:6380
|
||||
# REDIS_CA=/path/to/ca-cert.pem
|
||||
|
||||
# Elasticache may need to use an alternate dnsLookup for TLS connections. see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
|
||||
# Enable alternative dnsLookup for redis
|
||||
# REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
|
||||
|
||||
# Redis authentication (if required)
|
||||
# REDIS_USERNAME=your_redis_username
|
||||
# REDIS_PASSWORD=your_redis_password
|
||||
|
||||
# Redis key prefix configuration
|
||||
# Use environment variable name for dynamic prefix (recommended for cloud deployments)
|
||||
# REDIS_KEY_PREFIX_VAR=K_REVISION
|
||||
# Or use static prefix directly
|
||||
# REDIS_KEY_PREFIX=librechat
|
||||
|
||||
# Redis connection limits
|
||||
# REDIS_MAX_LISTENERS=40
|
||||
|
||||
# Redis ping interval in seconds (0 = disabled, >0 = enabled)
|
||||
# When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
|
||||
# When unset or 0, no pinging is performed (recommended for most use cases)
|
||||
# REDIS_PING_INTERVAL=300
|
||||
|
||||
# Force specific cache namespaces to use in-memory storage even when Redis is enabled
|
||||
# Comma-separated list of CacheKeys
|
||||
# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
|
||||
# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
|
||||
# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
|
||||
|
||||
# Leader Election Configuration (for multi-instance deployments with Redis)
|
||||
# Duration in seconds that the leader lease is valid before it expires (default: 25)
|
||||
# LEADER_LEASE_DURATION=25
|
||||
# Interval in seconds at which the leader renews its lease (default: 10)
|
||||
# LEADER_RENEW_INTERVAL=10
|
||||
# Maximum number of retry attempts when renewing the lease fails (default: 3)
|
||||
# LEADER_RENEW_ATTEMPTS=3
|
||||
# Delay in seconds between retry attempts when renewing the lease (default: 0.5)
|
||||
# LEADER_RENEW_RETRY_DELAY=0.5
|
||||
|
||||
#==================================================#
|
||||
# Others #
|
||||
#==================================================#
|
||||
# You should leave the following commented out #
|
||||
|
||||
# NODE_ENV=
|
||||
|
||||
# E2E_USER_EMAIL=
|
||||
# E2E_USER_PASSWORD=
|
||||
|
||||
#=====================================================#
|
||||
# Cache Headers #
|
||||
#=====================================================#
|
||||
# Headers that control caching of the index.html #
|
||||
# Default configuration prevents caching to ensure #
|
||||
# users always get the latest version. Customize #
|
||||
# only if you understand caching implications. #
|
||||
|
||||
# INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
|
||||
# INDEX_PRAGMA=no-cache
|
||||
# INDEX_EXPIRES=0
|
||||
|
||||
# no-cache: Forces validation with server before using cached version
|
||||
# no-store: Prevents storing the response entirely
|
||||
# must-revalidate: Prevents using stale content when offline
|
||||
|
||||
#=====================================================#
|
||||
# OpenWeather #
|
||||
#=====================================================#
|
||||
OPENWEATHER_API_KEY=
|
||||
|
||||
#====================================#
|
||||
# LibreChat Code Interpreter API #
|
||||
#====================================#
|
||||
|
||||
# https://code.librechat.ai
|
||||
# LIBRECHAT_CODE_API_KEY=your-key
|
||||
|
||||
#======================#
|
||||
# Web Search #
|
||||
#======================#
|
||||
|
||||
# Note: All of the following variable names can be customized.
|
||||
# Omit values to allow user to provide them.
|
||||
|
||||
# For more information on configuration values, see:
|
||||
# https://librechat.ai/docs/features/web_search
|
||||
|
||||
# Search Provider (Required)
|
||||
# SERPER_API_KEY=your_serper_api_key
|
||||
|
||||
# Scraper (Required)
|
||||
# FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||
# Optional: Custom Firecrawl API URL
|
||||
# FIRECRAWL_API_URL=your_firecrawl_api_url
|
||||
|
||||
# Reranker (Required)
|
||||
# JINA_API_KEY=your_jina_api_key
|
||||
# or
|
||||
# COHERE_API_KEY=your_cohere_api_key
|
||||
|
||||
#======================#
|
||||
# MCP Configuration #
|
||||
#======================#
|
||||
|
||||
# Treat 401/403 responses as an OAuth requirement when no OAuth metadata is found
|
||||
# MCP_OAUTH_ON_AUTH_ERROR=true
|
||||
|
||||
# Timeout for OAuth detection requests in milliseconds
|
||||
# MCP_OAUTH_DETECTION_TIMEOUT=5000
|
||||
|
||||
# Cache connection status checks for this many milliseconds to avoid expensive verification
|
||||
# MCP_CONNECTION_CHECK_TTL=60000
|
||||
|
||||
# Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
|
||||
# When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
|
||||
# MCP_SKIP_CODE_CHALLENGE_CHECK=false
|
||||
|
||||
# Circuit breaker: max connect/disconnect cycles before tripping (per server)
|
||||
# MCP_CB_MAX_CYCLES=7
|
||||
|
||||
# Circuit breaker: sliding window (ms) for counting cycles
|
||||
# MCP_CB_CYCLE_WINDOW_MS=45000
|
||||
|
||||
# Circuit breaker: cooldown (ms) after the cycle breaker trips
|
||||
# MCP_CB_CYCLE_COOLDOWN_MS=15000
|
||||
|
||||
# Circuit breaker: max consecutive failed connection rounds before backoff
|
||||
# MCP_CB_MAX_FAILED_ROUNDS=3
|
||||
|
||||
# Circuit breaker: sliding window (ms) for counting failed rounds
|
||||
# MCP_CB_FAILED_WINDOW_MS=120000
|
||||
|
||||
# Circuit breaker: base backoff (ms) after failed round threshold is reached
|
||||
# MCP_CB_BASE_BACKOFF_MS=30000
|
||||
|
||||
# Circuit breaker: max backoff cap (ms) for exponential backoff
|
||||
# MCP_CB_MAX_BACKOFF_MS=300000
|
||||
@@ -0,0 +1,8 @@
|
||||
# Auto-generated from pyproject.toml — do not edit manually
|
||||
ALFRED_VERSION=0.1.7
|
||||
PYTHON_VERSION=3.14.3
|
||||
IMAGE_NAME=alfred_media_organizer
|
||||
SERVICE_NAME=alfred
|
||||
LIBRECHAT_VERSION=v0.8.4
|
||||
RAG_VERSION=v0.7.3
|
||||
UV_VERSION=0.11.6
|
||||
+9
-1
@@ -55,7 +55,7 @@ coverage.xml
|
||||
Thumbs.db
|
||||
|
||||
# Secrets
|
||||
.env
|
||||
.env.secrets
|
||||
|
||||
# Backup files
|
||||
*.backup
|
||||
@@ -65,3 +65,11 @@ data/*
|
||||
|
||||
# Application logs
|
||||
logs/*
|
||||
|
||||
# Documentation folder
|
||||
docs/
|
||||
|
||||
# .md files
|
||||
*.md
|
||||
|
||||
#
|
||||
|
||||
+30
-61
@@ -2,46 +2,36 @@
|
||||
# check=skip=InvalidDefaultArgInFrom
|
||||
|
||||
ARG PYTHON_VERSION
|
||||
ARG PYTHON_VERSION_SHORT
|
||||
ARG RUNNER
|
||||
ARG UV_VERSION
|
||||
|
||||
# Stage 0: uv binary (workaround — --from doesn't support ARG expansion)
|
||||
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
|
||||
|
||||
# ===========================================
|
||||
# Stage 1: Builder
|
||||
# ===========================================
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
||||
|
||||
# Re-declare ARGs after FROM to make them available in this stage
|
||||
ARG RUNNER
|
||||
|
||||
# Silence apt prompts - skip writing .pyc files - write logs immediately (unbuffered)
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
PYTHONUNBUFFERED=1 \
|
||||
UV_PROJECT_ENVIRONMENT=/venv
|
||||
|
||||
# Install build dependencies (needs root)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Install build dependencies
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update \
|
||||
&& apt-get install -y --no-install-recommends build-essential
|
||||
|
||||
# Install runner globally (needs root) - Save cache for future
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install $RUNNER
|
||||
# Install uv globally
|
||||
COPY --from=uv-bin /uv /usr/local/bin/uv
|
||||
|
||||
# Set working directory for dependency installation
|
||||
WORKDIR /tmp
|
||||
|
||||
# Copy dependency files
|
||||
COPY pyproject.toml poetry.lock* uv.lock* Makefile ./
|
||||
COPY pyproject.toml uv.lock Makefile ./
|
||||
|
||||
# Install dependencies as root (to avoid permission issues with system packages)
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
--mount=type=cache,target=/root/.cache/pypoetry \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$RUNNER" = "poetry" ]; then \
|
||||
poetry config virtualenvs.create false && \
|
||||
poetry install --only main --no-root; \
|
||||
elif [ "$RUNNER" = "uv" ]; then \
|
||||
uv pip install --system -r pyproject.toml; \
|
||||
fi
|
||||
# Install dependencies into /venv
|
||||
RUN --mount=type=cache,target=/root/.cache/uv uv sync
|
||||
|
||||
COPY scripts/ ./scripts/
|
||||
COPY .env.example ./
|
||||
@@ -51,16 +41,7 @@ COPY .env.example ./
|
||||
# ===========================================
|
||||
FROM builder AS test
|
||||
|
||||
ARG RUNNER
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
--mount=type=cache,target=/root/.cache/pypoetry \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$RUNNER" = "poetry" ]; then \
|
||||
poetry install --no-root; \
|
||||
elif [ "$RUNNER" = "uv" ]; then \
|
||||
uv pip install --system -e .[dev]; \
|
||||
fi
|
||||
RUN --mount=type=cache,target=/root/.cache/uv uv sync --group dev
|
||||
|
||||
COPY alfred/ ./alfred
|
||||
COPY scripts ./scripts
|
||||
@@ -71,51 +52,39 @@ COPY tests/ ./tests
|
||||
# ===========================================
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime
|
||||
|
||||
ARG PYTHON_VERSION_SHORT
|
||||
|
||||
# TODO: Do we still need all of these keys?
|
||||
ENV LLM_PROVIDER=deepseek \
|
||||
MEMORY_STORAGE_DIR=/data/memory \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PYTHONPATH=/home/appuser \
|
||||
PYTHONUNBUFFERED=1
|
||||
PATH="/venv/bin:$PATH"
|
||||
|
||||
# Install runtime dependencies (needs root)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean
|
||||
# Install runtime dependencies
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||
apt-get update \
|
||||
&& apt-get install -y --no-install-recommends ca-certificates
|
||||
|
||||
# Create non-root user
|
||||
RUN useradd -m -u 1000 -s /bin/bash appuser
|
||||
|
||||
# Create data directories (needs root for /data)
|
||||
# Create data directories
|
||||
RUN mkdir -p /data /logs \
|
||||
&& chown -R appuser:appuser /data /logs
|
||||
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Set working directory (owned by appuser)
|
||||
WORKDIR /home/appuser
|
||||
|
||||
# Copy Python packages from builder stage
|
||||
COPY --from=builder /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages /usr/local/lib/python${PYTHON_VERSION_SHORT}/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
# Copy venv from builder stage
|
||||
COPY --from=builder /venv /venv
|
||||
|
||||
# Copy application code (already owned by appuser)
|
||||
# Copy application code
|
||||
COPY --chown=appuser:appuser alfred/ ./alfred
|
||||
COPY --chown=appuser:appuser scripts/ ./scripts
|
||||
COPY --chown=appuser:appuser .env.example ./
|
||||
COPY --chown=appuser:appuser pyproject.toml ./
|
||||
|
||||
# Create volumes for persistent data
|
||||
VOLUME ["/data", "/logs"]
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
# --- Load Config from pyproject.toml ---
|
||||
export
|
||||
-include .env.make
|
||||
|
||||
# --- Profiles management ---
|
||||
@@ -9,27 +10,29 @@ p ?= full
|
||||
PROFILES_PARAM := COMPOSE_PROFILES=$(p)
|
||||
|
||||
# --- Commands ---
|
||||
DOCKER_COMPOSE := docker compose
|
||||
DOCKER_BUILD := docker build --no-cache \
|
||||
DOCKER_COMPOSE := docker compose \
|
||||
--env-file .env.alfred \
|
||||
--env-file .env.secrets \
|
||||
--env-file .env.make
|
||||
DOCKER_BUILD := DOCKER_BUILDKIT=1 docker build \
|
||||
--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
|
||||
--build-arg PYTHON_VERSION_SHORT=$(PYTHON_VERSION_SHORT) \
|
||||
--build-arg RUNNER=$(RUNNER)
|
||||
--build-arg UV_VERSION=$(UV_VERSION)
|
||||
|
||||
# --- Phony ---
|
||||
.PHONY: .env bootstrap up down restart logs ps shell build build-test install \
|
||||
.PHONY: bootstrap up down restart logs ps shell build build-test install \
|
||||
update install-hooks test coverage lint format clean major minor patch help
|
||||
|
||||
# --- Setup ---
|
||||
.env .env.make:
|
||||
.env.alfred .env.librechat .env.secrets .env.make:
|
||||
@echo "Initializing environment..."
|
||||
@python scripts/bootstrap.py \
|
||||
@uv run python scripts/bootstrap.py \
|
||||
&& echo "✓ Environment ready" \
|
||||
|| (echo "✗ Environment setup failed" && exit 1)
|
||||
|
||||
bootstrap: .env .env.make
|
||||
bootstrap: .env.alfred .env.librechat .env.secrets .env.make
|
||||
|
||||
# --- Docker ---
|
||||
up: .env
|
||||
up: .env.alfred .env.secrets
|
||||
@echo "Starting containers with profiles: [full]..."
|
||||
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) up -d --remove-orphans \
|
||||
&& echo "✓ Containers started" \
|
||||
@@ -74,45 +77,45 @@ build-test: .env.make
|
||||
|
||||
# --- Dependencies ---
|
||||
install:
|
||||
@echo "Installing dependencies with $(RUNNER)..."
|
||||
@$(RUNNER) install \
|
||||
@echo "Installing dependencies with uv..."
|
||||
@uv install \
|
||||
&& echo "✓ Dependencies installed" \
|
||||
|| (echo "✗ Installation failed" && exit 1)
|
||||
|
||||
install-hooks:
|
||||
@echo "Installing pre-commit hooks..."
|
||||
@$(RUNNER) run pre-commit install \
|
||||
@uv run pre-commit install \
|
||||
&& echo "✓ Hooks installed" \
|
||||
|| (echo "✗ Hook installation failed" && exit 1)
|
||||
|
||||
update:
|
||||
@echo "Updating dependencies with $(RUNNER)..."
|
||||
@$(RUNNER) update \
|
||||
@echo "Updating dependencies with uv..."
|
||||
@uv update \
|
||||
&& echo "✓ Dependencies updated" \
|
||||
|| (echo "✗ Update failed" && exit 1)
|
||||
|
||||
# --- Quality ---
|
||||
test:
|
||||
@echo "Running tests..."
|
||||
@$(RUNNER) run pytest \
|
||||
@uv run pytest \
|
||||
&& echo "✓ Tests passed" \
|
||||
|| (echo "✗ Tests failed" && exit 1)
|
||||
|
||||
coverage:
|
||||
@echo "Running tests with coverage..."
|
||||
@$(RUNNER) run pytest --cov=. --cov-report=html --cov-report=term \
|
||||
@uv run pytest --cov=. --cov-report=html --cov-report=term \
|
||||
&& echo "✓ Coverage report generated" \
|
||||
|| (echo "✗ Coverage failed" && exit 1)
|
||||
|
||||
lint:
|
||||
@echo "Linting code..."
|
||||
@$(RUNNER) run ruff check --fix . \
|
||||
@uv run ruff check --fix . \
|
||||
&& echo "✓ Linting complete" \
|
||||
|| (echo "✗ Linting failed" && exit 1)
|
||||
|
||||
format:
|
||||
@echo "Formatting code..."
|
||||
@$(RUNNER) run ruff format . && $(RUNNER) run ruff check --fix . \
|
||||
@uv run ruff format . && uv run ruff check --fix . \
|
||||
&& echo "✓ Code formatted" \
|
||||
|| (echo "✗ Formatting failed" && exit 1)
|
||||
|
||||
@@ -125,7 +128,7 @@ clean:
|
||||
# --- Versioning ---
|
||||
major minor patch: _check-main
|
||||
@echo "Bumping $@ version..."
|
||||
@$(RUNNER) run bump-my-version bump $@ \
|
||||
@uv run bump-my-version bump $@ \
|
||||
&& echo "✓ Version bumped" \
|
||||
|| (echo "✗ Version bump failed" && exit 1)
|
||||
|
||||
@@ -138,8 +141,7 @@ major minor patch: _check-main
|
||||
_ci-dump-config:
|
||||
@echo "image_name=$(IMAGE_NAME)"
|
||||
@echo "python_version=$(PYTHON_VERSION)"
|
||||
@echo "python_version_short=$(PYTHON_VERSION_SHORT)"
|
||||
@echo "runner=$(RUNNER)"
|
||||
@echo "uv_version=$(UV_VERSION)"
|
||||
@echo "service_name=$(SERVICE_NAME)"
|
||||
|
||||
_ci-run-tests:build-test
|
||||
@@ -161,6 +163,9 @@ help:
|
||||
@echo ""
|
||||
@echo "Usage: make [target] [p=profile1,profile2]"
|
||||
@echo ""
|
||||
@echo "Setup:"
|
||||
@echo " bootstrap Generate .env.alfred, .env.librechat, .env.secrets and .env.make"
|
||||
@echo ""
|
||||
@echo "Docker:"
|
||||
@echo " up Start containers (default profile: core)"
|
||||
@echo " Example: make up p=rag,meili"
|
||||
@@ -173,7 +178,7 @@ help:
|
||||
@echo ""
|
||||
@echo "Dev & Quality:"
|
||||
@echo " setup Bootstrap .env and security keys"
|
||||
@echo " install Install dependencies via $(RUNNER)"
|
||||
@echo " install Install dependencies via uv"
|
||||
@echo " test Run pytest suite"
|
||||
@echo " coverage Run tests and generate HTML report"
|
||||
@echo " lint/format Quality and style checks"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface.
|
||||
|
||||
[](https://www.python.org/downloads/)
|
||||
[](https://python-poetry.org/)
|
||||
[](https://github.com/astral-sh/uv)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/astral-sh/ruff)
|
||||
|
||||
@@ -13,9 +13,10 @@ An AI-powered agent for managing your local media library with natural language.
|
||||
- 🔍 **Smart Search** — Find movies and TV shows via TMDB with rich metadata
|
||||
- 📥 **Torrent Integration** — Search and download via qBittorrent
|
||||
- 🧠 **Contextual Memory** — Remembers your preferences and conversation history
|
||||
- 📁 **Auto-Organization** — Keeps your media library tidy and well-structured
|
||||
- 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client
|
||||
- 🖥️ **LibreChat Frontend** — Beautiful web UI included out of the box
|
||||
- 📁 **Auto-Organization** — Moves and renames media files, resolves destinations, handles subtitles
|
||||
- 🎞️ **Subtitle Pipeline** — Identifies, matches, and places subtitle tracks automatically
|
||||
- 🔄 **Workflow Engine** — YAML-defined multi-step workflows (e.g. `organize_media`)
|
||||
- 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client (LibreChat, OpenWebUI, etc.)
|
||||
- 🔒 **Secure by Default** — Auto-generated secrets and encrypted credentials
|
||||
|
||||
## 🏗️ Architecture
|
||||
@@ -26,33 +27,50 @@ Built with **Domain-Driven Design (DDD)** principles for clean separation of con
|
||||
alfred/
|
||||
├── agent/ # AI agent orchestration
|
||||
│ ├── llm/ # LLM clients (Ollama, DeepSeek)
|
||||
│ └── tools/ # Tool implementations
|
||||
│ ├── tools/ # Tool implementations (api, filesystem, language)
|
||||
│ └── workflows/ # YAML-defined multi-step workflows
|
||||
├── application/ # Use cases & DTOs
|
||||
│ ├── movies/ # Movie search use cases
|
||||
│ ├── movies/ # Movie search
|
||||
│ ├── torrents/ # Torrent management
|
||||
│ └── filesystem/ # File operations
|
||||
│ └── filesystem/ # File operations (move, list, subtitles, seed links)
|
||||
├── domain/ # Business logic & entities
|
||||
│ ├── media/ # Release parsing
|
||||
│ ├── movies/ # Movie entities
|
||||
│ ├── tv_shows/ # TV show entities
|
||||
│ └── subtitles/ # Subtitle entities
|
||||
│ ├── tv_shows/ # TV show entities & value objects
|
||||
│ ├── subtitles/ # Subtitle scanner, services, knowledge base
|
||||
│ └── shared/ # Common value objects (ImdbId, FilePath, FileSize)
|
||||
└── infrastructure/ # External services & persistence
|
||||
├── api/ # External API clients (TMDB, qBittorrent)
|
||||
├── filesystem/ # File system operations
|
||||
└── persistence/ # Memory & repositories
|
||||
├── api/ # External API clients (TMDB, qBittorrent, Knaben)
|
||||
├── filesystem/ # File manager (hard-link based, path-traversal safe)
|
||||
├── persistence/ # Three-tier memory (LTM/STM/Episodic) + JSON repositories
|
||||
└── subtitle/ # Subtitle infrastructure
|
||||
```
|
||||
|
||||
See [docs/architecture_diagram.md](docs/architecture_diagram.md) for detailed architectural diagrams.
|
||||
### Key flows
|
||||
|
||||
**Agent execution:** `agent.step(user_input)` → LLM call → if tool_calls, execute each via registry → loop until no tool calls or `max_tool_iterations` → return final response.
|
||||
|
||||
**Media organization workflow:**
|
||||
1. `resolve_destination` — Determines target folder/filename from release name
|
||||
2. `move_media` — Hard-links file to library, deletes source
|
||||
3. `manage_subtitles` — Scans, classifies, and places subtitle tracks
|
||||
4. `create_seed_links` — Hard-links library file back to torrents/ for continued seeding
|
||||
|
||||
**Memory tiers:**
|
||||
- **LTM** (`data/memory/ltm.json`) — Persisted config, media library, watchlist
|
||||
- **STM** — Conversation history (capped at `MAX_HISTORY_MESSAGES`)
|
||||
- **Episodic** — Transient search results, active downloads, recent errors
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **Python 3.14+** (required)
|
||||
- **Poetry** (dependency manager)
|
||||
- **Python 3.14+**
|
||||
- **uv** (dependency manager)
|
||||
- **Docker & Docker Compose** (recommended for full stack)
|
||||
- **API Keys:**
|
||||
- TMDB API key ([get one here](https://www.themoviedb.org/settings/api))
|
||||
- Optional: DeepSeek, OpenAI, Anthropic, or other LLM provider keys
|
||||
- Optional: DeepSeek or other LLM provider keys
|
||||
|
||||
### Installation
|
||||
|
||||
@@ -64,9 +82,15 @@ cd alfred_media_organizer
|
||||
# Install dependencies
|
||||
make install
|
||||
|
||||
# Install pre-commit hooks
|
||||
make install-hooks
|
||||
|
||||
# Bootstrap environment (generates .env with secure secrets)
|
||||
make bootstrap
|
||||
|
||||
# Validate your .env against the schema
|
||||
make validate
|
||||
|
||||
# Edit .env with your API keys
|
||||
nano .env
|
||||
```
|
||||
@@ -94,162 +118,95 @@ The web interface will be available at **http://localhost:3080**
|
||||
### Running Locally (Development)
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
poetry install
|
||||
|
||||
# Start the API server
|
||||
poetry run uvicorn alfred.app:app --reload --port 8000
|
||||
uv run uvicorn alfred.app:app --reload --port 8000
|
||||
```
|
||||
|
||||
## ⚙️ Configuration
|
||||
|
||||
### Environment Bootstrap
|
||||
### Settings system
|
||||
|
||||
Alfred uses a smart bootstrap system that:
|
||||
`settings.toml` is the single source of truth. The schema flows:
|
||||
|
||||
1. **Generates secure secrets** automatically (JWT tokens, database passwords, encryption keys)
|
||||
2. **Syncs build variables** from `pyproject.toml` (versions, image names)
|
||||
3. **Preserves existing secrets** when re-running (never overwrites your API keys)
|
||||
4. **Computes database URIs** automatically from individual components
|
||||
```
|
||||
settings.toml → settings_schema.py → settings_bootstrap.py → .env + .env.make → settings.py
|
||||
```
|
||||
|
||||
To add a setting: define it in `settings.toml`, run `make bootstrap`, then access via `settings.my_new_setting`.
|
||||
|
||||
```bash
|
||||
# First time setup
|
||||
make bootstrap
|
||||
|
||||
# Re-run after updating pyproject.toml (secrets are preserved)
|
||||
# Validate existing .env against schema
|
||||
make validate
|
||||
|
||||
# Re-run after settings.toml changes (existing secrets preserved)
|
||||
make bootstrap
|
||||
```
|
||||
|
||||
### Configuration File (.env)
|
||||
**Never commit `.env` or `.env.make`** — both are gitignored and auto-generated.
|
||||
|
||||
The `.env` file is generated from `.env.example` with secure defaults:
|
||||
### Key settings (.env)
|
||||
|
||||
```bash
|
||||
# --- CORE SETTINGS ---
|
||||
HOST=0.0.0.0
|
||||
PORT=3080
|
||||
# --- CORE ---
|
||||
MAX_HISTORY_MESSAGES=10
|
||||
MAX_TOOL_ITERATIONS=10
|
||||
|
||||
# --- LLM CONFIGURATION ---
|
||||
# Providers: 'local' (Ollama), 'deepseek', 'openai', 'anthropic', 'google'
|
||||
DEFAULT_LLM_PROVIDER=local
|
||||
|
||||
# Local LLM (Ollama - included in Docker stack)
|
||||
# --- LLM ---
|
||||
DEFAULT_LLM_PROVIDER=local # local (Ollama) | deepseek
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
OLLAMA_MODEL=llama3.3:latest
|
||||
LLM_TEMPERATURE=0.2
|
||||
|
||||
# --- API KEYS (fill only what you need) ---
|
||||
TMDB_API_KEY=your-tmdb-key-here # Required for movie search
|
||||
DEEPSEEK_API_KEY= # Optional
|
||||
OPENAI_API_KEY= # Optional
|
||||
ANTHROPIC_API_KEY= # Optional
|
||||
# --- API KEYS ---
|
||||
TMDB_API_KEY=your-tmdb-key # Required for movie/show search
|
||||
DEEPSEEK_API_KEY= # Optional
|
||||
|
||||
# --- SECURITY (auto-generated, don't modify) ---
|
||||
JWT_SECRET=<auto-generated>
|
||||
JWT_REFRESH_SECRET=<auto-generated>
|
||||
CREDS_KEY=<auto-generated>
|
||||
CREDS_IV=<auto-generated>
|
||||
|
||||
# --- DATABASES (auto-generated passwords) ---
|
||||
MONGO_PASSWORD=<auto-generated>
|
||||
POSTGRES_PASSWORD=<auto-generated>
|
||||
# --- SECURITY (auto-generated) ---
|
||||
JWT_SECRET=<auto>
|
||||
CREDS_KEY=<auto>
|
||||
MONGO_PASSWORD=<auto>
|
||||
```
|
||||
|
||||
### Security Keys
|
||||
|
||||
Security keys are defined in `pyproject.toml` and generated automatically:
|
||||
|
||||
```toml
|
||||
[tool.alfred.security]
|
||||
jwt_secret = "32:b64" # 32 bytes, base64 URL-safe
|
||||
jwt_refresh_secret = "32:b64"
|
||||
creds_key = "32:hex" # 32 bytes, hexadecimal (AES-256)
|
||||
creds_iv = "16:hex" # 16 bytes, hexadecimal (AES IV)
|
||||
mongo_password = "16:hex"
|
||||
postgres_password = "16:hex"
|
||||
```
|
||||
|
||||
**Formats:**
|
||||
- `b64` — Base64 URL-safe (for JWT tokens)
|
||||
- `hex` — Hexadecimal (for encryption keys, passwords)
|
||||
|
||||
## 🐳 Docker Services
|
||||
|
||||
### Service Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ alfred-net (bridge) │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ LibreChat │───▶│ Alfred │───▶│ MongoDB │ │
|
||||
│ │ :3080 │ │ (core) │ │ :27017 │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │ │
|
||||
│ │ ▼ │
|
||||
│ │ ┌──────────────┐ │
|
||||
│ │ │ Ollama │ │
|
||||
│ │ │ (local) │ │
|
||||
│ │ └──────────────┘ │
|
||||
│ │ │
|
||||
│ ┌──────┴───────────────────────────────────────────────┐ │
|
||||
│ │ Optional Services (profiles) │ │
|
||||
│ ├──────────────┬──────────────┬──────────────┬─────────┤ │
|
||||
│ │ Meilisearch │ RAG API │ VectorDB │qBittor- │ │
|
||||
│ │ :7700 │ :8000 │ :5432 │ rent │ │
|
||||
│ │ [meili] │ [rag] │ [rag] │[qbit..] │ │
|
||||
│ └──────────────┴──────────────┴──────────────┴─────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Docker Profiles
|
||||
|
||||
| Profile | Services | Use Case |
|
||||
|---------|----------|----------|
|
||||
| (default) | LibreChat, Alfred, MongoDB, Ollama | Basic setup |
|
||||
| `meili` | + Meilisearch | Fast search |
|
||||
| `rag` | + RAG API, VectorDB | Document retrieval |
|
||||
| `qbittorrent` | + qBittorrent | Torrent downloads |
|
||||
| `full` | All services | Complete setup |
|
||||
| Profile | Extra services | Use case |
|
||||
|---------|---------------|----------|
|
||||
| (default) | — | LibreChat + Alfred + MongoDB + Ollama |
|
||||
| `meili` | Meilisearch | Fast full-text search |
|
||||
| `rag` | RAG API + VectorDB (PostgreSQL) | Document retrieval |
|
||||
| `qbittorrent` | qBittorrent | Torrent downloads |
|
||||
| `full` | All of the above | Complete setup |
|
||||
|
||||
```bash
|
||||
# Start with specific profiles
|
||||
make up p=rag,meili
|
||||
make up p=full
|
||||
```
|
||||
|
||||
### Docker Commands
|
||||
|
||||
```bash
|
||||
make up # Start containers (default profile)
|
||||
make up # Start (default profile)
|
||||
make up p=full # Start with all services
|
||||
make down # Stop all containers
|
||||
make restart # Restart containers
|
||||
make down # Stop
|
||||
make restart # Restart
|
||||
make logs # Follow logs
|
||||
make ps # Show container status
|
||||
make shell # Open bash in Alfred container
|
||||
make build # Build production image
|
||||
make build-test # Build test image
|
||||
make ps # Container status
|
||||
```
|
||||
|
||||
## 🛠️ Available Tools
|
||||
|
||||
The agent has access to these tools for interacting with your media library:
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `find_media_imdb_id` | Search for movies/TV shows on TMDB by title |
|
||||
| `find_torrent` | Search for torrents across multiple indexers |
|
||||
| `get_torrent_by_index` | Get detailed info about a specific torrent result |
|
||||
| `add_torrent_by_index` | Download a torrent by its index in search results |
|
||||
| `get_torrent_by_index` | Get detailed info about a specific result |
|
||||
| `add_torrent_by_index` | Download a torrent from search results |
|
||||
| `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly |
|
||||
| `set_path_for_folder` | Configure folder paths for media organization |
|
||||
| `list_folder` | List contents of a folder |
|
||||
| `set_language` | Set preferred language for searches |
|
||||
| `resolve_destination` | Compute the target library path for a release |
|
||||
| `move_media` | Hard-link a file to its library destination |
|
||||
| `manage_subtitles` | Scan, classify, and place subtitle tracks |
|
||||
| `create_seed_links` | Prepare torrent folder so qBittorrent keeps seeding |
|
||||
| `learn` | Teach Alfred a new pattern (release group, naming convention) |
|
||||
| `set_path_for_folder` | Configure folder paths |
|
||||
| `list_folder` | List contents of a configured folder |
|
||||
| `set_language` | Set preferred language for the session |
|
||||
|
||||
## 💬 Usage Examples
|
||||
|
||||
@@ -266,11 +223,12 @@ Alfred: I found 3 torrents for Inception (2010):
|
||||
|
||||
You: Download the first one
|
||||
Alfred: ✓ Added to qBittorrent! Download started.
|
||||
Saving to: /downloads/Movies/Inception (2010)/
|
||||
|
||||
You: What's downloading right now?
|
||||
Alfred: You have 1 active download:
|
||||
- Inception.2010.1080p.BluRay.x264 (45% complete, ETA: 12 min)
|
||||
You: Organize the Breaking Bad S01 download
|
||||
Alfred: ✓ Resolved destination: /tv_shows/Breaking.Bad/Season 01/
|
||||
✓ Moved 6 episode files
|
||||
✓ Placed 6 subtitle tracks (fr, en)
|
||||
✓ Seed links created in /torrents/
|
||||
```
|
||||
|
||||
### Via API
|
||||
@@ -279,219 +237,147 @@ Alfred: You have 1 active download:
|
||||
# Health check
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Chat with the agent (OpenAI-compatible)
|
||||
# Chat (OpenAI-compatible)
|
||||
curl -X POST http://localhost:8000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "alfred",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Find The Matrix 4K"}
|
||||
]
|
||||
"messages": [{"role": "user", "content": "Find The Matrix 4K"}]
|
||||
}'
|
||||
|
||||
# List available models
|
||||
# List models
|
||||
curl http://localhost:8000/v1/models
|
||||
|
||||
# View memory state (debug)
|
||||
# View memory state
|
||||
curl http://localhost:8000/memory/state
|
||||
|
||||
# Clear session memory
|
||||
curl -X POST http://localhost:8000/memory/clear-session
|
||||
```
|
||||
|
||||
### Via OpenWebUI or Other Clients
|
||||
|
||||
Alfred is compatible with any OpenAI-compatible client:
|
||||
|
||||
1. Add as OpenAI-compatible endpoint: `http://localhost:8000/v1`
|
||||
2. Model name: `alfred`
|
||||
3. No API key required (or use any placeholder)
|
||||
Alfred is compatible with any OpenAI-compatible client. Point it at `http://localhost:8000/v1`, model `alfred`.
|
||||
|
||||
## 🧠 Memory System
|
||||
|
||||
Alfred uses a three-tier memory system for context management:
|
||||
Alfred uses a three-tier memory system:
|
||||
|
||||
### Long-Term Memory (LTM)
|
||||
- **Persistent** — Saved to JSON files
|
||||
- **Contents:** Configuration, user preferences, media library state
|
||||
- **Survives:** Application restarts
|
||||
|
||||
### Short-Term Memory (STM)
|
||||
- **Session-based** — Stored in RAM
|
||||
- **Contents:** Conversation history, current workflow state
|
||||
- **Cleared:** On session end or restart
|
||||
|
||||
### Episodic Memory
|
||||
- **Transient** — Stored in RAM
|
||||
- **Contents:** Search results, active downloads, recent errors
|
||||
- **Cleared:** Frequently, after task completion
|
||||
| Tier | Storage | Contents | Lifetime |
|
||||
|------|---------|----------|----------|
|
||||
| **LTM** | JSON file (`data/memory/ltm.json`) | Config, library, watchlist, learned patterns | Permanent |
|
||||
| **STM** | RAM | Conversation history (capped) | Session |
|
||||
| **Episodic** | RAM | Search results, active downloads, errors | Short-lived |
|
||||
|
||||
## 🧪 Development
|
||||
|
||||
### Project Setup
|
||||
|
||||
```bash
|
||||
# Install all dependencies (including dev)
|
||||
poetry install
|
||||
|
||||
# Install pre-commit hooks
|
||||
make install-hooks
|
||||
|
||||
# Run the development server
|
||||
poetry run uvicorn alfred.app:app --reload
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
# Run all tests (parallel execution)
|
||||
# Run full suite (parallel)
|
||||
make test
|
||||
|
||||
# Run with coverage report
|
||||
make coverage
|
||||
|
||||
# Run specific test file
|
||||
poetry run pytest tests/test_agent.py -v
|
||||
# Run a single file
|
||||
uv run pytest tests/test_agent.py -v
|
||||
|
||||
# Run specific test
|
||||
poetry run pytest tests/test_config_loader.py::TestBootstrapEnv -v
|
||||
# Run a single class
|
||||
uv run pytest tests/test_agent.py::TestAgentInit -v
|
||||
|
||||
# Skip slow tests
|
||||
uv run pytest -m "not slow"
|
||||
```
|
||||
|
||||
### Test coverage
|
||||
|
||||
The suite covers:
|
||||
- **Agent loop** — tool execution, history, max iterations, error handling
|
||||
- **Tool registry** — OpenAI schema format, parameter extraction
|
||||
- **Prompts** — system prompt building, tool inclusion
|
||||
- **Memory** — LTM/STM/Episodic operations, persistence
|
||||
- **Filesystem tools** — path traversal security, folder listing
|
||||
- **File manager** — hard-link, move, seed links (real filesystem, no mocks)
|
||||
- **Application use cases** — `resolve_destination`, `create_seed_links`, `list_folder`, `move_media`
|
||||
- **Domain** — TV show/movie entities, shared value objects (`ImdbId`, `FilePath`, `FileSize`), subtitle scanner
|
||||
- **Repositories** — JSON-backed movie, TV show, subtitle repos
|
||||
- **Bootstrap** — secret generation, idempotency, URI construction
|
||||
- **Workflows** — YAML loading, structure validation
|
||||
- **Configuration** — boundary validation for all settings
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint and auto-fix
|
||||
make lint
|
||||
|
||||
# Format code
|
||||
make format
|
||||
|
||||
# Clean build artifacts
|
||||
make clean
|
||||
make lint # Ruff check --fix
|
||||
make format # Ruff format + check --fix
|
||||
```
|
||||
|
||||
### Adding a New Tool
|
||||
|
||||
1. **Create the tool function** in `alfred/agent/tools/`:
|
||||
1. Implement the function in `alfred/agent/tools/`:
|
||||
|
||||
```python
|
||||
# alfred/agent/tools/api.py
|
||||
def my_new_tool(param: str) -> dict[str, Any]:
|
||||
"""
|
||||
Short description of what this tool does.
|
||||
|
||||
This will be shown to the LLM to help it decide when to use this tool.
|
||||
"""
|
||||
"""Short description shown to the LLM to decide when to call this tool."""
|
||||
memory = get_memory()
|
||||
|
||||
# Your implementation here
|
||||
result = do_something(param)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"data": result
|
||||
}
|
||||
# ...
|
||||
return {"status": "ok", "data": result}
|
||||
```
|
||||
|
||||
2. **Register in the registry** (`alfred/agent/registry.py`):
|
||||
2. Register it in `alfred/agent/registry.py`:
|
||||
|
||||
```python
|
||||
tool_functions = [
|
||||
# ... existing tools ...
|
||||
api_tools.my_new_tool, # Add your tool here
|
||||
api_tools.my_new_tool,
|
||||
]
|
||||
```
|
||||
|
||||
The tool will be automatically registered with its parameters extracted from the function signature.
|
||||
The registry auto-generates the JSON schema from the function signature and docstring.
|
||||
|
||||
### Adding a Workflow
|
||||
|
||||
Create a YAML file in `alfred/agent/workflows/`:
|
||||
|
||||
```yaml
|
||||
name: my_workflow
|
||||
description: What this workflow does
|
||||
steps:
|
||||
- tool: resolve_destination
|
||||
description: Find where the file should go
|
||||
- tool: move_media
|
||||
description: Move the file
|
||||
```
|
||||
|
||||
Workflows are loaded automatically at startup.
|
||||
|
||||
### Version Management
|
||||
|
||||
```bash
|
||||
# Bump version (must be on main branch)
|
||||
make patch # 0.1.7 -> 0.1.8
|
||||
make minor # 0.1.7 -> 0.2.0
|
||||
make major # 0.1.7 -> 1.0.0
|
||||
# Must be on main branch
|
||||
make patch # 0.1.7 → 0.1.8
|
||||
make minor # 0.1.7 → 0.2.0
|
||||
make major # 0.1.7 → 1.0.0
|
||||
```
|
||||
|
||||
## 📚 API Reference
|
||||
|
||||
### Endpoints
|
||||
|
||||
#### `GET /health`
|
||||
Health check endpoint.
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"version": "0.1.7"
|
||||
}
|
||||
```
|
||||
|
||||
#### `GET /v1/models`
|
||||
List available models (OpenAI-compatible).
|
||||
|
||||
```json
|
||||
{
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": "alfred",
|
||||
"object": "model",
|
||||
"owned_by": "alfred"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### `POST /v1/chat/completions`
|
||||
Chat with the agent (OpenAI-compatible).
|
||||
|
||||
**Request:**
|
||||
```json
|
||||
{
|
||||
"model": "alfred",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Find Inception"}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"id": "chatcmpl-xxx",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": "alfred",
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "I found Inception (2010)..."
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
#### `GET /memory/state`
|
||||
View full memory state (debug endpoint).
|
||||
|
||||
#### `POST /memory/clear-session`
|
||||
Clear session memories (STM + Episodic).
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| `GET` | `/health` | Health check |
|
||||
| `GET` | `/v1/models` | List models (OpenAI-compatible) |
|
||||
| `POST` | `/v1/chat/completions` | Chat (OpenAI-compatible, streaming supported) |
|
||||
| `GET` | `/memory/state` | Full memory dump (debug) |
|
||||
| `POST` | `/memory/clear-session` | Clear STM + Episodic |
|
||||
| `GET` | `/memory/episodic/search-results` | Current search results |
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Agent doesn't respond
|
||||
|
||||
1. Check API keys in `.env`
|
||||
2. Verify LLM provider is running:
|
||||
2. Verify the LLM is running:
|
||||
```bash
|
||||
# For Ollama
|
||||
docker logs alfred-ollama
|
||||
|
||||
# Check if model is pulled
|
||||
docker exec alfred-ollama ollama list
|
||||
```
|
||||
3. Check Alfred logs: `docker logs alfred-core`
|
||||
@@ -499,76 +385,34 @@ Clear session memories (STM + Episodic).
|
||||
### qBittorrent connection failed
|
||||
|
||||
1. Verify qBittorrent is running: `docker ps | grep qbittorrent`
|
||||
2. Check Web UI is enabled in qBittorrent settings
|
||||
3. Verify credentials in `.env`:
|
||||
```bash
|
||||
QBITTORRENT_URL=http://qbittorrent:16140
|
||||
QBITTORRENT_USERNAME=admin
|
||||
QBITTORRENT_PASSWORD=<check-your-env>
|
||||
```
|
||||
|
||||
### Database connection issues
|
||||
|
||||
1. Check MongoDB is healthy: `docker logs alfred-mongodb`
|
||||
2. Verify credentials match in `.env`
|
||||
3. Try restarting: `make restart`
|
||||
2. Check credentials in `.env` (`QBITTORRENT_URL`, `QBITTORRENT_USERNAME`, `QBITTORRENT_PASSWORD`)
|
||||
|
||||
### Memory not persisting
|
||||
|
||||
1. Check `data/` directory exists and is writable
|
||||
1. Check `data/` directory is writable
|
||||
2. Verify volume mounts in `docker-compose.yaml`
|
||||
3. Check file permissions: `ls -la data/`
|
||||
|
||||
### Bootstrap fails
|
||||
|
||||
1. Ensure `.env.example` exists
|
||||
2. Check `pyproject.toml` has required sections:
|
||||
```toml
|
||||
[tool.alfred.settings]
|
||||
[tool.alfred.security]
|
||||
```
|
||||
3. Run manually: `python scripts/bootstrap.py`
|
||||
```bash
|
||||
make validate # Check what's wrong with .env
|
||||
make bootstrap # Regenerate (preserves existing secrets)
|
||||
```
|
||||
|
||||
### Tests failing
|
||||
|
||||
1. Update dependencies: `poetry install`
|
||||
2. Check Python version: `python --version` (needs 3.14+)
|
||||
3. Run specific failing test with verbose output:
|
||||
```bash
|
||||
poetry run pytest tests/test_failing.py -v --tb=long
|
||||
```
|
||||
```bash
|
||||
uv run pytest tests/test_failing.py -v --tb=long
|
||||
```
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
Contributions are welcome! Please follow these steps:
|
||||
|
||||
1. **Fork** the repository
|
||||
2. **Create** a feature branch: `git checkout -b feature/my-feature`
|
||||
3. **Make** your changes
|
||||
4. **Run** tests: `make test`
|
||||
5. **Run** linting: `make lint && make format`
|
||||
6. **Commit**: `git commit -m "feat: add my feature"`
|
||||
7. **Push**: `git push origin feature/my-feature`
|
||||
8. **Create** a Pull Request
|
||||
|
||||
### Commit Convention
|
||||
|
||||
We use [Conventional Commits](https://www.conventionalcommits.org/):
|
||||
|
||||
- `feat:` New feature
|
||||
- `fix:` Bug fix
|
||||
- `docs:` Documentation
|
||||
- `refactor:` Code refactoring
|
||||
- `test:` Adding tests
|
||||
- `chore:` Maintenance
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
- [Architecture Diagram](docs/architecture_diagram.md) — System architecture overview
|
||||
- [Class Diagram](docs/class_diagram.md) — Class structure and relationships
|
||||
- [Component Diagram](docs/component_diagram.md) — Component interactions
|
||||
- [Sequence Diagram](docs/sequence_diagram.md) — Sequence flows
|
||||
- [Flowchart](docs/flowchart.md) — System flowcharts
|
||||
1. Fork the repository
|
||||
2. Create a feature branch: `git checkout -b feat/my-feature`
|
||||
3. Make your changes + add tests
|
||||
4. Run `make test && make lint && make format`
|
||||
5. Commit with [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`, `infra:`
|
||||
6. Open a Pull Request
|
||||
|
||||
## 📄 License
|
||||
|
||||
@@ -576,19 +420,13 @@ MIT License — see [LICENSE](LICENSE) file for details.
|
||||
|
||||
## 🙏 Acknowledgments
|
||||
|
||||
- [LibreChat](https://github.com/danny-avila/LibreChat) — Beautiful chat interface
|
||||
- [LibreChat](https://github.com/danny-avila/LibreChat) — Chat interface
|
||||
- [Ollama](https://ollama.ai/) — Local LLM runtime
|
||||
- [DeepSeek](https://www.deepseek.com/) — LLM provider
|
||||
- [TMDB](https://www.themoviedb.org/) — Movie database
|
||||
- [TMDB](https://www.themoviedb.org/) — Movie & TV database
|
||||
- [qBittorrent](https://www.qbittorrent.org/) — Torrent client
|
||||
- [FastAPI](https://fastapi.tiangolo.com/) — Web framework
|
||||
- [Pydantic](https://docs.pydantic.dev/) — Data validation
|
||||
|
||||
## 📬 Support
|
||||
|
||||
- 📧 Email: francois.hodiaumont@gmail.com
|
||||
- 🐛 Issues: [GitHub Issues](https://github.com/francwa/alfred_media_organizer/issues)
|
||||
- 💬 Discussions: [GitHub Discussions](https://github.com/francwa/alfred_media_organizer/discussions)
|
||||
- [uv](https://github.com/astral-sh/uv) — Fast Python package manager
|
||||
|
||||
---
|
||||
|
||||
|
||||
+28
-2
@@ -4,6 +4,7 @@ import json
|
||||
from typing import Any
|
||||
|
||||
from alfred.infrastructure.persistence import get_memory
|
||||
from alfred.infrastructure.persistence.memory import MemoryRegistry
|
||||
|
||||
from .registry import Tool
|
||||
|
||||
@@ -13,6 +14,7 @@ class PromptBuilder:
|
||||
|
||||
def __init__(self, tools: dict[str, Tool]):
|
||||
self.tools = tools
|
||||
self._memory_registry = MemoryRegistry()
|
||||
|
||||
def build_tools_spec(self) -> list[dict[str, Any]]:
|
||||
"""Build the tool specification for the LLM API."""
|
||||
@@ -109,11 +111,30 @@ class PromptBuilder:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _format_memory_schema(self) -> str:
|
||||
"""Describe available memory components so the agent knows what to read/write and when."""
|
||||
schema = self._memory_registry.schema()
|
||||
tier_labels = {"ltm": "LONG-TERM (persisted)", "stm": "SHORT-TERM (session)", "episodic": "EPISODIC (volatile)"}
|
||||
lines = ["MEMORY COMPONENTS:"]
|
||||
|
||||
for tier, components in schema.items():
|
||||
if not components:
|
||||
continue
|
||||
lines.append(f"\n [{tier_labels.get(tier, tier.upper())}]")
|
||||
for c in components:
|
||||
access = c.get("access", "read")
|
||||
lines.append(f" {c['name']} ({access}): {c['description']}")
|
||||
for field_name, field_desc in c.get("fields", {}).items():
|
||||
lines.append(f" · {field_name}: {field_desc}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _format_config_context(self, memory) -> str:
|
||||
"""Format configuration context."""
|
||||
lines = ["CURRENT CONFIGURATION:"]
|
||||
if memory.ltm.config:
|
||||
for key, value in memory.ltm.config.items():
|
||||
folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()}
|
||||
if folders:
|
||||
for key, value in folders.items():
|
||||
lines.append(f" - {key}: {value}")
|
||||
else:
|
||||
lines.append(" (no configuration set)")
|
||||
@@ -138,6 +159,9 @@ class PromptBuilder:
|
||||
tools_desc = self._format_tools_description()
|
||||
tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
|
||||
|
||||
# Memory schema
|
||||
memory_schema = self._format_memory_schema()
|
||||
|
||||
# Configuration
|
||||
config_section = self._format_config_context(memory)
|
||||
if config_section:
|
||||
@@ -172,6 +196,8 @@ EXAMPLES:
|
||||
|
||||
{language_instruction}
|
||||
{tools_section}
|
||||
|
||||
{memory_schema}
|
||||
{config_section}
|
||||
{stm_context}
|
||||
{episodic_context}
|
||||
|
||||
@@ -97,6 +97,11 @@ def make_tools(settings) -> dict[str, Tool]:
|
||||
tool_functions = [
|
||||
fs_tools.set_path_for_folder,
|
||||
fs_tools.list_folder,
|
||||
fs_tools.resolve_destination,
|
||||
fs_tools.move_media,
|
||||
fs_tools.manage_subtitles,
|
||||
fs_tools.create_seed_links,
|
||||
fs_tools.learn,
|
||||
api_tools.find_media_imdb_id,
|
||||
api_tools.find_torrent,
|
||||
api_tools.add_torrent_by_index,
|
||||
|
||||
@@ -1,10 +1,200 @@
|
||||
"""Filesystem tools for folder management."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from alfred.application.filesystem import ListFolderUseCase, SetFolderPathUseCase
|
||||
import alfred as _alfred_pkg
|
||||
import yaml
|
||||
|
||||
from alfred.application.filesystem import (
|
||||
CreateSeedLinksUseCase,
|
||||
ListFolderUseCase,
|
||||
ManageSubtitlesUseCase,
|
||||
MoveMediaUseCase,
|
||||
ResolveDestinationUseCase,
|
||||
SetFolderPathUseCase,
|
||||
)
|
||||
from alfred.infrastructure.filesystem import FileManager
|
||||
|
||||
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
|
||||
|
||||
|
||||
def move_media(source: str, destination: str) -> dict[str, Any]:
    """
    Move a media file to a destination path.

    Copies the file safely first (with integrity check), then deletes the source.
    Use this to organise a downloaded file into the media library.

    Args:
        source: Absolute path to the source file.
        destination: Absolute path to the destination file (must not already exist).

    Returns:
        Dict with status, source, destination, filename, and size — or error details.
    """
    # The docstring wording is left untouched on purpose: per the README the
    # tool registry derives the LLM-facing tool description from it.
    outcome = MoveMediaUseCase(FileManager()).execute(source, destination)
    return outcome.to_dict()
|
||||
|
||||
|
||||
def resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """
    Compute the destination path in the media library for a release.

    Call this before move_media to get the correct library path. Handles:
    - Parsing the release name (quality, codec, group, season/episode)
    - Looking up any existing series folder in the library
    - Applying group-conflict rules (asks user if ambiguous)
    - Building the full destination path with correct naming conventions

    Args:
        release_name: Raw release folder or file name
            (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST").
        source_file: Absolute path to the source video file (used for extension).
        tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz").
        tmdb_year: Release/start year from TMDB (e.g. 1997).
        tmdb_episode_title: Episode title from TMDB for single-episode releases
            (e.g. "The Routine"). Omit for season packs and movies.
        confirmed_folder: If a previous call returned needs_clarification, pass
            the user-chosen folder name here to proceed.

    Returns:
        On success: dict with status, library_file, series_folder, season_folder,
            series_folder_name, season_folder_name, filename,
            is_new_series_folder.
        On ambiguity: dict with status="needs_clarification", question, options.
        On error: dict with status="error", error, message.
    """
    # The docstring wording is left untouched on purpose: per the README the
    # tool registry derives the LLM-facing tool description from it.
    resolver = ResolveDestinationUseCase()
    resolved = resolver.execute(
        release_name=release_name,
        source_file=source_file,
        tmdb_title=tmdb_title,
        tmdb_year=tmdb_year,
        tmdb_episode_title=tmdb_episode_title,
        confirmed_folder=confirmed_folder,
    )
    return resolved.to_dict()
|
||||
|
||||
|
||||
def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
    """
    Prepare a torrent subfolder so qBittorrent can keep seeding after a move.

    Hard-links the video file from the library into torrents/<original_folder_name>/,
    then copies all remaining files from the original download folder (subtitles,
    .nfo, .jpg, .txt, …) so the torrent data is complete.

    Call this after move_media when the user wants to keep seeding.

    Args:
        library_file: Absolute path to the video file now in the library.
        original_download_folder: Absolute path to the original download folder
            (may still contain subs, nfo, and other release files).

    Returns:
        Dict with status, torrent_subfolder, linked_file, copied_files,
        copied_count, skipped — or error details.
    """
    # The docstring wording is left untouched on purpose: per the README the
    # tool registry derives the LLM-facing tool description from it.
    seeder = CreateSeedLinksUseCase(FileManager())
    outcome = seeder.execute(library_file, original_download_folder)
    return outcome.to_dict()
|
||||
|
||||
|
||||
def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
    """
    Place subtitle files alongside an organised video file.

    Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source
    video, filters them according to the user's SubtitlePreferences (languages,
    min size, SDH, forced), and hard-links the passing files next to the
    destination video with the correct naming convention:
        fr.srt / fr.sdh.srt / fr.forced.srt / en.srt …

    Call this right after move_media or copy_media, passing the same source and
    destination paths. If no subtitles are found, returns ok with placed_count=0.

    Args:
        source_video: Absolute path to the original video file (in the download folder).
        destination_video: Absolute path to the placed video file (in the library).

    Returns:
        Dict with status, placed list (source, destination, filename), placed_count,
        skipped_count — or error details.
    """
    # The docstring wording is left untouched on purpose: per the README the
    # tool registry derives the LLM-facing tool description from it.
    placer = ManageSubtitlesUseCase(FileManager())
    outcome = placer.execute(source_video, destination_video)
    return outcome.to_dict()
|
||||
|
||||
|
||||
def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
    """
    Teach Alfred a new token mapping and persist it to the learned knowledge pack.

    Use this when a subtitle file contains an unrecognised token — after confirming
    with the user what the token means, call learn() to persist it so Alfred
    recognises it in future scans.

    Args:
        pack: Knowledge pack name. Currently only "subtitles" is supported.
        category: Category within the pack: "languages", "types", or "formats".
        key: The entry key — e.g. ISO 639-1 language code ("es"), type id ("sdh").
        values: List of tokens to add — e.g. ["spanish", "espanol", "spa"].

    Returns:
        Dict with status, added_count, and the updated token list.
    """
    # The docstring wording is left untouched on purpose: per the README the
    # tool registry derives the LLM-facing tool description from it.
    _VALID_PACKS = {"subtitles"}
    _VALID_CATEGORIES = {"languages", "types", "formats"}

    if pack not in _VALID_PACKS:
        return {"status": "error", "error": "unknown_pack", "message": f"Unknown pack '{pack}'. Valid: {sorted(_VALID_PACKS)}"}

    if category not in _VALID_CATEGORIES:
        return {"status": "error", "error": "unknown_category", "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}"}

    # A bare string would otherwise be iterated character by character and
    # each letter stored as its own token.
    if isinstance(values, str):
        values = [values]

    learned_path = _LEARNED_ROOT / "subtitles_learned.yaml"
    _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)

    data: dict = {}
    if learned_path.exists():
        try:
            with open(learned_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            return {"status": "error", "error": "read_failed", "message": str(e)}

    # The file may have been hand-edited; report an unexpected layout through
    # the normal error dict instead of crashing with AttributeError/TypeError.
    malformed = {
        "status": "error",
        "error": "read_failed",
        "message": f"{learned_path.name} has an unexpected structure",
    }
    if not isinstance(data, dict):
        return malformed
    cat_data = data.setdefault(category, {})
    if not isinstance(cat_data, dict):
        return malformed
    entry = cat_data.setdefault(key, {"tokens": []})
    if not isinstance(entry, dict):
        return malformed
    existing = entry.get("tokens") or []  # tolerate `tokens: null`
    if not isinstance(existing, list):
        return malformed

    # Keep first-seen order and skip tokens already stored *or* repeated
    # within `values` itself (list membership: token lists are tiny).
    new_tokens: list[str] = []
    for token in values:
        if token not in existing and token not in new_tokens:
            new_tokens.append(token)
    entry["tokens"] = existing + new_tokens

    # Write to a sibling temp file, then swap it in. Path.replace overwrites an
    # existing target on every platform, whereas Path.rename fails on Windows
    # once the learned file already exists.
    tmp = learned_path.with_suffix(".yaml.tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        tmp.replace(learned_path)
    except Exception as e:
        tmp.unlink(missing_ok=True)
        return {"status": "error", "error": "write_failed", "message": str(e)}

    return {
        "status": "ok",
        "pack": pack,
        "category": category,
        "key": key,
        "added_count": len(new_tokens),
        "tokens": entry["tokens"],
    }
|
||||
|
||||
|
||||
def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
from .loader import WorkflowLoader
|
||||
|
||||
__all__ = ["WorkflowLoader"]
|
||||
@@ -0,0 +1,52 @@
|
||||
"""WorkflowLoader — autodiscovers and loads workflow YAML files.
|
||||
|
||||
Scans the workflows/ directory for all .yaml files and exposes them
|
||||
as dicts. No manual registration needed — drop a new .yaml file and
|
||||
it will be picked up automatically.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_WORKFLOWS_DIR = Path(__file__).parent
|
||||
|
||||
|
||||
class WorkflowLoader:
|
||||
"""
|
||||
Loads all workflow definitions from the workflows/ directory.
|
||||
|
||||
Usage:
|
||||
loader = WorkflowLoader()
|
||||
all_workflows = loader.all()
|
||||
workflow = loader.get("organize_media")
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._workflows: dict[str, dict] = {}
|
||||
self._load()
|
||||
|
||||
def _load(self) -> None:
|
||||
for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
|
||||
try:
|
||||
data = yaml.safe_load(path.read_text(encoding="utf-8"))
|
||||
name = data.get("name") or path.stem
|
||||
self._workflows[name] = data
|
||||
logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
|
||||
|
||||
def all(self) -> dict[str, dict]:
|
||||
"""Return all loaded workflows keyed by name."""
|
||||
return self._workflows
|
||||
|
||||
def get(self, name: str) -> dict | None:
|
||||
"""Return a specific workflow by name, or None if not found."""
|
||||
return self._workflows.get(name)
|
||||
|
||||
def names(self) -> list[str]:
|
||||
"""Return all available workflow names."""
|
||||
return list(self._workflows.keys())
|
||||
@@ -0,0 +1,69 @@
|
||||
name: manage_subtitles
|
||||
description: >
|
||||
Place subtitle files alongside a video that has just been organised into the library.
|
||||
Detects the release pattern automatically, identifies and classifies all tracks,
|
||||
filters by user rules, and hard-links matching files to the destination.
|
||||
If any tracks are unrecognised, asks the user and optionally teaches Alfred.
|
||||
|
||||
trigger:
|
||||
examples:
|
||||
- "handle subtitles for The X-Files S01E01"
|
||||
- "place the subs next to the file"
|
||||
- "subtitles are in the Subs/ folder"
|
||||
- "add subtitles"
|
||||
|
||||
tools:
|
||||
- manage_subtitles
|
||||
- learn
|
||||
|
||||
memory:
|
||||
SubtitlePreferences: read
|
||||
Workflow: read-write
|
||||
|
||||
steps:
|
||||
- id: place_subtitles
|
||||
tool: manage_subtitles
|
||||
description: >
|
||||
Detect release pattern, identify and classify all subtitle tracks,
|
||||
filter by rules, hard-link matching files next to the destination video.
|
||||
Reads SubtitlePreferences from LTM for language/type/format filtering.
|
||||
params:
|
||||
source_video: "{source_video}"
|
||||
destination_video: "{destination_video}"
|
||||
imdb_id: "{imdb_id}"
|
||||
media_type: "{media_type}"
|
||||
release_group: "{release_group}"
|
||||
season: "{season}"
|
||||
episode: "{episode}"
|
||||
on_result:
|
||||
ok_placed_zero: skip # no subtitles found — not an error
|
||||
needs_clarification: ask_user # unrecognised tokens found
|
||||
|
||||
- id: ask_user
|
||||
description: >
|
||||
Some tracks could not be classified. Show the user the unresolved tokens
|
||||
and ask if they want to teach Alfred what they mean.
|
||||
If yes → go to learn_tokens. If no → end workflow.
|
||||
ask_user:
|
||||
question: >
|
||||
I could not identify some tokens in the subtitle files: {unresolved}.
|
||||
Do you want to teach me what they mean?
|
||||
answers:
|
||||
"yes": { next_step: learn_tokens }
|
||||
"no": { next_step: end }
|
||||
|
||||
- id: learn_tokens
|
||||
tool: learn
|
||||
description: >
|
||||
Persist a new token mapping to the learned knowledge pack so Alfred
|
||||
recognises it in future scans without asking again.
|
||||
params:
|
||||
pack: "subtitles"
|
||||
category: "{token_category}" # "languages", "types", or "formats"
|
||||
key: "{token_key}" # e.g. "es", "de"
|
||||
values: "{token_values}" # e.g. ["spanish", "espanol"]
|
||||
|
||||
subtitle_naming:
|
||||
standard: "{lang}.{ext}"
|
||||
sdh: "{lang}.sdh.{ext}"
|
||||
forced: "{lang}.forced.{ext}"
|
||||
@@ -0,0 +1,82 @@
|
||||
name: organize_media
|
||||
description: >
|
||||
Organise a downloaded series or movie into the media library.
|
||||
Triggered when the user asks to move/organize a specific title.
|
||||
Always moves the video file. Optionally creates seed links in the
|
||||
torrents folder so qBittorrent can keep seeding.
|
||||
|
||||
trigger:
|
||||
examples:
|
||||
- "organize Breaking Bad"
|
||||
- "organise Severance season 2"
|
||||
- "move Inception to my library"
|
||||
- "organize Breaking Bad season 1, keep seeding"
|
||||
|
||||
tools:
|
||||
- list_folder
|
||||
- find_media_imdb_id
|
||||
- resolve_destination
|
||||
- move_media
|
||||
- manage_subtitles
|
||||
- create_seed_links
|
||||
|
||||
memory:
|
||||
WorkspacePaths: read
|
||||
LibraryPaths: read
|
||||
Library: read-write
|
||||
Workflow: read-write
|
||||
Entities: read-write
|
||||
|
||||
steps:
|
||||
- id: list_downloads
|
||||
tool: list_folder
|
||||
description: List the download folder to find the target files.
|
||||
params:
|
||||
folder_type: download
|
||||
|
||||
- id: identify_media
|
||||
tool: find_media_imdb_id
|
||||
description: Confirm title, type (series/movie), and metadata via TMDB.
|
||||
|
||||
- id: resolve_destination
|
||||
tool: resolve_destination
|
||||
description: >
|
||||
Compute the correct destination path in the library.
|
||||
Uses the release name + TMDB metadata to build folder and file names.
|
||||
If multiple series folders exist for this title, returns
|
||||
needs_clarification and the user must pick one (re-call with confirmed_folder).
|
||||
|
||||
- id: move_file
|
||||
tool: move_media
|
||||
description: >
|
||||
Move the video file to library_file returned by resolve_destination.
|
||||
|
||||
- id: handle_subtitles
|
||||
tool: manage_subtitles
|
||||
description: >
|
||||
Place subtitle files alongside the video in the library.
|
||||
Pass the original source path and the new library destination path.
|
||||
on_missing: skip
|
||||
|
||||
- id: ask_seeding
|
||||
ask_user:
|
||||
question: "Do you want to keep seeding this torrent?"
|
||||
answers:
|
||||
"yes": { next_step: create_seed_links }
|
||||
"no": { next_step: update_library }
|
||||
|
||||
- id: create_seed_links
|
||||
tool: create_seed_links
|
||||
description: >
|
||||
Hard-link the library video file back into torrents/<original_folder>/
|
||||
and copy all remaining files from the original download folder
|
||||
(subs, nfo, jpg, …) so the torrent stays complete for seeding.
|
||||
|
||||
- id: update_library
|
||||
memory_write: Library
|
||||
description: Add the entry to the LTM library after a successful move.
|
||||
|
||||
naming_convention:
|
||||
# Resolved by domain entities (Movie, Episode) — not hardcoded here
|
||||
tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
|
||||
movie: "{title} ({year})/{title}.{year}.{ext}"
|
||||
+1
-1
@@ -29,7 +29,7 @@ app = FastAPI(
|
||||
version="0.2.0",
|
||||
)
|
||||
|
||||
memory_path = Path(settings.data_storage) / "memory"
|
||||
memory_path = Path(settings.data_storage_dir) / "memory"
|
||||
init_memory(storage_dir=str(memory_path))
|
||||
logger.info(f"Memory context initialized (path: {memory_path})")
|
||||
|
||||
|
||||
@@ -1,12 +1,32 @@
|
||||
"""Filesystem use cases."""
|
||||
|
||||
from .dto import ListFolderResponse, SetFolderPathResponse
|
||||
from .create_seed_links import CreateSeedLinksUseCase
|
||||
from .dto import (
|
||||
CreateSeedLinksResponse,
|
||||
ListFolderResponse,
|
||||
ManageSubtitlesResponse,
|
||||
MoveMediaResponse,
|
||||
PlacedSubtitle,
|
||||
SetFolderPathResponse,
|
||||
)
|
||||
from .list_folder import ListFolderUseCase
|
||||
from .manage_subtitles import ManageSubtitlesUseCase
|
||||
from .move_media import MoveMediaUseCase
|
||||
from .resolve_destination import ResolveDestinationUseCase, ResolvedDestination
|
||||
from .set_folder_path import SetFolderPathUseCase
|
||||
|
||||
__all__ = [
|
||||
"SetFolderPathUseCase",
|
||||
"ListFolderUseCase",
|
||||
"CreateSeedLinksUseCase",
|
||||
"MoveMediaUseCase",
|
||||
"ManageSubtitlesUseCase",
|
||||
"ResolveDestinationUseCase",
|
||||
"ResolvedDestination",
|
||||
"SetFolderPathResponse",
|
||||
"ListFolderResponse",
|
||||
"CreateSeedLinksResponse",
|
||||
"MoveMediaResponse",
|
||||
"ManageSubtitlesResponse",
|
||||
"PlacedSubtitle",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
"""CreateSeedLinksUseCase — prepares a torrent folder for continued seeding."""
|
||||
|
||||
import logging
|
||||
|
||||
from alfred.infrastructure.filesystem import FileManager
|
||||
from alfred.infrastructure.persistence import get_memory
|
||||
|
||||
from .dto import CreateSeedLinksResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CreateSeedLinksUseCase:
    """
    Rebuild a torrent subfolder so the torrent can keep seeding after a move.

    The actual filesystem work is delegated to ``FileManager.create_seed_links``;
    this use case only resolves the torrent workspace folder from memory and
    converts the manager's result dict into a ``CreateSeedLinksResponse``.
    """

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(
        self, library_file: str, original_download_folder: str
    ) -> CreateSeedLinksResponse:
        """
        Create the seed links for one library file.

        Args:
            library_file: Path of the video file inside the library.
            original_download_folder: Path of the folder the release was
                originally downloaded into.

        Returns:
            A response with status "ok" and the link/copy details, or status
            "error" with the error code and message.
        """
        torrent_root = get_memory().ltm.workspace.torrent

        # Without a configured torrent folder there is nowhere to link into.
        if not torrent_root:
            return CreateSeedLinksResponse(
                status="error",
                error="torrent_folder_not_set",
                message="Torrent folder is not configured. Use set_path_for_folder to set it.",
            )

        outcome = self.file_manager.create_seed_links(
            library_file, original_download_folder, torrent_root
        )

        # Anything other than an explicit "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return CreateSeedLinksResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return CreateSeedLinksResponse(
            status="ok",
            torrent_subfolder=outcome.get("torrent_subfolder"),
            linked_file=outcome.get("linked_file"),
            copied_files=outcome.get("copied_files"),
            copied_count=outcome.get("copied_count", 0),
            skipped=outcome.get("skipped"),
        )
|
||||
@@ -1,6 +1,56 @@
|
||||
"""Filesystem application DTOs."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class CopyMediaResponse:
    """Outcome of copying a media file, serialisable via ``to_dict``."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response.

        When ``error`` is truthy only status/error/message are emitted;
        otherwise the copy details (source, destination, filename, size).
        """
        payload: dict = {"status": self.status}
        if self.error:
            payload["error"] = self.error
            payload["message"] = self.message
            return payload

        payload["source"] = self.source
        payload["destination"] = self.destination
        payload["filename"] = self.filename
        payload["size"] = self.size
        return payload
|
||||
|
||||
|
||||
@dataclass
class MoveMediaResponse:
    """Outcome of moving a media file, serialisable via ``to_dict``."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response.

        A truthy ``error`` yields the compact error form (status, error,
        message); otherwise the move details are returned.
        """
        if not self.error:
            return {
                "status": self.status,
                "source": self.source,
                "destination": self.destination,
                "filename": self.filename,
                "size": self.size,
            }
        return {
            "status": self.status,
            "error": self.error,
            "message": self.message,
        }
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -29,6 +79,104 @@ class SetFolderPathResponse:
|
||||
return result
|
||||
|
||||
|
||||
@dataclass
class PlacedSubtitle:
    """A single subtitle file that was placed next to its video."""

    source: str
    destination: str
    filename: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict, in declaration order."""
        keys = ("source", "destination", "filename")
        return {key: getattr(self, key) for key in keys}
|
||||
|
||||
|
||||
@dataclass
class UnresolvedTrack:
    """A subtitle track that could not be placed without agent clarification."""

    raw_tokens: list[str]
    file_path: str | None = None
    file_size_kb: float | None = None
    reason: str = ""  # "unknown_language" | "low_confidence"

    def to_dict(self) -> dict:
        """Return all four fields as a plain dict."""
        return dict(
            raw_tokens=self.raw_tokens,
            file_path=self.file_path,
            file_size_kb=self.file_size_kb,
            reason=self.reason,
        )
|
||||
|
||||
|
||||
@dataclass
class AvailableSubtitle:
    """Summary of one subtitle track available on an embedded media item."""

    language: str  # ISO 639-2 code
    subtitle_type: str  # "standard" | "sdh" | "forced" | "unknown"

    def to_dict(self) -> dict:
        """Serialise the track; ``subtitle_type`` is exposed under the key "type"."""
        return dict(language=self.language, type=self.subtitle_type)
|
||||
|
||||
|
||||
@dataclass
class ManageSubtitlesResponse:
    """Result of the manage_subtitles use case."""

    status: str  # "ok" | "needs_clarification" | "error"
    video_path: str | None = None
    placed: list[PlacedSubtitle] | None = None
    skipped_count: int = 0
    unresolved: list[UnresolvedTrack] | None = None
    available: list[AvailableSubtitle] | None = None  # embedded tracks summary
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response.

        A truthy ``error`` short-circuits to status/error/message. Otherwise
        the placement summary is always present, while the "unresolved"
        (with its count) and "available" sections are added only when those
        lists are non-empty.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        placed_items = self.placed or []
        payload = {
            "status": self.status,
            "video_path": self.video_path,
            "placed": [item.to_dict() for item in placed_items],
            "placed_count": len(placed_items),
            "skipped_count": self.skipped_count,
        }

        if self.unresolved:
            payload["unresolved"] = [track.to_dict() for track in self.unresolved]
            payload["unresolved_count"] = len(self.unresolved)

        if self.available:
            payload["available"] = [track.to_dict() for track in self.available]

        return payload
|
||||
|
||||
|
||||
@dataclass
class CreateSeedLinksResponse:
    """Result of preparing a torrent folder for continued seeding."""

    status: str
    torrent_subfolder: str | None = None
    linked_file: str | None = None
    copied_files: list[str] | None = None
    copied_count: int = 0
    skipped: list[str] | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise the response.

        A truthy ``error`` yields status/error/message only. Otherwise the
        link details are returned, with ``copied_files`` and ``skipped``
        normalised to empty lists when unset.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        copied = self.copied_files if self.copied_files else []
        skipped = self.skipped if self.skipped else []
        return {
            "status": self.status,
            "torrent_subfolder": self.torrent_subfolder,
            "linked_file": self.linked_file,
            "copied_files": copied,
            "copied_count": self.copied_count,
            "skipped": skipped,
        }
|
||||
|
||||
|
||||
@dataclass
|
||||
class ListFolderResponse:
|
||||
"""Response from listing a folder."""
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
"""ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from alfred.domain.shared.value_objects import ImdbId
|
||||
from alfred.domain.subtitles.entities import SubtitleTrack
|
||||
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
|
||||
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
|
||||
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
|
||||
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
|
||||
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
|
||||
from alfred.domain.subtitles.services.placer import PlacedTrack, SubtitlePlacer
|
||||
from alfred.domain.subtitles.services.utils import available_subtitles
|
||||
from alfred.domain.subtitles.value_objects import ScanStrategy
|
||||
from alfred.infrastructure.persistence.context import get_memory
|
||||
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
|
||||
from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
|
||||
|
||||
from .dto import AvailableSubtitle, ManageSubtitlesResponse, PlacedSubtitle, UnresolvedTrack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _infer_library_root(dest_video: Path, media_type: str) -> Path:
|
||||
"""
|
||||
Infer the media library root folder from the destination video path.
|
||||
|
||||
TV show: video → Season 01 → The X-Files (3 levels up)
|
||||
Movie: video → Inception (2010) (1 level up)
|
||||
"""
|
||||
if media_type == "tv_show":
|
||||
return dest_video.parent.parent
|
||||
return dest_video.parent
|
||||
|
||||
|
||||
def _to_imdb_id(raw: str | None) -> ImdbId | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return ImdbId(raw)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class ManageSubtitlesUseCase:
    """
    Full subtitle pipeline:

    1. Load knowledge base
    2. Detect (or confirm) the release pattern
    3. Identify all tracks (ffprobe + filesystem scan)
    4. Load + resolve rules for this media
    5. Match tracks against rules
    6. If any tracks are unresolved → return needs_clarification (don't place yet)
    7. Place matched tracks via hard-link
    8. Persist to .alfred/metadata.yaml

    The use case is stateless — all dependencies are instantiated inline.
    """

    def execute(
        self,
        source_video: str,
        destination_video: str,
        imdb_id: str | None = None,
        media_type: str = "tv_show",
        release_group: str | None = None,
        season: int | None = None,
        episode: int | None = None,
        confirmed_pattern_id: str | None = None,
    ) -> ManageSubtitlesResponse:
        """
        Run the subtitle pipeline for one video.

        Args:
            source_video: Path of the original video; must exist on disk.
            destination_video: Path of the video inside the library.
            imdb_id: Optional raw IMDb id; ignored when it cannot be converted.
            media_type: "tv_show" or anything else (treated like a movie when
                inferring the library root).
            release_group: Optional release group, forwarded to the
                identifier, the rule repository and the history store.
            season: Optional season number recorded in the history.
            episode: Optional episode number recorded in the history.
            confirmed_pattern_id: Optional pattern id that overrides stored
                and auto-detected patterns.

        Returns:
            A ``ManageSubtitlesResponse`` with status "ok",
            "needs_clarification" (unresolved tracks, nothing placed) or
            "error" (missing source video / no usable pattern).
        """
        source_path = Path(source_video)
        dest_path = Path(destination_video)

        if not source_path.exists():
            return ManageSubtitlesResponse(
                status="error",
                error="source_not_found",
                message=f"Source video not found: {source_video}",
            )

        kb = SubtitleKnowledgeBase(KnowledgeLoader())
        library_root = _infer_library_root(dest_path, media_type)
        store = SubtitleMetadataStore(library_root)
        repo = RuleSetRepository(library_root)

        # --- Pattern resolution ---
        pattern = self._resolve_pattern(
            kb, store, source_path, confirmed_pattern_id, release_group
        )
        if pattern is None:
            return ManageSubtitlesResponse(
                status="error",
                error="pattern_not_found",
                message="Could not determine subtitle pattern for this release.",
            )

        # --- Identify ---
        media_id = _to_imdb_id(imdb_id)
        identifier = SubtitleIdentifier(kb)
        metadata = identifier.identify(
            video_path=source_path,
            pattern=pattern,
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
        )

        if metadata.total_count == 0:
            logger.info(f"ManageSubtitles: no subtitle tracks found for {source_path.name}")
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
            )

        # --- Embedded short-circuit ---
        # Embedded tracks are only reported back; nothing is matched or placed.
        if pattern.scan_strategy == ScanStrategy.EMBEDDED:
            logger.info("ManageSubtitles: embedded pattern — skipping matcher")
            available = [
                AvailableSubtitle(
                    language=t.language.code if t.language else "?",
                    subtitle_type=t.subtitle_type.value,
                )
                for t in available_subtitles(metadata.embedded_tracks)
            ]
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
                available=available,
            )

        # --- Match (external only) ---
        subtitle_prefs = None
        try:
            memory = get_memory()
            subtitle_prefs = memory.ltm.subtitle_preferences
        except Exception:
            # Best-effort lookup: the pipeline continues without preferences,
            # but the failure is logged instead of being silently discarded.
            logger.debug(
                "ManageSubtitles: subtitle preferences unavailable — continuing without them",
                exc_info=True,
            )
        rules = repo.load(release_group, subtitle_prefs).resolve()
        matcher = SubtitleMatcher()
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        if unresolved:
            logger.info(
                f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
            )
            return ManageSubtitlesResponse(
                status="needs_clarification",
                video_path=destination_video,
                placed=[],
                unresolved=[_to_unresolved_dto(t) for t in unresolved],
            )

        if not matched:
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=metadata.total_count,
            )

        # --- Place ---
        placer = SubtitlePlacer()
        place_result = placer.place(matched, dest_path)

        # --- Persist ---
        if place_result.placed:
            pairs = _pair_placed_with_tracks(place_result.placed, matched)
            store.append_history(pairs, season, episode, release_group)

        placed_dtos = [
            PlacedSubtitle(
                source=str(p.source),
                destination=str(p.destination),
                filename=p.filename,
            )
            for p in place_result.placed
        ]

        return ManageSubtitlesResponse(
            status="ok",
            video_path=destination_video,
            placed=placed_dtos,
            skipped_count=place_result.skipped_count,
        )

    def _resolve_pattern(
        self,
        kb: SubtitleKnowledgeBase,
        store: SubtitleMetadataStore,
        source_path: Path,
        confirmed_pattern_id: str | None,
        release_group: str | None,
    ):
        """
        Pick the subtitle pattern to use, in priority order:

        1. the caller-supplied ``confirmed_pattern_id`` (if the knowledge base knows it),
        2. the pattern id previously confirmed in the metadata store,
        3. auto-detection on the release folder (confidence >= 0.6),
        4. the "adjacent" pattern as a fallback.

        Returns whatever ``kb.pattern`` / the detector yields; the caller
        treats ``None`` as "pattern not found". ``release_group`` is accepted
        but not used by the resolution itself.
        """
        # 1. Explicit override from caller
        if confirmed_pattern_id:
            p = kb.pattern(confirmed_pattern_id)
            if p:
                return p
            logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

        # 2. Previously confirmed in metadata store
        stored_id = store.confirmed_pattern()
        if stored_id:
            p = kb.pattern(stored_id)
            if p:
                logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                return p

        # 3. Auto-detect
        release_root = source_path.parent
        detector = PatternDetector(kb)
        result = detector.detect(release_root, source_path)

        if result["detected"] and result["confidence"] >= 0.6:
            logger.info(
                f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                f"(confidence={result['confidence']:.2f})"
            )
            return result["detected"]

        # 4. Fallback — adjacent (safest default)
        logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
        return kb.pattern("adjacent")
|
||||
|
||||
|
||||
def _to_unresolved_dto(track: SubtitleTrack, min_confidence: float = 0.7) -> UnresolvedTrack:
    """
    Convert an unresolved ``SubtitleTrack`` into its ``UnresolvedTrack`` DTO.

    The reason is "unknown_language" when the track has no language and
    "low_confidence" otherwise. ``min_confidence`` is accepted for interface
    compatibility but is not consulted.
    """
    if track.language is None:
        why = "unknown_language"
    else:
        why = "low_confidence"

    path_text = str(track.file_path) if track.file_path else None

    return UnresolvedTrack(
        raw_tokens=track.raw_tokens,
        file_path=path_text,
        file_size_kb=track.file_size_kb,
        reason=why,
    )
|
||||
|
||||
|
||||
def _pair_placed_with_tracks(
|
||||
placed: list[PlacedTrack],
|
||||
tracks: list[SubtitleTrack],
|
||||
) -> list[tuple[PlacedTrack, SubtitleTrack]]:
|
||||
"""
|
||||
Pair each PlacedTrack with its originating SubtitleTrack by source path.
|
||||
Falls back to positional matching if paths don't align.
|
||||
"""
|
||||
track_by_path = {t.file_path: t for t in tracks if t.file_path}
|
||||
pairs = []
|
||||
for p in placed:
|
||||
track = track_by_path.get(p.source)
|
||||
if track is None and tracks:
|
||||
track = tracks[0] # positional fallback
|
||||
if track:
|
||||
pairs.append((p, track))
|
||||
return pairs
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Move media use case."""
|
||||
|
||||
import logging
|
||||
|
||||
from alfred.infrastructure.filesystem import FileManager
|
||||
|
||||
from .dto import MoveMediaResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MoveMediaUseCase:
    """Move a media file by delegating to ``FileManager.move_file``."""

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(self, source: str, destination: str) -> MoveMediaResponse:
        """
        Move a media file from source to destination.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            MoveMediaResponse with status "ok" and the move details when the
            file manager reports success, otherwise status "error" with the
            manager's error code and message.
        """
        outcome = self.file_manager.move_file(source, destination)

        # Anything other than an explicit "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return MoveMediaResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return MoveMediaResponse(
            status="ok",
            source=outcome.get("source"),
            destination=outcome.get("destination"),
            filename=outcome.get("filename"),
            size=outcome.get("size"),
        )
|
||||
@@ -0,0 +1,246 @@
|
||||
"""
|
||||
ResolveDestinationUseCase — compute the library destination path for a release.
|
||||
|
||||
Steps:
|
||||
1. Parse the release name
|
||||
2. Look up TMDB for title + year (+ episode title if single episode)
|
||||
3. Scan the library for an existing series folder
|
||||
4. Apply group-conflict rules
|
||||
5. Return the computed paths (or needs_clarification if ambiguous)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from alfred.domain.media.release_parser import ParsedRelease, parse_release
|
||||
from alfred.infrastructure.persistence import get_memory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Characters forbidden on Windows filesystems (served via NFS)
|
||||
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')
|
||||
|
||||
|
||||
def _sanitise(text: str) -> str:
|
||||
return _WIN_FORBIDDEN.sub("", text)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DTOs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class ResolvedDestination:
    """Computed destination paths for a release, or the reason none could be computed."""

    status: str  # "ok" | "needs_clarification" | "error"

    # Populated on "ok"
    library_file: str | None = None  # absolute path of the destination video file
    series_folder: str | None = None  # absolute path of the series root folder
    season_folder: str | None = None  # absolute path of the season subfolder
    series_folder_name: str | None = None  # just the folder name (for display)
    season_folder_name: str | None = None
    filename: str | None = None
    is_new_series_folder: bool = False  # True if we're creating the folder

    # Populated on "needs_clarification"
    question: str | None = None
    options: list[str] | None = None  # existing group folder names to pick from

    # Populated on "error"
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise only the fields relevant to the current ``status``.

        "error" → error + message; "needs_clarification" → question +
        options (empty list when unset); any other status → the path fields.
        """
        payload: dict = {"status": self.status}

        if self.status == "error":
            payload["error"] = self.error
            payload["message"] = self.message
        elif self.status == "needs_clarification":
            payload["question"] = self.question
            payload["options"] = self.options or []
        else:
            path_fields = (
                "library_file",
                "series_folder",
                "season_folder",
                "series_folder_name",
                "season_folder_name",
                "filename",
                "is_new_series_folder",
            )
            for name in path_fields:
                payload[name] = getattr(self, name)

        return payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Use case
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ResolveDestinationUseCase:
    """
    Compute the full destination path for a media file being organised.

    The caller provides:
    - release_name: the raw release folder/file name
    - source_file: path to the actual video file (to get extension)
    - tmdb_title: canonical title from TMDB
    - tmdb_year: release year from TMDB
    - tmdb_episode_title: episode title from TMDB (None for movies / season packs)
    - confirmed_folder: if the user already answered needs_clarification, pass
      the chosen folder name here to skip the check

    Returns a ResolvedDestination.
    """

    def execute(
        self,
        release_name: str,
        source_file: str,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None = None,
        confirmed_folder: str | None = None,
    ) -> ResolvedDestination:
        """Parse the release name and dispatch to the movie or TV show resolver."""
        parsed = parse_release(release_name)
        ext = Path(source_file).suffix  # ".mkv"

        if parsed.is_movie:
            return self._resolve_movie(parsed, tmdb_title, tmdb_year, ext)
        return self._resolve_tvshow(
            parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder
        )

    # ------------------------------------------------------------------
    # Movie
    # ------------------------------------------------------------------

    def _resolve_movie(
        self, parsed: ParsedRelease, tmdb_title: str, tmdb_year: int, ext: str
    ) -> ResolvedDestination:
        """
        Build ``<movie library>/<folder>/<file>`` for a movie release.

        Returns an "error" result when no movie library path is configured.
        The movie folder is reported through the ``series_folder*`` fields.
        """
        memory = get_memory()
        movies_root = memory.ltm.library_paths.get("movie")
        if not movies_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="Movie library path is not configured.",
            )

        folder_name = _sanitise(parsed.movie_folder_name(tmdb_title, tmdb_year))
        filename = _sanitise(parsed.movie_filename(tmdb_title, tmdb_year, ext))

        folder_path = Path(movies_root) / folder_name
        file_path = folder_path / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(file_path),
            series_folder=str(folder_path),
            series_folder_name=folder_name,
            filename=filename,
            is_new_series_folder=not folder_path.exists(),
        )

    # ------------------------------------------------------------------
    # TV show
    # ------------------------------------------------------------------

    def _resolve_tvshow(
        self,
        parsed: ParsedRelease,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None,
        ext: str,
        confirmed_folder: str | None,
    ) -> ResolvedDestination:
        """
        Build ``<tv library>/<series folder>/<season folder>/<file>``.

        Series folder selection:
        - ``confirmed_folder`` given → used as-is (the library is not scanned);
        - no matching folder in the library → a new name is generated;
        - exactly one match → that folder is reused;
        - several matches → "needs_clarification" with the candidates.

        Returns an "error" result when no TV show library path is configured.
        """
        memory = get_memory()
        tv_root = memory.ltm.library_paths.get("tv_show")
        if not tv_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="TV show library path is not configured.",
            )

        tv_root_path = Path(tv_root)

        # --- Determine series folder name ---
        if confirmed_folder:
            # NOTE(review): confirmed_folder is used verbatim as a path
            # component; confirm callers only pass one of the offered options.
            series_folder_name = confirmed_folder
            is_new = not (tv_root_path / confirmed_folder).exists()
        else:
            # Only scan the library when the answer is actually needed.
            existing = _find_existing_series_folders(tv_root_path, tmdb_title, tmdb_year)

            if len(existing) == 0:
                # No existing folder — create with release group
                series_folder_name = _sanitise(parsed.show_folder_name(tmdb_title, tmdb_year))
                is_new = True
            elif len(existing) == 1:
                # Exactly one match — use it regardless of group
                series_folder_name = existing[0]
                is_new = False
            else:
                # Multiple folders — ask user
                return ResolvedDestination(
                    status="needs_clarification",
                    question=(
                        f"Multiple folders found for '{tmdb_title}' in your library. "
                        f"Which one should I use for this release ({parsed.group})?"
                    ),
                    options=existing,
                )

        # --- Build paths ---
        # The season folder is derived from the raw release name, so it is
        # sanitised like every other generated path component.
        season_folder_name = _sanitise(parsed.season_folder_name())
        if parsed.is_season_pack:
            filename = _sanitise(season_folder_name + ext)
        else:
            filename = _sanitise(parsed.episode_filename(tmdb_episode_title, ext))

        series_path = tv_root_path / series_folder_name
        season_path = series_path / season_folder_name
        file_path = season_path / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(file_path),
            series_folder=str(series_path),
            season_folder=str(season_path),
            series_folder_name=series_folder_name,
            season_folder_name=season_folder_name,
            filename=filename,
            is_new_series_folder=is_new,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _find_existing_series_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]:
    """
    Return names of folders in tv_root that match the given title + year.

    Matching is loose: the sanitised title with spaces replaced by dots,
    followed by ".<year>", must appear (case-insensitively) at the start of
    the folder name.

    Args:
        tv_root: TV library root to scan (direct children only).
        tmdb_title: Canonical show title.
        tmdb_year: Show year.

    Returns:
        Sorted folder names; empty when ``tv_root`` is missing or is not a
        directory.
    """
    # is_dir() rather than exists(): iterating a path that exists but is a
    # regular file would raise NotADirectoryError.
    if not tv_root.is_dir():
        return []

    # Build a normalised prefix to match against: "oz.1997"
    clean_title = _sanitise(tmdb_title).replace(" ", ".")
    prefix = f"{clean_title}.{tmdb_year}".lower()

    return sorted(
        entry.name
        for entry in tv_root.iterdir()
        if entry.is_dir() and entry.name.lower().startswith(prefix)
    )
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Media domain — shared naming and release parsing."""
|
||||
|
||||
from .release_parser import ParsedRelease, parse_release
|
||||
|
||||
__all__ = ["ParsedRelease", "parse_release"]
|
||||
@@ -0,0 +1,306 @@
|
||||
"""
|
||||
release_parser.py — Parse a release name into structured components.
|
||||
|
||||
Handles both dot-separated and space-separated release names:
|
||||
Oz.S03.1080p.WEBRip.x265-KONTRAST
|
||||
Oz S03 1080p WEBRip x265-KONTRAST
|
||||
Inception.2010.1080p.BluRay.x265-GROUP
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Known quality tokens
_QUALITIES = {"2160p", "1080p", "720p", "480p", "576p", "4k", "8k"}

# Known source tokens (case-insensitive match; entries are stored lower-case)
_SOURCES = {
    "bluray", "blu-ray", "bdrip", "brrip",
    "webrip", "web-rip", "webdl", "web-dl", "web",
    "hdtv", "hdrip", "dvdrip", "dvd", "vodrip",
    "amzn", "nf", "dsnp", "hmax", "atvp",
}

# Known codec tokens (entries are stored lower-case)
_CODECS = {
    "x264", "x265", "h264", "h265", "hevc", "avc",
    "xvid", "divx", "av1", "vp9",
    "h.264", "h.265",
}

# Windows-forbidden characters (we strip these from display names)
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')

# Episode/season pattern: S01, S01E02, S01E02E03 (case-insensitive).
# Groups: (1) season, 1-2 digits; (2) first episode, 2 digits, optional;
# (3) second episode, 2 digits, optional.
# NOTE(review): the "1x02" form is NOT matched by this expression.
_SEASON_EP_RE = re.compile(
    r"S(\d{1,2})(?:E(\d{2})(?:E(\d{2}))?)?",
    re.IGNORECASE,
)

# Year pattern: a standalone four-digit number in the range 1900-2099
_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedRelease:
|
||||
"""Structured representation of a parsed release name."""
|
||||
|
||||
raw: str # original release name (untouched)
|
||||
normalised: str # dots instead of spaces
|
||||
title: str # show/movie title (dots, no year/season/tech)
|
||||
year: int | None # movie year or show start year (from TMDB)
|
||||
season: int | None # season number (None for movies)
|
||||
episode: int | None # first episode number (None if season-pack)
|
||||
episode_end: int | None # last episode for multi-ep (None otherwise)
|
||||
quality: str | None # 1080p, 2160p, …
|
||||
source: str | None # WEBRip, BluRay, …
|
||||
codec: str | None # x265, HEVC, …
|
||||
group: str # release group, "UNKNOWN" if missing
|
||||
tech_string: str # quality.source.codec joined with dots
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Derived helpers
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def is_movie(self) -> bool:
|
||||
return self.season is None
|
||||
|
||||
@property
|
||||
def is_season_pack(self) -> bool:
|
||||
return self.season is not None and self.episode is None
|
||||
|
||||
def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
|
||||
"""
|
||||
Build the series root folder name.
|
||||
|
||||
Format: {Title}.{Year}.{Tech}-{Group}
|
||||
Example: Oz.1997.1080p.WEBRip.x265-KONTRAST
|
||||
"""
|
||||
title_part = _sanitise_for_fs(tmdb_title).replace(" ", ".")
|
||||
tech = self.tech_string or "Unknown"
|
||||
return f"{title_part}.{tmdb_year}.{tech}-{self.group}"
|
||||
|
||||
def season_folder_name(self) -> str:
|
||||
"""
|
||||
Build the season subfolder name = normalised release name (no episode).
|
||||
|
||||
Example: Oz.S03.1080p.WEBRip.x265-KONTRAST
|
||||
For a single-episode release we still strip the episode token so the
|
||||
folder can hold the whole season.
|
||||
"""
|
||||
return _strip_episode_from_normalised(self.normalised)
|
||||
|
||||
def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str:
|
||||
"""
|
||||
Build the episode filename.
|
||||
|
||||
Format: {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
|
||||
Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv
|
||||
|
||||
If tmdb_episode_title is None, omits the episode title segment.
|
||||
"""
|
||||
title_part = _sanitise_for_fs(self.title) # already dotted from normalised
|
||||
s = f"S{self.season:02d}" if self.season is not None else ""
|
||||
e = f"E{self.episode:02d}" if self.episode is not None else ""
|
||||
se = s + e
|
||||
|
||||
ep_title = ""
|
||||
if tmdb_episode_title:
|
||||
ep_title = "." + _sanitise_for_fs(tmdb_episode_title).replace(" ", ".")
|
||||
|
||||
tech = self.tech_string or "Unknown"
|
||||
ext_clean = ext.lstrip(".")
|
||||
return f"{title_part}.{se}{ep_title}.{tech}-{self.group}.{ext_clean}"
|
||||
|
||||
def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
    """
    Compose the movie folder name.

    Format:  {Title}.{Year}.{Tech}-{Group}
    Example: Inception.2010.1080p.BluRay.x265-GROUP

    Movies use the same layout as a series root folder, so this simply
    delegates to show_folder_name.
    """
    folder = self.show_folder_name(tmdb_title, tmdb_year)
    return folder
|
||||
|
||||
def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str:
    """
    Compose the movie filename: the movie folder name plus the extension.

    Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv

    Leading dots on ext are dropped, so "mkv" and ".mkv" give the same result.
    """
    suffix = ext.lstrip(".")
    stem = self.movie_folder_name(tmdb_title, tmdb_year)
    return stem + "." + suffix
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_release(name: str) -> ParsedRelease:
    """
    Turn a release name into a ParsedRelease.

    Both dot-separated and space-separated names are accepted: the name is
    normalised to dotted form first, then split into tokens from which the
    season/episode, technical attributes, title and year are extracted.
    """
    normalised = _normalise(name)
    tokens = normalised.split(".")

    season, episode, episode_end = _extract_season_episode(tokens)
    quality, source, codec, group, tech_tokens = _extract_tech(tokens)
    title = _extract_title(tokens, season, episode, tech_tokens)
    year = _extract_year(tokens, title)

    # Only the attributes that were actually found take part in the tech string.
    tech_string = ".".join(filter(None, (quality, source, codec)))

    return ParsedRelease(
        raw=name,
        normalised=normalised,
        title=title,
        year=year,
        season=season,
        episode=episode,
        episode_end=episode_end,
        quality=quality,
        source=source,
        codec=codec,
        group=group,
        tech_string=tech_string,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalise(name: str) -> str:
|
||||
"""Replace spaces with dots, collapse multiple dots."""
|
||||
s = name.replace(" ", ".")
|
||||
s = re.sub(r"\.{2,}", ".", s)
|
||||
return s.strip(".")
|
||||
|
||||
|
||||
def _sanitise_for_fs(text: str) -> str:
    """Return text with every match of the Windows-forbidden pattern removed."""
    cleaned = _WIN_FORBIDDEN.sub("", text)
    return cleaned
|
||||
|
||||
|
||||
def _extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]:
    """
    Find the season/episode marker in the re-joined tokens.

    Returns (season, episode, episode_end). All three are None when the
    season/episode pattern does not match; episode and episode_end are None
    when their capture group is empty.
    """
    match = _SEASON_EP_RE.search(".".join(tokens))
    if match is None:
        return None, None, None

    season_raw, episode_raw, end_raw = match.group(1, 2, 3)
    return (
        int(season_raw),
        int(episode_raw) if episode_raw else None,
        int(end_raw) if end_raw else None,
    )
|
||||
|
||||
|
||||
def _extract_tech(
    tokens: list[str],
) -> tuple[str | None, str | None, str | None, str, set[str]]:
    """
    Pull quality, source, codec and release group out of the tokens.

    Returns (quality, source, codec, group, tech_token_set), where
    tech_token_set holds every token that was recognised as technical.

    The group is found in two steps:
      1. A token of the form codec-GROUP (e.g. x265-GROUP) sets codec and group.
      2. If the group is still "UNKNOWN" afterwards, the rightmost dashed
         token that is not a known source supplies it
         (covers 10bit-GROUP, AAC5.1-GROUP, etc.).
    """
    quality: str | None = None
    source: str | None = None
    codec: str | None = None
    group = "UNKNOWN"
    tech_tokens: set[str] = set()

    for token in tokens:
        lowered = token.lower()

        if lowered in _QUALITIES:
            quality = token
        elif lowered in _SOURCES:
            source = token
        else:
            # rpartition yields an empty separator when the token has no dash.
            head, dash, tail = token.rpartition("-")
            if dash and head.lower() in _CODECS:
                # codec-GROUP (highest priority for group)
                codec = head
                group = tail or "UNKNOWN"
            elif dash and (head.lower() in _SOURCES or lowered.replace("-", "") in _SOURCES):
                # source with dash: Web-DL, WEB-DL, etc.
                source = token
            elif lowered in _CODECS:
                codec = token
            else:
                continue  # not a technical token

        tech_tokens.add(token)

    if group == "UNKNOWN":
        # Fallback: rightmost dashed token that isn't a known source.
        for token in reversed(tokens):
            head, dash, tail = token.rpartition("-")
            if not dash:
                continue
            lowered = token.lower()
            if lowered in _SOURCES or lowered.replace("-", "") in _SOURCES:
                continue
            if tail:  # non-empty group part
                group = tail
                break

    return quality, source, codec, group, tech_tokens
|
||||
|
||||
|
||||
def _extract_title(tokens: list[str], season: int | None, episode: int | None, tech_tokens: set[str]) -> str:
    """
    Extract the title: every token before the first season/year/tech token.

    Args:
        tokens: Dot-split tokens of the normalised release name.
        season: Unused here; kept for the existing call signature.
        episode: Unused here; kept for the existing call signature.
        tech_tokens: Tokens already recognised as technical by _extract_tech.

    Returns:
        The title tokens joined with dots. If the very first token already
        stops the scan, that first token is returned as the title.
    """
    # Loop-invariant lookups, built once instead of once per token.
    known_tech = _QUALITIES | _SOURCES | _CODECS
    dash_markers = _CODECS | _SOURCES

    title_parts: list[str] = []
    for tok in tokens:
        # Stop at a season token or at a year.
        if _SEASON_EP_RE.match(tok) or _YEAR_RE.fullmatch(tok):
            break
        # Stop at tech tokens.
        if tok in tech_tokens or tok.lower() in known_tech:
            break
        # Stop if a dashed token has a codec/source part (likely codec-GROUP).
        if "-" in tok and any(part.lower() in dash_markers for part in tok.split("-")):
            break
        title_parts.append(tok)

    return ".".join(title_parts) if title_parts else tokens[0]
|
||||
|
||||
|
||||
def _extract_year(tokens: list[str], title: str) -> int | None:
    """
    Return the first year token found after the title, or None.

    The number of dot-separated parts in title decides how many leading
    tokens are skipped, so a year-like word inside the title is ignored.
    """
    skip = len(title.split("."))
    matches = (_YEAR_RE.fullmatch(tok) for tok in tokens[skip:])
    first_hit = next((m for m in matches if m), None)
    return int(first_hit.group(1)) if first_hit else None
|
||||
|
||||
|
||||
def _strip_episode_from_normalised(normalised: str) -> str:
|
||||
"""
|
||||
Remove all episode parts (Exx) from a normalised release name, keeping Sxx.
|
||||
|
||||
Oz.S03E01.1080p... → Oz.S03.1080p...
|
||||
Archer.S14E09E10E11.1080p... → Archer.S14.1080p...
|
||||
"""
|
||||
return re.sub(r"(S\d{2})(E\d{2})+", r"\1", normalised, flags=re.IGNORECASE)
|
||||
@@ -1,14 +1,37 @@
|
||||
"""Subtitles domain - Business logic for subtitle management (shared across movies and TV shows)."""
|
||||
"""Subtitles domain — subtitle identification, classification and placement."""
|
||||
|
||||
from .entities import Subtitle
|
||||
from .aggregates import SubtitleRuleSet
|
||||
from .entities import MediaSubtitleMetadata, SubtitleTrack
|
||||
from .exceptions import SubtitleNotFound
|
||||
from .services import SubtitleService
|
||||
from .value_objects import Language, SubtitleFormat
|
||||
from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase
|
||||
from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher
|
||||
from .value_objects import (
|
||||
RuleScope,
|
||||
ScanStrategy,
|
||||
SubtitleFormat,
|
||||
SubtitleLanguage,
|
||||
SubtitleMatchingRules,
|
||||
SubtitlePattern,
|
||||
SubtitleType,
|
||||
TypeDetectionMethod,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Subtitle",
|
||||
"Language",
|
||||
"SubtitleTrack",
|
||||
"MediaSubtitleMetadata",
|
||||
"SubtitleRuleSet",
|
||||
"SubtitleKnowledgeBase",
|
||||
"KnowledgeLoader",
|
||||
"SubtitleIdentifier",
|
||||
"SubtitleMatcher",
|
||||
"PatternDetector",
|
||||
"SubtitleFormat",
|
||||
"SubtitleLanguage",
|
||||
"SubtitlePattern",
|
||||
"SubtitleType",
|
||||
"ScanStrategy",
|
||||
"TypeDetectionMethod",
|
||||
"SubtitleMatchingRules",
|
||||
"RuleScope",
|
||||
"SubtitleNotFound",
|
||||
"SubtitleService",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
"""Subtitle domain aggregates."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from ..shared.value_objects import ImdbId
|
||||
from .knowledge.base import SubtitleKnowledgeBase
|
||||
from .value_objects import RuleScope, SubtitleMatchingRules
|
||||
|
||||
|
||||
def DEFAULT_RULES() -> SubtitleMatchingRules:
    """Return the default matching rules exposed by a freshly built SubtitleKnowledgeBase."""
    # A new knowledge base is constructed on every call.
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base.default_rules()
|
||||
|
||||
|
||||
@dataclass
class SubtitleRuleSet:
    """
    Rules for subtitle selection at a given scope level, with inheritance.

    Only delta fields are stored — None means "inherit from parent".
    Resolution order: global → release_group → show/movie → season → episode.

    A RuleSet can also be pinned to a specific media item (imdb_id),
    bypassing the scope hierarchy for that item.
    """

    scope: RuleScope
    parent: "SubtitleRuleSet | None" = None
    pinned_to: ImdbId | None = None

    # Deltas — None = inherit
    _languages: list[str] | None = field(default=None, repr=False)
    _formats: list[str] | None = field(default=None, repr=False)
    _types: list[str] | None = field(default=None, repr=False)
    _format_priority: list[str] | None = field(default=None, repr=False)
    _min_confidence: float | None = field(default=None, repr=False)

    @staticmethod
    def _pick(delta, inherited):
        """Return delta unless it is None, in which case return inherited."""
        return inherited if delta is None else delta

    def resolve(self) -> SubtitleMatchingRules:
        """
        Walk the parent chain and merge deltas into effective rules.

        Falls back to DEFAULT_RULES at the top of the chain. A delta wins
        whenever it is not None — an explicitly set empty list is a real
        override, consistent with override() and to_dict(), and is not
        mistaken for "inherit".
        """
        base = self.parent.resolve() if self.parent else DEFAULT_RULES()
        return SubtitleMatchingRules(
            preferred_languages=self._pick(self._languages, base.preferred_languages),
            preferred_formats=self._pick(self._formats, base.preferred_formats),
            allowed_types=self._pick(self._types, base.allowed_types),
            format_priority=self._pick(self._format_priority, base.format_priority),
            min_confidence=self._pick(self._min_confidence, base.min_confidence),
        )

    def override(
        self,
        languages: list[str] | None = None,
        formats: list[str] | None = None,
        types: list[str] | None = None,
        format_priority: list[str] | None = None,
        min_confidence: float | None = None,
    ) -> None:
        """
        Set delta overrides at this scope level.

        Arguments left as None keep the delta currently stored; this method
        cannot reset a delta back to "inherit".
        """
        if languages is not None:
            self._languages = languages
        if formats is not None:
            self._formats = formats
        if types is not None:
            self._types = types
        if format_priority is not None:
            self._format_priority = format_priority
        if min_confidence is not None:
            self._min_confidence = min_confidence

    def to_dict(self) -> dict:
        """
        Serialize deltas only (for persistence in rules.yaml).

        Returns a dict with a "scope" entry (level and identifier) and an
        "override" entry holding only the deltas that are not None.
        """
        candidates = (
            ("languages", self._languages),
            ("formats", self._formats),
            ("types", self._types),
            ("format_priority", self._format_priority),
            ("min_confidence", self._min_confidence),
        )
        delta: dict[str, Any] = {key: value for key, value in candidates if value is not None}
        return {"scope": {"level": self.scope.level, "identifier": self.scope.identifier}, "override": delta}

    @classmethod
    def global_default(cls) -> "SubtitleRuleSet":
        """Create a rule set at the "global" scope level with no deltas."""
        return cls(scope=RuleScope(level="global"))
|
||||
@@ -1,96 +1,87 @@
|
||||
"""Subtitle domain entities."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from ..shared.value_objects import FilePath, ImdbId
|
||||
from .value_objects import Language, SubtitleFormat, TimingOffset
|
||||
from ..shared.value_objects import ImdbId
|
||||
from .value_objects import SubtitleFormat, SubtitleLanguage, SubtitleMatchingRules, SubtitleType
|
||||
|
||||
|
||||
@dataclass
|
||||
class Subtitle:
|
||||
class SubtitleTrack:
|
||||
"""
|
||||
Subtitle entity representing a subtitle file.
|
||||
A single subtitle track — either an external file or an embedded stream.
|
||||
|
||||
Can be associated with either a movie or a TV show episode.
|
||||
State can evolve: unknown → resolved after user clarification.
|
||||
confidence reflects how certain we are about language + type classification.
|
||||
"""
|
||||
|
||||
media_imdb_id: ImdbId
|
||||
language: Language
|
||||
format: SubtitleFormat
|
||||
file_path: FilePath
|
||||
# Classification (may be None if not yet resolved)
|
||||
language: SubtitleLanguage | None
|
||||
format: SubtitleFormat | None
|
||||
subtitle_type: SubtitleType = SubtitleType.UNKNOWN
|
||||
|
||||
# Optional: for TV shows
|
||||
season_number: int | None = None
|
||||
episode_number: int | None = None
|
||||
# Source
|
||||
is_embedded: bool = False
|
||||
file_path: Path | None = None # None if embedded
|
||||
file_size_kb: float | None = None
|
||||
entry_count: int | None = None # number of subtitle cues in the file
|
||||
|
||||
# Subtitle metadata
|
||||
timing_offset: TimingOffset = TimingOffset(0)
|
||||
hearing_impaired: bool = False
|
||||
forced: bool = False # Forced subtitles (for foreign language parts)
|
||||
# Matching state
|
||||
confidence: float = 0.0 # 0.0 → 1.0, not applicable for embedded
|
||||
raw_tokens: list[str] = field(default_factory=list) # tokens extracted from filename
|
||||
|
||||
# Source information
|
||||
source: str | None = None # e.g., "OpenSubtitles", "Subscene"
|
||||
uploader: str | None = None
|
||||
download_count: int | None = None
|
||||
rating: float | None = None
|
||||
def is_resolved(self) -> bool:
|
||||
return self.language is not None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate subtitle entity."""
|
||||
# Ensure ImdbId is actually an ImdbId instance
|
||||
if not isinstance(self.media_imdb_id, ImdbId):
|
||||
if isinstance(self.media_imdb_id, str):
|
||||
object.__setattr__(self, "media_imdb_id", ImdbId(self.media_imdb_id))
|
||||
|
||||
# Ensure Language is actually a Language instance
|
||||
if not isinstance(self.language, Language):
|
||||
if isinstance(self.language, str):
|
||||
object.__setattr__(self, "language", Language.from_code(self.language))
|
||||
|
||||
# Ensure SubtitleFormat is actually a SubtitleFormat instance
|
||||
if not isinstance(self.format, SubtitleFormat):
|
||||
if isinstance(self.format, str):
|
||||
object.__setattr__(
|
||||
self, "format", SubtitleFormat.from_extension(self.format)
|
||||
)
|
||||
|
||||
# Ensure FilePath is actually a FilePath instance
|
||||
if not isinstance(self.file_path, FilePath):
|
||||
object.__setattr__(self, "file_path", FilePath(self.file_path))
|
||||
|
||||
def is_for_movie(self) -> bool:
|
||||
"""Check if this subtitle is for a movie."""
|
||||
return self.season_number is None and self.episode_number is None
|
||||
|
||||
def is_for_episode(self) -> bool:
|
||||
"""Check if this subtitle is for a TV show episode."""
|
||||
return self.season_number is not None and self.episode_number is not None
|
||||
|
||||
def get_filename(self) -> str:
|
||||
@property
|
||||
def destination_name(self) -> str:
|
||||
"""
|
||||
Get the suggested filename for this subtitle.
|
||||
|
||||
Format for movies: "Movie.Title.{lang}.{format}"
|
||||
Format for episodes: "S01E05.{lang}.{format}"
|
||||
Compute the output filename per naming convention:
|
||||
{lang}.{ext}
|
||||
{lang}.sdh.{ext}
|
||||
{lang}.forced.{ext}
|
||||
"""
|
||||
if self.is_for_episode():
|
||||
base = f"S{self.season_number:02d}E{self.episode_number:02d}"
|
||||
else:
|
||||
# For movies, use the file path stem
|
||||
base = self.file_path.value.stem
|
||||
|
||||
parts = [base, self.language.value]
|
||||
|
||||
if self.hearing_impaired:
|
||||
parts.append("hi")
|
||||
if self.forced:
|
||||
if not self.language or not self.format:
|
||||
raise ValueError("Cannot compute destination_name: language or format missing")
|
||||
ext = self.format.extensions[0].lstrip(".")
|
||||
parts = [self.language.code]
|
||||
if self.subtitle_type == SubtitleType.SDH:
|
||||
parts.append("sdh")
|
||||
elif self.subtitle_type == SubtitleType.FORCED:
|
||||
parts.append("forced")
|
||||
|
||||
return f"{'.'.join(parts)}.{self.format.value}"
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.is_for_episode():
|
||||
return f"Subtitle S{self.season_number:02d}E{self.episode_number:02d} ({self.language.value})"
|
||||
return f"Subtitle ({self.language.value})"
|
||||
return ".".join(parts) + "." + ext
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"Subtitle(media={self.media_imdb_id}, lang={self.language.value})"
|
||||
lang = self.language.code if self.language else "?"
|
||||
fmt = self.format.id if self.format else "?"
|
||||
src = "embedded" if self.is_embedded else str(self.file_path.name if self.file_path else "?")
|
||||
return f"SubtitleTrack({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
|
||||
|
||||
|
||||
@dataclass
class MediaSubtitleMetadata:
    """
    Everything currently known about the subtitles of one media item.

    Filled in by the identifier service (ffprobe + filesystem scan).
    """

    media_id: ImdbId | None
    media_type: str  # "movie" | "tv_show"
    embedded_tracks: list[SubtitleTrack] = field(default_factory=list)
    external_tracks: list[SubtitleTrack] = field(default_factory=list)
    release_group: str | None = None
    detected_pattern_id: str | None = None  # pattern id from knowledge base
    pattern_confirmed: bool = False

    @property
    def all_tracks(self) -> list[SubtitleTrack]:
        """Embedded tracks followed by external tracks, as a new list."""
        return [*self.embedded_tracks, *self.external_tracks]

    @property
    def total_count(self) -> int:
        """Number of embedded plus external tracks."""
        return len(self.all_tracks)

    @property
    def unresolved_tracks(self) -> list[SubtitleTrack]:
        """External tracks whose language is still None."""
        pending: list[SubtitleTrack] = []
        for track in self.external_tracks:
            if track.language is None:
                pending.append(track)
        return pending
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
from .base import SubtitleKnowledgeBase
|
||||
from .loader import KnowledgeLoader
|
||||
|
||||
__all__ = ["SubtitleKnowledgeBase", "KnowledgeLoader"]
|
||||
@@ -0,0 +1,151 @@
|
||||
"""SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
|
||||
|
||||
import logging
|
||||
from functools import cached_property
|
||||
|
||||
from ..value_objects import (
|
||||
ScanStrategy,
|
||||
SubtitleFormat,
|
||||
SubtitleLanguage,
|
||||
SubtitleMatchingRules,
|
||||
SubtitlePattern,
|
||||
SubtitleType,
|
||||
TypeDetectionMethod,
|
||||
)
|
||||
from .loader import KnowledgeLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleKnowledgeBase:
    """
    Typed access to subtitle knowledge (formats, types, languages, patterns).

    Built from KnowledgeLoader — call kb.reload() to pick up newly learned entries
    without restarting.
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        """Build the knowledge base from loader, or from a new KnowledgeLoader."""
        self._loader = loader or KnowledgeLoader()
        self._build()

    def _build(self) -> None:
        """
        Convert the loader's raw dicts into typed objects and lookup maps.

        Entries that cannot be converted (unknown subtitle type id, invalid
        pattern enum value) are logged and skipped so that a single bad
        entry does not prevent the rest of the knowledge from loading.
        """
        data = self._loader.subtitles()

        self._formats: dict[str, SubtitleFormat] = {}
        for fid, fdata in data.get("formats", {}).items():
            self._formats[fid] = SubtitleFormat(
                id=fid,
                extensions=fdata.get("extensions", []),
                description=fdata.get("description", ""),
            )

        self._languages: dict[str, SubtitleLanguage] = {}
        for code, ldata in data.get("languages", {}).items():
            self._languages[code] = SubtitleLanguage(
                code=code,
                tokens=ldata.get("tokens", []),
            )

        # Build reverse token → language code map
        self._lang_token_map: dict[str, str] = {}
        for code, lang in self._languages.items():
            for token in lang.tokens:
                self._lang_token_map[token.lower()] = code

        # Build reverse token → type map
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, tdata in data.get("types", {}).items():
            try:
                stype = SubtitleType(type_id)
            except ValueError as e:
                # Same policy as for patterns below: warn and skip.
                logger.warning(f"SubtitleKnowledgeBase: skipping type '{type_id}': {e}")
                continue
            for token in tdata.get("tokens", []):
                self._type_token_map[token.lower()] = stype

        d = data.get("defaults", {})
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=d.get("languages", ["fra", "eng"]),
            preferred_formats=d.get("formats", ["srt"]),
            allowed_types=d.get("types", ["standard", "forced"]),
            format_priority=d.get("format_priority", ["srt", "ass"]),
            min_confidence=d.get("min_confidence", 0.7),
        )

        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, pdata in self._loader.patterns().items():
            try:
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=pdata.get("description", ""),
                    scan_strategy=ScanStrategy(pdata.get("scan_strategy", "adjacent")),
                    root_folder=pdata.get("root_folder"),
                    type_detection=TypeDetectionMethod(
                        pdata.get("type_detection", {}).get("method", "token_in_name")
                    ),
                    version=pdata.get("version", "1.0"),
                )
            except ValueError as e:
                logger.warning(f"SubtitleKnowledgeBase: skipping pattern '{pid}': {e}")

    def reload(self) -> None:
        """Re-read the knowledge through a new KnowledgeLoader and rebuild."""
        # NOTE(review): this replaces any loader passed to __init__ with a
        # default KnowledgeLoader — confirm that is intended.
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the matching rules built from the "defaults" section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return the known formats keyed by format id."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose matches_extension(ext) is true, else None."""
        for fmt in self._formats.values():
            if fmt.matches_extension(ext):
                return fmt
        return None

    def known_extensions(self) -> set[str]:
        """Return the union of the extensions of every known format."""
        exts: set[str] = set()
        for fmt in self._formats.values():
            exts.update(fmt.extensions)
        return exts

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return the known languages keyed by language code."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language a token maps to (case-insensitive), else None."""
        code = self._lang_token_map.get(token.lower())
        return self._languages.get(code) if code else None

    def is_known_lang_token(self, token: str) -> bool:
        """Tell whether token (case-insensitive) is a known language token."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type a token maps to (case-insensitive), else None."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Tell whether token (case-insensitive) is a known type token."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return the successfully parsed patterns keyed by pattern id."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with the given id, else None."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """
        Return the parsed patterns listed under a release group's "known_patterns".

        Unknown groups give an empty list; pattern ids that were not parsed
        are silently left out.
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []
        return [
            self._patterns[pid]
            for pid in group.get("known_patterns", [])
            if pid in self._patterns
        ]
|
||||
@@ -0,0 +1,131 @@
|
||||
"""KnowledgeLoader — autodiscovers and merges builtin + learned YAML knowledge packs."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import alfred as _alfred_pkg
|
||||
|
||||
# Builtin knowledge — anchored on the alfred package itself, not on this file's depth
|
||||
_BUILTIN_ROOT = Path(_alfred_pkg.__file__).parent / "knowledge"
|
||||
|
||||
# Learned knowledge — local to this instance, gitignored
|
||||
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
|
||||
|
||||
|
||||
def _load_yaml(path: Path) -> dict:
|
||||
try:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
except FileNotFoundError:
|
||||
return {}
|
||||
except Exception as e:
|
||||
logger.warning(f"KnowledgeLoader: could not load {path}: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def _merge(base: dict, override: dict) -> dict:
|
||||
"""
|
||||
Deep merge override into base.
|
||||
Lists are extended (not replaced) — learned tokens are additive.
|
||||
Scalar values in override win over base.
|
||||
"""
|
||||
result = dict(base)
|
||||
for key, val in override.items():
|
||||
if key in result and isinstance(result[key], dict) and isinstance(val, dict):
|
||||
result[key] = _merge(result[key], val)
|
||||
elif key in result and isinstance(result[key], list) and isinstance(val, list):
|
||||
# Extend list, deduplicate, preserve order
|
||||
combined = result[key] + [v for v in val if v not in result[key]]
|
||||
result[key] = combined
|
||||
else:
|
||||
result[key] = val
|
||||
return result
|
||||
|
||||
|
||||
class KnowledgeLoader:
|
||||
"""
|
||||
Loads subtitle knowledge from YAML files.
|
||||
|
||||
Builtin packs live in alfred/knowledge/ (versioned).
|
||||
Learned packs live in data/knowledge/ (gitignored, instance-local).
|
||||
|
||||
Learned entries are merged additively — they can only add tokens/patterns,
|
||||
never remove builtin ones.
|
||||
|
||||
Usage:
|
||||
loader = KnowledgeLoader()
|
||||
subtitles = loader.subtitles() # merged subtitles.yaml
|
||||
patterns = loader.patterns() # all patterns, keyed by id
|
||||
groups = loader.release_groups() # all release groups, keyed by name
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._cache: dict[str, dict] = {}
|
||||
self._load()
|
||||
|
||||
def _load(self) -> None:
|
||||
# Main subtitles knowledge
|
||||
builtin = _load_yaml(_BUILTIN_ROOT / "subtitles.yaml")
|
||||
learned = _load_yaml(_LEARNED_ROOT / "subtitles_learned.yaml")
|
||||
self._cache["subtitles"] = _merge(builtin, learned)
|
||||
|
||||
# Patterns
|
||||
self._cache["patterns"] = {}
|
||||
for path in sorted((_BUILTIN_ROOT / "patterns").glob("*.yaml")):
|
||||
data = _load_yaml(path)
|
||||
pid = data.get("id", path.stem)
|
||||
self._cache["patterns"][pid] = data
|
||||
|
||||
for path in sorted((_LEARNED_ROOT / "patterns").glob("*.yaml")):
|
||||
data = _load_yaml(path)
|
||||
pid = data.get("id", path.stem)
|
||||
if pid in self._cache["patterns"]:
|
||||
self._cache["patterns"][pid] = _merge(self._cache["patterns"][pid], data)
|
||||
else:
|
||||
self._cache["patterns"][pid] = data
|
||||
logger.info(f"KnowledgeLoader: learned new pattern '{pid}'")
|
||||
|
||||
# Release groups
|
||||
self._cache["release_groups"] = {}
|
||||
for path in sorted((_BUILTIN_ROOT / "release_groups").glob("*.yaml")):
|
||||
data = _load_yaml(path)
|
||||
name = data.get("name", path.stem)
|
||||
self._cache["release_groups"][name] = data
|
||||
|
||||
for path in sorted((_LEARNED_ROOT / "release_groups").glob("*.yaml")):
|
||||
data = _load_yaml(path)
|
||||
name = data.get("name", path.stem)
|
||||
if name in self._cache["release_groups"]:
|
||||
self._cache["release_groups"][name] = _merge(self._cache["release_groups"][name], data)
|
||||
else:
|
||||
self._cache["release_groups"][name] = data
|
||||
logger.info(f"KnowledgeLoader: learned new release group '{name}'")
|
||||
|
||||
logger.info(
|
||||
f"KnowledgeLoader: {len(self._cache['patterns'])} patterns, "
|
||||
f"{len(self._cache['release_groups'])} release groups loaded"
|
||||
)
|
||||
|
||||
def subtitles(self) -> dict:
|
||||
return self._cache["subtitles"]
|
||||
|
||||
def patterns(self) -> dict[str, dict]:
|
||||
return self._cache["patterns"]
|
||||
|
||||
def pattern(self, pattern_id: str) -> dict | None:
|
||||
return self._cache["patterns"].get(pattern_id)
|
||||
|
||||
def release_groups(self) -> dict[str, dict]:
|
||||
return self._cache["release_groups"]
|
||||
|
||||
def release_group(self, name: str) -> dict | None:
|
||||
"""Case-insensitive lookup."""
|
||||
name_lower = name.lower()
|
||||
for key, val in self._cache["release_groups"].items():
|
||||
if key.lower() == name_lower:
|
||||
return val
|
||||
return None
|
||||
@@ -0,0 +1,221 @@
|
||||
"""SubtitleScanner — inspects local subtitle files and filters them per user preferences.
|
||||
|
||||
Given a video file path, the scanner:
|
||||
1. Looks for subtitle files in the same directory as the video.
|
||||
2. Optionally also inspects a Subs/ subfolder adjacent to the video.
|
||||
3. Classifies each file (language, SDH, forced) from its filename.
|
||||
4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh, keep_forced).
|
||||
5. Returns a list of SubtitleCandidate — one per file that passes the filter,
|
||||
with the destination filename already computed.
|
||||
|
||||
Filename classification heuristics
|
||||
-----------------------------------
|
||||
We parse the stem of each subtitle file looking for known patterns:
|
||||
|
||||
fr.srt → lang=fr, sdh=False, forced=False
|
||||
fr.sdh.srt → lang=fr, sdh=True
|
||||
fr.hi.srt → lang=fr, sdh=True (hi = hearing-impaired, alias for sdh)
|
||||
fr.forced.srt → lang=fr, forced=True
|
||||
Breaking.Bad.S01E01.French.srt → lang=fr (keyword match)
|
||||
Breaking.Bad.S01E01.VOSTFR.srt → lang=fr (VOSTFR = original-language audio with French subtitles)
|
||||
|
||||
Output naming convention (matches SubtitlePreferences docstring):
|
||||
{lang}.srt
|
||||
{lang}.sdh.srt
|
||||
{lang}.forced.srt
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Subtitle file extensions we handle
|
||||
SUBTITLE_EXTENSIONS = {".srt", ".ass", ".ssa", ".vtt", ".sub"}
|
||||
|
||||
# Language keyword map: lowercase token → ISO 639-1 code
|
||||
_LANG_KEYWORDS: dict[str, str] = {
|
||||
# French
|
||||
"fr": "fr",
|
||||
"fra": "fr",
|
||||
"french": "fr",
|
||||
"francais": "fr",
|
||||
"français": "fr",
|
||||
"vf": "fr",
|
||||
"vff": "fr",
|
||||
"vostfr": "fr",
|
||||
# English
|
||||
"en": "en",
|
||||
"eng": "en",
|
||||
"english": "en",
|
||||
# Spanish
|
||||
"es": "es",
|
||||
"spa": "es",
|
||||
"spanish": "es",
|
||||
"espanol": "es",
|
||||
# German
|
||||
"de": "de",
|
||||
"deu": "de",
|
||||
"ger": "de",
|
||||
"german": "de",
|
||||
# Italian
|
||||
"it": "it",
|
||||
"ita": "it",
|
||||
"italian": "it",
|
||||
# Portuguese
|
||||
"pt": "pt",
|
||||
"por": "pt",
|
||||
"portuguese": "pt",
|
||||
# Dutch
|
||||
"nl": "nl",
|
||||
"nld": "nl",
|
||||
"dutch": "nl",
|
||||
# Japanese
|
||||
"ja": "ja",
|
||||
"jpn": "ja",
|
||||
"japanese": "ja",
|
||||
}
|
||||
|
||||
# Tokens that indicate SDH / hearing-impaired
|
||||
_SDH_TOKENS = {"sdh", "hi", "hearing", "impaired", "cc", "closedcaption"}
|
||||
|
||||
# Tokens that indicate forced subtitles
|
||||
_FORCED_TOKENS = {"forced", "foreign"}
|
||||
|
||||
|
||||
@dataclass
class SubtitleCandidate:
    """A subtitle file that made it through the filter and is ready to be placed."""

    source_path: Path
    language: str  # ISO 639-1 code, e.g. "fr"
    is_sdh: bool
    is_forced: bool
    extension: str  # e.g. ".srt"

    @property
    def destination_name(self) -> str:
        """
        Destination filename following the naming convention:
            {lang}.srt
            {lang}.sdh.srt
            {lang}.forced.srt

        When both flags are set, SDH takes precedence over forced.
        """
        suffix = self.extension.lstrip(".")
        if self.is_sdh:
            marker = ".sdh"
        elif self.is_forced:
            marker = ".forced"
        else:
            marker = ""
        return f"{self.language}{marker}.{suffix}"
|
||||
|
||||
|
||||
def _classify(path: Path) -> tuple[str | None, bool, bool]:
    """
    Derive (language_code, is_sdh, is_forced) from a subtitle filename.

    The lowercase stem is split on dots, whitespace, underscores and hyphens.
    When several tokens name a language, the last one wins. The language is
    None when no token is a known language keyword.
    """
    import re

    words = re.split(r"[\.\s_\-]+", path.stem.lower())

    # Collect every language hit in order; the final one is the result.
    language_hits = [_LANG_KEYWORDS[word] for word in words if word in _LANG_KEYWORDS]
    language = language_hits[-1] if language_hits else None

    is_sdh = any(word in _SDH_TOKENS for word in words)
    is_forced = any(word in _FORCED_TOKENS for word in words)
    return language, is_sdh, is_forced
|
||||
|
||||
|
||||
class SubtitleScanner:
    """
    Collects the subtitle files located next to a video and keeps only those
    matching the configured language / size / SDH / forced preferences.

    Usage:
        scanner = SubtitleScanner(["fr", "en"], min_size_kb=2, keep_sdh=False, keep_forced=True)
        candidates = scanner.scan(video_path)
        # Each candidate has .source_path and .destination_name
    """

    def __init__(self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool):
        # Language codes are compared lowercase against the classifier output.
        self.languages = [code.lower() for code in languages]
        self.min_size_kb = min_size_kb
        self.keep_sdh = keep_sdh
        self.keep_forced = keep_forced

    def scan(self, video_path: Path) -> list[SubtitleCandidate]:
        """
        Return every subtitle candidate near the video that passes the filters.

        Looked-up locations:
          - the video's own directory (flat siblings)
          - a Subs/ subfolder of that directory, when it exists
        """
        video_dir = video_path.parent
        search_dirs = [video_dir]

        subs_dir = video_dir / "Subs"
        if subs_dir.is_dir():
            search_dirs.append(subs_dir)
            logger.debug(f"SubtitleScanner: found Subs/ folder at {subs_dir}")

        candidates: list[SubtitleCandidate] = []
        for directory in search_dirs:
            # Sorted so the result order does not depend on the filesystem.
            for path in sorted(directory.iterdir()):
                if not (path.is_file() and path.suffix.lower() in SUBTITLE_EXTENSIONS):
                    continue
                candidate = self._evaluate(path)
                if candidate is not None:
                    candidates.append(candidate)

        logger.info(f"SubtitleScanner: {len(candidates)} candidate(s) found for {video_path.name}")
        return candidates

    def _evaluate(self, path: Path) -> SubtitleCandidate | None:
        """Run one subtitle file through every filter; None means it is dropped."""
        # Size filter comes first, before the filename is parsed.
        size_kb = path.stat().st_size / 1024
        if size_kb < self.min_size_kb:
            logger.debug(f"SubtitleScanner: skip {path.name} (too small: {size_kb:.1f} KB)")
            return None

        language, is_sdh, is_forced = _classify(path)

        # Language filter: unknown language, then language outside the preferences.
        if language is None:
            logger.debug(f"SubtitleScanner: skip {path.name} (language unknown)")
            return None
        if language not in self.languages:
            logger.debug(f"SubtitleScanner: skip {path.name} (language '{language}' not in prefs)")
            return None

        # Variant filters: SDH is checked before forced.
        if is_sdh and not self.keep_sdh:
            logger.debug(f"SubtitleScanner: skip {path.name} (SDH not wanted)")
            return None
        if is_forced and not self.keep_forced:
            logger.debug(f"SubtitleScanner: skip {path.name} (forced not wanted)")
            return None

        return SubtitleCandidate(
            source_path=path,
            language=language,
            is_sdh=is_sdh,
            is_forced=is_forced,
            extension=path.suffix.lower(),
        )
|
||||
@@ -0,0 +1,13 @@
|
||||
from .identifier import SubtitleIdentifier
|
||||
from .matcher import SubtitleMatcher
|
||||
from .pattern_detector import PatternDetector
|
||||
from .placer import PlacedTrack, PlaceResult, SubtitlePlacer
|
||||
|
||||
__all__ = [
|
||||
"SubtitleIdentifier",
|
||||
"SubtitleMatcher",
|
||||
"PatternDetector",
|
||||
"SubtitlePlacer",
|
||||
"PlacedTrack",
|
||||
"PlaceResult",
|
||||
]
|
||||
@@ -0,0 +1,287 @@
|
||||
"""SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from ...shared.value_objects import ImdbId
|
||||
from ..entities import MediaSubtitleMetadata, SubtitleTrack
|
||||
from ..knowledge.base import SubtitleKnowledgeBase
|
||||
from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _tokenize(name: str) -> list[str]:
    """Break a filename stem on dots, whitespace, underscores and hyphens; return the non-empty pieces lowercased."""
    pieces = re.split(r"[\.\s_\-]+", name)
    # filter(None, ...) drops the empty strings produced by leading/trailing separators.
    return list(map(str.lower, filter(None, pieces)))
|
||||
|
||||
|
||||
def _count_entries(path: Path) -> int:
    """
    Return the entry count of an SRT file by finding the last cue number.

    The file is scanned from the end for the first line that consists solely
    of decimal digits; that number is returned.

    Returns 0 when the file cannot be read or contains no such line.
    """
    try:
        # utf-8-sig strips a leading BOM so a cue number on the very first
        # line is still recognised; undecodable bytes are replaced, not fatal.
        with open(path, encoding="utf-8-sig", errors="replace") as f:
            lines = f.read().splitlines()
        for line in reversed(lines):
            stripped = line.strip()
            # isdecimal() (unlike isdigit()) only accepts characters that
            # int() can parse, so lines such as "²" are skipped instead of
            # aborting the whole count.
            if stripped.isdecimal():
                return int(stripped)
        return 0
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: digit run too long
        # for int() conversion. Counting is best-effort either way.
        return 0
|
||||
|
||||
|
||||
class SubtitleIdentifier:
    """
    Finds all subtitle tracks for a given video file using a known pattern,
    then attempts to classify each track (language, type, format).

    Returns a MediaSubtitleMetadata with embedded + external tracks.
    External tracks with unknown language or low confidence are left as-is —
    the caller (use case) decides whether to ask the user for clarification.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def identify(
        self,
        video_path: Path,
        pattern: SubtitlePattern,
        media_id: ImdbId | None,
        media_type: str,
        release_group: str | None = None,
    ) -> MediaSubtitleMetadata:
        """
        Build the subtitle metadata for one video.

        External tracks are scanned unless the pattern's strategy is EMBEDDED;
        embedded tracks are probed in every case.
        """
        metadata = MediaSubtitleMetadata(
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
            detected_pattern_id=pattern.id,
        )

        if pattern.scan_strategy != ScanStrategy.EMBEDDED:
            metadata.external_tracks = self._scan_external(video_path, pattern)

        # Always also check for embedded tracks
        metadata.embedded_tracks = self._scan_embedded(video_path)

        return metadata

    # ------------------------------------------------------------------
    # Embedded tracks — ffprobe
    # ------------------------------------------------------------------

    def _scan_embedded(self, video_path: Path) -> list[SubtitleTrack]:
        """
        List the subtitle streams ffprobe reports for the video.

        Returns an empty list when the video does not exist, ffprobe cannot
        be launched, it times out (30 s), or its output cannot be decoded or
        parsed as JSON.
        """
        if not video_path.exists():
            return []
        try:
            result = subprocess.run(
                [
                    "ffprobe", "-v", "quiet",
                    "-print_format", "json",
                    "-show_streams",
                    "-select_streams", "s",
                    str(video_path),
                ],
                capture_output=True, text=True, timeout=30,
            )
            data = json.loads(result.stdout)
        except (subprocess.TimeoutExpired, OSError, ValueError) as e:
            # OSError covers a missing or non-executable ffprobe binary;
            # ValueError covers both invalid JSON and undecodable output.
            logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
            return []

        tracks = []
        for stream in data.get("streams", []):
            tags = stream.get("tags", {})
            disposition = stream.get("disposition", {})
            lang_code = tags.get("language", "")

            lang = self.kb.language_for_token(lang_code) if lang_code else None

            # hearing_impaired takes precedence over forced
            if disposition.get("hearing_impaired"):
                stype = SubtitleType.SDH
            elif disposition.get("forced"):
                stype = SubtitleType.FORCED
            else:
                stype = SubtitleType.STANDARD

            tracks.append(SubtitleTrack(
                language=lang,
                format=None,
                subtitle_type=stype,
                is_embedded=True,
                raw_tokens=[lang_code] if lang_code else [],
            ))

        logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
        return tracks

    # ------------------------------------------------------------------
    # External tracks — filesystem scan per pattern strategy
    # ------------------------------------------------------------------

    def _scan_external(self, video_path: Path, pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Locate external subtitle files per the pattern's strategy and classify them."""
        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.ADJACENT:
            candidates = self._find_adjacent(video_path)
        elif strategy == ScanStrategy.FLAT:
            candidates = self._find_flat(video_path, pattern.root_folder or "Subs")
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            candidates = self._find_episode_subfolder(video_path, pattern.root_folder or "Subs")
        else:
            return []

        return self._classify_files(candidates, pattern)

    def _subtitle_files(self, directory: Path) -> list[Path]:
        """Return the regular files in *directory* with a known subtitle extension, sorted by path."""
        known_exts = self.kb.known_extensions()  # looked up once, not per file
        return [
            p for p in sorted(directory.iterdir())
            if p.is_file() and p.suffix.lower() in known_exts
        ]

    def _find_adjacent(self, video_path: Path) -> list[Path]:
        """Subtitle files in the video's directory, excluding any whose stem equals the video's stem."""
        return [
            p for p in self._subtitle_files(video_path.parent)
            if p.stem != video_path.stem
        ]

    def _find_flat(self, video_path: Path, root_folder: str) -> list[Path]:
        """Subtitle files directly inside {root_folder}/, next to the video or one level up."""
        subs_dir = video_path.parent / root_folder
        if not subs_dir.is_dir():
            # Also look at release root (one level up)
            subs_dir = video_path.parent.parent / root_folder
        if not subs_dir.is_dir():
            return []
        return self._subtitle_files(subs_dir)

    def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
        """
        Look for Subs/{episode_stem}/*.srt

        Checks two locations:
          1. Adjacent to the video:  video_path.parent / root_folder / video_path.stem
          2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem

        The first location that contains at least one subtitle file wins.
        """
        episode_stem = video_path.stem
        candidates_dirs = [
            video_path.parent / root_folder / episode_stem,
            video_path.parent.parent / root_folder / episode_stem,
        ]
        for subs_dir in candidates_dirs:
            if subs_dir.is_dir():
                files = self._subtitle_files(subs_dir)
                if files:
                    logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
                    return files
        return []

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

    def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Classify each file, then apply size/count disambiguation when the pattern asks for it."""
        tracks = [self._classify_single(path) for path in paths]

        # Post-process: if multiple tracks share same language but type is ambiguous,
        # apply size_and_count disambiguation
        if pattern.type_detection.value == "size_and_count":
            tracks = self._disambiguate_by_size(tracks)

        return tracks

    def _classify_single(self, path: Path) -> SubtitleTrack:
        """
        Build a SubtitleTrack for one external file from its filename tokens.

        When several tokens name a language or a type, the last recognised one
        is kept. Confidence is the share of meaningful tokens (non-numeric,
        longer than one character) that were recognised, or 0.5 when the name
        has no meaningful token at all.
        """
        fmt = self.kb.format_for_extension(path.suffix)
        tokens = _tokenize(path.stem)

        language = None
        subtitle_type = SubtitleType.UNKNOWN
        unknown_tokens = []
        matched_tokens = 0

        for token in tokens:
            if self.kb.is_known_lang_token(token):
                language = self.kb.language_for_token(token)
                matched_tokens += 1
            elif self.kb.is_known_type_token(token):
                subtitle_type = self.kb.type_for_token(token) or subtitle_type
                matched_tokens += 1
            elif token.isdigit():
                pass  # numeric prefix — ignore
            elif len(token) > 1:
                unknown_tokens.append(token)

        # Confidence: proportion of meaningful tokens that were recognized
        meaningful = [t for t in tokens if not t.isdigit() and len(t) > 1]
        confidence = matched_tokens / len(meaningful) if meaningful else 0.5

        if unknown_tokens:
            logger.debug(
                f"SubtitleIdentifier: unknown tokens in '{path.name}': {unknown_tokens}"
            )

        if path.exists():
            size_kb = path.stat().st_size / 1024
            entry_count = _count_entries(path)
        else:
            size_kb = None
            entry_count = None

        return SubtitleTrack(
            language=language,
            format=fmt,
            subtitle_type=subtitle_type,
            is_embedded=False,
            file_path=path,
            file_size_kb=size_kb,
            entry_count=entry_count,
            confidence=confidence,
            raw_tokens=tokens,
        )

    def _disambiguate_by_size(self, tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
        """
        Assign subtitle types by entry count within each language group.

        Tracks are grouped by language code (tracks without a language share
        one group). Within a group of two, the track with fewer entries
        becomes STANDARD and the other SDH. Within a group of three or more,
        the smallest becomes FORCED, the largest SDH, and everything in
        between STANDARD. Single-track groups are left untouched.

        NOTE(review): in multi-track groups the type is overwritten whatever
        its current value, including types already derived from filename
        tokens — confirm that is intended.

        The returned list is ordered group by group, each group sorted by
        ascending entry count. Only applied when type_detection = size_and_count.
        """
        # Group by language code
        lang_groups: dict[str, list[SubtitleTrack]] = {}
        for track in tracks:
            key = track.language.code if track.language else "__unknown__"
            lang_groups.setdefault(key, []).append(track)

        result = []
        for group in lang_groups.values():
            if len(group) == 1:
                result.extend(group)
                continue

            # Sort by entry_count ascending (None treated as 0)
            sorted_group = sorted(group, key=lambda t: t.entry_count or 0)

            if len(sorted_group) == 2:
                # smaller = standard, larger = sdh
                self._set_type(sorted_group[0], SubtitleType.STANDARD)
                self._set_type(sorted_group[1], SubtitleType.SDH)
            elif len(sorted_group) >= 3:
                # smallest = forced, middle = standard, largest = sdh
                self._set_type(sorted_group[0], SubtitleType.FORCED)
                for t in sorted_group[1:-1]:
                    self._set_type(t, SubtitleType.STANDARD)
                self._set_type(sorted_group[-1], SubtitleType.SDH)

            result.extend(sorted_group)

        return result

    def _set_type(self, track: SubtitleTrack, stype: SubtitleType) -> None:
        """Mutate track type in-place."""
        track.subtitle_type = stype
|
||||
@@ -0,0 +1,118 @@
|
||||
"""SubtitleMatcher — filters tracks against resolved rules."""
|
||||
|
||||
import logging
|
||||
|
||||
from ..entities import SubtitleTrack
|
||||
from ..value_objects import SubtitleMatchingRules, SubtitleType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleMatcher:
    """
    Applies effective SubtitleMatchingRules to a list of SubtitleTrack.

    Produces two lists: matched tracks (every filter passed and confidence is
    at least min_confidence) and unresolved tracks (language missing or
    confidence too low — user clarification needed).

    When several matched tracks share the same language + type, only one is
    kept; format_priority decides which.
    """

    def match(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleTrack], list[SubtitleTrack]]:
        """Return (matched, unresolved). Embedded tracks appear in neither list."""
        kept: list[SubtitleTrack] = []
        unresolved: list[SubtitleTrack] = []

        for track in tracks:
            if track.is_embedded:
                continue

            needs_review = track.language is None or track.confidence < rules.min_confidence
            if needs_review:
                unresolved.append(track)
            elif self._passes_filters(track, rules):
                kept.append(track)
            else:
                logger.debug(f"SubtitleMatcher: filtered out {track}")

        matched = self._resolve_conflicts(kept, rules)
        logger.info(
            f"SubtitleMatcher: {len(matched)} matched, {len(unresolved)} unresolved"
        )
        return matched, unresolved

    def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
        """True when the track satisfies the language, format and type rules; an empty rule imposes no constraint."""
        # Language rule
        if rules.preferred_languages:
            if not track.language or track.language.code not in rules.preferred_languages:
                return False

        # Format rule — external files only
        if rules.preferred_formats and not track.is_embedded:
            if not track.format or track.format.id not in rules.preferred_formats:
                return False

        # Type rule
        if rules.allowed_types and track.subtitle_type.value not in rules.allowed_types:
            return False

        return True

    def _resolve_conflicts(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleTrack]:
        """
        Keep a single track per (language, type) pair.

        The first track seen for a pair is kept unless a later one ranks
        better according to format_priority.
        """
        seen: dict[tuple, SubtitleTrack] = {}

        for track in tracks:
            key = (
                track.language.code if track.language else None,
                track.subtitle_type.value,
            )
            existing = seen.get(key)

            if existing is None:
                seen[key] = track
                continue

            if self._prefer(track, existing, rules.format_priority):
                logger.debug(
                    f"SubtitleMatcher: conflict {key} — "
                    f"preferring {track.format.id if track.format else 'embedded'} "
                    f"over {existing.format.id if existing.format else 'embedded'}"
                )
                seen[key] = track

        return list(seen.values())

    def _prefer(
        self,
        candidate: SubtitleTrack,
        existing: SubtitleTrack,
        format_priority: list[str],
    ) -> bool:
        """Return True if candidate ranks strictly better than existing in format_priority."""
        if not format_priority:
            return False

        def rank(track: SubtitleTrack) -> int:
            # Formats missing from the priority list share the worst rank.
            fmt_id = track.format.id if track.format else ""
            return format_priority.index(fmt_id) if fmt_id in format_priority else 999

        return rank(candidate) < rank(existing)
|
||||
@@ -0,0 +1,205 @@
|
||||
"""PatternDetector — discovers the subtitle structure of a release folder."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from ..knowledge.base import SubtitleKnowledgeBase
|
||||
from ..value_objects import ScanStrategy, SubtitlePattern
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PatternDetector:
    """
    Inspects a release folder and returns the best matching known pattern,
    plus a confidence score and a description of what was found.

    Used for "pattern discovery" — when we don't yet know which pattern
    a release follows. The result is proposed to the user for confirmation.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def detect(self, release_root: Path, sample_video: Path) -> dict:
        """
        Analyse the release folder and return:
            {
                "detected": SubtitlePattern | None,
                "confidence": float,
                "description": str,   # human-readable description of what was found
                "candidate_pattern_ids": list[str],
                "raw_findings": dict,  # the structural facts gathered by _inspect
            }
        """
        findings = self._inspect(release_root, sample_video)
        best, confidence = self._match_pattern(findings)

        return {
            "detected": best,
            "confidence": confidence,
            "description": self._describe(findings),
            "candidate_pattern_ids": [best.id] if best else [],
            "raw_findings": findings,
        }

    def _has_embedded_subtitles(self, video_path: Path) -> bool:
        """
        Run ffprobe to check whether the video has embedded subtitle streams.

        Returns False when ffprobe cannot be launched, times out (30 s), or
        its output cannot be decoded or parsed as JSON.
        """
        try:
            result = subprocess.run(
                [
                    "ffprobe", "-v", "quiet",
                    "-print_format", "json",
                    "-show_streams",
                    "-select_streams", "s",
                    str(video_path),
                ],
                capture_output=True, text=True, timeout=30,
            )
            data = json.loads(result.stdout)
            return len(data.get("streams", [])) > 0
        except (subprocess.TimeoutExpired, OSError, ValueError):
            # OSError covers a missing or non-executable ffprobe binary;
            # ValueError covers both invalid JSON and undecodable output.
            return False

    def _has_lang_token(self, stem: str) -> bool:
        """True when any dot/underscore-separated part of *stem* is a known language token."""
        return any(
            self.kb.is_known_lang_token(t.lower())
            for t in stem.replace("_", ".").split(".")
        )

    def _inspect(self, release_root: Path, sample_video: Path) -> dict:
        """Gather structural facts about the release."""
        known_exts = self.kb.known_extensions()
        findings: dict = {
            "has_subs_folder": False,
            "subs_strategy": None,       # "flat" | "episode_subfolder"
            "subs_root": None,
            "adjacent_subs": False,
            "has_embedded": self._has_embedded_subtitles(sample_video),
            "files_per_episode": 0,
            "has_lang_tokens": False,
            "has_numeric_prefix": False,
        }

        # Check for Subs/ folder — adjacent or at release root.
        # dict.fromkeys drops the duplicate when both paths are the same
        # directory, so it is not inspected twice. When both exist and differ,
        # the release-root values overwrite the adjacent ones.
        subs_candidates = dict.fromkeys([
            sample_video.parent / "Subs",
            release_root / "Subs",
        ])
        for subs_candidate in subs_candidates:
            if not subs_candidate.is_dir():
                continue

            findings["has_subs_folder"] = True
            findings["subs_root"] = str(subs_candidate)

            # Is it flat or episode_subfolder? Sorted so the sampled
            # subfolder is the same on every run.
            children = sorted(subs_candidate.iterdir())
            sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
            sub_dirs = [c for c in children if c.is_dir()]

            if sub_dirs and not sub_files:
                findings["subs_strategy"] = "episode_subfolder"
                # Count files in a sample subfolder
                sample_sub = sub_dirs[0]
                sample_files = [
                    f for f in sample_sub.iterdir()
                    if f.is_file() and f.suffix.lower() in known_exts
                ]
                findings["files_per_episode"] = len(sample_files)
                # Check naming conventions
                for f in sample_files:
                    stem = f.stem
                    if stem.split("_")[0].isdigit():
                        findings["has_numeric_prefix"] = True
                    if self._has_lang_token(stem):
                        findings["has_lang_tokens"] = True
            else:
                findings["subs_strategy"] = "flat"
                findings["files_per_episode"] = len(sub_files)
                if any(self._has_lang_token(f.stem) for f in sub_files):
                    findings["has_lang_tokens"] = True

        # Check adjacent subs (next to the video)
        if not findings["has_subs_folder"]:
            adjacent = [
                p for p in sample_video.parent.iterdir()
                if p.is_file() and p.suffix.lower() in known_exts
            ]
            if adjacent:
                findings["adjacent_subs"] = True
                findings["files_per_episode"] = len(adjacent)

        return findings

    def _match_pattern(self, findings: dict) -> tuple[SubtitlePattern | None, float]:
        """
        Score all known patterns against the findings.

        Returns (pattern, score) for the highest score; on ties the pattern
        listed first by the knowledge base wins. The pattern is None when no
        pattern is known or the best score is below 0.4.
        """
        scores: list[tuple[float, SubtitlePattern]] = [
            (self._score(pattern, findings), pattern)
            for pattern in self.kb.patterns().values()
        ]

        if not scores:
            return None, 0.0

        # max() returns the first maximal element, same as a stable
        # descending sort followed by taking index 0.
        best_score, best_pattern = max(scores, key=lambda x: x[0])

        if best_score < 0.4:
            return None, best_score

        return best_pattern, best_score

    def _score(self, pattern: SubtitlePattern, findings: dict) -> float:
        """Return a 0.0–1.0 match score for this pattern against the findings."""
        score = 0.0
        total = 0.0

        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.EMBEDDED:
            total += 1
            if findings.get("has_embedded"):
                score += 1.0
            # Bonus criterion, counted only when there are no external subs.
            # NOTE(review): the nesting of `total += 0.5` was reconstructed
            # from flattened source — confirm against the original file.
            if not findings.get("has_subs_folder") and not findings.get("adjacent_subs"):
                score += 0.5
                total += 0.5

        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            total += 3
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "episode_subfolder":
                score += 2.0

        elif strategy == ScanStrategy.FLAT:
            total += 2
            if findings.get("has_subs_folder"):
                score += 1.0
            if findings.get("subs_strategy") == "flat":
                score += 1.0

        elif strategy == ScanStrategy.ADJACENT:
            total += 2
            if findings.get("adjacent_subs"):
                score += 1.0
            if not findings.get("has_subs_folder"):
                score += 1.0

        return score / total if total > 0 else 0.0

    def _describe(self, findings: dict) -> str:
        """Render the findings as a short human-readable sentence, parts joined by ' — '."""
        parts = []
        if findings.get("has_subs_folder"):
            strategy = findings.get("subs_strategy", "?")
            n = findings.get("files_per_episode", 0)
            parts.append(f"Subs/ folder found ({strategy}), ~{n} file(s) per episode")
            if findings.get("has_numeric_prefix"):
                parts.append("files have numeric prefix (e.g. 2_English.srt)")
            if findings.get("has_lang_tokens"):
                parts.append("language tokens found in filenames")
        elif findings.get("adjacent_subs"):
            parts.append("subtitle files adjacent to video")
        else:
            parts.append("no external subtitle files found")

        if findings.get("has_embedded"):
            parts.append("embedded tracks detected (ffprobe)")

        return " — ".join(parts) if parts else "nothing found"
|
||||
@@ -0,0 +1,93 @@
|
||||
"""SubtitlePlacer — hard-links matched subtitle tracks next to the destination video."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from ..entities import SubtitleTrack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PlacedTrack:
    """Record of one subtitle file that was hard-linked into place."""

    source: Path  # original subtitle file
    destination: Path  # full path of the created link
    filename: str  # name of the created link (last component of destination)
|
||||
|
||||
|
||||
@dataclass
class PlaceResult:
    """Outcome of a placement run: what was linked and what was skipped."""

    placed: list[PlacedTrack]
    skipped: list[tuple[SubtitleTrack, str]]  # (track, reason)

    @property
    def placed_count(self) -> int:
        """Number of tracks that were placed."""
        placed_total = len(self.placed)
        return placed_total

    @property
    def skipped_count(self) -> int:
        """Number of tracks that were skipped."""
        skipped_total = len(self.skipped)
        return skipped_total
|
||||
|
||||
|
||||
class SubtitlePlacer:
    """
    Creates hard links for matched SubtitleTrack files in the directory of a
    destination video.

    Same hard-link strategy as FileManager.copy_file:
    instant, no data duplication, qBittorrent keeps seeding.

    Embedded tracks have no file on disk and are reported as skipped.
    """

    def place(
        self,
        tracks: list[SubtitleTrack],
        destination_video: Path,
    ) -> PlaceResult:
        """
        Link each track's file into destination_video's directory.

        A track is skipped (with a reason) when it is embedded, its source
        file is missing, its destination name raises ValueError, the
        destination already exists, or the link operation raises OSError.
        """
        dest_dir = destination_video.parent
        placed: list[PlacedTrack] = []
        skipped: list[tuple[SubtitleTrack, str]] = []

        for track in tracks:
            if track.is_embedded:
                logger.debug(f"SubtitlePlacer: skip embedded track ({track.language})")
                skipped.append((track, "embedded — no file to place"))
                continue

            if not (track.file_path and track.file_path.exists()):
                skipped.append((track, "source file not found"))
                continue

            try:
                dest_name = track.destination_name
            except ValueError as e:
                skipped.append((track, str(e)))
                continue

            dest_path = dest_dir / dest_name

            # Never overwrite something already sitting at the destination.
            if dest_path.exists():
                logger.debug(f"SubtitlePlacer: skip {dest_name} — already exists")
                skipped.append((track, "destination already exists"))
                continue

            try:
                os.link(track.file_path, dest_path)
            except OSError as e:
                logger.warning(f"SubtitlePlacer: failed to place {dest_name}: {e}")
                skipped.append((track, str(e)))
            else:
                placed.append(PlacedTrack(
                    source=track.file_path,
                    destination=dest_path,
                    filename=dest_name,
                ))
                logger.info(f"SubtitlePlacer: placed {dest_name}")

        logger.info(
            f"SubtitlePlacer: {len(placed)} placed, {len(skipped)} skipped "
            f"for {destination_video.name}"
        )
        return PlaceResult(placed=placed, skipped=skipped)
|
||||
@@ -0,0 +1,21 @@
|
||||
"""Subtitle service utilities."""
|
||||
|
||||
from ..entities import SubtitleTrack
|
||||
|
||||
|
||||
def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
    """
    Return one track per distinct (language, type) pair, in first-seen order.

    Handy for showing what exists for a media item independently of user
    preferences — e.g. eng, eng.sdh, fra each get their own entry.
    """
    # A dict keeps insertion order, and setdefault ignores later duplicates,
    # so the first track seen for each key is the one retained.
    first_by_key: dict[tuple, SubtitleTrack] = {}
    for track in tracks:
        lang = track.language.code if track.language else None
        first_by_key.setdefault((lang, track.subtitle_type), track)
    return list(first_by_key.values())
|
||||
@@ -1,91 +1,93 @@
|
||||
"""Subtitle domain value objects."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
|
||||
from ..shared.exceptions import ValidationError
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Language(Enum):
|
||||
"""Supported subtitle languages."""
|
||||
class ScanStrategy(Enum):
|
||||
"""How to locate subtitle files for a given release."""
|
||||
|
||||
ENGLISH = "en"
|
||||
FRENCH = "fr"
|
||||
|
||||
@classmethod
|
||||
def from_code(cls, code: str) -> "Language":
|
||||
"""
|
||||
Get language from ISO 639-1 code.
|
||||
|
||||
Args:
|
||||
code: Two-letter language code
|
||||
|
||||
Returns:
|
||||
Language enum value
|
||||
|
||||
Raises:
|
||||
ValidationError: If code is not supported
|
||||
"""
|
||||
code_lower = code.lower()
|
||||
for lang in cls:
|
||||
if lang.value == code_lower:
|
||||
return lang
|
||||
raise ValidationError(f"Unsupported language code: {code}")
|
||||
ADJACENT = "adjacent" # .srt next to the video
|
||||
FLAT = "flat" # Subs/*.srt
|
||||
EPISODE_SUBFOLDER = "episode_subfolder" # Subs/{episode_name}/*.srt
|
||||
EMBEDDED = "embedded" # tracks inside the video container
|
||||
|
||||
|
||||
class SubtitleFormat(Enum):
|
||||
"""Supported subtitle formats."""
|
||||
class TypeDetectionMethod(Enum):
|
||||
"""How to differentiate standard / SDH / forced when tokens are ambiguous."""
|
||||
|
||||
SRT = "srt" # SubRip
|
||||
ASS = "ass" # Advanced SubStation Alpha
|
||||
SSA = "ssa" # SubStation Alpha
|
||||
VTT = "vtt" # WebVTT
|
||||
SUB = "sub" # MicroDVD
|
||||
TOKEN_IN_NAME = "token_in_name"
|
||||
SIZE_AND_COUNT = "size_and_count"
|
||||
FFPROBE_METADATA = "ffprobe_metadata"
|
||||
|
||||
@classmethod
|
||||
def from_extension(cls, extension: str) -> "SubtitleFormat":
|
||||
"""
|
||||
Get format from file extension.
|
||||
|
||||
Args:
|
||||
extension: File extension (with or without dot)
|
||||
|
||||
Returns:
|
||||
SubtitleFormat enum value
|
||||
|
||||
Raises:
|
||||
ValidationError: If extension is not supported
|
||||
"""
|
||||
ext = extension.lower().lstrip(".")
|
||||
for fmt in cls:
|
||||
if fmt.value == ext:
|
||||
return fmt
|
||||
raise ValidationError(f"Unsupported subtitle format: {extension}")
|
||||
class SubtitleType(Enum):
|
||||
STANDARD = "standard"
|
||||
SDH = "sdh"
|
||||
FORCED = "forced"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TimingOffset:
|
||||
"""
|
||||
Value object representing subtitle timing offset in milliseconds.
|
||||
class SubtitleFormat:
|
||||
"""A known subtitle file format."""
|
||||
|
||||
Used for synchronizing subtitles with video.
|
||||
id: str
|
||||
extensions: list[str]
|
||||
description: str = ""
|
||||
|
||||
def matches_extension(self, ext: str) -> bool:
|
||||
return ext.lower() in [e.lower() for e in self.extensions]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SubtitleLanguage:
    """Subtitle language: its code plus the tokens that identify it."""

    code: str  # ISO 639-1
    tokens: list[str]  # lowercase

    def matches_token(self, token: str) -> bool:
        """Return True when the lowercased token equals one of the stored tokens."""
        candidate = token.lower()
        return any(known == candidate for known in self.tokens)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SubtitlePattern:
|
||||
"""
|
||||
A known structural pattern for how a release group organises subtitle files.
|
||||
|
||||
Patterns are loaded from alfred/knowledge/patterns/*.yaml and are
|
||||
independent of any specific release group — multiple groups can share
|
||||
the same pattern.
|
||||
"""
|
||||
|
||||
milliseconds: int
|
||||
id: str
|
||||
description: str
|
||||
scan_strategy: ScanStrategy
|
||||
root_folder: str | None # e.g. "Subs", None for adjacent/embedded
|
||||
type_detection: TypeDetectionMethod
|
||||
version: str = "1.0"
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate timing offset."""
|
||||
if not isinstance(self.milliseconds, int):
|
||||
raise ValidationError(
|
||||
f"Timing offset must be an integer, got {type(self.milliseconds)}"
|
||||
)
|
||||
|
||||
def to_seconds(self) -> float:
|
||||
"""Convert to seconds."""
|
||||
return self.milliseconds / 1000.0
|
||||
@dataclass(frozen=True)
|
||||
class SubtitleMatchingRules:
|
||||
"""
|
||||
Effective rules after scope resolution (global → group → show → season → episode).
|
||||
Only stores actual values — None means "inherited, not overridden at this level".
|
||||
"""
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.milliseconds}ms"
|
||||
preferred_languages: list[str] = field(default_factory=list) # ISO 639-1 codes
|
||||
preferred_formats: list[str] = field(default_factory=list) # format ids
|
||||
allowed_types: list[str] = field(default_factory=list) # SubtitleType ids
|
||||
format_priority: list[str] = field(default_factory=list) # ordered format ids
|
||||
min_confidence: float = 0.7
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"TimingOffset({self.milliseconds})"
|
||||
|
||||
@dataclass(frozen=True)
class RuleScope:
    """
    At which level a rule set applies.

    Immutable pair of a level name and an optional identifier; the
    identifier defaults to None.
    """

    level: str  # "global" | "release_group" | "movie" | "show" | "season" | "episode"
    identifier: str | None = None  # imdb_id, group name, "S01", "S01E03"…
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
from ..shared.value_objects import FilePath, FileSize, ImdbId
|
||||
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
|
||||
@@ -22,8 +21,6 @@ class TVShow:
|
||||
seasons_count: int
|
||||
status: ShowStatus
|
||||
tmdb_id: int | None = None
|
||||
first_air_date: str | None = None
|
||||
added_at: datetime = field(default_factory=datetime.now)
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate TV show entity."""
|
||||
@@ -87,9 +84,6 @@ class Season:
|
||||
season_number: SeasonNumber
|
||||
episode_count: int
|
||||
name: str | None = None
|
||||
overview: str | None = None
|
||||
air_date: str | None = None
|
||||
poster_path: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate season entity."""
|
||||
@@ -146,11 +140,6 @@ class Episode:
|
||||
title: str
|
||||
file_path: FilePath | None = None
|
||||
file_size: FileSize | None = None
|
||||
overview: str | None = None
|
||||
air_date: str | None = None
|
||||
still_path: str | None = None
|
||||
vote_average: float | None = None
|
||||
runtime: int | None = None # in minutes
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate episode entity."""
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from enum import Enum
|
||||
from collections import namedtuple
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -13,14 +12,11 @@ from .exceptions import PathTraversalError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
FileOperationResult = namedtuple("FileOperationResult", ["success", "error", "message"])
|
||||
|
||||
class FolderName(Enum):
    """
    Types of folders that can be managed.

    The member values are the strings checked when a folder name is
    validated.
    """

    DOWNLOAD = "download"
    TVSHOW = "tvshow"
    MOVIE = "movie"
    TORRENT = "torrent"
|
||||
def _err(error: str, message: str) -> dict[str, Any]:
|
||||
return {"status": "error", "error": error, "message": message}
|
||||
|
||||
|
||||
class FileManager:
|
||||
@@ -35,8 +31,6 @@ class FileManager:
|
||||
"""
|
||||
Set a folder path in the configuration.
|
||||
|
||||
Validates that the path exists, is a directory, and is readable.
|
||||
|
||||
Args:
|
||||
folder_name: Name of folder (download, tvshow, movie, torrent).
|
||||
path_value: Absolute path to the folder.
|
||||
@@ -45,53 +39,39 @@ class FileManager:
|
||||
Dict with status or error information.
|
||||
"""
|
||||
try:
|
||||
self._validate_folder_name(folder_name)
|
||||
path_obj = Path(path_value).resolve()
|
||||
|
||||
if not path_obj.exists():
|
||||
logger.warning(f"Path does not exist: {path_value}")
|
||||
return {
|
||||
"error": "invalid_path",
|
||||
"message": f"Path does not exist: {path_value}",
|
||||
}
|
||||
return _err("invalid_path", f"Path does not exist: {path_value}")
|
||||
|
||||
if not path_obj.is_dir():
|
||||
logger.warning(f"Path is not a directory: {path_value}")
|
||||
return {
|
||||
"error": "invalid_path",
|
||||
"message": f"Path is not a directory: {path_value}",
|
||||
}
|
||||
return _err("invalid_path", f"Path is not a directory: {path_value}")
|
||||
|
||||
if not os.access(path_obj, os.R_OK):
|
||||
logger.warning(f"Path is not readable: {path_value}")
|
||||
return {
|
||||
"error": "permission_denied",
|
||||
"message": f"Path is not readable: {path_value}",
|
||||
}
|
||||
return _err("permission_denied", f"Path is not readable: {path_value}")
|
||||
|
||||
memory = get_memory()
|
||||
memory.ltm.set_config(f"{folder_name}_folder", str(path_obj))
|
||||
# workspace folders have fixed attributes; library folders go in the dict
|
||||
if folder_name in ("download", "torrent"):
|
||||
setattr(memory.ltm.workspace, folder_name, str(path_obj))
|
||||
else:
|
||||
memory.ltm.library_paths.set(folder_name, str(path_obj))
|
||||
memory.save()
|
||||
|
||||
logger.info(f"Set {folder_name}_folder to: {path_obj}")
|
||||
logger.info(f"Set {folder_name} to: {path_obj}")
|
||||
return {"status": "ok", "folder_name": folder_name, "path": str(path_obj)}
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"Validation error: {e}")
|
||||
return {"error": "validation_failed", "message": str(e)}
|
||||
return _err("validation_failed", str(e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error setting path: {e}", exc_info=True)
|
||||
return {"error": "internal_error", "message": "Failed to set path"}
|
||||
return _err("internal_error", "Failed to set path")
|
||||
|
||||
def list_folder( # noqa: PLR0911
|
||||
self, folder_type: str, path: str = "."
|
||||
) -> dict[str, Any]:
|
||||
def list_folder(self, folder_type: str, path: str = ".") -> dict[str, Any]:
|
||||
"""
|
||||
List contents of a configured folder.
|
||||
|
||||
Includes security checks to prevent path traversal.
|
||||
|
||||
Args:
|
||||
folder_type: Type of folder (download, tvshow, movie, torrent).
|
||||
path: Relative path within the folder (default: root).
|
||||
@@ -100,43 +80,28 @@ class FileManager:
|
||||
Dict with folder contents or error information.
|
||||
"""
|
||||
try:
|
||||
self._validate_folder_name(folder_type)
|
||||
safe_path = self._sanitize_path(path)
|
||||
|
||||
memory = get_memory()
|
||||
folder_key = f"{folder_type}_folder"
|
||||
folder_path = memory.ltm.get_config(folder_key)
|
||||
if folder_type in ("download", "torrent"):
|
||||
folder_path = getattr(memory.ltm.workspace, folder_type, None)
|
||||
else:
|
||||
folder_path = memory.ltm.library_paths.get(folder_type)
|
||||
|
||||
if not folder_path:
|
||||
logger.warning(f"Folder not configured: {folder_type}")
|
||||
return {
|
||||
"error": "folder_not_set",
|
||||
"message": f"{folder_type.capitalize()} folder not configured.",
|
||||
}
|
||||
return _err("folder_not_set", f"{folder_type.capitalize()} folder not configured.")
|
||||
|
||||
root = Path(folder_path)
|
||||
target = root / safe_path
|
||||
|
||||
if not self._is_safe_path(root, target):
|
||||
logger.warning(f"Path traversal attempt: {path}")
|
||||
return {
|
||||
"error": "forbidden",
|
||||
"message": "Access denied: path outside allowed directory",
|
||||
}
|
||||
return _err("forbidden", "Access denied: path outside allowed directory")
|
||||
|
||||
if not target.exists():
|
||||
logger.warning(f"Path does not exist: {target}")
|
||||
return {
|
||||
"error": "not_found",
|
||||
"message": f"Path does not exist: {safe_path}",
|
||||
}
|
||||
return _err("not_found", f"Path does not exist: {safe_path}")
|
||||
|
||||
if not target.is_dir():
|
||||
logger.warning(f"Path is not a directory: {target}")
|
||||
return {
|
||||
"error": "not_a_directory",
|
||||
"message": f"Path is not a directory: {safe_path}",
|
||||
}
|
||||
return _err("not_a_directory", f"Path is not a directory: {safe_path}")
|
||||
|
||||
try:
|
||||
entries = [entry.name for entry in target.iterdir()]
|
||||
@@ -149,35 +114,28 @@ class FileManager:
|
||||
"count": len(entries),
|
||||
}
|
||||
except PermissionError:
|
||||
logger.warning(f"Permission denied: {target}")
|
||||
return {
|
||||
"error": "permission_denied",
|
||||
"message": f"Permission denied: {safe_path}",
|
||||
}
|
||||
return _err("permission_denied", f"Permission denied: {safe_path}")
|
||||
|
||||
except PathTraversalError as e:
|
||||
logger.warning(f"Path traversal attempt: {e}")
|
||||
return {"error": "forbidden", "message": str(e)}
|
||||
return _err("forbidden", str(e))
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"Validation error: {e}")
|
||||
return {"error": "validation_failed", "message": str(e)}
|
||||
return _err("validation_failed", str(e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error listing folder: {e}", exc_info=True)
|
||||
return {"error": "internal_error", "message": "Failed to list folder"}
|
||||
return _err("internal_error", "Failed to list folder")
|
||||
|
||||
def move_file( # noqa: PLR0911
|
||||
self, source: str, destination: str
|
||||
) -> dict[str, Any]:
|
||||
def copy_file(self, source: str, destination: str) -> dict[str, Any]:
|
||||
"""
|
||||
Move a file from one location to another.
|
||||
Hard-link a file to a destination (instant, no data duplication).
|
||||
|
||||
Includes validation and verification after move.
|
||||
Both paths must be on the same filesystem. qBittorrent keeps seeding
|
||||
the original inode unaffected.
|
||||
|
||||
Args:
|
||||
source: Source file path.
|
||||
destination: Destination file path.
|
||||
source: Absolute path to the source file.
|
||||
destination: Absolute path to the destination file.
|
||||
|
||||
Returns:
|
||||
Dict with status or error information.
|
||||
@@ -186,126 +144,174 @@ class FileManager:
|
||||
source_path = Path(source).resolve()
|
||||
dest_path = Path(destination).resolve()
|
||||
|
||||
logger.info(f"Moving file: {source_path} -> {dest_path}")
|
||||
logger.info(f"Hard-linking: {source_path} -> {dest_path}")
|
||||
|
||||
if not source_path.exists():
|
||||
return {
|
||||
"error": "source_not_found",
|
||||
"message": f"Source does not exist: {source}",
|
||||
}
|
||||
return _err("source_not_found", f"Source does not exist: {source}")
|
||||
|
||||
if not source_path.is_file():
|
||||
return {
|
||||
"error": "source_not_file",
|
||||
"message": f"Source is not a file: {source}",
|
||||
}
|
||||
return _err("source_not_file", f"Source is not a file: {source}")
|
||||
|
||||
source_size = source_path.stat().st_size
|
||||
dest_parent = dest_path.parent
|
||||
|
||||
if not dest_parent.exists():
|
||||
return {
|
||||
"error": "destination_dir_not_found",
|
||||
"message": f"Destination directory does not exist: {dest_parent}",
|
||||
}
|
||||
if not dest_path.parent.exists():
|
||||
return _err("destination_dir_not_found", f"Destination directory does not exist: {dest_path.parent}")
|
||||
|
||||
if dest_path.exists():
|
||||
return {
|
||||
"error": "destination_exists",
|
||||
"message": f"Destination already exists: {destination}",
|
||||
}
|
||||
return _err("destination_exists", f"Destination already exists: {destination}")
|
||||
|
||||
shutil.move(str(source_path), str(dest_path))
|
||||
os.link(source_path, dest_path)
|
||||
|
||||
# Verify move
|
||||
if not dest_path.exists():
|
||||
return {
|
||||
"error": "move_verification_failed",
|
||||
"message": "File was not moved successfully",
|
||||
}
|
||||
|
||||
dest_size = dest_path.stat().st_size
|
||||
if dest_size != source_size:
|
||||
return {
|
||||
"error": "size_mismatch",
|
||||
"message": "File size mismatch after move",
|
||||
}
|
||||
|
||||
logger.info(f"File moved successfully: {dest_path.name}")
|
||||
logger.info(f"Hard link created: {dest_path.name}")
|
||||
return {
|
||||
"status": "ok",
|
||||
"source": str(source_path),
|
||||
"destination": str(dest_path),
|
||||
"filename": dest_path.name,
|
||||
"size": dest_size,
|
||||
"size": source_path.stat().st_size,
|
||||
}
|
||||
|
||||
except OSError as e:
|
||||
logger.error(f"Error creating hard link: {e}", exc_info=True)
|
||||
return _err("link_failed", str(e))
|
||||
|
||||
def move_file(self, source: str, destination: str) -> dict[str, Any]:
    """
    Move a file via hard link + source deletion.

    Hard-links the file to the destination with copy_file, then removes
    the source. If the source cannot be removed, the new link is deleted
    again so a failed move leaves no file at the destination and the call
    can be retried.

    Args:
        source: Absolute path to the source file.
        destination: Absolute path to the destination file.

    Returns:
        Dict with status or error information. Errors from copy_file are
        returned unchanged; any other failure is reported as "move_failed".
    """
    try:
        source_path = Path(source).resolve()

        link_result = self.copy_file(source, destination)
        if link_result.get("status") != "ok":
            return link_result

        try:
            source_path.unlink()
        except OSError:
            # The source is still in place, so drop the new link rather than
            # leaving the file reachable from both paths after a failed move.
            try:
                Path(link_result["destination"]).unlink()
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove link after failed move: {cleanup_error}"
                )
            raise

        logger.info(f"File moved: {source_path.name} -> {link_result['destination']}")
        return {
            "status": "ok",
            "source": str(source_path),
            "destination": link_result["destination"],
            "filename": link_result["filename"],
            "size": link_result["size"],
        }

    except Exception as e:
        logger.error(f"Error moving file: {e}", exc_info=True)
        return _err("move_failed", str(e))
|
||||
|
||||
def _validate_folder_name(self, folder_name: str) -> bool:
|
||||
def create_seed_links(
    self, library_file: str, original_download_folder: str, torrent_folder: str
) -> dict[str, Any]:
    """
    Prepare a torrent folder so qBittorrent can keep seeding after a move.

    - Hard-links the moved video file from the library back into
      torrents/<original_folder_name>/ (same inode, no data copy).
    - Copies every other file from the original download folder
      (.srt, .nfo, .jpg, .txt, …) into the same torrent subfolder,
      preserving relative paths. Files that already exist at the
      destination are left untouched and reported as skipped.

    Args:
        library_file: Absolute path to the video file in the library.
        original_download_folder: Absolute path to the download folder
            that contained the original release (may still have subs etc.).
        torrent_folder: Absolute path to the root torrents/ directory.

    Returns:
        Dict with status, linked_file, copied_files list, skipped list.
    """
    # Imported once per call instead of once per copied file.
    import shutil

    try:
        lib_path = Path(library_file).resolve()
        src_folder = Path(original_download_folder).resolve()
        torrent_root = Path(torrent_folder).resolve()

        if not lib_path.exists():
            return _err("library_file_not_found", f"Library file not found: {library_file}")
        if not src_folder.exists():
            return _err("source_folder_not_found", f"Download folder not found: {original_download_folder}")
        if not torrent_root.exists():
            return _err("torrent_folder_not_found", f"Torrent folder not found: {torrent_folder}")

        dest_folder = torrent_root / src_folder.name
        dest_folder.mkdir(parents=True, exist_ok=True)

        # Hard-link the video file from library → torrent subfolder
        link_dest = dest_folder / lib_path.name
        if link_dest.exists():
            return _err("destination_exists", f"Link already exists: {link_dest}")
        os.link(lib_path, link_dest)
        logger.info(f"Hard-linked for seeding: {lib_path.name} → {dest_folder}")

        # Copy everything else from the original download folder
        copied: list[str] = []
        skipped: list[str] = []
        for item in src_folder.rglob("*"):
            if not item.is_file():
                continue
            rel = item.relative_to(src_folder)
            dest_item = dest_folder / rel
            dest_item.parent.mkdir(parents=True, exist_ok=True)
            if dest_item.exists():
                skipped.append(str(rel))
                continue
            shutil.copy2(item, dest_item)
            copied.append(str(rel))
            logger.debug(f"Copied for seeding: {rel}")

        return {
            "status": "ok",
            "torrent_subfolder": str(dest_folder),
            "linked_file": str(link_dest),
            "copied_files": copied,
            "copied_count": len(copied),
            "skipped": skipped,
        }

    except OSError as e:
        logger.error(f"create_seed_links failed: {e}", exc_info=True)
        return _err("link_failed", str(e))
    except Exception as e:
        logger.error(f"create_seed_links unexpected error: {e}", exc_info=True)
        return _err("internal_error", str(e))
|
||||
|
||||
def _sanitize_path(self, path: str) -> str:
|
||||
"""
|
||||
Sanitize path to prevent path traversal attacks.
|
||||
Sanitize a relative path to prevent path traversal attacks.
|
||||
|
||||
Args:
|
||||
path: Path to sanitize.
|
||||
|
||||
Returns:
|
||||
Sanitized path.
|
||||
|
||||
Raises:
|
||||
PathTraversalError: If path contains traversal attempts.
|
||||
Raises PathTraversalError if the path tries to escape the root.
|
||||
"""
|
||||
normalized = os.path.normpath(path)
|
||||
|
||||
# Reject absolute paths
|
||||
if os.path.isabs(normalized):
|
||||
raise PathTraversalError("Absolute paths are not allowed")
|
||||
|
||||
# Reject parent directory references
|
||||
if normalized.startswith("..") or "/.." in normalized or "\\.." in normalized:
|
||||
raise PathTraversalError("Parent directory references not allowed")
|
||||
|
||||
# Reject null bytes
|
||||
if "\x00" in normalized:
|
||||
raise PathTraversalError("Null bytes in path not allowed")
|
||||
|
||||
return normalized
|
||||
|
||||
def _is_safe_path(self, base_path: Path, target_path: Path) -> bool:
|
||||
"""
|
||||
Check if target path is within base path.
|
||||
|
||||
Args:
|
||||
base_path: The allowed base directory.
|
||||
target_path: The path to check.
|
||||
|
||||
Returns:
|
||||
True if target is within base, False otherwise.
|
||||
"""
|
||||
"""Return True if target_path is inside base_path (prevents traversal)."""
|
||||
try:
|
||||
base_resolved = base_path.resolve()
|
||||
target_resolved = target_path.resolve()
|
||||
target_resolved.relative_to(base_resolved)
|
||||
target_path.resolve().relative_to(base_path.resolve())
|
||||
return True
|
||||
except (ValueError, OSError):
|
||||
return False
|
||||
|
||||
@@ -1,17 +1,10 @@
|
||||
"""Persistence layer - Data storage implementations."""
|
||||
|
||||
from .context import (
|
||||
get_memory,
|
||||
has_memory,
|
||||
init_memory,
|
||||
set_memory,
|
||||
)
|
||||
from .memory import (
|
||||
EpisodicMemory,
|
||||
LongTermMemory,
|
||||
Memory,
|
||||
ShortTermMemory,
|
||||
)
|
||||
from .context import get_memory, has_memory, init_memory, set_memory
|
||||
from .memory import Memory
|
||||
from .memory.episodic import EpisodicMemory
|
||||
from .memory.ltm import LongTermMemory
|
||||
from .memory.stm import ShortTermMemory
|
||||
|
||||
__all__ = [
|
||||
"Memory",
|
||||
|
||||
@@ -1,28 +1,26 @@
|
||||
"""
|
||||
Memory context using contextvars.
|
||||
Memory context — global singleton.
|
||||
|
||||
Provides thread-safe and async-safe access to the Memory instance
|
||||
Provides access to a single, process-wide Memory instance (shared by all callers)
|
||||
without passing it explicitly through all function calls.
|
||||
|
||||
Usage:
|
||||
# At application startup
|
||||
from alfred.infrastructure.persistence import init_memory, get_memory
|
||||
|
||||
init_memory("memory_data")
|
||||
init_memory("memory")
|
||||
|
||||
# Anywhere in the code
|
||||
memory = get_memory()
|
||||
memory.ltm.set_config("key", "value")
|
||||
"""
|
||||
|
||||
from contextvars import ContextVar
|
||||
|
||||
from .memory import Memory
|
||||
|
||||
_memory_ctx: ContextVar[Memory | None] = ContextVar("memory", default=None)
|
||||
_memory: Memory | None = None
|
||||
|
||||
|
||||
def init_memory(storage_dir: str = "memory_data") -> Memory:
|
||||
def init_memory(storage_dir: str = "memory") -> Memory:
|
||||
"""
|
||||
Initialize the memory and set it in the context.
|
||||
|
||||
@@ -34,9 +32,9 @@ def init_memory(storage_dir: str = "memory_data") -> Memory:
|
||||
Returns:
|
||||
The initialized Memory instance.
|
||||
"""
|
||||
memory = Memory(storage_dir=storage_dir)
|
||||
_memory_ctx.set(memory)
|
||||
return memory
|
||||
global _memory
|
||||
_memory = Memory(storage_dir=storage_dir)
|
||||
return _memory
|
||||
|
||||
|
||||
def set_memory(memory: Memory) -> None:
|
||||
@@ -48,7 +46,8 @@ def set_memory(memory: Memory) -> None:
|
||||
Args:
|
||||
memory: Memory instance to set.
|
||||
"""
|
||||
_memory_ctx.set(memory)
|
||||
global _memory
|
||||
_memory = memory
|
||||
|
||||
|
||||
def get_memory() -> Memory:
|
||||
@@ -61,12 +60,11 @@ def get_memory() -> Memory:
|
||||
Raises:
|
||||
RuntimeError: If memory has not been initialized.
|
||||
"""
|
||||
memory = _memory_ctx.get()
|
||||
if memory is None:
|
||||
if _memory is None:
|
||||
raise RuntimeError(
|
||||
"Memory not initialized. Call init_memory() at application startup."
|
||||
)
|
||||
return memory
|
||||
return _memory
|
||||
|
||||
|
||||
def has_memory() -> bool:
|
||||
@@ -76,4 +74,12 @@ def has_memory() -> bool:
|
||||
Returns:
|
||||
True if memory is available, False otherwise.
|
||||
"""
|
||||
return _memory_ctx.get() is not None
|
||||
return _memory is not None
|
||||
|
||||
|
||||
def reset_memory() -> None:
    """
    Reset the memory singleton to None. For use in tests only.

    Rebinds the module-level ``_memory`` to None.
    """
    global _memory
    _memory = None
|
||||
|
||||
@@ -119,10 +119,6 @@ class JsonSubtitleRepository(SubtitleRepository):
|
||||
"timing_offset": subtitle.timing_offset.milliseconds,
|
||||
"hearing_impaired": subtitle.hearing_impaired,
|
||||
"forced": subtitle.forced,
|
||||
"source": subtitle.source,
|
||||
"uploader": subtitle.uploader,
|
||||
"download_count": subtitle.download_count,
|
||||
"rating": subtitle.rating,
|
||||
}
|
||||
|
||||
def _from_dict(self, data: dict[str, Any]) -> Subtitle:
|
||||
@@ -137,8 +133,4 @@ class JsonSubtitleRepository(SubtitleRepository):
|
||||
timing_offset=TimingOffset(data.get("timing_offset", 0)),
|
||||
hearing_impaired=data.get("hearing_impaired", False),
|
||||
forced=data.get("forced", False),
|
||||
source=data.get("source"),
|
||||
uploader=data.get("uploader"),
|
||||
download_count=data.get("download_count"),
|
||||
rating=data.get("rating"),
|
||||
)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
"""JSON-based TV show repository implementation."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from alfred.domain.shared.value_objects import ImdbId
|
||||
@@ -115,8 +114,6 @@ class JsonTVShowRepository(TVShowRepository):
|
||||
"seasons_count": show.seasons_count,
|
||||
"status": show.status.value,
|
||||
"tmdb_id": show.tmdb_id,
|
||||
"first_air_date": show.first_air_date,
|
||||
"added_at": show.added_at.isoformat(),
|
||||
}
|
||||
|
||||
def _from_dict(self, data: dict[str, Any]) -> TVShow:
|
||||
@@ -127,10 +124,4 @@ class JsonTVShowRepository(TVShowRepository):
|
||||
seasons_count=data["seasons_count"],
|
||||
status=ShowStatus.from_string(data["status"]),
|
||||
tmdb_id=data.get("tmdb_id"),
|
||||
first_air_date=data.get("first_air_date"),
|
||||
added_at=(
|
||||
datetime.fromisoformat(data["added_at"])
|
||||
if data.get("added_at")
|
||||
else datetime.now()
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1,577 +0,0 @@
|
||||
"""
|
||||
Memory - Unified management of 3 memory types.
|
||||
|
||||
Architecture:
|
||||
- LTM (Long-Term Memory): Configuration, library, preferences - Persistent
|
||||
- STM (Short-Term Memory): Conversation, current workflow - Volatile
|
||||
- Episodic Memory: Search results, transient states - Very volatile
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LONG-TERM MEMORY (LTM) - Persistent
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
class LongTermMemory:
    """
    Long-term memory - Persistent and static.

    Stores:
    - User configuration (folders, URLs)
    - Preferences (quality, languages)
    - Library (owned movies/TV shows)
    - Followed shows (watchlist)
    """

    # Folder and service configuration
    config: dict[str, str] = field(default_factory=dict)

    # User preferences
    preferences: dict[str, Any] = field(
        default_factory=lambda: LongTermMemory._default_preferences()
    )

    # Library of owned media
    library: dict[str, list[dict]] = field(
        default_factory=lambda: LongTermMemory._empty_library()
    )

    # Followed shows (watchlist)
    following: list[dict] = field(default_factory=list)

    @staticmethod
    def _default_preferences() -> dict[str, Any]:
        """Return a fresh copy of the default user preferences."""
        return {
            "preferred_quality": "1080p",
            "preferred_languages": ["en", "fr"],
            "auto_organize": False,
            "naming_format": "{title}.{year}.{quality}",
        }

    @staticmethod
    def _empty_library() -> dict[str, list[dict]]:
        """Return a fresh, empty library with the two built-in media types."""
        return {"movies": [], "tv_shows": []}

    def get_config(self, key: str, default: Any = None) -> Any:
        """Get a configuration value, or ``default`` when the key is absent."""
        return self.config.get(key, default)

    def set_config(self, key: str, value: Any) -> None:
        """Set a configuration value."""
        self.config[key] = value
        logger.debug(f"LTM: Set config {key}")

    def has_config(self, key: str) -> bool:
        """Check if a configuration exists (a key stored as None counts as absent)."""
        return key in self.config and self.config[key] is not None

    def add_to_library(self, media_type: str, media: dict) -> None:
        """
        Add a media item to the library.

        Items are de-duplicated by their "imdb_id" value; an item without
        one compares equal to any stored item that also lacks one. On
        insertion the passed dict gets an "added_at" ISO timestamp and is
        stored as-is (not copied).
        """
        items = self.library.setdefault(media_type, [])

        # Avoid duplicates by imdb_id
        imdb_id = media.get("imdb_id")
        if any(existing.get("imdb_id") == imdb_id for existing in items):
            return

        media["added_at"] = datetime.now().isoformat()
        items.append(media)
        logger.info(f"LTM: Added {media.get('title')} to {media_type}")

    def get_library(self, media_type: str) -> list[dict]:
        """Get the library for a media type (empty list for an unknown type)."""
        return self.library.get(media_type, [])

    def follow_show(self, show: dict) -> None:
        """
        Add a show to the watchlist.

        Shows are de-duplicated by "imdb_id"; on insertion the passed dict
        gets a "followed_at" ISO timestamp and is stored as-is.
        """
        imdb_id = show.get("imdb_id")
        if any(followed.get("imdb_id") == imdb_id for followed in self.following):
            return

        show["followed_at"] = datetime.now().isoformat()
        self.following.append(show)
        logger.info(f"LTM: Now following {show.get('title')}")

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization (values are not copied)."""
        return {
            "config": self.config,
            "preferences": self.preferences,
            "library": self.library,
            "following": self.following,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LongTermMemory":
        """
        Create an instance from a dictionary.

        Any of the keys "config", "preferences", "library" and "following"
        that is missing from ``data`` falls back to the same default a
        freshly constructed instance would use.
        """
        return cls(
            config=data.get("config", {}),
            preferences=(
                data["preferences"]
                if "preferences" in data
                else cls._default_preferences()
            ),
            library=data["library"] if "library" in data else cls._empty_library(),
            following=data.get("following", []),
        )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SHORT-TERM MEMORY (STM) - Conversation
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShortTermMemory:
|
||||
"""
|
||||
Short-term memory - Volatile and conversational.
|
||||
|
||||
Stores:
|
||||
- Current conversation history
|
||||
- Current workflow (what we're doing)
|
||||
- Extracted entities from conversation
|
||||
- Current discussion topic
|
||||
"""
|
||||
|
||||
# Conversation message history
|
||||
conversation_history: list[dict[str, str]] = field(default_factory=list)
|
||||
|
||||
# Current workflow
|
||||
current_workflow: dict | None = None
|
||||
|
||||
# Extracted entities (title, year, requested quality, etc.)
|
||||
extracted_entities: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# Current conversation topic
|
||||
current_topic: str | None = None
|
||||
|
||||
# Conversation language
|
||||
language: str = "en"
|
||||
|
||||
# History message limit
|
||||
max_history: int = 20
|
||||
|
||||
def add_message(self, role: str, content: str) -> None:
|
||||
"""Add a message to history."""
|
||||
self.conversation_history.append(
|
||||
{"role": role, "content": content, "timestamp": datetime.now().isoformat()}
|
||||
)
|
||||
# Keep only the last N messages
|
||||
if len(self.conversation_history) > self.max_history:
|
||||
self.conversation_history = self.conversation_history[-self.max_history :]
|
||||
logger.debug(f"STM: Added {role} message")
|
||||
|
||||
def get_recent_history(self, n: int = 10) -> list[dict]:
|
||||
"""Get the last N messages."""
|
||||
return self.conversation_history[-n:]
|
||||
|
||||
def start_workflow(self, workflow_type: str, target: dict) -> None:
|
||||
"""Start a new workflow."""
|
||||
self.current_workflow = {
|
||||
"type": workflow_type,
|
||||
"target": target,
|
||||
"stage": "started",
|
||||
"started_at": datetime.now().isoformat(),
|
||||
}
|
||||
logger.info(f"STM: Started workflow '{workflow_type}'")
|
||||
|
||||
def update_workflow_stage(self, stage: str) -> None:
|
||||
"""Update the workflow stage."""
|
||||
if self.current_workflow:
|
||||
self.current_workflow["stage"] = stage
|
||||
logger.debug(f"STM: Workflow stage -> {stage}")
|
||||
|
||||
def end_workflow(self) -> None:
|
||||
"""End the current workflow."""
|
||||
if self.current_workflow:
|
||||
logger.info(f"STM: Ended workflow '{self.current_workflow.get('type')}'")
|
||||
self.current_workflow = None
|
||||
|
||||
def set_entity(self, key: str, value: Any) -> None:
|
||||
"""Store an extracted entity."""
|
||||
self.extracted_entities[key] = value
|
||||
logger.debug(f"STM: Set entity {key}={value}")
|
||||
|
||||
def get_entity(self, key: str, default: Any = None) -> Any:
|
||||
"""Get an extracted entity."""
|
||||
return self.extracted_entities.get(key, default)
|
||||
|
||||
def clear_entities(self) -> None:
|
||||
"""Clear extracted entities."""
|
||||
self.extracted_entities = {}
|
||||
|
||||
def set_topic(self, topic: str) -> None:
|
||||
"""Set the current topic."""
|
||||
self.current_topic = topic
|
||||
logger.debug(f"STM: Topic -> {topic}")
|
||||
|
||||
def set_language(self, language: str) -> None:
|
||||
"""Set the conversation language."""
|
||||
self.language = language
|
||||
logger.debug(f"STM: Language -> {language}")
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Reset short-term memory."""
|
||||
self.conversation_history = []
|
||||
self.current_workflow = None
|
||||
self.extracted_entities = {}
|
||||
self.current_topic = None
|
||||
self.language = "en"
|
||||
logger.info("STM: Cleared")
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"conversation_history": self.conversation_history,
|
||||
"current_workflow": self.current_workflow,
|
||||
"extracted_entities": self.extracted_entities,
|
||||
"current_topic": self.current_topic,
|
||||
"language": self.language,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# EPISODIC MEMORY - Transient states
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class EpisodicMemory:
|
||||
"""
|
||||
Episodic/sensory memory - Temporary and event-driven.
|
||||
|
||||
Stores:
|
||||
- Last search results
|
||||
- Active downloads
|
||||
- Recent errors
|
||||
- Pending questions awaiting user response
|
||||
- Background events
|
||||
"""
|
||||
|
||||
# Last search results
|
||||
last_search_results: dict | None = None
|
||||
|
||||
# Active downloads
|
||||
active_downloads: list[dict] = field(default_factory=list)
|
||||
|
||||
# Recent errors
|
||||
recent_errors: list[dict] = field(default_factory=list)
|
||||
|
||||
# Pending question awaiting user response
|
||||
pending_question: dict | None = None
|
||||
|
||||
# Background events (download complete, new files, etc.)
|
||||
background_events: list[dict] = field(default_factory=list)
|
||||
|
||||
# Limits for errors/events kept
|
||||
max_errors: int = 5
|
||||
max_events: int = 10
|
||||
|
||||
def store_search_results(
|
||||
self, query: str, results: list[dict], search_type: str = "torrent"
|
||||
) -> None:
|
||||
"""
|
||||
Store search results with index.
|
||||
|
||||
Args:
|
||||
query: The search query
|
||||
results: List of results
|
||||
search_type: Type of search (torrent, movie, tvshow)
|
||||
"""
|
||||
self.last_search_results = {
|
||||
"query": query,
|
||||
"type": search_type,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"results": [{"index": i + 1, **r} for i, r in enumerate(results)],
|
||||
}
|
||||
logger.info(f"Episodic: Stored {len(results)} search results for '{query}'")
|
||||
|
||||
def get_result_by_index(self, index: int) -> dict | None:
|
||||
"""
|
||||
Get a result by its number (1-indexed).
|
||||
|
||||
Args:
|
||||
index: Result number (1, 2, 3, ...)
|
||||
|
||||
Returns:
|
||||
The result or None if not found
|
||||
"""
|
||||
if not self.last_search_results:
|
||||
logger.warning("Episodic: No search results stored")
|
||||
return None
|
||||
|
||||
for result in self.last_search_results.get("results", []):
|
||||
if result.get("index") == index:
|
||||
return result
|
||||
|
||||
logger.warning(f"Episodic: Result #{index} not found")
|
||||
return None
|
||||
|
||||
def get_search_results(self) -> dict | None:
|
||||
"""Get the last search results."""
|
||||
return self.last_search_results
|
||||
|
||||
def clear_search_results(self) -> None:
|
||||
"""Clear search results."""
|
||||
self.last_search_results = None
|
||||
|
||||
def add_active_download(self, download: dict) -> None:
|
||||
"""Add an active download."""
|
||||
download["started_at"] = datetime.now().isoformat()
|
||||
self.active_downloads.append(download)
|
||||
logger.info(f"Episodic: Added download '{download.get('name')}'")
|
||||
|
||||
def update_download_progress(
|
||||
self, task_id: str, progress: int, status: str = "downloading"
|
||||
) -> None:
|
||||
"""Update download progress."""
|
||||
for dl in self.active_downloads:
|
||||
if dl.get("task_id") == task_id:
|
||||
dl["progress"] = progress
|
||||
dl["status"] = status
|
||||
dl["updated_at"] = datetime.now().isoformat()
|
||||
break
|
||||
|
||||
def complete_download(self, task_id: str, file_path: str) -> dict | None:
|
||||
"""Mark a download as complete and remove it."""
|
||||
for i, dl in enumerate(self.active_downloads):
|
||||
if dl.get("task_id") == task_id:
|
||||
completed = self.active_downloads.pop(i)
|
||||
completed["status"] = "completed"
|
||||
completed["file_path"] = file_path
|
||||
completed["completed_at"] = datetime.now().isoformat()
|
||||
|
||||
# Add a background event
|
||||
self.add_background_event(
|
||||
"download_complete",
|
||||
{"name": completed.get("name"), "file_path": file_path},
|
||||
)
|
||||
|
||||
logger.info(f"Episodic: Download completed '{completed.get('name')}'")
|
||||
return completed
|
||||
return None
|
||||
|
||||
def get_active_downloads(self) -> list[dict]:
|
||||
"""Get active downloads."""
|
||||
return self.active_downloads
|
||||
|
||||
def add_error(self, action: str, error: str, context: dict | None = None) -> None:
|
||||
"""Record a recent error."""
|
||||
self.recent_errors.append(
|
||||
{
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"action": action,
|
||||
"error": error,
|
||||
"context": context or {},
|
||||
}
|
||||
)
|
||||
# Keep only the last N errors
|
||||
self.recent_errors = self.recent_errors[-self.max_errors :]
|
||||
logger.warning(f"Episodic: Error in '{action}': {error}")
|
||||
|
||||
def get_recent_errors(self) -> list[dict]:
|
||||
"""Get recent errors."""
|
||||
return self.recent_errors
|
||||
|
||||
def set_pending_question(
|
||||
self,
|
||||
question: str,
|
||||
options: list[dict],
|
||||
context: dict,
|
||||
question_type: str = "choice",
|
||||
) -> None:
|
||||
"""
|
||||
Record a question awaiting user response.
|
||||
|
||||
Args:
|
||||
question: The question asked
|
||||
options: List of possible options
|
||||
context: Question context
|
||||
question_type: Type of question (choice, confirmation, input)
|
||||
"""
|
||||
self.pending_question = {
|
||||
"type": question_type,
|
||||
"question": question,
|
||||
"options": options,
|
||||
"context": context,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
}
|
||||
logger.info(f"Episodic: Pending question set ({question_type})")
|
||||
|
||||
def get_pending_question(self) -> dict | None:
|
||||
"""Get the pending question."""
|
||||
return self.pending_question
|
||||
|
||||
def resolve_pending_question(self, answer_index: int | None = None) -> dict | None:
|
||||
"""
|
||||
Resolve the pending question and return the chosen option.
|
||||
|
||||
Args:
|
||||
answer_index: Answer index (1-indexed) or None to cancel
|
||||
|
||||
Returns:
|
||||
The chosen option or None
|
||||
"""
|
||||
if not self.pending_question:
|
||||
return None
|
||||
|
||||
result = None
|
||||
if answer_index is not None and self.pending_question.get("options"):
|
||||
for opt in self.pending_question["options"]:
|
||||
if opt.get("index") == answer_index:
|
||||
result = opt
|
||||
break
|
||||
|
||||
self.pending_question = None
|
||||
logger.info("Episodic: Pending question resolved")
|
||||
return result
|
||||
|
||||
def add_background_event(self, event_type: str, data: dict) -> None:
|
||||
"""Add a background event."""
|
||||
self.background_events.append(
|
||||
{
|
||||
"type": event_type,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"data": data,
|
||||
"read": False,
|
||||
}
|
||||
)
|
||||
# Keep only the last N events
|
||||
self.background_events = self.background_events[-self.max_events :]
|
||||
logger.info(f"Episodic: Background event '{event_type}'")
|
||||
|
||||
def get_unread_events(self) -> list[dict]:
|
||||
"""Get unread events and mark them as read."""
|
||||
unread = [e for e in self.background_events if not e.get("read")]
|
||||
for e in self.background_events:
|
||||
e["read"] = True
|
||||
return unread
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Reset episodic memory."""
|
||||
self.last_search_results = None
|
||||
self.active_downloads = []
|
||||
self.recent_errors = []
|
||||
self.pending_question = None
|
||||
self.background_events = []
|
||||
logger.info("Episodic: Cleared")
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"last_search_results": self.last_search_results,
|
||||
"active_downloads": self.active_downloads,
|
||||
"recent_errors": self.recent_errors,
|
||||
"pending_question": self.pending_question,
|
||||
"background_events": self.background_events,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MEMORY MANAGER - Unified manager
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class Memory:
    """
    Unified manager for the 3 memory types.

    Usage:
        memory = Memory("memory_data")
        memory.ltm.set_config("download_folder", "/path")
        memory.stm.add_message("user", "Hello")
        memory.episodic.store_search_results("query", results)
        memory.save()
    """

    def __init__(self, storage_dir: str = "memory_data"):
        """
        Initialize the memory.

        Creates ``storage_dir`` if needed, loads the long-term memory from
        ``ltm.json`` inside it and starts with empty short-term and episodic
        memories.

        Args:
            storage_dir: Directory for persistent storage
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)

        self.ltm_file = self.storage_dir / "ltm.json"

        # Initialize the 3 memory types
        self.ltm = self._load_ltm()
        self.stm = ShortTermMemory()
        self.episodic = EpisodicMemory()

        logger.info(f"Memory initialized (storage: {storage_dir})")

    def _load_ltm(self) -> LongTermMemory:
        """
        Load LTM from file.

        Returns:
            The stored long-term memory, or a fresh ``LongTermMemory`` when
            the file is missing, unreadable or not valid JSON.
        """
        if self.ltm_file.exists():
            try:
                data = json.loads(self.ltm_file.read_text(encoding="utf-8"))
                ltm = LongTermMemory.from_dict(data)
            except (OSError, json.JSONDecodeError) as e:
                logger.warning(f"Could not load LTM: {e}")
            else:
                # Log success only once the data has actually been turned
                # into a LongTermMemory instance.
                logger.info("LTM loaded from file")
                return ltm
        return LongTermMemory()

    def save(self) -> None:
        """
        Save LTM (the only persistent memory).

        The JSON is written to a sibling temporary file that is then renamed
        over ``ltm.json``, so an interrupted write cannot leave a truncated
        file behind (which ``_load_ltm`` would discard on the next start).

        Raises:
            OSError: If the file cannot be written or replaced
        """
        payload = json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False)
        tmp_file = self.ltm_file.with_name(self.ltm_file.name + ".tmp")
        try:
            tmp_file.write_text(payload, encoding="utf-8")
            tmp_file.replace(self.ltm_file)
            logger.debug("LTM saved to file")
        except OSError as e:
            logger.error(f"Failed to save LTM: {e}")
            try:
                tmp_file.unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; the original error is re-raised
            raise

    def get_context_for_prompt(self) -> dict:
        """
        Generate context to include in the system prompt.

        Returns:
            Dictionary with relevant context from all 3 memories
        """
        last_search = self.episodic.last_search_results
        unread = [e for e in self.episodic.background_events if not e.get("read")]
        return {
            "config": self.ltm.config,
            "preferences": self.ltm.preferences,
            "current_workflow": self.stm.current_workflow,
            "current_topic": self.stm.current_topic,
            "extracted_entities": self.stm.extracted_entities,
            "last_search": {
                "query": last_search.get("query") if last_search else None,
                "result_count": (
                    len(last_search.get("results", [])) if last_search else 0
                ),
            },
            "active_downloads_count": len(self.episodic.active_downloads),
            "pending_question": self.episodic.pending_question is not None,
            # Counted without marking the events as read.
            "unread_events": len(unread),
        }

    def get_full_state(self) -> dict:
        """Return the full state of all 3 memories (for debug)."""
        return {
            "ltm": self.ltm.to_dict(),
            "stm": self.stm.to_dict(),
            "episodic": self.episodic.to_dict(),
        }

    def clear_session(self) -> None:
        """Clear session memories (STM + Episodic)."""
        self.stm.clear()
        self.episodic.clear()
        logger.info("Session memories cleared")
|
||||
@@ -0,0 +1,4 @@
|
||||
from .base import Memory
|
||||
from .registry import MemoryRegistry
|
||||
|
||||
__all__ = ["Memory", "MemoryRegistry"]
|
||||
@@ -0,0 +1,90 @@
|
||||
"""Memory — unified manager for the 3 memory tiers."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from .episodic import EpisodicMemory
|
||||
from .ltm import LongTermMemory
|
||||
from .stm import ShortTermMemory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Memory:
    """
    Unified manager for the 3 memory tiers.

    Usage:
        memory = Memory("data/memory")
        memory.ltm.workspace.download = "/downloads"
        memory.stm.add_message("user", "Hello")
        memory.episodic.store_search_results("query", results)
        memory.save()
    """

    def __init__(self, storage_dir: str = "memory"):
        """
        Create the storage directory if needed and set up the 3 tiers.

        Args:
            storage_dir: Directory holding ``ltm.json``
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.ltm_file = self.storage_dir / "ltm.json"

        self.ltm = self._load_ltm()
        self.stm = ShortTermMemory()
        self.episodic = EpisodicMemory()

        logger.info(f"Memory initialized (storage: {storage_dir})")

    def _load_ltm(self) -> LongTermMemory:
        """
        Load LTM from disk, or return a fresh instance.

        A fresh instance is returned when the file is missing, unreadable or
        not valid JSON.
        """
        if self.ltm_file.exists():
            try:
                data = json.loads(self.ltm_file.read_text(encoding="utf-8"))
                ltm = LongTermMemory.from_dict(data)
            except (OSError, json.JSONDecodeError) as e:
                logger.warning(f"Could not load LTM: {e}")
            else:
                # Log success only once the data has actually been turned
                # into a LongTermMemory instance.
                logger.info("LTM loaded from file")
                return ltm
        return LongTermMemory()

    def save(self) -> None:
        """
        Persist LTM to disk (STM and Episodic are volatile).

        The JSON is written to a sibling temporary file that is then renamed
        over ``ltm.json``, so an interrupted write cannot leave a truncated
        file behind (which ``_load_ltm`` would discard on the next start).

        Raises:
            OSError: If the file cannot be written or replaced
        """
        payload = json.dumps(self.ltm.to_dict(), indent=2, ensure_ascii=False)
        tmp_file = self.ltm_file.with_name(self.ltm_file.name + ".tmp")
        try:
            tmp_file.write_text(payload, encoding="utf-8")
            tmp_file.replace(self.ltm_file)
            logger.debug("LTM saved")
        except OSError as e:
            logger.error(f"Failed to save LTM: {e}")
            try:
                tmp_file.unlink(missing_ok=True)
            except OSError:
                pass  # best-effort cleanup; the original error is re-raised
            raise

    def get_context_for_prompt(self) -> dict:
        """Snapshot of relevant memory for the system prompt."""
        last_search = self.episodic.search_results.last
        unread = [e for e in self.episodic.events.items if not e.get("read")]
        return {
            "workspace": self.ltm.workspace.as_dict(),
            "library_paths": self.ltm.library_paths.to_dict(),
            "preferences": self.ltm.preferences.to_dict(),
            "current_workflow": self.stm.workflow.to_dict(),
            "current_topic": self.stm.entities.topic,
            "extracted_entities": self.stm.entities.data,
            "last_search": {
                "query": last_search.get("query") if last_search else None,
                "result_count": (
                    len(last_search.get("results", [])) if last_search else 0
                ),
            },
            "active_downloads_count": len(self.episodic.downloads.active),
            "pending_question": self.episodic.pending_question is not None,
            # Counted without marking the events as read.
            "unread_events": len(unread),
        }

    def get_full_state(self) -> dict:
        """Full state dump for debug/API."""
        return {
            "ltm": self.ltm.to_dict(),
            "stm": self.stm.to_dict(),
            "episodic": self.episodic.to_dict(),
        }

    def clear_session(self) -> None:
        """Reset volatile memories (STM + Episodic)."""
        self.stm.clear()
        self.episodic.clear()
        logger.info("Session memories cleared")
|
||||
@@ -0,0 +1,3 @@
|
||||
from .episodic import EpisodicMemory
|
||||
|
||||
__all__ = ["EpisodicMemory"]
|
||||
@@ -0,0 +1,6 @@
|
||||
from .downloads import Downloads
|
||||
from .errors import Errors
|
||||
from .events import Events
|
||||
from .search_results import SearchResults
|
||||
|
||||
__all__ = ["SearchResults", "Downloads", "Events", "Errors"]
|
||||
@@ -0,0 +1,56 @@
|
||||
"""Downloads — active torrent downloads being tracked."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Downloads:
|
||||
active: list[dict] = field(default_factory=list)
|
||||
|
||||
def add(self, download: dict) -> None:
|
||||
download["started_at"] = datetime.now().isoformat()
|
||||
self.active.append(download)
|
||||
logger.info(f"Downloads: Added '{download.get('name')}'")
|
||||
|
||||
def update_progress(self, task_id: str, progress: int, status: str = "downloading") -> None:
|
||||
for dl in self.active:
|
||||
if dl.get("task_id") == task_id:
|
||||
dl["progress"] = progress
|
||||
dl["status"] = status
|
||||
dl["updated_at"] = datetime.now().isoformat()
|
||||
break
|
||||
|
||||
def complete(self, task_id: str, file_path: str) -> dict | None:
|
||||
for i, dl in enumerate(self.active):
|
||||
if dl.get("task_id") == task_id:
|
||||
completed = self.active.pop(i)
|
||||
completed.update({"status": "completed", "file_path": file_path, "completed_at": datetime.now().isoformat()})
|
||||
logger.info(f"Downloads: Completed '{completed.get('name')}'")
|
||||
return completed
|
||||
return None
|
||||
|
||||
def clear(self) -> None:
|
||||
self.active = []
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "Downloads",
|
||||
"tier": "episodic",
|
||||
"access": "read-write",
|
||||
"description": (
|
||||
"Active torrent downloads currently in progress. "
|
||||
"Read to report download status to the user. "
|
||||
"Write to track new downloads or update progress."
|
||||
),
|
||||
"fields": {
|
||||
"active": "List of active downloads. Each entry has task_id, name, progress, status, started_at.",
|
||||
},
|
||||
}
|
||||
|
||||
def to_dict(self) -> list:
|
||||
return self.active
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Errors — recent agent errors for context and debugging."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_ERRORS = 5
|
||||
|
||||
|
||||
@dataclass
|
||||
class Errors:
|
||||
items: list[dict] = field(default_factory=list)
|
||||
max_errors: int = MAX_ERRORS
|
||||
|
||||
def add(self, action: str, error: str, context: dict | None = None) -> None:
|
||||
self.items.append({
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"action": action,
|
||||
"error": error,
|
||||
"context": context or {},
|
||||
})
|
||||
self.items = self.items[-self.max_errors:]
|
||||
logger.warning(f"Errors: '{action}': {error}")
|
||||
|
||||
def clear(self) -> None:
|
||||
self.items = []
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "Errors",
|
||||
"tier": "episodic",
|
||||
"access": "read",
|
||||
"description": (
|
||||
"Recent errors encountered during tool execution. "
|
||||
"Read when something goes wrong to understand what failed and avoid repeating the same mistake."
|
||||
),
|
||||
"fields": {
|
||||
"items": f"Last {MAX_ERRORS} errors. Each has timestamp, action, error message, context.",
|
||||
},
|
||||
}
|
||||
|
||||
def to_dict(self) -> list:
|
||||
return self.items
|
||||
@@ -0,0 +1,49 @@
|
||||
"""Events — background events (download complete, new files, etc.)."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)

# Default number of events an ``Events`` instance retains.
MAX_EVENTS = 10


@dataclass
class Events:
    """Episodic component holding a bounded list of background events."""

    # Recorded events, oldest first; each carries a "read" flag.
    items: list[dict] = field(default_factory=list)
    # Maximum number of events kept; zero or less keeps none.
    max_events: int = MAX_EVENTS

    def add(self, event_type: str, data: dict) -> None:
        """
        Record an unread event and keep only the last ``max_events`` entries.

        Args:
            event_type: Kind of event (e.g. "download_complete")
            data: Event payload
        """
        self.items.append({
            "type": event_type,
            "timestamp": datetime.now().isoformat(),
            "data": data,
            "read": False,
        })
        # ``items[-0:]`` is the whole list, so a zero limit needs its own
        # branch or the list would grow without bound.
        self.items = self.items[-self.max_events:] if self.max_events > 0 else []
        logger.info(f"Events: '{event_type}'")

    def get_unread(self) -> list[dict]:
        """Return unread events and mark them as read."""
        unread = [e for e in self.items if not e.get("read")]
        for e in self.items:
            e["read"] = True
        return unread

    def clear(self) -> None:
        """Forget every recorded event."""
        self.items = []

    @classmethod
    def describe(cls) -> dict:
        """Return the static description of this component (name, tier, access, fields)."""
        return {
            "name": "Events",
            "tier": "episodic",
            "access": "read",
            "description": (
                "Background events that occurred during the session (download complete, file moved, etc.). "
                "Read unread events at the start of each turn to surface anything that happened in the background. "
                "Events are written automatically by other components."
            ),
            "fields": {
                "items": f"Last {MAX_EVENTS} events. Each has type, timestamp, data, read flag.",
            },
        }

    def to_dict(self) -> list:
        """Return the live list of recorded events."""
        return self.items
|
||||
@@ -0,0 +1,52 @@
|
||||
"""SearchResults — last torrent/media search results."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResults:
|
||||
last: dict | None = None
|
||||
|
||||
def store(self, query: str, results: list[dict], search_type: str = "torrent") -> None:
|
||||
self.last = {
|
||||
"query": query,
|
||||
"type": search_type,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"results": [{"index": i + 1, **r} for i, r in enumerate(results)],
|
||||
}
|
||||
logger.info(f"SearchResults: Stored {len(results)} results for '{query}'")
|
||||
|
||||
def get_by_index(self, index: int) -> dict | None:
|
||||
if not self.last:
|
||||
return None
|
||||
for result in self.last.get("results", []):
|
||||
if result.get("index") == index:
|
||||
return result
|
||||
logger.warning(f"SearchResults: #{index} not found")
|
||||
return None
|
||||
|
||||
def clear(self) -> None:
|
||||
self.last = None
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "SearchResults",
|
||||
"tier": "episodic",
|
||||
"access": "read-write",
|
||||
"description": (
|
||||
"Last torrent or media search results. "
|
||||
"Read to let the user pick a result by index without searching again. "
|
||||
"Write after every search to store fresh results."
|
||||
),
|
||||
"fields": {
|
||||
"last": "Dict with query, type, timestamp, and results list. Each result has an index (1-based) plus provider fields.",
|
||||
},
|
||||
}
|
||||
|
||||
def to_dict(self) -> dict | None:
|
||||
return self.last
|
||||
@@ -0,0 +1,126 @@
|
||||
"""EpisodicMemory — transient event-driven memory, reset on restart."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .components import Downloads, Errors, Events, SearchResults
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class EpisodicMemory:
    """
    Episodic memory — very short-lived, event-driven.

    Composed of:
    - search_results: last torrent/media search
    - downloads: active downloads being tracked
    - events: background events (download complete, etc.)
    - errors: recent errors for context
    """

    search_results: SearchResults = field(default_factory=SearchResults)
    downloads: Downloads = field(default_factory=Downloads)
    events: Events = field(default_factory=Events)
    errors: Errors = field(default_factory=Errors)

    # Pending question — not a component yet, kept simple
    pending_question: dict | None = None

    # Convenience proxies for backward compatibility (read-only views onto
    # the component state).
    @property
    def last_search_results(self) -> dict | None:
        """The last stored search, or None."""
        return self.search_results.last

    @property
    def active_downloads(self) -> list[dict]:
        """The live list of active downloads."""
        return self.downloads.active

    @property
    def background_events(self) -> list[dict]:
        """The live list of background events."""
        return self.events.items

    @property
    def recent_errors(self) -> list[dict]:
        """The live list of recent errors."""
        return self.errors.items

    # Convenience methods forwarded to components
    def store_search_results(self, query: str, results: list[dict], search_type: str = "torrent") -> None:
        """Forward to ``search_results.store``."""
        self.search_results.store(query, results, search_type)

    def get_result_by_index(self, index: int) -> dict | None:
        """Forward to ``search_results.get_by_index``."""
        return self.search_results.get_by_index(index)

    def get_search_results(self) -> dict | None:
        """Return the last stored search, or None."""
        return self.search_results.last

    def clear_search_results(self) -> None:
        """Forward to ``search_results.clear``."""
        self.search_results.clear()

    def add_active_download(self, download: dict) -> None:
        """Forward to ``downloads.add``."""
        self.downloads.add(download)

    def update_download_progress(self, task_id: str, progress: int, status: str = "downloading") -> None:
        """Forward to ``downloads.update_progress``."""
        self.downloads.update_progress(task_id, progress, status)

    def complete_download(self, task_id: str, file_path: str) -> dict | None:
        """
        Complete a download and, when one was found, emit a
        ``download_complete`` background event.

        Returns:
            Whatever ``downloads.complete`` returned.
        """
        completed = self.downloads.complete(task_id, file_path)
        if completed:
            self.events.add("download_complete", {"name": completed.get("name"), "file_path": file_path})
        return completed

    def get_active_downloads(self) -> list[dict]:
        """Return the live list of active downloads."""
        return self.downloads.active

    def add_error(self, action: str, error: str, context: dict | None = None) -> None:
        """Forward to ``errors.add``."""
        self.errors.add(action, error, context)

    def get_recent_errors(self) -> list[dict]:
        """Return the live list of recent errors."""
        return self.errors.items

    def set_pending_question(self, question: str, options: list[dict], context: dict, question_type: str = "choice") -> None:
        """
        Record a question awaiting user response, replacing any previous one.

        Args:
            question: The question asked
            options: List of possible options
            context: Question context
            question_type: Type of question (defaults to "choice")
        """
        self.pending_question = {
            "type": question_type,
            "question": question,
            "options": options,
            "context": context,
        }

    def get_pending_question(self) -> dict | None:
        """Return the pending question, or None."""
        return self.pending_question

    def resolve_pending_question(self, answer_index: int | None = None) -> dict | None:
        """
        Clear the pending question and return the chosen option.

        Args:
            answer_index: Value matched against each option's "index", or
                None to cancel

        Returns:
            The matching option, or None when there is no pending question,
            the answer is None, or no option matches.
        """
        if not self.pending_question:
            return None
        result = None
        if answer_index is not None:
            # ``or []`` also covers an "options" key that is present but
            # None, which ``.get("options", [])`` would try to iterate.
            for opt in self.pending_question.get("options") or []:
                if opt.get("index") == answer_index:
                    result = opt
                    break
        self.pending_question = None
        return result

    def add_background_event(self, event_type: str, data: dict) -> None:
        """Forward to ``events.add``."""
        self.events.add(event_type, data)

    def get_unread_events(self) -> list[dict]:
        """Forward to ``events.get_unread``."""
        return self.events.get_unread()

    def clear(self) -> None:
        """Clear every component and drop the pending question."""
        self.search_results.clear()
        self.downloads.clear()
        self.events.clear()
        self.errors.clear()
        self.pending_question = None
        logger.info("Episodic: Cleared")

    def to_dict(self) -> dict:
        """Return the state of all components as one dictionary."""
        return {
            "last_search_results": self.search_results.to_dict(),
            "active_downloads": self.downloads.to_dict(),
            "recent_errors": self.errors.to_dict(),
            "pending_question": self.pending_question,
            "background_events": self.events.to_dict(),
        }
|
||||
@@ -0,0 +1,3 @@
|
||||
from .ltm import LongTermMemory
|
||||
|
||||
__all__ = ["LongTermMemory"]
|
||||
@@ -0,0 +1,15 @@
|
||||
from .following import Following
|
||||
from .library import Library
|
||||
from .library_paths import LibraryPaths
|
||||
from .media_preferences import MediaPreferences
|
||||
from .subtitle_preferences import SubtitlePreferences
|
||||
from .workspace import WorkspacePaths
|
||||
|
||||
__all__ = [
|
||||
"WorkspacePaths",
|
||||
"LibraryPaths",
|
||||
"MediaPreferences",
|
||||
"SubtitlePreferences",
|
||||
"Library",
|
||||
"Following",
|
||||
]
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Following — watchlist of TV shows being followed."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)


@dataclass
class Following:
    """Long-term memory component holding the list of followed shows."""

    # Followed shows, in the order they were added.
    shows: list[dict] = field(default_factory=list)

    def add(self, show: dict) -> None:
        """
        Follow a show, skipping duplicates by imdb_id.

        A show without an ``imdb_id`` is always added: there is nothing to
        compare, and matching on the missing id would silently drop every
        id-less show after the first one.

        Args:
            show: Show to follow; ``followed_at`` is stamped on this dict
        """
        imdb_id = show.get("imdb_id")
        if imdb_id is not None and any(s.get("imdb_id") == imdb_id for s in self.shows):
            return
        show["followed_at"] = datetime.now().isoformat()
        self.shows.append(show)
        logger.info(f"Following: Now following {show.get('title')}")

    def to_dict(self) -> list:
        """Return the live list of followed shows."""
        return self.shows

    @classmethod
    def describe(cls) -> dict:
        """Return the static description of this component (name, tier, access, fields)."""
        return {
            "name": "Following",
            "tier": "ltm",
            "access": "read-write",
            "description": (
                "Watchlist of TV shows the user is actively following. "
                "Read to check if a show should be monitored for new episodes. "
                "Write (add) when the user explicitly asks to follow a show."
            ),
            "fields": {
                "shows": "List of followed shows. Each entry has imdb_id, title, followed_at.",
            },
        }

    @classmethod
    def from_dict(cls, data: list) -> "Following":
        """Build an instance from a list of shows (None is treated as empty)."""
        return cls(shows=data if data is not None else [])
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Library — owned movies and TV shows."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Library:
|
||||
movies: list[dict] = field(default_factory=list)
|
||||
tv_shows: list[dict] = field(default_factory=list)
|
||||
|
||||
def add(self, media_type: str, media: dict) -> None:
|
||||
"""Add a media item, skipping duplicates by imdb_id."""
|
||||
collection = self._collection(media_type)
|
||||
if collection is None:
|
||||
return
|
||||
|
||||
existing_ids = [m.get("imdb_id") for m in collection]
|
||||
if media.get("imdb_id") not in existing_ids:
|
||||
media["added_at"] = datetime.now().isoformat()
|
||||
collection.append(media)
|
||||
logger.info(f"Library: Added {media.get('title')} to {media_type}")
|
||||
|
||||
def get(self, media_type: str) -> list[dict]:
|
||||
"""Get all items for a media type."""
|
||||
return self._collection(media_type) or []
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "Library",
|
||||
"tier": "ltm",
|
||||
"access": "read-write",
|
||||
"description": (
|
||||
"Catalogue of media owned by the user. "
|
||||
"Read to check if a title is already in the library before downloading. "
|
||||
"Write (add) after successfully moving a media file to its destination."
|
||||
),
|
||||
"fields": {
|
||||
"movies": "List of owned movies. Each entry has imdb_id, title, year, quality, file_path, added_at.",
|
||||
"tv_shows": "List of owned TV shows. Each entry has imdb_id, title, seasons, added_at.",
|
||||
},
|
||||
}
|
||||
|
||||
def _collection(self, media_type: str) -> list[dict] | None:
|
||||
if media_type == "movies":
|
||||
return self.movies
|
||||
if media_type == "tv_shows":
|
||||
return self.tv_shows
|
||||
logger.warning(f"Library: Unknown media type '{media_type}'")
|
||||
return None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {"movies": self.movies, "tv_shows": self.tv_shows}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "Library":
|
||||
return cls(
|
||||
movies=data.get("movies", []),
|
||||
tv_shows=data.get("tv_shows", []),
|
||||
)
|
||||
@@ -0,0 +1,70 @@
|
||||
"""LibraryPaths — user-defined media library folders.
|
||||
|
||||
Extensible: the user creates collections as needed (tv_shows, movies, music, games…).
|
||||
Each collection name maps to its root folder on disk.
|
||||
|
||||
Set via /set_path, never modified by the agent autonomously.
|
||||
Access: READ ONLY for the agent — used to resolve destination paths when organizing media.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class LibraryPaths:
|
||||
"""
|
||||
User-defined media library folders.
|
||||
|
||||
folders is a free dict: {"tv_shows": "/media/tv", "movies": "/media/movies", ...}
|
||||
Add new collections simply by setting a new key via /set_path.
|
||||
|
||||
Access: READ ONLY for the agent — set via /set_path only.
|
||||
"""
|
||||
|
||||
folders: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
def get(self, collection: str) -> str | None:
|
||||
return self.folders.get(collection)
|
||||
|
||||
def set(self, collection: str, path: str) -> None:
|
||||
self.folders[collection] = path
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return self.folders
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "LibraryPaths",
|
||||
"tier": "ltm",
|
||||
"access": "read",
|
||||
"description": (
|
||||
"User-defined media library folders. "
|
||||
"Read these paths to resolve where to move an organised media file. "
|
||||
"Keys are collection names (tv_shows, movies, music, games…), values are root paths. "
|
||||
"New collections are added by the user via /set_path — never by the agent."
|
||||
),
|
||||
"fields": {
|
||||
"folders": "Dict of collection_name → absolute path. E.g. {'tv_shows': '/media/tv', 'movies': '/media/movies'}.",
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "LibraryPaths":
|
||||
# Migrate from old flat format (tvshow_folder, movie_folder)
|
||||
folders = dict(data)
|
||||
if not folders:
|
||||
return cls()
|
||||
|
||||
migrated = {}
|
||||
legacy_map = {
|
||||
"tvshow_folder": "tv_shows",
|
||||
"movie_folder": "movies",
|
||||
}
|
||||
for old_key, new_key in legacy_map.items():
|
||||
if old_key in folders:
|
||||
migrated[new_key] = folders.pop(old_key)
|
||||
|
||||
# Keep any already-migrated keys
|
||||
migrated.update(folders)
|
||||
return cls(folders=migrated)
|
||||
@@ -0,0 +1,52 @@
|
||||
"""MediaPreferences — user preferences for video quality and audio."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class MediaPreferences:
    """User preferences applied when downloading and organising media.

    - quality: preferred video quality label
    - audio_languages: preferred audio languages, most preferred first
    - auto_organize: whether files are organised automatically after download
    """

    quality: str = "1080p"
    audio_languages: list[str] = field(default_factory=lambda: ["fr", "en"])
    auto_organize: bool = False

    def to_dict(self) -> dict:
        """Serialise the three preference fields to a plain dict."""
        serialised = {}
        serialised["quality"] = self.quality
        serialised["audio_languages"] = self.audio_languages
        serialised["auto_organize"] = self.auto_organize
        return serialised

    @classmethod
    def describe(cls) -> dict:
        """Return the component description (name, tier, access, fields)."""
        summary = (
            "User preferences for video quality and audio. "
            "Use these when searching torrents or choosing a release to download. "
            "Never modify autonomously — only via explicit user command."
        )
        field_docs = {
            "quality": "Preferred video quality, e.g. '1080p', '4K', '720p'.",
            "audio_languages": (
                "Ordered list of preferred audio languages (ISO 639-1). "
                "First = most preferred."
            ),
            "auto_organize": "If True, organise files into the library automatically after download.",
        }
        return {
            "name": "MediaPreferences",
            "tier": "ltm",
            "access": "read",
            "description": summary,
            "fields": field_docs,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MediaPreferences":
        """Rebuild from serialised data.

        Falls back to the legacy keys ``preferred_quality`` and
        ``preferred_languages`` when the current key is missing or falsy.
        """
        quality = data.get("quality")
        if not quality:
            quality = data.get("preferred_quality", "1080p")

        languages = data.get("audio_languages")
        if not languages:
            languages = data.get("preferred_languages", ["fr", "en"])

        organise = data.get("auto_organize", False)
        return cls(quality=quality, audio_languages=languages, auto_organize=organise)
|
||||
@@ -0,0 +1,80 @@
|
||||
"""SubtitlePreferences — user preferences for subtitle handling."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class SubtitlePreferences:
    """
    User-level defaults for subtitle selection, applied globally as the
    base of the SubtitleRuleSet inheritance chain.

    These are the top-level defaults — individual shows/movies/release groups
    can override them via .alfred/rules.yaml.

    Naming convention used when placing subtitle files alongside a video:
      {lang}.srt           → standard track (e.g. fr.srt, en.srt)
      {lang}.sdh.srt       → SDH / hearing-impaired track
      {lang}.forced.srt    → forced track (foreign lines only)

    Fields mirror SubtitleRuleSet.override() parameters:
      - languages: ordered list of ISO 639-1 codes to keep (others ignored)
      - formats:   list of subtitle formats to keep (e.g. ["srt", "ass"])
      - types:     list of subtitle types to keep (e.g. ["standard", "forced", "sdh"])
    """

    languages: list[str] = field(default_factory=lambda: ["fr", "en"])
    formats: list[str] = field(default_factory=lambda: ["srt", "ass"])
    types: list[str] = field(default_factory=lambda: ["standard", "forced", "sdh"])

    def to_dict(self) -> dict:
        """Serialise the three preference lists to a plain dict."""
        return {
            "languages": self.languages,
            "formats": self.formats,
            "types": self.types,
        }

    @classmethod
    def describe(cls) -> dict:
        """Return the component description (name, tier, access, fields)."""
        return {
            "name": "SubtitlePreferences",
            "tier": "ltm",
            "access": "read",
            "description": (
                "User defaults for subtitle selection. Applied as global base rules; "
                "overridden per show/movie/release group via .alfred/rules.yaml. "
                "Never modify autonomously — only via explicit user command."
            ),
            "fields": {
                "languages": (
                    "Ordered list of subtitle languages to keep (ISO 639-1). "
                    "Others are ignored. First = most preferred."
                ),
                "formats": (
                    "List of subtitle formats to keep, e.g. ['srt', 'ass']. "
                    "Others are skipped."
                ),
                "types": (
                    "List of subtitle types to keep: 'standard', 'sdh', 'forced'. "
                    "Omit a type to drop those tracks globally."
                ),
            },
        }

    @classmethod
    def from_dict(cls, data: dict) -> "SubtitlePreferences":
        """Rebuild from serialised data, migrating the legacy boolean flags.

        Old fields ``min_size_kb`` and ``link_subs_folder`` are ignored.
        When ``types`` is absent, it is derived from the legacy
        ``keep_sdh`` / ``keep_forced`` flags (both default to True).

        The derived list follows the same order as the class default, so
        ``from_dict({})`` compares equal to ``SubtitlePreferences()``.
        """
        if "types" in data:
            types = data["types"]
        else:
            # Back-compat: keep_sdh / keep_forced → types list, in the
            # canonical default order (standard, forced, sdh).
            keep = {
                "standard": True,
                "forced": data.get("keep_forced", True),
                "sdh": data.get("keep_sdh", True),
            }
            types = [kind for kind in ("standard", "forced", "sdh") if keep[kind]]

        return cls(
            languages=data.get("languages", ["fr", "en"]),
            formats=data.get("formats", ["srt", "ass"]),
            types=types,
        )
|
||||
@@ -0,0 +1,57 @@
|
||||
"""WorkspacePaths — fixed infrastructure folders.
|
||||
|
||||
Set once via /set_path, never modified by the agent.
|
||||
These are operational paths (where files land), not the media library.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkspacePaths:
|
||||
"""
|
||||
Fixed infrastructure folders.
|
||||
|
||||
- download: where qBittorrent drops completed downloads
|
||||
- torrent: where .torrent files are stored
|
||||
|
||||
Access: READ ONLY for the agent — set via /set_path only.
|
||||
"""
|
||||
|
||||
download: str | None = None
|
||||
torrent: str | None = None
|
||||
|
||||
def as_dict(self) -> dict[str, str]:
|
||||
"""Return configured paths, skipping unset values."""
|
||||
return {k: v for k, v in {
|
||||
"download": self.download,
|
||||
"torrent": self.torrent,
|
||||
}.items() if v is not None}
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {"download": self.download, "torrent": self.torrent}
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "WorkspacePaths",
|
||||
"tier": "ltm",
|
||||
"access": "read",
|
||||
"description": (
|
||||
"Fixed infrastructure folders used during file operations. "
|
||||
"Read these paths to know where to find downloaded files or .torrent files. "
|
||||
"Never modify — set exclusively via /set_path."
|
||||
),
|
||||
"fields": {
|
||||
"download": "Root folder where qBittorrent drops completed downloads.",
|
||||
"torrent": "Folder where .torrent files are stored.",
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "WorkspacePaths":
|
||||
# Migrate from old flat format (download_folder, torrent_folder)
|
||||
return cls(
|
||||
download=data.get("download") or data.get("download_folder"),
|
||||
torrent=data.get("torrent") or data.get("torrent_folder"),
|
||||
)
|
||||
@@ -0,0 +1,65 @@
|
||||
"""LongTermMemory — persistent memory across sessions."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .components import (
|
||||
Following,
|
||||
Library,
|
||||
LibraryPaths,
|
||||
MediaPreferences,
|
||||
SubtitlePreferences,
|
||||
WorkspacePaths,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class LongTermMemory:
    """
    Long-term memory — persisted to disk, survives restarts.

    - workspace: fixed infrastructure paths (download, torrent) — READ ONLY for agent
    - library_paths: user-defined media folders (tv_shows, movies, …) — READ ONLY for agent
    - media_preferences: quality and audio language preferences
    - subtitle_preferences: subtitle selection and naming rules
    - library: owned media catalogue
    - following: watchlist
    """

    workspace: WorkspacePaths = field(default_factory=WorkspacePaths)
    library_paths: LibraryPaths = field(default_factory=LibraryPaths)
    media_preferences: MediaPreferences = field(default_factory=MediaPreferences)
    subtitle_preferences: SubtitlePreferences = field(default_factory=SubtitlePreferences)
    library: Library = field(default_factory=Library)
    following: Following = field(default_factory=Following)

    def to_dict(self) -> dict:
        """Serialise every component under its own top-level key."""
        return {
            "workspace": self.workspace.to_dict(),
            "library_paths": self.library_paths.to_dict(),
            "media_preferences": self.media_preferences.to_dict(),
            "subtitle_preferences": self.subtitle_preferences.to_dict(),
            "library": self.library.to_dict(),
            "following": self.following.to_dict(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "LongTermMemory":
        """Rebuild the memory from serialised data, accepting older layouts.

        Migrations handled here:
          - old flat format: path keys lived at the top level of the document
          - ``paths`` was the former name of ``library_paths``
          - ``preferences`` was the former name of ``media_preferences``
        """
        # Migration: old flat format had paths at the top level
        workspace_data = data.get("workspace") or data

        # Decide by key presence, not truthiness. A freshly saved memory holds
        # "library_paths": {}, which is falsy; falling through to the whole
        # document would load every top-level section as a library folder.
        if "library_paths" in data or "paths" in data:
            library_paths_data = data.get("library_paths") or data.get("paths") or {}
        else:
            library_paths_data = data

        return cls(
            workspace=WorkspacePaths.from_dict(workspace_data),
            library_paths=LibraryPaths.from_dict(library_paths_data),
            # migration: old key was "preferences"
            media_preferences=MediaPreferences.from_dict(
                data.get("media_preferences") or data.get("preferences", {})
            ),
            subtitle_preferences=SubtitlePreferences.from_dict(
                data.get("subtitle_preferences", {})
            ),
            library=Library.from_dict(data.get("library", {})),
            following=Following.from_dict(data.get("following", [])),
        )
|
||||
@@ -0,0 +1,80 @@
|
||||
"""MemoryRegistry — autodiscovers and describes all memory components.
|
||||
|
||||
Scans the components/ subfolder of each memory tier (ltm, stm, episodic),
|
||||
imports every class that has a describe() classmethod, and exposes their
|
||||
descriptions for use in the system prompt.
|
||||
|
||||
No manual registration needed — drop a new component file in the right
|
||||
components/ folder and it will be picked up automatically.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import logging
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Tier packages relative to this file's package
|
||||
_TIER_PACKAGES = [
|
||||
"alfred.infrastructure.persistence.memory.ltm.components",
|
||||
"alfred.infrastructure.persistence.memory.stm.components",
|
||||
"alfred.infrastructure.persistence.memory.episodic.components",
|
||||
]
|
||||
|
||||
|
||||
def _load_components(package_name: str) -> list[dict]:
|
||||
"""Import all modules in a package and collect describe() results."""
|
||||
descriptions = []
|
||||
try:
|
||||
package = importlib.import_module(package_name)
|
||||
package_path = Path(package.__file__).parent
|
||||
|
||||
for module_info in pkgutil.iter_modules([str(package_path)]):
|
||||
module = importlib.import_module(f"{package_name}.{module_info.name}")
|
||||
for _, cls in inspect.getmembers(module, inspect.isclass):
|
||||
if cls.__module__ == module.__name__ and hasattr(cls, "describe"):
|
||||
try:
|
||||
descriptions.append(cls.describe())
|
||||
except Exception as e:
|
||||
logger.warning(f"MemoryRegistry: describe() failed on {cls.__name__}: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"MemoryRegistry: Could not load package {package_name}: {e}")
|
||||
|
||||
return descriptions
|
||||
|
||||
|
||||
class MemoryRegistry:
    """
    Collects the descriptions of all discovered memory components.

    Usage:
        registry = MemoryRegistry()
        all_components = registry.all()          # flat list
        ltm_components = registry.by_tier("ltm") # filtered by tier
        schema = registry.schema()               # grouped by tier, for the prompt
    """

    def __init__(self):
        """Scan every tier package once and cache the resulting descriptions."""
        collected: list[dict] = []
        self._components = collected
        for tier_package in _TIER_PACKAGES:
            collected.extend(_load_components(tier_package))
        logger.info(f"MemoryRegistry: Loaded {len(self._components)} components")

    def all(self) -> list[dict]:
        """Return the cached list of every component description."""
        return self._components

    def by_tier(self, tier: str) -> list[dict]:
        """Return the descriptions whose "tier" equals the given name (ltm, stm, episodic)."""
        matching = []
        for description in self._components:
            if description.get("tier") == tier:
                matching.append(description)
        return matching

    def schema(self) -> dict[str, list[dict]]:
        """Return the descriptions grouped by tier.

        The three known tiers are always present (possibly empty); a
        description without a tier is filed under "unknown", and any other
        tier name gets its own group.
        """
        grouped: dict[str, list[dict]] = {name: [] for name in ("ltm", "stm", "episodic")}
        for description in self._components:
            key = description.get("tier", "unknown")
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(description)
        return grouped
|
||||
@@ -0,0 +1,3 @@
|
||||
from .stm import ShortTermMemory
|
||||
|
||||
__all__ = ["ShortTermMemory"]
|
||||
@@ -0,0 +1,5 @@
|
||||
from .conversation import Conversation
|
||||
from .entities import Entities
|
||||
from .workflow import Workflow
|
||||
|
||||
__all__ = ["Conversation", "Workflow", "Entities"]
|
||||
@@ -0,0 +1,55 @@
|
||||
"""Conversation — message history for the current session."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_HISTORY = 20


@dataclass
class Conversation:
    """Message history and detected language for the current session.

    - messages: stored messages, oldest first, capped at ``max_history``
    - max_history: maximum number of messages kept
    - language: current conversation language code
    """

    messages: list[dict] = field(default_factory=list)
    max_history: int = MAX_HISTORY
    language: str = "en"

    def add(self, role: str, content: str) -> None:
        """Append a timestamped message, capping the history at max_history."""
        self.messages.append({"role": role, "content": content, "timestamp": datetime.now().isoformat()})
        # Trim from the front by the overflow count. Slicing with
        # [-max_history:] keeps everything when max_history is 0.
        overflow = len(self.messages) - self.max_history
        if overflow > 0:
            self.messages = self.messages[overflow:]
        logger.debug(f"Conversation: Added {role} message")

    def recent(self, n: int = 10) -> list[dict]:
        """Return the last N messages (an empty list when N is zero or negative)."""
        # messages[-0:] is the whole list, so a non-positive N needs a guard.
        if n <= 0:
            return []
        return self.messages[-n:]

    def set_language(self, language: str) -> None:
        """Record the conversation language."""
        self.language = language
        logger.debug(f"Conversation: Language -> {language}")

    def clear(self) -> None:
        """Drop all messages and reset the language to "en"."""
        self.messages = []
        self.language = "en"

    @classmethod
    def describe(cls) -> dict:
        """Return the component description (name, tier, access, fields)."""
        return {
            "name": "Conversation",
            "tier": "stm",
            "access": "read",
            "description": (
                "Current session message history and detected language. "
                "Read to maintain conversational context. "
                "Messages are managed automatically — never write directly."
            ),
            "fields": {
                "messages": f"Last {MAX_HISTORY} messages (role, content, timestamp).",
                "language": "Detected conversation language (ISO 639-1 code, e.g. 'fr', 'en').",
            },
        }

    def to_dict(self) -> dict:
        """Serialise to ``{"messages": [...], "language": ...}``."""
        return {"messages": self.messages, "language": self.language}
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Entities — extracted entities from the current conversation (title, year, quality, etc.)."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Entities:
|
||||
data: dict[str, Any] = field(default_factory=dict)
|
||||
topic: str | None = None
|
||||
|
||||
def set(self, key: str, value: Any) -> None:
|
||||
self.data[key] = value
|
||||
logger.debug(f"Entities: {key}={value}")
|
||||
|
||||
def get(self, key: str, default: Any = None) -> Any:
|
||||
return self.data.get(key, default)
|
||||
|
||||
def set_topic(self, topic: str) -> None:
|
||||
self.topic = topic
|
||||
logger.debug(f"Entities: Topic -> {topic}")
|
||||
|
||||
def clear(self) -> None:
|
||||
self.data = {}
|
||||
self.topic = None
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "Entities",
|
||||
"tier": "stm",
|
||||
"access": "read-write",
|
||||
"description": (
|
||||
"Entities and topic extracted from the current conversation. "
|
||||
"Read to retrieve what the user is talking about (title, year, quality, etc.) without re-parsing the history. "
|
||||
"Write when you identify a new entity or topic shift."
|
||||
),
|
||||
"fields": {
|
||||
"data": "Key-value pairs of extracted entities. E.g. {'title': 'Breaking Bad', 'year': 2008, 'quality': '1080p'}.",
|
||||
"topic": "Current conversation topic as a short string. E.g. 'media_search', 'organize_file'.",
|
||||
},
|
||||
}
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {"data": self.data, "topic": self.topic}
|
||||
@@ -0,0 +1,53 @@
|
||||
"""Workflow — tracks the current in-progress agent task."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Workflow:
|
||||
current: dict | None = None
|
||||
|
||||
def start(self, workflow_type: str, target: dict) -> None:
|
||||
self.current = {
|
||||
"type": workflow_type,
|
||||
"target": target,
|
||||
"stage": "started",
|
||||
"started_at": datetime.now().isoformat(),
|
||||
}
|
||||
logger.info(f"Workflow: Started '{workflow_type}'")
|
||||
|
||||
def update_stage(self, stage: str) -> None:
|
||||
if self.current:
|
||||
self.current["stage"] = stage
|
||||
logger.debug(f"Workflow: Stage -> {stage}")
|
||||
|
||||
def end(self) -> None:
|
||||
if self.current:
|
||||
logger.info(f"Workflow: Ended '{self.current.get('type')}'")
|
||||
self.current = None
|
||||
|
||||
def clear(self) -> None:
|
||||
self.current = None
|
||||
|
||||
@classmethod
|
||||
def describe(cls) -> dict:
|
||||
return {
|
||||
"name": "Workflow",
|
||||
"tier": "stm",
|
||||
"access": "read-write",
|
||||
"description": (
|
||||
"Tracks the current in-progress multi-step task. "
|
||||
"Read to know what you are currently doing and what stage you are at. "
|
||||
"Write to start, advance, or end a workflow as you execute steps."
|
||||
),
|
||||
"fields": {
|
||||
"current": "Active workflow dict with keys: type, target, stage, started_at. None if idle.",
|
||||
},
|
||||
}
|
||||
|
||||
def to_dict(self) -> dict | None:
|
||||
return self.current
|
||||
@@ -0,0 +1,91 @@
|
||||
"""ShortTermMemory — volatile session memory, reset on restart."""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .components import Conversation, Entities, Workflow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ShortTermMemory:
    """
    Session-scoped memory, made of three components.

    - conversation: message history + language
    - workflow: current in-progress task
    - entities: extracted context (title, year, quality…)

    The properties and methods below forward to those components.
    """

    conversation: Conversation = field(default_factory=Conversation)
    workflow: Workflow = field(default_factory=Workflow)
    entities: Entities = field(default_factory=Entities)

    # --- Read-only proxies (backward compatibility with existing callers) ---

    @property
    def conversation_history(self) -> list[dict]:
        """The conversation's message list."""
        return self.conversation.messages

    @property
    def current_workflow(self) -> dict | None:
        """The active workflow dict, or None when idle."""
        return self.workflow.current

    @property
    def extracted_entities(self) -> dict:
        """The entity key-value store."""
        return self.entities.data

    @property
    def current_topic(self) -> str | None:
        """The current conversation topic, if any."""
        return self.entities.topic

    @property
    def language(self) -> str:
        """The conversation language code."""
        return self.conversation.language

    # --- Conversation forwarding ---

    def add_message(self, role: str, content: str) -> None:
        """Append a message to the conversation."""
        self.conversation.add(role, content)

    def get_recent_history(self, n: int = 10) -> list[dict]:
        """Return the last n conversation messages."""
        return self.conversation.recent(n)

    def set_language(self, language: str) -> None:
        """Set the conversation language."""
        self.conversation.set_language(language)

    # --- Workflow forwarding ---

    def start_workflow(self, workflow_type: str, target: dict) -> None:
        """Start a workflow of the given type on the given target."""
        self.workflow.start(workflow_type, target)

    def update_workflow_stage(self, stage: str) -> None:
        """Advance the active workflow to a new stage."""
        self.workflow.update_stage(stage)

    def end_workflow(self) -> None:
        """End the active workflow."""
        self.workflow.end()

    # --- Entities forwarding ---

    def set_entity(self, key: str, value) -> None:
        """Store one extracted entity."""
        self.entities.set(key, value)

    def get_entity(self, key: str, default=None):
        """Return a stored entity, or default when absent."""
        return self.entities.get(key, default)

    def clear_entities(self) -> None:
        """Clear the entities component (data and topic)."""
        self.entities.clear()

    def set_topic(self, topic: str) -> None:
        """Set the current conversation topic."""
        self.entities.set_topic(topic)

    # --- Whole-memory operations ---

    def clear(self) -> None:
        """Clear conversation, workflow and entities, in that order."""
        for component in (self.conversation, self.workflow, self.entities):
            component.clear()
        logger.info("STM: Cleared")

    def to_dict(self) -> dict:
        """Serialise to the flat layout keyed by the proxy property names."""
        conversation = self.conversation
        entities = self.entities
        return {
            "conversation_history": conversation.messages,
            "current_workflow": self.workflow.to_dict(),
            "extracted_entities": entities.data,
            "current_topic": entities.topic,
            "language": conversation.language,
        }
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Infrastructure adapters for subtitle persistence."""
|
||||
|
||||
from .metadata_store import SubtitleMetadataStore
|
||||
from .rule_repository import RuleSetRepository
|
||||
|
||||
__all__ = ["SubtitleMetadataStore", "RuleSetRepository"]
|
||||
@@ -0,0 +1,144 @@
|
||||
"""SubtitleMetadataStore — reads/writes .alfred/metadata.yaml colocated with media."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
from alfred.domain.subtitles.entities import SubtitleTrack
|
||||
from alfred.domain.subtitles.services.placer import PlacedTrack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubtitleMetadataStore:
    """
    Manages the .alfred/metadata.yaml file that lives inside the media library folder.

    For TV shows: /media/tv_shows/The X-Files/.alfred/metadata.yaml
    For movies:   /media/movies/Inception (2010)/.alfred/metadata.yaml

    The store never raises on a missing or unreadable file — it returns empty defaults.
    Writes are atomic (write to .tmp then replace).
    """

    def __init__(self, library_root: Path):
        """Bind the store to one media folder; nothing is read or created yet."""
        self._root = library_root
        self._alfred_dir = library_root / ".alfred"
        self._metadata_path = self._alfred_dir / "metadata.yaml"

    # ------------------------------------------------------------------
    # Load / Save
    # ------------------------------------------------------------------

    def load(self) -> dict:
        """Return the full metadata dict.

        An empty dict is returned when the file is absent, unreadable, empty,
        or does not contain a YAML mapping (every caller uses dict methods on
        the result).
        """
        if not self._metadata_path.exists():
            return {}
        try:
            with open(self._metadata_path, encoding="utf-8") as f:
                data = yaml.safe_load(f)
        except Exception as e:
            logger.warning(f"MetadataStore: could not read {self._metadata_path}: {e}")
            return {}
        if not data:
            return {}
        if not isinstance(data, dict):
            logger.warning(
                f"MetadataStore: ignoring non-mapping content in {self._metadata_path}"
            )
            return {}
        return data

    def save(self, data: dict) -> None:
        """Atomically write metadata.yaml. Creates .alfred/ if needed.

        Raises:
            Exception: whatever the write or replace raised, re-raised after
                logging and removing the temporary file.
        """
        self._alfred_dir.mkdir(parents=True, exist_ok=True)
        tmp = self._metadata_path.with_suffix(".yaml.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
            # replace() overwrites an existing target on every platform;
            # rename() raises FileExistsError on Windows when the file exists.
            tmp.replace(self._metadata_path)
        except Exception as e:
            logger.error(f"MetadataStore: could not write {self._metadata_path}: {e}")
            tmp.unlink(missing_ok=True)
            raise

    # ------------------------------------------------------------------
    # Pattern
    # ------------------------------------------------------------------

    def confirmed_pattern(self) -> str | None:
        """Return the confirmed pattern_id, or None."""
        data = self.load()
        if data.get("pattern_confirmed"):
            return data.get("detected_pattern")
        return None

    def mark_pattern_confirmed(self, pattern_id: str, media_info: dict | None = None) -> None:
        """Persist detected_pattern + pattern_confirmed=true.

        When media_info is given, its media_type, imdb_id and title are
        stored only if those keys are not already present in the file.
        """
        data = self.load()
        data["detected_pattern"] = pattern_id
        data["pattern_confirmed"] = True
        if media_info:
            data.setdefault("media_type", media_info.get("media_type"))
            data.setdefault("imdb_id", media_info.get("imdb_id"))
            data.setdefault("title", media_info.get("title"))
        self.save(data)
        logger.info(f"MetadataStore: confirmed pattern '{pattern_id}' for {self._root.name}")

    # ------------------------------------------------------------------
    # Subtitle history
    # ------------------------------------------------------------------

    def append_history(
        self,
        placed_pairs: list[tuple[PlacedTrack, SubtitleTrack]],
        season: int | None = None,
        episode: int | None = None,
        release_group: str | None = None,
    ) -> None:
        """Append one history entry with all placed tracks.

        Does nothing when placed_pairs is empty. The release group, when
        given, is also added to the file's ``release_groups`` list if new.

        Args:
            placed_pairs: (placed file, source track) pairs to record.
            season: season number, stored only when not None.
            episode: episode number, stored only when not None.
            release_group: release group name, stored on the entry as-is.
        """
        if not placed_pairs:
            return

        data = self.load()
        history = data.setdefault("subtitle_history", [])

        tracks_data: list[dict[str, Any]] = []
        for placed, track in placed_pairs:
            # Infer type from destination filename parts (e.g. en.sdh.srt → sdh)
            parts = placed.filename.rsplit(".", 2)  # ["en", "sdh", "srt"] or ["en", "srt"]
            inferred_type = parts[1] if len(parts) == 3 else "standard"

            tracks_data.append({
                "language": track.language.code if track.language else "unknown",
                "type": inferred_type,
                "format": placed.destination.suffix.lstrip("."),
                "is_embedded": track.is_embedded,
                "source_file": placed.source.name,
                "placed_as": placed.filename,
                "confidence": round(track.confidence, 3),
            })

        entry: dict[str, Any] = {
            "placed_at": datetime.now(timezone.utc).isoformat(),
            "release_group": release_group,
            "tracks": tracks_data,
        }
        if season is not None:
            entry["season"] = season
        if episode is not None:
            entry["episode"] = episode

        history.append(entry)

        # Update release_groups list
        if release_group:
            groups = data.setdefault("release_groups", [])
            if release_group not in groups:
                groups.append(release_group)

        self.save(data)

        # Compare against None: season 0 (specials) and episode 0 are valid
        # numbers and must not be reported as a movie.
        if season is not None and episode is not None:
            label = "S%02dE%02d" % (season, episode)
        else:
            label = "movie"
        logger.info(
            f"MetadataStore: appended history "
            f"({label}) "
            f"— {len(tracks_data)} track(s)"
        )

    def history(self) -> list[dict]:
        """Return the raw history list."""
        return self.load().get("subtitle_history", [])
|
||||
@@ -0,0 +1,116 @@
|
||||
"""RuleSetRepository — loads SubtitleRuleSet from .alfred/ YAML files."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import yaml
|
||||
|
||||
from alfred.domain.subtitles.aggregates import SubtitleRuleSet
|
||||
from alfred.domain.subtitles.value_objects import RuleScope
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from alfred.infrastructure.persistence.memory.ltm.components.subtitle_preferences import SubtitlePreferences
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _load_yaml(path: Path) -> dict:
|
||||
if not path.exists():
|
||||
return {}
|
||||
try:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
except Exception as e:
|
||||
logger.warning(f"RuleSetRepository: could not read {path}: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
class RuleSetRepository:
|
||||
"""
|
||||
Builds a fully chained SubtitleRuleSet by reading YAML from .alfred/.
|
||||
|
||||
Inheritance chain:
|
||||
global (hardcoded defaults)
|
||||
└── release_group (.alfred/release_groups/{GROUP}.yaml)
|
||||
└── local (.alfred/rules.yaml)
|
||||
|
||||
Rules are delta-only — None means "inherit from parent".
|
||||
The repository only creates intermediate nodes when the corresponding
|
||||
file exists and contains an override section.
|
||||
"""
|
||||
|
||||
def __init__(self, library_root: Path):
    """Remember the ``.alfred`` directory located under the given media folder."""
    alfred_dir = library_root / ".alfred"
    self._alfred_dir = alfred_dir
|
||||
|
||||
def load(
    self,
    release_group: str | None = None,
    subtitle_preferences: "SubtitlePreferences | None" = None,
) -> SubtitleRuleSet:
    """
    Assemble the rule-set chain and return its innermost node.

    The chain starts from SubtitleRuleSet.global_default(). When
    subtitle_preferences is given, its languages/formats/types are applied
    to that base node. A release-group node and then a local node are
    stacked on top, each only if its YAML file provides a non-empty
    "override" section.

    Returns the base node itself when no override file applies.
    """
    resolved = SubtitleRuleSet.global_default()
    if subtitle_preferences is not None:
        resolved.override(
            languages=subtitle_preferences.languages,
            formats=subtitle_preferences.formats,
            types=subtitle_preferences.types,
        )

    # Release group level: .alfred/release_groups/{GROUP}.yaml
    if release_group:
        group_file = self._alfred_dir / "release_groups" / f"{release_group}.yaml"
        group_override = _load_yaml(group_file).get("override", {})
        if group_override:
            group_scope = RuleScope(level="release_group", identifier=release_group)
            resolved = SubtitleRuleSet(scope=group_scope, parent=resolved)
            resolved.override(**_filter_override(group_override))
            logger.debug(f"RuleSetRepository: loaded release_group override for '{release_group}'")

    # Local (show/movie) level: .alfred/rules.yaml
    local_file = self._alfred_dir / "rules.yaml"
    local_override = _load_yaml(local_file).get("override", {})
    if local_override:
        resolved = SubtitleRuleSet(scope=RuleScope(level="show"), parent=resolved)
        resolved.override(**_filter_override(local_override))
        logger.debug("RuleSetRepository: loaded local rules.yaml override")

    return resolved
|
||||
|
||||
def save_local(self, delta: dict) -> None:
|
||||
"""Write or update .alfred/rules.yaml with override delta."""
|
||||
self._alfred_dir.mkdir(parents=True, exist_ok=True)
|
||||
path = self._alfred_dir / "rules.yaml"
|
||||
existing = _load_yaml(path)
|
||||
existing_override = existing.get("override", {})
|
||||
existing_override.update(delta)
|
||||
data = {"override": existing_override}
|
||||
tmp = path.with_suffix(".yaml.tmp")
|
||||
try:
|
||||
with open(tmp, "w", encoding="utf-8") as f:
|
||||
yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
|
||||
tmp.rename(path)
|
||||
logger.info(f"RuleSetRepository: saved local rules to {path}")
|
||||
except Exception as e:
|
||||
logger.error(f"RuleSetRepository: could not write {path}: {e}")
|
||||
tmp.unlink(missing_ok=True)
|
||||
raise
|
||||
|
||||
|
||||
def _filter_override(data: dict) -> dict:
|
||||
"""Keep only keys that SubtitleRuleSet.override() accepts."""
|
||||
valid = {"languages", "formats", "types", "format_priority", "min_confidence"}
|
||||
return {k: v for k, v in data.items() if k in valid}
|
||||
@@ -0,0 +1,13 @@
|
||||
id: adjacent
|
||||
version: "1.0"
|
||||
description: >
|
||||
Subtitle files sit directly alongside the video file, in the same directory.
|
||||
Example: Show.S01E01.mkv + Show.S01E01.English.srt in the same folder.
|
||||
|
||||
scan_strategy: adjacent
|
||||
root_folder: null
|
||||
|
||||
type_detection:
|
||||
method: token_in_name
|
||||
description: >
|
||||
Type (standard/SDH/forced) is determined from tokens in the filename.
|
||||
@@ -0,0 +1,14 @@
|
||||
id: embedded
|
||||
version: "1.0"
|
||||
description: >
|
||||
Subtitle tracks are embedded inside the video container (MKV, MP4).
|
||||
Detected via ffprobe — no external files.
|
||||
|
||||
scan_strategy: embedded
|
||||
root_folder: null
|
||||
|
||||
type_detection:
|
||||
method: ffprobe_metadata
|
||||
description: >
|
||||
Language, type (SDH/forced) and format are read directly from the
|
||||
container track metadata via ffprobe.
|
||||
@@ -0,0 +1,16 @@
|
||||
id: episode_subfolder
|
||||
version: "1.0"
|
||||
description: >
|
||||
Subtitle files are in a Subs/ folder at the release root, with one subfolder
|
||||
per episode named after the episode filename (without extension).
|
||||
Example: Subs/Show.S01E01.BluRay.x265-RARBG/2_English.srt
|
||||
|
||||
scan_strategy: episode_subfolder
|
||||
root_folder: "Subs"
|
||||
|
||||
type_detection:
|
||||
method: size_and_count
|
||||
description: >
|
||||
When multiple files share the same detected language, differentiate
|
||||
standard vs SDH by comparing file size and subtitle entry count.
|
||||
Larger file (more entries) = SDH.
|
||||
@@ -0,0 +1,14 @@
|
||||
id: subs_flat
|
||||
version: "1.0"
|
||||
description: >
|
||||
Subtitle files are directly in a Subs/ folder at the release root,
|
||||
with no per-episode subfolder.
|
||||
Example: Subs/Show.S01E01.English.srt
|
||||
|
||||
scan_strategy: flat
|
||||
root_folder: "Subs"
|
||||
|
||||
type_detection:
|
||||
method: token_in_name
|
||||
description: >
|
||||
Type (standard/SDH/forced) is determined from tokens in the filename.
|
||||
@@ -0,0 +1,5 @@
|
||||
name: KONSTRAST
|
||||
known_patterns: ["episode_subfolder", "embedded"]
|
||||
notes: >
|
||||
Follows similar conventions to RARBG. Pattern varies per release — always
|
||||
verify per season.
|
||||
@@ -0,0 +1,2 @@
|
||||
name: RARBG
|
||||
known_patterns: ["episode_subfolder"]
|
||||
@@ -0,0 +1,89 @@
|
||||
name: subtitles
|
||||
version: "1.0"
|
||||
description: "Subtitle classification rules — formats, types, languages and their tokens"
|
||||
|
||||
defaults:
|
||||
languages: ["fra", "eng"]
|
||||
formats: ["srt"]
|
||||
types: ["standard", "forced", "sdh"]
|
||||
format_priority: ["srt", "ass"]
|
||||
min_confidence: 0.7
|
||||
|
||||
formats:
|
||||
srt:
|
||||
extensions: [".srt"]
|
||||
description: "SubRip — plain text, universal"
|
||||
ass:
|
||||
extensions: [".ass", ".ssa"]
|
||||
description: "Advanced SubStation Alpha — with styles and positioning"
|
||||
|
||||
types:
|
||||
standard:
|
||||
tokens: []
|
||||
description: "Normal subtitle track"
|
||||
sdh:
|
||||
tokens: ["sdh", "hi", "cc", "hearing"]
|
||||
description: "Hearing-impaired — includes sound effects and speaker labels"
|
||||
forced:
|
||||
tokens: ["forced", "foreign"]
|
||||
description: "Foreign lines only — e.g. alien speech in an otherwise English film"
|
||||
|
||||
languages:
|
||||
fra:
|
||||
tokens: ["fr", "fra", "french", "francais", "vf", "vff", "vostfr"]
|
||||
eng:
|
||||
tokens: ["en", "eng", "english"]
|
||||
spa:
|
||||
tokens: ["es", "spa", "spanish", "espanol", "español"]
|
||||
deu:
|
||||
tokens: ["de", "deu", "ger", "german", "deutsch"]
|
||||
ita:
|
||||
tokens: ["it", "ita", "italian", "italiano"]
|
||||
por:
|
||||
tokens: ["pt", "por", "portuguese", "portugues", "português"]
|
||||
nld:
|
||||
tokens: ["nl", "nld", "dut", "dutch", "nederlands"]
|
||||
nor:
|
||||
tokens: ["no", "nor", "norwegian", "norsk"]
|
||||
swe:
|
||||
tokens: ["sv", "swe", "swedish", "svenska"]
|
||||
dan:
|
||||
tokens: ["da", "dan", "danish", "dansk"]
|
||||
fin:
|
||||
tokens: ["fi", "fin", "finnish", "suomi"]
|
||||
pol:
|
||||
tokens: ["pl", "pol", "polish", "polski"]
|
||||
ces:
|
||||
tokens: ["cs", "ces", "cze", "czech"]
|
||||
slk:
|
||||
tokens: ["sk", "slk", "slo", "slovak"]
|
||||
hun:
|
||||
tokens: ["hu", "hun", "hungarian", "magyar"]
|
||||
ron:
|
||||
tokens: ["ro", "ron", "rum", "romanian", "romana", "română"]
|
||||
bul:
|
||||
tokens: ["bg", "bul", "bulgarian"]
|
||||
hrv:
|
||||
tokens: ["hr", "hrv", "croatian", "hrvatski"]
|
||||
srp:
|
||||
tokens: ["sr", "srp", "serbian", "srpski"]
|
||||
slv:
|
||||
tokens: ["sl", "slv", "slovenian", "slovensko"]
|
||||
est:
|
||||
tokens: ["et", "est", "estonian", "eesti"]
|
||||
lav:
|
||||
tokens: ["lv", "lav", "latvian", "latviesu"]
|
||||
lit:
|
||||
tokens: ["lt", "lit", "lithuanian", "lietuviu"]
|
||||
mkd:
|
||||
tokens: ["mk", "mkd", "mac", "macedonian"]
|
||||
jpn:
|
||||
tokens: ["ja", "jpn", "japanese"]
|
||||
zho:
|
||||
tokens: ["zh", "zho", "chi", "chinese"]
|
||||
kor:
|
||||
tokens: ["ko", "kor", "korean"]
|
||||
ara:
|
||||
tokens: ["ar", "ara", "arabic"]
|
||||
tur:
|
||||
tokens: ["tr", "tur", "turkish"]
|
||||
+44
-156
@@ -1,209 +1,97 @@
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
"""
|
||||
Application settings — Alfred only.
|
||||
|
||||
import tomllib
|
||||
from pydantic import Field, computed_field, field_validator
|
||||
Only declares what Alfred's Python code actually consumes.
|
||||
Everything else (.env.alfred, .env.secrets) is loaded by Docker Compose
|
||||
for other services and ignored here via extra="ignore".
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import field_validator
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
ENV_FILE_PATH = BASE_DIR / ".env"
|
||||
toml_path = BASE_DIR / "pyproject.toml"
|
||||
|
||||
|
||||
class ConfigurationError(Exception):
    """Error raised when the configuration is invalid."""
|
||||
|
||||
|
||||
class ProjectVersions(NamedTuple):
    """
    Read-only record grouping the project's version strings.

    Using named fields makes every version explicit at the call site and
    guards against accidentally swapping one version for another.
    """

    # LibreChat version string.
    librechat: str
    # RAG version string.
    rag: str
    # Alfred version string.
    alfred: str
|
||||
|
||||
|
||||
def get_versions_from_toml() -> ProjectVersions:
    """
    Read versioning information from pyproject.toml.

    The LibreChat and RAG versions are taken from the
    ``[tool.alfred.settings]`` table (``librechat_version`` and
    ``rag_version``); the Alfred version is ``[tool.poetry].version``.

    Returns:
        A ProjectVersions tuple holding the three version values.

    Raises:
        FileNotFoundError: If pyproject.toml does not exist at ``toml_path``.
        KeyError: If any of the expected tables or keys is missing.
    """
    if not toml_path.exists():
        raise FileNotFoundError(f"pyproject.toml not found: {toml_path}")

    with open(toml_path, "rb") as f:
        data = tomllib.load(f)

    try:
        # Look the shared table up once; lookup order (and therefore which
        # missing key is reported first) matches the field order below.
        alfred_settings = data["tool"]["alfred"]["settings"]
        return ProjectVersions(
            librechat=alfred_settings["librechat_version"],
            rag=alfred_settings["rag_version"],
            alfred=data["tool"]["poetry"]["version"],
        )
    except KeyError as e:
        raise KeyError(f"Error: Missing key {e} in pyproject.toml") from e
|
||||
|
||||
|
||||
# Load versions once
|
||||
VERSIONS: ProjectVersions = get_versions_from_toml()
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=ENV_FILE_PATH,
|
||||
env_file=[BASE_DIR / ".env.alfred", BASE_DIR / ".env.secrets", BASE_DIR / ".env.make"],
|
||||
env_file_encoding="utf-8",
|
||||
extra="ignore",
|
||||
case_sensitive=False,
|
||||
)
|
||||
# --- GENERAL SETTINGS ---
|
||||
host: str = "0.0.0.0"
|
||||
port: int = 3080
|
||||
debug_logging: bool = False
|
||||
debug_console: bool = False
|
||||
data_storage: str = "data"
|
||||
librechat_version: str = Field(VERSIONS.librechat, description="Librechat version")
|
||||
rag_version: str = Field(VERSIONS.rag, description="RAG engine version")
|
||||
alfred_version: str = Field(VERSIONS.alfred, description="Alfred version")
|
||||
|
||||
# --- CONTEXT SETTINGS ---
|
||||
# --- APP ---
|
||||
max_history_messages: int = 10
|
||||
max_tool_iterations: int = 10
|
||||
request_timeout: int = 30
|
||||
llm_temperature: float = 0.2
|
||||
data_storage_dir: str = "data"
|
||||
|
||||
# TODO: Finish
|
||||
# --- BUILD ---
|
||||
alfred_version: str | None = None
|
||||
|
||||
# --- LLM ---
|
||||
default_llm_provider: str = "local"
|
||||
ollama_base_url: str = "http://ollama:11434"
|
||||
ollama_model: str = "llama3.3:latest"
|
||||
deepseek_base_url: str = "https://api.deepseek.com"
|
||||
deepseek_model: str = "deepseek-chat"
|
||||
|
||||
# --- API KEYS ---
|
||||
anthropic_api_key: str | None = Field(None, description="Claude API key")
|
||||
deepseek_api_key: str | None = Field(None, description="Deepseek API key")
|
||||
google_api_key: str | None = Field(None, description="Gemini API key")
|
||||
kimi_api_key: str | None = Field(None, description="Kimi API key")
|
||||
openai_api_key: str | None = Field(None, description="ChatGPT API key")
|
||||
|
||||
# --- SECURITY KEYS ---
|
||||
# Generated automatically if not in .env to ensure "Secure by Default"
|
||||
jwt_secret: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
|
||||
jwt_refresh_secret: str = Field(default_factory=lambda: secrets.token_urlsafe(32))
|
||||
|
||||
# We keep these for encryption of keys in MongoDB (AES-256 Hex format)
|
||||
creds_key: str = Field(default_factory=lambda: secrets.token_hex(32))
|
||||
creds_iv: str = Field(default_factory=lambda: secrets.token_hex(16))
|
||||
|
||||
# --- SERVICES ---
|
||||
qbittorrent_url: str = "http://qbittorrent:16140"
|
||||
qbittorrent_username: str = "admin"
|
||||
qbittorrent_password: str = Field(default_factory=lambda: secrets.token_urlsafe(16))
|
||||
|
||||
mongo_host: str = "mongodb"
|
||||
mongo_user: str = "alfred"
|
||||
mongo_password: str = Field(
|
||||
default_factory=lambda: secrets.token_urlsafe(24), repr=False, exclude=True
|
||||
)
|
||||
mongo_port: int = 27017
|
||||
mongo_db_name: str = "alfred"
|
||||
|
||||
@computed_field(repr=False)
|
||||
@property
|
||||
def mongo_uri(self) -> str:
|
||||
return (
|
||||
f"mongodb://{self.mongo_user}:{self.mongo_password}"
|
||||
f"@{self.mongo_host}:{self.mongo_port}/{self.mongo_db_name}"
|
||||
f"?authSource=admin"
|
||||
)
|
||||
|
||||
postgres_host: str = "vectordb"
|
||||
postgres_user: str = "alfred"
|
||||
postgres_password: str = Field(
|
||||
default_factory=lambda: secrets.token_urlsafe(24), repr=False, exclude=True
|
||||
)
|
||||
postgres_port: int = 5432
|
||||
postgres_db_name: str = "alfred"
|
||||
|
||||
@computed_field(repr=False)
|
||||
@property
|
||||
def postgres_uri(self) -> str:
|
||||
return (
|
||||
f"postgresql://{self.postgres_user}:{self.postgres_password}"
|
||||
f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db_name}"
|
||||
)
|
||||
|
||||
tmdb_api_key: str | None = Field(None, description="The Movie Database API key")
|
||||
tmdb_api_key: str | None = None
|
||||
tmdb_base_url: str = "https://api.themoviedb.org/3"
|
||||
|
||||
# --- LLM PICKER & CONFIG ---
|
||||
# Providers: 'local', 'deepseek', ...
|
||||
default_llm_provider: str = "local"
|
||||
ollama_base_url: str = "http://ollama:11434"
|
||||
# Models: ...
|
||||
ollama_model: str = "llama3.3:latest"
|
||||
llm_temperature: float = 0.2
|
||||
|
||||
# --- RAG ENGINE ---
|
||||
rag_enabled: bool = True # TODO: Handle False
|
||||
rag_api_url: str = "http://rag_api:8000"
|
||||
embeddings_provider: str = "ollama"
|
||||
# Models: ...
|
||||
embeddings_model: str = "nomic-embed-text"
|
||||
|
||||
# --- MEILISEARCH ---
|
||||
meili_enabled: bool = Field(True, description="Enable meili")
|
||||
meili_no_analytics: bool = True
|
||||
meili_host: str = "http://meilisearch:7700"
|
||||
meili_master_key: str = Field(
|
||||
default_factory=lambda: secrets.token_urlsafe(32),
|
||||
description="Master key for Meilisearch",
|
||||
repr=False,
|
||||
)
|
||||
deepseek_api_key: str | None = None
|
||||
openai_api_key: str | None = None
|
||||
anthropic_api_key: str | None = None
|
||||
google_api_key: str | None = None
|
||||
kimi_api_key: str | None = None
|
||||
|
||||
# --- VALIDATORS ---
|
||||
@field_validator("llm_temperature")
|
||||
@classmethod
|
||||
def validate_temperature(cls, v: float) -> float:
|
||||
if not 0.0 <= v <= 2.0:
|
||||
raise ConfigurationError(
|
||||
f"Temperature must be between 0.0 and 2.0, got {v}"
|
||||
)
|
||||
raise ConfigurationError(f"Temperature must be between 0.0 and 2.0, got {v}")
|
||||
return v
|
||||
|
||||
@field_validator("max_tool_iterations")
|
||||
@classmethod
|
||||
def validate_max_iterations(cls, v: int) -> int:
|
||||
if not 1 <= v <= 20:
|
||||
raise ConfigurationError(
|
||||
f"max_tool_iterations must be between 1 and 50, got {v}"
|
||||
)
|
||||
raise ConfigurationError(f"max_tool_iterations must be between 1 and 20, got {v}")
|
||||
return v
|
||||
|
||||
@field_validator("request_timeout")
|
||||
@classmethod
|
||||
def validate_timeout(cls, v: int) -> int:
|
||||
if not 1 <= v <= 300:
|
||||
raise ConfigurationError(
|
||||
f"request_timeout must be between 1 and 300 seconds, got {v}"
|
||||
)
|
||||
raise ConfigurationError(f"request_timeout must be between 1 and 300 seconds, got {v}")
|
||||
return v
|
||||
|
||||
@field_validator("deepseek_base_url", "tmdb_base_url")
|
||||
@classmethod
|
||||
def validate_url(cls, v: str, info) -> str:
|
||||
if not v.startswith(("http://", "https://")):
|
||||
raise ConfigurationError(f"Invalid {info.field_name}")
|
||||
return v
|
||||
|
||||
def is_tmdb_configured(self):
|
||||
# --- HELPERS ---
|
||||
def is_tmdb_configured(self) -> bool:
|
||||
return bool(self.tmdb_api_key)
|
||||
|
||||
def is_deepseek_configured(self):
|
||||
def is_deepseek_configured(self) -> bool:
|
||||
return bool(self.deepseek_api_key)
|
||||
|
||||
def dump_safe(self):
|
||||
return self.model_dump(exclude_none=False)
|
||||
def is_openai_configured(self) -> bool:
|
||||
return bool(self.openai_api_key)
|
||||
|
||||
def is_anthropic_configured(self) -> bool:
|
||||
return bool(self.anthropic_api_key)
|
||||
|
||||
def is_google_configured(self) -> bool:
|
||||
return bool(self.google_api_key)
|
||||
|
||||
def is_kimi_configured(self) -> bool:
|
||||
return bool(self.kimi_api_key)
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
+44
-21
@@ -8,8 +8,7 @@ services:
|
||||
target: builder
|
||||
args:
|
||||
PYTHON_VERSION: ${PYTHON_VERSION}
|
||||
PYTHON_VERSION_SHORT: ${PYTHON_VERSION_SHORT}
|
||||
RUNNER: ${RUNNER}
|
||||
UV_VERSION: ${UV_VERSION}
|
||||
command: python scripts/bootstrap.py
|
||||
networks:
|
||||
- alfred-net
|
||||
@@ -17,24 +16,30 @@ services:
|
||||
# --- MAIN APPLICATION ---
|
||||
alfred:
|
||||
container_name: alfred-core
|
||||
image: alfred_media_organizer:latest
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
PYTHON_VERSION: ${PYTHON_VERSION}
|
||||
PYTHON_VERSION_SHORT: ${PYTHON_VERSION_SHORT}
|
||||
RUNNER: ${RUNNER}
|
||||
UV_VERSION: ${UV_VERSION}
|
||||
depends_on:
|
||||
alfred-init:
|
||||
condition: service_completed_successfully
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
- path: .env.make
|
||||
required: true
|
||||
volumes:
|
||||
- ./data:/data
|
||||
- ./logs:/logs
|
||||
# TODO: Hot reload (comment out in production)
|
||||
#- ./alfred:/home/appuser/alfred
|
||||
- ./alfred:/home/appuser/alfred
|
||||
command: >
|
||||
sh -c "python -u -m uvicorn alfred.app:app --host 0.0.0.0 --port 8000 --reload 2>&1 | tee -a /logs/alfred.log"
|
||||
networks:
|
||||
- alfred-net
|
||||
|
||||
@@ -49,7 +54,11 @@ services:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.librechat
|
||||
required: true
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
environment:
|
||||
# Remap value name
|
||||
@@ -75,21 +84,23 @@ services:
|
||||
alfred-init:
|
||||
condition: service_completed_successfully
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
environment:
|
||||
# Remap value name
|
||||
- MONGO_INITDB_ROOT_USERNAME=${MONGO_USER}
|
||||
- MONGO_INITDB_ROOT_PASSWORD=${MONGO_PASSWORD}
|
||||
# Fix MongoDB + Linux kernel >= 6.19
|
||||
- GLIBC_TUNABLES=glibc.cpu.hwcaps=-SHSTK
|
||||
ports:
|
||||
- "${MONGO_PORT}:${MONGO_PORT}"
|
||||
volumes:
|
||||
- ./data/mongo:/data/db
|
||||
command: mongod --quiet --setParameter logComponentVerbosity='{"network":{"verbosity":0}}'
|
||||
- ./data/mongodb:/data/db
|
||||
- ./mongod.conf:/etc/mongod.conf:ro
|
||||
command: ["mongod", "--config", "/etc/mongod.conf"]
|
||||
healthcheck:
|
||||
test: |
|
||||
mongosh --quiet --eval "db.adminCommand('ping')" || \
|
||||
mongosh --quiet -u "${MONGO_USER}" -p "${MONGO_PASSWORD}" --authenticationDatabase admin --eval "db.adminCommand('ping')"
|
||||
test: bash -c "echo > /dev/tcp/localhost/27017"
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
@@ -105,7 +116,9 @@ services:
|
||||
condition: service_completed_successfully
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
volumes:
|
||||
- ./data/ollama:/root/.ollama
|
||||
@@ -122,7 +135,9 @@ services:
|
||||
condition: service_completed_successfully
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
volumes:
|
||||
- ./data/meilisearch:/meili_data
|
||||
@@ -141,7 +156,9 @@ services:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
ports:
|
||||
- "${RAG_API_PORT}:${RAG_API_PORT}"
|
||||
@@ -160,7 +177,9 @@ services:
|
||||
condition: service_completed_successfully
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
ports:
|
||||
- "${POSTGRES_PORT}:${POSTGRES_PORT}"
|
||||
@@ -168,12 +187,14 @@ services:
|
||||
- ./data/vectordb:/var/lib/postgresql/data
|
||||
profiles: ["rag", "full"]
|
||||
healthcheck:
|
||||
test: [ "CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-alfred} -d ${POSTGRES_DB_NAME:-alfred}" ]
|
||||
test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER:-alfred} -d $${POSTGRES_DB_NAME:-alfred}" ]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- alfred-net
|
||||
alfred-net:
|
||||
aliases:
|
||||
- db
|
||||
|
||||
# --- QBITTORRENT (Optional) ---
|
||||
qbittorrent:
|
||||
@@ -184,7 +205,9 @@ services:
|
||||
condition: service_completed_successfully
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- path: .env
|
||||
- path: .env.alfred
|
||||
required: true
|
||||
- path: .env.secrets
|
||||
required: true
|
||||
environment:
|
||||
- PUID=1000
|
||||
|
||||
@@ -0,0 +1,878 @@
|
||||
#=====================================================================#
|
||||
# LibreChat Configuration #
|
||||
#=====================================================================#
|
||||
# Please refer to the reference documentation for assistance #
|
||||
# with configuring your LibreChat environment. #
|
||||
# #
|
||||
# https://www.librechat.ai/docs/configuration/dotenv #
|
||||
#=====================================================================#
|
||||
|
||||
#==================================================#
|
||||
# Server Configuration #
|
||||
#==================================================#
|
||||
|
||||
HOST=localhost
|
||||
PORT=3080
|
||||
|
||||
MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
|
||||
# The maximum number of connections in the connection pool.
|
||||
MONGO_MAX_POOL_SIZE=
|
||||
# The minimum number of connections in the connection pool.
|
||||
MONGO_MIN_POOL_SIZE=
|
||||
# The maximum number of connections that may be in the process of being established concurrently by the connection pool.
|
||||
MONGO_MAX_CONNECTING=
|
||||
# The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
|
||||
MONGO_MAX_IDLE_TIME_MS=
|
||||
# The maximum time in milliseconds that a thread can wait for a connection to become available.
|
||||
MONGO_WAIT_QUEUE_TIMEOUT_MS=
|
||||
# Set to false to disable automatic index creation for all models associated with this connection.
|
||||
MONGO_AUTO_INDEX=
|
||||
# Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
|
||||
MONGO_AUTO_CREATE=
|
||||
|
||||
DOMAIN_CLIENT=http://localhost:3080
|
||||
DOMAIN_SERVER=http://localhost:3080
|
||||
|
||||
NO_INDEX=true
|
||||
# Use the address that is at most n number of hops away from the Express application.
|
||||
# req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
|
||||
# A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
|
||||
# Defaulted to 1.
|
||||
TRUST_PROXY=1
|
||||
|
||||
# Minimum password length for user authentication
|
||||
# Default: 8
|
||||
# Note: When using LDAP authentication, you may want to set this to 1
|
||||
# to bypass local password validation, as LDAP servers handle their own
|
||||
# password policies.
|
||||
# MIN_PASSWORD_LENGTH=8
|
||||
|
||||
# When enabled, the app will continue running after encountering uncaught exceptions
|
||||
# instead of exiting the process. Not recommended for production unless necessary.
|
||||
# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
|
||||
|
||||
#===============#
|
||||
# JSON Logging #
|
||||
#===============#
|
||||
|
||||
# Use when processing console logs in a cloud deployment such as GCP/AWS
|
||||
CONSOLE_JSON=false
|
||||
|
||||
#===============#
|
||||
# Debug Logging #
|
||||
#===============#
|
||||
|
||||
DEBUG_LOGGING=true
|
||||
DEBUG_CONSOLE=false
|
||||
# Set to true to enable agent debug logging
|
||||
AGENT_DEBUG_LOGGING=false
|
||||
|
||||
# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
|
||||
# MEM_DIAG=true
|
||||
|
||||
#=============#
|
||||
# Permissions #
|
||||
#=============#
|
||||
|
||||
# UID=1000
|
||||
# GID=1000
|
||||
|
||||
#==============#
|
||||
# Node Options #
|
||||
#==============#
|
||||
|
||||
# NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
|
||||
# This variable is used as a build argument for Docker or CI/CD workflows,
|
||||
# and is NOT used by Node.js to set the heap size at runtime.
|
||||
# To configure Node.js memory, use NODE_OPTIONS, e.g.:
|
||||
# NODE_OPTIONS="--max-old-space-size=6144"
|
||||
# See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
|
||||
NODE_MAX_OLD_SPACE_SIZE=6144
|
||||
|
||||
#===============#
|
||||
# Configuration #
|
||||
#===============#
|
||||
# Use an absolute path, a relative path, or a URL
|
||||
|
||||
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
|
||||
|
||||
#==================#
|
||||
# Langfuse Tracing #
|
||||
#==================#
|
||||
|
||||
# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
|
||||
|
||||
# LANGFUSE_PUBLIC_KEY=
|
||||
# LANGFUSE_SECRET_KEY=
|
||||
# LANGFUSE_BASE_URL=
|
||||
|
||||
#===================================================#
|
||||
# Endpoints #
|
||||
#===================================================#
|
||||
|
||||
# ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
|
||||
|
||||
PROXY=
|
||||
|
||||
#===================================#
|
||||
# Known Endpoints - librechat.yaml #
|
||||
#===================================#
|
||||
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
|
||||
|
||||
# ANYSCALE_API_KEY=
|
||||
# APIPIE_API_KEY=
|
||||
# COHERE_API_KEY=
|
||||
# DEEPSEEK_API_KEY=
|
||||
# DATABRICKS_API_KEY=
|
||||
# FIREWORKS_API_KEY=
|
||||
# GROQ_API_KEY=
|
||||
# HUGGINGFACE_TOKEN=
|
||||
# MISTRAL_API_KEY=
|
||||
# OPENROUTER_KEY=
|
||||
# PERPLEXITY_API_KEY=
|
||||
# SHUTTLEAI_API_KEY=
|
||||
# TOGETHERAI_API_KEY=
|
||||
# UNIFY_API_KEY=
|
||||
# XAI_API_KEY=
|
||||
|
||||
#============#
|
||||
# Anthropic #
|
||||
#============#
|
||||
|
||||
ANTHROPIC_API_KEY=user_provided
|
||||
# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
|
||||
# ANTHROPIC_REVERSE_PROXY=
|
||||
|
||||
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
|
||||
# ANTHROPIC_USE_VERTEX=
|
||||
# ANTHROPIC_VERTEX_REGION=us-east5
|
||||
|
||||
#============#
|
||||
# Azure #
|
||||
#============#
|
||||
|
||||
# Note: these variables are DEPRECATED
|
||||
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
|
||||
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
|
||||
|
||||
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
|
||||
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
|
||||
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
|
||||
# AZURE_API_KEY= # Deprecated
|
||||
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_VERSION= # Deprecated
|
||||
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
|
||||
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
|
||||
|
||||
#=================#
|
||||
# AWS Bedrock #
|
||||
#=================#
|
||||
|
||||
# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
|
||||
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
|
||||
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
|
||||
# BEDROCK_AWS_SESSION_TOKEN=someSessionToken
|
||||
|
||||
# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
|
||||
# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
|
||||
# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
|
||||
|
||||
# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
|
||||
|
||||
# Notes on specific models:
|
||||
# The following models are not supported because they do not support streaming:
|
||||
# ai21.j2-mid-v1
|
||||
|
||||
# The following models are not supported because they do not support conversation history:
|
||||
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
|
||||
|
||||
#============#
|
||||
# Google #
|
||||
#============#
|
||||
|
||||
GOOGLE_KEY=user_provided
|
||||
|
||||
# GOOGLE_REVERSE_PROXY=
|
||||
# Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
|
||||
# GOOGLE_AUTH_HEADER=true
|
||||
|
||||
# Gemini API (AI Studio)
|
||||
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
|
||||
|
||||
# Vertex AI
|
||||
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
|
||||
|
||||
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
|
||||
|
||||
# Google Cloud region for Vertex AI (used by both chat and image generation)
|
||||
# GOOGLE_LOC=us-central1
|
||||
|
||||
# Alternative region env var for Gemini Image Generation
|
||||
# GOOGLE_CLOUD_LOCATION=global
|
||||
|
||||
# Vertex AI Service Account Configuration
|
||||
# Path to your Google Cloud service account JSON file
|
||||
# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
|
||||
|
||||
# Google Safety Settings
|
||||
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
|
||||
#
|
||||
# For Vertex AI:
|
||||
# To use the BLOCK_NONE setting, you need either:
|
||||
# (a) Access through an allowlist via your Google account team, or
|
||||
# (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
|
||||
#
|
||||
# For Gemini API (AI Studio):
|
||||
# BLOCK_NONE is available by default, no special account requirements.
|
||||
#
|
||||
# Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
|
||||
#
|
||||
# GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
|
||||
# GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
|
||||
|
||||
#========================#
|
||||
# Gemini Image Generation #
|
||||
#========================#
|
||||
|
||||
# Gemini Image Generation Tool (for Agents)
|
||||
# Supports multiple authentication methods in priority order:
|
||||
# 1. User-provided API key (via GUI)
|
||||
# 2. GEMINI_API_KEY env var (admin-configured)
|
||||
# 3. GOOGLE_KEY env var (shared with Google chat endpoint)
|
||||
# 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
|
||||
|
||||
# Option A: Use dedicated Gemini API key for image generation
|
||||
# GEMINI_API_KEY=your-gemini-api-key
|
||||
|
||||
# Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
|
||||
# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
|
||||
|
||||
#============#
|
||||
# OpenAI #
|
||||
#============#
|
||||
|
||||
OPENAI_API_KEY=user_provided
|
||||
# OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
|
||||
|
||||
DEBUG_OPENAI=false
|
||||
|
||||
# TITLE_CONVO=false
|
||||
# OPENAI_TITLE_MODEL=gpt-4o-mini
|
||||
|
||||
# OPENAI_SUMMARIZE=true
|
||||
# OPENAI_SUMMARY_MODEL=gpt-4o-mini
|
||||
|
||||
# OPENAI_FORCE_PROMPT=true
|
||||
|
||||
# OPENAI_REVERSE_PROXY=
|
||||
|
||||
# OPENAI_ORGANIZATION=
|
||||
|
||||
#====================#
|
||||
# Assistants API #
|
||||
#====================#
|
||||
|
||||
ASSISTANTS_API_KEY=user_provided
|
||||
# ASSISTANTS_BASE_URL=
|
||||
# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
|
||||
|
||||
#==========================#
|
||||
# Azure Assistants API #
|
||||
#==========================#
|
||||
|
||||
# Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
|
||||
# The models for Azure Assistants are also determined by your Azure OpenAI configuration.
|
||||
|
||||
# More info, including how to enable use of Assistants with Azure here:
|
||||
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
|
||||
|
||||
CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
|
||||
CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
|
||||
|
||||
# Azure AI Search
|
||||
#-----------------
|
||||
AZURE_AI_SEARCH_SERVICE_ENDPOINT=
|
||||
AZURE_AI_SEARCH_INDEX_NAME=
|
||||
AZURE_AI_SEARCH_API_KEY=
|
||||
|
||||
AZURE_AI_SEARCH_API_VERSION=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
|
||||
AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
|
||||
|
||||
# OpenAI Image Tools Customization
|
||||
#----------------
|
||||
# IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
|
||||
# IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
|
||||
# IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
|
||||
# IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
|
||||
# IMAGE_GEN_OAI_DESCRIPTION=
|
||||
# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
|
||||
# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
|
||||
# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
|
||||
# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
|
||||
# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
|
||||
|
||||
# DALL·E
|
||||
#----------------
|
||||
# DALLE_API_KEY=
|
||||
# DALLE3_API_KEY=
|
||||
# DALLE2_API_KEY=
|
||||
# DALLE3_SYSTEM_PROMPT=
|
||||
# DALLE2_SYSTEM_PROMPT=
|
||||
# DALLE_REVERSE_PROXY=
|
||||
# DALLE3_BASEURL=
|
||||
# DALLE2_BASEURL=
|
||||
|
||||
# DALL·E (via Azure OpenAI)
|
||||
# Note: requires some of the variables above to be set
|
||||
#----------------
|
||||
# DALLE3_AZURE_API_VERSION=
|
||||
# DALLE2_AZURE_API_VERSION=
|
||||
|
||||
# Flux
|
||||
#-----------------
|
||||
FLUX_API_BASE_URL=https://api.us1.bfl.ai
|
||||
# FLUX_API_BASE_URL=https://api.bfl.ml
|
||||
|
||||
# Get your API key at https://api.us1.bfl.ai/auth/profile
|
||||
# FLUX_API_KEY=
|
||||
|
||||
# Google
|
||||
#-----------------
|
||||
GOOGLE_SEARCH_API_KEY=
|
||||
GOOGLE_CSE_ID=
|
||||
|
||||
# Stable Diffusion
|
||||
#-----------------
|
||||
SD_WEBUI_URL=http://host.docker.internal:7860
|
||||
|
||||
# Tavily
|
||||
#-----------------
|
||||
TAVILY_API_KEY=
|
||||
|
||||
# Traversaal
|
||||
#-----------------
|
||||
TRAVERSAAL_API_KEY=
|
||||
|
||||
# WolframAlpha
|
||||
#-----------------
|
||||
WOLFRAM_APP_ID=
|
||||
|
||||
# Zapier
|
||||
#-----------------
|
||||
ZAPIER_NLA_API_KEY=
|
||||
|
||||
#==================================================#
|
||||
# Search #
|
||||
#==================================================#
|
||||
|
||||
SEARCH=true
|
||||
MEILI_NO_ANALYTICS=true
|
||||
MEILI_HOST=http://0.0.0.0:7700
|
||||
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
|
||||
|
||||
# Optional: Disable indexing, useful in a multi-node setup
|
||||
# where only one instance should perform an index sync.
|
||||
# MEILI_NO_SYNC=true
|
||||
|
||||
#==================================================#
|
||||
# Speech to Text & Text to Speech #
|
||||
#==================================================#
|
||||
|
||||
STT_API_KEY=
|
||||
TTS_API_KEY=
|
||||
|
||||
#==================================================#
|
||||
# RAG #
|
||||
#==================================================#
|
||||
# More info: https://www.librechat.ai/docs/configuration/rag_api
|
||||
|
||||
# RAG_OPENAI_BASEURL=
|
||||
# RAG_OPENAI_API_KEY=
|
||||
# RAG_USE_FULL_CONTEXT=
|
||||
# EMBEDDINGS_PROVIDER=openai
|
||||
# EMBEDDINGS_MODEL=text-embedding-3-small
|
||||
|
||||
#===================================================#
|
||||
# User System #
|
||||
#===================================================#
|
||||
|
||||
#========================#
|
||||
# Moderation #
|
||||
#========================#
|
||||
|
||||
OPENAI_MODERATION=false
|
||||
OPENAI_MODERATION_API_KEY=
|
||||
# OPENAI_MODERATION_REVERSE_PROXY=
|
||||
|
||||
BAN_VIOLATIONS=true
|
||||
BAN_DURATION=1000 * 60 * 60 * 2
|
||||
BAN_INTERVAL=20
|
||||
|
||||
LOGIN_VIOLATION_SCORE=1
|
||||
REGISTRATION_VIOLATION_SCORE=1
|
||||
CONCURRENT_VIOLATION_SCORE=1
|
||||
MESSAGE_VIOLATION_SCORE=1
|
||||
NON_BROWSER_VIOLATION_SCORE=20
|
||||
TTS_VIOLATION_SCORE=0
|
||||
STT_VIOLATION_SCORE=0
|
||||
FORK_VIOLATION_SCORE=0
|
||||
IMPORT_VIOLATION_SCORE=0
|
||||
FILE_UPLOAD_VIOLATION_SCORE=0
|
||||
|
||||
LOGIN_MAX=7
|
||||
LOGIN_WINDOW=5
|
||||
REGISTER_MAX=5
|
||||
REGISTER_WINDOW=60
|
||||
|
||||
LIMIT_CONCURRENT_MESSAGES=true
|
||||
CONCURRENT_MESSAGE_MAX=2
|
||||
|
||||
LIMIT_MESSAGE_IP=true
|
||||
MESSAGE_IP_MAX=40
|
||||
MESSAGE_IP_WINDOW=1
|
||||
|
||||
LIMIT_MESSAGE_USER=false
|
||||
MESSAGE_USER_MAX=40
|
||||
MESSAGE_USER_WINDOW=1
|
||||
|
||||
ILLEGAL_MODEL_REQ_SCORE=5
|
||||
|
||||
#========================#
|
||||
# Balance #
|
||||
#========================#
|
||||
|
||||
# CHECK_BALANCE=false
|
||||
# START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
|
||||
|
||||
#========================#
|
||||
# Registration and Login #
|
||||
#========================#
|
||||
|
||||
ALLOW_EMAIL_LOGIN=true
|
||||
ALLOW_REGISTRATION=true
|
||||
ALLOW_SOCIAL_LOGIN=false
|
||||
ALLOW_SOCIAL_REGISTRATION=false
|
||||
ALLOW_PASSWORD_RESET=false
|
||||
# ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
|
||||
ALLOW_UNVERIFIED_EMAIL_LOGIN=true
|
||||
|
||||
SESSION_EXPIRY=1000 * 60 * 15
|
||||
REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
|
||||
|
||||
JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
|
||||
JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
|
||||
|
||||
# Discord
|
||||
DISCORD_CLIENT_ID=
|
||||
DISCORD_CLIENT_SECRET=
|
||||
DISCORD_CALLBACK_URL=/oauth/discord/callback
|
||||
|
||||
# Facebook
|
||||
FACEBOOK_CLIENT_ID=
|
||||
FACEBOOK_CLIENT_SECRET=
|
||||
FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
|
||||
|
||||
# GitHub
|
||||
GITHUB_CLIENT_ID=
|
||||
GITHUB_CLIENT_SECRET=
|
||||
GITHUB_CALLBACK_URL=/oauth/github/callback
|
||||
# GitHub Enterprise
|
||||
# GITHUB_ENTERPRISE_BASE_URL=
|
||||
# GITHUB_ENTERPRISE_USER_AGENT=
|
||||
|
||||
# Google
|
||||
GOOGLE_CLIENT_ID=
|
||||
GOOGLE_CLIENT_SECRET=
|
||||
GOOGLE_CALLBACK_URL=/oauth/google/callback
|
||||
|
||||
# Apple
|
||||
APPLE_CLIENT_ID=
|
||||
APPLE_TEAM_ID=
|
||||
APPLE_KEY_ID=
|
||||
APPLE_PRIVATE_KEY_PATH=
|
||||
APPLE_CALLBACK_URL=/oauth/apple/callback
|
||||
|
||||
# OpenID
|
||||
OPENID_CLIENT_ID=
|
||||
OPENID_CLIENT_SECRET=
|
||||
OPENID_ISSUER=
|
||||
OPENID_SESSION_SECRET=
|
||||
OPENID_SCOPE="openid profile email"
|
||||
OPENID_CALLBACK_URL=/oauth/openid/callback
|
||||
OPENID_REQUIRED_ROLE=
|
||||
OPENID_REQUIRED_ROLE_TOKEN_KIND=
|
||||
OPENID_REQUIRED_ROLE_PARAMETER_PATH=
|
||||
OPENID_ADMIN_ROLE=
|
||||
OPENID_ADMIN_ROLE_PARAMETER_PATH=
|
||||
OPENID_ADMIN_ROLE_TOKEN_KIND=
|
||||
# Set to determine which user info property returned from OpenID Provider to store as the User's username
|
||||
OPENID_USERNAME_CLAIM=
|
||||
# Set to determine which user info property returned from OpenID Provider to store as the User's name
|
||||
OPENID_NAME_CLAIM=
|
||||
# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
|
||||
# When not set, defaults to: email -> preferred_username -> upn
|
||||
OPENID_EMAIL_CLAIM=
|
||||
# Optional audience parameter for OpenID authorization requests
|
||||
OPENID_AUDIENCE=
|
||||
|
||||
OPENID_BUTTON_LABEL=
|
||||
OPENID_IMAGE_URL=
|
||||
# Set to true to automatically redirect to the OpenID provider when a user visits the login page
|
||||
# This will bypass the login form completely for users, only use this if OpenID is your only authentication method
|
||||
OPENID_AUTO_REDIRECT=false
|
||||
# Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
|
||||
OPENID_USE_PKCE=false
|
||||
# Set to true to reuse OpenID tokens for authentication management instead of using the MongoDB session and the custom refresh token.
|
||||
OPENID_REUSE_TOKENS=
|
||||
#By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
|
||||
#If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
|
||||
#Default is true.
|
||||
OPENID_JWKS_URL_CACHE_ENABLED=
|
||||
OPENID_JWKS_URL_CACHE_TIME= # 600000 ms equals 10 minutes; leave empty to disable caching
|
||||
#Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
|
||||
OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
|
||||
OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example of the scope needed for the Microsoft Graph API
|
||||
# Set to true to use the OpenID Connect end session endpoint for logout
|
||||
OPENID_USE_END_SESSION_ENDPOINT=
|
||||
# URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
|
||||
OPENID_POST_LOGOUT_REDIRECT_URI=
|
||||
# Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
|
||||
OPENID_MAX_LOGOUT_URL_LENGTH=
|
||||
|
||||
#========================#
|
||||
# SharePoint Integration #
|
||||
#========================#
|
||||
# Requires Entra ID (OpenID) authentication to be configured
|
||||
|
||||
# Enable SharePoint file picker in chat and agent panels
|
||||
# ENABLE_SHAREPOINT_FILEPICKER=true
|
||||
|
||||
# SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
|
||||
# SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
|
||||
|
||||
# Microsoft Graph API And SharePoint scopes for file picker
|
||||
# SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
|
||||
# SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
|
||||
#========================#
|
||||
|
||||
# SAML
|
||||
# Note: If OpenID is enabled, SAML authentication will be automatically disabled.
|
||||
SAML_ENTRY_POINT=
|
||||
SAML_ISSUER=
|
||||
SAML_CERT=
|
||||
SAML_CALLBACK_URL=/oauth/saml/callback
|
||||
SAML_SESSION_SECRET=
|
||||
|
||||
# Attribute mappings (optional)
|
||||
SAML_EMAIL_CLAIM=
|
||||
SAML_USERNAME_CLAIM=
|
||||
SAML_GIVEN_NAME_CLAIM=
|
||||
SAML_FAMILY_NAME_CLAIM=
|
||||
SAML_PICTURE_CLAIM=
|
||||
SAML_NAME_CLAIM=
|
||||
|
||||
# Login button settings (optional)
|
||||
SAML_BUTTON_LABEL=
|
||||
SAML_IMAGE_URL=
|
||||
|
||||
# Whether the SAML Response should be signed.
|
||||
# - If "true", the entire `SAML Response` will be signed.
|
||||
# - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
|
||||
# SAML_USE_AUTHN_RESPONSE_SIGNED=
|
||||
|
||||
|
||||
#===============================================#
|
||||
# Microsoft Graph API / Entra ID Integration #
|
||||
#===============================================#
|
||||
|
||||
# Enable Entra ID people search integration in permissions/sharing system
|
||||
# When enabled, the people picker will search both local database and Entra ID
|
||||
USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
|
||||
|
||||
# When enabled, owners of Entra ID groups will also be considered members of the group
|
||||
ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
|
||||
|
||||
# Microsoft Graph API scopes needed for people/group search
|
||||
# Default scopes provide access to user profiles and group memberships
|
||||
OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
|
||||
|
||||
# LDAP
|
||||
LDAP_URL=
|
||||
LDAP_BIND_DN=
|
||||
LDAP_BIND_CREDENTIALS=
|
||||
LDAP_USER_SEARCH_BASE=
|
||||
#LDAP_SEARCH_FILTER="mail="
|
||||
LDAP_CA_CERT_PATH=
|
||||
# LDAP_TLS_REJECT_UNAUTHORIZED=
|
||||
# LDAP_STARTTLS=
|
||||
# LDAP_LOGIN_USES_USERNAME=true
|
||||
# LDAP_ID=
|
||||
# LDAP_USERNAME=
|
||||
# LDAP_EMAIL=
|
||||
# LDAP_FULL_NAME=
|
||||
|
||||
#========================#
|
||||
# Email Password Reset #
|
||||
#========================#
|
||||
|
||||
EMAIL_SERVICE=
|
||||
EMAIL_HOST=
|
||||
EMAIL_PORT=25
|
||||
EMAIL_ENCRYPTION=
|
||||
EMAIL_ENCRYPTION_HOSTNAME=
|
||||
EMAIL_ALLOW_SELFSIGNED=
|
||||
# Leave both empty for SMTP servers that do not require authentication
|
||||
EMAIL_USERNAME=
|
||||
EMAIL_PASSWORD=
|
||||
EMAIL_FROM_NAME=
|
||||
EMAIL_FROM=noreply@librechat.ai
|
||||
|
||||
#========================#
|
||||
# Mailgun API #
|
||||
#========================#
|
||||
|
||||
# MAILGUN_API_KEY=your-mailgun-api-key
|
||||
# MAILGUN_DOMAIN=mg.yourdomain.com
|
||||
# EMAIL_FROM=noreply@yourdomain.com
|
||||
# EMAIL_FROM_NAME="LibreChat"
|
||||
|
||||
# # Optional: For EU region
|
||||
# MAILGUN_HOST=https://api.eu.mailgun.net
|
||||
|
||||
#========================#
|
||||
# Firebase CDN #
|
||||
#========================#
|
||||
|
||||
FIREBASE_API_KEY=
|
||||
FIREBASE_AUTH_DOMAIN=
|
||||
FIREBASE_PROJECT_ID=
|
||||
FIREBASE_STORAGE_BUCKET=
|
||||
FIREBASE_MESSAGING_SENDER_ID=
|
||||
FIREBASE_APP_ID=
|
||||
|
||||
#========================#
|
||||
# S3 AWS Bucket #
|
||||
#========================#
|
||||
|
||||
AWS_ENDPOINT_URL=
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_REGION=
|
||||
AWS_BUCKET_NAME=
|
||||
# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
|
||||
# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
|
||||
# AWS_FORCE_PATH_STYLE=false
|
||||
|
||||
#========================#
|
||||
# Azure Blob Storage #
|
||||
#========================#
|
||||
|
||||
AZURE_STORAGE_CONNECTION_STRING=
|
||||
AZURE_STORAGE_PUBLIC_ACCESS=false
|
||||
AZURE_CONTAINER_NAME=files
|
||||
|
||||
#========================#
|
||||
# Shared Links #
|
||||
#========================#
|
||||
|
||||
ALLOW_SHARED_LINKS=true
|
||||
# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
|
||||
ALLOW_SHARED_LINKS_PUBLIC=false
|
||||
|
||||
#==============================#
|
||||
# Static File Cache Control #
|
||||
#==============================#
|
||||
|
||||
# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
|
||||
# NODE_ENV must be set to production for these to take effect
|
||||
# STATIC_CACHE_MAX_AGE=172800
|
||||
# STATIC_CACHE_S_MAX_AGE=86400
|
||||
|
||||
# If you have another service in front of your LibreChat doing compression, disable express based compression here
|
||||
# DISABLE_COMPRESSION=true
|
||||
|
||||
# If you have gzipped versions of uploaded images in the same folder, this will enable gzip scanning and serving of these images
|
||||
# Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
|
||||
# ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
|
||||
|
||||
#===================================================#
|
||||
# UI #
|
||||
#===================================================#
|
||||
|
||||
APP_TITLE=LibreChat
|
||||
# CUSTOM_FOOTER="My custom footer"
|
||||
HELP_AND_FAQ_URL=https://librechat.ai
|
||||
|
||||
# SHOW_BIRTHDAY_ICON=true
|
||||
|
||||
# Google tag manager id
|
||||
#ANALYTICS_GTM_ID=user provided google tag manager id
|
||||
|
||||
# limit conversation file imports to a certain number of bytes in size to avoid the container
|
||||
# maxing out memory limitations; to enable this, uncomment the line below and supply a file size in bytes
|
||||
# such as the example below of 250 MiB (262144000 bytes)
|
||||
# CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
|
||||
|
||||
|
||||
#===============#
|
||||
# REDIS Options #
|
||||
#===============#
|
||||
|
||||
# Enable Redis for caching and session storage
|
||||
# USE_REDIS=true
|
||||
# Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
|
||||
# Set to false to use in-memory storage for streams while keeping Redis for other caches
|
||||
# USE_REDIS_STREAMS=true
|
||||
|
||||
# Single Redis instance
|
||||
# REDIS_URI=redis://127.0.0.1:6379
|
||||
|
||||
# Redis cluster (multiple nodes)
|
||||
# REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
|
||||
|
||||
# Redis with TLS/SSL encryption and CA certificate
|
||||
# REDIS_URI=rediss://127.0.0.1:6380
|
||||
# REDIS_CA=/path/to/ca-cert.pem
|
||||
|
||||
# ElastiCache may need to use an alternate dnsLookup for TLS connections. See "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
|
||||
# Enable alternative dnsLookup for redis
|
||||
# REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
|
||||
|
||||
# Redis authentication (if required)
|
||||
# REDIS_USERNAME=your_redis_username
|
||||
# REDIS_PASSWORD=your_redis_password
|
||||
|
||||
# Redis key prefix configuration
|
||||
# Use environment variable name for dynamic prefix (recommended for cloud deployments)
|
||||
# REDIS_KEY_PREFIX_VAR=K_REVISION
|
||||
# Or use static prefix directly
|
||||
# REDIS_KEY_PREFIX=librechat
|
||||
|
||||
# Redis connection limits
|
||||
# REDIS_MAX_LISTENERS=40
|
||||
|
||||
# Redis ping interval in seconds (0 = disabled, >0 = enabled)
|
||||
# When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
|
||||
# When unset or 0, no pinging is performed (recommended for most use cases)
|
||||
# REDIS_PING_INTERVAL=300
|
||||
|
||||
# Force specific cache namespaces to use in-memory storage even when Redis is enabled
|
||||
# Comma-separated list of CacheKeys
|
||||
# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
|
||||
# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
|
||||
# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
|
||||
|
||||
# Leader Election Configuration (for multi-instance deployments with Redis)
|
||||
# Duration in seconds that the leader lease is valid before it expires (default: 25)
|
||||
# LEADER_LEASE_DURATION=25
|
||||
# Interval in seconds at which the leader renews its lease (default: 10)
|
||||
# LEADER_RENEW_INTERVAL=10
|
||||
# Maximum number of retry attempts when renewing the lease fails (default: 3)
|
||||
# LEADER_RENEW_ATTEMPTS=3
|
||||
# Delay in seconds between retry attempts when renewing the lease (default: 0.5)
|
||||
# LEADER_RENEW_RETRY_DELAY=0.5
|
||||
|
||||
#==================================================#
|
||||
# Others #
|
||||
#==================================================#
|
||||
# You should leave the following commented out #
|
||||
|
||||
# NODE_ENV=
|
||||
|
||||
# E2E_USER_EMAIL=
|
||||
# E2E_USER_PASSWORD=
|
||||
|
||||
#=====================================================#
|
||||
# Cache Headers #
|
||||
#=====================================================#
|
||||
# Headers that control caching of the index.html #
|
||||
# Default configuration prevents caching to ensure #
|
||||
# users always get the latest version. Customize #
|
||||
# only if you understand caching implications. #
|
||||
|
||||
# INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
|
||||
# INDEX_PRAGMA=no-cache
|
||||
# INDEX_EXPIRES=0
|
||||
|
||||
# no-cache: Forces validation with server before using cached version
|
||||
# no-store: Prevents storing the response entirely
|
||||
# must-revalidate: Prevents using stale content when offline
|
||||
|
||||
#=====================================================#
|
||||
# OpenWeather #
|
||||
#=====================================================#
|
||||
OPENWEATHER_API_KEY=
|
||||
|
||||
#====================================#
|
||||
# LibreChat Code Interpreter API #
|
||||
#====================================#
|
||||
|
||||
# https://code.librechat.ai
|
||||
# LIBRECHAT_CODE_API_KEY=your-key
|
||||
|
||||
#======================#
|
||||
# Web Search #
|
||||
#======================#
|
||||
|
||||
# Note: All of the following variable names can be customized.
|
||||
# Omit values to allow user to provide them.
|
||||
|
||||
# For more information on configuration values, see:
|
||||
# https://librechat.ai/docs/features/web_search
|
||||
|
||||
# Search Provider (Required)
|
||||
# SERPER_API_KEY=your_serper_api_key
|
||||
|
||||
# Scraper (Required)
|
||||
# FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||
# Optional: Custom Firecrawl API URL
|
||||
# FIRECRAWL_API_URL=your_firecrawl_api_url
|
||||
|
||||
# Reranker (Required)
|
||||
# JINA_API_KEY=your_jina_api_key
|
||||
# or
|
||||
# COHERE_API_KEY=your_cohere_api_key
|
||||
|
||||
#======================#
|
||||
# MCP Configuration #
|
||||
#======================#
|
||||
|
||||
# Treat 401/403 responses as OAuth requirement when no oauth metadata found
|
||||
# MCP_OAUTH_ON_AUTH_ERROR=true
|
||||
|
||||
# Timeout for OAuth detection requests in milliseconds
|
||||
# MCP_OAUTH_DETECTION_TIMEOUT=5000
|
||||
|
||||
# Cache connection status checks for this many milliseconds to avoid expensive verification
|
||||
# MCP_CONNECTION_CHECK_TTL=60000
|
||||
|
||||
# Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
|
||||
# When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
|
||||
# MCP_SKIP_CODE_CHALLENGE_CHECK=false
|
||||
|
||||
# Circuit breaker: max connect/disconnect cycles before tripping (per server)
|
||||
# MCP_CB_MAX_CYCLES=7
|
||||
|
||||
# Circuit breaker: sliding window (ms) for counting cycles
|
||||
# MCP_CB_CYCLE_WINDOW_MS=45000
|
||||
|
||||
# Circuit breaker: cooldown (ms) after the cycle breaker trips
|
||||
# MCP_CB_CYCLE_COOLDOWN_MS=15000
|
||||
|
||||
# Circuit breaker: max consecutive failed connection rounds before backoff
|
||||
# MCP_CB_MAX_FAILED_ROUNDS=3
|
||||
|
||||
# Circuit breaker: sliding window (ms) for counting failed rounds
|
||||
# MCP_CB_FAILED_WINDOW_MS=120000
|
||||
|
||||
# Circuit breaker: base backoff (ms) after failed round threshold is reached
|
||||
# MCP_CB_BASE_BACKOFF_MS=30000
|
||||
|
||||
# Circuit breaker: max backoff cap (ms) for exponential backoff
|
||||
# MCP_CB_MAX_BACKOFF_MS=300000
|
||||
@@ -6,9 +6,7 @@ cache: true
|
||||
endpoints:
|
||||
anthropic:
|
||||
apiKey: "${ANTHROPIC_API_KEY}"
|
||||
models:
|
||||
default: ["claude-sonnet-4-5", "claude-haiku-4-5", "claude-opus-4-5"]
|
||||
fetch: false
|
||||
models: ["claude-sonnet-4-5", "claude-haiku-4-5", "claude-opus-4-5"]
|
||||
titleConvo: true
|
||||
titleModel: "claude-haiku-4-5"
|
||||
modelDisplayLabel: "Claude AI"
|
||||
@@ -72,7 +70,7 @@ endpoints:
|
||||
apiKey: "dummy_key"
|
||||
baseURL: "http://alfred:8000/v1"
|
||||
models:
|
||||
default: ["local-deepseek-agent"]
|
||||
default: ["glm-4.7-flash:latest"]
|
||||
fetch: false
|
||||
titleConvo: false
|
||||
titleModel: "current_model"
|
||||
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
# MongoDB Configuration File
|
||||
|
||||
# Network settings
|
||||
net:
|
||||
port: 27017
|
||||
bindIp: 0.0.0.0
|
||||
|
||||
# Storage settings
|
||||
storage:
|
||||
dbPath: /data/db
|
||||
|
||||
# System log settings
|
||||
systemLog:
|
||||
destination: file
|
||||
path: /dev/stdout
|
||||
logAppend: true
|
||||
verbosity: 0
|
||||
quiet: true
|
||||
component:
|
||||
accessControl:
|
||||
verbosity: -1
|
||||
command:
|
||||
verbosity: 0
|
||||
control:
|
||||
verbosity: 0
|
||||
ftdc:
|
||||
verbosity: 0
|
||||
geo:
|
||||
verbosity: 0
|
||||
index:
|
||||
verbosity: 0
|
||||
network:
|
||||
verbosity: 0
|
||||
query:
|
||||
verbosity: 0
|
||||
replication:
|
||||
verbosity: 0
|
||||
sharding:
|
||||
verbosity: 0
|
||||
storage:
|
||||
verbosity: 0
|
||||
write:
|
||||
verbosity: 0
|
||||
transaction:
|
||||
verbosity: 0
|
||||
Generated
-1221
File diff suppressed because it is too large
Load Diff
+51
-34
@@ -1,50 +1,67 @@
|
||||
[tool.poetry]
|
||||
[project]
|
||||
name = "alfred"
|
||||
version = "0.1.7"
|
||||
description = "AI agent for managing a local media library"
|
||||
authors = ["Francwa <francois.hodiaumont@gmail.com>"]
|
||||
readme = "README.md"
|
||||
package-mode = false
|
||||
requires-python = "==3.14.3"
|
||||
dependencies = [
|
||||
"python-dotenv~=1.0.0",
|
||||
"requests~=2.32.5",
|
||||
"fastapi~=0.127.1",
|
||||
"pydantic~=2.12.4",
|
||||
"uvicorn~=0.40.0",
|
||||
"httpx~=0.28.1",
|
||||
"pydantic-settings~=2.12.0",
|
||||
"click~=8.1",
|
||||
]
|
||||
|
||||
[tool.alfred.settings]
|
||||
[tool.alfred]
|
||||
image_name = "alfred_media_organizer"
|
||||
librechat_version = "v0.8.1"
|
||||
rag_version = "v0.7.0"
|
||||
runner = "poetry"
|
||||
librechat_version = "v0.8.4"
|
||||
rag_version = "v0.7.3"
|
||||
service_name = "alfred"
|
||||
uv_version = "0.11.6"
|
||||
|
||||
[tool.alfred.security]
|
||||
jwt_secret = "32:b64"
|
||||
jwt_refresh_secret = "32:b64"
|
||||
creds_key = "32:b64"
|
||||
creds_iv = "16:b64"
|
||||
meili_master_key = "32:b64"
|
||||
mongo_password = "16:hex"
|
||||
postgres_password = "16:hex"
|
||||
qbittorrent_password = "16:hex"
|
||||
[tool.alfred.secrets]
|
||||
JWT_SECRET = "32:hex"
|
||||
JWT_REFRESH_SECRET = "32:hex"
|
||||
CREDS_KEY = "32:hex"
|
||||
CREDS_IV = "16:hex"
|
||||
MEILI_MASTER_KEY = "32:b64"
|
||||
MONGO_PASSWORD = "16:hex"
|
||||
POSTGRES_PASSWORD = "16:hex"
|
||||
QBITTORRENT_PASSWORD = "16:hex"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "==3.14.2"
|
||||
python-dotenv = "^1.0.0"
|
||||
requests = "^2.32.5"
|
||||
fastapi = "^0.127.1"
|
||||
pydantic = "^2.12.4"
|
||||
uvicorn = "^0.40.0"
|
||||
pytest-xdist = "^3.8.0"
|
||||
httpx = "^0.28.1"
|
||||
pydantic-settings = "^2.12.0"
|
||||
[tool.alfred.config.pattern]
|
||||
type = "multi"
|
||||
patterns = [
|
||||
"^#[=\\-*#]{3,}#?\\s*$",
|
||||
"^#\\s+(.+?)\\s+#\\s*$",
|
||||
"^#[=\\-*#]{3,}#?\\s*$",
|
||||
]
|
||||
|
||||
[tool.alfred.config]
|
||||
extra_fields = []
|
||||
|
||||
[tool.uv]
|
||||
package = false
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"pytest~=8.0.0",
|
||||
"pytest-cov~=4.1.0",
|
||||
"pytest-asyncio~=0.23.0",
|
||||
"pytest-xdist~=3.8.0",
|
||||
"ruff~=0.14.7",
|
||||
"pre-commit~=4.5.1",
|
||||
"bump-my-version~=1.2.5",
|
||||
]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^8.0.0"
|
||||
pytest-cov = "^4.1.0"
|
||||
pytest-asyncio = "^0.23.0"
|
||||
ruff = "^0.15.0"
|
||||
pre-commit = "^4.5.1"
|
||||
bump-my-version = "^1.2.5"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# Paths where pytest looks for tests
|
||||
|
||||
+154
-209
@@ -1,239 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Bootstrap script - generates .env.alfred, .env.librechat, .env.secrets and .env.make."""
|
||||
|
||||
import re
|
||||
import secrets
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import tomllib
|
||||
from config_loader import load_build_config, write_env_make
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
def load_secrets_spec(toml_data: dict) -> dict[str, tuple[int, str]]:
    """Load the secrets spec from pyproject.toml ``[tool.alfred.secrets]``.

    Each entry maps a variable name to a ``"<size>:<format>"`` rule, for
    example ``JWT_SECRET = "32:hex"``.

    Args:
        toml_data: Parsed pyproject.toml content (nested dicts).

    Returns:
        Mapping of secret name to ``(size, format)``. Empty when the
        ``[tool.alfred.secrets]`` table is absent.

    Raises:
        ValueError: If a rule is not exactly ``"<integer>:<format>"``. The
            message names the offending key so the TOML entry is easy to find.
    """
    raw = toml_data.get("tool", {}).get("alfred", {}).get("secrets", {})
    result: dict[str, tuple[int, str]] = {}
    for key, rule in raw.items():
        # str() lets a non-string TOML value (e.g. a bare integer) fail with
        # the same clear error instead of an AttributeError.
        size_str, sep, fmt = str(rule).partition(":")
        fmt = fmt.strip()
        # Exactly one ':' and a non-empty format are required.
        well_formed = bool(sep) and bool(fmt) and ":" not in fmt
        try:
            size = int(size_str) if well_formed else None
        except ValueError:
            size = None
        if size is None:
            raise ValueError(
                f"Invalid secret rule for {key}: {rule!r} "
                "(expected '<size>:<format>', e.g. '32:hex')"
            )
        result[key] = (size, fmt)
    return result
|
||||
|
||||
|
||||
def generate_secret(rule: str) -> str:
|
||||
"""
|
||||
Generates a cryptographically secure secret based on a spec string.
|
||||
Example specs: '32:b64', '16:hex'.
|
||||
"""
|
||||
chunks: list[str] = rule.split(":")
|
||||
size: int = int(chunks[0])
|
||||
tech: str = chunks[1]
|
||||
def generate_secret(size: int, fmt: str) -> str:
|
||||
match fmt:
|
||||
case "hex":
|
||||
return secrets.token_hex(size)
|
||||
case "b64":
|
||||
return secrets.token_urlsafe(size)
|
||||
case _:
|
||||
raise ValueError(f"Unknown format: {fmt}")
|
||||
|
||||
if tech == "b64":
|
||||
return secrets.token_urlsafe(size)
|
||||
elif tech == "hex":
|
||||
return secrets.token_hex(size)
|
||||
|
||||
def load_env_file(path: Path) -> dict[str, str]:
    """Load KEY=VALUE pairs from an env file, ignoring comments and blanks.

    Lines are stripped; empty lines, lines starting with ``#`` and lines
    without ``=`` are skipped. Each remaining line is split on the first
    ``=`` and both sides are stripped of surrounding whitespace. Quotes and
    trailing inline comments are kept as part of the value.

    Args:
        path: Location of the env file.

    Returns:
        Mapping of keys to values; empty when the file does not exist. When a
        key appears more than once, the last occurrence wins.
    """
    result: dict[str, str] = {}
    if not path.exists():
        return result
    # Decode explicitly as UTF-8 instead of the locale default: the env files
    # contain non-ASCII characters in comments. "utf-8-sig" also drops a
    # leading BOM that would otherwise become part of the first key.
    for line in path.read_text(encoding="utf-8-sig").splitlines():
        stripped = line.strip()
        if stripped and not stripped.startswith("#") and "=" in stripped:
            key, _, value = stripped.partition("=")
            result[key.strip()] = value.strip()
    return result
|
||||
|
||||
|
||||
def copy_example_if_missing(src: Path, dst: Path, label: str) -> None:
    """Copy ``src`` to ``dst`` only if ``dst`` doesn't exist yet.

    An existing ``dst`` is never overwritten. A missing ``src`` is reported
    with a warning and otherwise ignored. The outcome is printed in all three
    cases.

    Args:
        src: Example/template file to copy from.
        dst: Target file to create.
        label: Human-readable name of the template, used only in the warning
            printed when ``src`` is missing.
    """
    if dst.exists():
        print(f" ↻ {dst.name} already exists, skipping")
        return
    if not src.exists():
        print(f" ⚠ {label} example not found at {src} — skipping (add it manually)")
        return
    # Copy raw bytes rather than decoded text: the result then does not depend
    # on the locale encoding, and line endings are preserved exactly.
    dst.write_bytes(src.read_bytes())
    print(f" + {dst.name} created from {src.name}")
|
||||
|
||||
|
||||
def generate_secrets_file(path: Path, secrets_spec: dict[str, tuple[int, str]]) -> None:
    """Generate .env.secrets with missing secrets, never overwriting existing ones.

    Keys already present in *path* are left untouched; every key of
    *secrets_spec* that is missing gets a freshly generated value appended.
    The file is written in all cases and always ends with a single newline.

    Args:
        path: Secrets env file. When absent it is created with a short
            "do not commit" header.
        secrets_spec: Mapping of secret name to ``(size, format)``.
    """
    existing = load_env_file(path)
    if path.exists():
        lines = path.read_text(encoding="utf-8").splitlines()
    else:
        lines = [
            "# Auto-generated secrets — DO NOT COMMIT",
            "# Run 'make bootstrap' to generate missing secrets",
            "",
        ]

    added = []
    for key, (size, fmt) in secrets_spec.items():
        if key not in existing:
            lines.append(f"{key}={generate_secret(size, fmt)}")
            added.append(key)

    # The header contains a non-ASCII dash, so the encoding is pinned to UTF-8.
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")

    if added:
        print(f" + Generated: {', '.join(added)}")
    else:
        print(" ↻ All secrets already exist, nothing generated")
|
||||
|
||||
|
||||
def extract_python_version(version_string: str) -> tuple[str, str]:
    """Extract the Python version from a dependency/version specifier string.

    Only the first comma-separated clause is considered, so a range such as
    ``">=3.14,<4"`` yields the lower bound. Leading comparison operators and
    whitespace are removed.

    Examples:
        "==3.14.2"   -> ("3.14.2", "3.14")
        "^3.14.2"    -> ("3.14.2", "3.14")
        "~3.14.2"    -> ("3.14.2", "3.14")
        "3.14.2"     -> ("3.14.2", "3.14")
        ">=3.14,<4"  -> ("3.14", "3.14")

    Args:
        version_string: Raw specifier, e.g. the ``requires-python`` value.

    Returns:
        ``(full_version, "major.minor")``.

    Raises:
        ValueError: If no ``major.minor`` pair can be found.
    """
    import re  # noqa: PLC0415

    # Keep only the first clause of a multi-clause specifier.
    first_clause = version_string.split(",", 1)[0]
    # Remove version operators (==, ^, ~, >=, ...) and any padding around them.
    clean = re.sub(r"^[=^~><\s]+", "", first_clause).strip()
    parts = clean.split(".")
    if len(parts) >= 2 and parts[0] and parts[1]:
        return clean, f"{parts[0]}.{parts[1]}"
    raise ValueError(f"Invalid Python version: {version_string}")
|
||||
|
||||
|
||||
def build_uris(env_alfred: Path, env_secrets: Path) -> None:
    """Build MONGO_URI and POSTGRES_URI from components and store them in .env.secrets.

    Components are read from both env files (values in *env_secrets* win).
    A URI already present in *env_secrets* is replaced in place; otherwise it
    is appended at the end of the file.

    Args:
        env_alfred: Env file holding host/port/user/db-name settings.
        env_secrets: Env file holding passwords; it is rewritten with the URIs.

    Raises:
        KeyError: If a component needed to build a URI is missing.
        FileNotFoundError: If *env_secrets* does not exist.
    """
    import re  # noqa: PLC0415
    from urllib.parse import quote  # noqa: PLC0415

    existing = load_env_file(env_secrets)
    env = {**load_env_file(env_alfred), **existing}

    required = (
        "MONGO_USER",
        "MONGO_PASSWORD",
        "MONGO_HOST",
        "MONGO_PORT",
        "MONGO_DB_NAME",
        "POSTGRES_USER",
        "POSTGRES_PASSWORD",
        "POSTGRES_HOST",
        "POSTGRES_PORT",
        "POSTGRES_DB_NAME",
    )
    missing = [name for name in required if name not in env]
    if missing:
        raise KeyError(f"Missing settings required to build URIs: {', '.join(missing)}")

    def cred(name: str) -> str:
        # Percent-encode user info so characters such as '@', ':' or '/' in a
        # hand-edited password cannot corrupt the URI. Generated hex/url-safe
        # tokens pass through unchanged.
        return quote(env[name], safe="")

    computed = {
        "MONGO_URI": (
            f"mongodb://{cred('MONGO_USER')}:{cred('MONGO_PASSWORD')}"
            f"@{env['MONGO_HOST']}:{env['MONGO_PORT']}/{env['MONGO_DB_NAME']}"
            "?authSource=admin"
        ),
        "POSTGRES_URI": (
            f"postgresql://{cred('POSTGRES_USER')}:{cred('POSTGRES_PASSWORD')}"
            f"@{env['POSTGRES_HOST']}:{env['POSTGRES_PORT']}/{env['POSTGRES_DB_NAME']}"
        ),
    }

    content = env_secrets.read_text(encoding="utf-8")
    added = []
    for key, value in computed.items():
        line = f"{key}={value}"
        if key in existing:
            # Tolerate the same padding load_env_file() tolerates, and use a
            # callable replacement so backslashes in the value stay literal.
            pattern = rf"^[ \t]*{re.escape(key)}[ \t]*=.*$"
            content = re.sub(pattern, lambda _m, line=line: line, content, flags=re.MULTILINE)
        else:
            content = content.rstrip("\n") + f"\n{line}\n"
            added.append(key)
    env_secrets.write_text(content, encoding="utf-8")

    if added:
        print(f" + Computed: {', '.join(added)}")
    else:
        print(" ↻ URIs updated")
|
||||
|
||||
|
||||
# TODO: Refactor
|
||||
def bootstrap(): # noqa: PLR0912, PLR0915
|
||||
"""
|
||||
Initializes the .env file by merging .env.example with generated secrets
|
||||
and build variables from pyproject.toml.
|
||||
Also generates .env.make for Makefile.
|
||||
def write_env_make(toml_data: dict) -> None:
|
||||
"""Write .env.make from pyproject.toml."""
|
||||
project = toml_data["project"]
|
||||
alfred = toml_data["tool"]["alfred"]
|
||||
|
||||
ALWAYS preserves existing secrets!
|
||||
"""
|
||||
base_dir = Path(__file__).resolve().parent.parent
|
||||
env_path = base_dir / ".env"
|
||||
python_full, python_short = extract_python_version(project["requires-python"])
|
||||
|
||||
example_path = base_dir / ".env.example"
|
||||
if not example_path.exists():
|
||||
print(f"❌ {example_path.name} not found.")
|
||||
return
|
||||
lines = [
|
||||
"# Auto-generated from pyproject.toml — do not edit manually",
|
||||
f"ALFRED_VERSION={project['version']}",
|
||||
f"PYTHON_VERSION={python_full}",
|
||||
f"IMAGE_NAME={alfred['image_name']}",
|
||||
f"SERVICE_NAME={alfred['service_name']}",
|
||||
f"LIBRECHAT_VERSION={alfred['librechat_version']}",
|
||||
f"RAG_VERSION={alfred['rag_version']}",
|
||||
f"UV_VERSION={alfred['uv_version']}",
|
||||
]
|
||||
|
||||
toml_path = base_dir / "pyproject.toml"
|
||||
env_make_path = BASE_DIR / ".env.make"
|
||||
env_make_path.write_text("\n".join(lines) + "\n")
|
||||
print(f" + {env_make_path.name} written")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
print("🚀 Starting bootstrap...")
|
||||
|
||||
toml_path = BASE_DIR / "pyproject.toml"
|
||||
if not toml_path.exists():
|
||||
print(f"❌ {toml_path.name} not found.")
|
||||
return
|
||||
print(f"❌ pyproject.toml not found: {toml_path}")
|
||||
return 1
|
||||
|
||||
# ALWAYS load existing .env if it exists
|
||||
existing_env = {}
|
||||
if env_path.exists():
|
||||
print("🔄 Reading existing .env...")
|
||||
with open(env_path) as f:
|
||||
for line in f:
|
||||
if "=" in line and not line.strip().startswith("#"):
|
||||
key, value = line.split("=", 1)
|
||||
existing_env[key.strip()] = value.strip()
|
||||
print(f" Found {len(existing_env)} existing keys")
|
||||
print("🔧 Updating .env file (keeping secrets)...")
|
||||
else:
|
||||
print("🔧 Initializing: Creating secure .env file...")
|
||||
|
||||
# Load data from pyproject.toml
|
||||
with open(toml_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
security_keys = data["tool"]["alfred"]["security"]
|
||||
settings_keys = data["tool"]["alfred"]["settings"]
|
||||
dependencies = data["tool"]["poetry"]["dependencies"]
|
||||
alfred_version = data["tool"]["poetry"]["version"]
|
||||
toml_data = tomllib.load(f)
|
||||
|
||||
# Normalize TOML keys to UPPER_CASE for .env format (done once)
|
||||
security_keys_upper = {k.upper(): v for k, v in security_keys.items()}
|
||||
settings_keys_upper = {k.upper(): v for k, v in settings_keys.items()}
|
||||
|
||||
# Extract Python version
|
||||
python_version_full, python_version_short = extract_python_version(
|
||||
dependencies["python"]
|
||||
print("\n📄 Env files:")
|
||||
copy_example_if_missing(
|
||||
src=BASE_DIR / ".env.example",
|
||||
dst=BASE_DIR / ".env.alfred",
|
||||
label="Alfred",
|
||||
)
|
||||
copy_example_if_missing(
|
||||
src=BASE_DIR / "librechat" / ".env.example",
|
||||
dst=BASE_DIR / ".env.librechat",
|
||||
label="LibreChat",
|
||||
)
|
||||
|
||||
# Read .env.example
|
||||
with open(example_path) as f:
|
||||
example_lines = f.readlines()
|
||||
secrets_spec = load_secrets_spec(toml_data)
|
||||
print("\n🔐 Secrets:")
|
||||
generate_secrets_file(BASE_DIR / ".env.secrets", secrets_spec)
|
||||
|
||||
new_lines = []
|
||||
# Process each line from .env.example
|
||||
for raw_line in example_lines:
|
||||
line = raw_line.strip()
|
||||
print("\n🔗 URIs:")
|
||||
build_uris(BASE_DIR / ".env.alfred", BASE_DIR / ".env.secrets")
|
||||
|
||||
if line and not line.startswith("#") and "=" in line:
|
||||
key, value = line.split("=", 1)
|
||||
key = key.strip()
|
||||
print("\n🔧 Build config:")
|
||||
write_env_make(toml_data)
|
||||
|
||||
# Check if key exists in current .env (update mode)
|
||||
if key in existing_env:
|
||||
# Keep existing value for secrets
|
||||
if key in security_keys_upper:
|
||||
new_lines.append(f"{key}={existing_env[key]}\n")
|
||||
print(f" ↻ Kept existing {key}")
|
||||
# Update build vars from pyproject.toml
|
||||
elif key in settings_keys_upper:
|
||||
new_value = settings_keys_upper[key]
|
||||
if existing_env[key] != new_value:
|
||||
new_lines.append(f"{key}={new_value}\n")
|
||||
print(f" ↻ Updated {key}: {existing_env[key]} → {new_value}")
|
||||
else:
|
||||
new_lines.append(f"{key}={existing_env[key]}\n")
|
||||
print(f" ↻ Kept {key}={existing_env[key]}")
|
||||
# Update Python versions
|
||||
elif key == "PYTHON_VERSION":
|
||||
if existing_env[key] != python_version_full:
|
||||
new_lines.append(f"{key}={python_version_full}\n")
|
||||
print(
|
||||
f" ↻ Updated Python: {existing_env[key]} → {python_version_full}"
|
||||
)
|
||||
else:
|
||||
new_lines.append(f"{key}={existing_env[key]}\n")
|
||||
print(f" ↻ Kept Python: {existing_env[key]}")
|
||||
elif key == "PYTHON_VERSION_SHORT":
|
||||
if existing_env[key] != python_version_short:
|
||||
new_lines.append(f"{key}={python_version_short}\n")
|
||||
print(
|
||||
f" ↻ Updated Python (short): {existing_env[key]} → {python_version_short}"
|
||||
)
|
||||
else:
|
||||
new_lines.append(f"{key}={existing_env[key]}\n")
|
||||
print(f" ↻ Kept Python (short): {existing_env[key]}")
|
||||
elif key == "ALFRED_VERSION":
|
||||
if existing_env.get(key) != alfred_version:
|
||||
new_lines.append(f"{key}={alfred_version}\n")
|
||||
print(
|
||||
f" ↻ Updated Alfred version: {existing_env.get(key, 'N/A')} → {alfred_version}"
|
||||
)
|
||||
else:
|
||||
new_lines.append(f"{key}={alfred_version}\n")
|
||||
print(f" ↻ Kept Alfred version: {alfred_version}")
|
||||
# Keep other existing values
|
||||
else:
|
||||
new_lines.append(f"{key}={existing_env[key]}\n")
|
||||
# Key doesn't exist, generate/add it
|
||||
elif key in security_keys_upper:
|
||||
rule = security_keys_upper[key]
|
||||
secret = generate_secret(rule)
|
||||
new_lines.append(f"{key}={secret}\n")
|
||||
print(f" + Secret generated for {key} ({rule})")
|
||||
elif key in settings_keys_upper:
|
||||
value = settings_keys_upper[key]
|
||||
new_lines.append(f"{key}={value}\n")
|
||||
print(f" + Setting added: {key}={value}")
|
||||
elif key == "PYTHON_VERSION":
|
||||
new_lines.append(f"{key}={python_version_full}\n")
|
||||
print(f" + Python version: {python_version_full}")
|
||||
elif key == "PYTHON_VERSION_SHORT":
|
||||
new_lines.append(f"{key}={python_version_short}\n")
|
||||
print(f" + Python version (short): {python_version_short}")
|
||||
elif key == "ALFRED_VERSION":
|
||||
new_lines.append(f"{key}={alfred_version}\n")
|
||||
print(f" + Alfred version: {alfred_version}")
|
||||
else:
|
||||
new_lines.append(raw_line)
|
||||
else:
|
||||
# Keep comments and empty lines
|
||||
new_lines.append(raw_line)
|
||||
|
||||
# Compute database URIs from the generated values
|
||||
final_env = {}
|
||||
for line in new_lines:
|
||||
if "=" in line and not line.strip().startswith("#"):
|
||||
key, value = line.split("=", 1)
|
||||
final_env[key.strip()] = value.strip()
|
||||
|
||||
# Compute MONGO_URI
|
||||
if "MONGO_USER" in final_env and "MONGO_PASSWORD" in final_env:
|
||||
mongo_uri = (
|
||||
f"mongodb://{final_env.get('MONGO_USER', 'alfred')}:"
|
||||
f"{final_env.get('MONGO_PASSWORD', '')}@"
|
||||
f"{final_env.get('MONGO_HOST', 'mongodb')}:"
|
||||
f"{final_env.get('MONGO_PORT', '27017')}/"
|
||||
f"{final_env.get('MONGO_DB_NAME', 'alfred')}?authSource=admin"
|
||||
)
|
||||
# Update MONGO_URI in new_lines
|
||||
for i, line in enumerate(new_lines):
|
||||
if line.startswith("MONGO_URI="):
|
||||
new_lines[i] = f"MONGO_URI={mongo_uri}\n"
|
||||
print(" ✓ Computed MONGO_URI")
|
||||
break
|
||||
|
||||
# Compute POSTGRES_URI
|
||||
if "POSTGRES_USER" in final_env and "POSTGRES_PASSWORD" in final_env:
|
||||
postgres_uri = (
|
||||
f"postgresql://{final_env.get('POSTGRES_USER', 'alfred')}:"
|
||||
f"{final_env.get('POSTGRES_PASSWORD', '')}@"
|
||||
f"{final_env.get('POSTGRES_HOST', 'vectordb')}:"
|
||||
f"{final_env.get('POSTGRES_PORT', '5432')}/"
|
||||
f"{final_env.get('POSTGRES_DB_NAME', 'alfred')}"
|
||||
)
|
||||
# Update POSTGRES_URI in new_lines
|
||||
for i, line in enumerate(new_lines):
|
||||
if line.startswith("POSTGRES_URI="):
|
||||
new_lines[i] = f"POSTGRES_URI={postgres_uri}\n"
|
||||
print(" ✓ Computed POSTGRES_URI")
|
||||
break
|
||||
|
||||
# Write .env file
|
||||
with open(env_path, "w", encoding="utf-8") as f:
|
||||
f.writelines(new_lines)
|
||||
print(f"\n✅ {env_path.name} generated successfully.")
|
||||
|
||||
# Generate .env.make for Makefile using shared config loader
|
||||
config = load_build_config(base_dir)
|
||||
write_env_make(config, base_dir)
|
||||
print("✅ .env.make generated for Makefile.")
|
||||
print("\n⚠️ Reminder: Please manually add your API keys to the .env file.")
|
||||
print("\n✅ Bootstrap complete!")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
bootstrap()
|
||||
sys.exit(main())
|
||||
|
||||
+32
-38
@@ -1,4 +1,4 @@
|
||||
"""Shared configuration loader for bootstrap and CI."""
|
||||
"""Shared configuration loader — reads build config from pyproject.toml."""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -13,31 +13,25 @@ class BuildConfig(NamedTuple):
|
||||
alfred_version: str
|
||||
python_version: str
|
||||
python_version_short: str
|
||||
runner: str
|
||||
image_name: str
|
||||
service_name: str
|
||||
librechat_version: str
|
||||
rag_version: str
|
||||
uv_version: str
|
||||
|
||||
|
||||
def extract_python_version(version_string: str) -> tuple[str, str]:
    """Extract the Python version from a uv dependency string.

    Only the first comma-separated clause is considered, so a range such as
    ``">=3.14,<4"`` yields the lower bound. Leading comparison operators and
    whitespace are removed.

    Examples:
        "==3.14.2"   -> ("3.14.2", "3.14")
        "^3.14.2"    -> ("3.14.2", "3.14")
        ">=3.14,<4"  -> ("3.14", "3.14")

    Args:
        version_string: Raw specifier, e.g. the ``requires-python`` value.

    Returns:
        ``(full_version, "major.minor")``.

    Raises:
        ValueError: If no ``major.minor`` pair can be found.
    """
    # Keep only the first clause of a multi-clause specifier.
    first_clause = version_string.split(",", 1)[0]
    # Remove version operators (==, ^, ~, >=, ...) and any padding around them.
    clean = re.sub(r"^[=^~><\s]+", "", first_clause).strip()
    parts = clean.split(".")
    if len(parts) >= 2 and parts[0] and parts[1]:
        return clean, f"{parts[0]}.{parts[1]}"
    raise ValueError(f"Invalid Python version format: {version_string}")
|
||||
|
||||
|
||||
def load_build_config(base_dir: Path | None = None) -> BuildConfig:
|
||||
@@ -51,23 +45,21 @@ def load_build_config(base_dir: Path | None = None) -> BuildConfig:
|
||||
|
||||
with open(toml_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
settings_keys = data["tool"]["alfred"]["settings"]
|
||||
dependencies = data["tool"]["poetry"]["dependencies"]
|
||||
alfred_version = data["tool"]["poetry"]["version"]
|
||||
|
||||
python_version_full, python_version_short = extract_python_version(
|
||||
dependencies["python"]
|
||||
)
|
||||
project = data["project"]
|
||||
alfred = data["tool"]["alfred"]
|
||||
|
||||
python_full, python_short = extract_python_version(project["requires-python"])
|
||||
|
||||
return BuildConfig(
|
||||
alfred_version=alfred_version,
|
||||
python_version=python_version_full,
|
||||
python_version_short=python_version_short,
|
||||
runner=settings_keys["runner"],
|
||||
image_name=settings_keys["image_name"],
|
||||
service_name=settings_keys["service_name"],
|
||||
librechat_version=settings_keys["librechat_version"],
|
||||
rag_version=settings_keys["rag_version"],
|
||||
alfred_version=project["version"],
|
||||
python_version=python_full,
|
||||
python_version_short=python_short,
|
||||
image_name=alfred["image_name"],
|
||||
service_name=alfred["service_name"],
|
||||
librechat_version=alfred["librechat_version"],
|
||||
rag_version=alfred["rag_version"],
|
||||
uv_version=alfred["uv_version"],
|
||||
)
|
||||
|
||||
|
||||
@@ -76,14 +68,16 @@ def write_env_make(config: BuildConfig, base_dir: Path | None = None) -> None:
|
||||
if base_dir is None:
|
||||
base_dir = Path(__file__).resolve().parent.parent
|
||||
|
||||
lines = [
|
||||
"# Auto-generated from pyproject.toml — do not edit manually",
|
||||
f"export ALFRED_VERSION={config.alfred_version}",
|
||||
f"export PYTHON_VERSION={config.python_version}",
|
||||
f"export IMAGE_NAME={config.image_name}",
|
||||
f"export SERVICE_NAME={config.service_name}",
|
||||
f"export LIBRECHAT_VERSION={config.librechat_version}",
|
||||
f"export RAG_VERSION={config.rag_version}",
|
||||
f"export UV_VERSION={config.uv_version}",
|
||||
]
|
||||
|
||||
env_make_path = base_dir / ".env.make"
|
||||
with open(env_make_path, "w", encoding="utf-8") as f:
|
||||
f.write("# Auto-generated from pyproject.toml\n")
|
||||
f.write(f"export ALFRED_VERSION={config.alfred_version}\n")
|
||||
f.write(f"export PYTHON_VERSION={config.python_version}\n")
|
||||
f.write(f"export PYTHON_VERSION_SHORT={config.python_version_short}\n")
|
||||
f.write(f"export RUNNER={config.runner}\n")
|
||||
f.write(f"export IMAGE_NAME={config.image_name}\n")
|
||||
f.write(f"export SERVICE_NAME={config.service_name}\n")
|
||||
f.write(f"export LIBRECHAT_VERSION={config.librechat_version}\n")
|
||||
f.write(f"export RAG_VERSION={config.rag_version}\n")
|
||||
env_make_path.write_text("\n".join(lines) + "\n")
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate .env.make for CI/CD without generating secrets."""
|
||||
|
||||
import sys
|
||||
|
||||
from config_loader import load_build_config, write_env_make
|
||||
|
||||
|
||||
def main() -> int:
    """Generate .env.make from pyproject.toml.

    Returns:
        0 on success, 1 when loading the build config or writing the file
        fails (the error is reported on stderr).
    """
    try:
        config = load_build_config()
        write_env_make(config)
    except Exception as e:  # top-level boundary: report and turn into an exit code
        print(f"❌ Failed to generate .env.make: {e}", file=sys.stderr)
        return 1
    print("✅ .env.make generated successfully.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,528 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
scan_subtitles.py — CLI pour tester le pipeline de scan de sous-titres Alfred.
|
||||
|
||||
Usage:
|
||||
uv run testing/subtitles/scan_subtitles.py <season_folder> [options]
|
||||
|
||||
Options:
|
||||
--release-group RARBG Groupe de release (optionnel — active les known patterns)
|
||||
--pattern adjacent Forcer un pattern (adjacent|flat|episode_subfolder|embedded)
|
||||
--video FILE Fichier vidéo de référence (défaut: premier .mkv/.mp4 trouvé)
|
||||
--verbose Détails sur chaque token analysé
|
||||
--no-color Désactive la colorisation
|
||||
|
||||
Exemples:
|
||||
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01"
|
||||
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01" --release-group RARBG
|
||||
uv run testing/subtitles/scan_subtitles.py "/media/tv/The X-Files/Season 01" --pattern episode_subfolder --verbose
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
# Ajoute la racine du projet au path (testing/subtitles/ → ../../)
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(_PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_PROJECT_ROOT))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colorisation simple (pas de dépendance externe)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
USE_COLOR = True
|
||||
|
||||
RESET = "\033[0m"
|
||||
BOLD = "\033[1m"
|
||||
DIM = "\033[2m"
|
||||
GREEN = "\033[32m"
|
||||
YELLOW = "\033[33m"
|
||||
RED = "\033[31m"
|
||||
CYAN = "\033[36m"
|
||||
BLUE = "\033[34m"
|
||||
MAGENTA = "\033[35m"
|
||||
|
||||
|
||||
def c(text: str, *codes: str) -> str:
    """Wrap *text* in the given ANSI codes followed by a reset.

    Returns *text* unchanged when the module-level ``USE_COLOR`` flag is off.
    """
    if not USE_COLOR:
        return text
    return "".join(codes) + text + RESET
|
||||
|
||||
|
||||
def section(title: str) -> None:
    """Print a section header: blank line, rule, highlighted title, rule."""
    width = 70
    rule = c("─" * width, DIM)
    print()
    print(rule)
    print(c(f" {title}", BOLD, CYAN))
    print(rule)
|
||||
|
||||
|
||||
def ok(msg: str) -> None:
    """Print *msg* prefixed with a green check mark."""
    print(c(" ✓ ", GREEN, BOLD) + msg)
|
||||
|
||||
|
||||
def warn(msg: str) -> None:
    """Print *msg* prefixed with a yellow warning sign."""
    print(c(" ⚠ ", YELLOW, BOLD) + msg)
|
||||
|
||||
|
||||
def err(msg: str) -> None:
    """Print *msg* prefixed with a red cross (written to stdout)."""
    print(c(" ✗ ", RED, BOLD) + msg)
|
||||
|
||||
|
||||
def info(msg: str, indent: int = 2) -> None:
    """Print *msg* indented by *indent* spaces."""
    print(" " * indent + msg)
|
||||
|
||||
|
||||
def kv(key: str, value: str, indent: int = 4) -> None:
    """Print a ``key: value`` line, key in bold, indented by *indent* spaces."""
    print(" " * indent + c(f"{key}: ", BOLD) + value)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
VIDEO_EXTS = {".mkv", ".mp4", ".avi", ".mov", ".ts", ".m2ts"}
|
||||
|
||||
|
||||
def find_videos(folder: Path) -> list[Path]:
    """Return the video files located directly in *folder*, sorted by path.

    A file counts as a video when its lower-cased suffix is in ``VIDEO_EXTS``.
    Sub-folders are not descended into.
    """
    return sorted(
        entry
        for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in VIDEO_EXTS
    )
|
||||
|
||||
|
||||
def confidence_bar(conf: float, width: int = 20) -> str:
    """Render *conf* as a colored bar of *width* cells followed by a percentage.

    The bar is green from 0.8, yellow from 0.5 and red below. The filled part
    is clamped to ``[0, width]`` so an out-of-range confidence cannot produce
    a bar longer than *width*; the percentage label still shows the raw value.
    """
    ratio = min(max(conf, 0.0), 1.0)
    filled = int(ratio * width)
    bar = "█" * filled + "░" * (width - filled)
    if conf >= 0.8:
        color = GREEN
    elif conf >= 0.5:
        color = YELLOW
    else:
        color = RED
    return c(bar, color) + c(f" {conf:.0%}", BOLD)
|
||||
|
||||
|
||||
def track_summary(track, verbose: bool = False) -> None:
    """Print a multi-line summary of one subtitle track.

    Shows the source (file name or "embedded"), language, type, format and
    confidence, then optional entry count / size, the raw tokens when
    *verbose* is set, and the destination name when the track is resolved.

    Args:
        track: Subtitle track object exposing ``language``, ``format``,
            ``subtitle_type``, ``is_embedded``, ``file_path``, ``confidence``,
            ``entry_count``, ``file_size_kb``, ``raw_tokens``,
            ``is_resolved()`` and ``destination_name``.
        verbose: Also print the raw tokens of the track.
    """
    lang = track.language.code if track.language else c("?", RED)
    fmt = track.format.id if track.format else c("?", RED)
    typ = track.subtitle_type.value
    if track.is_embedded:
        src = "embedded"
    else:
        src = track.file_path.name if track.file_path else "?"

    # Color used for each subtitle type
    type_colors = {
        "standard": GREEN,
        "sdh": YELLOW,
        "forced": BLUE,
        "unknown": RED,
    }
    typ_str = c(typ, type_colors.get(typ, RESET))

    # An external track without a language is flagged for clarification.
    unresolved = not track.is_embedded and track.language is None
    clarif = c(" [langue inconnue]", RED, BOLD) if unresolved else ""

    print(f" {c(src, BOLD)}")
    print(f" lang={c(lang, CYAN)} type={typ_str} format={fmt}")
    if track.is_embedded:
        conf_str = c("n/a (embedded)", DIM)
    else:
        conf_str = confidence_bar(track.confidence)
    print(f" confidence={conf_str}{clarif}")

    if track.entry_count is not None:
        if track.file_size_kb:
            print(f" entries={track.entry_count} size={track.file_size_kb:.1f} KB")
        else:
            print(f" entries={track.entry_count}")

    if verbose and track.raw_tokens:
        print(f" tokens={track.raw_tokens}")

    if track.is_resolved() and track.language and track.format:
        try:
            dest = track.destination_name
        except ValueError:
            # Best-effort display: no destination line when the name cannot be built.
            pass
        else:
            print(f" → {c(dest, GREEN, BOLD)}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Étapes du pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def step_load_kb() -> "SubtitleKnowledgeBase":
    """Step 1: build the subtitle knowledge base and print what it contains.

    Prints the number and ids of known formats, languages and patterns, plus
    the total count of language tokens.

    Returns:
        The freshly built knowledge base.
    """
    from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
    from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader

    section("ÉTAPE 1 — Chargement de la base de connaissances")
    kb = SubtitleKnowledgeBase(KnowledgeLoader())

    fmts = kb.formats()
    langs = kb.languages()
    patterns = kb.patterns()

    ok(f"{len(fmts)} format(s) connu(s): {', '.join(fmts.keys())}")
    ok(f"{len(langs)} langue(s) connue(s): {', '.join(langs.keys())}")
    ok(f"{len(patterns)} pattern(s) connu(s): {', '.join(patterns.keys())}")

    total_tokens = sum(len(language.tokens) for language in langs.values())
    info(c(f"→ {total_tokens} tokens de langue au total", DIM), indent=4)

    return kb
|
||||
|
||||
|
||||
def step_detect_pattern(
    kb: "SubtitleKnowledgeBase",
    season_folder: Path,
    sample_video: Path,
    release_group: str | None,
    forced_pattern: str | None,
) -> "SubtitlePattern":
    """Step 2: choose the release pattern used to locate subtitles.

    Priority: forced pattern > known pattern for the release group >
    auto-detection. When auto-detection finds nothing, the "adjacent"
    pattern is used as a fallback.

    Args:
        kb: Knowledge base providing the patterns.
        season_folder: Folder analysed by the auto-detector.
        sample_video: Reference video passed to the auto-detector.
        release_group: Optional release group name.
        forced_pattern: Optional pattern id that bypasses detection.

    Returns:
        The selected pattern.

    Raises:
        SystemExit: With code 1 when the forced pattern, or the "adjacent"
            fallback, is not present in the knowledge base.
    """
    from alfred.domain.subtitles.services.pattern_detector import PatternDetector

    section("ÉTAPE 2 — Détection du pattern de release")

    # 1) Pattern forced on the command line
    if forced_pattern:
        pattern = kb.pattern(forced_pattern)
        if not pattern:
            err(f"Pattern inconnu: '{forced_pattern}'")
            print(f" Patterns disponibles: {', '.join(kb.patterns().keys())}")
            sys.exit(1)
        ok(f"Pattern forcé: {c(forced_pattern, CYAN, BOLD)}")
        return pattern

    # 2) Patterns known for the release group (the first one is selected)
    if release_group:
        known = kb.patterns_for_group(release_group)
        if known:
            kv("Release group", release_group)
            ok(f"Pattern(s) connu(s) pour {release_group}: {', '.join(p.id for p in known)}")
            pattern = known[0]
            kv("Pattern sélectionné", c(pattern.id, CYAN, BOLD))
            return pattern
        warn(f"Groupe '{release_group}' inconnu — lancement de la détection auto")

    # 3) Auto-detection
    kv("Dossier analysé", str(season_folder))
    kv("Vidéo de référence", sample_video.name)

    detector = PatternDetector(kb)
    result = detector.detect(season_folder, sample_video)

    findings = result.get("raw_findings", {})
    info(c("Observations:", BOLD), indent=4)
    for key, val in findings.items():
        # Only show findings that carry a positive observation.
        if val not in (False, None, 0):
            info(f" {key}: {c(str(val), CYAN)}", indent=4)

    detected = result.get("detected")
    confidence = result.get("confidence", 0.0)
    description = result.get("description", "")

    print()
    info(c(f'Description: "{description}"', DIM), indent=4)
    print(f" Confiance: {confidence_bar(confidence)}")

    if detected:
        ok(f"Pattern détecté: {c(detected.id, CYAN, BOLD)}")
        kv("Stratégie de scan", detected.scan_strategy.value)
        kv("Détection de type", detected.type_detection.value)
        if detected.root_folder:
            kv("Dossier racine", detected.root_folder)
        return detected

    warn("Aucun pattern détecté avec confiance suffisante — fallback: adjacent")
    fallback = kb.pattern("adjacent")
    if not fallback:
        err("Pattern 'adjacent' introuvable dans la KB !")
        sys.exit(1)
    return fallback
|
||||
|
||||
|
||||
def step_identify_tracks(
    kb: "SubtitleKnowledgeBase",
    sample_video: Path,
    pattern: "SubtitlePattern",
    release_group: str | None,
    verbose: bool,
) -> "MediaSubtitleMetadata":
    """Step 3: identify the subtitle tracks of *sample_video* and print them.

    The identifier is called with ``media_id=None`` and
    ``media_type="tv_show"``. Counts of external, embedded and unresolved
    tracks are printed, followed by a summary of each track.

    Args:
        kb: Knowledge base handed to the identifier.
        sample_video: Video whose subtitles are identified.
        pattern: Release pattern to apply.
        release_group: Optional release group name.
        verbose: Forwarded to ``track_summary`` to print raw tokens.

    Returns:
        The metadata object produced by the identifier.
    """
    from alfred.domain.subtitles.services.identifier import SubtitleIdentifier

    section("ÉTAPE 3 — Identification des pistes")

    kv("Vidéo", sample_video.name)
    kv("Pattern", pattern.id)

    identifier = SubtitleIdentifier(kb)
    metadata = identifier.identify(
        video_path=sample_video,
        pattern=pattern,
        media_id=None,
        media_type="tv_show",
        release_group=release_group,
    )

    n_emb = len(metadata.embedded_tracks)
    n_ext = len(metadata.external_tracks)
    n_unresolved = len(metadata.unresolved_tracks)

    print()
    ok(f"{n_ext} piste(s) externe(s) trouvée(s)")
    if n_emb:
        ok(f"{n_emb} piste(s) embarquée(s) (ffprobe)")
    if n_unresolved:
        warn(f"{n_unresolved} piste(s) externe(s) sans langue reconnue")

    if metadata.external_tracks:
        print()
        info(c("Pistes externes:", BOLD))
        for track in metadata.external_tracks:
            track_summary(track, verbose)

    if metadata.embedded_tracks:
        print()
        info(c("Pistes embarquées:", BOLD))
        for track in metadata.embedded_tracks:
            track_summary(track, verbose)

    return metadata
|
||||
|
||||
|
||||
def step_apply_rules(
    metadata: "MediaSubtitleMetadata",
    release_group: str | None,
) -> tuple["SubtitleMatchingRules | None", list, list]:
    """Step 4: apply the default matching rules to the identified tracks.

    For an embedded pattern the matcher is bypassed and the available
    embedded tracks are listed directly. Otherwise the external tracks are
    matched against ``DEFAULT_RULES()``.

    Args:
        metadata: Result of the identification step.
        release_group: Accepted for signature symmetry with the other steps;
            not used here.

    Returns:
        ``(rules, matched, unresolved)``. ``rules`` is ``None`` and
        ``unresolved`` is empty in the embedded case.
    """
    from alfred.domain.subtitles.aggregates import DEFAULT_RULES
    from alfred.domain.subtitles.services.matcher import SubtitleMatcher
    from alfred.domain.subtitles.services.utils import available_subtitles
    from alfred.domain.subtitles.value_objects import ScanStrategy

    section("ÉTAPE 4 — Application des règles")

    # Embedded case: no matcher, list the available tracks directly
    if metadata.detected_pattern_id == ScanStrategy.EMBEDDED.value:
        info(c("Pattern embedded — le matcher est court-circuité", DIM), indent=4)
        tracks = available_subtitles(metadata.embedded_tracks)
        ok(f"{len(tracks)} piste(s) disponible(s)")
        return None, tracks, []

    rules = DEFAULT_RULES()
    kv("Langues préférées", str(rules.preferred_languages))
    kv("Formats préférés", str(rules.preferred_formats))
    kv("Types autorisés", str(rules.allowed_types))
    kv("Confiance min", str(rules.min_confidence))
    info(c("(règles globales par défaut — pas de .alfred/ en mode scan)", DIM), indent=4)

    matcher = SubtitleMatcher()
    matched, unresolved = matcher.match(metadata.external_tracks, rules)

    print()
    ok(f"{len(matched)} piste(s) retenue(s)")
    if unresolved:
        warn(f"{len(unresolved)} piste(s) écartée(s) ou non résolue(s)")

    return rules, matched, unresolved
|
||||
|
||||
|
||||
def step_show_results(
    matched: list,
    unresolved: list,
    is_embedded: bool,
    verbose: bool,
) -> None:
    """Print the final report: retained tracks, then discarded/unresolved ones.

    Args:
        matched: Tracks retained by the rules step.
        unresolved: Tracks that were discarded or could not be resolved.
        is_embedded: When true, retained tracks are shown as language/type
            pairs instead of "source → destination" lines.
        verbose: Also print the raw tokens of unresolved tracks.
    """
    section("RÉSULTAT FINAL")

    if matched:
        label = "piste(s) disponible(s)" if is_embedded else "piste(s) qui seraient placées"
        ok(f"{len(matched)} {label}:")
        for track in matched:
            lang = track.language.code if track.language else "?"
            typ = track.subtitle_type.value
            if is_embedded:
                print(f" {c(lang, CYAN)} {c(typ, GREEN)}")
                continue
            try:
                dest = track.destination_name
                src = track.file_path.name if track.file_path else "?"
                print(f" {c(src, DIM)} → {c(dest, GREEN, BOLD)}")
            except ValueError:
                warn(f" Piste incomplète (lang ou format manquant): {track}")
    else:
        warn("Aucune piste retenue.")

    if unresolved:
        print()
        warn(f"{len(unresolved)} piste(s) écartées ou à clarifier:")
        for track in unresolved:
            src = track.file_path.name if track.file_path else "?"
            reason = "langue inconnue" if track.language is None else "confiance insuffisante"
            line = f" {c(src, DIM)} ({reason})"
            if verbose and track.raw_tokens:
                line += c(f" tokens: {track.raw_tokens}", YELLOW)
            print(line)

    print()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scan multi-épisodes (résumé)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def scan_season(
    kb: "SubtitleKnowledgeBase",
    pattern: "SubtitlePattern",
    season_folder: Path,
    release_group: str | None,
    verbose: bool,
) -> None:
    """Scan every video of *season_folder* and print one summary line each.

    For each video found by ``find_videos`` the tracks are identified, the
    external tracks are matched against ``DEFAULT_RULES()``, and a line shows
    the destination names that would be placed plus the unresolved count.

    Args:
        kb: Knowledge base handed to the identifier.
        pattern: Release pattern applied to every video.
        season_folder: Folder whose direct video files are scanned.
        release_group: Optional release group name.
        verbose: Accepted for signature symmetry; not used here.
    """
    from alfred.domain.subtitles.aggregates import DEFAULT_RULES
    from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
    from alfred.domain.subtitles.services.matcher import SubtitleMatcher

    videos = find_videos(season_folder)

    section(f"SCAN COMPLET DE LA SAISON ({len(videos)} épisode(s))")

    if not videos:
        warn("Aucun fichier vidéo trouvé dans ce dossier.")
        return

    identifier = SubtitleIdentifier(kb)
    matcher = SubtitleMatcher()
    rules = DEFAULT_RULES()

    # Column width for the file names so the results line up.
    col_w = max(len(v.name) for v in videos) + 2

    for video in videos:
        metadata = identifier.identify(
            video_path=video,
            pattern=pattern,
            media_id=None,
            media_type="tv_show",
            release_group=release_group,
        )
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        placed_names = []
        for t in matched:
            try:
                placed_names.append(t.destination_name)
            except ValueError:
                # Tracks whose destination name cannot be built are left out.
                pass

        status_icon = c("✓", GREEN, BOLD) if placed_names else c("✗", RED, BOLD)
        warn_icon = c(f" [{len(unresolved)} non-résolue(s)]", YELLOW) if unresolved else ""
        names_str = c(", ".join(placed_names) or "—", GREEN if placed_names else DIM)

        print(f" {status_icon} {video.name:{col_w}} {names_str}{warn_icon}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the subtitle scanner's command line (read from ``sys.argv``).

    The module docstring, dedented, is shown as the help epilog.
    """
    cli = argparse.ArgumentParser(
        description="Scanner de sous-titres Alfred — pipeline de diagnostic",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(__doc__ or ""),
    )
    cli.add_argument("season_folder", help="Dossier de la saison (ou du film)")

    # Options that take a value: (flags, metavar, help).
    valued_options = (
        (("--release-group", "-g"), "GROUP",
         "Groupe de release (ex: RARBG, KONSTRAST)"),
        (("--pattern", "-p"), "PATTERN",
         "Forcer un pattern (adjacent|flat|episode_subfolder|embedded)"),
        (("--video", "-v"), "FILE",
         "Fichier vidéo de référence (défaut: premier trouvé)"),
    )
    for flags, placeholder, text in valued_options:
        cli.add_argument(*flags, metavar=placeholder, help=text)

    # Boolean switches: (flag, help).
    switches = (
        ("--verbose", "Affiche les tokens bruts par piste"),
        ("--no-color", "Désactive la colorisation ANSI"),
        ("--season-scan",
         "Après le diagnostic, scanner tous les épisodes de la saison"),
    )
    for flag, text in switches:
        cli.add_argument(flag, action="store_true", help=text)

    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
    """Run the subtitle diagnostic pipeline from the command line.

    Steps: parse arguments, pick a reference video, load the knowledge base,
    detect the subtitle pattern, identify tracks, apply the rules and show the
    results. With ``--season-scan`` every video of the folder is then scanned
    too. Exits with status 1 when the folder or the reference video is missing.
    """
    global USE_COLOR

    args = parse_args()

    # Colours are disabled on request or when stdout is not a terminal.
    if args.no_color or not sys.stdout.isatty():
        USE_COLOR = False

    season_folder = Path(args.season_folder).expanduser().resolve()
    if not season_folder.is_dir():
        print(f"Erreur: '{season_folder}' n'est pas un dossier.", file=sys.stderr)
        sys.exit(1)

    print()
    print(c("━" * 70, BOLD))
    print(c(" Alfred — Subtitle Scanner", BOLD, MAGENTA))
    print(c("━" * 70, BOLD))
    kv("Dossier", str(season_folder), indent=2)

    # Find the reference video
    if args.video:
        sample_video = Path(args.video).expanduser().resolve()
        if not sample_video.exists():
            print(f"Erreur: '{sample_video}' introuvable.", file=sys.stderr)
            sys.exit(1)
    else:
        videos = find_videos(season_folder)
        if not videos:
            # Look one level down (release-root layout). iterdir() yields
            # entries in arbitrary order, so sort them to always pick the same
            # sub-folder, and therefore the same reference video, on every run.
            for sub in sorted(season_folder.iterdir()):
                if sub.is_dir():
                    videos = find_videos(sub)
                    if videos:
                        break
        if not videos:
            print("Erreur: aucun fichier vidéo trouvé dans ce dossier.", file=sys.stderr)
            sys.exit(1)
        sample_video = videos[0]

    kv("Vidéo de référence", sample_video.name, indent=2)

    # ---- Pipeline ----
    kb = step_load_kb()

    pattern = step_detect_pattern(
        kb=kb,
        season_folder=season_folder,
        sample_video=sample_video,
        release_group=args.release_group,
        forced_pattern=args.pattern,
    )

    metadata = step_identify_tracks(
        kb=kb,
        sample_video=sample_video,
        pattern=pattern,
        release_group=args.release_group,
        verbose=args.verbose,
    )

    rules, matched, unresolved = step_apply_rules(
        metadata=metadata,
        release_group=args.release_group,
    )

    step_show_results(
        matched=matched,
        unresolved=unresolved,
        # The results step treats "no rules returned" as the embedded case.
        is_embedded=rules is None,
        verbose=args.verbose,
    )

    if args.season_scan:
        scan_season(
            kb=kb,
            pattern=pattern,
            season_folder=season_folder,
            release_group=args.release_group,
            verbose=args.verbose,
        )

    print(c("━" * 70, BOLD))
    print()
|
||||
|
||||
|
||||
# Run the scanner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Executable
+479
@@ -0,0 +1,479 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
run_workflow.py — Simulate an Alfred workflow step by step (dry-run or live).
|
||||
|
||||
Usage:
|
||||
uv run testing/workflows/run_workflow.py organize_media [options]
|
||||
|
||||
Options:
|
||||
--dry-run Print what each step would do without executing tools (default).
|
||||
--live Actually execute the tools (uses real filesystem + memory).
|
||||
--source PATH Source video file (download folder).
|
||||
--dest PATH Destination video file (library path).
|
||||
--download-folder P Original download folder (for create_seed_links).
|
||||
--imdb-id ID IMDb ID for identify_media step (tt1234567).
|
||||
--seed Answer "yes" to the seeding question.
|
||||
--no-color Disable ANSI colours.
|
||||
|
||||
Examples:
|
||||
uv run testing/workflows/run_workflow.py organize_media --dry-run \\
|
||||
--source "/downloads/Breaking.Bad.S01E01.mkv" \\
|
||||
--dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv"
|
||||
|
||||
uv run testing/workflows/run_workflow.py organize_media --live \\
|
||||
--source "/downloads/BB/Breaking.Bad.S01E01.mkv" \\
|
||||
--dest "/tv/Breaking Bad/Season 01/Breaking Bad.S01E01.mkv" \\
|
||||
--download-folder "/downloads/BB" --seed
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Put the project root on sys.path so the lazy `alfred.*` imports below work
# when this file is run directly. parents[2] is two levels above the directory
# that contains this file; the entry is inserted only if not already present.
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Colours
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Global switch read by c(); main() sets it to False for --no-color or when
# stdout is not a terminal.
USE_COLOR = True

# ANSI SGR escape sequences.
RESET = "\033[0m"
BOLD = "\033[1m"
DIM = "\033[2m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"


def c(text: str, *codes: str) -> str:
    """Return *text* wrapped in the given ANSI *codes*, followed by a reset.

    When colours are disabled the text is returned without any escape
    sequence. The result is always a ``str`` in both modes, so callers can
    concatenate it safely even when *text* is not a string.
    """
    if not USE_COLOR:
        return str(text)
    return "".join(codes) + str(text) + RESET
|
||||
|
||||
|
||||
def section(title: str) -> None:
    """Print a blank line, then *title* framed by two dim 70-character rules."""
    rule = c("─" * 70, DIM)
    heading = c(f" {title}", BOLD, CYAN)
    print()
    print(rule)
    print(heading)
    print(rule)
|
||||
|
||||
|
||||
def ok(msg: str) -> None:
    """Print *msg* prefixed with a bold green check mark."""
    prefix = c(" ✓ ", GREEN, BOLD)
    print(prefix + msg)
|
||||
def warn(msg: str) -> None:
    """Print *msg* prefixed with a bold yellow warning sign."""
    prefix = c(" ⚠ ", YELLOW, BOLD)
    print(prefix + msg)
|
||||
def err(msg: str) -> None:
    """Print *msg* prefixed with a bold red cross."""
    prefix = c(" ✗ ", RED, BOLD)
    print(prefix + msg)
|
||||
def info(msg: str) -> None:
    """Print *msg* as a plain line preceded by a single space."""
    line = f" {msg}"
    print(line)
|
||||
def kv(key: str, val: str) -> None:
    """Print a ``key: value`` line with the key (and its colon) in bold."""
    label = c(key + ":", BOLD)
    print(f" {label} {val}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dry-run tool stubs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _dry_list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"folder_type": folder_type,
|
||||
"path": path,
|
||||
"entries": ["[dry-run — no real listing]"],
|
||||
"count": 1,
|
||||
}
|
||||
|
||||
|
||||
def _dry_find_media_imdb_id(**kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"imdb_id": kwargs.get("imdb_id") or "tt0000000",
|
||||
"title": "Dry Run Show",
|
||||
"type": "tv_show",
|
||||
"year": 2024,
|
||||
}
|
||||
|
||||
|
||||
def _dry_resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """Dry-run stand-in for ``resolve_destination``.

    Parses *release_name* with the real release parser and builds library
    paths under the fixed roots ``/movies`` and ``/tv``. The file extension is
    taken from *source_file*. For TV releases, *confirmed_folder* replaces the
    computed show folder when given.
    """
    from alfred.domain.media.release_parser import parse_release

    release = parse_release(release_name)
    suffix = Path(source_file).suffix

    if release.is_movie:
        movie_dir = release.movie_folder_name(tmdb_title, tmdb_year)
        movie_file = release.movie_filename(tmdb_title, tmdb_year, suffix)
        return {
            "status": "ok",
            "library_file": f"/movies/{movie_dir}/{movie_file}",
            "series_folder": f"/movies/{movie_dir}",
            "series_folder_name": movie_dir,
            "season_folder": None,
            "season_folder_name": None,
            "filename": movie_file,
            "is_new_series_folder": True,
        }

    season_dir = release.season_folder_name()

    # A folder confirmed by the caller wins over the computed one.
    if confirmed_folder:
        show_dir = confirmed_folder
    else:
        show_dir = release.show_folder_name(tmdb_title, tmdb_year)

    # Season packs get the season folder name plus the extension as file name.
    if release.is_season_pack:
        file_name = season_dir + suffix
    else:
        file_name = release.episode_filename(tmdb_episode_title, suffix)

    return {
        "status": "ok",
        "library_file": f"/tv/{show_dir}/{season_dir}/{file_name}",
        "series_folder": f"/tv/{show_dir}",
        "season_folder": f"/tv/{show_dir}/{season_dir}",
        "series_folder_name": show_dir,
        "season_folder_name": season_dir,
        "filename": file_name,
        "is_new_series_folder": confirmed_folder is None,
    }
|
||||
|
||||
|
||||
def _dry_move_media(source: str, destination: str) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"source": source,
|
||||
"destination": destination,
|
||||
"filename": Path(destination).name,
|
||||
"size": 0,
|
||||
}
|
||||
|
||||
|
||||
def _dry_manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"video_path": destination_video,
|
||||
"placed": [],
|
||||
"placed_count": 0,
|
||||
"skipped_count": 0,
|
||||
}
|
||||
|
||||
|
||||
def _dry_create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"torrent_subfolder": f"/torrents/{Path(original_download_folder).name}",
|
||||
"linked_file": f"/torrents/{Path(original_download_folder).name}/{Path(library_file).name}",
|
||||
"copied_files": ["[dry-run — no real copy]"],
|
||||
"copied_count": 1,
|
||||
"skipped": [],
|
||||
}
|
||||
|
||||
|
||||
# Tool registry used when --live is not given: maps each tool name to its
# dry-run stub, so WorkflowRunner can dispatch by name without executing
# anything real.
DRY_RUN_TOOLS: dict[str, Any] = {
    "list_folder": _dry_list_folder,
    "find_media_imdb_id": _dry_find_media_imdb_id,
    "resolve_destination": _dry_resolve_destination,
    "move_media": _dry_move_media,
    "manage_subtitles": _dry_manage_subtitles,
    "create_seed_links": _dry_create_seed_links,
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Live tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_live_tools() -> dict[str, Any]:
    """Import the real Alfred tools and return them keyed by tool name.

    The imports are done here, not at module level, so dry-run never needs
    them. An ImportError from the filesystem tools propagates to the caller.

    ``find_media_imdb_id`` falls back to a stub that returns an error result
    when the api tools cannot be imported. ``resolve_destination`` is added
    only when it can be imported, so the live registry offers the same tools
    as ``DRY_RUN_TOOLS`` whenever possible.
    """
    from alfred.agent.tools.filesystem import (
        create_seed_links,
        list_folder,
        manage_subtitles,
        move_media,
    )

    # find_media_imdb_id lives in the api tools
    try:
        from alfred.agent.tools.api import find_media_imdb_id
    except ImportError:
        def find_media_imdb_id(**kwargs):  # type: ignore[misc]
            return {"status": "error", "error": "not_available", "message": "api tools not loaded"}

    tools: dict[str, Any] = {
        "list_folder": list_folder,
        "find_media_imdb_id": find_media_imdb_id,
    }

    # The dry-run registry and WorkflowRunner._build_kwargs both support
    # resolve_destination, but it was missing here, so a live run could never
    # execute that step.
    # NOTE(review): presumably exported next to the other filesystem tools;
    # confirm the module. If the import fails the tool stays unregistered and
    # the runner reports it as unknown, exactly as before.
    try:
        from alfred.agent.tools.filesystem import resolve_destination
    except ImportError:
        pass
    else:
        tools["resolve_destination"] = resolve_destination

    tools["move_media"] = move_media
    tools["manage_subtitles"] = manage_subtitles
    tools["create_seed_links"] = create_seed_links
    return tools
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workflow runner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WorkflowRunner:
    """Run the steps of a workflow definition in order and print a trace.

    A step is one of:
      * ``ask_user``     — a question, answered from the ``--seed`` flag;
      * ``memory_write`` — only reported, never executed;
      * ``tool``         — a call dispatched through the ``tools`` mapping
                           (dry-run stubs or live tools).

    Tool results are kept in ``self.context`` so later steps can reuse them.
    """

    def __init__(self, workflow: dict, tools: dict[str, Any], live: bool, args: argparse.Namespace):
        self.workflow = workflow
        self.tools = tools
        self.live = live
        self.args = args
        self.context: dict[str, Any] = {}  # step results accumulate here
        self.step_results: list[dict] = []

    def run(self) -> None:
        """Print the header, run every step, then print a summary with errors."""
        name = self.workflow.get("name", "?")
        desc = self.workflow.get("description", "").strip()
        mode = c("LIVE", RED, BOLD) if self.live else c("DRY-RUN", YELLOW, BOLD)

        print()
        print(c("━" * 70, BOLD))
        print(c(f" Alfred — Workflow Simulator [{mode}]", BOLD, MAGENTA))
        print(c("━" * 70, BOLD))
        kv("Workflow", c(name, CYAN, BOLD))
        kv("Description", desc)
        kv("Tools allowed", ", ".join(self.workflow.get("tools", [])))

        steps = self.workflow.get("steps", [])
        for step in steps:
            self._run_step(step)

        section("SIMULATION TERMINÉE")
        ok(f"{len(self.step_results)} step(s) exécuté(s)")
        errors = [r for r in self.step_results if r.get("result", {}).get("status") == "error"]
        if errors:
            warn(f"{len(errors)} step(s) en erreur")
            for r in errors:
                err(f" {r['id']}: {r['result'].get('error')} — {r['result'].get('message')}")
        print()
        print(c("━" * 70, BOLD))
        print()

    @staticmethod
    def _answer_key(key: Any) -> str:
        """Return the textual form of an ``answers`` key.

        PyYAML parses bare ``yes``/``no`` as booleans. ``str(True)`` would be
        ``"True"``, which never matches the simulated answers "yes"/"no", so
        booleans are mapped back to those words. Other keys are stringified.
        """
        if isinstance(key, bool):
            return "yes" if key else "no"
        return str(key)

    def _store_result(self, step_id: str, tool_name: str, result: dict) -> None:
        """Record a tool result for the summary and for later steps.

        The result is stored under the tool name as well as the step id,
        because ``_build_kwargs`` looks up ``resolve_destination`` by tool
        name while a workflow is free to give the step another id.
        """
        self.context[tool_name] = result
        self.context[step_id] = result
        self.step_results.append({"id": step_id, "result": result})

    def _run_step(self, step: dict) -> None:
        """Run one step (ask_user, memory_write or tool) and print what happened."""
        step_id = step.get("id", "?")

        # --- ask_user step ---
        if "ask_user" in step:
            section(f"STEP [{step_id}] — ask_user")
            q = step["ask_user"].get("question", "")
            answers = step["ask_user"].get("answers", {})
            # Normalise keys so YAML booleans are addressable as "yes"/"no".
            answers_str = {self._answer_key(k): v for k, v in answers.items()}
            info(c(f'Question: "{q}"', BOLD))
            info(f"Réponses possibles: {', '.join(answers_str)}")

            answer = "yes" if self.args.seed else "no"
            # `or {}` also covers an answer declared with an empty body.
            next_step = (answers_str.get(answer) or {}).get("next_step", "update_library")
            ok(f"Réponse simulée: {c(answer, CYAN)} → next: {c(next_step, CYAN)}")
            self.context["seeding"] = (answer == "yes")
            self.context["ask_seeding_answer"] = answer
            self.context["next_after_ask"] = next_step

            # If "no", skip create_seed_links
            if answer == "no":
                self.context["skip_create_seed_links"] = True
            return

        # --- memory_write step ---
        if "memory_write" in step:
            section(f"STEP [{step_id}] — memory_write ({step['memory_write']})")
            if self.live:
                warn("memory_write: pas encore implémenté dans le simulator live")
            else:
                ok("(dry-run) Library entry would be written to LTM")
            self.step_results.append({"id": step_id, "result": {"status": "ok"}})
            return

        # --- tool step ---
        tool_name = step.get("tool")
        if not tool_name:
            warn(f"Step '{step_id}' has no tool or ask_user — skipped")
            return

        # Skip create_seed_links if user said no to seeding
        if tool_name == "create_seed_links" and self.context.get("skip_create_seed_links"):
            section(f"STEP [{step_id}] — {tool_name}")
            warn("Skipped (user chose not to seed)")
            return

        section(f"STEP [{step_id}] — {c(tool_name, CYAN, BOLD)}")

        desc = step.get("description", "").strip()
        if desc:
            info(c(desc, DIM))

        kwargs = self._build_kwargs(tool_name, step)
        for k, v in kwargs.items():
            kv(k, str(v))

        if tool_name not in self.tools:
            err(f"Tool '{tool_name}' not found in tool registry")
            self.step_results.append({"id": step_id, "result": {"status": "error", "error": "unknown_tool"}})
            return

        # A failing tool must not abort the simulation: record it and go on.
        try:
            result = self.tools[tool_name](**kwargs)
        except Exception as e:
            err(f"Tool raised an exception: {e}")
            self.step_results.append({"id": step_id, "result": {"status": "error", "error": str(e)}})
            return

        self._print_result(result)
        self._store_result(step_id, tool_name, result)

    def _build_kwargs(self, tool_name: str, step: dict) -> dict[str, Any]:
        """Build tool kwargs from step params + CLI args + previous context."""
        # Start from step-level params (static defaults from YAML)
        kwargs: dict[str, Any] = dict(step.get("params") or {})

        a = self.args

        if tool_name == "list_folder":
            kwargs.setdefault("folder_type", "download")

        elif tool_name == "find_media_imdb_id":
            if a.imdb_id:
                kwargs["imdb_id"] = a.imdb_id

        elif tool_name == "resolve_destination":
            if a.release:
                kwargs["release_name"] = a.release
            elif a.source:
                # No explicit release: use the name of the source's folder.
                kwargs.setdefault("release_name", Path(a.source).parent.name)
            if a.source:
                kwargs["source_file"] = a.source
            if a.tmdb_title:
                kwargs["tmdb_title"] = a.tmdb_title
            if a.tmdb_year:
                kwargs["tmdb_year"] = a.tmdb_year
            if a.episode_title:
                kwargs["tmdb_episode_title"] = a.episode_title

        elif tool_name == "move_media":
            # If resolve_destination ran, use its library_file as destination
            resolved = self.context.get("resolve_destination", {})
            if a.source:
                kwargs["source"] = a.source
            dest = a.dest or resolved.get("library_file")
            if dest:
                kwargs["destination"] = dest

        elif tool_name == "manage_subtitles":
            resolved = self.context.get("resolve_destination", {})
            if a.source:
                kwargs["source_video"] = a.source
            dest = a.dest or resolved.get("library_file")
            if dest:
                kwargs["destination_video"] = dest

        elif tool_name == "create_seed_links":
            resolved = self.context.get("resolve_destination", {})
            library_file = a.dest or resolved.get("library_file")
            if library_file:
                kwargs["library_file"] = library_file
            if a.download_folder:
                kwargs["original_download_folder"] = a.download_folder
            elif a.source:
                kwargs.setdefault("original_download_folder", str(Path(a.source).parent))

        return kwargs

    def _print_result(self, result: dict) -> None:
        """Print the status of *result*, then its fields unless it is an error."""
        status = result.get("status", "?")
        if status == "ok":
            ok(f"status={c('ok', GREEN)}")
        elif status == "needs_clarification":
            warn(f"status={c('needs_clarification', YELLOW)}")
        else:
            err(f"status={c(status, RED)} error={result.get('error')} msg={result.get('message')}")
            return

        # Pretty-print notable fields
        skip = {"status", "error", "message"}
        for k, v in result.items():
            if k in skip:
                continue
            if isinstance(v, list):
                if v:
                    info(c(f"{k}:", BOLD))
                    # Show at most ten items, then a count of the rest.
                    for item in v[:10]:
                        info(f" • {item}")
                    if len(v) > 10:
                        info(c(f" … and {len(v) - 10} more", DIM))
                else:
                    info(f"{c(k + ':', BOLD)} (empty)")
            else:
                kv(k, str(v))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the workflow simulator's command line (read from ``sys.argv``).

    ``--dry-run`` (the default) and ``--live`` are mutually exclusive.
    Previously both could be given and ``--live`` silently won, so a command
    line meant to be harmless could touch the real filesystem. argparse now
    rejects that combination with its usual usage error (exit status 2).
    """
    parser = argparse.ArgumentParser(
        description="Alfred workflow simulator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(__doc__ or ""),
    )
    parser.add_argument("workflow", help="Workflow name (e.g. organize_media)")

    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--dry-run", dest="dry_run", action="store_true", default=True,
                      help="Simulate steps without executing tools (default)")
    mode.add_argument("--live", action="store_true",
                      help="Actually execute tools against the real filesystem")

    parser.add_argument("--source", metavar="PATH",
                        help="Source video file (in download folder)")
    parser.add_argument("--dest", metavar="PATH",
                        help="Destination video file (in library, overrides resolve_destination)")
    parser.add_argument("--download-folder", metavar="PATH",
                        help="Original download folder (for create_seed_links)")
    parser.add_argument("--imdb-id", metavar="ID",
                        help="IMDb ID for identify_media (tt1234567)")
    parser.add_argument("--release", metavar="NAME",
                        help="Release name (e.g. Oz.S03.1080p.WEBRip.x265-KONTRAST)")
    parser.add_argument("--tmdb-title", metavar="TITLE",
                        help="Canonical title from TMDB (e.g. 'Oz')")
    parser.add_argument("--tmdb-year", metavar="YEAR", type=int,
                        help="Start/release year from TMDB (e.g. 1997)")
    parser.add_argument("--episode-title", metavar="TITLE",
                        help="Episode title from TMDB for single-episode releases")
    parser.add_argument("--seed", action="store_true",
                        help='Answer "yes" to the seeding question')
    parser.add_argument("--no-color", action="store_true")
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
    """Entry point: load the requested workflow and run it dry or live.

    Exits with status 1 when the workflow is not found or when the live tools
    cannot be loaded.
    """
    global USE_COLOR
    cli = parse_args()

    # Colours are disabled on request or when stdout is not a terminal.
    if cli.no_color or not sys.stdout.isatty():
        USE_COLOR = False

    # --live overrides the default dry-run flag.
    if cli.live:
        cli.dry_run = False

    # Resolve the workflow definition by name.
    from alfred.agent.workflows.loader import WorkflowLoader

    loader = WorkflowLoader()
    definition = loader.get(cli.workflow)
    if not definition:
        print(f"Erreur: workflow '{cli.workflow}' introuvable.", file=sys.stderr)
        print(f"Disponibles: {', '.join(loader.names())}", file=sys.stderr)
        sys.exit(1)

    # Pick the tool registry: stubs unless --live was given.
    if not cli.live:
        registry = DRY_RUN_TOOLS
    else:
        try:
            registry = _load_live_tools()
        except Exception as exc:
            print(f"Erreur chargement des tools live: {exc}", file=sys.stderr)
            sys.exit(1)

    WorkflowRunner(definition, registry, live=cli.live, args=cli).run()
|
||||
|
||||
|
||||
# Run the simulator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
Tests for alfred.agent.registry — tool registration and JSON schema generation.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.agent.registry import Tool, _create_tool_from_function, make_tools
|
||||
from alfred.settings import settings
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _create_tool_from_function
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCreateToolFromFunction:
    """Checks the name, description and JSON schema built from a plain function."""

    def test_name_from_function(self):
        """The tool is named after the wrapped function."""
        def my_tool(x: str) -> dict:
            """Does something."""
            return {}

        built = _create_tool_from_function(my_tool)
        assert built.name == "my_tool"

    def test_description_from_docstring_first_line(self):
        """Only the first docstring line becomes the description."""
        def my_tool(x: str) -> dict:
            """First line description.

            More details here.
            """
            return {}

        built = _create_tool_from_function(my_tool)
        assert built.description == "First line description."

    def test_description_fallback_to_name(self):
        """Without a docstring the description is the function name."""
        def no_doc(x: str) -> dict:
            return {}

        built = _create_tool_from_function(no_doc)
        assert built.description == "no_doc"

    def test_required_params_without_default(self):
        """Parameters without a default are listed as required."""
        def tool(a: str, b: int) -> dict:
            """Tool."""
            return {}

        required = _create_tool_from_function(tool).parameters["required"]
        assert "a" in required
        assert "b" in required

    def test_optional_params_not_required(self):
        """A parameter with a default is not required."""
        def tool(a: str, b: str = "default") -> dict:
            """Tool."""
            return {}

        required = _create_tool_from_function(tool).parameters["required"]
        assert "a" in required
        assert "b" not in required

    def test_none_default_not_required(self):
        """A parameter defaulting to None is not required."""
        def tool(a: str, b: str | None = None) -> dict:
            """Tool."""
            return {}

        required = _create_tool_from_function(tool).parameters["required"]
        assert "b" not in required

    def test_type_mapping_str(self):
        """str is mapped to the JSON type "string"."""
        def tool(x: str) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "string"

    def test_type_mapping_int(self):
        """int is mapped to the JSON type "integer"."""
        def tool(x: int) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "integer"

    def test_type_mapping_float(self):
        """float is mapped to the JSON type "number"."""
        def tool(x: float) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "number"

    def test_type_mapping_bool(self):
        """bool is mapped to the JSON type "boolean"."""
        def tool(x: bool) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "boolean"

    def test_unknown_type_defaults_to_string(self):
        """An annotation with no mapping (here list) falls back to "string"."""
        def tool(x: list) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "string"

    def test_no_annotation_defaults_to_string(self):
        """A parameter without annotation falls back to "string"."""
        def tool(x) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["properties"]["x"]["type"] == "string"

    def test_self_param_excluded(self):
        """For a bound method, self does not appear among the properties."""
        class MyClass:
            def tool(self, x: str) -> dict:
                """T."""
                return {}

        bound_method = MyClass().tool
        schema = _create_tool_from_function(bound_method).parameters
        assert "self" not in schema["properties"]

    def test_parameters_schema_structure(self):
        """The schema is an object with "properties" and "required" entries."""
        def tool(a: str, b: int = 0) -> dict:
            """T."""
            return {}

        schema = _create_tool_from_function(tool).parameters
        assert schema["type"] == "object"
        assert "properties" in schema
        assert "required" in schema

    def test_func_stored_on_tool(self):
        """The tool keeps the wrapped function and it stays callable."""
        def tool(x: str) -> dict:
            """T."""
            return {"x": x}

        built = _create_tool_from_function(tool)
        assert built.func("hello") == {"x": "hello"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# make_tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMakeTools:
    """Checks the registry returned by ``make_tools(settings)``."""

    def test_returns_dict(self):
        """The registry is a dict."""
        registry = make_tools(settings)
        assert isinstance(registry, dict)

    def test_all_expected_tools_present(self):
        """Every expected tool name is registered (extra tools are allowed)."""
        registry = make_tools(settings)
        expected = {
            "set_path_for_folder",
            "list_folder",
            "resolve_destination",
            "move_media",
            "manage_subtitles",
            "create_seed_links",
            "learn",
            "find_media_imdb_id",
            "find_torrent",
            "add_torrent_by_index",
            "add_torrent_to_qbittorrent",
            "get_torrent_by_index",
            "set_language",
        }
        assert expected.issubset(registry.keys())

    def test_each_tool_is_tool_instance(self):
        """Every registry value is a Tool."""
        registry = make_tools(settings)
        for name, entry in registry.items():
            assert isinstance(entry, Tool), f"{name} is not a Tool instance"

    def test_each_tool_has_callable_func(self):
        """Every tool carries a callable ``func``."""
        registry = make_tools(settings)
        for name, entry in registry.items():
            assert callable(entry.func), f"{name}.func is not callable"

    def test_tool_name_matches_key(self):
        """Each tool is registered under its own name."""
        registry = make_tools(settings)
        for key, entry in registry.items():
            assert entry.name == key

    def test_resolve_destination_schema(self):
        """resolve_destination: required and optional arguments, int year."""
        schema = make_tools(settings)["resolve_destination"].parameters
        props = schema["properties"]
        required = schema["required"]
        # Required args
        assert "release_name" in required
        assert "source_file" in required
        assert "tmdb_title" in required
        assert "tmdb_year" in required
        # Optional args not required
        assert "tmdb_episode_title" not in required
        assert "confirmed_folder" not in required
        # tmdb_year is int
        assert props["tmdb_year"]["type"] == "integer"

    def test_move_media_schema(self):
        """move_media requires both source and destination."""
        required = make_tools(settings)["move_media"].parameters["required"]
        assert "source" in required
        assert "destination" in required

    def test_create_seed_links_schema(self):
        """create_seed_links requires the library file and the download folder."""
        required = make_tools(settings)["create_seed_links"].parameters["required"]
        assert "library_file" in required
        assert "original_download_folder" in required

    def test_no_duplicate_tools(self):
        """No two registry entries share the same tool name."""
        registry = make_tools(settings)
        # dict keys are unique by definition, but verify no name conflicts
        names = [entry.name for entry in registry.values()]
        assert len(names) == len(set(names))
|
||||
@@ -0,0 +1,41 @@
|
||||
"""Fixtures for application-layer tests."""
|
||||
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.infrastructure.persistence import Memory, set_memory
|
||||
|
||||
|
||||
@pytest.fixture
def app_temp(tmp_path):
    """Create the downloads, movies, tv_shows and torrents folders in tmp_path.

    Returns tmp_path itself, i.e. the parent of the four folders.
    """
    for folder_name in ("downloads", "movies", "tv_shows", "torrents"):
        (tmp_path / folder_name).mkdir()
    return tmp_path
|
||||
|
||||
|
||||
@pytest.fixture
def memory_configured(app_temp, tmp_path):
    """
    Fresh Memory with library_paths and workspace configured using the real API.
    Replaces the broken memory_with_config from root conftest for these tests.

    The Memory is stored in its own temporary directory, installed globally
    with set_memory() and saved before being yielded. The storage directory
    is removed after the test. ``tmp_path`` is requested but not used here.
    """
    # tempfile and shutil come from the module-level imports; the previous
    # function-scope `import tempfile, os` was redundant and `os` was unused.
    storage = tempfile.mkdtemp()
    mem = Memory(storage_dir=storage)
    set_memory(mem)

    mem.ltm.workspace.download = str(app_temp / "downloads")
    mem.ltm.workspace.torrent = str(app_temp / "torrents")
    mem.ltm.library_paths.set("movie", str(app_temp / "movies"))
    mem.ltm.library_paths.set("tv_show", str(app_temp / "tv_shows"))
    mem.save()

    yield mem

    # Teardown: best-effort removal of the storage directory.
    shutil.rmtree(storage, ignore_errors=True)
|
||||
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Tests for alfred.application.filesystem.create_seed_links.CreateSeedLinksUseCase
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.application.filesystem.create_seed_links import CreateSeedLinksUseCase
|
||||
from alfred.infrastructure.filesystem.file_manager import FileManager
|
||||
|
||||
|
||||
@pytest.fixture
def fm():
    """Provide a FileManager built with its default arguments."""
    manager = FileManager()
    return manager
|
||||
|
||||
|
||||
@pytest.fixture
def use_case(fm):
    """Provide a CreateSeedLinksUseCase wired to the ``fm`` fixture."""
    seed_links = CreateSeedLinksUseCase(fm)
    return seed_links
|
||||
|
||||
|
||||
@pytest.fixture
def seed_env(tmp_path_factory):
    """
    Post-move environment built in its own temporary directory:
      - a library video file inside tv/<show>/<season>/
      - the original download folder with two text files and one subtitle
      - an empty torrents root folder

    Returns the tuple (library video, download folder, torrents folder).
    """
    root = tmp_path_factory.mktemp("seed_env")

    season_name = "Oz.S01.1080p.WEBRip.x265-KONTRAST"
    episode_name = "Oz.S01E01.1080p.WEBRip.x265-KONTRAST"

    # Library side: the episode at its final location.
    library_season = root / "tv" / "Oz.1997.1080p.WEBRip.x265-KONTRAST" / season_name
    library_season.mkdir(parents=True)
    library_video = library_season / f"{episode_name}.mp4"
    library_video.write_bytes(b"video")

    # Download side: what remains in the release folder.
    download_dir = root / "downloads" / season_name
    download_dir.mkdir(parents=True)
    (download_dir / "KONTRAST.txt").write_text("release notes")
    (download_dir / "[TGx]info.txt").write_text("tgx")
    subtitle_dir = download_dir / "Subs" / episode_name
    subtitle_dir.mkdir(parents=True)
    subtitle_file = subtitle_dir / "2_eng,English [CC][SDH].srt"
    subtitle_file.write_text("1\n00:00:01 --> 00:00:02\nHello\n")

    torrents_root = root / "torrents"
    torrents_root.mkdir()

    return library_video, download_dir, torrents_root
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Happy path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCreateSeedLinksHappyPath:
    """execute() succeeds once the torrent workspace folder is configured."""

    def test_ok_when_torrent_folder_configured(self, use_case, seed_env, memory_configured):
        """The result is "ok" and reports a sub-folder, a link and copied files."""
        from alfred.infrastructure.persistence import get_memory

        library_video, download_dir, torrents_root = seed_env
        # Point the torrent workspace at the seed_env torrents folder.
        memory = get_memory()
        memory.ltm.workspace.torrent = str(torrents_root)
        memory.save()

        outcome = use_case.execute(str(library_video), str(download_dir))

        assert outcome.status == "ok"
        assert outcome.torrent_subfolder is not None
        assert outcome.linked_file is not None
        assert outcome.copied_count > 0

    def test_to_dict_ok(self, use_case, seed_env, memory_configured):
        """to_dict() exposes the status, the sub-folder and a list of copied files."""
        from alfred.infrastructure.persistence import get_memory

        library_video, download_dir, torrents_root = seed_env
        # Point the torrent workspace at the seed_env torrents folder.
        memory = get_memory()
        memory.ltm.workspace.torrent = str(torrents_root)
        memory.save()

        payload = use_case.execute(str(library_video), str(download_dir)).to_dict()

        assert payload["status"] == "ok"
        assert "torrent_subfolder" in payload
        assert "copied_files" in payload
        assert isinstance(payload["copied_files"], list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Error paths: torrent folder not configured, FileManager failures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCreateSeedLinksErrors:
    """Error paths: missing torrent folder and failures reported by FileManager."""

    def test_error_when_torrent_not_configured(self, use_case, seed_env, memory):
        """Without a torrent folder in memory the use case reports ``torrent_folder_not_set``."""
        lib_video, dl, _ = seed_env
        result = use_case.execute(str(lib_video), str(dl))

        assert result.status == "error"
        assert result.error == "torrent_folder_not_set"
        assert result.message is not None

    def test_to_dict_error(self, use_case, seed_env, memory):
        """The error response serialises with ``status``, ``error`` and ``message`` keys."""
        lib_video, dl, _ = seed_env
        d = use_case.execute(str(lib_video), str(dl)).to_dict()

        assert d["status"] == "error"
        assert "error" in d
        assert "message" in d

    def test_error_delegates_to_file_manager(self, memory_configured, tmp_path):
        """FileManager errors are propagated correctly."""
        from alfred.infrastructure.persistence import get_memory

        # Set the torrent folder explicitly instead of relying on the fixture,
        # so the run cannot stop at the "torrent_folder_not_set" guard and the
        # error below really comes from the missing library file.
        mem = get_memory()
        mem.ltm.workspace.torrent = str(tmp_path)
        mem.save()

        uc = CreateSeedLinksUseCase(FileManager())
        result = uc.execute("/nonexistent/lib.mkv", "/nonexistent/dl")

        assert result.status == "error"
        # Guard against passing for the wrong reason.
        assert result.error != "torrent_folder_not_set"
|
||||
@@ -0,0 +1,179 @@
|
||||
"""Tests for ListFolderUseCase and MoveMediaUseCase."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from alfred.application.filesystem.list_folder import ListFolderUseCase
|
||||
from alfred.application.filesystem.move_media import MoveMediaUseCase
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ListFolderUseCase
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestListFolderUseCase:
    """ListFolderUseCase against a mocked file manager."""

    def _use_case(self, fm_result):
        """Return a use case whose file manager's ``list_folder`` yields ``fm_result``."""
        file_manager = MagicMock(**{"list_folder.return_value": fm_result})
        return ListFolderUseCase(file_manager)

    def test_success_returns_response(self):
        listing = {
            "status": "ok",
            "folder_type": "download",
            "path": ".",
            "entries": ["movie.mkv", "show/"],
            "count": 2,
        }
        response = self._use_case(listing).execute("download")

        assert response.status == "ok"
        assert response.folder_type == "download"
        assert response.path == "."
        assert response.entries == ["movie.mkv", "show/"]
        assert response.count == 2

    def test_error_propagates(self):
        failure = {
            "status": "error",
            "error": "folder_not_set",
            "message": "Download folder not configured.",
        }
        response = self._use_case(failure).execute("download")

        assert response.status == "error"
        assert response.error == "folder_not_set"
        assert response.message == "Download folder not configured."

    def test_delegates_folder_type_and_path(self):
        # The mock is built here (not via the helper) because the call is inspected.
        file_manager = MagicMock()
        file_manager.list_folder.return_value = {
            "status": "ok",
            "folder_type": "tv_show",
            "path": "Breaking Bad",
            "entries": [],
            "count": 0,
        }

        ListFolderUseCase(file_manager).execute("tv_show", "Breaking Bad")

        file_manager.list_folder.assert_called_once_with("tv_show", "Breaking Bad")

    def test_default_path_is_dot(self):
        file_manager = MagicMock()
        file_manager.list_folder.return_value = {
            "status": "ok",
            "folder_type": "download",
            "path": ".",
            "entries": [],
            "count": 0,
        }

        ListFolderUseCase(file_manager).execute("download")

        file_manager.list_folder.assert_called_once_with("download", ".")

    def test_success_response_has_no_error(self):
        listing = {
            "status": "ok",
            "folder_type": "movie",
            "path": ".",
            "entries": [],
            "count": 0,
        }
        response = self._use_case(listing).execute("movie")

        assert response.error is None

    def test_error_response_has_no_entries(self):
        failure = {
            "status": "error",
            "error": "not_found",
            "message": "Path does not exist",
        }
        response = self._use_case(failure).execute("download", "some/path")

        assert response.entries is None
        assert response.count is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MoveMediaUseCase
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMoveMediaUseCase:
    """MoveMediaUseCase against a mocked file manager."""

    def _use_case(self, fm_result):
        """Return a use case whose file manager's ``move_file`` yields ``fm_result``."""
        file_manager = MagicMock(**{"move_file.return_value": fm_result})
        return MoveMediaUseCase(file_manager)

    def test_success_returns_response(self, tmp_path):
        source = str(tmp_path / "src.mkv")
        target = str(tmp_path / "dst.mkv")
        moved = {
            "status": "ok",
            "source": source,
            "destination": target,
            "filename": "dst.mkv",
            "size": 1024,
        }

        response = self._use_case(moved).execute(source, target)

        assert response.status == "ok"
        assert response.source == source
        assert response.destination == target
        assert response.filename == "dst.mkv"
        assert response.size == 1024

    def test_error_propagates(self, tmp_path):
        failure = {
            "status": "error",
            "error": "source_not_found",
            "message": "Source does not exist: /ghost.mkv",
        }
        target = str(tmp_path / "dst.mkv")

        response = self._use_case(failure).execute("/ghost.mkv", target)

        assert response.status == "error"
        assert response.error == "source_not_found"

    def test_delegates_to_file_manager(self, tmp_path):
        source = "/downloads/movie.mkv"
        target = "/movies/Movie.2024/movie.mkv"
        # The mock is built here (not via the helper) because the call is inspected.
        file_manager = MagicMock()
        file_manager.move_file.return_value = {
            "status": "ok",
            "source": source,
            "destination": target,
            "filename": "movie.mkv",
            "size": 1,
        }

        MoveMediaUseCase(file_manager).execute(source, target)

        file_manager.move_file.assert_called_once_with(source, target)

    def test_error_response_has_no_paths(self):
        failure = {
            "status": "error",
            "error": "destination_exists",
            "message": "File already exists",
        }
        response = self._use_case(failure).execute("/src.mkv", "/dst.mkv")

        assert response.source is None
        assert response.destination is None
        assert response.filename is None

    def test_to_dict_success(self, tmp_path):
        source = "/downloads/movie.mkv"
        target = "/movies/movie.mkv"
        moved = {
            "status": "ok",
            "source": source,
            "destination": target,
            "filename": "movie.mkv",
            "size": 2048,
        }

        payload = self._use_case(moved).execute(source, target).to_dict()

        assert payload["status"] == "ok"
        assert payload["filename"] == "movie.mkv"
        assert payload["size"] == 2048

    def test_to_dict_error(self):
        failure = {
            "status": "error",
            "error": "link_failed",
            "message": "Cross-device link not permitted",
        }
        payload = self._use_case(failure).execute("/src.mkv", "/dst.mkv").to_dict()

        assert payload["status"] == "error"
        assert "error" in payload
        assert "message" in payload
|
||||
@@ -0,0 +1,315 @@
|
||||
"""
|
||||
Tests for alfred.application.filesystem.resolve_destination
|
||||
|
||||
Uses a real temp filesystem + a real Memory instance (via conftest fixtures).
|
||||
No network calls — TMDB data is passed in directly.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from alfred.application.filesystem.resolve_destination import (
|
||||
ResolveDestinationUseCase,
|
||||
_find_existing_series_folders,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _use_case():
    """Return a fresh ``ResolveDestinationUseCase`` instance."""
    resolver = ResolveDestinationUseCase()
    return resolver
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Movies
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResolveMovie:
    """Destination resolution for movie releases."""

    def test_basic_movie(self, memory_configured):
        request = {
            "release_name": "Another.Round.2020.1080p.BluRay.x264-YTS",
            "source_file": "/downloads/Another.Round.2020.1080p.BluRay.x264-YTS/Another.Round.2020.1080p.BluRay.x264-YTS.mp4",
            "tmdb_title": "Another Round",
            "tmdb_year": 2020,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert "Another.Round.2020" in resolved.series_folder_name
        assert "1080p.BluRay.x264-YTS" in resolved.series_folder_name
        assert resolved.filename.endswith(".mp4")
        assert resolved.season_folder is None

    def test_movie_library_file_path_is_inside_series_folder(self, memory_configured):
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert resolved.library_file.startswith(resolved.series_folder)

    def test_movie_library_not_set(self, memory):
        # The plain ``memory`` fixture has no library paths configured.
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.2005.1080p.BluRay.x265-RARBG.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "error"
        assert resolved.error == "library_not_set"

    def test_movie_folder_marked_new(self, memory_configured):
        # Nothing exists in the library yet, so the folder must be flagged as new.
        request = {
            "release_name": "Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS",
            "source_file": "/downloads/Godzilla.Minus.One.2023.1080p.BluRay.x265-YTS.mp4",
            "tmdb_title": "Godzilla Minus One",
            "tmdb_year": 2023,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert resolved.is_new_series_folder is True

    def test_movie_sanitises_forbidden_chars_in_title(self, memory_configured):
        request = {
            "release_name": "Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Alien.Earth.2024.1080p.WEBRip.x265-KONTRAST.mkv",
            "tmdb_title": "Alien: Earth",
            "tmdb_year": 2024,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert ":" not in resolved.series_folder_name

    def test_to_dict_ok(self, memory_configured):
        request = {
            "release_name": "Revolver.2005.1080p.BluRay.x265-RARBG",
            "source_file": "/downloads/Revolver.mkv",
            "tmdb_title": "Revolver",
            "tmdb_year": 2005,
        }
        payload = _use_case().execute(**request).to_dict()

        assert payload["status"] == "ok"
        assert "library_file" in payload
        assert "series_folder_name" in payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TV shows — no existing folder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResolveTVShowNewFolder:
    """TV releases when no series folder exists in the library yet."""

    def test_oz_s01_creates_new_folder(self, memory_configured):
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.1080p.WEBRip.x265-KONTRAST.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert resolved.is_new_series_folder is True
        assert resolved.series_folder_name == "Oz.1997.1080p.WEBRip.x265-KONTRAST"
        assert resolved.season_folder_name == "Oz.S01.1080p.WEBRip.x265-KONTRAST"

    def test_tv_library_not_set(self, memory):
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "error"
        assert resolved.error == "library_not_set"

    def test_single_episode_filename(self, memory_configured):
        request = {
            "release_name": "Fallout.2024.S02E01.1080p.x265-ELiTE",
            "source_file": "/downloads/Fallout.2024.S02E01.1080p.x265-ELiTE.mkv",
            "tmdb_title": "Fallout",
            "tmdb_year": 2024,
            "tmdb_episode_title": "The Beginning",
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        assert "S02E01" in resolved.filename
        assert "The.Beginning" in resolved.filename
        assert resolved.filename.endswith(".mkv")

    def test_season_pack_filename_is_folder_name_plus_ext(self, memory_configured):
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01.1080p.WEBRip.x265-KONTRAST/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)

        assert resolved.status == "ok"
        # For a season pack the filename is the season folder name plus the extension.
        expected_filename = resolved.season_folder_name + ".mp4"
        assert resolved.filename == expected_filename

    def test_library_file_is_inside_season_folder(self, memory_configured):
        request = {
            "release_name": "Oz.S01.1080p.WEBRip.x265-KONTRAST",
            "source_file": "/downloads/Oz.S01E01.mp4",
            "tmdb_title": "Oz",
            "tmdb_year": 1997,
        }
        resolved = _use_case().execute(**request)

        assert resolved.library_file.startswith(resolved.season_folder)
        assert resolved.season_folder.startswith(resolved.series_folder)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TV shows — existing folder matching
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestResolveTVShowExistingFolder:
    """TV releases when one or more matching series folders already exist."""

    @staticmethod
    def _tv_root():
        """Return the configured ``tv_show`` library path from memory as a ``Path``."""
        from alfred.infrastructure.persistence import get_memory

        return Path(get_memory().ltm.library_paths.get("tv_show"))

    def _make_series_folder(self, tv_root, name):
        """Create a series folder in the tv library and return its path."""
        path = tv_root / name
        # exist_ok: tolerate a folder that is already present.
        path.mkdir(parents=True, exist_ok=True)
        return path

    def test_uses_existing_single_folder(self, memory_configured, app_temp):
        """When exactly one folder matches title+year, use it regardless of group."""
        self._make_series_folder(self._tv_root(), "Oz.1997.1080p.WEBRip.x265-RARBG")

        result = _use_case().execute(
            release_name="Oz.S02.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S02E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )

        assert result.status == "ok"
        assert result.series_folder_name == "Oz.1997.1080p.WEBRip.x265-RARBG"
        assert result.is_new_series_folder is False

    def test_needs_clarification_on_multiple_folders(self, memory_configured, app_temp):
        """When multiple folders match, return needs_clarification with options."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
        )

        assert result.status == "needs_clarification"
        assert result.question is not None
        assert len(result.options) == 2
        assert "Slow.Horses.2022.1080p.WEBRip.x265-RARBG" in result.options
        assert "Slow.Horses.2022.1080p.WEBRip.x265-KONTRAST" in result.options

    def test_confirmed_folder_bypasses_detection(self, memory_configured, app_temp):
        """confirmed_folder skips the folder search."""
        chosen = "Slow.Horses.2022.1080p.WEBRip.x265-RARBG"
        self._make_series_folder(self._tv_root(), chosen)

        result = _use_case().execute(
            release_name="Slow.Horses.S05.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Slow.Horses.S05E01.mkv",
            tmdb_title="Slow Horses",
            tmdb_year=2022,
            confirmed_folder=chosen,
        )

        assert result.status == "ok"
        assert result.series_folder_name == chosen

    def test_to_dict_needs_clarification(self, memory_configured, app_temp):
        """The clarification response serialises with a question and a list of options."""
        tv_root = self._tv_root()
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-RARBG")
        self._make_series_folder(tv_root, "Oz.1997.1080p.WEBRip.x265-KONTRAST")

        result = _use_case().execute(
            release_name="Oz.S03.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S03E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )
        d = result.to_dict()

        assert d["status"] == "needs_clarification"
        assert "question" in d
        assert isinstance(d["options"], list)

    def test_to_dict_error(self, memory):
        """The error response serialises with ``status``, ``error`` and ``message`` keys."""
        result = _use_case().execute(
            release_name="Oz.S01.1080p.WEBRip.x265-KONTRAST",
            source_file="/downloads/Oz.S01E01.mp4",
            tmdb_title="Oz",
            tmdb_year=1997,
        )
        d = result.to_dict()

        assert d["status"] == "error"
        assert "error" in d
        assert "message" in d
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _find_existing_series_folders
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestFindExistingSeriesFolders:
    """Direct tests of the ``_find_existing_series_folders`` helper."""

    def test_empty_library(self, tmp_path):
        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)
        assert matches == []

    def test_nonexistent_root(self, tmp_path):
        missing_root = tmp_path / "nope"
        matches = _find_existing_series_folders(missing_root, "Oz", 1997)
        assert matches == []

    def test_single_match(self, tmp_path):
        tmp_path.joinpath("Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()

        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)

        assert matches == ["Oz.1997.1080p.WEBRip.x265-KONTRAST"]

    def test_multiple_matches(self, tmp_path):
        for folder_name in (
            "Oz.1997.1080p.WEBRip.x265-KONTRAST",
            "Oz.1997.1080p.WEBRip.x265-RARBG",
        ):
            tmp_path.joinpath(folder_name).mkdir()

        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)

        assert len(matches) == 2
        # The result must already be in sorted order.
        assert matches == sorted(matches)

    def test_no_match_different_year(self, tmp_path):
        tmp_path.joinpath("Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()

        matches = _find_existing_series_folders(tmp_path, "Oz", 2000)

        assert matches == []

    def test_no_match_different_title(self, tmp_path):
        tmp_path.joinpath("Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()

        matches = _find_existing_series_folders(tmp_path, "Breaking Bad", 2008)

        assert matches == []

    def test_ignores_files_not_dirs(self, tmp_path):
        tmp_path.joinpath("Oz.1997.1080p.WEBRip.x265-KONTRAST").mkdir()
        # A regular file with a matching prefix must not be counted.
        tmp_path.joinpath("Oz.1997.some.file.txt").touch()

        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)

        assert len(matches) == 1

    def test_case_insensitive_prefix(self, tmp_path):
        # The folder on disk uses a different case than the queried title.
        tmp_path.joinpath("OZ.1997.1080p.WEBRip.x265-KONTRAST").mkdir()

        matches = _find_existing_series_folders(tmp_path, "Oz", 1997)

        assert len(matches) == 1

    def test_title_with_special_chars_sanitised(self, tmp_path):
        # "Star Wars: Andor" is expected to match a folder named with the colon
        # removed and spaces turned into dots: "Star.Wars.Andor.2022...".
        tmp_path.joinpath("Star.Wars.Andor.2022.1080p.WEBRip.x265-GROUP").mkdir()

        matches = _find_existing_series_folders(tmp_path, "Star Wars: Andor", 2022)

        assert len(matches) == 1
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user