Compare commits
71 Commits
v0.1.4
...
de02bdea06
| Author | SHA1 | Date | |
|---|---|---|---|
| de02bdea06 | |||
| 62b5d0b998 | |||
| 610dee365c | |||
| 58408d0dbe | |||
| 2f1ac3c758 | |||
| d3b69f7459 | |||
| 50c8204fa0 | |||
| 507fe0f40e | |||
| b7b40eada1 | |||
| 9765386405 | |||
| aa89a3fb00 | |||
| 64aeb5fc80 | |||
| 9540520dc4 | |||
| 300ed387f5 | |||
| dea81de5b5 | |||
| 01a00a12af | |||
| 504d0162bb | |||
| cda23d074f | |||
| 0357108077 | |||
| ab1df3dd0f | |||
| c50091f6bf | |||
| 8b406370f1 | |||
| c56bf2b92c | |||
| b1507db4d0 | |||
| 3074962314 | |||
| 84799879bb | |||
| 1052c1b619 | |||
| 9958b8e848 | |||
| b15161dad7 | |||
| 52f025ae32 | |||
| 2cfe7a035b | |||
| 2441c2dc29 | |||
| 261a1f3918 | |||
| 253903a1e5 | |||
| 20a113e335 | |||
| fed83e7d79 | |||
| 3880a4ec49 | |||
| 6195abbaa5 | |||
| b132554631 | |||
| 561796cec1 | |||
| 26d90acc16 | |||
| d8234b2958 | |||
| 156d1fe567 | |||
| f8eee120cf | |||
| c5e4a5e1a7 | |||
| d10c9160f3 | |||
| 1f88e99e8b | |||
| e097a13221 | |||
| 086fff803d | |||
| 45fbf975b3 | |||
| b8f2798e29 | |||
| c762d91eb1 | |||
| 35a68387ab | |||
| 9b13c69631 | |||
| 2ca1ea29b2 | |||
| 5e86615bde | |||
| 6701a4b392 | |||
| 68372405d6 | |||
| f1ea0de247 | |||
| 974d008825 | |||
| 8a87d94e6d | |||
| ec99a501fc | |||
| c256b26601 | |||
| 56a3c1257d | |||
| 79d23f936a | |||
| f02e916d33 | |||
| 4e64c83c4b | |||
| 07cae9abd1 | |||
| 21b2dffc37 | |||
| 2d1055cccf | |||
| fdb2447862 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.1.4"
|
current_version = "0.1.7"
|
||||||
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
||||||
serialize = ["{major}.{minor}.{patch}"]
|
serialize = ["{major}.{minor}.{patch}"]
|
||||||
search = "{current_version}"
|
search = "{current_version}"
|
||||||
@@ -22,8 +22,7 @@ venv
|
|||||||
.venv
|
.venv
|
||||||
env
|
env
|
||||||
.env
|
.env
|
||||||
.env.*
|
.env-
|
||||||
|
|
||||||
# IDE
|
# IDE
|
||||||
.vscode
|
.vscode
|
||||||
.idea
|
.idea
|
||||||
@@ -41,11 +40,8 @@ docs/
|
|||||||
*.md
|
*.md
|
||||||
!README.md
|
!README.md
|
||||||
|
|
||||||
# Tests
|
# Data
|
||||||
tests/
|
data/
|
||||||
pytest.ini
|
|
||||||
|
|
||||||
# Data (will be mounted as volumes)
|
|
||||||
memory_data/
|
memory_data/
|
||||||
logs/
|
logs/
|
||||||
*.log
|
*.log
|
||||||
+77
@@ -0,0 +1,77 @@
|
|||||||
|
# --- IMPORTANT ---
|
||||||
|
# Settings are split across multiple files for clarity.
|
||||||
|
# Files (loaded in this order, last wins):
|
||||||
|
# .env.alfred — app config and service addresses (safe to commit)
|
||||||
|
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
|
||||||
|
# .env.make — build metadata synced from pyproject.toml (safe to commit)
|
||||||
|
#
|
||||||
|
# To customize: edit .env.alfred for config, .env.secrets for secrets.
|
||||||
|
|
||||||
|
# --- Alfred ---
|
||||||
|
MAX_HISTORY_MESSAGES=10
|
||||||
|
MAX_TOOL_ITERATIONS=10
|
||||||
|
REQUEST_TIMEOUT=30
|
||||||
|
|
||||||
|
# LLM Settings
|
||||||
|
LLM_TEMPERATURE=0.2
|
||||||
|
|
||||||
|
# Persistence
|
||||||
|
DATA_STORAGE_DIR=data
|
||||||
|
|
||||||
|
# Network
|
||||||
|
HOST=0.0.0.0
|
||||||
|
PORT=3080
|
||||||
|
|
||||||
|
# --- DATABASES ---
|
||||||
|
# Passwords and connection URIs are auto-generated in .env.secrets.
|
||||||
|
# Edit host/port/user/dbname here if needed.
|
||||||
|
|
||||||
|
# MongoDB (Application Data)
|
||||||
|
MONGO_HOST=mongodb
|
||||||
|
MONGO_PORT=27017
|
||||||
|
MONGO_USER=alfred
|
||||||
|
MONGO_DB_NAME=alfred
|
||||||
|
|
||||||
|
# PostgreSQL (Vector Database / RAG)
|
||||||
|
POSTGRES_HOST=vectordb
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_USER=alfred
|
||||||
|
POSTGRES_DB_NAME=alfred
|
||||||
|
|
||||||
|
# --- EXTERNAL SERVICES ---
|
||||||
|
|
||||||
|
# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
|
||||||
|
# → TMDB_API_KEY goes in .env.secrets
|
||||||
|
TMDB_BASE_URL=https://api.themoviedb.org/3
|
||||||
|
|
||||||
|
# qBittorrent
|
||||||
|
# → QBITTORRENT_PASSWORD goes in .env.secrets
|
||||||
|
QBITTORRENT_URL=http://qbittorrent:16140
|
||||||
|
QBITTORRENT_USERNAME=admin
|
||||||
|
QBITTORRENT_PORT=16140
|
||||||
|
|
||||||
|
# Meilisearch
|
||||||
|
# → MEILI_MASTER_KEY goes in .env.secrets
|
||||||
|
# MEILI_ENABLED=false # KEY DOESN'T EXIST => SEARCH IS THE PROPER KEY
|
||||||
|
SEARCH=false
|
||||||
|
MEILI_NO_ANALYTICS=true
|
||||||
|
MEILI_HOST=http://meilisearch:7700
|
||||||
|
|
||||||
|
# --- LLM CONFIGURATION ---
|
||||||
|
# Providers: local, openai, anthropic, deepseek, google, kimi
|
||||||
|
# → API keys go in .env.secrets
|
||||||
|
DEFAULT_LLM_PROVIDER=local
|
||||||
|
|
||||||
|
# Local LLM (Ollama)
|
||||||
|
#OLLAMA_BASE_URL=http://ollama:11434
|
||||||
|
#OLLAMA_MODEL=llama3.3:latest
|
||||||
|
|
||||||
|
OLLAMA_BASE_URL=http://10.0.0.11:11434
|
||||||
|
OLLAMA_MODEL=glm-4.7-flash:latest
|
||||||
|
|
||||||
|
# --- RAG ENGINE ---
|
||||||
|
RAG_ENABLED=TRUE
|
||||||
|
RAG_API_URL=http://rag_api:8000
|
||||||
|
RAG_API_PORT=8000
|
||||||
|
EMBEDDINGS_PROVIDER=ollama
|
||||||
|
EMBEDDINGS_MODEL=nomic-embed-text
|
||||||
+61
-57
@@ -1,69 +1,73 @@
|
|||||||
# Agent Media - Environment Variables
|
# --- IMPORTANT ---
|
||||||
|
# Settings are split across multiple files for clarity.
|
||||||
|
# Files (loaded in this order, last wins):
|
||||||
|
# .env.alfred — app config and service addresses (safe to commit)
|
||||||
|
# .env.secrets — generated secrets, passwords, URIs and API keys (DO NOT COMMIT)
|
||||||
|
# .env.make — build metadata synced from pyproject.toml (safe to commit)
|
||||||
|
#
|
||||||
|
# To customize: edit .env.alfred for config, .env.secrets for secrets.
|
||||||
|
|
||||||
# LibreChat Security Keys
|
# --- Alfred ---
|
||||||
# Generate secure keys with: openssl rand -base64 32
|
MAX_HISTORY_MESSAGES=10
|
||||||
JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
|
MAX_TOOL_ITERATIONS=10
|
||||||
JWT_REFRESH_SECRET=your-super-secret-refresh-key-change-this-too
|
REQUEST_TIMEOUT=30
|
||||||
|
|
||||||
# Generate with: openssl rand -hex 16 (for CREDS_KEY)
|
# LLM Settings
|
||||||
CREDS_KEY=your-32-character-secret-key-here
|
LLM_TEMPERATURE=0.2
|
||||||
|
|
||||||
# Generate with: openssl rand -hex 8 (for CREDS_IV)
|
# Persistence
|
||||||
CREDS_IV=your-16-character-iv-here
|
DATA_STORAGE_DIR=data
|
||||||
|
|
||||||
# LibreChat Configuration
|
# Network
|
||||||
DOMAIN_CLIENT=http://localhost:3080
|
HOST=0.0.0.0
|
||||||
DOMAIN_SERVER=http://localhost:3080
|
PORT=3080
|
||||||
|
|
||||||
# Session expiry (in milliseconds)
|
# --- DATABASES ---
|
||||||
# Default: 15 minutes
|
# Passwords and connection URIs are auto-generated in .env.secrets.
|
||||||
SESSION_EXPIRY=900000
|
# Edit host/port/user/dbname here if needed.
|
||||||
|
|
||||||
# Refresh token expiry (in milliseconds)
|
# MongoDB (Application Data)
|
||||||
# Default: 7 days
|
MONGO_HOST=mongodb
|
||||||
REFRESH_TOKEN_EXPIRY=604800000
|
MONGO_PORT=27017
|
||||||
|
MONGO_USER=alfred
|
||||||
|
MONGO_DB_NAME=LibreChat
|
||||||
|
|
||||||
# Meilisearch Configuration
|
# PostgreSQL (Vector Database / RAG)
|
||||||
# Master key for Meilisearch (generate with: openssl rand -base64 32)
|
POSTGRES_HOST=vectordb
|
||||||
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFU
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_USER=alfred
|
||||||
|
POSTGRES_DB_NAME=alfred
|
||||||
|
|
||||||
# PostgreSQL Configuration (for RAG API)
|
# --- EXTERNAL SERVICES ---
|
||||||
POSTGRES_DB=librechat_rag
|
|
||||||
POSTGRES_USER=postgres
|
|
||||||
POSTGRES_PASSWORD=postgres
|
|
||||||
|
|
||||||
# RAG API Configuration (Vector Database)
|
# TMDB — Media metadata (required). Get your key at https://www.themoviedb.org/
|
||||||
RAG_COLLECTION_NAME=testcollection
|
# → TMDB_API_KEY goes in .env.secrets
|
||||||
RAG_EMBEDDINGS_PROVIDER=openai
|
TMDB_BASE_URL=https://api.themoviedb.org/3
|
||||||
RAG_EMBEDDINGS_MODEL=text-embedding-3-small
|
|
||||||
|
|
||||||
# API Keys
|
# qBittorrent
|
||||||
# OpenAI API Key (required for RAG embeddings)
|
# → QBITTORRENT_PASSWORD goes in .env.secrets
|
||||||
OPENAI_API_KEY=your-openai-api-key-here
|
QBITTORRENT_URL=http://qbittorrent:16140
|
||||||
|
|
||||||
# Deepseek API Key (for LLM in agent-brain)
|
|
||||||
DEEPSEEK_API_KEY=your-deepseek-api-key-here
|
|
||||||
|
|
||||||
# Agent Brain Configuration
|
|
||||||
|
|
||||||
# LLM Provider (deepseek or ollama)
|
|
||||||
LLM_PROVIDER=deepseek
|
|
||||||
|
|
||||||
# Memory storage directory (inside container)
|
|
||||||
MEMORY_STORAGE_DIR=/data/memory
|
|
||||||
|
|
||||||
# API Key for agent-brain (used by LibreChat custom endpoint)
|
|
||||||
AGENT_BRAIN_API_KEY=agent-brain-secret-key
|
|
||||||
|
|
||||||
# External Services (Optional)
|
|
||||||
# TMDB API Key (for movie metadata)
|
|
||||||
TMDB_API_KEY=your-tmdb-key
|
|
||||||
|
|
||||||
# qBittorrent Configuration
|
|
||||||
QBITTORRENT_URL=http://localhost:8080
|
|
||||||
QBITTORRENT_USERNAME=admin
|
QBITTORRENT_USERNAME=admin
|
||||||
QBITTORRENT_PASSWORD=adminpass
|
QBITTORRENT_PORT=16140
|
||||||
|
|
||||||
# Debug Options
|
# Meilisearch
|
||||||
DEBUG_LOGGING=false
|
# → MEILI_MASTER_KEY goes in .env.secrets
|
||||||
DEBUG_CONSOLE=false
|
MEILI_ENABLED=FALSE
|
||||||
|
MEILI_NO_ANALYTICS=TRUE
|
||||||
|
MEILI_HOST=http://meilisearch:7700
|
||||||
|
|
||||||
|
# --- LLM CONFIGURATION ---
|
||||||
|
# Providers: local, openai, anthropic, deepseek, google, kimi
|
||||||
|
# → API keys go in .env.secrets
|
||||||
|
DEFAULT_LLM_PROVIDER=local
|
||||||
|
|
||||||
|
# Local LLM (Ollama)
|
||||||
|
OLLAMA_BASE_URL=http://ollama:11434
|
||||||
|
OLLAMA_MODEL=llama3.3:latest
|
||||||
|
|
||||||
|
# --- RAG ENGINE ---
|
||||||
|
RAG_ENABLED=TRUE
|
||||||
|
RAG_API_URL=http://rag_api:8000
|
||||||
|
RAG_API_PORT=8000
|
||||||
|
EMBEDDINGS_PROVIDER=ollama
|
||||||
|
EMBEDDINGS_MODEL=nomic-embed-text
|
||||||
|
|||||||
+878
@@ -0,0 +1,878 @@
|
|||||||
|
#=====================================================================#
|
||||||
|
# LibreChat Configuration #
|
||||||
|
#=====================================================================#
|
||||||
|
# Please refer to the reference documentation for assistance #
|
||||||
|
# with configuring your LibreChat environment. #
|
||||||
|
# #
|
||||||
|
# https://www.librechat.ai/docs/configuration/dotenv #
|
||||||
|
#=====================================================================#
|
||||||
|
|
||||||
|
#==================================================#
|
||||||
|
# Server Configuration #
|
||||||
|
#==================================================#
|
||||||
|
|
||||||
|
HOST=localhost
|
||||||
|
PORT=3080
|
||||||
|
|
||||||
|
MONGO_URI=mongodb://127.0.0.1:27017/LibreChat
|
||||||
|
# The maximum number of connections in the connection pool.
|
||||||
|
MONGO_MAX_POOL_SIZE=
|
||||||
|
# The minimum number of connections in the connection pool.
|
||||||
|
MONGO_MIN_POOL_SIZE=
|
||||||
|
# The maximum number of connections that may be in the process of being established concurrently by the connection pool.
|
||||||
|
MONGO_MAX_CONNECTING=
|
||||||
|
# The maximum number of milliseconds that a connection can remain idle in the pool before being removed and closed.
|
||||||
|
MONGO_MAX_IDLE_TIME_MS=
|
||||||
|
# The maximum time in milliseconds that a thread can wait for a connection to become available.
|
||||||
|
MONGO_WAIT_QUEUE_TIMEOUT_MS=
|
||||||
|
# Set to false to disable automatic index creation for all models associated with this connection.
|
||||||
|
MONGO_AUTO_INDEX=
|
||||||
|
# Set to `false` to disable Mongoose automatically calling `createCollection()` on every model created on this connection.
|
||||||
|
MONGO_AUTO_CREATE=
|
||||||
|
|
||||||
|
DOMAIN_CLIENT=http://localhost:3080
|
||||||
|
DOMAIN_SERVER=http://localhost:3080
|
||||||
|
|
||||||
|
NO_INDEX=true
|
||||||
|
# Use the address that is at most n number of hops away from the Express application.
|
||||||
|
# req.socket.remoteAddress is the first hop, and the rest are looked for in the X-Forwarded-For header from right to left.
|
||||||
|
# A value of 0 means that the first untrusted address would be req.socket.remoteAddress, i.e. there is no reverse proxy.
|
||||||
|
# Defaulted to 1.
|
||||||
|
TRUST_PROXY=1
|
||||||
|
|
||||||
|
# Minimum password length for user authentication
|
||||||
|
# Default: 8
|
||||||
|
# Note: When using LDAP authentication, you may want to set this to 1
|
||||||
|
# to bypass local password validation, as LDAP servers handle their own
|
||||||
|
# password policies.
|
||||||
|
# MIN_PASSWORD_LENGTH=8
|
||||||
|
|
||||||
|
# When enabled, the app will continue running after encountering uncaught exceptions
|
||||||
|
# instead of exiting the process. Not recommended for production unless necessary.
|
||||||
|
# CONTINUE_ON_UNCAUGHT_EXCEPTION=false
|
||||||
|
|
||||||
|
#===============#
|
||||||
|
# JSON Logging #
|
||||||
|
#===============#
|
||||||
|
|
||||||
|
# Use when processing console logs in cloud deployments such as GCP/AWS
|
||||||
|
CONSOLE_JSON=false
|
||||||
|
|
||||||
|
#===============#
|
||||||
|
# Debug Logging #
|
||||||
|
#===============#
|
||||||
|
|
||||||
|
DEBUG_LOGGING=true
|
||||||
|
DEBUG_CONSOLE=false
|
||||||
|
# Set to true to enable agent debug logging
|
||||||
|
AGENT_DEBUG_LOGGING=false
|
||||||
|
|
||||||
|
# Enable memory diagnostics (logs heap/RSS snapshots every 60s, auto-enabled with --inspect)
|
||||||
|
# MEM_DIAG=true
|
||||||
|
|
||||||
|
#=============#
|
||||||
|
# Permissions #
|
||||||
|
#=============#
|
||||||
|
|
||||||
|
# UID=1000
|
||||||
|
# GID=1000
|
||||||
|
|
||||||
|
#==============#
|
||||||
|
# Node Options #
|
||||||
|
#==============#
|
||||||
|
|
||||||
|
# NOTE: NODE_MAX_OLD_SPACE_SIZE is NOT recognized by Node.js directly.
|
||||||
|
# This variable is used as a build argument for Docker or CI/CD workflows,
|
||||||
|
# and is NOT used by Node.js to set the heap size at runtime.
|
||||||
|
# To configure Node.js memory, use NODE_OPTIONS, e.g.:
|
||||||
|
# NODE_OPTIONS="--max-old-space-size=6144"
|
||||||
|
# See: https://nodejs.org/api/cli.html#--max-old-space-sizesize-in-mib
|
||||||
|
NODE_MAX_OLD_SPACE_SIZE=6144
|
||||||
|
|
||||||
|
#===============#
|
||||||
|
# Configuration #
|
||||||
|
#===============#
|
||||||
|
# Use an absolute path, a relative path, or a URL
|
||||||
|
|
||||||
|
# CONFIG_PATH="/alternative/path/to/librechat.yaml"
|
||||||
|
|
||||||
|
#==================#
|
||||||
|
# Langfuse Tracing #
|
||||||
|
#==================#
|
||||||
|
|
||||||
|
# Get Langfuse API keys for your project from the project settings page: https://cloud.langfuse.com
|
||||||
|
|
||||||
|
# LANGFUSE_PUBLIC_KEY=
|
||||||
|
# LANGFUSE_SECRET_KEY=
|
||||||
|
# LANGFUSE_BASE_URL=
|
||||||
|
|
||||||
|
#===================================================#
|
||||||
|
# Endpoints #
|
||||||
|
#===================================================#
|
||||||
|
|
||||||
|
# ENDPOINTS=openAI,assistants,azureOpenAI,google,anthropic
|
||||||
|
|
||||||
|
PROXY=
|
||||||
|
|
||||||
|
#===================================#
|
||||||
|
# Known Endpoints - librechat.yaml #
|
||||||
|
#===================================#
|
||||||
|
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints
|
||||||
|
|
||||||
|
# ANYSCALE_API_KEY=
|
||||||
|
# APIPIE_API_KEY=
|
||||||
|
# COHERE_API_KEY=
|
||||||
|
# DEEPSEEK_API_KEY=
|
||||||
|
# DATABRICKS_API_KEY=
|
||||||
|
# FIREWORKS_API_KEY=
|
||||||
|
# GROQ_API_KEY=
|
||||||
|
# HUGGINGFACE_TOKEN=
|
||||||
|
# MISTRAL_API_KEY=
|
||||||
|
# OPENROUTER_KEY=
|
||||||
|
# PERPLEXITY_API_KEY=
|
||||||
|
# SHUTTLEAI_API_KEY=
|
||||||
|
# TOGETHERAI_API_KEY=
|
||||||
|
# UNIFY_API_KEY=
|
||||||
|
# XAI_API_KEY=
|
||||||
|
|
||||||
|
#============#
|
||||||
|
# Anthropic #
|
||||||
|
#============#
|
||||||
|
|
||||||
|
ANTHROPIC_API_KEY=user_provided
|
||||||
|
# ANTHROPIC_MODELS=claude-sonnet-4-6,claude-opus-4-6,claude-opus-4-20250514,claude-sonnet-4-20250514,claude-3-7-sonnet-20250219,claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022,claude-3-opus-20240229,claude-3-sonnet-20240229,claude-3-haiku-20240307
|
||||||
|
# ANTHROPIC_REVERSE_PROXY=
|
||||||
|
|
||||||
|
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
|
||||||
|
# ANTHROPIC_USE_VERTEX=
|
||||||
|
# ANTHROPIC_VERTEX_REGION=us-east5
|
||||||
|
|
||||||
|
#============#
|
||||||
|
# Azure #
|
||||||
|
#============#
|
||||||
|
|
||||||
|
# Note: these variables are DEPRECATED
|
||||||
|
# Use the `librechat.yaml` configuration for `azureOpenAI` instead
|
||||||
|
# You may also continue to use them if you opt out of using the `librechat.yaml` configuration
|
||||||
|
|
||||||
|
# AZURE_OPENAI_DEFAULT_MODEL=gpt-3.5-turbo # Deprecated
|
||||||
|
# AZURE_OPENAI_MODELS=gpt-3.5-turbo,gpt-4 # Deprecated
|
||||||
|
# AZURE_USE_MODEL_AS_DEPLOYMENT_NAME=TRUE # Deprecated
|
||||||
|
# AZURE_API_KEY= # Deprecated
|
||||||
|
# AZURE_OPENAI_API_INSTANCE_NAME= # Deprecated
|
||||||
|
# AZURE_OPENAI_API_DEPLOYMENT_NAME= # Deprecated
|
||||||
|
# AZURE_OPENAI_API_VERSION= # Deprecated
|
||||||
|
# AZURE_OPENAI_API_COMPLETIONS_DEPLOYMENT_NAME= # Deprecated
|
||||||
|
# AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME= # Deprecated
|
||||||
|
|
||||||
|
#=================#
|
||||||
|
# AWS Bedrock #
|
||||||
|
#=================#
|
||||||
|
|
||||||
|
# BEDROCK_AWS_DEFAULT_REGION=us-east-1 # A default region must be provided
|
||||||
|
# BEDROCK_AWS_ACCESS_KEY_ID=someAccessKey
|
||||||
|
# BEDROCK_AWS_SECRET_ACCESS_KEY=someSecretAccessKey
|
||||||
|
# BEDROCK_AWS_SESSION_TOKEN=someSessionToken
|
||||||
|
|
||||||
|
# Note: This example list is not meant to be exhaustive. If omitted, all known, supported model IDs will be included for you.
|
||||||
|
# BEDROCK_AWS_MODELS=anthropic.claude-sonnet-4-6,anthropic.claude-opus-4-6-v1,anthropic.claude-3-5-sonnet-20240620-v1:0,meta.llama3-1-8b-instruct-v1:0
|
||||||
|
# Cross-region inference model IDs: us.anthropic.claude-sonnet-4-6,us.anthropic.claude-opus-4-6-v1,global.anthropic.claude-opus-4-6-v1
|
||||||
|
|
||||||
|
# See all Bedrock model IDs here: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
|
||||||
|
|
||||||
|
# Notes on specific models:
|
||||||
|
# The following models are not supported because they do not support streaming:
|
||||||
|
# ai21.j2-mid-v1
|
||||||
|
|
||||||
|
# The following models are not supported because they do not support conversation history:
|
||||||
|
# ai21.j2-ultra-v1, cohere.command-text-v14, cohere.command-light-text-v14
|
||||||
|
|
||||||
|
#============#
|
||||||
|
# Google #
|
||||||
|
#============#
|
||||||
|
|
||||||
|
GOOGLE_KEY=user_provided
|
||||||
|
|
||||||
|
# GOOGLE_REVERSE_PROXY=
|
||||||
|
# Some reverse proxies do not support the X-goog-api-key header, uncomment to pass the API key in Authorization header instead.
|
||||||
|
# GOOGLE_AUTH_HEADER=true
|
||||||
|
|
||||||
|
# Gemini API (AI Studio)
|
||||||
|
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash,gemini-2.0-flash-lite
|
||||||
|
|
||||||
|
# Vertex AI
|
||||||
|
# GOOGLE_MODELS=gemini-3.1-pro-preview,gemini-3.1-pro-preview-customtools,gemini-3.1-flash-lite-preview,gemini-2.5-pro,gemini-2.5-flash,gemini-2.5-flash-lite,gemini-2.0-flash-001,gemini-2.0-flash-lite-001
|
||||||
|
|
||||||
|
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
|
||||||
|
|
||||||
|
# Google Cloud region for Vertex AI (used by both chat and image generation)
|
||||||
|
# GOOGLE_LOC=us-central1
|
||||||
|
|
||||||
|
# Alternative region env var for Gemini Image Generation
|
||||||
|
# GOOGLE_CLOUD_LOCATION=global
|
||||||
|
|
||||||
|
# Vertex AI Service Account Configuration
|
||||||
|
# Path to your Google Cloud service account JSON file
|
||||||
|
# GOOGLE_SERVICE_KEY_FILE=/path/to/service-account.json
|
||||||
|
|
||||||
|
# Google Safety Settings
|
||||||
|
# NOTE: These settings apply to both Vertex AI and Gemini API (AI Studio)
|
||||||
|
#
|
||||||
|
# For Vertex AI:
|
||||||
|
# To use the BLOCK_NONE setting, you need either:
|
||||||
|
# (a) Access through an allowlist via your Google account team, or
|
||||||
|
# (b) Switch to monthly invoiced billing: https://cloud.google.com/billing/docs/how-to/invoiced-billing
|
||||||
|
#
|
||||||
|
# For Gemini API (AI Studio):
|
||||||
|
# BLOCK_NONE is available by default, no special account requirements.
|
||||||
|
#
|
||||||
|
# Available options: BLOCK_NONE, BLOCK_ONLY_HIGH, BLOCK_MEDIUM_AND_ABOVE, BLOCK_LOW_AND_ABOVE
|
||||||
|
#
|
||||||
|
# GOOGLE_SAFETY_SEXUALLY_EXPLICIT=BLOCK_ONLY_HIGH
|
||||||
|
# GOOGLE_SAFETY_HATE_SPEECH=BLOCK_ONLY_HIGH
|
||||||
|
# GOOGLE_SAFETY_HARASSMENT=BLOCK_ONLY_HIGH
|
||||||
|
# GOOGLE_SAFETY_DANGEROUS_CONTENT=BLOCK_ONLY_HIGH
|
||||||
|
# GOOGLE_SAFETY_CIVIC_INTEGRITY=BLOCK_ONLY_HIGH
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Gemini Image Generation #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
# Gemini Image Generation Tool (for Agents)
|
||||||
|
# Supports multiple authentication methods in priority order:
|
||||||
|
# 1. User-provided API key (via GUI)
|
||||||
|
# 2. GEMINI_API_KEY env var (admin-configured)
|
||||||
|
# 3. GOOGLE_KEY env var (shared with Google chat endpoint)
|
||||||
|
# 4. Vertex AI service account (via GOOGLE_SERVICE_KEY_FILE)
|
||||||
|
|
||||||
|
# Option A: Use dedicated Gemini API key for image generation
|
||||||
|
# GEMINI_API_KEY=your-gemini-api-key
|
||||||
|
|
||||||
|
# Vertex AI model for image generation (defaults to gemini-2.5-flash-image)
|
||||||
|
# GEMINI_IMAGE_MODEL=gemini-2.5-flash-image
|
||||||
|
|
||||||
|
#============#
|
||||||
|
# OpenAI #
|
||||||
|
#============#
|
||||||
|
|
||||||
|
OPENAI_API_KEY=user_provided
|
||||||
|
# OPENAI_MODELS=gpt-5,gpt-5-codex,gpt-5-mini,gpt-5-nano,o3-pro,o3,o4-mini,gpt-4.1,gpt-4.1-mini,gpt-4.1-nano,o3-mini,o1-pro,o1,gpt-4o,gpt-4o-mini
|
||||||
|
|
||||||
|
DEBUG_OPENAI=false
|
||||||
|
|
||||||
|
# TITLE_CONVO=false
|
||||||
|
# OPENAI_TITLE_MODEL=gpt-4o-mini
|
||||||
|
|
||||||
|
# OPENAI_SUMMARIZE=true
|
||||||
|
# OPENAI_SUMMARY_MODEL=gpt-4o-mini
|
||||||
|
|
||||||
|
# OPENAI_FORCE_PROMPT=true
|
||||||
|
|
||||||
|
# OPENAI_REVERSE_PROXY=
|
||||||
|
|
||||||
|
# OPENAI_ORGANIZATION=
|
||||||
|
|
||||||
|
#====================#
|
||||||
|
# Assistants API #
|
||||||
|
#====================#
|
||||||
|
|
||||||
|
ASSISTANTS_API_KEY=user_provided
|
||||||
|
# ASSISTANTS_BASE_URL=
|
||||||
|
# ASSISTANTS_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-16k-0613,gpt-3.5-turbo-16k,gpt-3.5-turbo,gpt-4,gpt-4-0314,gpt-4-32k-0314,gpt-4-0613,gpt-3.5-turbo-0613,gpt-3.5-turbo-1106,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview
|
||||||
|
|
||||||
|
#==========================#
|
||||||
|
# Azure Assistants API #
|
||||||
|
#==========================#
|
||||||
|
|
||||||
|
# Note: You should map your credentials with custom variables according to your Azure OpenAI Configuration
|
||||||
|
# The models for Azure Assistants are also determined by your Azure OpenAI configuration.
|
||||||
|
|
||||||
|
# More info, including how to enable use of Assistants with Azure here:
|
||||||
|
# https://www.librechat.ai/docs/configuration/librechat_yaml/ai_endpoints/azure#using-assistants-with-azure
|
||||||
|
|
||||||
|
CREDS_KEY=f34be427ebb29de8d88c107a71546019685ed8b241d8f2ed00c3df97ad2566f0
|
||||||
|
CREDS_IV=e2341419ec3dd3d19b13a1a87fafcbfb
|
||||||
|
|
||||||
|
# Azure AI Search
|
||||||
|
#-----------------
|
||||||
|
AZURE_AI_SEARCH_SERVICE_ENDPOINT=
|
||||||
|
AZURE_AI_SEARCH_INDEX_NAME=
|
||||||
|
AZURE_AI_SEARCH_API_KEY=
|
||||||
|
|
||||||
|
AZURE_AI_SEARCH_API_VERSION=
|
||||||
|
AZURE_AI_SEARCH_SEARCH_OPTION_QUERY_TYPE=
|
||||||
|
AZURE_AI_SEARCH_SEARCH_OPTION_TOP=
|
||||||
|
AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
|
||||||
|
|
||||||
|
# OpenAI Image Tools Customization
|
||||||
|
#----------------
|
||||||
|
# IMAGE_GEN_OAI_API_KEY= # Create or reuse OpenAI API key for image generation tool
|
||||||
|
# IMAGE_GEN_OAI_BASEURL= # Custom OpenAI base URL for image generation tool
|
||||||
|
# IMAGE_GEN_OAI_AZURE_API_VERSION= # Custom Azure OpenAI deployments
|
||||||
|
# IMAGE_GEN_OAI_MODEL=gpt-image-1 # OpenAI image model (e.g., gpt-image-1, gpt-image-1.5)
|
||||||
|
# IMAGE_GEN_OAI_DESCRIPTION=
|
||||||
|
# IMAGE_GEN_OAI_DESCRIPTION_WITH_FILES=Custom description for image generation tool when files are present
|
||||||
|
# IMAGE_GEN_OAI_DESCRIPTION_NO_FILES=Custom description for image generation tool when no files are present
|
||||||
|
# IMAGE_EDIT_OAI_DESCRIPTION=Custom description for image editing tool
|
||||||
|
# IMAGE_GEN_OAI_PROMPT_DESCRIPTION=Custom prompt description for image generation tool
|
||||||
|
# IMAGE_EDIT_OAI_PROMPT_DESCRIPTION=Custom prompt description for image editing tool
|
||||||
|
|
||||||
|
# DALL·E
|
||||||
|
#----------------
|
||||||
|
# DALLE_API_KEY=
|
||||||
|
# DALLE3_API_KEY=
|
||||||
|
# DALLE2_API_KEY=
|
||||||
|
# DALLE3_SYSTEM_PROMPT=
|
||||||
|
# DALLE2_SYSTEM_PROMPT=
|
||||||
|
# DALLE_REVERSE_PROXY=
|
||||||
|
# DALLE3_BASEURL=
|
||||||
|
# DALLE2_BASEURL=
|
||||||
|
|
||||||
|
# DALL·E (via Azure OpenAI)
|
||||||
|
# Note: requires some of the variables above to be set
|
||||||
|
#----------------
|
||||||
|
# DALLE3_AZURE_API_VERSION=
|
||||||
|
# DALLE2_AZURE_API_VERSION=
|
||||||
|
|
||||||
|
# Flux
|
||||||
|
#-----------------
|
||||||
|
FLUX_API_BASE_URL=https://api.us1.bfl.ai
|
||||||
|
# FLUX_API_BASE_URL = 'https://api.bfl.ml';
|
||||||
|
|
||||||
|
# Get your API key at https://api.us1.bfl.ai/auth/profile
|
||||||
|
# FLUX_API_KEY=
|
||||||
|
|
||||||
|
# Google
|
||||||
|
#-----------------
|
||||||
|
GOOGLE_SEARCH_API_KEY=
|
||||||
|
GOOGLE_CSE_ID=
|
||||||
|
|
||||||
|
# Stable Diffusion
|
||||||
|
#-----------------
|
||||||
|
SD_WEBUI_URL=http://host.docker.internal:7860
|
||||||
|
|
||||||
|
# Tavily
|
||||||
|
#-----------------
|
||||||
|
TAVILY_API_KEY=
|
||||||
|
|
||||||
|
# Traversaal
|
||||||
|
#-----------------
|
||||||
|
TRAVERSAAL_API_KEY=
|
||||||
|
|
||||||
|
# WolframAlpha
|
||||||
|
#-----------------
|
||||||
|
WOLFRAM_APP_ID=
|
||||||
|
|
||||||
|
# Zapier
|
||||||
|
#-----------------
|
||||||
|
ZAPIER_NLA_API_KEY=
|
||||||
|
|
||||||
|
#==================================================#
|
||||||
|
# Search #
|
||||||
|
#==================================================#
|
||||||
|
|
||||||
|
SEARCH=true
|
||||||
|
MEILI_NO_ANALYTICS=true
|
||||||
|
MEILI_HOST=http://0.0.0.0:7700
|
||||||
|
MEILI_MASTER_KEY=DrhYf7zENyR6AlUCKmnz0eYASOQdl6zxH7s7MKFSfFCt
|
||||||
|
|
||||||
|
# Optional: Disable indexing, useful in a multi-node setup
|
||||||
|
# where only one instance should perform an index sync.
|
||||||
|
# MEILI_NO_SYNC=true
|
||||||
|
|
||||||
|
#==================================================#
|
||||||
|
# Speech to Text & Text to Speech #
|
||||||
|
#==================================================#
|
||||||
|
|
||||||
|
STT_API_KEY=
|
||||||
|
TTS_API_KEY=
|
||||||
|
|
||||||
|
#==================================================#
|
||||||
|
# RAG #
|
||||||
|
#==================================================#
|
||||||
|
# More info: https://www.librechat.ai/docs/configuration/rag_api
|
||||||
|
|
||||||
|
# RAG_OPENAI_BASEURL=
|
||||||
|
# RAG_OPENAI_API_KEY=
|
||||||
|
# RAG_USE_FULL_CONTEXT=
|
||||||
|
# EMBEDDINGS_PROVIDER=openai
|
||||||
|
# EMBEDDINGS_MODEL=text-embedding-3-small
|
||||||
|
|
||||||
|
#===================================================#
|
||||||
|
# User System #
|
||||||
|
#===================================================#
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Moderation #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
OPENAI_MODERATION=false
|
||||||
|
OPENAI_MODERATION_API_KEY=
|
||||||
|
# OPENAI_MODERATION_REVERSE_PROXY=
|
||||||
|
|
||||||
|
BAN_VIOLATIONS=true
|
||||||
|
BAN_DURATION=1000 * 60 * 60 * 2
|
||||||
|
BAN_INTERVAL=20
|
||||||
|
|
||||||
|
LOGIN_VIOLATION_SCORE=1
|
||||||
|
REGISTRATION_VIOLATION_SCORE=1
|
||||||
|
CONCURRENT_VIOLATION_SCORE=1
|
||||||
|
MESSAGE_VIOLATION_SCORE=1
|
||||||
|
NON_BROWSER_VIOLATION_SCORE=20
|
||||||
|
TTS_VIOLATION_SCORE=0
|
||||||
|
STT_VIOLATION_SCORE=0
|
||||||
|
FORK_VIOLATION_SCORE=0
|
||||||
|
IMPORT_VIOLATION_SCORE=0
|
||||||
|
FILE_UPLOAD_VIOLATION_SCORE=0
|
||||||
|
|
||||||
|
LOGIN_MAX=7
|
||||||
|
LOGIN_WINDOW=5
|
||||||
|
REGISTER_MAX=5
|
||||||
|
REGISTER_WINDOW=60
|
||||||
|
|
||||||
|
LIMIT_CONCURRENT_MESSAGES=true
|
||||||
|
CONCURRENT_MESSAGE_MAX=2
|
||||||
|
|
||||||
|
LIMIT_MESSAGE_IP=true
|
||||||
|
MESSAGE_IP_MAX=40
|
||||||
|
MESSAGE_IP_WINDOW=1
|
||||||
|
|
||||||
|
LIMIT_MESSAGE_USER=false
|
||||||
|
MESSAGE_USER_MAX=40
|
||||||
|
MESSAGE_USER_WINDOW=1
|
||||||
|
|
||||||
|
ILLEGAL_MODEL_REQ_SCORE=5
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Balance #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
# CHECK_BALANCE=false
|
||||||
|
# START_BALANCE=20000 # note: the number of tokens that will be credited after registration.
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Registration and Login #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
ALLOW_EMAIL_LOGIN=true
|
||||||
|
ALLOW_REGISTRATION=true
|
||||||
|
ALLOW_SOCIAL_LOGIN=false
|
||||||
|
ALLOW_SOCIAL_REGISTRATION=false
|
||||||
|
ALLOW_PASSWORD_RESET=false
|
||||||
|
# ALLOW_ACCOUNT_DELETION=true # note: enabled by default if omitted/commented out
|
||||||
|
ALLOW_UNVERIFIED_EMAIL_LOGIN=true
|
||||||
|
|
||||||
|
SESSION_EXPIRY=1000 * 60 * 15
|
||||||
|
REFRESH_TOKEN_EXPIRY=(1000 * 60 * 60 * 24) * 7
|
||||||
|
|
||||||
|
JWT_SECRET=16f8c0ef4a5d391b26034086c628469d3f9f497f08163ab9b40137092f2909ef
|
||||||
|
JWT_REFRESH_SECRET=eaa5191f2914e30b9387fd84e254e4ba6fc51b4654968a9b0803b456a54b8418
|
||||||
|
|
||||||
|
# Discord
|
||||||
|
DISCORD_CLIENT_ID=
|
||||||
|
DISCORD_CLIENT_SECRET=
|
||||||
|
DISCORD_CALLBACK_URL=/oauth/discord/callback
|
||||||
|
|
||||||
|
# Facebook
|
||||||
|
FACEBOOK_CLIENT_ID=
|
||||||
|
FACEBOOK_CLIENT_SECRET=
|
||||||
|
FACEBOOK_CALLBACK_URL=/oauth/facebook/callback
|
||||||
|
|
||||||
|
# GitHub
|
||||||
|
GITHUB_CLIENT_ID=
|
||||||
|
GITHUB_CLIENT_SECRET=
|
||||||
|
GITHUB_CALLBACK_URL=/oauth/github/callback
|
||||||
|
# GitHub Enterprise
|
||||||
|
# GITHUB_ENTERPRISE_BASE_URL=
|
||||||
|
# GITHUB_ENTERPRISE_USER_AGENT=
|
||||||
|
|
||||||
|
# Google
|
||||||
|
GOOGLE_CLIENT_ID=
|
||||||
|
GOOGLE_CLIENT_SECRET=
|
||||||
|
GOOGLE_CALLBACK_URL=/oauth/google/callback
|
||||||
|
|
||||||
|
# Apple
|
||||||
|
APPLE_CLIENT_ID=
|
||||||
|
APPLE_TEAM_ID=
|
||||||
|
APPLE_KEY_ID=
|
||||||
|
APPLE_PRIVATE_KEY_PATH=
|
||||||
|
APPLE_CALLBACK_URL=/oauth/apple/callback
|
||||||
|
|
||||||
|
# OpenID
|
||||||
|
OPENID_CLIENT_ID=
|
||||||
|
OPENID_CLIENT_SECRET=
|
||||||
|
OPENID_ISSUER=
|
||||||
|
OPENID_SESSION_SECRET=
|
||||||
|
OPENID_SCOPE="openid profile email"
|
||||||
|
OPENID_CALLBACK_URL=/oauth/openid/callback
|
||||||
|
OPENID_REQUIRED_ROLE=
|
||||||
|
OPENID_REQUIRED_ROLE_TOKEN_KIND=
|
||||||
|
OPENID_REQUIRED_ROLE_PARAMETER_PATH=
|
||||||
|
OPENID_ADMIN_ROLE=
|
||||||
|
OPENID_ADMIN_ROLE_PARAMETER_PATH=
|
||||||
|
OPENID_ADMIN_ROLE_TOKEN_KIND=
|
||||||
|
# Set to determine which user info property returned from OpenID Provider to store as the User's username
|
||||||
|
OPENID_USERNAME_CLAIM=
|
||||||
|
# Set to determine which user info property returned from OpenID Provider to store as the User's name
|
||||||
|
OPENID_NAME_CLAIM=
|
||||||
|
# Set to determine which user info claim to use as the email/identifier for user matching (e.g., "upn" for Entra ID)
|
||||||
|
# When not set, defaults to: email -> preferred_username -> upn
|
||||||
|
OPENID_EMAIL_CLAIM=
|
||||||
|
# Optional audience parameter for OpenID authorization requests
|
||||||
|
OPENID_AUDIENCE=
|
||||||
|
|
||||||
|
OPENID_BUTTON_LABEL=
|
||||||
|
OPENID_IMAGE_URL=
|
||||||
|
# Set to true to automatically redirect to the OpenID provider when a user visits the login page
|
||||||
|
# This will bypass the login form completely for users, only use this if OpenID is your only authentication method
|
||||||
|
OPENID_AUTO_REDIRECT=false
|
||||||
|
# Set to true to use PKCE (Proof Key for Code Exchange) for OpenID authentication
|
||||||
|
OPENID_USE_PKCE=false
|
||||||
|
#Set to true to reuse openid tokens for authentication management instead of using the mongodb session and the custom refresh token.
|
||||||
|
OPENID_REUSE_TOKENS=
|
||||||
|
#By default, signing key verification results are cached in order to prevent excessive HTTP requests to the JWKS endpoint.
|
||||||
|
#If a signing key matching the kid is found, this will be cached and the next time this kid is requested the signing key will be served from the cache.
|
||||||
|
#Default is true.
|
||||||
|
OPENID_JWKS_URL_CACHE_ENABLED=
|
||||||
|
OPENID_JWKS_URL_CACHE_TIME= # 600000 ms eq to 10 minutes leave empty to disable caching
|
||||||
|
#Set to true to trigger token exchange flow to acquire access token for the userinfo endpoint.
|
||||||
|
OPENID_ON_BEHALF_FLOW_FOR_USERINFO_REQUIRED=
|
||||||
|
OPENID_ON_BEHALF_FLOW_USERINFO_SCOPE="user.read" # example for Scope Needed for Microsoft Graph API
|
||||||
|
# Set to true to use the OpenID Connect end session endpoint for logout
|
||||||
|
OPENID_USE_END_SESSION_ENDPOINT=
|
||||||
|
# URL to redirect to after OpenID logout (defaults to ${DOMAIN_CLIENT}/login)
|
||||||
|
OPENID_POST_LOGOUT_REDIRECT_URI=
|
||||||
|
# Maximum logout URL length before using logout_hint instead of id_token_hint (default: 2000)
|
||||||
|
OPENID_MAX_LOGOUT_URL_LENGTH=
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# SharePoint Integration #
|
||||||
|
#========================#
|
||||||
|
# Requires Entra ID (OpenID) authentication to be configured
|
||||||
|
|
||||||
|
# Enable SharePoint file picker in chat and agent panels
|
||||||
|
# ENABLE_SHAREPOINT_FILEPICKER=true
|
||||||
|
|
||||||
|
# SharePoint tenant base URL (e.g., https://yourtenant.sharepoint.com)
|
||||||
|
# SHAREPOINT_BASE_URL=https://yourtenant.sharepoint.com
|
||||||
|
|
||||||
|
# Microsoft Graph API And SharePoint scopes for file picker
|
||||||
|
# SHAREPOINT_PICKER_SHAREPOINT_SCOPE=https://yourtenant.sharepoint.com/AllSites.Read
|
||||||
|
# SHAREPOINT_PICKER_GRAPH_SCOPE=Files.Read.All
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
# SAML
|
||||||
|
# Note: If OpenID is enabled, SAML authentication will be automatically disabled.
|
||||||
|
SAML_ENTRY_POINT=
|
||||||
|
SAML_ISSUER=
|
||||||
|
SAML_CERT=
|
||||||
|
SAML_CALLBACK_URL=/oauth/saml/callback
|
||||||
|
SAML_SESSION_SECRET=
|
||||||
|
|
||||||
|
# Attribute mappings (optional)
|
||||||
|
SAML_EMAIL_CLAIM=
|
||||||
|
SAML_USERNAME_CLAIM=
|
||||||
|
SAML_GIVEN_NAME_CLAIM=
|
||||||
|
SAML_FAMILY_NAME_CLAIM=
|
||||||
|
SAML_PICTURE_CLAIM=
|
||||||
|
SAML_NAME_CLAIM=
|
||||||
|
|
||||||
|
# Login button settings (optional)
|
||||||
|
SAML_BUTTON_LABEL=
|
||||||
|
SAML_IMAGE_URL=
|
||||||
|
|
||||||
|
# Whether the SAML Response should be signed.
|
||||||
|
# - If "true", the entire `SAML Response` will be signed.
|
||||||
|
# - If "false" or unset, only the `SAML Assertion` will be signed (default behavior).
|
||||||
|
# SAML_USE_AUTHN_RESPONSE_SIGNED=
|
||||||
|
|
||||||
|
|
||||||
|
#===============================================#
|
||||||
|
# Microsoft Graph API / Entra ID Integration #
|
||||||
|
#===============================================#
|
||||||
|
|
||||||
|
# Enable Entra ID people search integration in permissions/sharing system
|
||||||
|
# When enabled, the people picker will search both local database and Entra ID
|
||||||
|
USE_ENTRA_ID_FOR_PEOPLE_SEARCH=false
|
||||||
|
|
||||||
|
# When enabled, owners of Entra ID groups will be considered members of the group
|
||||||
|
ENTRA_ID_INCLUDE_OWNERS_AS_MEMBERS=false
|
||||||
|
|
||||||
|
# Microsoft Graph API scopes needed for people/group search
|
||||||
|
# Default scopes provide access to user profiles and group memberships
|
||||||
|
OPENID_GRAPH_SCOPES=User.Read,People.Read,GroupMember.Read.All
|
||||||
|
|
||||||
|
# LDAP
|
||||||
|
LDAP_URL=
|
||||||
|
LDAP_BIND_DN=
|
||||||
|
LDAP_BIND_CREDENTIALS=
|
||||||
|
LDAP_USER_SEARCH_BASE=
|
||||||
|
#LDAP_SEARCH_FILTER="mail="
|
||||||
|
LDAP_CA_CERT_PATH=
|
||||||
|
# LDAP_TLS_REJECT_UNAUTHORIZED=
|
||||||
|
# LDAP_STARTTLS=
|
||||||
|
# LDAP_LOGIN_USES_USERNAME=true
|
||||||
|
# LDAP_ID=
|
||||||
|
# LDAP_USERNAME=
|
||||||
|
# LDAP_EMAIL=
|
||||||
|
# LDAP_FULL_NAME=
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Email Password Reset #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
EMAIL_SERVICE=
|
||||||
|
EMAIL_HOST=
|
||||||
|
EMAIL_PORT=25
|
||||||
|
EMAIL_ENCRYPTION=
|
||||||
|
EMAIL_ENCRYPTION_HOSTNAME=
|
||||||
|
EMAIL_ALLOW_SELFSIGNED=
|
||||||
|
# Leave both empty for SMTP servers that do not require authentication
|
||||||
|
EMAIL_USERNAME=
|
||||||
|
EMAIL_PASSWORD=
|
||||||
|
EMAIL_FROM_NAME=
|
||||||
|
EMAIL_FROM=noreply@librechat.ai
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Mailgun API #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
# MAILGUN_API_KEY=your-mailgun-api-key
|
||||||
|
# MAILGUN_DOMAIN=mg.yourdomain.com
|
||||||
|
# EMAIL_FROM=noreply@yourdomain.com
|
||||||
|
# EMAIL_FROM_NAME="LibreChat"
|
||||||
|
|
||||||
|
# # Optional: For EU region
|
||||||
|
# MAILGUN_HOST=https://api.eu.mailgun.net
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Firebase CDN #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
FIREBASE_API_KEY=
|
||||||
|
FIREBASE_AUTH_DOMAIN=
|
||||||
|
FIREBASE_PROJECT_ID=
|
||||||
|
FIREBASE_STORAGE_BUCKET=
|
||||||
|
FIREBASE_MESSAGING_SENDER_ID=
|
||||||
|
FIREBASE_APP_ID=
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# S3 AWS Bucket #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
AWS_ENDPOINT_URL=
|
||||||
|
AWS_ACCESS_KEY_ID=
|
||||||
|
AWS_SECRET_ACCESS_KEY=
|
||||||
|
AWS_REGION=
|
||||||
|
AWS_BUCKET_NAME=
|
||||||
|
# Required for path-style S3-compatible providers (MinIO, Hetzner, Backblaze B2, etc.)
|
||||||
|
# that don't support virtual-hosted-style URLs (bucket.endpoint). Not needed for AWS S3.
|
||||||
|
# AWS_FORCE_PATH_STYLE=false
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Azure Blob Storage #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
AZURE_STORAGE_CONNECTION_STRING=
|
||||||
|
AZURE_STORAGE_PUBLIC_ACCESS=false
|
||||||
|
AZURE_CONTAINER_NAME=files
|
||||||
|
|
||||||
|
#========================#
|
||||||
|
# Shared Links #
|
||||||
|
#========================#
|
||||||
|
|
||||||
|
ALLOW_SHARED_LINKS=true
|
||||||
|
# Allows unauthenticated access to shared links. Defaults to false (auth required) if not set.
|
||||||
|
ALLOW_SHARED_LINKS_PUBLIC=false
|
||||||
|
|
||||||
|
#==============================#
|
||||||
|
# Static File Cache Control #
|
||||||
|
#==============================#
|
||||||
|
|
||||||
|
# Leave commented out to use defaults: 1 day (86400 seconds) for s-maxage and 2 days (172800 seconds) for max-age
|
||||||
|
# NODE_ENV must be set to production for these to take effect
|
||||||
|
# STATIC_CACHE_MAX_AGE=172800
|
||||||
|
# STATIC_CACHE_S_MAX_AGE=86400
|
||||||
|
|
||||||
|
# If you have another service in front of your LibreChat doing compression, disable express based compression here
|
||||||
|
# DISABLE_COMPRESSION=true
|
||||||
|
|
||||||
|
# If you have gzipped versions of uploaded images in the same folder, this will enable gzip scanning and serving of these images
|
||||||
|
# Note: The images folder will be scanned on startup and a map kept in memory. Be careful with a large number of images.
|
||||||
|
# ENABLE_IMAGE_OUTPUT_GZIP_SCAN=true
|
||||||
|
|
||||||
|
#===================================================#
|
||||||
|
# UI #
|
||||||
|
#===================================================#
|
||||||
|
|
||||||
|
APP_TITLE=LibreChat
|
||||||
|
# CUSTOM_FOOTER="My custom footer"
|
||||||
|
HELP_AND_FAQ_URL=https://librechat.ai
|
||||||
|
|
||||||
|
# SHOW_BIRTHDAY_ICON=true
|
||||||
|
|
||||||
|
# Google tag manager id
|
||||||
|
#ANALYTICS_GTM_ID=user provided google tag manager id
|
||||||
|
|
||||||
|
# limit conversation file imports to a certain number of bytes in size to avoid the container
|
||||||
|
# maxing out memory limitations by unremarking this line and supplying a file size in bytes
|
||||||
|
# such as the below example of 250 mib
|
||||||
|
# CONVERSATION_IMPORT_MAX_FILE_SIZE_BYTES=262144000
|
||||||
|
|
||||||
|
|
||||||
|
#===============#
|
||||||
|
# REDIS Options #
|
||||||
|
#===============#
|
||||||
|
|
||||||
|
# Enable Redis for caching and session storage
|
||||||
|
# USE_REDIS=true
|
||||||
|
# Enable Redis for resumable LLM streams (defaults to USE_REDIS value if not set)
|
||||||
|
# Set to false to use in-memory storage for streams while keeping Redis for other caches
|
||||||
|
# USE_REDIS_STREAMS=true
|
||||||
|
|
||||||
|
# Single Redis instance
|
||||||
|
# REDIS_URI=redis://127.0.0.1:6379
|
||||||
|
|
||||||
|
# Redis cluster (multiple nodes)
|
||||||
|
# REDIS_URI=redis://127.0.0.1:7001,redis://127.0.0.1:7002,redis://127.0.0.1:7003
|
||||||
|
|
||||||
|
# Redis with TLS/SSL encryption and CA certificate
|
||||||
|
# REDIS_URI=rediss://127.0.0.1:6380
|
||||||
|
# REDIS_CA=/path/to/ca-cert.pem
|
||||||
|
|
||||||
|
# Elasticache may need to use an alternate dnsLookup for TLS connections. see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis
|
||||||
|
# Enable alternative dnsLookup for redis
|
||||||
|
# REDIS_USE_ALTERNATIVE_DNS_LOOKUP=true
|
||||||
|
|
||||||
|
# Redis authentication (if required)
|
||||||
|
# REDIS_USERNAME=your_redis_username
|
||||||
|
# REDIS_PASSWORD=your_redis_password
|
||||||
|
|
||||||
|
# Redis key prefix configuration
|
||||||
|
# Use environment variable name for dynamic prefix (recommended for cloud deployments)
|
||||||
|
# REDIS_KEY_PREFIX_VAR=K_REVISION
|
||||||
|
# Or use static prefix directly
|
||||||
|
# REDIS_KEY_PREFIX=librechat
|
||||||
|
|
||||||
|
# Redis connection limits
|
||||||
|
# REDIS_MAX_LISTENERS=40
|
||||||
|
|
||||||
|
# Redis ping interval in seconds (0 = disabled, >0 = enabled)
|
||||||
|
# When set to a positive integer, Redis clients will ping the server at this interval to keep connections alive
|
||||||
|
# When unset or 0, no pinging is performed (recommended for most use cases)
|
||||||
|
# REDIS_PING_INTERVAL=300
|
||||||
|
|
||||||
|
# Force specific cache namespaces to use in-memory storage even when Redis is enabled
|
||||||
|
# Comma-separated list of CacheKeys
|
||||||
|
# Defaults to CONFIG_STORE,APP_CONFIG so YAML-derived config stays per-container (safe for blue/green deployments)
|
||||||
|
# Set to empty string to force all namespaces through Redis: FORCED_IN_MEMORY_CACHE_NAMESPACES=
|
||||||
|
# FORCED_IN_MEMORY_CACHE_NAMESPACES=CONFIG_STORE,APP_CONFIG
|
||||||
|
|
||||||
|
# Leader Election Configuration (for multi-instance deployments with Redis)
|
||||||
|
# Duration in seconds that the leader lease is valid before it expires (default: 25)
|
||||||
|
# LEADER_LEASE_DURATION=25
|
||||||
|
# Interval in seconds at which the leader renews its lease (default: 10)
|
||||||
|
# LEADER_RENEW_INTERVAL=10
|
||||||
|
# Maximum number of retry attempts when renewing the lease fails (default: 3)
|
||||||
|
# LEADER_RENEW_ATTEMPTS=3
|
||||||
|
# Delay in seconds between retry attempts when renewing the lease (default: 0.5)
|
||||||
|
# LEADER_RENEW_RETRY_DELAY=0.5
|
||||||
|
|
||||||
|
#==================================================#
|
||||||
|
# Others #
|
||||||
|
#==================================================#
|
||||||
|
# You should leave the following commented out #
|
||||||
|
|
||||||
|
# NODE_ENV=
|
||||||
|
|
||||||
|
# E2E_USER_EMAIL=
|
||||||
|
# E2E_USER_PASSWORD=
|
||||||
|
|
||||||
|
#=====================================================#
|
||||||
|
# Cache Headers #
|
||||||
|
#=====================================================#
|
||||||
|
# Headers that control caching of the index.html #
|
||||||
|
# Default configuration prevents caching to ensure #
|
||||||
|
# users always get the latest version. Customize #
|
||||||
|
# only if you understand caching implications. #
|
||||||
|
|
||||||
|
# INDEX_CACHE_CONTROL=no-cache, no-store, must-revalidate
|
||||||
|
# INDEX_PRAGMA=no-cache
|
||||||
|
# INDEX_EXPIRES=0
|
||||||
|
|
||||||
|
# no-cache: Forces validation with server before using cached version
|
||||||
|
# no-store: Prevents storing the response entirely
|
||||||
|
# must-revalidate: Prevents using stale content when offline
|
||||||
|
|
||||||
|
#=====================================================#
|
||||||
|
# OpenWeather #
|
||||||
|
#=====================================================#
|
||||||
|
OPENWEATHER_API_KEY=
|
||||||
|
|
||||||
|
#====================================#
|
||||||
|
# LibreChat Code Interpreter API #
|
||||||
|
#====================================#
|
||||||
|
|
||||||
|
# https://code.librechat.ai
|
||||||
|
# LIBRECHAT_CODE_API_KEY=your-key
|
||||||
|
|
||||||
|
#======================#
|
||||||
|
# Web Search #
|
||||||
|
#======================#
|
||||||
|
|
||||||
|
# Note: All of the following variable names can be customized.
|
||||||
|
# Omit values to allow user to provide them.
|
||||||
|
|
||||||
|
# For more information on configuration values, see:
|
||||||
|
# https://librechat.ai/docs/features/web_search
|
||||||
|
|
||||||
|
# Search Provider (Required)
|
||||||
|
# SERPER_API_KEY=your_serper_api_key
|
||||||
|
|
||||||
|
# Scraper (Required)
|
||||||
|
# FIRECRAWL_API_KEY=your_firecrawl_api_key
|
||||||
|
# Optional: Custom Firecrawl API URL
|
||||||
|
# FIRECRAWL_API_URL=your_firecrawl_api_url
|
||||||
|
|
||||||
|
# Reranker (Required)
|
||||||
|
# JINA_API_KEY=your_jina_api_key
|
||||||
|
# or
|
||||||
|
# COHERE_API_KEY=your_cohere_api_key
|
||||||
|
|
||||||
|
#======================#
|
||||||
|
# MCP Configuration #
|
||||||
|
#======================#
|
||||||
|
|
||||||
|
# Treat 401/403 responses as OAuth requirement when no oauth metadata found
|
||||||
|
# MCP_OAUTH_ON_AUTH_ERROR=true
|
||||||
|
|
||||||
|
# Timeout for OAuth detection requests in milliseconds
|
||||||
|
# MCP_OAUTH_DETECTION_TIMEOUT=5000
|
||||||
|
|
||||||
|
# Cache connection status checks for this many milliseconds to avoid expensive verification
|
||||||
|
# MCP_CONNECTION_CHECK_TTL=60000
|
||||||
|
|
||||||
|
# Skip code challenge method validation (e.g., for AWS Cognito that supports S256 but doesn't advertise it)
|
||||||
|
# When set to true, forces S256 code challenge even if not advertised in .well-known/openid-configuration
|
||||||
|
# MCP_SKIP_CODE_CHALLENGE_CHECK=false
|
||||||
|
|
||||||
|
# Circuit breaker: max connect/disconnect cycles before tripping (per server)
|
||||||
|
# MCP_CB_MAX_CYCLES=7
|
||||||
|
|
||||||
|
# Circuit breaker: sliding window (ms) for counting cycles
|
||||||
|
# MCP_CB_CYCLE_WINDOW_MS=45000
|
||||||
|
|
||||||
|
# Circuit breaker: cooldown (ms) after the cycle breaker trips
|
||||||
|
# MCP_CB_CYCLE_COOLDOWN_MS=15000
|
||||||
|
|
||||||
|
# Circuit breaker: max consecutive failed connection rounds before backoff
|
||||||
|
# MCP_CB_MAX_FAILED_ROUNDS=3
|
||||||
|
|
||||||
|
# Circuit breaker: sliding window (ms) for counting failed rounds
|
||||||
|
# MCP_CB_FAILED_WINDOW_MS=120000
|
||||||
|
|
||||||
|
# Circuit breaker: base backoff (ms) after failed round threshold is reached
|
||||||
|
# MCP_CB_BASE_BACKOFF_MS=30000
|
||||||
|
|
||||||
|
# Circuit breaker: max backoff cap (ms) for exponential backoff
|
||||||
|
# MCP_CB_MAX_BACKOFF_MS=300000
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
# Auto-generated from pyproject.toml — do not edit manually
|
||||||
|
ALFRED_VERSION=0.1.7
|
||||||
|
PYTHON_VERSION=3.14.3
|
||||||
|
IMAGE_NAME=alfred_media_organizer
|
||||||
|
SERVICE_NAME=alfred
|
||||||
|
LIBRECHAT_VERSION=v0.8.4
|
||||||
|
RAG_VERSION=v0.7.3
|
||||||
|
UV_VERSION=0.11.6
|
||||||
+54
-24
@@ -2,11 +2,10 @@ name: CI/CD Awesome Pipeline
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [main]
|
|
||||||
tags:
|
tags:
|
||||||
- 'v*.*.*'
|
- 'v*.*.*'
|
||||||
pull_request:
|
|
||||||
branches: [main]
|
workflow_dispatch:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
REGISTRY_URL: ${{ vars.REGISTRY_URL || 'gitea.iswearihadsomethingforthis.net' }}
|
REGISTRY_URL: ${{ vars.REGISTRY_URL || 'gitea.iswearihadsomethingforthis.net' }}
|
||||||
@@ -30,33 +29,64 @@ jobs:
|
|||||||
name: Build & Push to Registry
|
name: Build & Push to Registry
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: test
|
needs: test
|
||||||
if: contains(github.ref, 'refs/tags/v')
|
|
||||||
steps:
|
|
||||||
- name: Debug ref
|
|
||||||
run: |
|
|
||||||
echo "github.ref = ${{ github.ref }}"
|
|
||||||
echo "GITHUB_REF = $GITHUB_REF"
|
|
||||||
|
|
||||||
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Generate build variables
|
||||||
|
run: python scripts/generate_build_vars.py
|
||||||
|
|
||||||
- name: Load config from Makefile
|
- name: Load config from Makefile
|
||||||
id: config
|
id: config
|
||||||
run: |
|
run: make -s _ci-dump-config >> $GITHUB_OUTPUT
|
||||||
eval "$(make _ci-image-name)"
|
|
||||||
echo "image_name=${IMAGE_NAME}" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
- name: Extract version from tag
|
- name: 🏷️ Docker Metadata (Tags & Labels)
|
||||||
id: version
|
id: meta
|
||||||
run: echo "version=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: gitea.iswearihadsomethingforthis.net/francwa/${{ steps.config.outputs.image_name }}
|
||||||
|
tags: |
|
||||||
|
# Tagged (v1.2.3)
|
||||||
|
type=semver,pattern={{ version }}
|
||||||
|
# Latest (main)
|
||||||
|
type=raw,value=latest,enable={{ is_default_branch }}
|
||||||
|
# Feature branches
|
||||||
|
type=ref,event=branch
|
||||||
|
|
||||||
- name: Build production image
|
- name: Login to Gitea Registry
|
||||||
run: make build
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: gitea.iswearihadsomethingforthis.net
|
||||||
|
username: ${{ gitea.actor }}
|
||||||
|
password: ${{ secrets.G1T34_TOKEN }}
|
||||||
|
|
||||||
- name: Tag and push to registry
|
- name: Build and push
|
||||||
run: |
|
id: docker_build
|
||||||
docker tag ${{ steps.config.outputs.image_name }}:latest ${{ env.REGISTRY_URL }}/${{ env.REGISTRY_USER }}/${{ steps.config.outputs.image_name }}:${{ steps.version.outputs.version }}
|
uses: docker/build-push-action@v5
|
||||||
docker tag ${{ steps.config.outputs.image_name }}:latest ${{ env.REGISTRY_URL }}/${{ env.REGISTRY_USER }}/${{ steps.config.outputs.image_name }}:latest
|
with:
|
||||||
echo "${{ secrets.GITEA_TOKEN }}" | docker login ${{ env.REGISTRY_URL }} -u ${{ env.REGISTRY_USER }} --password-stdin
|
context: .
|
||||||
docker push ${{ env.REGISTRY_URL }}/${{ env.REGISTRY_USER }}/${{ steps.config.outputs.image_name }}:${{ steps.version.outputs.version }}
|
push: true
|
||||||
docker push ${{ env.REGISTRY_URL }}/${{ env.REGISTRY_USER }}/${{ steps.config.outputs.image_name }}:latest
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
build-args: |
|
||||||
|
PYTHON_VERSION=${{ steps.config.outputs.python_version }}
|
||||||
|
PYTHON_VERSION_SHORT=${{ steps.config.outputs.python_version_short }}
|
||||||
|
RUNNER=${{ steps.config.outputs.runner }}
|
||||||
|
|
||||||
|
- name: 🛡️ Run Trivy Vulnerability Scanner
|
||||||
|
uses: docker://aquasec/trivy:latest
|
||||||
|
env:
|
||||||
|
TRIVY_USERNAME: ${{ gitea.actor }}
|
||||||
|
TRIVY_PASSWORD: ${{ secrets.G1T34_TOKEN }}
|
||||||
|
# Unset the fake GITHUB_TOKEN injected by Gitea
|
||||||
|
GITHUB_TOKEN: ""
|
||||||
|
with:
|
||||||
|
args: image --format table --output trivy-report.txt --exit-code 0 --ignore-unfixed --severity CRITICAL,HIGH gitea.iswearihadsomethingforthis.net/francwa/${{ steps.config.outputs.image_name }}:latest
|
||||||
|
|
||||||
|
- name: 📤 Upload Security Report
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: security-report
|
||||||
|
path: trivy-report.txt
|
||||||
|
retention-days: 7
|
||||||
|
|||||||
@@ -0,0 +1,22 @@
|
|||||||
|
name: Renovate Bot
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# Every Monday 4AM
|
||||||
|
- cron: '0 4 * * 1'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
renovate:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Run Renovate
|
||||||
|
uses: docker://renovate/renovate:latest
|
||||||
|
env:
|
||||||
|
RENOVATE_PLATFORM: "gitea"
|
||||||
|
RENOVATE_ENDPOINT: "https://gitea.iswearihadsomethingforthis.net/api/v1"
|
||||||
|
RENOVATE_TOKEN: "${{ secrets.RENOVATE_TOKEN }}"
|
||||||
|
RENOVATE_REPOSITORIES: '["${{ gitea.repository }}"]'
|
||||||
|
RENOVATE_GIT_AUTHOR: "Renovate Bot <renovate@bot.local>"
|
||||||
|
# Might need a free GitHub token if there are lots of dependencies
|
||||||
|
# RENOVATE_GITHUB_TOKEN: "${{ secrets.GITHUB_COM_TOKEN }}"
|
||||||
+15
-1
@@ -55,7 +55,21 @@ coverage.xml
|
|||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
|
||||||
# Secrets
|
# Secrets
|
||||||
.env
|
.env.secrets
|
||||||
|
|
||||||
# Backup files
|
# Backup files
|
||||||
*.backup
|
*.backup
|
||||||
|
|
||||||
|
# Application data dir
|
||||||
|
data/*
|
||||||
|
|
||||||
|
# Application logs
|
||||||
|
logs/*
|
||||||
|
|
||||||
|
# Documentation folder
|
||||||
|
docs/
|
||||||
|
|
||||||
|
# .md files
|
||||||
|
*.md
|
||||||
|
|
||||||
|
#
|
||||||
|
|||||||
+91
@@ -0,0 +1,91 @@
|
|||||||
|
# syntax=docker/dockerfile:1
|
||||||
|
# check=skip=InvalidDefaultArgInFrom
|
||||||
|
|
||||||
|
ARG PYTHON_VERSION
|
||||||
|
ARG UV_VERSION
|
||||||
|
|
||||||
|
# Stage 0: uv binary (workaround — --from doesn't support ARG expansion)
|
||||||
|
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# Stage 1: Builder
|
||||||
|
# ===========================================
|
||||||
|
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
UV_PROJECT_ENVIRONMENT=/venv
|
||||||
|
|
||||||
|
# Install build dependencies
|
||||||
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||||
|
apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends build-essential
|
||||||
|
|
||||||
|
# Install uv globally
|
||||||
|
COPY --from=uv-bin /uv /usr/local/bin/uv
|
||||||
|
|
||||||
|
WORKDIR /tmp
|
||||||
|
|
||||||
|
COPY pyproject.toml uv.lock Makefile ./
|
||||||
|
|
||||||
|
# Install dependencies into /venv
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv uv sync
|
||||||
|
|
||||||
|
COPY scripts/ ./scripts/
|
||||||
|
COPY .env.example ./
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# Stage 2: Testing
|
||||||
|
# ===========================================
|
||||||
|
FROM builder AS test
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv uv sync --group dev
|
||||||
|
|
||||||
|
COPY alfred/ ./alfred
|
||||||
|
COPY scripts ./scripts
|
||||||
|
COPY tests/ ./tests
|
||||||
|
|
||||||
|
# ===========================================
|
||||||
|
# Stage 3: Runtime
|
||||||
|
# ===========================================
|
||||||
|
FROM python:${PYTHON_VERSION}-slim-bookworm AS runtime
|
||||||
|
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONPATH=/home/appuser \
|
||||||
|
PATH="/venv/bin:$PATH"
|
||||||
|
|
||||||
|
# Install runtime dependencies
|
||||||
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||||
|
--mount=type=cache,target=/var/lib/apt,sharing=locked \
|
||||||
|
apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends ca-certificates
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN useradd -m -u 1000 -s /bin/bash appuser
|
||||||
|
|
||||||
|
# Create data directories
|
||||||
|
RUN mkdir -p /data /logs \
|
||||||
|
&& chown -R appuser:appuser /data /logs
|
||||||
|
|
||||||
|
USER appuser
|
||||||
|
WORKDIR /home/appuser
|
||||||
|
|
||||||
|
# Copy venv from builder stage
|
||||||
|
COPY --from=builder /venv /venv
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY --chown=appuser:appuser alfred/ ./alfred
|
||||||
|
COPY --chown=appuser:appuser scripts/ ./scripts
|
||||||
|
COPY --chown=appuser:appuser .env.example ./
|
||||||
|
COPY --chown=appuser:appuser pyproject.toml ./
|
||||||
|
|
||||||
|
VOLUME ["/data", "/logs"]
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1
|
||||||
|
|
||||||
|
CMD ["python", "-m", "uvicorn", "alfred.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
@@ -1,249 +1,187 @@
|
|||||||
.POSIX:
|
|
||||||
.SUFFIXES:
|
|
||||||
.DEFAULT_GOAL := help
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
# --- SETTINGS ---
|
# --- Load Config from pyproject.toml ---
|
||||||
PYTHON_VERSION = 3.12.7
|
export
|
||||||
PYTHON_VERSION_SHORT = $(shell echo $(PYTHON_VERSION) | cut -d. -f1,2)
|
-include .env.make
|
||||||
# Change to 'uv' when ready.
|
|
||||||
RUNNER ?= poetry
|
|
||||||
|
|
||||||
export PYTHON_VERSION
|
# --- Profiles management ---
|
||||||
export PYTHON_VERSION_SHORT
|
# Usage: make up p=rag,meili
|
||||||
export RUNNER
|
p ?= full
|
||||||
export IMAGE_NAME
|
PROFILES_PARAM := COMPOSE_PROFILES=$(p)
|
||||||
|
|
||||||
# --- VARIABLES ---
|
# --- Commands ---
|
||||||
CORE_DIR = brain
|
DOCKER_COMPOSE := docker compose \
|
||||||
SERVICE_NAME = agent_media
|
--env-file .env.alfred \
|
||||||
IMAGE_NAME = agent_media
|
--env-file .env.secrets \
|
||||||
|
--env-file .env.make
|
||||||
|
DOCKER_BUILD := DOCKER_BUILDKIT=1 docker build \
|
||||||
|
--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
|
||||||
|
--build-arg UV_VERSION=$(UV_VERSION)
|
||||||
|
|
||||||
# --- ADAPTERS ---
|
# --- Phony ---
|
||||||
# UV uses "sync", Poetry uses "install". Both install DEV deps by default.
|
.PHONY: bootstrap up down restart logs ps shell build build-test install \
|
||||||
INSTALL_CMD = $(if $(filter uv,$(RUNNER)),sync,install)
|
update install-hooks test coverage lint format clean major minor patch help
|
||||||
|
|
||||||
# --- MACROS ---
|
# --- Setup ---
|
||||||
ARGS = $(filter-out $@,$(MAKECMDGOALS))
|
.env.alfred .env.librechat .env.secrets .env.make:
|
||||||
BUMP_CMD = cd $(CORE_DIR) && $(RUNNER) run bump-my-version bump
|
@echo "Initializing environment..."
|
||||||
COMPOSE_CMD = docker-compose
|
@uv run python scripts/bootstrap.py \
|
||||||
DOCKER_CMD = docker build \
|
&& echo "✓ Environment ready" \
|
||||||
--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
|
|| (echo "✗ Environment setup failed" && exit 1)
|
||||||
--build-arg PYTHON_VERSION_SHORT=$(PYTHON_VERSION_SHORT) \
|
|
||||||
--build-arg RUNNER=$(RUNNER) \
|
|
||||||
-f $(CORE_DIR)/Dockerfile \
|
|
||||||
-t $(IMAGE_NAME):latest .
|
|
||||||
|
|
||||||
RUNNER_ADD = cd $(CORE_DIR) && $(RUNNER) add
|
bootstrap: .env.alfred .env.librechat .env.secrets .env.make
|
||||||
RUNNER_HOOKS = cd $(CORE_DIR) && $(RUNNER) run pre-commit install -c ../.pre-commit-config.yaml
|
|
||||||
RUNNER_INSTALL = cd $(CORE_DIR) && $(RUNNER) $(INSTALL_CMD)
|
|
||||||
RUNNER_RUN = cd $(CORE_DIR) && $(RUNNER) run
|
|
||||||
RUNNER_UPDATE = cd $(CORE_DIR) && $(RUNNER) update
|
|
||||||
|
|
||||||
# --- STYLES ---
|
# --- Docker ---
|
||||||
B = \033[1m
|
up: .env.alfred .env.secrets
|
||||||
G = \033[32m
|
@echo "Starting containers with profiles: [full]..."
|
||||||
T = \033[36m
|
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) up -d --remove-orphans \
|
||||||
R = \033[0m
|
&& echo "✓ Containers started" \
|
||||||
|
|| (echo "✗ Failed to start containers" && exit 1)
|
||||||
|
|
||||||
# --- TARGETS ---
|
down:
|
||||||
.PHONY: add build build-test check-docker check-runner clean coverage down format help init-dotenv install install-hooks lint logs major minor patch prune ps restart run shell test up update _check_branch _ci-image-name _ci-run-tests
|
@echo "Stopping containers..."
|
||||||
|
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) down \
|
||||||
|
&& echo "✓ Containers stopped" \
|
||||||
|
|| (echo "✗ Failed to stop containers" && exit 1)
|
||||||
|
|
||||||
# Catch-all for args
|
restart:
|
||||||
%:
|
@echo "Restarting containers..."
|
||||||
@:
|
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) restart \
|
||||||
|
&& echo "✓ Containers restarted" \
|
||||||
|
|| (echo "✗ Failed to restart containers" && exit 1)
|
||||||
|
|
||||||
add: check-runner
|
logs:
|
||||||
@echo "$(T)➕ Adding dependency ($(RUNNER)): $(ARGS)$(R)"
|
@echo "Following logs (Ctrl+C to exit)..."
|
||||||
$(RUNNER_ADD) $(ARGS)
|
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) logs -f
|
||||||
|
|
||||||
build: check-docker
|
ps:
|
||||||
@echo "$(T)🐳 Building Docker image...$(R)"
|
@echo "Container status:"
|
||||||
$(DOCKER_CMD)
|
@$(PROFILES_PARAM) $(DOCKER_COMPOSE) ps
|
||||||
@echo "✅ Image $(IMAGE_NAME):latest ready."
|
|
||||||
|
|
||||||
build-test: check-docker
|
shell:
|
||||||
@echo "$(T)🐳 Building test image (with dev deps)...$(R)"
|
@echo "Opening shell in $(SERVICE_NAME)..."
|
||||||
docker build \
|
@$(DOCKER_COMPOSE) exec $(SERVICE_NAME) /bin/bash
|
||||||
--build-arg RUNNER=$(RUNNER) \
|
|
||||||
--build-arg PYTHON_VERSION=$(PYTHON_VERSION) \
|
|
||||||
--build-arg PYTHON_VERSION_SHORT=$(PYTHON_VERSION_SHORT) \
|
|
||||||
-f $(CORE_DIR)/Dockerfile \
|
|
||||||
--target test \
|
|
||||||
-t $(IMAGE_NAME):test .
|
|
||||||
@echo "✅ Test image $(IMAGE_NAME):test ready."
|
|
||||||
|
|
||||||
check-docker:
|
# --- Build ---
|
||||||
@command -v docker >/dev/null 2>&1 || { echo "$(R)❌ Docker not installed$(R)"; exit 1; }
|
build: .env.make
|
||||||
@docker info >/dev/null 2>&1 || { echo "$(R)❌ Docker daemon not running$(R)"; exit 1; }
|
@echo "Building image $(IMAGE_NAME):latest ..."
|
||||||
|
@$(DOCKER_BUILD) -t $(IMAGE_NAME):latest . \
|
||||||
|
&& echo "✓ Build complete" \
|
||||||
|
|| (echo "✗ Build failed" && exit 1)
|
||||||
|
|
||||||
check-runner:
|
build-test: .env.make
|
||||||
@command -v $(RUNNER) >/dev/null 2>&1 || { echo "$(R)❌ $(RUNNER) not installed$(R)"; exit 1; }
|
@echo "Building test image $(IMAGE_NAME):test..."
|
||||||
|
@$(DOCKER_BUILD) --target test -t $(IMAGE_NAME):test . \
|
||||||
|
&& echo "✓ Test image built" \
|
||||||
|
|| (echo "✗ Build failed" && exit 1)
|
||||||
|
|
||||||
|
# --- Dependencies ---
|
||||||
|
install:
|
||||||
|
@echo "Installing dependencies with uv..."
|
||||||
|
@uv install \
|
||||||
|
&& echo "✓ Dependencies installed" \
|
||||||
|
|| (echo "✗ Installation failed" && exit 1)
|
||||||
|
|
||||||
|
install-hooks:
|
||||||
|
@echo "Installing pre-commit hooks..."
|
||||||
|
@uv run pre-commit install \
|
||||||
|
&& echo "✓ Hooks installed" \
|
||||||
|
|| (echo "✗ Hook installation failed" && exit 1)
|
||||||
|
|
||||||
|
update:
|
||||||
|
@echo "Updating dependencies with uv..."
|
||||||
|
@uv update \
|
||||||
|
&& echo "✓ Dependencies updated" \
|
||||||
|
|| (echo "✗ Update failed" && exit 1)
|
||||||
|
|
||||||
|
# --- Quality ---
|
||||||
|
test:
|
||||||
|
@echo "Running tests..."
|
||||||
|
@uv run pytest \
|
||||||
|
&& echo "✓ Tests passed" \
|
||||||
|
|| (echo "✗ Tests failed" && exit 1)
|
||||||
|
|
||||||
|
coverage:
|
||||||
|
@echo "Running tests with coverage..."
|
||||||
|
@uv run pytest --cov=. --cov-report=html --cov-report=term \
|
||||||
|
&& echo "✓ Coverage report generated" \
|
||||||
|
|| (echo "✗ Coverage failed" && exit 1)
|
||||||
|
|
||||||
|
lint:
|
||||||
|
@echo "Linting code..."
|
||||||
|
@uv run ruff check --fix . \
|
||||||
|
&& echo "✓ Linting complete" \
|
||||||
|
|| (echo "✗ Linting failed" && exit 1)
|
||||||
|
|
||||||
|
format:
|
||||||
|
@echo "Formatting code..."
|
||||||
|
@uv run ruff format . && uv run ruff check --fix . \
|
||||||
|
&& echo "✓ Code formatted" \
|
||||||
|
|| (echo "✗ Formatting failed" && exit 1)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo "$(T)🧹 Cleaning caches...$(R)"
|
@echo "Cleaning build artifacts..."
|
||||||
cd $(CORE_DIR) && rm -rf .ruff_cache __pycache__ .pytest_cache
|
@rm -rf .ruff_cache __pycache__ .pytest_cache htmlcov .coverage
|
||||||
find $(CORE_DIR) -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
@find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
||||||
find $(CORE_DIR) -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
|
@echo "✓ Cleanup complete"
|
||||||
find $(CORE_DIR) -type f -name "*.pyc" -delete 2>/dev/null || true
|
|
||||||
@echo "✅ Caches cleaned."
|
|
||||||
|
|
||||||
coverage: check-runner
|
# --- Versioning ---
|
||||||
@echo "$(T)📊 Running tests with coverage...$(R)"
|
major minor patch: _check-main
|
||||||
$(RUNNER_RUN) pytest --cov=. --cov-report=html --cov-report=term $(ARGS)
|
@echo "Bumping $@ version..."
|
||||||
@echo "✅ Report generated in htmlcov/"
|
@uv run bump-my-version bump $@ \
|
||||||
|
&& echo "✓ Version bumped" \
|
||||||
|
|| (echo "✗ Version bump failed" && exit 1)
|
||||||
|
|
||||||
down: check-docker
|
@echo "Pushing tags..."
|
||||||
@echo "$(T)🛑 Stopping containers...$(R)"
|
@git push --tags \
|
||||||
$(COMPOSE_CMD) down
|
&& echo "✓ Tags pushed" \
|
||||||
@echo "✅ System stopped."
|
|| (echo "✗ Push failed" && exit 1)
|
||||||
|
|
||||||
format: check-runner
|
# CI/CD helpers
|
||||||
@echo "$(T)✨ Formatting with Ruff...$(R)"
|
_ci-dump-config:
|
||||||
$(RUNNER_RUN) ruff format .
|
@echo "image_name=$(IMAGE_NAME)"
|
||||||
$(RUNNER_RUN) ruff check --fix .
|
@echo "python_version=$(PYTHON_VERSION)"
|
||||||
@echo "✅ Code cleaned."
|
@echo "uv_version=$(UV_VERSION)"
|
||||||
|
@echo "service_name=$(SERVICE_NAME)"
|
||||||
|
|
||||||
help:
|
_ci-run-tests:build-test
|
||||||
@echo "$(B)Available commands:$(R)"
|
@echo "Running tests in Docker..."
|
||||||
@echo ""
|
|
||||||
@echo "$(G)Setup:$(R)"
|
|
||||||
@echo " $(T)check-docker $(R) Verify Docker is installed and running."
|
|
||||||
@echo " $(T)check-runner $(R) Verify package manager ($(RUNNER))."
|
|
||||||
@echo " $(T)init-dotenv $(R) Create .env from .env.example with generated secrets."
|
|
||||||
@echo " $(T)install $(R) Install ALL dependencies (Prod + Dev)."
|
|
||||||
@echo " $(T)install-hooks $(R) Install git pre-commit hooks."
|
|
||||||
@echo ""
|
|
||||||
@echo "$(G)Docker:$(R)"
|
|
||||||
@echo " $(T)build $(R) Build the docker image (production)."
|
|
||||||
@echo " $(T)build-test $(R) Build the docker image (with dev deps for testing)."
|
|
||||||
@echo " $(T)down $(R) Stop and remove containers."
|
|
||||||
@echo " $(T)logs $(R) Follow logs."
|
|
||||||
@echo " $(T)prune $(R) Clean Docker system."
|
|
||||||
@echo " $(T)ps $(R) Show container status."
|
|
||||||
@echo " $(T)restart $(R) Restart all containers."
|
|
||||||
@echo " $(T)shell $(R) Open shell in container."
|
|
||||||
@echo " $(T)up $(R) Start the agent."
|
|
||||||
@echo ""
|
|
||||||
@echo "$(G)Development:$(R)"
|
|
||||||
@echo " $(T)add ... $(R) Add dependency (use --group dev or --dev if needed)."
|
|
||||||
@echo " $(T)clean $(R) Clean caches."
|
|
||||||
@echo " $(T)coverage $(R) Run tests with coverage."
|
|
||||||
@echo " $(T)format $(R) Format code (Ruff)."
|
|
||||||
@echo " $(T)lint $(R) Lint code without fixing."
|
|
||||||
@echo " $(T)test ... $(R) Run tests (local with $(RUNNER))."
|
|
||||||
@echo " $(T)update $(R) Update dependencies."
|
|
||||||
@echo ""
|
|
||||||
@echo "$(G)Versioning:$(R)"
|
|
||||||
@echo " $(T)major/minor/patch $(R) Bump version."
|
|
||||||
|
|
||||||
init-dotenv:
|
|
||||||
@echo "$(T)🔑 Initializing .env file...$(R)"
|
|
||||||
@if [ -f .env ]; then \
|
|
||||||
echo "$(R)⚠️ .env already exists. Skipping.$(R)"; \
|
|
||||||
exit 0; \
|
|
||||||
fi
|
|
||||||
@if [ ! -f .env.example ]; then \
|
|
||||||
echo "$(R)❌ .env.example not found$(R)"; \
|
|
||||||
exit 1; \
|
|
||||||
fi
|
|
||||||
@if ! command -v openssl >/dev/null 2>&1; then \
|
|
||||||
echo "$(R)❌ openssl not found. Please install it first.$(R)"; \
|
|
||||||
exit 1; \
|
|
||||||
fi
|
|
||||||
@echo "$(T) → Copying .env.example...$(R)"
|
|
||||||
@cp .env.example .env
|
|
||||||
@echo "$(T) → Generating secrets...$(R)"
|
|
||||||
@sed -i.bak "s|JWT_SECRET=.*|JWT_SECRET=$$(openssl rand -base64 32)|" .env
|
|
||||||
@sed -i.bak "s|JWT_REFRESH_SECRET=.*|JWT_REFRESH_SECRET=$$(openssl rand -base64 32)|" .env
|
|
||||||
@sed -i.bak "s|CREDS_KEY=.*|CREDS_KEY=$$(openssl rand -hex 16)|" .env
|
|
||||||
@sed -i.bak "s|CREDS_IV=.*|CREDS_IV=$$(openssl rand -hex 8)|" .env
|
|
||||||
@sed -i.bak "s|MEILI_MASTER_KEY=.*|MEILI_MASTER_KEY=$$(openssl rand -base64 32)|" .env
|
|
||||||
@sed -i.bak "s|AGENT_BRAIN_API_KEY=.*|AGENT_BRAIN_API_KEY=$$(openssl rand -base64 24)|" .env
|
|
||||||
@rm -f .env.bak
|
|
||||||
@echo "$(G)✅ .env created with generated secrets!$(R)"
|
|
||||||
@echo "$(T)⚠️ Don't forget to add your API keys:$(R)"
|
|
||||||
@echo " - OPENAI_API_KEY"
|
|
||||||
@echo " - DEEPSEEK_API_KEY"
|
|
||||||
@echo " - TMDB_API_KEY (optional)"
|
|
||||||
|
|
||||||
install: check-runner
|
|
||||||
@echo "$(T)📦 Installing FULL environment ($(RUNNER))...$(R)"
|
|
||||||
$(RUNNER_INSTALL)
|
|
||||||
@echo "✅ Environment ready (Prod + Dev)."
|
|
||||||
|
|
||||||
install-hooks: check-runner
|
|
||||||
@echo "$(T)🔧 Installing hooks...$(R)"
|
|
||||||
$(RUNNER_HOOKS)
|
|
||||||
@echo "✅ Hooks ready."
|
|
||||||
|
|
||||||
lint: check-runner
|
|
||||||
@echo "$(T)🔍 Linting code...$(R)"
|
|
||||||
$(RUNNER_RUN) ruff check .
|
|
||||||
|
|
||||||
logs: check-docker
|
|
||||||
@echo "$(T)📋 Following logs...$(R)"
|
|
||||||
$(COMPOSE_CMD) logs -f
|
|
||||||
|
|
||||||
major: _check_branch
|
|
||||||
@echo "$(T)💥 Bumping major...$(R)"
|
|
||||||
SKIP=all $(BUMP_CMD) major
|
|
||||||
|
|
||||||
minor: _check_branch
|
|
||||||
@echo "$(T)✨ Bumping minor...$(R)"
|
|
||||||
SKIP=all $(BUMP_CMD) minor
|
|
||||||
|
|
||||||
patch: _check_branch
|
|
||||||
@echo "$(T)🚀 Bumping patch...$(R)"
|
|
||||||
SKIP=all $(BUMP_CMD) patch
|
|
||||||
|
|
||||||
prune: check-docker
|
|
||||||
@echo "$(T)🗑️ Pruning Docker resources...$(R)"
|
|
||||||
docker system prune -af
|
|
||||||
@echo "✅ Docker cleaned."
|
|
||||||
|
|
||||||
ps: check-docker
|
|
||||||
@echo "$(T)📋 Container status:$(R)"
|
|
||||||
@$(COMPOSE_CMD) ps
|
|
||||||
|
|
||||||
restart: check-docker
|
|
||||||
@echo "$(T)🔄 Restarting containers...$(R)"
|
|
||||||
$(COMPOSE_CMD) restart
|
|
||||||
@echo "✅ Containers restarted."
|
|
||||||
|
|
||||||
run: check-runner
|
|
||||||
$(RUNNER_RUN) $(ARGS)
|
|
||||||
|
|
||||||
shell: check-docker
|
|
||||||
@echo "$(T)🐚 Opening shell in $(SERVICE_NAME)...$(R)"
|
|
||||||
$(COMPOSE_CMD) exec $(SERVICE_NAME) /bin/sh
|
|
||||||
|
|
||||||
test: check-runner
|
|
||||||
@echo "$(T)🧪 Running tests...$(R)"
|
|
||||||
$(RUNNER_RUN) pytest $(ARGS)
|
|
||||||
|
|
||||||
up: check-docker
|
|
||||||
@echo "$(T)🚀 Starting Agent Media...$(R)"
|
|
||||||
$(COMPOSE_CMD) up -d
|
|
||||||
@echo "✅ System is up."
|
|
||||||
|
|
||||||
update: check-runner
|
|
||||||
@echo "$(T)🔄 Updating dependencies...$(R)"
|
|
||||||
$(RUNNER_UPDATE)
|
|
||||||
@echo "✅ All packages up to date."
|
|
||||||
|
|
||||||
_check_branch:
|
|
||||||
@curr=$$(git rev-parse --abbrev-ref HEAD); \
|
|
||||||
if [ "$$curr" != "main" ]; then \
|
|
||||||
echo "❌ Error: not on the main branch"; exit 1; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
_ci-image-name:
|
|
||||||
@echo "IMAGE_NAME=$(IMAGE_NAME)"
|
|
||||||
|
|
||||||
_ci-run-tests: build-test
|
|
||||||
@echo "$(T)🧪 Running tests in Docker...$(R)"
|
|
||||||
docker run --rm \
|
docker run --rm \
|
||||||
-e DEEPSEEK_API_KEY \
|
-e DEEPSEEK_API_KEY \
|
||||||
-e TMDB_API_KEY \
|
-e TMDB_API_KEY \
|
||||||
|
-e QBITTORRENT_URL \
|
||||||
$(IMAGE_NAME):test pytest
|
$(IMAGE_NAME):test pytest
|
||||||
@echo "✅ Tests passed."
|
@echo "✓ Tests passed."
|
||||||
|
|
||||||
|
_check-main:
|
||||||
|
@test "$$(git rev-parse --abbrev-ref HEAD)" = "main" \
|
||||||
|
|| (echo "✗ ERROR: Not on main branch" && exit 1)
|
||||||
|
|
||||||
|
# --- Help ---
|
||||||
|
help:
|
||||||
|
@echo "Cleverly Crafted Unawareness - Management Commands"
|
||||||
|
@echo ""
|
||||||
|
@echo "Usage: make [target] [p=profile1,profile2]"
|
||||||
|
@echo ""
|
||||||
|
@echo "Setup:"
|
||||||
|
@echo " bootstrap Generate .env.alfred, .env.librechat, .env.secrets and .env.make"
|
||||||
|
@echo ""
|
||||||
|
@echo "Docker:"
|
||||||
|
@echo " up Start containers (default profile: core)"
|
||||||
|
@echo " Example: make up p=rag,meili"
|
||||||
|
@echo " down Stop all containers"
|
||||||
|
@echo " restart Restart containers (supports p=...)"
|
||||||
|
@echo " logs Follow logs (supports p=...)"
|
||||||
|
@echo " ps Status of containers"
|
||||||
|
@echo " shell Open bash in the core container"
|
||||||
|
@echo " build Build the production Docker image"
|
||||||
|
@echo ""
|
||||||
|
@echo "Dev & Quality:"
|
||||||
|
@echo " setup Bootstrap .env and security keys"
|
||||||
|
@echo " install Install dependencies via uv"
|
||||||
|
@echo " test Run pytest suite"
|
||||||
|
@echo " coverage Run tests and generate HTML report"
|
||||||
|
@echo " lint/format Quality and style checks"
|
||||||
|
@echo ""
|
||||||
|
@echo "Release:"
|
||||||
|
@echo " major|minor|patch Bump version and push tags (main branch only)"
|
||||||
|
|||||||
@@ -0,0 +1,433 @@
|
|||||||
|
# Alfred Media Organizer 🎬
|
||||||
|
|
||||||
|
An AI-powered agent for managing your local media library with natural language. Search, download, and organize movies and TV shows effortlessly through a conversational interface.
|
||||||
|
|
||||||
|
[Python](https://www.python.org/downloads/)
|
||||||
|
[uv](https://github.com/astral-sh/uv)
|
||||||
|
[License: MIT](https://opensource.org/licenses/MIT)
|
||||||
|
[Ruff](https://github.com/astral-sh/ruff)
|
||||||
|
|
||||||
|
## ✨ Features
|
||||||
|
|
||||||
|
- 🤖 **Natural Language Interface** — Talk to your media library in plain language
|
||||||
|
- 🔍 **Smart Search** — Find movies and TV shows via TMDB with rich metadata
|
||||||
|
- 📥 **Torrent Integration** — Search and download via qBittorrent
|
||||||
|
- 🧠 **Contextual Memory** — Remembers your preferences and conversation history
|
||||||
|
- 📁 **Auto-Organization** — Moves and renames media files, resolves destinations, handles subtitles
|
||||||
|
- 🎞️ **Subtitle Pipeline** — Identifies, matches, and places subtitle tracks automatically
|
||||||
|
- 🔄 **Workflow Engine** — YAML-defined multi-step workflows (e.g. `organize_media`)
|
||||||
|
- 🌐 **OpenAI-Compatible API** — Works with any OpenAI-compatible client (LibreChat, OpenWebUI, etc.)
|
||||||
|
- 🔒 **Secure by Default** — Auto-generated secrets and encrypted credentials
|
||||||
|
|
||||||
|
## 🏗️ Architecture
|
||||||
|
|
||||||
|
Built with **Domain-Driven Design (DDD)** principles for clean separation of concerns:
|
||||||
|
|
||||||
|
```
|
||||||
|
alfred/
|
||||||
|
├── agent/ # AI agent orchestration
|
||||||
|
│ ├── llm/ # LLM clients (Ollama, DeepSeek)
|
||||||
|
│ ├── tools/ # Tool implementations (api, filesystem, language)
|
||||||
|
│ └── workflows/ # YAML-defined multi-step workflows
|
||||||
|
├── application/ # Use cases & DTOs
|
||||||
|
│ ├── movies/ # Movie search
|
||||||
|
│ ├── torrents/ # Torrent management
|
||||||
|
│ └── filesystem/ # File operations (move, list, subtitles, seed links)
|
||||||
|
├── domain/ # Business logic & entities
|
||||||
|
│ ├── media/ # Release parsing
|
||||||
|
│ ├── movies/ # Movie entities
|
||||||
|
│ ├── tv_shows/ # TV show entities & value objects
|
||||||
|
│ ├── subtitles/ # Subtitle scanner, services, knowledge base
|
||||||
|
│ └── shared/ # Common value objects (ImdbId, FilePath, FileSize)
|
||||||
|
└── infrastructure/ # External services & persistence
|
||||||
|
├── api/ # External API clients (TMDB, qBittorrent, Knaben)
|
||||||
|
├── filesystem/ # File manager (hard-link based, path-traversal safe)
|
||||||
|
├── persistence/ # Three-tier memory (LTM/STM/Episodic) + JSON repositories
|
||||||
|
└── subtitle/ # Subtitle infrastructure
|
||||||
|
```
|
||||||
|
|
||||||
|
### Key flows
|
||||||
|
|
||||||
|
**Agent execution:** `agent.step(user_input)` → LLM call → if the response contains `tool_calls`, execute each via the registry → repeat until the LLM returns no tool calls or `max_tool_iterations` is reached → return the final response.
|
||||||
|
|
||||||
|
**Media organization workflow:**
|
||||||
|
1. `resolve_destination` — Determines target folder/filename from release name
|
||||||
|
2. `move_media` — Hard-links file to library, deletes source
|
||||||
|
3. `manage_subtitles` — Scans, classifies, and places subtitle tracks
|
||||||
|
4. `create_seed_links` — Hard-links library file back to torrents/ for continued seeding
|
||||||
|
|
||||||
|
**Memory tiers:**
|
||||||
|
- **LTM** (`data/memory/ltm.json`) — Persisted config, media library, watchlist
|
||||||
|
- **STM** — Conversation history (capped at `MAX_HISTORY_MESSAGES`)
|
||||||
|
- **Episodic** — Transient search results, active downloads, recent errors
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- **Python 3.14+**
|
||||||
|
- **uv** (dependency manager)
|
||||||
|
- **Docker & Docker Compose** (recommended for full stack)
|
||||||
|
- **API Keys:**
|
||||||
|
- TMDB API key ([get one here](https://www.themoviedb.org/settings/api))
|
||||||
|
- Optional: DeepSeek or other LLM provider keys
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone the repository
|
||||||
|
git clone https://github.com/francwa/alfred_media_organizer.git
|
||||||
|
cd alfred_media_organizer
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
make install
|
||||||
|
|
||||||
|
# Install pre-commit hooks
|
||||||
|
make install-hooks
|
||||||
|
|
||||||
|
# Bootstrap environment (generates .env with secure secrets)
|
||||||
|
make bootstrap
|
||||||
|
|
||||||
|
# Validate your .env against the schema
|
||||||
|
make validate
|
||||||
|
|
||||||
|
# Edit .env with your API keys
|
||||||
|
nano .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running with Docker (Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start the default services (LibreChat + Alfred + MongoDB + Ollama)
|
||||||
|
make up
|
||||||
|
|
||||||
|
# Or start with specific profiles
|
||||||
|
make up p=rag,meili # Include RAG and Meilisearch
|
||||||
|
make up p=qbittorrent # Include qBittorrent
|
||||||
|
make up p=full # Everything
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
make logs
|
||||||
|
|
||||||
|
# Stop all services
|
||||||
|
make down
|
||||||
|
```
|
||||||
|
|
||||||
|
The web interface will be available at **http://localhost:3080**.
|
||||||
|
|
||||||
|
### Running Locally (Development)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run uvicorn alfred.app:app --reload --port 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
## ⚙️ Configuration
|
||||||
|
|
||||||
|
### Settings system
|
||||||
|
|
||||||
|
`settings.toml` is the single source of truth. Settings flow through the following pipeline:
|
||||||
|
|
||||||
|
```
|
||||||
|
settings.toml → settings_schema.py → settings_bootstrap.py → .env + .env.make → settings.py
|
||||||
|
```
|
||||||
|
|
||||||
|
To add a setting: define it in `settings.toml`, run `make bootstrap`, then access via `settings.my_new_setting`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# First time setup
|
||||||
|
make bootstrap
|
||||||
|
|
||||||
|
# Validate existing .env against schema
|
||||||
|
make validate
|
||||||
|
|
||||||
|
# Re-run after settings.toml changes (existing secrets preserved)
|
||||||
|
make bootstrap
|
||||||
|
```
|
||||||
|
|
||||||
|
**Never commit `.env` or `.env.make`** — both are gitignored and auto-generated.
|
||||||
|
|
||||||
|
### Key settings (.env)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# --- CORE ---
|
||||||
|
MAX_HISTORY_MESSAGES=10
|
||||||
|
MAX_TOOL_ITERATIONS=10
|
||||||
|
|
||||||
|
# --- LLM ---
|
||||||
|
DEFAULT_LLM_PROVIDER=local # local (Ollama) | deepseek
|
||||||
|
OLLAMA_BASE_URL=http://ollama:11434
|
||||||
|
OLLAMA_MODEL=llama3.3:latest
|
||||||
|
LLM_TEMPERATURE=0.2
|
||||||
|
|
||||||
|
# --- API KEYS ---
|
||||||
|
TMDB_API_KEY=your-tmdb-key # Required for movie/show search
|
||||||
|
DEEPSEEK_API_KEY= # Optional
|
||||||
|
|
||||||
|
# --- SECURITY (auto-generated) ---
|
||||||
|
JWT_SECRET=<auto>
|
||||||
|
CREDS_KEY=<auto>
|
||||||
|
MONGO_PASSWORD=<auto>
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🐳 Docker Services
|
||||||
|
|
||||||
|
### Docker Profiles
|
||||||
|
|
||||||
|
| Profile | Extra services | Use case |
|
||||||
|
|---------|---------------|----------|
|
||||||
|
| (default) | — | LibreChat + Alfred + MongoDB + Ollama |
|
||||||
|
| `meili` | Meilisearch | Fast full-text search |
|
||||||
|
| `rag` | RAG API + VectorDB (PostgreSQL) | Document retrieval |
|
||||||
|
| `qbittorrent` | qBittorrent | Torrent downloads |
|
||||||
|
| `full` | All of the above | Complete setup |
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make up # Start (default profile)
|
||||||
|
make up p=full # Start with all services
|
||||||
|
make down # Stop
|
||||||
|
make restart # Restart
|
||||||
|
make logs # Follow logs
|
||||||
|
make ps # Container status
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🛠️ Available Tools
|
||||||
|
|
||||||
|
| Tool | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| `find_media_imdb_id` | Search for movies/TV shows on TMDB by title |
|
||||||
|
| `find_torrent` | Search for torrents across multiple indexers |
|
||||||
|
| `get_torrent_by_index` | Get detailed info about a specific result |
|
||||||
|
| `add_torrent_by_index` | Download a torrent from search results |
|
||||||
|
| `add_torrent_to_qbittorrent` | Add a torrent via magnet link directly |
|
||||||
|
| `resolve_destination` | Compute the target library path for a release |
|
||||||
|
| `move_media` | Hard-link a file to its library destination |
|
||||||
|
| `manage_subtitles` | Scan, classify, and place subtitle tracks |
|
||||||
|
| `create_seed_links` | Prepare torrent folder so qBittorrent keeps seeding |
|
||||||
|
| `learn` | Teach Alfred a new pattern (release group, naming convention) |
|
||||||
|
| `set_path_for_folder` | Configure folder paths |
|
||||||
|
| `list_folder` | List contents of a configured folder |
|
||||||
|
| `set_language` | Set preferred language for the session |
|
||||||
|
|
||||||
|
## 💬 Usage Examples
|
||||||
|
|
||||||
|
### Via Web Interface (LibreChat)
|
||||||
|
|
||||||
|
Navigate to **http://localhost:3080** and start chatting:
|
||||||
|
|
||||||
|
```
|
||||||
|
You: Find Inception in 1080p
|
||||||
|
Alfred: I found 3 torrents for Inception (2010):
|
||||||
|
1. Inception.2010.1080p.BluRay.x264 (150 seeders) - 2.1 GB
|
||||||
|
2. Inception.2010.1080p.WEB-DL.x265 (80 seeders) - 1.8 GB
|
||||||
|
3. Inception.2010.1080p.REMUX (45 seeders) - 25 GB
|
||||||
|
|
||||||
|
You: Download the first one
|
||||||
|
Alfred: ✓ Added to qBittorrent! Download started.
|
||||||
|
|
||||||
|
You: Organize the Breaking Bad S01 download
|
||||||
|
Alfred: ✓ Resolved destination: /tv_shows/Breaking.Bad/Season 01/
|
||||||
|
✓ Moved 6 episode files
|
||||||
|
✓ Placed 6 subtitle tracks (fr, en)
|
||||||
|
✓ Seed links created in /torrents/
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via API
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Health check
|
||||||
|
curl http://localhost:8000/health
|
||||||
|
|
||||||
|
# Chat (OpenAI-compatible)
|
||||||
|
curl -X POST http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "alfred",
|
||||||
|
"messages": [{"role": "user", "content": "Find The Matrix 4K"}]
|
||||||
|
}'
|
||||||
|
|
||||||
|
# List models
|
||||||
|
curl http://localhost:8000/v1/models
|
||||||
|
|
||||||
|
# View memory state
|
||||||
|
curl http://localhost:8000/memory/state
|
||||||
|
```
|
||||||
|
|
||||||
|
Alfred works with any OpenAI-compatible client: point the client at `http://localhost:8000/v1` and select the model `alfred`.
|
||||||
|
|
||||||
|
## 🧠 Memory System
|
||||||
|
|
||||||
|
Alfred uses a three-tier memory system:
|
||||||
|
|
||||||
|
| Tier | Storage | Contents | Lifetime |
|
||||||
|
|------|---------|----------|----------|
|
||||||
|
| **LTM** | JSON file (`data/memory/ltm.json`) | Config, library, watchlist, learned patterns | Permanent |
|
||||||
|
| **STM** | RAM | Conversation history (capped) | Session |
|
||||||
|
| **Episodic** | RAM | Search results, active downloads, errors | Short-lived |
|
||||||
|
|
||||||
|
## 🧪 Development
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run full suite (parallel)
|
||||||
|
make test
|
||||||
|
|
||||||
|
# Run with coverage report
|
||||||
|
make coverage
|
||||||
|
|
||||||
|
# Run a single file
|
||||||
|
uv run pytest tests/test_agent.py -v
|
||||||
|
|
||||||
|
# Run a single class
|
||||||
|
uv run pytest tests/test_agent.py::TestAgentInit -v
|
||||||
|
|
||||||
|
# Skip slow tests
|
||||||
|
uv run pytest -m "not slow"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test coverage
|
||||||
|
|
||||||
|
The suite covers:
|
||||||
|
- **Agent loop** — tool execution, history, max iterations, error handling
|
||||||
|
- **Tool registry** — OpenAI schema format, parameter extraction
|
||||||
|
- **Prompts** — system prompt building, tool inclusion
|
||||||
|
- **Memory** — LTM/STM/Episodic operations, persistence
|
||||||
|
- **Filesystem tools** — path traversal security, folder listing
|
||||||
|
- **File manager** — hard-link, move, seed links (real filesystem, no mocks)
|
||||||
|
- **Application use cases** — `resolve_destination`, `create_seed_links`, `list_folder`, `move_media`
|
||||||
|
- **Domain** — TV show/movie entities, shared value objects (`ImdbId`, `FilePath`, `FileSize`), subtitle scanner
|
||||||
|
- **Repositories** — JSON-backed movie, TV show, subtitle repos
|
||||||
|
- **Bootstrap** — secret generation, idempotency, URI construction
|
||||||
|
- **Workflows** — YAML loading, structure validation
|
||||||
|
- **Configuration** — boundary validation for all settings
|
||||||
|
|
||||||
|
### Code Quality
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make lint # Ruff check --fix
|
||||||
|
make format # Ruff format + check --fix
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adding a New Tool
|
||||||
|
|
||||||
|
1. Implement the function in `alfred/agent/tools/`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# alfred/agent/tools/api.py
|
||||||
|
def my_new_tool(param: str) -> dict[str, Any]:
|
||||||
|
"""Short description shown to the LLM to decide when to call this tool."""
|
||||||
|
memory = get_memory()
|
||||||
|
# ...
|
||||||
|
return {"status": "ok", "data": result}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Register it in `alfred/agent/registry.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
tool_functions = [
|
||||||
|
# ... existing tools ...
|
||||||
|
api_tools.my_new_tool,
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
The registry auto-generates the JSON schema from the function signature and docstring.
|
||||||
|
|
||||||
|
### Adding a Workflow
|
||||||
|
|
||||||
|
Create a YAML file in `alfred/agent/workflows/`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: my_workflow
|
||||||
|
description: What this workflow does
|
||||||
|
steps:
|
||||||
|
- tool: resolve_destination
|
||||||
|
description: Find where the file should go
|
||||||
|
- tool: move_media
|
||||||
|
description: Move the file
|
||||||
|
```
|
||||||
|
|
||||||
|
Workflows are loaded automatically at startup.
|
||||||
|
|
||||||
|
### Version Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Must be on main branch
|
||||||
|
make patch # 0.1.7 → 0.1.8
|
||||||
|
make minor # 0.1.7 → 0.2.0
|
||||||
|
make major # 0.1.7 → 1.0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📚 API Reference
|
||||||
|
|
||||||
|
### Endpoints
|
||||||
|
|
||||||
|
| Method | Path | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `GET` | `/health` | Health check |
|
||||||
|
| `GET` | `/v1/models` | List models (OpenAI-compatible) |
|
||||||
|
| `POST` | `/v1/chat/completions` | Chat (OpenAI-compatible, streaming supported) |
|
||||||
|
| `GET` | `/memory/state` | Full memory dump (debug) |
|
||||||
|
| `POST` | `/memory/clear-session` | Clear STM + Episodic |
|
||||||
|
| `GET` | `/memory/episodic/search-results` | Current search results |
|
||||||
|
|
||||||
|
## 🔧 Troubleshooting
|
||||||
|
|
||||||
|
### Agent doesn't respond
|
||||||
|
|
||||||
|
1. Check API keys in `.env`
|
||||||
|
2. Verify the LLM is running:
|
||||||
|
```bash
|
||||||
|
docker logs alfred-ollama
|
||||||
|
docker exec alfred-ollama ollama list
|
||||||
|
```
|
||||||
|
3. Check Alfred logs: `docker logs alfred-core`
|
||||||
|
|
||||||
|
### qBittorrent connection failed
|
||||||
|
|
||||||
|
1. Verify qBittorrent is running: `docker ps | grep qbittorrent`
|
||||||
|
2. Check credentials in `.env` (`QBITTORRENT_URL`, `QBITTORRENT_USERNAME`, `QBITTORRENT_PASSWORD`)
|
||||||
|
|
||||||
|
### Memory not persisting
|
||||||
|
|
||||||
|
1. Check that the `data/` directory is writable
|
||||||
|
2. Verify volume mounts in `docker-compose.yaml`
|
||||||
|
|
||||||
|
### Bootstrap fails
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make validate # Check what's wrong with .env
|
||||||
|
make bootstrap # Regenerate (preserves existing secrets)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tests failing
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run pytest tests/test_failing.py -v --tb=long
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🤝 Contributing
|
||||||
|
|
||||||
|
1. Fork the repository
|
||||||
|
2. Create a feature branch: `git checkout -b feat/my-feature`
|
||||||
|
3. Make your changes + add tests
|
||||||
|
4. Run `make test && make lint && make format`
|
||||||
|
5. Commit with [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`, `infra:`
|
||||||
|
6. Open a Pull Request
|
||||||
|
|
||||||
|
## 📄 License
|
||||||
|
|
||||||
|
MIT License — see [LICENSE](LICENSE) file for details.
|
||||||
|
|
||||||
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
|
- [LibreChat](https://github.com/danny-avila/LibreChat) — Chat interface
|
||||||
|
- [Ollama](https://ollama.ai/) — Local LLM runtime
|
||||||
|
- [DeepSeek](https://www.deepseek.com/) — LLM provider
|
||||||
|
- [TMDB](https://www.themoviedb.org/) — Movie & TV database
|
||||||
|
- [qBittorrent](https://www.qbittorrent.org/) — Torrent client
|
||||||
|
- [FastAPI](https://fastapi.tiangolo.com/) — Web framework
|
||||||
|
- [uv](https://github.com/astral-sh/uv) — Fast Python package manager
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<p align="center">Made with ❤️ by <a href="https://github.com/francwa">Francwa</a></p>
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Agent module for media library management."""
|
"""Agent module for media library management."""
|
||||||
|
|
||||||
|
from alfred.settings import settings
|
||||||
|
|
||||||
from .agent import Agent
|
from .agent import Agent
|
||||||
from .config import settings
|
|
||||||
|
|
||||||
__all__ = ["Agent", "settings"]
|
__all__ = ["Agent", "settings"]
|
||||||
@@ -5,9 +5,9 @@ import logging
|
|||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from infrastructure.persistence import get_memory
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
from alfred.settings import settings
|
||||||
|
|
||||||
from .config import settings
|
|
||||||
from .prompts import PromptBuilder
|
from .prompts import PromptBuilder
|
||||||
from .registry import Tool, make_tools
|
from .registry import Tool, make_tools
|
||||||
|
|
||||||
@@ -21,17 +21,20 @@ class Agent:
|
|||||||
Uses OpenAI-compatible tool calling API.
|
Uses OpenAI-compatible tool calling API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, llm, max_tool_iterations: int = 5):
|
def __init__(self, settings, llm, max_tool_iterations: int = 5):
|
||||||
"""
|
"""
|
||||||
Initialize the agent.
|
Initialize the agent.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
settings: Application settings instance
|
||||||
llm: LLM client with complete() method
|
llm: LLM client with complete() method
|
||||||
max_tool_iterations: Maximum number of tool execution iterations
|
max_tool_iterations: Maximum number of tool execution iterations
|
||||||
"""
|
"""
|
||||||
|
self.settings = settings
|
||||||
self.llm = llm
|
self.llm = llm
|
||||||
self.tools: dict[str, Tool] = make_tools()
|
self.tools: dict[str, Tool] = make_tools(settings)
|
||||||
self.prompt_builder = PromptBuilder(self.tools)
|
self.prompt_builder = PromptBuilder(self.tools)
|
||||||
|
self.settings = settings
|
||||||
self.max_tool_iterations = max_tool_iterations
|
self.max_tool_iterations = max_tool_iterations
|
||||||
|
|
||||||
def step(self, user_input: str) -> str:
|
def step(self, user_input: str) -> str:
|
||||||
@@ -78,7 +81,7 @@ class Agent:
|
|||||||
tools_spec = self.prompt_builder.build_tools_spec()
|
tools_spec = self.prompt_builder.build_tools_spec()
|
||||||
|
|
||||||
# Tool execution loop
|
# Tool execution loop
|
||||||
for _iteration in range(self.max_tool_iterations):
|
for _iteration in range(self.settings.max_tool_iterations):
|
||||||
# Call LLM with tools
|
# Call LLM with tools
|
||||||
llm_result = self.llm.complete(messages, tools=tools_spec)
|
llm_result = self.llm.complete(messages, tools=tools_spec)
|
||||||
|
|
||||||
@@ -230,7 +233,7 @@ class Agent:
|
|||||||
tools_spec = self.prompt_builder.build_tools_spec()
|
tools_spec = self.prompt_builder.build_tools_spec()
|
||||||
|
|
||||||
# Tool execution loop
|
# Tool execution loop
|
||||||
for _iteration in range(self.max_tool_iterations):
|
for _iteration in range(self.settings.max_tool_iterations):
|
||||||
# Call LLM with tools
|
# Call LLM with tools
|
||||||
llm_result = self.llm.complete(messages, tools=tools_spec)
|
llm_result = self.llm.complete(messages, tools=tools_spec)
|
||||||
|
|
||||||
@@ -6,7 +6,8 @@ from typing import Any
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError, RequestException, Timeout
|
from requests.exceptions import HTTPError, RequestException, Timeout
|
||||||
|
|
||||||
from ..config import settings
|
from alfred.settings import Settings, settings
|
||||||
|
|
||||||
from .exceptions import LLMAPIError, LLMConfigurationError
|
from .exceptions import LLMAPIError, LLMConfigurationError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -21,6 +22,7 @@ class DeepSeekClient:
|
|||||||
base_url: str | None = None,
|
base_url: str | None = None,
|
||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
timeout: int | None = None,
|
timeout: int | None = None,
|
||||||
|
settings: Settings | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize DeepSeek client.
|
Initialize DeepSeek client.
|
||||||
@@ -34,10 +36,10 @@ class DeepSeekClient:
|
|||||||
Raises:
|
Raises:
|
||||||
LLMConfigurationError: If API key is missing
|
LLMConfigurationError: If API key is missing
|
||||||
"""
|
"""
|
||||||
self.api_key = api_key or settings.deepseek_api_key
|
self.api_key = api_key or self.settings.deepseek_api_key
|
||||||
self.base_url = base_url or settings.deepseek_base_url
|
self.base_url = base_url or self.settings.deepseek_base_url
|
||||||
self.model = model or settings.model
|
self.model = model or self.settings.deepseek_model
|
||||||
self.timeout = timeout or settings.request_timeout
|
self.timeout = timeout or self.settings.request_timeout
|
||||||
|
|
||||||
if not self.api_key:
|
if not self.api_key:
|
||||||
raise LLMConfigurationError(
|
raise LLMConfigurationError(
|
||||||
@@ -94,7 +96,7 @@ class DeepSeekClient:
|
|||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": settings.temperature,
|
"temperature": settings.llm_temperature,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add tools if provided
|
# Add tools if provided
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
"""Ollama LLM client with robust error handling."""
|
"""Ollama LLM client with robust error handling."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError, RequestException, Timeout
|
from requests.exceptions import HTTPError, RequestException, Timeout
|
||||||
|
|
||||||
from ..config import settings
|
from alfred.settings import Settings
|
||||||
|
|
||||||
from .exceptions import LLMAPIError, LLMConfigurationError
|
from .exceptions import LLMAPIError, LLMConfigurationError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -32,6 +32,7 @@ class OllamaClient:
|
|||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
timeout: int | None = None,
|
timeout: int | None = None,
|
||||||
temperature: float | None = None,
|
temperature: float | None = None,
|
||||||
|
settings: Settings | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize Ollama client.
|
Initialize Ollama client.
|
||||||
@@ -45,13 +46,11 @@ class OllamaClient:
|
|||||||
Raises:
|
Raises:
|
||||||
LLMConfigurationError: If configuration is invalid
|
LLMConfigurationError: If configuration is invalid
|
||||||
"""
|
"""
|
||||||
self.base_url = base_url or os.getenv(
|
self.base_url = base_url or settings.ollama_base_url
|
||||||
"OLLAMA_BASE_URL", "http://localhost:11434"
|
self.model = model or settings.ollama_model
|
||||||
)
|
|
||||||
self.model = model or os.getenv("OLLAMA_MODEL", "llama3.2")
|
|
||||||
self.timeout = timeout or settings.request_timeout
|
self.timeout = timeout or settings.request_timeout
|
||||||
self.temperature = (
|
self.temperature = (
|
||||||
temperature if temperature is not None else settings.temperature
|
temperature if temperature is not None else settings.llm_temperature
|
||||||
)
|
)
|
||||||
|
|
||||||
if not self.base_url:
|
if not self.base_url:
|
||||||
@@ -3,7 +3,8 @@
|
|||||||
import json
|
import json
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from infrastructure.persistence import get_memory
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
from alfred.infrastructure.persistence.memory import MemoryRegistry
|
||||||
|
|
||||||
from .registry import Tool
|
from .registry import Tool
|
||||||
|
|
||||||
@@ -13,6 +14,7 @@ class PromptBuilder:
|
|||||||
|
|
||||||
def __init__(self, tools: dict[str, Tool]):
|
def __init__(self, tools: dict[str, Tool]):
|
||||||
self.tools = tools
|
self.tools = tools
|
||||||
|
self._memory_registry = MemoryRegistry()
|
||||||
|
|
||||||
def build_tools_spec(self) -> list[dict[str, Any]]:
|
def build_tools_spec(self) -> list[dict[str, Any]]:
|
||||||
"""Build the tool specification for the LLM API."""
|
"""Build the tool specification for the LLM API."""
|
||||||
@@ -52,7 +54,7 @@ class PromptBuilder:
|
|||||||
# Show first 5 results
|
# Show first 5 results
|
||||||
for i, result in enumerate(result_list[:5]):
|
for i, result in enumerate(result_list[:5]):
|
||||||
name = result.get("name", "Unknown")
|
name = result.get("name", "Unknown")
|
||||||
lines.append(f" {i+1}. {name}")
|
lines.append(f" {i + 1}. {name}")
|
||||||
if len(result_list) > 5:
|
if len(result_list) > 5:
|
||||||
lines.append(f" ... and {len(result_list) - 5} more")
|
lines.append(f" ... and {len(result_list) - 5} more")
|
||||||
|
|
||||||
@@ -109,11 +111,30 @@ class PromptBuilder:
|
|||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
def _format_memory_schema(self) -> str:
    """Describe available memory components so the agent knows what to read/write and when.

    Renders the schema returned by ``self._memory_registry.schema()`` as a
    text outline: one bracketed heading per non-empty tier, one line per
    component (name, access mode, description) and one bullet per entry of
    the component's ``fields`` mapping.

    Returns:
        The outline as a single newline-joined string whose first line is
        ``MEMORY COMPONENTS:``.
    """
    schema = self._memory_registry.schema()
    tier_labels = {
        "ltm": "LONG-TERM (persisted)",
        "stm": "SHORT-TERM (session)",
        "episodic": "EPISODIC (volatile)",
    }
    lines = ["MEMORY COMPONENTS:"]

    for tier, components in schema.items():
        # Tiers without any component are omitted from the outline.
        if not components:
            continue
        # Tiers missing from tier_labels fall back to their upper-cased key.
        lines.append(f"\n [{tier_labels.get(tier, tier.upper())}]")
        for c in components:
            access = c.get("access", "read")
            lines.append(f" {c['name']} ({access}): {c['description']}")
            # `fields` may be absent *or* explicitly null; both mean "no fields".
            for field_name, field_desc in (c.get("fields") or {}).items():
                lines.append(f" · {field_name}: {field_desc}")

    return "\n".join(lines)
|
||||||
|
|
||||||
def _format_config_context(self, memory) -> str:
|
def _format_config_context(self, memory) -> str:
|
||||||
"""Format configuration context."""
|
"""Format configuration context."""
|
||||||
lines = ["CURRENT CONFIGURATION:"]
|
lines = ["CURRENT CONFIGURATION:"]
|
||||||
if memory.ltm.config:
|
folders = {**memory.ltm.workspace.as_dict(), **memory.ltm.library_paths.to_dict()}
|
||||||
for key, value in memory.ltm.config.items():
|
if folders:
|
||||||
|
for key, value in folders.items():
|
||||||
lines.append(f" - {key}: {value}")
|
lines.append(f" - {key}: {value}")
|
||||||
else:
|
else:
|
||||||
lines.append(" (no configuration set)")
|
lines.append(" (no configuration set)")
|
||||||
@@ -138,6 +159,9 @@ class PromptBuilder:
|
|||||||
tools_desc = self._format_tools_description()
|
tools_desc = self._format_tools_description()
|
||||||
tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
|
tools_section = f"\nAVAILABLE TOOLS:\n{tools_desc}" if tools_desc else ""
|
||||||
|
|
||||||
|
# Memory schema
|
||||||
|
memory_schema = self._format_memory_schema()
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
config_section = self._format_config_context(memory)
|
config_section = self._format_config_context(memory)
|
||||||
if config_section:
|
if config_section:
|
||||||
@@ -172,6 +196,8 @@ EXAMPLES:
|
|||||||
|
|
||||||
{language_instruction}
|
{language_instruction}
|
||||||
{tools_section}
|
{tools_section}
|
||||||
|
|
||||||
|
{memory_schema}
|
||||||
{config_section}
|
{config_section}
|
||||||
{stm_context}
|
{stm_context}
|
||||||
{episodic_context}
|
{episodic_context}
|
||||||
@@ -78,10 +78,13 @@ def _create_tool_from_function(func: Callable) -> Tool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def make_tools() -> dict[str, Tool]:
|
def make_tools(settings) -> dict[str, Tool]:
|
||||||
"""
|
"""
|
||||||
Create and register all available tools.
|
Create and register all available tools.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
settings: Application settings instance
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping tool names to Tool objects
|
Dictionary mapping tool names to Tool objects
|
||||||
"""
|
"""
|
||||||
@@ -94,6 +97,11 @@ def make_tools() -> dict[str, Tool]:
|
|||||||
tool_functions = [
|
tool_functions = [
|
||||||
fs_tools.set_path_for_folder,
|
fs_tools.set_path_for_folder,
|
||||||
fs_tools.list_folder,
|
fs_tools.list_folder,
|
||||||
|
fs_tools.resolve_destination,
|
||||||
|
fs_tools.move_media,
|
||||||
|
fs_tools.manage_subtitles,
|
||||||
|
fs_tools.create_seed_links,
|
||||||
|
fs_tools.learn,
|
||||||
api_tools.find_media_imdb_id,
|
api_tools.find_media_imdb_id,
|
||||||
api_tools.find_torrent,
|
api_tools.find_torrent,
|
||||||
api_tools.add_torrent_by_index,
|
api_tools.add_torrent_by_index,
|
||||||
@@ -3,12 +3,12 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from application.movies import SearchMovieUseCase
|
from alfred.application.movies import SearchMovieUseCase
|
||||||
from application.torrents import AddTorrentUseCase, SearchTorrentsUseCase
|
from alfred.application.torrents import AddTorrentUseCase, SearchTorrentsUseCase
|
||||||
from infrastructure.api.knaben import knaben_client
|
from alfred.infrastructure.api.knaben import knaben_client
|
||||||
from infrastructure.api.qbittorrent import qbittorrent_client
|
from alfred.infrastructure.api.qbittorrent import qbittorrent_client
|
||||||
from infrastructure.api.tmdb import tmdb_client
|
from alfred.infrastructure.api.tmdb import tmdb_client
|
||||||
from infrastructure.persistence import get_memory
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -0,0 +1,230 @@
|
|||||||
|
"""Filesystem tools for folder management."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import alfred as _alfred_pkg
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from alfred.application.filesystem import (
|
||||||
|
CreateSeedLinksUseCase,
|
||||||
|
ListFolderUseCase,
|
||||||
|
ManageSubtitlesUseCase,
|
||||||
|
MoveMediaUseCase,
|
||||||
|
ResolveDestinationUseCase,
|
||||||
|
SetFolderPathUseCase,
|
||||||
|
)
|
||||||
|
from alfred.infrastructure.filesystem import FileManager
|
||||||
|
|
||||||
|
_LEARNED_ROOT = Path(_alfred_pkg.__file__).parent.parent / "data" / "knowledge"
|
||||||
|
|
||||||
|
|
||||||
|
def move_media(source: str, destination: str) -> dict[str, Any]:
    """
    Move a media file to a destination path.

    Copies the file safely first (with integrity check), then deletes the source.
    Use this to organise a downloaded file into the media library.

    Args:
        source: Absolute path to the source file.
        destination: Absolute path to the destination file (must not already exist).

    Returns:
        Dict with status, source, destination, filename, and size — or error details.
    """
    # Build the use case around a fresh FileManager and serialise its response.
    outcome = MoveMediaUseCase(FileManager()).execute(source, destination)
    return outcome.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_destination(
    release_name: str,
    source_file: str,
    tmdb_title: str,
    tmdb_year: int,
    tmdb_episode_title: str | None = None,
    confirmed_folder: str | None = None,
) -> dict[str, Any]:
    """
    Compute the destination path in the media library for a release.

    Call this before move_media to get the correct library path. Handles:
    - Parsing the release name (quality, codec, group, season/episode)
    - Looking up any existing series folder in the library
    - Applying group-conflict rules (asks user if ambiguous)
    - Building the full destination path with correct naming conventions

    Args:
        release_name: Raw release folder or file name
            (e.g. "Oz.S03.1080p.WEBRip.x265-KONTRAST").
        source_file: Absolute path to the source video file (used for extension).
        tmdb_title: Canonical show/movie title from TMDB (e.g. "Oz").
        tmdb_year: Release/start year from TMDB (e.g. 1997).
        tmdb_episode_title: Episode title from TMDB for single-episode releases
            (e.g. "The Routine"). Omit for season packs and movies.
        confirmed_folder: If a previous call returned needs_clarification, pass
            the user-chosen folder name here to proceed.

    Returns:
        On success: dict with status, library_file, series_folder, season_folder,
            series_folder_name, season_folder_name, filename,
            is_new_series_folder.
        On ambiguity: dict with status="needs_clarification", question, options.
        On error: dict with status="error", error, message.
    """
    # Every argument is forwarded by keyword, unchanged, to the use case.
    resolution = ResolveDestinationUseCase().execute(
        release_name=release_name,
        source_file=source_file,
        tmdb_title=tmdb_title,
        tmdb_year=tmdb_year,
        tmdb_episode_title=tmdb_episode_title,
        confirmed_folder=confirmed_folder,
    )
    return resolution.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def create_seed_links(library_file: str, original_download_folder: str) -> dict[str, Any]:
    """
    Prepare a torrent subfolder so qBittorrent can keep seeding after a move.

    Hard-links the video file from the library into torrents/<original_folder_name>/,
    then copies all remaining files from the original download folder (subtitles,
    .nfo, .jpg, .txt, …) so the torrent data is complete.

    Call this after move_media when the user wants to keep seeding.

    Args:
        library_file: Absolute path to the video file now in the library.
        original_download_folder: Absolute path to the original download folder
            (may still contain subs, nfo, and other release files).

    Returns:
        Dict with status, torrent_subfolder, linked_file, copied_files,
        copied_count, skipped — or error details.
    """
    # Build the use case around a fresh FileManager and serialise its response.
    outcome = CreateSeedLinksUseCase(FileManager()).execute(
        library_file, original_download_folder
    )
    return outcome.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def manage_subtitles(source_video: str, destination_video: str) -> dict[str, Any]:
    """
    Place subtitle files alongside an organised video file.

    Scans for subtitle files (.srt, .ass, .ssa, .vtt, .sub) next to the source
    video, filters them according to the user's SubtitlePreferences (languages,
    min size, SDH, forced), and hard-links the passing files next to the
    destination video with the correct naming convention:
        fr.srt / fr.sdh.srt / fr.forced.srt / en.srt …

    Call this right after move_media, passing the same source and destination
    paths. If no subtitles are found, returns ok with placed_count=0.

    Args:
        source_video: Absolute path to the original video file (in the download folder).
        destination_video: Absolute path to the placed video file (in the library).

    Returns:
        Dict with status, placed list (source, destination, filename), placed_count,
        skipped_count — or error details.
    """
    # NOTE: the docstring used to mention a `copy_media` tool; this module
    # defines no such function, so only move_media is referenced now.
    file_manager = FileManager()
    use_case = ManageSubtitlesUseCase(file_manager)
    return use_case.execute(source_video, destination_video).to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def learn(pack: str, category: str, key: str, values: list[str]) -> dict[str, Any]:
    """
    Teach Alfred a new token mapping and persist it to the learned knowledge pack.

    Use this when a subtitle file contains an unrecognised token — after confirming
    with the user what the token means, call learn() to persist it so Alfred
    recognises it in future scans.

    The learned pack is rewritten through a temporary file that then replaces
    the real one, so a failed write does not leave a truncated pack behind.

    Args:
        pack: Knowledge pack name. Currently only "subtitles" is supported.
        category: Category within the pack: "languages", "types", or "formats".
        key: The entry key — e.g. ISO 639-1 language code ("es"), type id ("sdh").
        values: List of tokens to add — e.g. ["spanish", "espanol", "spa"].
            Tokens already stored, and repeats within this list, are ignored.

    Returns:
        Dict with status, added_count, and the updated token list.
        On failure: dict with status="error", error, message.
    """
    _VALID_PACKS = {"subtitles"}
    _VALID_CATEGORIES = {"languages", "types", "formats"}

    if pack not in _VALID_PACKS:
        return {
            "status": "error",
            "error": "unknown_pack",
            "message": f"Unknown pack '{pack}'. Valid: {sorted(_VALID_PACKS)}",
        }

    if category not in _VALID_CATEGORIES:
        return {
            "status": "error",
            "error": "unknown_category",
            "message": f"Unknown category '{category}'. Valid: {sorted(_VALID_CATEGORIES)}",
        }

    learned_path = _LEARNED_ROOT / "subtitles_learned.yaml"

    data: dict = {}
    if learned_path.exists():
        try:
            with open(learned_path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            # Tool boundary: report the failure to the caller instead of raising.
            return {"status": "error", "error": "read_failed", "message": str(e)}

    # The file is hand-editable, so its shape is not guaranteed: the top level,
    # the category or the entry may not be mappings, and `tokens` may be null.
    try:
        cat_data = data.setdefault(category, {})
        entry = cat_data.setdefault(key, {"tokens": []})
        existing = list(entry.get("tokens") or [])
    except (AttributeError, TypeError) as e:
        return {
            "status": "error",
            "error": "read_failed",
            "message": f"Malformed learned pack '{learned_path.name}': {e}",
        }

    # dict.fromkeys drops repeats inside `values` while keeping their order.
    new_tokens = [v for v in dict.fromkeys(values) if v not in existing]
    entry["tokens"] = existing + new_tokens

    tmp = learned_path.with_suffix(".yaml.tmp")
    try:
        _LEARNED_ROOT.mkdir(parents=True, exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
        # replace() overwrites an existing target on every platform;
        # rename() raises on Windows when the target already exists.
        tmp.replace(learned_path)
    except Exception as e:
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup; the write error below is what matters.
            pass
        return {"status": "error", "error": "write_failed", "message": str(e)}

    return {
        "status": "ok",
        "pack": pack,
        "category": category,
        "key": key,
        "added_count": len(new_tokens),
        "tokens": entry["tokens"],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def set_path_for_folder(folder_name: str, path_value: str) -> dict[str, Any]:
    """
    Set a folder path in the configuration.

    Args:
        folder_name: Name of folder to set (download, tvshow, movie, torrent).
        path_value: Absolute path to the folder.

    Returns:
        Dict with status or error information.
    """
    # Build the use case around a fresh FileManager and serialise its response.
    return SetFolderPathUseCase(FileManager()).execute(folder_name, path_value).to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
def list_folder(folder_type: str, path: str = ".") -> dict[str, Any]:
    """
    List contents of a configured folder.

    Args:
        folder_type: Type of folder to list (download, tvshow, movie, torrent).
        path: Relative path within the folder (default: root).

    Returns:
        Dict with folder contents or error information.
    """
    # Build the use case around a fresh FileManager and serialise its response.
    return ListFolderUseCase(FileManager()).execute(folder_type, path).to_dict()
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from infrastructure.persistence import get_memory
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
from .loader import WorkflowLoader
|
||||||
|
|
||||||
|
__all__ = ["WorkflowLoader"]
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
"""WorkflowLoader — autodiscovers and loads workflow YAML files.
|
||||||
|
|
||||||
|
Scans the workflows/ directory for all .yaml files and exposes them
|
||||||
|
as dicts. No manual registration needed — drop a new .yaml file and
|
||||||
|
it will be picked up automatically.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_WORKFLOWS_DIR = Path(__file__).parent
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowLoader:
|
||||||
|
"""
|
||||||
|
Loads all workflow definitions from the workflows/ directory.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
loader = WorkflowLoader()
|
||||||
|
all_workflows = loader.all()
|
||||||
|
workflow = loader.get("organize_media")
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._workflows: dict[str, dict] = {}
|
||||||
|
self._load()
|
||||||
|
|
||||||
|
def _load(self) -> None:
|
||||||
|
for path in sorted(_WORKFLOWS_DIR.glob("*.yaml")):
|
||||||
|
try:
|
||||||
|
data = yaml.safe_load(path.read_text(encoding="utf-8"))
|
||||||
|
name = data.get("name") or path.stem
|
||||||
|
self._workflows[name] = data
|
||||||
|
logger.info(f"WorkflowLoader: Loaded '{name}' from {path.name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"WorkflowLoader: Could not load {path.name}: {e}")
|
||||||
|
|
||||||
|
def all(self) -> dict[str, dict]:
|
||||||
|
"""Return all loaded workflows keyed by name."""
|
||||||
|
return self._workflows
|
||||||
|
|
||||||
|
def get(self, name: str) -> dict | None:
|
||||||
|
"""Return a specific workflow by name, or None if not found."""
|
||||||
|
return self._workflows.get(name)
|
||||||
|
|
||||||
|
def names(self) -> list[str]:
|
||||||
|
"""Return all available workflow names."""
|
||||||
|
return list(self._workflows.keys())
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
name: manage_subtitles
|
||||||
|
description: >
|
||||||
|
Place subtitle files alongside a video that has just been organised into the library.
|
||||||
|
Detects the release pattern automatically, identifies and classifies all tracks,
|
||||||
|
filters by user rules, and hard-links matching files to the destination.
|
||||||
|
If any tracks are unrecognised, asks the user and optionally teaches Alfred.
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
examples:
|
||||||
|
- "handle subtitles for The X-Files S01E01"
|
||||||
|
- "place the subs next to the file"
|
||||||
|
- "subtitles are in the Subs/ folder"
|
||||||
|
- "add subtitles"
|
||||||
|
|
||||||
|
tools:
|
||||||
|
- manage_subtitles
|
||||||
|
- learn
|
||||||
|
|
||||||
|
memory:
|
||||||
|
SubtitlePreferences: read
|
||||||
|
Workflow: read-write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- id: place_subtitles
|
||||||
|
tool: manage_subtitles
|
||||||
|
description: >
|
||||||
|
Detect release pattern, identify and classify all subtitle tracks,
|
||||||
|
filter by rules, hard-link matching files next to the destination video.
|
||||||
|
Reads SubtitlePreferences from LTM for language/type/format filtering.
|
||||||
|
params:
|
||||||
|
source_video: "{source_video}"
|
||||||
|
destination_video: "{destination_video}"
|
||||||
|
imdb_id: "{imdb_id}"
|
||||||
|
media_type: "{media_type}"
|
||||||
|
release_group: "{release_group}"
|
||||||
|
season: "{season}"
|
||||||
|
episode: "{episode}"
|
||||||
|
on_result:
|
||||||
|
ok_placed_zero: skip # no subtitles found — not an error
|
||||||
|
needs_clarification: ask_user # unrecognised tokens found
|
||||||
|
|
||||||
|
- id: ask_user
|
||||||
|
description: >
|
||||||
|
Some tracks could not be classified. Show the user the unresolved tokens
|
||||||
|
and ask if they want to teach Alfred what they mean.
|
||||||
|
If yes → go to learn_tokens. If no → end workflow.
|
||||||
|
ask_user:
|
||||||
|
question: >
|
||||||
|
I could not identify some tokens in the subtitle files: {unresolved}.
|
||||||
|
Do you want to teach me what they mean?
|
||||||
|
answers:
|
||||||
|
"yes": { next_step: learn_tokens }
|
||||||
|
"no": { next_step: end }
|
||||||
|
|
||||||
|
- id: learn_tokens
|
||||||
|
tool: learn
|
||||||
|
description: >
|
||||||
|
Persist a new token mapping to the learned knowledge pack so Alfred
|
||||||
|
recognises it in future scans without asking again.
|
||||||
|
params:
|
||||||
|
pack: "subtitles"
|
||||||
|
category: "{token_category}" # "languages", "types", or "formats"
|
||||||
|
key: "{token_key}" # e.g. "es", "de"
|
||||||
|
values: "{token_values}" # e.g. ["spanish", "espanol"]
|
||||||
|
|
||||||
|
subtitle_naming:
|
||||||
|
standard: "{lang}.{ext}"
|
||||||
|
sdh: "{lang}.sdh.{ext}"
|
||||||
|
forced: "{lang}.forced.{ext}"
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
name: organize_media
|
||||||
|
description: >
|
||||||
|
Organise a downloaded series or movie into the media library.
|
||||||
|
Triggered when the user asks to move/organize a specific title.
|
||||||
|
Always moves the video file. Optionally creates seed links in the
|
||||||
|
torrents folder so qBittorrent can keep seeding.
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
examples:
|
||||||
|
- "organize Breaking Bad"
|
||||||
|
- "organise Severance season 2"
|
||||||
|
- "move Inception to my library"
|
||||||
|
- "organize Breaking Bad season 1, keep seeding"
|
||||||
|
|
||||||
|
tools:
|
||||||
|
- list_folder
|
||||||
|
- find_media_imdb_id
|
||||||
|
- resolve_destination
|
||||||
|
- move_media
|
||||||
|
- manage_subtitles
|
||||||
|
- create_seed_links
|
||||||
|
|
||||||
|
memory:
|
||||||
|
WorkspacePaths: read
|
||||||
|
LibraryPaths: read
|
||||||
|
Library: read-write
|
||||||
|
Workflow: read-write
|
||||||
|
Entities: read-write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- id: list_downloads
|
||||||
|
tool: list_folder
|
||||||
|
description: List the download folder to find the target files.
|
||||||
|
params:
|
||||||
|
folder_type: download
|
||||||
|
|
||||||
|
- id: identify_media
|
||||||
|
tool: find_media_imdb_id
|
||||||
|
description: Confirm title, type (series/movie), and metadata via TMDB.
|
||||||
|
|
||||||
|
- id: resolve_destination
|
||||||
|
tool: resolve_destination
|
||||||
|
description: >
|
||||||
|
Compute the correct destination path in the library.
|
||||||
|
Uses the release name + TMDB metadata to build folder and file names.
|
||||||
|
If multiple series folders exist for this title, returns
|
||||||
|
needs_clarification and the user must pick one (re-call with confirmed_folder).
|
||||||
|
|
||||||
|
- id: move_file
|
||||||
|
tool: move_media
|
||||||
|
description: >
|
||||||
|
Move the video file to library_file returned by resolve_destination.
|
||||||
|
|
||||||
|
- id: handle_subtitles
|
||||||
|
tool: manage_subtitles
|
||||||
|
description: >
|
||||||
|
Place subtitle files alongside the video in the library.
|
||||||
|
Pass the original source path and the new library destination path.
|
||||||
|
on_missing: skip
|
||||||
|
|
||||||
|
- id: ask_seeding
|
||||||
|
ask_user:
|
||||||
|
question: "Do you want to keep seeding this torrent?"
|
||||||
|
answers:
|
||||||
|
"yes": { next_step: create_seed_links }
|
||||||
|
"no": { next_step: update_library }
|
||||||
|
|
||||||
|
- id: create_seed_links
|
||||||
|
tool: create_seed_links
|
||||||
|
description: >
|
||||||
|
Hard-link the library video file back into torrents/<original_folder>/
|
||||||
|
and copy all remaining files from the original download folder
|
||||||
|
(subs, nfo, jpg, …) so the torrent stays complete for seeding.
|
||||||
|
|
||||||
|
- id: update_library
|
||||||
|
memory_write: Library
|
||||||
|
description: Add the entry to the LTM library after a successful move.
|
||||||
|
|
||||||
|
naming_convention:
|
||||||
|
# Resolved by domain entities (Movie, Episode) — not hardcoded here
|
||||||
|
tv_show: "{title}/Season {season:02d}/{title}.S{season:02d}E{episode:02d}.{ext}"
|
||||||
|
movie: "{title} ({year})/{title}.{year}.{ext}"
|
||||||
@@ -2,22 +2,21 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
from fastapi.responses import JSONResponse, StreamingResponse
|
from fastapi.responses import JSONResponse, StreamingResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
|
||||||
from pydantic import BaseModel, Field, validator
|
from pydantic import BaseModel, Field, validator
|
||||||
|
|
||||||
from agent.agent import Agent
|
from alfred.agent.agent import Agent
|
||||||
from agent.config import settings
|
from alfred.agent.llm.deepseek import DeepSeekClient
|
||||||
from agent.llm.deepseek import DeepSeekClient
|
from alfred.agent.llm.exceptions import LLMAPIError, LLMConfigurationError
|
||||||
from agent.llm.exceptions import LLMAPIError, LLMConfigurationError
|
from alfred.agent.llm.ollama import OllamaClient
|
||||||
from agent.llm.ollama import OllamaClient
|
from alfred.infrastructure.persistence import get_memory, init_memory
|
||||||
from infrastructure.persistence import get_memory, init_memory
|
from alfred.settings import settings
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||||
@@ -30,38 +29,33 @@ app = FastAPI(
|
|||||||
version="0.2.0",
|
version="0.2.0",
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: Make a variable
|
memory_path = Path(settings.data_storage_dir) / "memory"
|
||||||
manifests = "manifests"
|
init_memory(storage_dir=str(memory_path))
|
||||||
# Sécurité : on vérifie que le dossier existe pour ne pas faire planter l'app au démarrage
|
logger.info(f"Memory context initialized (path: {memory_path})")
|
||||||
if os.path.exists(manifests):
|
|
||||||
app.mount("/manifests", StaticFiles(directory=manifests), name="manifests")
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
f"⚠️ ATTENTION : Le dossier '{manifests}' est introuvable. Le plugin ne marchera pas."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize memory context at startup
|
|
||||||
# Use /data/memory in Docker, fallback to memory_data for local dev
|
|
||||||
storage_dir = os.getenv("MEMORY_STORAGE_DIR", "memory_data")
|
|
||||||
init_memory(storage_dir=storage_dir)
|
|
||||||
logger.info(f"Memory context initialized (storage: {storage_dir})")
|
|
||||||
|
|
||||||
# Initialize LLM based on environment variable
|
# Initialize LLM based on environment variable
|
||||||
llm_provider = os.getenv("LLM_PROVIDER", "deepseek").lower()
|
llm_provider = settings.default_llm_provider.lower()
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if llm_provider == "ollama":
|
if llm_provider == "local":
|
||||||
logger.info("Using Ollama LLM")
|
logger.info("Using local Ollama LLM")
|
||||||
llm = OllamaClient()
|
llm = OllamaClient(settings=settings)
|
||||||
else:
|
elif llm_provider == "deepseek":
|
||||||
logger.info("Using DeepSeek LLM")
|
logger.info("Using DeepSeek LLM")
|
||||||
llm = DeepSeekClient()
|
llm = DeepSeekClient()
|
||||||
|
elif llm_provider == "claude":
|
||||||
|
raise ValueError(f"LLM provider not fully implemented: {llm_provider}")
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown LLM provider: {llm_provider}")
|
||||||
except LLMConfigurationError as e:
|
except LLMConfigurationError as e:
|
||||||
logger.error(f"Failed to initialize LLM: {e}")
|
logger.error(f"Failed to initialize LLM: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Initialize agent
|
# Initialize agent
|
||||||
agent = Agent(llm=llm, max_tool_iterations=settings.max_tool_iterations)
|
agent = Agent(
|
||||||
|
settings=settings, llm=llm, max_tool_iterations=settings.max_tool_iterations
|
||||||
|
)
|
||||||
logger.info("Agent Media API initialized")
|
logger.info("Agent Media API initialized")
|
||||||
|
|
||||||
|
|
||||||
@@ -116,7 +110,7 @@ def extract_last_user_content(messages: list[dict[str, Any]]) -> str:
|
|||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health_check():
|
async def health_check():
|
||||||
"""Health check endpoint."""
|
"""Health check endpoint."""
|
||||||
return {"status": "healthy", "version": "0.2.0"}
|
return {"status": "healthy", "version": f"v{settings.alfred_version}"}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v1/models")
|
@app.get("/v1/models")
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
"""Filesystem use cases."""
|
||||||
|
|
||||||
|
from .create_seed_links import CreateSeedLinksUseCase
|
||||||
|
from .dto import (
|
||||||
|
CreateSeedLinksResponse,
|
||||||
|
ListFolderResponse,
|
||||||
|
ManageSubtitlesResponse,
|
||||||
|
MoveMediaResponse,
|
||||||
|
PlacedSubtitle,
|
||||||
|
SetFolderPathResponse,
|
||||||
|
)
|
||||||
|
from .list_folder import ListFolderUseCase
|
||||||
|
from .manage_subtitles import ManageSubtitlesUseCase
|
||||||
|
from .move_media import MoveMediaUseCase
|
||||||
|
from .resolve_destination import ResolveDestinationUseCase, ResolvedDestination
|
||||||
|
from .set_folder_path import SetFolderPathUseCase
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"SetFolderPathUseCase",
|
||||||
|
"ListFolderUseCase",
|
||||||
|
"CreateSeedLinksUseCase",
|
||||||
|
"MoveMediaUseCase",
|
||||||
|
"ManageSubtitlesUseCase",
|
||||||
|
"ResolveDestinationUseCase",
|
||||||
|
"ResolvedDestination",
|
||||||
|
"SetFolderPathResponse",
|
||||||
|
"ListFolderResponse",
|
||||||
|
"CreateSeedLinksResponse",
|
||||||
|
"MoveMediaResponse",
|
||||||
|
"ManageSubtitlesResponse",
|
||||||
|
"PlacedSubtitle",
|
||||||
|
]
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"""CreateSeedLinksUseCase — prepares a torrent folder for continued seeding."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from alfred.infrastructure.filesystem import FileManager
|
||||||
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
|
||||||
|
from .dto import CreateSeedLinksResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CreateSeedLinksUseCase:
    """
    Prepares a torrent subfolder so a moved library file can keep seeding.

    The torrent root is read from long-term memory (``ltm.workspace.torrent``);
    the linking/copying itself is delegated to ``FileManager.create_seed_links``
    and its dict result is translated into a ``CreateSeedLinksResponse``.
    """

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(
        self, library_file: str, original_download_folder: str
    ) -> CreateSeedLinksResponse:
        """
        Create the seed links for ``library_file``.

        Args:
            library_file: Path of the file that now lives in the library.
            original_download_folder: The original download folder of the release.

        Returns:
            CreateSeedLinksResponse with status "ok" and the details reported by
            the file manager, or status "error" with an error code and message.
        """
        torrent_root = get_memory().ltm.workspace.torrent

        # Nothing can be linked without a configured torrent folder.
        if not torrent_root:
            return CreateSeedLinksResponse(
                status="error",
                error="torrent_folder_not_set",
                message="Torrent folder is not configured. Use set_path_for_folder to set it.",
            )

        outcome = self.file_manager.create_seed_links(
            library_file, original_download_folder, torrent_root
        )

        # Any status other than "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return CreateSeedLinksResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return CreateSeedLinksResponse(
            status="ok",
            torrent_subfolder=outcome.get("torrent_subfolder"),
            linked_file=outcome.get("linked_file"),
            copied_files=outcome.get("copied_files"),
            copied_count=outcome.get("copied_count", 0),
            skipped=outcome.get("skipped"),
        )
|
||||||
@@ -0,0 +1,209 @@
|
|||||||
|
"""Filesystem application DTOs."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CopyMediaResponse:
    """Outcome of copying a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Return the error payload when ``error`` is truthy, the file details otherwise."""
        payload: dict = {"status": self.status}
        if self.error:
            payload["error"] = self.error
            payload["message"] = self.message
        else:
            payload["source"] = self.source
            payload["destination"] = self.destination
            payload["filename"] = self.filename
            payload["size"] = self.size
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MoveMediaResponse:
    """Outcome of moving a media file."""

    status: str
    source: str | None = None
    destination: str | None = None
    filename: str | None = None
    size: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Return the error payload when ``error`` is truthy, the file details otherwise."""
        if self.error:
            exported = ("status", "error", "message")
        else:
            exported = ("status", "source", "destination", "filename", "size")
        return {name: getattr(self, name) for name in exported}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SetFolderPathResponse:
    """Outcome of setting a folder path."""

    status: str
    folder_name: str | None = None
    path: str | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self):
        """Serialise to a dict; on success only truthy optional fields are included."""
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        payload = {"status": self.status}
        for key in ("folder_name", "path"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlacedSubtitle:
    """A single subtitle file that was placed: where it came from and where it went."""

    source: str
    destination: str
    filename: str

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict."""
        return dict(
            source=self.source,
            destination=self.destination,
            filename=self.filename,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UnresolvedTrack:
    """A subtitle track that could not be resolved and needs clarification before placement."""

    raw_tokens: list[str]
    file_path: str | None = None
    file_size_kb: float | None = None
    reason: str = ""  # "unknown_language" | "low_confidence"

    def to_dict(self) -> dict:
        """Return all four fields as a plain dict."""
        exported = ("raw_tokens", "file_path", "file_size_kb", "reason")
        return {name: getattr(self, name) for name in exported}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AvailableSubtitle:
    """One subtitle track reported as available for a media item."""

    language: str  # ISO 639-2 code
    subtitle_type: str  # "standard" | "sdh" | "forced" | "unknown"

    def to_dict(self) -> dict:
        """Return the track as a dict; ``subtitle_type`` is exported under the key "type"."""
        payload = {"language": self.language}
        payload["type"] = self.subtitle_type
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ManageSubtitlesResponse:
    """Result returned by the manage_subtitles use case."""

    status: str  # "ok" | "needs_clarification" | "error"
    video_path: str | None = None
    placed: list[PlacedSubtitle] | None = None
    skipped_count: int = 0
    unresolved: list[UnresolvedTrack] | None = None
    available: list[AvailableSubtitle] | None = None  # embedded tracks summary
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """
        Serialise to a plain dict.

        With a truthy ``error`` only status/error/message are returned. Otherwise
        the placement summary is returned, and the "unresolved"/"unresolved_count"
        and "available" keys are added only when those lists are non-empty.
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        placed_items = self.placed or []
        payload = {
            "status": self.status,
            "video_path": self.video_path,
            "placed": [item.to_dict() for item in placed_items],
            "placed_count": len(placed_items),
            "skipped_count": self.skipped_count,
        }
        if self.unresolved:
            payload["unresolved"] = [track.to_dict() for track in self.unresolved]
            payload["unresolved_count"] = len(self.unresolved)
        if self.available:
            payload["available"] = [sub.to_dict() for sub in self.available]
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CreateSeedLinksResponse:
    """Outcome of creating seed links for a torrent."""

    status: str
    torrent_subfolder: str | None = None
    linked_file: str | None = None
    copied_files: list[str] | None = None
    copied_count: int = 0
    skipped: list[str] | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Return the error payload when ``error`` is truthy; otherwise the link details.

        Missing ``copied_files`` / ``skipped`` lists are exported as empty lists.
        """
        if self.error:
            return dict(status=self.status, error=self.error, message=self.message)

        copied = self.copied_files or []
        skipped = self.skipped or []
        return {
            "status": self.status,
            "torrent_subfolder": self.torrent_subfolder,
            "linked_file": self.linked_file,
            "copied_files": copied,
            "copied_count": self.copied_count,
            "skipped": skipped,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ListFolderResponse:
    """Outcome of listing a folder."""

    status: str
    folder_type: str | None = None
    path: str | None = None
    entries: list[str] | None = None
    count: int | None = None
    error: str | None = None
    message: str | None = None

    def to_dict(self):
        """
        Serialise to a dict.

        On error only status/error/message are returned. On success,
        ``folder_type`` and ``path`` are included when truthy, while ``entries``
        and ``count`` are included whenever they are not None (so an empty
        listing and a zero count are still exported).
        """
        if self.error:
            return {"status": self.status, "error": self.error, "message": self.message}

        payload = {"status": self.status}
        for key in ("folder_type", "path"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        for key in ("entries", "count"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload
|
||||||
+1
-1
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from infrastructure.filesystem import FileManager
|
from alfred.infrastructure.filesystem import FileManager
|
||||||
|
|
||||||
from .dto import ListFolderResponse
|
from .dto import ListFolderResponse
|
||||||
|
|
||||||
@@ -0,0 +1,258 @@
|
|||||||
|
"""ManageSubtitlesUseCase — orchestrates the full subtitle pipeline for a video file."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from alfred.domain.shared.value_objects import ImdbId
|
||||||
|
from alfred.domain.subtitles.entities import SubtitleTrack
|
||||||
|
from alfred.domain.subtitles.knowledge.base import SubtitleKnowledgeBase
|
||||||
|
from alfred.domain.subtitles.knowledge.loader import KnowledgeLoader
|
||||||
|
from alfred.domain.subtitles.services.identifier import SubtitleIdentifier
|
||||||
|
from alfred.domain.subtitles.services.matcher import SubtitleMatcher
|
||||||
|
from alfred.domain.subtitles.services.pattern_detector import PatternDetector
|
||||||
|
from alfred.domain.subtitles.services.placer import PlacedTrack, SubtitlePlacer
|
||||||
|
from alfred.domain.subtitles.services.utils import available_subtitles
|
||||||
|
from alfred.domain.subtitles.value_objects import ScanStrategy
|
||||||
|
from alfred.infrastructure.persistence.context import get_memory
|
||||||
|
from alfred.infrastructure.subtitle.metadata_store import SubtitleMetadataStore
|
||||||
|
from alfred.infrastructure.subtitle.rule_repository import RuleSetRepository
|
||||||
|
|
||||||
|
from .dto import AvailableSubtitle, ManageSubtitlesResponse, PlacedSubtitle, UnresolvedTrack
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_library_root(dest_video: Path, media_type: str) -> Path:
|
||||||
|
"""
|
||||||
|
Infer the media library root folder from the destination video path.
|
||||||
|
|
||||||
|
TV show: video → Season 01 → The X-Files (3 levels up)
|
||||||
|
Movie: video → Inception (2010) (1 level up)
|
||||||
|
"""
|
||||||
|
if media_type == "tv_show":
|
||||||
|
return dest_video.parent.parent
|
||||||
|
return dest_video.parent
|
||||||
|
|
||||||
|
|
||||||
|
def _to_imdb_id(raw: str | None) -> ImdbId | None:
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return ImdbId(raw)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class ManageSubtitlesUseCase:
    """
    Orchestrates subtitle handling for one video file.

    Pipeline, as implemented by ``execute``:

      1. Build the subtitle knowledge base
      2. Resolve the release pattern (caller override → stored → auto-detect → "adjacent")
      3. Identify the subtitle tracks
      4. For embedded patterns, report the available tracks and stop
      5. Load + resolve the rules and match the external tracks against them
      6. If any tracks are unresolved → return needs_clarification (nothing is placed)
      7. Place the matched tracks
      8. Record what was placed in the metadata store

    The use case is stateless — all dependencies are instantiated inline.
    """

    def execute(
        self,
        source_video: str,
        destination_video: str,
        imdb_id: str | None = None,
        media_type: str = "tv_show",
        release_group: str | None = None,
        season: int | None = None,
        episode: int | None = None,
        confirmed_pattern_id: str | None = None,
    ) -> ManageSubtitlesResponse:
        """
        Run the subtitle pipeline for one video.

        Args:
            source_video: Path of the video in its original release folder.
            destination_video: Path of the video inside the library.
            imdb_id: Optional raw IMDb id; invalid values are ignored.
            media_type: "tv_show" or another value; used to infer the title root.
            release_group: Optional release group, passed to the identifier,
                the rule repository and the history record.
            season: Optional season number stored with the history record.
            episode: Optional episode number stored with the history record.
            confirmed_pattern_id: Optional pattern id that overrides detection.

        Returns:
            ManageSubtitlesResponse with status "ok", "needs_clarification" or "error".
        """
        source_path = Path(source_video)
        dest_path = Path(destination_video)

        if not source_path.exists():
            return ManageSubtitlesResponse(
                status="error",
                error="source_not_found",
                message=f"Source video not found: {source_video}",
            )

        kb = SubtitleKnowledgeBase(KnowledgeLoader())
        library_root = _infer_library_root(dest_path, media_type)
        store = SubtitleMetadataStore(library_root)
        repo = RuleSetRepository(library_root)

        # --- Pattern resolution ---
        pattern = self._resolve_pattern(
            kb, store, source_path, confirmed_pattern_id, release_group
        )
        if pattern is None:
            return ManageSubtitlesResponse(
                status="error",
                error="pattern_not_found",
                message="Could not determine subtitle pattern for this release.",
            )

        # --- Identify ---
        media_id = _to_imdb_id(imdb_id)
        identifier = SubtitleIdentifier(kb)
        metadata = identifier.identify(
            video_path=source_path,
            pattern=pattern,
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
        )

        if metadata.total_count == 0:
            logger.info(f"ManageSubtitles: no subtitle tracks found for {source_path.name}")
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
            )

        # --- Embedded short-circuit ---
        if pattern.scan_strategy == ScanStrategy.EMBEDDED:
            logger.info("ManageSubtitles: embedded pattern — skipping matcher")
            available = [
                AvailableSubtitle(
                    language=t.language.code if t.language else "?",
                    subtitle_type=t.subtitle_type.value,
                )
                for t in available_subtitles(metadata.embedded_tracks)
            ]
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=0,
                available=available,
            )

        # --- Match (external only) ---
        subtitle_prefs = None
        try:
            memory = get_memory()
            subtitle_prefs = memory.ltm.subtitle_preferences
        except Exception:
            # Best-effort: preferences are optional, so the pipeline continues
            # without them — but leave a trace instead of swallowing silently.
            logger.debug(
                "ManageSubtitles: subtitle preferences unavailable — continuing without them",
                exc_info=True,
            )
        rules = repo.load(release_group, subtitle_prefs).resolve()
        matcher = SubtitleMatcher()
        matched, unresolved = matcher.match(metadata.external_tracks, rules)

        if unresolved:
            logger.info(
                f"ManageSubtitles: {len(unresolved)} unresolved track(s) — needs clarification"
            )
            return ManageSubtitlesResponse(
                status="needs_clarification",
                video_path=destination_video,
                placed=[],
                unresolved=[_to_unresolved_dto(t) for t in unresolved],
            )

        if not matched:
            return ManageSubtitlesResponse(
                status="ok",
                video_path=destination_video,
                placed=[],
                skipped_count=metadata.total_count,
            )

        # --- Place ---
        placer = SubtitlePlacer()
        place_result = placer.place(matched, dest_path)

        # --- Persist ---
        if place_result.placed:
            pairs = _pair_placed_with_tracks(place_result.placed, matched)
            store.append_history(pairs, season, episode, release_group)

        placed_dtos = [
            PlacedSubtitle(
                source=str(p.source),
                destination=str(p.destination),
                filename=p.filename,
            )
            for p in place_result.placed
        ]

        return ManageSubtitlesResponse(
            status="ok",
            video_path=destination_video,
            placed=placed_dtos,
            skipped_count=place_result.skipped_count,
        )

    def _resolve_pattern(
        self,
        kb: SubtitleKnowledgeBase,
        store: SubtitleMetadataStore,
        source_path: Path,
        confirmed_pattern_id: str | None,
        release_group: str | None,
    ):
        """
        Pick the subtitle pattern to use, trying four sources in order.

        Returns whatever ``kb.pattern`` yields for the chosen id; the final
        fallback may therefore be falsy/None if "adjacent" is unknown to the
        knowledge base, which ``execute`` reports as "pattern_not_found".
        ``release_group`` is accepted but not consulted here.
        """
        # 1. Explicit override from caller
        if confirmed_pattern_id:
            p = kb.pattern(confirmed_pattern_id)
            if p:
                return p
            logger.warning(f"ManageSubtitles: unknown pattern '{confirmed_pattern_id}'")

        # 2. Previously confirmed in metadata store
        stored_id = store.confirmed_pattern()
        if stored_id:
            p = kb.pattern(stored_id)
            if p:
                logger.debug(f"ManageSubtitles: using confirmed pattern '{stored_id}'")
                return p

        # 3. Auto-detect
        release_root = source_path.parent
        detector = PatternDetector(kb)
        result = detector.detect(release_root, source_path)

        if result["detected"] and result["confidence"] >= 0.6:
            logger.info(
                f"ManageSubtitles: auto-detected pattern '{result['detected'].id}' "
                f"(confidence={result['confidence']:.2f})"
            )
            return result["detected"]

        # 4. Fallback — adjacent (safest default)
        logger.info("ManageSubtitles: falling back to 'adjacent' pattern")
        return kb.pattern("adjacent")
|
||||||
|
|
||||||
|
|
||||||
|
def _to_unresolved_dto(track: SubtitleTrack, min_confidence: float = 0.7) -> UnresolvedTrack:
    """
    Build the ``UnresolvedTrack`` DTO for a track the matcher could not resolve.

    The reason is "unknown_language" when the track has no language and
    "low_confidence" otherwise. ``min_confidence`` is accepted but not used
    by this function.
    """
    if track.language is None:
        reason = "unknown_language"
    else:
        reason = "low_confidence"

    path_text = None
    if track.file_path:
        path_text = str(track.file_path)

    return UnresolvedTrack(
        raw_tokens=track.raw_tokens,
        file_path=path_text,
        file_size_kb=track.file_size_kb,
        reason=reason,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _pair_placed_with_tracks(
|
||||||
|
placed: list[PlacedTrack],
|
||||||
|
tracks: list[SubtitleTrack],
|
||||||
|
) -> list[tuple[PlacedTrack, SubtitleTrack]]:
|
||||||
|
"""
|
||||||
|
Pair each PlacedTrack with its originating SubtitleTrack by source path.
|
||||||
|
Falls back to positional matching if paths don't align.
|
||||||
|
"""
|
||||||
|
track_by_path = {t.file_path: t for t in tracks if t.file_path}
|
||||||
|
pairs = []
|
||||||
|
for p in placed:
|
||||||
|
track = track_by_path.get(p.source)
|
||||||
|
if track is None and tracks:
|
||||||
|
track = tracks[0] # positional fallback
|
||||||
|
if track:
|
||||||
|
pairs.append((p, track))
|
||||||
|
return pairs
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
"""Move media use case."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from alfred.infrastructure.filesystem import FileManager
|
||||||
|
|
||||||
|
from .dto import MoveMediaResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class MoveMediaUseCase:
    """Use case that moves a media file by delegating to ``FileManager.move_file``."""

    def __init__(self, file_manager: FileManager):
        self.file_manager = file_manager

    def execute(self, source: str, destination: str) -> MoveMediaResponse:
        """
        Move a media file from source to destination.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            MoveMediaResponse with status "ok" and the file details reported by
            the file manager, or status "error" with its error and message.
        """
        outcome = self.file_manager.move_file(source, destination)

        # Any status other than "ok" is reported as an error.
        if outcome.get("status") != "ok":
            return MoveMediaResponse(
                status="error",
                error=outcome.get("error"),
                message=outcome.get("message"),
            )

        return MoveMediaResponse(
            status="ok",
            source=outcome.get("source"),
            destination=outcome.get("destination"),
            filename=outcome.get("filename"),
            size=outcome.get("size"),
        )
|
||||||
@@ -0,0 +1,246 @@
|
|||||||
|
"""
|
||||||
|
ResolveDestinationUseCase — compute the library destination path for a release.
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
1. Parse the release name
|
||||||
|
2. Look up TMDB for title + year (+ episode title if single episode)
|
||||||
|
3. Scan the library for an existing series folder
|
||||||
|
4. Apply group-conflict rules
|
||||||
|
5. Return the computed paths (or needs_clarification if ambiguous)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from alfred.domain.media.release_parser import ParsedRelease, parse_release
|
||||||
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Characters forbidden on Windows filesystems (served via NFS)
|
||||||
|
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitise(text: str) -> str:
|
||||||
|
return _WIN_FORBIDDEN.sub("", text)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DTOs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class ResolvedDestination:
    """Computed destination paths for a release, or a question / error instead."""

    status: str  # "ok" | "needs_clarification" | "error"

    # Populated on "ok"
    library_file: str | None = None  # absolute path of the destination video file
    series_folder: str | None = None  # absolute path of the series root folder
    season_folder: str | None = None  # absolute path of the season subfolder
    series_folder_name: str | None = None  # just the folder name (for display)
    season_folder_name: str | None = None
    filename: str | None = None
    is_new_series_folder: bool = False  # True if we're creating the folder

    # Populated on "needs_clarification"
    question: str | None = None
    options: list[str] | None = None  # existing group folder names to pick from

    # Populated on "error"
    error: str | None = None
    message: str | None = None

    def to_dict(self) -> dict:
        """Serialise only the fields relevant to the current ``status``.

        "error" → error/message; "needs_clarification" → question/options
        (missing options become an empty list); any other status → the path fields.
        """
        payload: dict = {"status": self.status}
        if self.status == "error":
            payload.update(error=self.error, message=self.message)
        elif self.status == "needs_clarification":
            payload.update(question=self.question, options=self.options or [])
        else:
            path_fields = (
                "library_file",
                "series_folder",
                "season_folder",
                "series_folder_name",
                "season_folder_name",
                "filename",
                "is_new_series_folder",
            )
            for name in path_fields:
                payload[name] = getattr(self, name)
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Use case
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ResolveDestinationUseCase:
    """
    Compute the full destination path for a media file being organised.

    The caller provides:
      - release_name: the raw release folder/file name
      - source_file: path to the actual video file (to get extension)
      - tmdb_title: canonical title from TMDB
      - tmdb_year: release year from TMDB
      - tmdb_episode_title: episode title from TMDB (None for movies / season packs)
      - confirmed_folder: if the user already answered needs_clarification, pass
        the chosen folder name here to skip the check

    Returns a ResolvedDestination.
    """

    def execute(
        self,
        release_name: str,
        source_file: str,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None = None,
        confirmed_folder: str | None = None,
    ) -> ResolvedDestination:
        """Parse the release name and dispatch to the movie or TV-show resolver."""
        parsed = parse_release(release_name)
        ext = Path(source_file).suffix  # ".mkv"

        if parsed.is_movie:
            return self._resolve_movie(parsed, tmdb_title, tmdb_year, ext)
        return self._resolve_tvshow(
            parsed, tmdb_title, tmdb_year, tmdb_episode_title, ext, confirmed_folder
        )

    # ------------------------------------------------------------------
    # Movie
    # ------------------------------------------------------------------

    def _resolve_movie(
        self, parsed: ParsedRelease, tmdb_title: str, tmdb_year: int, ext: str
    ) -> ResolvedDestination:
        """
        Build the destination for a movie under the configured "movie" library.

        Returns an "error" result when no movie library path is configured.
        The movie folder is reported through the ``series_folder*`` fields.
        """
        memory = get_memory()
        movies_root = memory.ltm.library_paths.get("movie")
        if not movies_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="Movie library path is not configured.",
            )

        folder_name = _sanitise(parsed.movie_folder_name(tmdb_title, tmdb_year))
        filename = _sanitise(parsed.movie_filename(tmdb_title, tmdb_year, ext))

        folder_path = Path(movies_root) / folder_name
        file_path = folder_path / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(file_path),
            series_folder=str(folder_path),
            series_folder_name=folder_name,
            filename=filename,
            is_new_series_folder=not folder_path.exists(),
        )

    # ------------------------------------------------------------------
    # TV show
    # ------------------------------------------------------------------

    def _resolve_tvshow(
        self,
        parsed: ParsedRelease,
        tmdb_title: str,
        tmdb_year: int,
        tmdb_episode_title: str | None,
        ext: str,
        confirmed_folder: str | None,
    ) -> ResolvedDestination:
        """
        Build the destination for a TV release under the "tv_show" library.

        Series folder selection:
          - ``confirmed_folder`` given → use it as-is
          - no existing folder matches → create a new name from the release
          - exactly one match → reuse it
          - several matches → return "needs_clarification" with the candidates

        Returns an "error" result when no TV library path is configured.
        """
        memory = get_memory()
        tv_root = memory.ltm.library_paths.get("tv_show")
        if not tv_root:
            return ResolvedDestination(
                status="error",
                error="library_not_set",
                message="TV show library path is not configured.",
            )

        tv_root_path = Path(tv_root)

        # --- Determine series folder name ---
        if confirmed_folder:
            # The caller already chose: no need to scan the library at all.
            series_folder_name = confirmed_folder
            is_new = not (tv_root_path / confirmed_folder).exists()
        else:
            # --- Find existing series folders for this title ---
            existing = _find_existing_series_folders(tv_root_path, tmdb_title, tmdb_year)

            if len(existing) > 1:
                # Multiple folders — ask user
                return ResolvedDestination(
                    status="needs_clarification",
                    question=(
                        f"Multiple folders found for '{tmdb_title}' in your library. "
                        f"Which one should I use for this release ({parsed.group})?"
                    ),
                    options=existing,
                )

            if existing:
                # Exactly one match — use it regardless of group
                series_folder_name = existing[0]
                is_new = False
            else:
                # No existing folder — create one named from the release
                series_folder_name = _sanitise(
                    parsed.show_folder_name(tmdb_title, tmdb_year)
                )
                is_new = True

        # --- Build paths ---
        season_folder_name = parsed.season_folder_name()
        if parsed.is_season_pack:
            # A season pack has no single episode name; reuse the season name.
            raw_filename = parsed.season_folder_name() + ext
        else:
            raw_filename = parsed.episode_filename(tmdb_episode_title, ext)
        filename = _sanitise(raw_filename)

        series_path = tv_root_path / series_folder_name
        season_path = series_path / season_folder_name
        file_path = season_path / filename

        return ResolvedDestination(
            status="ok",
            library_file=str(file_path),
            series_folder=str(series_path),
            season_folder=str(season_path),
            series_folder_name=series_folder_name,
            season_folder_name=season_folder_name,
            filename=filename,
            is_new_series_folder=is_new,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _find_existing_series_folders(tv_root: Path, tmdb_title: str, tmdb_year: int) -> list[str]:
|
||||||
|
"""
|
||||||
|
Return names of folders in tv_root that match the given title + year.
|
||||||
|
|
||||||
|
Matching is loose: normalised title (dots, no special chars) + year must
|
||||||
|
appear at the start of the folder name.
|
||||||
|
"""
|
||||||
|
if not tv_root.exists():
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Build a normalised prefix to match against: "Oz.1997"
|
||||||
|
clean_title = _sanitise(tmdb_title).replace(" ", ".")
|
||||||
|
prefix = f"{clean_title}.{tmdb_year}".lower()
|
||||||
|
|
||||||
|
matches = []
|
||||||
|
for entry in tv_root.iterdir():
|
||||||
|
if entry.is_dir() and entry.name.lower().startswith(prefix):
|
||||||
|
matches.append(entry.name)
|
||||||
|
|
||||||
|
return sorted(matches)
|
||||||
+1
-1
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from infrastructure.filesystem import FileManager
|
from alfred.infrastructure.filesystem import FileManager
|
||||||
|
|
||||||
from .dto import SetFolderPathResponse
|
from .dto import SetFolderPathResponse
|
||||||
|
|
||||||
+1
-1
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from infrastructure.api.tmdb import (
|
from alfred.infrastructure.api.tmdb import (
|
||||||
TMDBAPIError,
|
TMDBAPIError,
|
||||||
TMDBClient,
|
TMDBClient,
|
||||||
TMDBConfigurationError,
|
TMDBConfigurationError,
|
||||||
+1
-1
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from infrastructure.api.qbittorrent import (
|
from alfred.infrastructure.api.qbittorrent import (
|
||||||
QBittorrentAPIError,
|
QBittorrentAPIError,
|
||||||
QBittorrentAuthError,
|
QBittorrentAuthError,
|
||||||
QBittorrentClient,
|
QBittorrentClient,
|
||||||
+5
-1
@@ -2,7 +2,11 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from infrastructure.api.knaben import KnabenAPIError, KnabenClient, KnabenNotFoundError
|
from alfred.infrastructure.api.knaben import (
|
||||||
|
KnabenAPIError,
|
||||||
|
KnabenClient,
|
||||||
|
KnabenNotFoundError,
|
||||||
|
)
|
||||||
|
|
||||||
from .dto import SearchTorrentsResponse
|
from .dto import SearchTorrentsResponse
|
||||||
|
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
"""Media domain — shared naming and release parsing."""
|
||||||
|
|
||||||
|
from .release_parser import ParsedRelease, parse_release
|
||||||
|
|
||||||
|
__all__ = ["ParsedRelease", "parse_release"]
|
||||||
@@ -0,0 +1,306 @@
|
|||||||
|
"""
|
||||||
|
release_parser.py — Parse a release name into structured components.
|
||||||
|
|
||||||
|
Handles both dot-separated and space-separated release names:
|
||||||
|
Oz.S03.1080p.WEBRip.x265-KONTRAST
|
||||||
|
Oz S03 1080p WEBRip x265-KONTRAST
|
||||||
|
Inception.2010.1080p.BluRay.x265-GROUP
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Known quality tokens
|
||||||
|
_QUALITIES = {"2160p", "1080p", "720p", "480p", "576p", "4k", "8k"}
|
||||||
|
|
||||||
|
# Known source tokens (case-insensitive match)
|
||||||
|
_SOURCES = {
|
||||||
|
"bluray", "blu-ray", "bdrip", "brrip",
|
||||||
|
"webrip", "web-rip", "webdl", "web-dl", "web",
|
||||||
|
"hdtv", "hdrip", "dvdrip", "dvd", "vodrip",
|
||||||
|
"amzn", "nf", "dsnp", "hmax", "atvp",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Known codec tokens
|
||||||
|
_CODECS = {
|
||||||
|
"x264", "x265", "h264", "h265", "hevc", "avc",
|
||||||
|
"xvid", "divx", "av1", "vp9",
|
||||||
|
"h.264", "h.265",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Windows-forbidden characters (we strip these from display names)
|
||||||
|
_WIN_FORBIDDEN = re.compile(r'[?:*"<>|\\]')
|
||||||
|
|
||||||
|
# Episode/season pattern: S01, S01E02, S01E02E03, 1x02, etc.
|
||||||
|
_SEASON_EP_RE = re.compile(
|
||||||
|
r"S(\d{1,2})(?:E(\d{2})(?:E(\d{2}))?)?",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Year pattern
|
||||||
|
_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ParsedRelease:
    """Components extracted from a release name, plus library-naming helpers."""

    raw: str  # release name exactly as it was received
    normalised: str  # release name with spaces turned into dots
    title: str  # dotted title portion (stops before year/season/tech tokens)
    year: int | None  # four-digit year token found after the title, if any
    season: int | None  # season number (None when no season marker was found)
    episode: int | None  # first episode number (None for a season pack)
    episode_end: int | None  # second episode of a multi-episode marker, if any
    quality: str | None  # e.g. 1080p, 2160p
    source: str | None  # e.g. WEBRip, BluRay
    codec: str | None  # e.g. x265, HEVC
    group: str  # release group, "UNKNOWN" when none was found
    tech_string: str  # quality, source and codec joined with dots

    # -------------------------------------------------------------------------
    # Derived helpers
    # -------------------------------------------------------------------------

    @property
    def is_movie(self) -> bool:
        """True when the release carries no season marker."""
        return self.season is None

    @property
    def is_season_pack(self) -> bool:
        """True when a season was parsed but no episode number."""
        if self.season is None:
            return False
        return self.episode is None

    def show_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
        """
        Compose the root folder name of a series.

        Layout:  {Title}.{Year}.{Tech}-{Group}
        Example: Oz.1997.1080p.WEBRip.x265-KONTRAST

        The title has forbidden characters removed and spaces turned into
        dots; an empty tech string is rendered as "Unknown".
        """
        dotted_title = _sanitise_for_fs(tmdb_title).replace(" ", ".")
        tech_label = self.tech_string if self.tech_string else "Unknown"
        return f"{dotted_title}.{tmdb_year}.{tech_label}-{self.group}"

    def season_folder_name(self) -> str:
        """
        Compose the season subfolder name: the normalised release name with
        any episode marker removed, so one folder can hold the whole season.

        Example: Oz.S03.1080p.WEBRip.x265-KONTRAST
        """
        return _strip_episode_from_normalised(self.normalised)

    def episode_filename(self, tmdb_episode_title: str | None, ext: str) -> str:
        """
        Compose the file name of a single episode.

        Layout:  {Title}.{SxxExx}.{EpisodeTitle}.{Tech}-{Group}.{ext}
        Example: Oz.S01E01.The.Routine.1080p.WEBRip.x265-KONTRAST.mkv

        The episode-title segment is left out when tmdb_episode_title is
        None or empty. A leading dot on ext is tolerated.
        """
        safe_title = _sanitise_for_fs(self.title)  # already dotted
        season_tag = "" if self.season is None else f"S{self.season:02d}"
        episode_tag = "" if self.episode is None else f"E{self.episode:02d}"

        if tmdb_episode_title:
            title_suffix = "." + _sanitise_for_fs(tmdb_episode_title).replace(" ", ".")
        else:
            title_suffix = ""

        tech_label = self.tech_string if self.tech_string else "Unknown"
        segments = [
            safe_title,
            season_tag + episode_tag + title_suffix,
            f"{tech_label}-{self.group}",
            ext.lstrip("."),
        ]
        return ".".join(segments)

    def movie_folder_name(self, tmdb_title: str, tmdb_year: int) -> str:
        """
        Compose the folder name of a movie (same layout as a series folder).

        Layout:  {Title}.{Year}.{Tech}-{Group}
        Example: Inception.2010.1080p.BluRay.x265-GROUP
        """
        return self.show_folder_name(tmdb_title, tmdb_year)

    def movie_filename(self, tmdb_title: str, tmdb_year: int, ext: str) -> str:
        """
        Compose the file name of a movie: its folder name plus the extension.

        Example: Inception.2010.1080p.BluRay.x265-GROUP.mkv
        """
        folder = self.movie_folder_name(tmdb_title, tmdb_year)
        suffix = ext.lstrip(".")
        return f"{folder}.{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def parse_release(name: str) -> ParsedRelease:
    """
    Break a release name into its components.

    The name may use dots or spaces as separators; it is normalised to the
    dotted form first and then split into tokens that the private helpers
    inspect for season/episode, technical tags, title and year.
    """
    dotted = _normalise(name)
    parts = dotted.split(".")

    season, episode, episode_end = _extract_season_episode(parts)
    quality, source, codec, group, tech_tokens = _extract_tech(parts)
    title = _extract_title(parts, season, episode, tech_tokens)
    year = _extract_year(parts, title)

    # Only the tags that were actually found take part in the tech string.
    tech_string = ".".join(filter(None, (quality, source, codec)))

    return ParsedRelease(
        raw=name,
        normalised=dotted,
        title=title,
        year=year,
        season=season,
        episode=episode,
        episode_end=episode_end,
        quality=quality,
        source=source,
        codec=codec,
        group=group,
        tech_string=tech_string,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _normalise(name: str) -> str:
|
||||||
|
"""Replace spaces with dots, collapse multiple dots."""
|
||||||
|
s = name.replace(" ", ".")
|
||||||
|
s = re.sub(r"\.{2,}", ".", s)
|
||||||
|
return s.strip(".")
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitise_for_fs(text: str) -> str:
    """Return text with every character matched by _WIN_FORBIDDEN deleted."""
    cleaned = _WIN_FORBIDDEN.sub("", text)
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_season_episode(tokens: list[str]) -> tuple[int | None, int | None, int | None]:
    """
    Find the first season/episode marker in the dotted release name.

    Returns (season, episode, episode_end); each element is None when the
    corresponding part of the marker is absent, and all three are None when
    there is no marker at all.
    """
    found = _SEASON_EP_RE.search(".".join(tokens))
    if not found:
        return None, None, None

    season_raw, first_raw, second_raw = found.groups()
    return (
        int(season_raw),
        int(first_raw) if first_raw else None,
        int(second_raw) if second_raw else None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_tech(
    tokens: list[str],
) -> tuple[str | None, str | None, str | None, str, set[str]]:
    """
    Pick quality, source, codec and release group out of the tokens.

    Returns (quality, source, codec, group, tech_token_set), where
    tech_token_set holds every token that was recognised as a technical tag.
    When several tokens qualify for the same slot, the last one wins.

    The group is taken, in order of preference, from:
      1. a "codec-GROUP" token (prefix is a known codec), e.g. x265-GROUP;
      2. otherwise the rightmost dashed token that is not itself a source,
         e.g. 10bit-GROUP.
    It stays "UNKNOWN" when neither rule yields a name.
    """
    quality: str | None = None
    source: str | None = None
    codec: str | None = None
    group = "UNKNOWN"
    tech_tokens: set[str] = set()

    for token in tokens:
        lowered = token.lower()

        if lowered in _QUALITIES:
            quality = token
        elif lowered in _SOURCES:
            source = token
        else:
            if "-" in token:
                head, tail = token.rsplit("-", 1)
                # codec-GROUP (highest priority for group)
                if head.lower() in _CODECS:
                    codec = head
                    group = tail or "UNKNOWN"
                    tech_tokens.add(token)
                    continue
                # dashed source spelling, e.g. Web-DL
                if head.lower() in _SOURCES or lowered.replace("-", "") in _SOURCES:
                    source = token
                    tech_tokens.add(token)
                    continue
            if lowered not in _CODECS:
                # not a technical token at all
                continue
            codec = token

        tech_tokens.add(token)

    # Fallback: no codec-GROUP token was seen, so scan from the right for a
    # dashed token that is not a source and take what follows its last dash.
    if group == "UNKNOWN":
        for token in reversed(tokens):
            if "-" not in token:
                continue
            lowered = token.lower()
            if lowered in _SOURCES or lowered.replace("-", "") in _SOURCES:
                continue
            suffix = token.rsplit("-", 1)[1]
            if suffix:  # ignore a trailing dash with nothing after it
                group = suffix
                break

    return quality, source, codec, group, tech_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_title(tokens: list[str], season: int | None, episode: int | None, tech_tokens: set[str]) -> str:
    """
    Return the dotted title: the leading tokens up to (not including) the
    first season marker, year, or technical token.

    Falls back to the first token when nothing precedes such a marker.
    The season and episode arguments are accepted but not consulted.
    """
    known_tech = _QUALITIES | _SOURCES | _CODECS
    dashed_tech = _CODECS | _SOURCES

    collected = []
    for token in tokens:
        lowered = token.lower()
        ends_title = (
            _SEASON_EP_RE.match(token) is not None
            or _YEAR_RE.fullmatch(token) is not None
            or token in tech_tokens
            or lowered in known_tech
            # dashed token with a codec/source on either side (likely codec-GROUP)
            or ("-" in token and any(piece in dashed_tech for piece in lowered.split("-")))
        )
        if ends_title:
            break
        collected.append(token)

    if not collected:
        return tokens[0]
    return ".".join(collected)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_year(tokens: list[str], title: str) -> int | None:
    """Return the first token after the title that is a four-digit year, or None."""
    skip = len(title.split("."))
    year_matches = (_YEAR_RE.fullmatch(token) for token in tokens[skip:])
    return next((int(hit.group(1)) for hit in year_matches if hit), None)
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_episode_from_normalised(normalised: str) -> str:
|
||||||
|
"""
|
||||||
|
Remove all episode parts (Exx) from a normalised release name, keeping Sxx.
|
||||||
|
|
||||||
|
Oz.S03E01.1080p... → Oz.S03.1080p...
|
||||||
|
Archer.S14E09E10E11.1080p... → Archer.S14.1080p...
|
||||||
|
"""
|
||||||
|
return re.sub(r"(S\d{2})(E\d{2})+", r"\1", normalised, flags=re.IGNORECASE)
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
"""Subtitles domain — subtitle identification, classification and placement."""
|
||||||
|
|
||||||
|
from .aggregates import SubtitleRuleSet
|
||||||
|
from .entities import MediaSubtitleMetadata, SubtitleTrack
|
||||||
|
from .exceptions import SubtitleNotFound
|
||||||
|
from .knowledge import KnowledgeLoader, SubtitleKnowledgeBase
|
||||||
|
from .services import PatternDetector, SubtitleIdentifier, SubtitleMatcher
|
||||||
|
from .value_objects import (
|
||||||
|
RuleScope,
|
||||||
|
ScanStrategy,
|
||||||
|
SubtitleFormat,
|
||||||
|
SubtitleLanguage,
|
||||||
|
SubtitleMatchingRules,
|
||||||
|
SubtitlePattern,
|
||||||
|
SubtitleType,
|
||||||
|
TypeDetectionMethod,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"SubtitleTrack",
|
||||||
|
"MediaSubtitleMetadata",
|
||||||
|
"SubtitleRuleSet",
|
||||||
|
"SubtitleKnowledgeBase",
|
||||||
|
"KnowledgeLoader",
|
||||||
|
"SubtitleIdentifier",
|
||||||
|
"SubtitleMatcher",
|
||||||
|
"PatternDetector",
|
||||||
|
"SubtitleFormat",
|
||||||
|
"SubtitleLanguage",
|
||||||
|
"SubtitlePattern",
|
||||||
|
"SubtitleType",
|
||||||
|
"ScanStrategy",
|
||||||
|
"TypeDetectionMethod",
|
||||||
|
"SubtitleMatchingRules",
|
||||||
|
"RuleScope",
|
||||||
|
"SubtitleNotFound",
|
||||||
|
]
|
||||||
@@ -0,0 +1,90 @@
|
|||||||
|
"""Subtitle domain aggregates."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from ..shared.value_objects import ImdbId
|
||||||
|
from .knowledge.base import SubtitleKnowledgeBase
|
||||||
|
from .value_objects import RuleScope, SubtitleMatchingRules
|
||||||
|
|
||||||
|
|
||||||
|
def DEFAULT_RULES() -> SubtitleMatchingRules:
    """
    Return the default matching rules exposed by the subtitle knowledge base.

    A new SubtitleKnowledgeBase is constructed on every call.
    """
    knowledge_base = SubtitleKnowledgeBase()
    return knowledge_base.default_rules()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SubtitleRuleSet:
    """
    Subtitle-selection rules for one scope level, layered over a parent.

    Each instance stores only its own overrides ("deltas"); a delta left at
    None is taken from the parent chain when the rules are resolved, and the
    chain bottoms out at DEFAULT_RULES().

    pinned_to optionally ties the rule set to a single media item.
    NOTE(review): how pinning is honoured is decided by callers not visible here.
    """

    scope: RuleScope
    parent: "SubtitleRuleSet | None" = None
    pinned_to: ImdbId | None = None

    # Per-scope deltas; None means "take the value from the parent chain"
    _languages: list[str] | None = field(default=None, repr=False)
    _formats: list[str] | None = field(default=None, repr=False)
    _types: list[str] | None = field(default=None, repr=False)
    _format_priority: list[str] | None = field(default=None, repr=False)
    _min_confidence: float | None = field(default=None, repr=False)

    def resolve(self) -> SubtitleMatchingRules:
        """
        Compute the effective rules for this scope.

        The parent chain is resolved first (DEFAULT_RULES() when there is no
        parent), then this scope's deltas are laid on top. List deltas that
        are None or empty fall through to the inherited value; min_confidence
        falls through only when it is None, so 0.0 is a valid override.
        """
        if self.parent:
            inherited = self.parent.resolve()
        else:
            inherited = DEFAULT_RULES()

        confidence = self._min_confidence
        if confidence is None:
            confidence = inherited.min_confidence

        return SubtitleMatchingRules(
            preferred_languages=self._languages or inherited.preferred_languages,
            preferred_formats=self._formats or inherited.preferred_formats,
            allowed_types=self._types or inherited.allowed_types,
            format_priority=self._format_priority or inherited.format_priority,
            min_confidence=confidence,
        )

    def override(
        self,
        languages: list[str] | None = None,
        formats: list[str] | None = None,
        types: list[str] | None = None,
        format_priority: list[str] | None = None,
        min_confidence: float | None = None,
    ) -> None:
        """Record overrides for this scope; arguments left at None keep the current delta."""
        requested = {
            "_languages": languages,
            "_formats": formats,
            "_types": types,
            "_format_priority": format_priority,
            "_min_confidence": min_confidence,
        }
        for attribute, value in requested.items():
            if value is not None:
                setattr(self, attribute, value)

    def to_dict(self) -> dict:
        """Serialise the scope and the deltas that are set (unset deltas are omitted)."""
        candidates = (
            ("languages", self._languages),
            ("formats", self._formats),
            ("types", self._types),
            ("format_priority", self._format_priority),
            ("min_confidence", self._min_confidence),
        )
        delta: dict[str, Any] = {key: value for key, value in candidates if value is not None}
        scope_info = {"level": self.scope.level, "identifier": self.scope.identifier}
        return {"scope": scope_info, "override": delta}

    @classmethod
    def global_default(cls) -> "SubtitleRuleSet":
        """Build a rule set at the "global" scope level with no overrides."""
        return cls(scope=RuleScope(level="global"))
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
"""Subtitle domain entities."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..shared.value_objects import ImdbId
|
||||||
|
from .value_objects import SubtitleFormat, SubtitleLanguage, SubtitleMatchingRules, SubtitleType
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SubtitleTrack:
    """
    A single subtitle track — either an external file or an embedded stream.

    language and format may be None while the track is still unclassified;
    confidence expresses how certain the language/type classification is.
    """

    # Classification (may be None if not yet resolved)
    language: SubtitleLanguage | None
    format: SubtitleFormat | None
    subtitle_type: SubtitleType = SubtitleType.UNKNOWN

    # Source
    is_embedded: bool = False
    file_path: Path | None = None  # None if embedded
    file_size_kb: float | None = None
    entry_count: int | None = None  # number of subtitle cues in the file

    # Matching state
    confidence: float = 0.0  # 0.0 → 1.0
    raw_tokens: list[str] = field(default_factory=list)  # tokens extracted from filename

    def is_resolved(self) -> bool:
        """Return True once a language has been assigned to the track."""
        return self.language is not None

    @property
    def destination_name(self) -> str:
        """
        Compute the output filename per naming convention:
            {lang}.{ext}
            {lang}.sdh.{ext}
            {lang}.forced.{ext}

        The extension is the first one listed by the format, without its
        leading dot.

        Raises:
            ValueError: if language or format is missing, or if the format
                lists no extension to name the file with.
        """
        if not self.language or not self.format:
            raise ValueError("Cannot compute destination_name: language or format missing")
        # A format can be built with an empty extension list; report that with
        # this property's usual ValueError instead of a bare IndexError.
        if not self.format.extensions:
            raise ValueError(
                f"Cannot compute destination_name: format '{self.format.id}' has no extension"
            )

        ext = self.format.extensions[0].lstrip(".")
        parts = [self.language.code]
        if self.subtitle_type == SubtitleType.SDH:
            parts.append("sdh")
        elif self.subtitle_type == SubtitleType.FORCED:
            parts.append("forced")
        parts.append(ext)
        return ".".join(parts)

    def __repr__(self) -> str:
        """Compact one-line summary; unknown language/format/source show as "?"."""
        lang = self.language.code if self.language else "?"
        fmt = self.format.id if self.format else "?"
        if self.is_embedded:
            src = "embedded"
        else:
            src = str(self.file_path.name if self.file_path else "?")
        return f"SubtitleTrack({lang}, {self.subtitle_type.value}, {fmt}, src={src}, conf={self.confidence:.2f})"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MediaSubtitleMetadata:
    """
    Everything known about the subtitles of one media item, split into
    embedded and external tracks.
    """

    media_id: ImdbId | None
    media_type: str  # "movie" | "tv_show"
    embedded_tracks: list[SubtitleTrack] = field(default_factory=list)
    external_tracks: list[SubtitleTrack] = field(default_factory=list)
    release_group: str | None = None
    detected_pattern_id: str | None = None  # pattern id from knowledge base
    pattern_confirmed: bool = False

    @property
    def all_tracks(self) -> list[SubtitleTrack]:
        """Embedded tracks followed by external tracks, as a new list."""
        embedded, external = self.embedded_tracks, self.external_tracks
        return embedded + external

    @property
    def total_count(self) -> int:
        """Number of embedded plus external tracks."""
        return sum(map(len, (self.embedded_tracks, self.external_tracks)))

    @property
    def unresolved_tracks(self) -> list[SubtitleTrack]:
        """External tracks that have no language assigned yet."""
        return [track for track in self.external_tracks if track.language is None]
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
from .base import SubtitleKnowledgeBase
|
||||||
|
from .loader import KnowledgeLoader
|
||||||
|
|
||||||
|
__all__ = ["SubtitleKnowledgeBase", "KnowledgeLoader"]
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
"""SubtitleKnowledgeBase — parsed, typed view of the loaded knowledge."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from functools import cached_property
|
||||||
|
|
||||||
|
from ..value_objects import (
|
||||||
|
ScanStrategy,
|
||||||
|
SubtitleFormat,
|
||||||
|
SubtitleLanguage,
|
||||||
|
SubtitleMatchingRules,
|
||||||
|
SubtitlePattern,
|
||||||
|
SubtitleType,
|
||||||
|
TypeDetectionMethod,
|
||||||
|
)
|
||||||
|
from .loader import KnowledgeLoader
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitleKnowledgeBase:
    """
    Typed lookup tables over the subtitle knowledge (formats, languages,
    types, default rules, patterns) supplied by a KnowledgeLoader.

    Call reload() to rebuild the tables from a fresh KnowledgeLoader without
    restarting.
    """

    def __init__(self, loader: KnowledgeLoader | None = None):
        """Use the given loader, or a new KnowledgeLoader, and build all tables."""
        self._loader = loader or KnowledgeLoader()
        self._build()

    def _build(self) -> None:
        """(Re)populate every lookup table from the loader's data."""
        raw = self._loader.subtitles()

        self._formats: dict[str, SubtitleFormat] = {}
        for format_id, spec in raw.get("formats", {}).items():
            self._formats[format_id] = SubtitleFormat(
                id=format_id,
                extensions=spec.get("extensions", []),
                description=spec.get("description", ""),
            )

        self._languages: dict[str, SubtitleLanguage] = {}
        for lang_code, spec in raw.get("languages", {}).items():
            self._languages[lang_code] = SubtitleLanguage(
                code=lang_code,
                tokens=spec.get("tokens", []),
            )

        # Reverse index: lower-cased token → language code
        self._lang_token_map: dict[str, str] = {}
        for lang_code, language in self._languages.items():
            for token in language.tokens:
                self._lang_token_map[token.lower()] = lang_code

        # Reverse index: lower-cased token → subtitle type
        self._type_token_map: dict[str, SubtitleType] = {}
        for type_id, spec in raw.get("types", {}).items():
            subtitle_type = SubtitleType(type_id)
            for token in spec.get("tokens", []):
                self._type_token_map[token.lower()] = subtitle_type

        defaults = raw.get("defaults", {})
        self._default_rules = SubtitleMatchingRules(
            preferred_languages=defaults.get("languages", ["fra", "eng"]),
            preferred_formats=defaults.get("formats", ["srt"]),
            allowed_types=defaults.get("types", ["standard", "forced"]),
            format_priority=defaults.get("format_priority", ["srt", "ass"]),
            min_confidence=defaults.get("min_confidence", 0.7),
        )

        self._patterns: dict[str, SubtitlePattern] = {}
        for pid, spec in self._loader.patterns().items():
            try:
                strategy = ScanStrategy(spec.get("scan_strategy", "adjacent"))
                detection = TypeDetectionMethod(
                    spec.get("type_detection", {}).get("method", "token_in_name")
                )
                self._patterns[pid] = SubtitlePattern(
                    id=pid,
                    description=spec.get("description", ""),
                    scan_strategy=strategy,
                    root_folder=spec.get("root_folder"),
                    type_detection=detection,
                    version=spec.get("version", "1.0"),
                )
            except ValueError as e:
                # A pattern that raises ValueError is skipped, not fatal.
                logger.warning(f"SubtitleKnowledgeBase: skipping pattern '{pid}': {e}")

    def reload(self) -> None:
        """Swap in a new KnowledgeLoader and rebuild every table from it."""
        self._loader = KnowledgeLoader()
        self._build()
        logger.info("SubtitleKnowledgeBase: reloaded")

    # --- Defaults ---

    def default_rules(self) -> SubtitleMatchingRules:
        """Return the matching rules built from the "defaults" section."""
        return self._default_rules

    # --- Formats ---

    def formats(self) -> dict[str, SubtitleFormat]:
        """Return the format table, keyed by format id."""
        return self._formats

    def format_for_extension(self, ext: str) -> SubtitleFormat | None:
        """Return the first format whose matches_extension(ext) is true, else None."""
        candidates = (fmt for fmt in self._formats.values() if fmt.matches_extension(ext))
        return next(candidates, None)

    def known_extensions(self) -> set[str]:
        """Return the union of the extensions of every known format."""
        return set().union(*(fmt.extensions for fmt in self._formats.values()))

    # --- Languages ---

    def languages(self) -> dict[str, SubtitleLanguage]:
        """Return the language table, keyed by language code."""
        return self._languages

    def language_for_token(self, token: str) -> SubtitleLanguage | None:
        """Return the language a token stands for (case-insensitive), or None."""
        code = self._lang_token_map.get(token.lower())
        if not code:
            return None
        return self._languages.get(code)

    def is_known_lang_token(self, token: str) -> bool:
        """Tell whether a token (case-insensitive) maps to a language."""
        return token.lower() in self._lang_token_map

    # --- Types ---

    def type_for_token(self, token: str) -> SubtitleType | None:
        """Return the subtitle type a token stands for (case-insensitive), or None."""
        return self._type_token_map.get(token.lower())

    def is_known_type_token(self, token: str) -> bool:
        """Tell whether a token (case-insensitive) maps to a subtitle type."""
        return token.lower() in self._type_token_map

    # --- Patterns ---

    def patterns(self) -> dict[str, SubtitlePattern]:
        """Return the pattern table, keyed by pattern id."""
        return self._patterns

    def pattern(self, pattern_id: str) -> SubtitlePattern | None:
        """Return the pattern with the given id, or None."""
        return self._patterns.get(pattern_id)

    def patterns_for_group(self, group_name: str) -> list[SubtitlePattern]:
        """
        Return the patterns listed under "known_patterns" for a release group.

        Ids that are not in the pattern table are skipped; an unknown group
        yields an empty list.
        """
        group = self._loader.release_group(group_name)
        if not group:
            return []

        found = []
        for pid in group.get("known_patterns", []):
            if pid in self._patterns:
                found.append(self._patterns[pid])
        return found
|
||||||
@@ -0,0 +1,131 @@
|
|||||||
|
"""KnowledgeLoader — autodiscovers and merges builtin + learned YAML knowledge packs."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
import alfred as _alfred_pkg
|
||||||
|
|
||||||
|
# Directory of the alfred package itself; both knowledge roots are derived
# from it rather than from this file's depth in the tree.
_PACKAGE_DIR = Path(_alfred_pkg.__file__).parent

# Builtin knowledge — inside the package
_BUILTIN_ROOT = _PACKAGE_DIR / "knowledge"

# Learned knowledge — under data/ next to the package (instance-local)
_LEARNED_ROOT = _PACKAGE_DIR.parent / "data" / "knowledge"
|
||||||
|
|
||||||
|
|
||||||
|
def _load_yaml(path: Path) -> dict:
|
||||||
|
try:
|
||||||
|
with open(path, encoding="utf-8") as f:
|
||||||
|
return yaml.safe_load(f) or {}
|
||||||
|
except FileNotFoundError:
|
||||||
|
return {}
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"KnowledgeLoader: could not load {path}: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _merge(base: dict, override: dict) -> dict:
|
||||||
|
"""
|
||||||
|
Deep merge override into base.
|
||||||
|
Lists are extended (not replaced) — learned tokens are additive.
|
||||||
|
Scalar values in override win over base.
|
||||||
|
"""
|
||||||
|
result = dict(base)
|
||||||
|
for key, val in override.items():
|
||||||
|
if key in result and isinstance(result[key], dict) and isinstance(val, dict):
|
||||||
|
result[key] = _merge(result[key], val)
|
||||||
|
elif key in result and isinstance(result[key], list) and isinstance(val, list):
|
||||||
|
# Extend list, deduplicate, preserve order
|
||||||
|
combined = result[key] + [v for v in val if v not in result[key]]
|
||||||
|
result[key] = combined
|
||||||
|
else:
|
||||||
|
result[key] = val
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeLoader:
|
||||||
|
"""
|
||||||
|
Loads subtitle knowledge from YAML files.
|
||||||
|
|
||||||
|
Builtin packs live in alfred/knowledge/ (versioned).
|
||||||
|
Learned packs live in data/knowledge/ (gitignored, instance-local).
|
||||||
|
|
||||||
|
Learned entries are merged additively — they can only add tokens/patterns,
|
||||||
|
never remove builtin ones.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
loader = KnowledgeLoader()
|
||||||
|
subtitles = loader.subtitles() # merged subtitles.yaml
|
||||||
|
patterns = loader.patterns() # all patterns, keyed by id
|
||||||
|
groups = loader.release_groups() # all release groups, keyed by name
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._cache: dict[str, dict] = {}
|
||||||
|
self._load()
|
||||||
|
|
||||||
|
def _load(self) -> None:
|
||||||
|
# Main subtitles knowledge
|
||||||
|
builtin = _load_yaml(_BUILTIN_ROOT / "subtitles.yaml")
|
||||||
|
learned = _load_yaml(_LEARNED_ROOT / "subtitles_learned.yaml")
|
||||||
|
self._cache["subtitles"] = _merge(builtin, learned)
|
||||||
|
|
||||||
|
# Patterns
|
||||||
|
self._cache["patterns"] = {}
|
||||||
|
for path in sorted((_BUILTIN_ROOT / "patterns").glob("*.yaml")):
|
||||||
|
data = _load_yaml(path)
|
||||||
|
pid = data.get("id", path.stem)
|
||||||
|
self._cache["patterns"][pid] = data
|
||||||
|
|
||||||
|
for path in sorted((_LEARNED_ROOT / "patterns").glob("*.yaml")):
|
||||||
|
data = _load_yaml(path)
|
||||||
|
pid = data.get("id", path.stem)
|
||||||
|
if pid in self._cache["patterns"]:
|
||||||
|
self._cache["patterns"][pid] = _merge(self._cache["patterns"][pid], data)
|
||||||
|
else:
|
||||||
|
self._cache["patterns"][pid] = data
|
||||||
|
logger.info(f"KnowledgeLoader: learned new pattern '{pid}'")
|
||||||
|
|
||||||
|
# Release groups
|
||||||
|
self._cache["release_groups"] = {}
|
||||||
|
for path in sorted((_BUILTIN_ROOT / "release_groups").glob("*.yaml")):
|
||||||
|
data = _load_yaml(path)
|
||||||
|
name = data.get("name", path.stem)
|
||||||
|
self._cache["release_groups"][name] = data
|
||||||
|
|
||||||
|
for path in sorted((_LEARNED_ROOT / "release_groups").glob("*.yaml")):
|
||||||
|
data = _load_yaml(path)
|
||||||
|
name = data.get("name", path.stem)
|
||||||
|
if name in self._cache["release_groups"]:
|
||||||
|
self._cache["release_groups"][name] = _merge(self._cache["release_groups"][name], data)
|
||||||
|
else:
|
||||||
|
self._cache["release_groups"][name] = data
|
||||||
|
logger.info(f"KnowledgeLoader: learned new release group '{name}'")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"KnowledgeLoader: {len(self._cache['patterns'])} patterns, "
|
||||||
|
f"{len(self._cache['release_groups'])} release groups loaded"
|
||||||
|
)
|
||||||
|
|
||||||
|
def subtitles(self) -> dict:
|
||||||
|
return self._cache["subtitles"]
|
||||||
|
|
||||||
|
def patterns(self) -> dict[str, dict]:
|
||||||
|
return self._cache["patterns"]
|
||||||
|
|
||||||
|
def pattern(self, pattern_id: str) -> dict | None:
|
||||||
|
return self._cache["patterns"].get(pattern_id)
|
||||||
|
|
||||||
|
def release_groups(self) -> dict[str, dict]:
|
||||||
|
return self._cache["release_groups"]
|
||||||
|
|
||||||
|
def release_group(self, name: str) -> dict | None:
|
||||||
|
"""Case-insensitive lookup."""
|
||||||
|
name_lower = name.lower()
|
||||||
|
for key, val in self._cache["release_groups"].items():
|
||||||
|
if key.lower() == name_lower:
|
||||||
|
return val
|
||||||
|
return None
|
||||||
@@ -0,0 +1,221 @@
|
|||||||
|
"""SubtitleScanner — inspects local subtitle files and filters them per user preferences.
|
||||||
|
|
||||||
|
Given a video file path, the scanner:
|
||||||
|
1. Looks for subtitle files in the same directory as the video.
|
||||||
|
2. Optionally also inspects a Subs/ subfolder adjacent to the video.
|
||||||
|
3. Classifies each file (language, SDH, forced) from its filename.
|
||||||
|
4. Filters according to SubtitlePreferences (languages, min_size_kb, keep_sdh, keep_forced).
|
||||||
|
5. Returns a list of SubtitleCandidate — one per file that passes the filter,
|
||||||
|
with the destination filename already computed.
|
||||||
|
|
||||||
|
Filename classification heuristics
|
||||||
|
-----------------------------------
|
||||||
|
We parse the stem of each subtitle file looking for known patterns:
|
||||||
|
|
||||||
|
fr.srt → lang=fr, sdh=False, forced=False
|
||||||
|
fr.sdh.srt → lang=fr, sdh=True
|
||||||
|
fr.hi.srt → lang=fr, sdh=True (hi = hearing-impaired, alias for sdh)
|
||||||
|
fr.forced.srt → lang=fr, forced=True
|
||||||
|
Breaking.Bad.S01E01.French.srt → lang=fr (keyword match)
|
||||||
|
Breaking.Bad.S01E01.VOSTFR.srt → lang=fr (VOSTFR = original version with French subtitles)
|
||||||
|
|
||||||
|
Output naming convention (matches SubtitlePreferences docstring):
|
||||||
|
{lang}.srt
|
||||||
|
{lang}.sdh.srt
|
||||||
|
{lang}.forced.srt
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Subtitle file extensions we handle (lowercase, with the leading dot)
SUBTITLE_EXTENSIONS = {".srt", ".ass", ".ssa", ".vtt", ".sub"}

# Language keyword map: lowercase token → ISO 639-1 code.
# Keys are matched against whole filename tokens, never substrings.
_LANG_KEYWORDS: dict[str, str] = {
    # French
    "fr": "fr",
    "fra": "fr",
    "french": "fr",
    "francais": "fr",
    "français": "fr",
    "vf": "fr",
    "vff": "fr",
    "vostfr": "fr",
    # English
    "en": "en",
    "eng": "en",
    "english": "en",
    # Spanish
    "es": "es",
    "spa": "es",
    "spanish": "es",
    "espanol": "es",
    # German
    "de": "de",
    "deu": "de",
    "ger": "de",
    "german": "de",
    # Italian
    "it": "it",
    "ita": "it",
    "italian": "it",
    # Portuguese
    "pt": "pt",
    "por": "pt",
    "portuguese": "pt",
    # Dutch
    "nl": "nl",
    "nld": "nl",
    "dutch": "nl",
    # Japanese
    "ja": "ja",
    "jpn": "ja",
    "japanese": "ja",
}

# Tokens that indicate SDH / hearing-impaired
_SDH_TOKENS = {"sdh", "hi", "hearing", "impaired", "cc", "closedcaption"}

# Tokens that indicate forced subtitles
_FORCED_TOKENS = {"forced", "foreign"}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SubtitleCandidate:
    """A subtitle file that passed the filter, ready to be placed."""

    source_path: Path
    language: str  # ISO 639-1 code, e.g. "fr"
    is_sdh: bool
    is_forced: bool
    extension: str  # e.g. ".srt"

    @property
    def destination_name(self) -> str:
        """
        Build the destination filename:

            {lang}.{ext}
            {lang}.sdh.{ext}
            {lang}.forced.{ext}

        SDH takes precedence: a candidate flagged both SDH and forced is
        named as SDH only.
        """
        if self.is_sdh:
            qualifier = "sdh"
        elif self.is_forced:
            qualifier = "forced"
        else:
            qualifier = None

        segments = [self.language]
        if qualifier is not None:
            segments.append(qualifier)
        # The stored extension carries its leading dot; join() supplies it back.
        segments.append(self.extension.lstrip("."))
        return ".".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
def _classify(path: Path) -> tuple[str | None, bool, bool]:
    """
    Derive (language_code, is_sdh, is_forced) from a subtitle filename.

    The lowercased stem is split on dots, whitespace, underscores and
    hyphens, and each token is looked up in the module keyword tables.
    When several language tokens are present, the last one wins.

    Returns (None, False, False) when no token matches anything.
    """
    import re

    tokens = re.split(r"[\.\s_\-]+", path.stem.lower())

    # Walking backwards and stopping at the first hit == "last match wins".
    language = next(
        (_LANG_KEYWORDS[tok] for tok in reversed(tokens) if tok in _LANG_KEYWORDS),
        None,
    )
    is_sdh = any(tok in _SDH_TOKENS for tok in tokens)
    is_forced = any(tok in _FORCED_TOKENS for tok in tokens)

    return language, is_sdh, is_forced
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitleScanner:
    """
    Scans subtitle files next to a video and filters them by the configured
    preferences (languages, minimum size, SDH and forced handling).

    Usage:
        scanner = SubtitleScanner(
            languages=["fr", "en"], min_size_kb=1, keep_sdh=True, keep_forced=True
        )
        candidates = scanner.scan(video_path)
        # Each candidate has .source_path and .destination_name
    """

    def __init__(self, languages: list[str], min_size_kb: int, keep_sdh: bool, keep_forced: bool):
        # Language codes are normalised to lowercase for comparison.
        self.languages = [code.lower() for code in languages]
        self.min_size_kb = min_size_kb
        self.keep_sdh = keep_sdh
        self.keep_forced = keep_forced

    def scan(self, video_path: Path) -> list[SubtitleCandidate]:
        """
        Collect every subtitle candidate near the video that passes the filter.

        Looks at:
          - the video's own directory (flat siblings)
          - a Subs/ subfolder of that directory, when it exists

        Entries are visited in sorted order within each directory.
        """
        parent = video_path.parent
        search_dirs = [parent]

        subs_dir = parent / "Subs"
        if subs_dir.is_dir():
            search_dirs.append(subs_dir)
            logger.debug(f"SubtitleScanner: found Subs/ folder at {subs_dir}")

        candidates: list[SubtitleCandidate] = []
        for directory in search_dirs:
            subtitle_files = (
                entry
                for entry in sorted(directory.iterdir())
                if entry.is_file() and entry.suffix.lower() in SUBTITLE_EXTENSIONS
            )
            evaluated = (self._evaluate(entry) for entry in subtitle_files)
            candidates.extend(found for found in evaluated if found is not None)

        logger.info(f"SubtitleScanner: {len(candidates)} candidate(s) found for {video_path.name}")
        return candidates

    def _evaluate(self, path: Path) -> SubtitleCandidate | None:
        """Run one file through every filter; None means the file is dropped."""
        size_kb = path.stat().st_size / 1024

        # Filters run in a fixed order: size, language, SDH, forced.
        # The first one that rejects the file supplies the logged reason.
        reason = None
        if size_kb < self.min_size_kb:
            reason = f"too small: {size_kb:.1f} KB"
        else:
            language, is_sdh, is_forced = _classify(path)
            if language is None:
                reason = "language unknown"
            elif language not in self.languages:
                reason = f"language '{language}' not in prefs"
            elif is_sdh and not self.keep_sdh:
                reason = "SDH not wanted"
            elif is_forced and not self.keep_forced:
                reason = "forced not wanted"

        if reason is not None:
            logger.debug(f"SubtitleScanner: skip {path.name} ({reason})")
            return None

        return SubtitleCandidate(
            source_path=path,
            language=language,
            is_sdh=is_sdh,
            is_forced=is_forced,
            extension=path.suffix.lower(),
        )
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
from .identifier import SubtitleIdentifier
|
||||||
|
from .matcher import SubtitleMatcher
|
||||||
|
from .pattern_detector import PatternDetector
|
||||||
|
from .placer import PlacedTrack, PlaceResult, SubtitlePlacer
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"SubtitleIdentifier",
|
||||||
|
"SubtitleMatcher",
|
||||||
|
"PatternDetector",
|
||||||
|
"SubtitlePlacer",
|
||||||
|
"PlacedTrack",
|
||||||
|
"PlaceResult",
|
||||||
|
]
|
||||||
@@ -0,0 +1,287 @@
|
|||||||
|
"""SubtitleIdentifier — finds and classifies all subtitle tracks for a video file."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ...shared.value_objects import ImdbId
|
||||||
|
from ..entities import MediaSubtitleMetadata, SubtitleTrack
|
||||||
|
from ..knowledge.base import SubtitleKnowledgeBase
|
||||||
|
from ..value_objects import ScanStrategy, SubtitlePattern, SubtitleType
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _tokenize(name: str) -> list[str]:
|
||||||
|
"""Split a filename stem into lowercase tokens."""
|
||||||
|
return [t.lower() for t in re.split(r"[\.\s_\-]+", name) if t]
|
||||||
|
|
||||||
|
|
||||||
|
def _count_entries(path: Path) -> int:
|
||||||
|
"""Return the entry count of an SRT file by finding the last cue number."""
|
||||||
|
try:
|
||||||
|
with open(path, encoding="utf-8", errors="replace") as f:
|
||||||
|
lines = f.read().splitlines()
|
||||||
|
for line in reversed(lines):
|
||||||
|
if line.strip().isdigit():
|
||||||
|
return int(line.strip())
|
||||||
|
return 0
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitleIdentifier:
    """
    Finds all subtitle tracks for a given video file using a known pattern,
    then attempts to classify each track (language, type, format).

    Returns a MediaSubtitleMetadata with embedded + external tracks.
    External tracks with unknown language or low confidence are left as-is —
    the caller (use case) decides whether to ask the user for clarification.
    """

    def __init__(self, kb: SubtitleKnowledgeBase):
        self.kb = kb

    def identify(
        self,
        video_path: Path,
        pattern: SubtitlePattern,
        media_id: ImdbId | None,
        media_type: str,
        release_group: str | None = None,
    ) -> MediaSubtitleMetadata:
        """Build the subtitle metadata for ``video_path`` under ``pattern``.

        Embedded tracks are always probed. External tracks are scanned only
        when the pattern's strategy is not EMBEDDED.
        """
        metadata = MediaSubtitleMetadata(
            media_id=media_id,
            media_type=media_type,
            release_group=release_group,
            detected_pattern_id=pattern.id,
        )

        if pattern.scan_strategy != ScanStrategy.EMBEDDED:
            metadata.external_tracks = self._scan_external(video_path, pattern)
        metadata.embedded_tracks = self._scan_embedded(video_path)

        return metadata

    # ------------------------------------------------------------------
    # Embedded tracks — ffprobe
    # ------------------------------------------------------------------

    def _scan_embedded(self, video_path: Path) -> list[SubtitleTrack]:
        """List the subtitle streams ffprobe reports inside the video container.

        Returns an empty list when the file is missing, ffprobe cannot be
        launched, it times out (30 s), or its output is not valid JSON.
        """
        if not video_path.exists():
            return []

        command = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_streams",
            "-select_streams", "s",
            str(video_path),
        ]
        try:
            result = subprocess.run(command, capture_output=True, text=True, timeout=30)
            data = json.loads(result.stdout)
        except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as e:
            # OSError covers a missing binary (FileNotFoundError) as well as
            # e.g. a non-executable one (PermissionError).
            logger.debug(f"SubtitleIdentifier: ffprobe failed for {video_path.name}: {e}")
            return []

        tracks = []
        for stream in data.get("streams", []):
            tags = stream.get("tags", {})
            disposition = stream.get("disposition", {})
            lang_code = tags.get("language", "")

            lang = self.kb.language_for_token(lang_code) if lang_code else None

            # hearing_impaired takes precedence over forced.
            if disposition.get("hearing_impaired"):
                stype = SubtitleType.SDH
            elif disposition.get("forced"):
                stype = SubtitleType.FORCED
            else:
                stype = SubtitleType.STANDARD

            tracks.append(SubtitleTrack(
                language=lang,
                format=None,
                subtitle_type=stype,
                is_embedded=True,
                raw_tokens=[lang_code] if lang_code else [],
            ))

        logger.debug(f"SubtitleIdentifier: {len(tracks)} embedded track(s) in {video_path.name}")
        return tracks

    # ------------------------------------------------------------------
    # External tracks — filesystem scan per pattern strategy
    # ------------------------------------------------------------------

    def _scan_external(self, video_path: Path, pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Locate external subtitle files per the pattern's strategy and classify them.

        The subtitle folder name defaults to "Subs" when the pattern does not
        set ``root_folder``. Unsupported strategies yield an empty list.
        """
        strategy = pattern.scan_strategy

        if strategy == ScanStrategy.ADJACENT:
            candidates = self._find_adjacent(video_path)
        elif strategy == ScanStrategy.FLAT:
            candidates = self._find_flat(video_path, pattern.root_folder or "Subs")
        elif strategy == ScanStrategy.EPISODE_SUBFOLDER:
            candidates = self._find_episode_subfolder(video_path, pattern.root_folder or "Subs")
        else:
            return []

        return self._classify_files(candidates, pattern)

    def _subtitle_files_in(self, directory: Path) -> list[Path]:
        """Return the files in ``directory`` with a known subtitle extension, sorted."""
        known = self.kb.known_extensions()
        return [
            p for p in sorted(directory.iterdir())
            if p.is_file() and p.suffix.lower() in known
        ]

    def _find_adjacent(self, video_path: Path) -> list[Path]:
        """Subtitle files sitting in the same directory as the video."""
        # NOTE(review): a subtitle sharing the video's exact stem
        # (Movie.mkv + Movie.srt) is excluded here — confirm this is intended.
        return [
            p for p in self._subtitle_files_in(video_path.parent)
            if p.stem != video_path.stem
        ]

    def _find_flat(self, video_path: Path, root_folder: str) -> list[Path]:
        """Subtitle files directly inside ``root_folder``.

        The folder is looked up next to the video first, then one level up
        (release root). Returns [] when neither exists.
        """
        subs_dir = video_path.parent / root_folder
        if not subs_dir.is_dir():
            # Also look at release root (one level up)
            subs_dir = video_path.parent.parent / root_folder
        if not subs_dir.is_dir():
            return []
        return self._subtitle_files_in(subs_dir)

    def _find_episode_subfolder(self, video_path: Path, root_folder: str) -> list[Path]:
        """
        Look for {root_folder}/{episode_stem}/<subtitle files>.

        Checks two locations, in order, and returns the files of the first
        one that contains any:
          1. Adjacent to the video: video_path.parent / root_folder / video_path.stem
          2. Release root (one level up): video_path.parent.parent / root_folder / video_path.stem
        """
        episode_stem = video_path.stem
        candidates_dirs = [
            video_path.parent / root_folder / episode_stem,
            video_path.parent.parent / root_folder / episode_stem,
        ]
        for subs_dir in candidates_dirs:
            if not subs_dir.is_dir():
                continue
            files = self._subtitle_files_in(subs_dir)
            if files:
                logger.debug(f"SubtitleIdentifier: found {len(files)} file(s) in {subs_dir}")
                return files
        return []

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

    def _classify_files(self, paths: list[Path], pattern: SubtitlePattern) -> list[SubtitleTrack]:
        """Classify each file, then apply size/count disambiguation if the pattern asks for it."""
        tracks = [self._classify_single(path) for path in paths]

        if pattern.type_detection.value == "size_and_count":
            tracks = self._disambiguate_by_size(tracks)

        return tracks

    def _classify_single(self, path: Path) -> SubtitleTrack:
        """Classify one external subtitle file from its name, size and cue count.

        Confidence is the share of meaningful tokens (non-numeric, longer
        than one character) recognised as a language or type token, capped
        at 1.0; it is 0.5 when the name has no meaningful token at all.
        """
        fmt = self.kb.format_for_extension(path.suffix)
        tokens = _tokenize(path.stem)

        language = None
        subtitle_type = SubtitleType.UNKNOWN
        unknown_tokens = []
        matched_tokens = 0

        for token in tokens:
            if self.kb.is_known_lang_token(token):
                language = self.kb.language_for_token(token)
                matched_tokens += 1
            elif self.kb.is_known_type_token(token):
                subtitle_type = self.kb.type_for_token(token) or subtitle_type
                matched_tokens += 1
            elif not token.isdigit() and len(token) > 1:
                # Numeric prefixes and single characters are ignored.
                unknown_tokens.append(token)

        # Confidence: proportion of meaningful tokens that were recognized
        meaningful = [t for t in tokens if not t.isdigit() and len(t) > 1]
        if meaningful:
            # Capped: a recognised single-character token is counted as
            # matched but not as meaningful, which could push the ratio above 1.
            confidence = min(1.0, matched_tokens / len(meaningful))
        else:
            confidence = 0.5

        if unknown_tokens:
            logger.debug(
                f"SubtitleIdentifier: unknown tokens in '{path.name}': {unknown_tokens}"
            )

        # A single stat() instead of exists()+stat(): no window in which the
        # file can vanish between the check and the read.
        try:
            size_kb = path.stat().st_size / 1024
        except OSError:
            size_kb = None
        entry_count = _count_entries(path) if size_kb is not None else None

        return SubtitleTrack(
            language=language,
            format=fmt,
            subtitle_type=subtitle_type,
            is_embedded=False,
            file_path=path,
            file_size_kb=size_kb,
            entry_count=entry_count,
            confidence=confidence,
            raw_tokens=tokens,
        )

    def _disambiguate_by_size(self, tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
        """
        Assign subtitle types by entry count when several tracks share a language.

        Within each language group (tracks without a language form one group):
          - 1 track   → left untouched
          - 2 tracks  → fewer entries = STANDARD, more entries = SDH
          - 3+ tracks → fewest = FORCED, most = SDH, everything between = STANDARD

        Tracks are mutated in place. The returned list is ordered by language
        group (first-seen order), and by ascending entry count inside groups
        of two or more.

        Only applied when type_detection = size_and_count.

        NOTE(review): types already detected from filename tokens are
        overwritten for every track in a multi-track group — confirm whether
        only UNKNOWN/STANDARD tracks were meant to be reclassified.
        """
        # Group by language code
        lang_groups: dict[str, list[SubtitleTrack]] = {}
        for track in tracks:
            key = track.language.code if track.language else "__unknown__"
            lang_groups.setdefault(key, []).append(track)

        result = []
        for group in lang_groups.values():
            if len(group) == 1:
                result.extend(group)
                continue

            # Sort by entry_count ascending (None treated as 0)
            sorted_group = sorted(group, key=lambda t: t.entry_count or 0)

            if len(sorted_group) == 2:
                # smaller = standard, larger = sdh
                self._set_type(sorted_group[0], SubtitleType.STANDARD)
                self._set_type(sorted_group[1], SubtitleType.SDH)
            elif len(sorted_group) >= 3:
                # smallest = forced, middle = standard, largest = sdh
                self._set_type(sorted_group[0], SubtitleType.FORCED)
                for t in sorted_group[1:-1]:
                    self._set_type(t, SubtitleType.STANDARD)
                self._set_type(sorted_group[-1], SubtitleType.SDH)

            result.extend(sorted_group)

        return result

    def _set_type(self, track: SubtitleTrack, stype: SubtitleType) -> None:
        """Mutate track type in-place."""
        track.subtitle_type = stype
|
||||||
@@ -0,0 +1,118 @@
|
|||||||
|
"""SubtitleMatcher — filters tracks against resolved rules."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from ..entities import SubtitleTrack
|
||||||
|
from ..value_objects import SubtitleMatchingRules, SubtitleType
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitleMatcher:
    """
    Applies effective SubtitleMatchingRules to a list of SubtitleTrack.

    Produces two lists: matched tracks (language known, confidence at or
    above min_confidence, every filter passed) and unresolved tracks
    (language unknown or confidence too low — need user clarification).
    Embedded tracks are ignored entirely.

    Conflict resolution: when two matched tracks share the same
    language + type, format_priority decides which one is kept.
    """

    def match(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> tuple[list[SubtitleTrack], list[SubtitleTrack]]:
        """
        Split ``tracks`` into (matched, unresolved).
        """
        matched: list[SubtitleTrack] = []
        unresolved: list[SubtitleTrack] = []

        for track in tracks:
            if track.is_embedded:
                continue

            needs_clarification = (
                track.language is None or track.confidence < rules.min_confidence
            )
            if needs_clarification:
                unresolved.append(track)
            elif self._passes_filters(track, rules):
                matched.append(track)
            else:
                logger.debug(f"SubtitleMatcher: filtered out {track}")

        matched = self._resolve_conflicts(matched, rules)
        logger.info(
            f"SubtitleMatcher: {len(matched)} matched, {len(unresolved)} unresolved"
        )
        return matched, unresolved

    def _passes_filters(self, track: SubtitleTrack, rules: SubtitleMatchingRules) -> bool:
        """Check the language, format and type filters; an empty rule list disables that filter."""
        # Language filter
        languages = rules.preferred_languages
        if languages and (not track.language or track.language.code not in languages):
            return False

        # Format filter (only for external files)
        formats = rules.preferred_formats
        if formats and not track.is_embedded:
            if not track.format or track.format.id not in formats:
                return False

        # Type filter
        types = rules.allowed_types
        if types and track.subtitle_type.value not in types:
            return False

        return True

    def _resolve_conflicts(
        self,
        tracks: list[SubtitleTrack],
        rules: SubtitleMatchingRules,
    ) -> list[SubtitleTrack]:
        """
        Keep a single track per (language, type) pair.

        The first track seen for a pair is kept unless a later one ranks
        strictly better in format_priority. Output follows the order in which
        each pair was first encountered.
        """
        best: dict[tuple, SubtitleTrack] = {}

        for track in tracks:
            key = (
                track.language.code if track.language else None,
                track.subtitle_type.value,
            )

            if key not in best:
                best[key] = track
                continue

            existing = best[key]
            if not self._prefer(track, existing, rules.format_priority):
                continue

            logger.debug(
                f"SubtitleMatcher: conflict {key} — "
                f"preferring {track.format.id if track.format else 'embedded'} "
                f"over {existing.format.id if existing.format else 'embedded'}"
            )
            best[key] = track

        return list(best.values())

    def _prefer(
        self,
        candidate: SubtitleTrack,
        existing: SubtitleTrack,
        format_priority: list[str],
    ) -> bool:
        """Return True if candidate is preferable to existing."""
        if not format_priority:
            return False

        def rank(track: SubtitleTrack) -> int:
            # Formats absent from the priority list (or missing) rank last.
            fmt = track.format.id if track.format else ""
            return format_priority.index(fmt) if fmt in format_priority else 999

        return rank(candidate) < rank(existing)
|
||||||
@@ -0,0 +1,205 @@
|
|||||||
|
"""PatternDetector — discovers the subtitle structure of a release folder."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..knowledge.base import SubtitleKnowledgeBase
|
||||||
|
from ..value_objects import ScanStrategy, SubtitlePattern
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PatternDetector:
|
||||||
|
"""
|
||||||
|
Inspects a release folder and returns the best matching known pattern,
|
||||||
|
plus a confidence score and a description of what was found.
|
||||||
|
|
||||||
|
Used for "pattern discovery" — when we don't yet know which pattern
|
||||||
|
a release follows. The result is proposed to the user for confirmation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, kb: SubtitleKnowledgeBase):
|
||||||
|
self.kb = kb
|
||||||
|
|
||||||
|
def detect(self, release_root: Path, sample_video: Path) -> dict:
|
||||||
|
"""
|
||||||
|
Analyse the release folder and return:
|
||||||
|
{
|
||||||
|
"detected": SubtitlePattern | None,
|
||||||
|
"confidence": float,
|
||||||
|
"description": str, # human-readable description of what was found
|
||||||
|
"candidate_pattern_ids": list[str],
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
findings = self._inspect(release_root, sample_video)
|
||||||
|
best, confidence = self._match_pattern(findings)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"detected": best,
|
||||||
|
"confidence": confidence,
|
||||||
|
"description": self._describe(findings),
|
||||||
|
"candidate_pattern_ids": [best.id] if best else [],
|
||||||
|
"raw_findings": findings,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _has_embedded_subtitles(self, video_path: Path) -> bool:
|
||||||
|
"""Run ffprobe to check whether the video has embedded subtitle streams."""
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
[
|
||||||
|
"ffprobe", "-v", "quiet",
|
||||||
|
"-print_format", "json",
|
||||||
|
"-show_streams",
|
||||||
|
"-select_streams", "s",
|
||||||
|
str(video_path),
|
||||||
|
],
|
||||||
|
capture_output=True, text=True, timeout=30,
|
||||||
|
)
|
||||||
|
data = json.loads(result.stdout)
|
||||||
|
return len(data.get("streams", [])) > 0
|
||||||
|
except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _inspect(self, release_root: Path, sample_video: Path) -> dict:
    """Gather structural facts about the release.

    Returns a dict with the keys initialised below. Two "Subs" locations
    are examined in turn: next to the sample video, then at the release
    root. When both exist, the later one overwrites subs_root,
    subs_strategy and files_per_episode, while has_lang_tokens and
    has_numeric_prefix are only ever switched to True. Loose subtitle
    files next to the video are considered only when no Subs folder was
    found.
    """
    known_exts = self.kb.known_extensions()
    findings: dict = {
        "has_subs_folder": False,
        "subs_strategy": None,  # "flat" | "episode_subfolder"
        "subs_root": None,
        "adjacent_subs": False,
        "has_embedded": self._has_embedded_subtitles(sample_video),
        "files_per_episode": 0,
        "has_lang_tokens": False,
        "has_numeric_prefix": False,
    }

    # Check for Subs/ folder — adjacent or at release root
    for subs_candidate in [
        sample_video.parent / "Subs",
        release_root / "Subs",
    ]:
        if subs_candidate.is_dir():
            findings["has_subs_folder"] = True
            findings["subs_root"] = str(subs_candidate)

            # Is it flat or episode_subfolder?
            children = list(subs_candidate.iterdir())
            sub_files = [c for c in children if c.is_file() and c.suffix.lower() in known_exts]
            sub_dirs = [c for c in children if c.is_dir()]

            # Only subdirectories and no subtitle file at the top level
            # is read as one subfolder per episode.
            if sub_dirs and not sub_files:
                findings["subs_strategy"] = "episode_subfolder"
                # Count files in a sample subfolder
                # (iterdir() order is not sorted, so which subfolder is
                # sampled is filesystem-dependent)
                sample_sub = sub_dirs[0]
                sample_files = [f for f in sample_sub.iterdir()
                                if f.is_file() and f.suffix.lower() in known_exts]
                findings["files_per_episode"] = len(sample_files)
                # Check naming conventions
                for f in sample_files:
                    stem = f.stem
                    parts = stem.split("_")
                    # A leading all-digit chunk before the first underscore
                    if parts[0].isdigit():
                        findings["has_numeric_prefix"] = True
                    # Tokens here are split on "_" and "." only
                    if any(self.kb.is_known_lang_token(t.lower())
                           for t in stem.replace("_", ".").split(".")):
                        findings["has_lang_tokens"] = True
            else:
                # Everything else — including an empty Subs folder —
                # is reported as flat.
                findings["subs_strategy"] = "flat"
                findings["files_per_episode"] = len(sub_files)
                for f in sub_files:
                    if any(self.kb.is_known_lang_token(t.lower())
                           for t in f.stem.replace("_", ".").split(".")):
                        findings["has_lang_tokens"] = True
                        break

    # Check adjacent subs (next to the video)
    if not findings["has_subs_folder"]:
        adjacent = [
            p for p in sample_video.parent.iterdir()
            if p.is_file() and p.suffix.lower() in known_exts
        ]
        if adjacent:
            findings["adjacent_subs"] = True
            findings["files_per_episode"] = len(adjacent)

    return findings
|
||||||
|
|
||||||
|
def _match_pattern(self, findings: dict) -> tuple[SubtitlePattern | None, float]:
    """Pick the known pattern that best fits the findings.

    Every pattern returned by ``self.kb.patterns()`` is scored with
    ``self._score``; the highest-scoring one wins, and on a tie the
    pattern encountered first is kept.

    Args:
        findings: Dict of discovery flags produced by the scan step.

    Returns:
        ``(pattern, score)`` for the best candidate. ``pattern`` is None
        when no patterns are known (score ``0.0``) or when the best score
        is below the 0.4 acceptance threshold (that score is still
        returned).
    """
    ranked = [
        (self._score(candidate, findings), candidate)
        for candidate in self.kb.patterns().values()
    ]
    if not ranked:
        return None, 0.0

    # max() keeps the first of several equal maxima, like a stable
    # descending sort followed by taking element 0.
    top_score, top_pattern = max(ranked, key=lambda pair: pair[0])

    if top_score < 0.4:
        return None, top_score
    return top_pattern, top_score
|
||||||
|
|
||||||
|
def _score(self, pattern: SubtitlePattern, findings: dict) -> float:
    """Return a 0.0–1.0 match score for this pattern against the findings.

    Each scan strategy defines a short list of weighted criteria; the
    score is the weight of the satisfied criteria divided by the total
    weight considered.

    Args:
        pattern: Candidate pattern; only ``scan_strategy`` is read.
        findings: Dict of discovery flags (``has_subs_folder``,
            ``subs_strategy``, ``adjacent_subs``, ``has_embedded``).

    Returns:
        Ratio in [0.0, 1.0]; ``0.0`` for a strategy with no criteria.
    """
    kind = pattern.scan_strategy
    has_folder = findings.get("has_subs_folder")
    layout = findings.get("subs_strategy")
    has_adjacent = findings.get("adjacent_subs")

    # (weight, satisfied) pairs for the strategy under test.
    criteria: list[tuple[float, object]] = []

    if kind == ScanStrategy.EMBEDDED:
        criteria.append((1.0, findings.get("has_embedded")))
        # The "no external subtitles" bonus only enters the total when it
        # applies, so it can raise the score but never lower it.
        if not has_folder and not has_adjacent:
            criteria.append((0.5, True))
    elif kind == ScanStrategy.EPISODE_SUBFOLDER:
        criteria.append((1.0, has_folder))
        criteria.append((2.0, layout == "episode_subfolder"))
    elif kind == ScanStrategy.FLAT:
        criteria.append((1.0, has_folder))
        criteria.append((1.0, layout == "flat"))
    elif kind == ScanStrategy.ADJACENT:
        criteria.append((1.0, has_adjacent))
        criteria.append((1.0, not has_folder))

    total = sum(weight for weight, _ in criteria)
    earned = sum(weight for weight, satisfied in criteria if satisfied)
    return earned / total if total > 0 else 0.0
|
||||||
|
|
||||||
|
def _describe(self, findings: dict) -> str:
    """Build a one-line, human-readable summary of the findings.

    Args:
        findings: Dict of discovery flags produced by the scan step.

    Returns:
        The individual observations joined with " — ".
    """
    notes: list[str] = []

    if findings.get("has_subs_folder"):
        layout = findings.get("subs_strategy", "?")
        count = findings.get("files_per_episode", 0)
        notes.append(f"Subs/ folder found ({layout}), ~{count} file(s) per episode")
        # Naming details are only reported for a dedicated Subs/ folder.
        if findings.get("has_numeric_prefix"):
            notes.append("files have numeric prefix (e.g. 2_English.srt)")
        if findings.get("has_lang_tokens"):
            notes.append("language tokens found in filenames")
    elif findings.get("adjacent_subs"):
        notes.append("subtitle files adjacent to video")
    else:
        notes.append("no external subtitle files found")

    if findings.get("has_embedded"):
        notes.append("embedded tracks detected (ffprobe)")

    if not notes:
        return "nothing found"
    return " — ".join(notes)
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
"""SubtitlePlacer — hard-links matched subtitle tracks next to the destination video."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..entities import SubtitleTrack
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlacedTrack:
    """Record of one subtitle file that was linked into place."""

    # File the hard link was created from.
    source: Path
    # Full path of the newly created link.
    destination: Path
    # Name given to the link inside the destination directory.
    filename: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PlaceResult:
    """Outcome of a placement run: what was linked and what was skipped."""

    # Tracks that were linked successfully.
    placed: list[PlacedTrack]
    # Tracks that were not linked, each paired with the reason.
    skipped: list[tuple[SubtitleTrack, str]]  # (track, reason)

    @property
    def placed_count(self) -> int:
        """Number of entries in ``placed``."""
        return len(self.placed)

    @property
    def skipped_count(self) -> int:
        """Number of entries in ``skipped``."""
        return len(self.skipped)
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitlePlacer:
    """
    Hard-links matched SubtitleTrack files next to a destination video.

    Uses the same hard-link strategy as FileManager.copy_file:
    instant, no data duplication, qBittorrent keeps seeding.

    Embedded tracks are skipped — nothing to place on disk.
    """

    def place(
        self,
        tracks: list[SubtitleTrack],
        destination_video: Path,
    ) -> PlaceResult:
        """Link each external track into the directory of ``destination_video``.

        A track is skipped (never raised on) when it is embedded, its
        source file is missing, its destination name cannot be computed
        (``ValueError``), the destination already exists, or the link call
        fails with ``OSError``.

        Args:
            tracks: Candidate subtitle tracks to place.
            destination_video: Video file the subtitles should sit beside.

        Returns:
            PlaceResult listing the placed tracks and the skipped tracks
            with the reason for each.
        """
        target_dir = destination_video.parent
        linked: list[PlacedTrack] = []
        rejected: list[tuple[SubtitleTrack, str]] = []

        for sub in tracks:
            if sub.is_embedded:
                logger.debug(f"SubtitlePlacer: skip embedded track ({sub.language})")
                rejected.append((sub, "embedded — no file to place"))
                continue

            if not (sub.file_path and sub.file_path.exists()):
                rejected.append((sub, "source file not found"))
                continue

            # The destination name is computed by the track itself and may
            # be refused with ValueError.
            try:
                link_name = sub.destination_name
            except ValueError as exc:
                rejected.append((sub, str(exc)))
                continue

            link_path = target_dir / link_name

            # Never overwrite something that is already in place.
            if link_path.exists():
                logger.debug(f"SubtitlePlacer: skip {link_name} — already exists")
                rejected.append((sub, "destination already exists"))
                continue

            try:
                os.link(sub.file_path, link_path)
            except OSError as exc:
                logger.warning(f"SubtitlePlacer: failed to place {link_name}: {exc}")
                rejected.append((sub, str(exc)))
            else:
                linked.append(
                    PlacedTrack(
                        source=sub.file_path,
                        destination=link_path,
                        filename=link_name,
                    )
                )
                logger.info(f"SubtitlePlacer: placed {link_name}")

        logger.info(
            f"SubtitlePlacer: {len(linked)} placed, {len(rejected)} skipped "
            f"for {destination_video.name}"
        )
        return PlaceResult(placed=linked, skipped=rejected)
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
"""Subtitle service utilities."""
|
||||||
|
|
||||||
|
from ..entities import SubtitleTrack
|
||||||
|
|
||||||
|
|
||||||
|
def available_subtitles(tracks: list[SubtitleTrack]) -> list[SubtitleTrack]:
    """
    Return the distinct subtitle tracks available, deduped by (language, type).

    The first track seen for each (language code, subtitle type) pair is
    kept and input order is preserved. Tracks without a language are
    grouped under a ``None`` language code.

    Useful to display what is available for a media item regardless of user
    preferences — e.g. eng, eng.sdh, fra all show up as separate entries.
    """
    # dicts keep insertion order, and setdefault leaves the first track
    # registered for a key untouched.
    first_by_key: dict[tuple, SubtitleTrack] = {}
    for candidate in tracks:
        code = candidate.language.code if candidate.language else None
        first_by_key.setdefault((code, candidate.subtitle_type), candidate)
    return list(first_by_key.values())
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
"""Subtitle domain value objects."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
class ScanStrategy(Enum):
    """How to locate subtitle files for a given release."""

    # .srt next to the video
    ADJACENT = "adjacent"
    # Subs/*.srt
    FLAT = "flat"
    # Subs/{episode_name}/*.srt
    EPISODE_SUBFOLDER = "episode_subfolder"
    # tracks inside the video container
    EMBEDDED = "embedded"
|
||||||
|
|
||||||
|
|
||||||
|
class TypeDetectionMethod(Enum):
    """How to differentiate standard / SDH / forced when tokens are ambiguous."""

    # Each member's value is the member name in lowercase.
    TOKEN_IN_NAME = "token_in_name"
    SIZE_AND_COUNT = "size_and_count"
    FFPROBE_METADATA = "ffprobe_metadata"
|
||||||
|
|
||||||
|
|
||||||
|
class SubtitleType(Enum):
    """Subtitle track categories: standard, SDH, forced, or unknown."""

    STANDARD = "standard"
    SDH = "sdh"
    FORCED = "forced"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SubtitleFormat:
    """A known subtitle file format."""

    id: str
    # File extensions that identify this format.
    extensions: list[str]
    description: str = ""

    def matches_extension(self, ext: str) -> bool:
        """Return True if ``ext`` equals one of ``extensions``, ignoring case."""
        wanted = ext.lower()
        return any(known.lower() == wanted for known in self.extensions)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SubtitleLanguage:
    """A known subtitle language with its recognition tokens."""

    code: str  # ISO 639-1
    tokens: list[str]  # lowercase

    def matches_token(self, token: str) -> bool:
        """Return True if the lowercased ``token`` is one of ``tokens``."""
        wanted = token.lower()
        return any(known == wanted for known in self.tokens)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SubtitlePattern:
    """
    A known structural pattern for how a release group organises subtitle files.

    Patterns are loaded from alfred/knowledge/patterns/*.yaml and are
    independent of any specific release group — multiple groups can share
    the same pattern.
    """

    id: str
    description: str
    # Where the subtitles of a release are to be looked for.
    scan_strategy: ScanStrategy
    # e.g. "Subs", None for adjacent/embedded
    root_folder: str | None
    # How standard / SDH / forced tracks are told apart.
    type_detection: TypeDetectionMethod
    version: str = "1.0"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class SubtitleMatchingRules:
    """
    Effective rules after scope resolution (global → group → show → season → episode).

    NOTE(review): this class was originally described as storing None for
    values that are "inherited, not overridden at this level", but every
    field here defaults to an empty list (or 0.7 for ``min_confidence``),
    never None — confirm how an un-overridden value is meant to be
    represented.
    """

    # ISO 639-1 codes
    preferred_languages: list[str] = field(default_factory=list)
    # format ids
    preferred_formats: list[str] = field(default_factory=list)
    # SubtitleType ids
    allowed_types: list[str] = field(default_factory=list)
    # ordered format ids
    format_priority: list[str] = field(default_factory=list)
    min_confidence: float = 0.7
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class RuleScope:
|
||||||
|
"""At which level a rule set applies."""
|
||||||
|
|
||||||
|
level: str # "global" | "release_group" | "movie" | "show" | "season" | "episode"
|
||||||
|
identifier: str | None = None # imdb_id, group name, "S01", "S01E03"…
|
||||||
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from ..shared.value_objects import FilePath, FileSize, ImdbId
|
from ..shared.value_objects import FilePath, FileSize, ImdbId
|
||||||
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
|
from .value_objects import EpisodeNumber, SeasonNumber, ShowStatus
|
||||||
@@ -22,8 +21,6 @@ class TVShow:
|
|||||||
seasons_count: int
|
seasons_count: int
|
||||||
status: ShowStatus
|
status: ShowStatus
|
||||||
tmdb_id: int | None = None
|
tmdb_id: int | None = None
|
||||||
first_air_date: str | None = None
|
|
||||||
added_at: datetime = field(default_factory=datetime.now)
|
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""Validate TV show entity."""
|
"""Validate TV show entity."""
|
||||||
@@ -87,9 +84,6 @@ class Season:
|
|||||||
season_number: SeasonNumber
|
season_number: SeasonNumber
|
||||||
episode_count: int
|
episode_count: int
|
||||||
name: str | None = None
|
name: str | None = None
|
||||||
overview: str | None = None
|
|
||||||
air_date: str | None = None
|
|
||||||
poster_path: str | None = None
|
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""Validate season entity."""
|
"""Validate season entity."""
|
||||||
@@ -146,11 +140,6 @@ class Episode:
|
|||||||
title: str
|
title: str
|
||||||
file_path: FilePath | None = None
|
file_path: FilePath | None = None
|
||||||
file_size: FileSize | None = None
|
file_size: FileSize | None = None
|
||||||
overview: str | None = None
|
|
||||||
air_date: str | None = None
|
|
||||||
still_path: str | None = None
|
|
||||||
vote_average: float | None = None
|
|
||||||
runtime: int | None = None # in minutes
|
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
"""Validate episode entity."""
|
"""Validate episode entity."""
|
||||||
+1
-1
@@ -6,7 +6,7 @@ from typing import Any
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError, RequestException, Timeout
|
from requests.exceptions import HTTPError, RequestException, Timeout
|
||||||
|
|
||||||
from agent.config import Settings, settings
|
from alfred.settings import Settings, settings
|
||||||
|
|
||||||
from .dto import TorrentResult
|
from .dto import TorrentResult
|
||||||
from .exceptions import KnabenAPIError, KnabenNotFoundError
|
from .exceptions import KnabenAPIError, KnabenNotFoundError
|
||||||
+1
-1
@@ -6,7 +6,7 @@ from typing import Any
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError, RequestException, Timeout
|
from requests.exceptions import HTTPError, RequestException, Timeout
|
||||||
|
|
||||||
from agent.config import Settings, settings
|
from alfred.settings import Settings, settings
|
||||||
|
|
||||||
from .dto import TorrentInfo
|
from .dto import TorrentInfo
|
||||||
from .exceptions import QBittorrentAPIError, QBittorrentAuthError
|
from .exceptions import QBittorrentAPIError, QBittorrentAuthError
|
||||||
@@ -6,7 +6,7 @@ from typing import Any
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError, RequestException, Timeout
|
from requests.exceptions import HTTPError, RequestException, Timeout
|
||||||
|
|
||||||
from agent.config import Settings, settings
|
from alfred.settings import Settings, settings
|
||||||
|
|
||||||
from .dto import MediaResult
|
from .dto import MediaResult
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
@@ -0,0 +1,317 @@
|
|||||||
|
"""File manager for filesystem operations."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from collections import namedtuple
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from alfred.infrastructure.persistence import get_memory
|
||||||
|
|
||||||
|
from .exceptions import PathTraversalError
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
FileOperationResult = namedtuple("FileOperationResult", ["success", "error", "message"])
|
||||||
|
|
||||||
|
|
||||||
|
def _err(error: str, message: str) -> dict[str, Any]:
    """Build the error payload returned by the file operations.

    Args:
        error: Short error code.
        message: Human-readable description.

    Returns:
        Dict with ``status`` set to ``"error"`` plus the given ``error``
        and ``message``.
    """
    return dict(status="error", error=error, message=message)
|
||||||
|
|
||||||
|
|
||||||
|
class FileManager:
    """
    File manager for filesystem operations.

    Handles folder configuration, listing, and file operations
    with security checks to prevent path traversal attacks.

    Public methods report failures as ``_err`` dicts
    (``{"status": "error", ...}``) instead of raising.
    """

    def set_folder_path(self, folder_name: str, path_value: str) -> dict[str, Any]:
        """
        Set a folder path in the configuration.

        Args:
            folder_name: Name of folder (download, tvshow, movie, torrent).
            path_value: Absolute path to the folder.

        Returns:
            Dict with status or error information.
        """
        try:
            path_obj = Path(path_value).resolve()

            if not path_obj.exists():
                return _err("invalid_path", f"Path does not exist: {path_value}")

            if not path_obj.is_dir():
                return _err("invalid_path", f"Path is not a directory: {path_value}")

            if not os.access(path_obj, os.R_OK):
                return _err("permission_denied", f"Path is not readable: {path_value}")

            memory = get_memory()
            # workspace folders have fixed attributes; library folders go in the dict
            if folder_name in ("download", "torrent"):
                setattr(memory.ltm.workspace, folder_name, str(path_obj))
            else:
                memory.ltm.library_paths.set(folder_name, str(path_obj))
            memory.save()

            logger.info(f"Set {folder_name} to: {path_obj}")
            return {"status": "ok", "folder_name": folder_name, "path": str(path_obj)}

        except ValueError as e:
            return _err("validation_failed", str(e))

        except Exception as e:
            logger.error(f"Unexpected error setting path: {e}", exc_info=True)
            return _err("internal_error", "Failed to set path")

    def list_folder(self, folder_type: str, path: str = ".") -> dict[str, Any]:
        """
        List contents of a configured folder.

        Args:
            folder_type: Type of folder (download, tvshow, movie, torrent).
            path: Relative path within the folder (default: root).

        Returns:
            Dict with folder contents or error information.
        """
        try:
            safe_path = self._sanitize_path(path)

            memory = get_memory()
            if folder_type in ("download", "torrent"):
                folder_path = getattr(memory.ltm.workspace, folder_type, None)
            else:
                folder_path = memory.ltm.library_paths.get(folder_type)

            if not folder_path:
                return _err("folder_not_set", f"{folder_type.capitalize()} folder not configured.")

            root = Path(folder_path)
            target = root / safe_path

            # Second line of defence: the resolved target must still be
            # inside the configured root.
            if not self._is_safe_path(root, target):
                return _err("forbidden", "Access denied: path outside allowed directory")

            if not target.exists():
                return _err("not_found", f"Path does not exist: {safe_path}")

            if not target.is_dir():
                return _err("not_a_directory", f"Path is not a directory: {safe_path}")

            try:
                entries = [entry.name for entry in target.iterdir()]
                logger.debug(f"Listed {len(entries)} entries in {target}")
                return {
                    "status": "ok",
                    "folder_type": folder_type,
                    "path": safe_path,
                    "entries": sorted(entries),
                    "count": len(entries),
                }
            except PermissionError:
                return _err("permission_denied", f"Permission denied: {safe_path}")

        except PathTraversalError as e:
            return _err("forbidden", str(e))

        except ValueError as e:
            return _err("validation_failed", str(e))

        except Exception as e:
            logger.error(f"Unexpected error listing folder: {e}", exc_info=True)
            return _err("internal_error", "Failed to list folder")

    def copy_file(self, source: str, destination: str) -> dict[str, Any]:
        """
        Hard-link a file to a destination (instant, no data duplication).

        Both paths must be on the same filesystem. qBittorrent keeps seeding
        the original inode unaffected.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            Dict with status or error information.
        """
        try:
            source_path = Path(source).resolve()
            dest_path = Path(destination).resolve()

            logger.info(f"Hard-linking: {source_path} -> {dest_path}")

            if not source_path.exists():
                return _err("source_not_found", f"Source does not exist: {source}")

            if not source_path.is_file():
                return _err("source_not_file", f"Source is not a file: {source}")

            if not dest_path.parent.exists():
                return _err("destination_dir_not_found", f"Destination directory does not exist: {dest_path.parent}")

            if dest_path.exists():
                return _err("destination_exists", f"Destination already exists: {destination}")

            os.link(source_path, dest_path)

            logger.info(f"Hard link created: {dest_path.name}")
            return {
                "status": "ok",
                "source": str(source_path),
                "destination": str(dest_path),
                "filename": dest_path.name,
                "size": source_path.stat().st_size,
            }

        except OSError as e:
            logger.error(f"Error creating hard link: {e}", exc_info=True)
            return _err("link_failed", str(e))

    def move_file(self, source: str, destination: str) -> dict[str, Any]:
        """
        Move a file via hard link + source deletion.

        Hard-links the file to the destination, then removes the source.
        qBittorrent keeps seeding during the operation since the inode
        is still referenced until the source is removed.

        Args:
            source: Absolute path to the source file.
            destination: Absolute path to the destination file.

        Returns:
            Dict with status or error information. If linking fails, the
            error dict from ``copy_file`` is returned unchanged and the
            source is left untouched.
        """
        try:
            source_path = Path(source).resolve()

            link_result = self.copy_file(source, destination)
            if link_result.get("status") != "ok":
                return link_result

            # Only drop the source once the destination link exists.
            source_path.unlink()

            logger.info(f"File moved: {source_path.name} -> {link_result['destination']}")
            return {
                "status": "ok",
                "source": str(source_path),
                "destination": link_result["destination"],
                "filename": link_result["filename"],
                "size": link_result["size"],
            }

        except Exception as e:
            logger.error(f"Error moving file: {e}", exc_info=True)
            return _err("move_failed", str(e))

    def create_seed_links(
        self, library_file: str, original_download_folder: str, torrent_folder: str
    ) -> dict[str, Any]:
        """
        Prepare a torrent folder so qBittorrent can keep seeding after a move.

        - Hard-links the moved video file from the library back into
          torrents/<original_folder_name>/ (same inode, no data copy).
        - Copies every other file from the original download folder
          (.srt, .nfo, .jpg, .txt, …) into the same torrent subfolder,
          preserving relative paths.

        Args:
            library_file: Absolute path to the video file in the library.
            original_download_folder: Absolute path to the download folder
                that contained the original release (may still have subs etc.).
            torrent_folder: Absolute path to the root torrents/ directory.

        Returns:
            Dict with status, linked_file, copied_files list, skipped list.
        """
        # Imported once per call rather than once per copied file.
        import shutil

        try:
            lib_path = Path(library_file).resolve()
            src_folder = Path(original_download_folder).resolve()
            torrent_root = Path(torrent_folder).resolve()

            if not lib_path.exists():
                return _err("library_file_not_found", f"Library file not found: {library_file}")
            if not src_folder.exists():
                return _err("source_folder_not_found", f"Download folder not found: {original_download_folder}")
            if not torrent_root.exists():
                return _err("torrent_folder_not_found", f"Torrent folder not found: {torrent_folder}")

            dest_folder = torrent_root / src_folder.name
            dest_folder.mkdir(parents=True, exist_ok=True)

            # Hard-link the video file from library → torrent subfolder
            link_dest = dest_folder / lib_path.name
            if link_dest.exists():
                return _err("destination_exists", f"Link already exists: {link_dest}")
            os.link(lib_path, link_dest)
            logger.info(f"Hard-linked for seeding: {lib_path.name} → {dest_folder}")

            # Copy everything else from the original download folder
            copied: list[str] = []
            skipped: list[str] = []
            for item in src_folder.rglob("*"):
                if not item.is_file():
                    continue
                rel = item.relative_to(src_folder)
                dest_item = dest_folder / rel
                dest_item.parent.mkdir(parents=True, exist_ok=True)
                # Existing files (including the link made above) are kept.
                if dest_item.exists():
                    skipped.append(str(rel))
                    continue
                shutil.copy2(item, dest_item)
                copied.append(str(rel))
                logger.debug(f"Copied for seeding: {rel}")

            return {
                "status": "ok",
                "torrent_subfolder": str(dest_folder),
                "linked_file": str(link_dest),
                "copied_files": copied,
                "copied_count": len(copied),
                "skipped": skipped,
            }

        except OSError as e:
            logger.error(f"create_seed_links failed: {e}", exc_info=True)
            return _err("link_failed", str(e))
        except Exception as e:
            logger.error(f"create_seed_links unexpected error: {e}", exc_info=True)
            return _err("internal_error", str(e))

    def _sanitize_path(self, path: str) -> str:
        """
        Sanitize a relative path to prevent path traversal attacks.

        Only a whole ``..`` path component is treated as a parent
        reference, so names that merely begin with or contain dots
        (``...And Justice for All``, ``Show/..extras``) are accepted.

        Args:
            path: Relative path supplied by the caller.

        Returns:
            The normalised path.

        Raises:
            PathTraversalError: If the path contains a null byte, is
                absolute, or still contains a ``..`` component after
                normalisation.
        """
        # Reject null bytes on the raw input so the check cannot be
        # affected by normalisation.
        if "\x00" in path:
            raise PathTraversalError("Null bytes in path not allowed")

        normalized = os.path.normpath(path)

        # Reject absolute paths
        if os.path.isabs(normalized):
            raise PathTraversalError("Absolute paths are not allowed")

        # Reject parent directory references. Both separator styles are
        # split on, so a backslash-separated ".." is refused on every
        # platform.
        components = normalized.replace("\\", "/").split("/")
        if ".." in components:
            raise PathTraversalError("Parent directory references not allowed")

        return normalized

    def _is_safe_path(self, base_path: Path, target_path: Path) -> bool:
        """Return True if target_path is inside base_path (prevents traversal).

        Both paths are resolved first; a resolution error or a target
        outside the base yields False.
        """
        try:
            target_path.resolve().relative_to(base_path.resolve())
            return True
        except (ValueError, OSError):
            return False
|
||||||
+3
-3
@@ -3,9 +3,9 @@
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from domain.movies.entities import Movie
|
from alfred.domain.movies.entities import Movie
|
||||||
from domain.tv_shows.entities import Episode, Season, TVShow
|
from alfred.domain.tv_shows.entities import Episode, Season, TVShow
|
||||||
from domain.tv_shows.value_objects import SeasonNumber
|
from alfred.domain.tv_shows.value_objects import SeasonNumber
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user