# First-time setup flow and access controls for the setup API (/setup).
[Setup]
enable_first_time_setup = true
setup_completed = false
# When true, bypasses the local-only guard for setup API requests.
allow_remote_setup_access = false
# Optional: restrict which remote clients can access /setup
# Provide CIDR ranges or individual IPs, comma-separated
# NOTE(review): precedence between the allowlist and the denylist is not stated here — confirm in the setup guard.
setup_ip_allowlist =
setup_ip_denylist =

# UI-facing values exposed through the config API.
[UI]
# Optional quickstart URL (absolute or /path). If blank, /api/v1/config/quickstart defaults to /docs.
quickstart_url =

# CORS switches for the API server.
[Server]
# Note the negative sense of the key: false means CORS is NOT disabled.
disable_cors = false
# NOTE(review): presumably controls whether credentialed cross-origin requests are allowed — confirm against the server's CORS setup.
cors_allow_credentials = false

# Limits for file artifact exports.
[Files]
# Max inline export size (bytes) for file artifacts. Default: 262144 (256KB).
# [Image-Generation] defines its own inline_max_bytes that overrides this value for image outputs.
inline_max_bytes = 262144

# Image generation: backend selection, request limits, and per-backend settings.
[Image-Generation]
# Default image backend when payload.backend is omitted.
default_backend = stable_diffusion_cpp
# JSON array of enabled image backends (empty disables image generation).
enabled_backends = ["stable_diffusion_cpp"]
# Limits (pixels = width * height).
# As configured, max_pixels equals max_width * max_height (4096 * 4096 = 16777216).
max_width = 4096
max_height = 4096
max_pixels = 16777216
max_steps = 50
# NOTE(review): presumably counted in characters — confirm.
max_prompt_length = 2500
# Override Files.inline_max_bytes for image outputs (bytes).
inline_max_bytes = 4000000

# stable-diffusion.cpp backend settings
# All paths are blank by default, while default_backend above is stable_diffusion_cpp.
# NOTE(review): presumably the backend cannot run until sd_cpp_binary_path and a model path are set — confirm.
sd_cpp_binary_path =
sd_cpp_diffusion_model_path =
sd_cpp_model_path =
sd_cpp_llm_path =
sd_cpp_vae_path =
sd_cpp_lora_paths = []
# Optional allowlist for extra_params keys (JSON array or comma-separated).
# Empty list disables extra_params passthrough.
sd_cpp_allowed_extra_params = []
# Default step count (25) sits below the section-wide max_steps limit (50).
sd_cpp_default_steps = 25
sd_cpp_default_cfg_scale = 7.5
sd_cpp_default_sampler = euler_a
sd_cpp_device = auto
sd_cpp_timeout_seconds = 120

# SwarmUI backend settings
# Base URL for SwarmUI (e.g. http://localhost:7801)
# Dev default — override via .env for deployment
swarmui_base_url = http://127.0.0.1:7801
# Optional default model to use when payload.model is omitted.
swarmui_default_model =
# Optional swarm_token cookie value for authenticated SwarmUI instances.
# Treat as a secret: prefer supplying it outside this file when one is required.
swarmui_swarm_token =
# Optional allowlist for extra_params keys (JSON array or comma-separated).
# Empty list disables extra_params passthrough.
swarmui_allowed_extra_params = []
swarmui_timeout_seconds = 120

# OpenRouter image backend settings
openrouter_image_base_url = https://openrouter.ai/api/v1
# Blank by default. Treat as a secret: avoid committing a real key to this file.
openrouter_image_api_key =
openrouter_image_default_model = openai/gpt-image-1
# NOTE(review): presumably the same extra_params allowlist semantics as sd_cpp_allowed_extra_params
# (empty list disables passthrough) — confirm.
openrouter_image_allowed_extra_params = []
openrouter_image_timeout_seconds = 120

# Novita image backend settings
novita_image_base_url = https://api.novita.ai
# Blank by default. Treat as a secret: avoid committing a real key to this file.
novita_image_api_key =
novita_image_default_model = sd_xl_base_1.0.safetensors
# NOTE(review): presumably the same extra_params allowlist semantics as sd_cpp_allowed_extra_params
# (empty list disables passthrough) — confirm.
novita_image_allowed_extra_params = []
# Overall timeout and polling cadence, both in seconds per the key names.
novita_image_timeout_seconds = 180
novita_image_poll_interval_seconds = 2

# Together image backend settings
together_image_base_url = https://api.together.xyz/v1
# Blank by default. Treat as a secret: avoid committing a real key to this file.
together_image_api_key =
together_image_default_model = black-forest-labs/FLUX.1-schnell-Free
# NOTE(review): presumably the same extra_params allowlist semantics as sd_cpp_allowed_extra_params
# (empty list disables passthrough) — confirm.
together_image_allowed_extra_params = []
together_image_timeout_seconds = 120

# Alibaba Model Studio image backend settings
# Optional explicit endpoint override (leave blank to use region preset)
modelstudio_image_base_url =
# Blank by default. Treat as a secret: avoid committing a real key to this file.
modelstudio_image_api_key =
modelstudio_image_default_model = qwen-image
# Region preset for adapter routing (sg, cn, us)
modelstudio_image_region = sg
# Mode can be sync, async, or auto
modelstudio_image_mode = auto
# Polling cadence and overall timeout, both in seconds per the key names.
# NOTE(review): polling presumably applies only to async mode — confirm.
modelstudio_image_poll_interval_seconds = 2
modelstudio_image_timeout_seconds = 180
# NOTE(review): presumably the same extra_params allowlist semantics as sd_cpp_allowed_extra_params
# (empty list disables passthrough) — confirm.
modelstudio_image_allowed_extra_params = []

# ACP runner launch settings and run-first rollout cohort.
[ACP]
# Command used to launch the ACP runner (tldw-agent-acp).
# The defaults below amount to `go run ./cmd/tldw-agent-acp` executed from runner_cwd.
runner_command = go
# Accepts JSON array or shell-style args.
runner_args = ["run", "./cmd/tldw-agent-acp"]
# Working directory for the runner process. Relative paths resolve from this Config_Files directory.
runner_cwd = ../../tools/tldw-agent
# Runner environment (comma-separated key=value or JSON).
# Relative HOME values are resolved against this Config_Files directory.
runner_env = HOME=./acp_runner_home,PYTHONUNBUFFERED=1
# Startup timeout for runner initialize.
# Milliseconds, per the key name (10000 ms = 10 s).
startup_timeout_ms = 10000
# Phase 2b default-on rollout for the stable provider:model cohort.
# Entries in the allowlist are comma-separated provider:model pairs.
# [Chat-Module] carries the same rollout keys with its own presentation variant.
run_first_rollout_mode = default_on
run_first_provider_allowlist = openai:gpt-4o-mini,anthropic:claude-3-7-sonnet,openai:gpt-4o,google:gemini-2.5-flash
run_first_presentation_variant = acp_phase2b_v1

# Allowed host-local roots for Project Workspaces.
[WORKSPACES]
# Optional. Project Workspace host-local roots must be under one of these paths.
# This is intentionally separate from ingestion_source_allowed_roots.
# NOTE(review): the list delimiter is not stated here; [ACP-WORKSPACE] allowed_base_paths is
# comma-separated — confirm the same applies, and what an empty value permits.
project_root_allowed_base_paths =

# ACP workspace root restrictions and workspace discovery settings.
[ACP-WORKSPACE]
# Optional. If set, workspace root_path must be under one of these paths (comma-separated).
# Leave empty to allow any absolute path.
# As shipped the value is empty, i.e. any absolute path is allowed.
allowed_base_paths =
# Default max depth for workspace discovery scanning.
discovery_max_depth = 3
# Marker files/dirs used for workspace discovery (comma-separated).
discovery_patterns = .git,package.json,pyproject.toml,Cargo.toml,go.mod

# ACP sandbox: container/VM runtime, hardening defaults, web SSH, downstream agent, and retention.
[ACP-SANDBOX]
# Enable ACP sandbox mode (runs ACP in container/VM and exposes web SSH).
enabled = false
# Sandbox runtime (docker|firecracker|lima).
runtime = docker
# Base image for ACP sandbox (must include sshd + tldw-agent-acp).
base_image = tldw/acp-agent:latest
# Network policy for ACP sandbox (deny_all|allowlist). In strict Lima mode,
# allow_all is rejected and requests fail closed unless strict guarantees are provable.
# NOTE(review): allow_all is mentioned above but is not in the listed options — confirm whether
# it is an accepted value outside strict Lima mode.
network_policy = deny_all
# Security hardening defaults (override only for trusted debug workflows).
run_as_root = false
read_only_root = true
# SSH settings
ssh_enabled = true
ssh_user = acp
# Dev default — override via .env for deployment
ssh_host = 127.0.0.1
# Container-internal SSH listen port (host port remains allocated from ssh_port_min/max).
ssh_container_port = 2222
# Host-side port range; as configured it spans 2222-2299.
ssh_port_min = 2222
ssh_port_max = 2299
# Downstream agent command (inside container)
# IMPORTANT: This must be the coding agent executable (e.g. claude, codex, opencode),
# not tldw-agent-acp itself.
# Blank by default, so it must be set before the sandbox can launch an agent.
agent_command =
# JSON array or shell-style args
agent_args = []
# JSON or comma-separated key=value env
agent_env = {}
# Active-session close windows and hard-delete retention. Retention deletes only
# closed/error sessions; messages are removed by cascade.
# As configured: session_ttl_seconds = 24 h, max_session_duration_seconds = 4 h.
session_ttl_seconds = 86400
max_session_duration_seconds = 14400
session_retention_days = 30
audit_retention_days = 30

# Audit export behaviour and audit storage layout.
[Audit]
# Auto-switch audit exports to streaming when max_rows exceeds this threshold to avoid large in-memory buffers.
export_stream_auto_max_rows = 5000
# Audit storage mode (per_user or shared)
storage_mode = per_user
# Shared audit DB path used when storage_mode=shared
# Not used with the default storage_mode = per_user.
shared_db_path = Databases/audit_shared.db
# Force per_user behavior even when shared mode is configured
storage_rollback = false

# Processing device selection.
[Processing]
# NOTE(review): presumably selects the compute device; the other accepted values (e.g. cpu) and the
# behaviour when CUDA is unavailable are not shown here — confirm.
processing_choice = cuda

# Media ingestion limits: per-type upload sizes, processing timeouts, archive caps, and sanitization.
[Media-Processing]
# Per-type upload size caps, in MB per the key names.
max_audio_file_size_mb = 500
max_pdf_file_size_mb = 50
max_video_file_size_mb = 1000
max_epub_file_size_mb = 100
max_document_file_size_mb = 50
# Processing timeouts, in seconds per the key names.
pdf_conversion_timeout_seconds = 300
audio_processing_timeout_seconds = 600
video_processing_timeout_seconds = 1200
# Archive extraction caps: member count, total uncompressed size, and per-member uncompressed size.
max_archive_internal_files = 100
max_archive_uncompressed_size_mb = 200
max_archive_member_uncompressed_size_mb = 100
# Keep upload sanitization on by default for markup inputs.
sanitize_html_uploads = true
sanitize_xml_uploads = true
# Sanitize text/html email bodies before converting to plaintext.
sanitize_email_html_bodies = true
# Enforce deep archive content validation for email ZIP URL ingestion.
validate_email_archive_contents = true
audio_transcription_buffer_size_mb = 10
uuid_generation_length = 8
# Limits for kept video files: count, total storage (MB), and retention (hours), per the key names.
kept_video_max_files = 5
kept_video_max_storage_mb = 500
kept_video_retention_hours = 2

# Chat dictionary feature switches and dictionary file locations.
[Chat-Dictionaries]
# NOTE(review): booleans here are capitalised (True/False) while most of the file uses true/false —
# confirm the loader parses them case-insensitively.
enable_chat_dictionaries = True
post_gen_replacement = False
# Dictionary files are Markdown files under ./Helper_Scripts/Chat_Dictionaries/.
post_gen_replacement_dict = ./Helper_Scripts/Chat_Dictionaries/Post_Gen_Replacements.md
chat_dictionary_chat_prompts = ./Helper_Scripts/Chat_Dictionaries/Chat_Prompts.md
chat_dictionary_RAG_prompts = ./Helper_Scripts/Chat_Dictionaries/RAG_Prompts.md
strategy = character_lore_first
max_tokens = 1000
default_rag_prompt = simplified_promptflow_RAG_system_prompt

# Chat endpoint limits, streaming behaviour, rate limits, and run-first rollout cohort.
[Chat-Module]
enable_provider_fallback = False
max_base64_image_size_mb = 3
# When true, reject data:image payloads larger than max_base64_image_size_mb at ingress.
# Env override: CHAT_ENFORCE_BASE64_IMAGE_LIMIT
enforce_base64_image_limit = false
max_text_length_per_message = 400000
max_messages_per_request = 1000
max_images_per_request = 10
# NOTE(review): 1000000 bytes is smaller than a single image at the 3 MB max_base64_image_size_mb cap —
# confirm which limit is enforced first and whether this cap covers image payloads.
max_request_size_bytes = 1000000
streaming_idle_timeout_seconds = 300
# NOTE(review): presumably 0 disables heartbeats — confirm.
streaming_heartbeat_interval_seconds = 0
streaming_max_response_size_mb = 10
# For queued streaming requests, bound the internal per-request channel size
# to limit memory use and apply backpressure. Default: 100
chat_stream_channel_maxsize = 100
# Include tldw_* metadata (conversation/message IDs) in chat SSE streaming chunks.
# Env override: CHAT_STREAM_INCLUDE_METADATA
chat_stream_include_metadata = true
chat_save_default = False
inject_assistant_name = true
conversation_creation_max_retries = 3
db_transaction_max_retries = 3
rate_limit_per_minute = 60
rate_limit_per_conversation_per_minute = 20
# Per-user token budget (legacy limiter). Default is 10000 if omitted.
# The value set here (100000) is ten times that omitted-key default.
rate_limit_tokens_per_minute = 100000
# Phase 2b default-on rollout for the stable provider:model cohort.
# [ACP] carries the same rollout keys with its own presentation variant.
run_first_rollout_mode = default_on
run_first_provider_allowlist = openai:gpt-4o-mini,anthropic:claude-3-7-sonnet,openai:gpt-4o,google:gemini-2.5-flash
run_first_presentation_variant = chat_phase2b_v1
# Pre-dispatch prompt/cost guardrails for chat and character chat.
# Disabled by default; when enabled, warning thresholds are non-blocking unless
# prompt_guardrails_default_action = block or a hard-cap threshold is exceeded.
# Env overrides use upper-case names such as CHAT_PROMPT_GUARDRAILS_ENABLED.
prompt_guardrails_enabled = false
prompt_guardrails_default_action = warn
# The thresholds below are commented out, so they are not set by this file.
# NOTE(review): whether the commented values mirror built-in defaults is not stated here — confirm
# before uncommenting.
# prompt_guardrails_warn_total_estimated_tokens = 32000
# prompt_guardrails_block_total_estimated_tokens = 128000
# prompt_guardrails_warn_static_segment_tokens = 12000
# prompt_guardrails_warn_world_book_tokens = 8000
# prompt_guardrails_warn_max_output_tokens = 8192
# prompt_guardrails_warn_choice_count = 1
# Comma-separated reasoning-effort levels that trigger a warning.
prompt_guardrails_warn_reasoning_efforts = high,xhigh
prompt_guardrails_warn_on_fingerprint_churn = true
# Controls how much historical context is loaded for each chat request.
# Accepts 1-500; values outside range are clamped. Default: 20.
history_messages_limit = 20
# Ordering for loaded history: asc (oldest first) or desc (newest first). Default: desc.
history_messages_order = desc
# Default persona exemplar budget tokens for character chat augmentation
# when request-level override is omitted.
# Env override: PERSONA_EXEMPLAR_DEFAULT_BUDGET_TOKENS
persona_exemplar_default_budget_tokens = 600
# Auto-adjust persona exemplar budget when sustained IOO alerts indicate
# over-copying risk. Applies only when request-level budget override is omitted.
# Env override: PERSONA_IOO_BUDGET_AUTO_ADJUST_ENABLED
persona_ioo_budget_auto_adjust_enabled = true
# Multiplicative reduction factor for sustained-IOO auto-adjust.
# Clamped to [0.10, 0.95]. Env override: PERSONA_IOO_BUDGET_AUTO_REDUCTION_FACTOR
# With the defaults here, one reduction takes the 600-token budget to 450 (600 * 0.75).
persona_ioo_budget_auto_reduction_factor = 0.75
# Lower bound for auto-adjusted persona exemplar budget tokens.
# Env override: PERSONA_IOO_BUDGET_AUTO_MIN_TOKENS
persona_ioo_budget_auto_min_tokens = 240

# Server-side tool auto-execution (Phase 2; default remains off)
# Env: CHAT_AUTO_EXECUTE_TOOLS=true|false
chat_auto_execute_tools = false
# Per-response ceiling for model-requested tool calls. Clamped to [1, 20].
# Env: CHAT_MAX_TOOL_CALLS
chat_max_tool_calls = 3
# Per-tool-call execution timeout budget in milliseconds. Clamped to [1000, 120000].
# Env: CHAT_TOOL_TIMEOUT_MS
chat_tool_timeout_ms = 15000
# Allow-list for tool names/prefixes used by auto-exec.
# Use '*' for unrestricted, or comma-separated entries like:
#   notes.search,media.*,prompts.get
# Env: CHAT_TOOL_ALLOW_CATALOG
# The shipped value is unrestricted; narrow it before turning chat_auto_execute_tools on
# if write-capable tools are registered.
chat_tool_allow_catalog = *
# Attach idempotency keys when auto-executing tools (recommended for write-capable tools).
# Env: CHAT_TOOL_IDEMPOTENCY=true|false
chat_tool_idempotency = true
# Run one follow-up non-streaming assistant turn after tool results are persisted.
# Env: CHAT_TOOL_AUTO_CONTINUE_ONCE=true|false
chat_tool_auto_continue_once = false

# Slash-command support in chat: enablement, result injection, rate limits, and permissions.
[Chat-Commands]
# Enable slash commands (env: CHAT_COMMANDS_ENABLED)
commands_enabled = false

# Injection behavior for command results: 'system' (separate system message) or 'preface' (prepend to user text)
# env: CHAT_COMMAND_INJECTION_MODE
injection_mode = system

# Per-user, per-command requests per minute (RPM) limit
# env: CHAT_COMMANDS_RATE_LIMIT_USER
commands_rate_limit_user = 10

# Global per-command requests per minute (RPM) limit
# env: CHAT_COMMANDS_RATE_LIMIT_GLOBAL
commands_rate_limit_global = 100

# Legacy per-user alias retained for compatibility
# env: CHAT_COMMANDS_RATE_LIMIT
# NOTE(review): precedence when this and commands_rate_limit_user disagree is not stated — keep them equal.
commands_rate_limit = 10

# Maximum characters injected from a command result
# env: CHAT_COMMANDS_MAX_CHARS
commands_max_chars = 300

# Enforce RBAC permissions on commands (env: CHAT_COMMANDS_REQUIRE_PERMISSIONS)
require_permissions = false

# Fallback default location for /weather when no args provided (env: DEFAULT_LOCATION)
default_location =

# Templating for chat dictionaries and chatbooks: feature switches and render limits.
# Templating, random helpers, and external calls are all off by default.
[Chat-Templating]
# Enable templating in chat dictionaries and chatbooks (env: CHAT_DICT_TEMPLATES_ENABLED)
enable_templates = false

# Allow random helpers in templates (env: CHAT_DICT_TEMPLATES_ALLOW_RANDOM)
allow_random = false

# Allow external calls from templates (e.g., weather()) (env: TEMPLATES_ALLOW_EXTERNAL_CALLS)
allow_external_calls = false

# Hard cap on rendered output length (env: MAX_TEMPLATE_OUTPUT_CHARS)
max_output_chars = 2000

# Per-render timeout in milliseconds (env: TEMPLATE_RENDER_TIMEOUT_MS)
render_timeout_ms = 250

# Default timezone for template functions like now_tz (env: TEMPLATE_DEFAULT_TZ)
default_timezone = UTC

# Default locale (reserved; currently not used unless Babel is enabled) (env: TEMPLATE_DEFAULT_LOCALE)
default_locale =

# Optional deterministic seeding for random helpers in tests (env: TEMPLATES_RANDOM_SEED)
random_seed =

# Character chat quotas and rate limits.
# Keys in this section are upper-case, unlike most of the file.
# NOTE(review): some INI parsers lower-case keys on read — confirm lookups are case-insensitive.
[Character-Chat]
# NOTE(review): presumably CHARACTER_RATE_LIMIT_OPS operations per CHARACTER_RATE_LIMIT_WINDOW seconds
# (100 per 3600 s as configured) — confirm units.
CHARACTER_RATE_LIMIT_OPS = 100
CHARACTER_RATE_LIMIT_WINDOW = 3600
MAX_CHARACTERS_PER_USER = 10000
MAX_CHARACTER_IMPORT_SIZE_MB = 10
MAX_CHATS_PER_USER = 100000
# NOTE(review): the soft limit equals the hard limit (both 1000), so any soft-limit behaviour would
# only trigger at the hard cap — confirm this is intended.
MAX_MESSAGES_PER_CHAT = 1000
MAX_MESSAGES_PER_CHAT_SOFT = 1000
MAX_CHAT_COMPLETIONS_PER_MINUTE = 20
MAX_MESSAGE_SENDS_PER_MINUTE = 60

# General settings: chunk duration, speech-rate estimate, and save toggles.
[Settings]
# NOTE(review): chunk_duration is presumably in seconds — confirm.
chunk_duration = 30
words_per_second = 3
# save_character_chats and save_rag_chats are also defined under [Auto-Save] with the same values.
# NOTE(review): which section the application reads is not visible here — keep both in sync.
save_character_chats = False
save_rag_chats = False
save_video_transcripts = True

# Auto-save toggles for chats.
# Both keys are also defined under [Settings] with the same values.
# NOTE(review): which section the application reads is not visible here — keep both in sync.
[Auto-Save]
save_character_chats = False
save_rag_chats = False

# Built-in prompt texts.
# Values are wrapped in double quotes.
# NOTE(review): Python's configparser keeps surrounding quotes as part of the value — confirm the
# loader strips them before the prompt is sent to a model.
[Prompts]
prompt_sample = "What is the meaning of life?"
video_summarize_prompt = "Above is the transcript of a video. Please read through the transcript carefully. Identify the main topics that are discussed over the course of the transcript. Then, summarize the key points about each main topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, but should be much shorter than the full transcript. Please output your bullet point summary inside <bulletpoints> tags. Do not repeat yourself while writing the summary."

# Content database backend (SQLite by default), optional PostgreSQL/Elasticsearch, and auxiliary DB paths.
[Database]
type = sqlite
sqlite_path = Databases/server_media_summary.db
sqlite_wal_mode = true
sqlite_foreign_keys = true
backup_path = ./tldw_DB_Backups/
# NOTE(review): presumably a full connection string takes precedence over the individual pg_* keys
# below when set — confirm.
pg_connection_string =
# Dev default — override via .env for deployment
pg_host = localhost
pg_port = 5432
pg_database = tldw_content
pg_user = tldw_user
# Placeholder only. Do not store a real password in this plain-text file; supply it via the
# environment/.env, as the deployment note above recommends for the other dev defaults.
pg_password = your_secure_password
pg_sslmode = prefer
pg_pool_size = 20
pg_max_overflow = 40
# NOTE(review): presumably seconds — confirm.
pg_pool_timeout = 30.0
# Dev default — override via .env for deployment
elasticsearch_host = localhost
elasticsearch_port = 9200
chroma_db_path = Databases/chroma_db
prompts_db_path = Databases/prompts.db
rag_qa_db_path = Databases/RAG_QA_Chat.db
character_db_path = Databases/chatDB.db

# Default chunking behaviour, shared across media types.
# Comment markers in this block are normalised to '#', matching the rest of the file
# (the block previously mixed ';' and '#'); no values are changed.
[Chunking]
chunking_method = words
# As configured, the overlap (200) is half of the maximum chunk size (400).
chunk_max_size = 400
chunk_overlap = 200
adaptive_chunking = false
chunking_multi_level = false
language = english
# Optional chunking-wide settings
# Maximum character cap for streaming flush threshold. 0 disables the cap.
max_streaming_flush_threshold_chars = 0
# Emit a single metadata chunk for JSON chunking and reference it from others
json_single_metadata_reference = false
# Reference key included in JSON chunks when single metadata reference is enabled
json_metadata_reference_key = __meta_ref__
# Chunking cache and logging controls
cache_copy_on_access = true
verbose_logging = false
# Regex safety controls (replaces former env toggles)
# NOTE(review): regex_timeout_seconds = 0 presumably disables the timeout — confirm.
regex_timeout_seconds = 0
regex_disable_multiprocessing = true
regex_simple_only = false
enable_contextual_retrieval = false
# [Embeddings] also defines a context_window_size key; the two are separate settings.
context_window_size = 500
include_parent_context = false
# Per-media-type chunking overrides, one group of six keys per type:
# <type>_chunking_method, _chunk_max_size, _chunk_overlap, _adaptive_chunking, _chunking_multi_level, _language.
# Every group currently repeats the same values as the section-wide defaults.
# NOTE(review): the *_chunking_method values below are single-quoted ('words') while the section-wide
# chunking_method is unquoted (words). Python's configparser keeps quotes as part of the value —
# confirm the loader strips them, otherwise the method name will not match.
article_chunking_method = 'words'
article_chunk_max_size = 400
article_chunk_overlap = 200
article_adaptive_chunking = false
article_chunking_multi_level = false
article_language = english
audio_chunking_method = 'words'
audio_chunk_max_size = 400
audio_chunk_overlap = 200
audio_adaptive_chunking = false
audio_chunking_multi_level = false
audio_language = english
book_chunking_method = 'words'
book_chunk_max_size = 400
book_chunk_overlap = 200
book_adaptive_chunking = false
book_chunking_multi_level = false
book_language = english
document_chunking_method = 'words'
document_chunk_max_size = 400
document_chunk_overlap = 200
document_adaptive_chunking = false
document_chunking_multi_level = false
document_language = english
mediawiki_article_chunking_method = 'words'
mediawiki_article_chunk_max_size = 400
mediawiki_article_chunk_overlap = 200
mediawiki_article_adaptive_chunking = false
mediawiki_article_chunking_multi_level = false
mediawiki_article_language = english
mediawiki_dump_chunking_method = 'words'
mediawiki_dump_chunk_max_size = 400
mediawiki_dump_chunk_overlap = 200
mediawiki_dump_adaptive_chunking = false
mediawiki_dump_chunking_multi_level = false
mediawiki_dump_language = english
obsidian_note_chunking_method = 'words'
obsidian_note_chunk_max_size = 400
obsidian_note_chunk_overlap = 200
obsidian_note_adaptive_chunking = false
obsidian_note_chunking_multi_level = false
obsidian_note_language = english
podcast_chunking_method = 'words'
podcast_chunk_max_size = 400
podcast_chunk_overlap = 200
podcast_adaptive_chunking = false
podcast_chunking_multi_level = false
podcast_language = english
text_chunking_method = 'words'
text_chunk_max_size = 400
text_chunk_overlap = 200
text_adaptive_chunking = false
text_chunking_multi_level = false
text_language = english
video_chunking_method = 'words'
video_chunk_max_size = 400
video_chunk_overlap = 200
video_adaptive_chunking = false
video_chunking_multi_level = false
video_language = english
# List of the type prefixes used above (comma-separated, each entry single-quoted).
chunking_types = 'article', 'audio', 'book', 'document', 'mediawiki_article', 'mediawiki_dump', 'obsidian_note', 'podcast', 'text', 'video'

# Visual RAG: image captioning, OCR, and visual embedding settings. Disabled by default.
[Visual-RAG]
enable_visual_rag = False
visual_caption_backend = local
# Model names are blank by default.
# NOTE(review): presumably a backend-specific default is used when blank — confirm.
visual_caption_model =
visual_ocr_enabled = True
visual_embedding_model =
max_images_per_media = 32
visual_max_docs_per_user = 50000
video_frame_sampling_interval_seconds = 10

# Authentication/authorization: auth mode, user DB, registration, rate limits, token lifetimes, and BYOK.
[AuthNZ]
auth_mode = single_user
database_url = sqlite:///./Databases/users.db
enable_registration = true
require_registration_code = false
enable_org_scoped_registration_codes = false
org_invite_allow_missing_email = false
rate_limit_enabled = true
rate_limit_per_minute = 60
rate_limit_burst = 10
# Token lifetimes: access tokens in minutes, refresh tokens in days, per the key names.
access_token_expire_minutes = 30
refresh_token_expire_days = 7
# Optional allowlist for service tokens (comma-separated). Empty = loopback-only.
service_token_allowed_ips =
# BYOK settings; disabled by default.
byok_enabled = false
byok_allowed_providers =
byok_allowed_base_url_providers = openai,anthropic,zai,moonshot,openrouter,bedrock
# Encryption keys are blank by default. Treat as secrets: supply them outside this file.
# NOTE(review): the secondary key is presumably for key rotation — confirm.
byok_encryption_key =
byok_secondary_encryption_key =

# SSE streaming behaviour shared by chat and selected SSE endpoints.
[Streaming]
# Unified Streams: standardize SSE handling (heartbeats/idle/errors/metrics)
# for chat and selected SSE endpoints. Set to true to enable.
# Enabled as shipped.
streams_unified = true
# Optional: allow setting the chat stream channel maxsize here as well
# (Chat-Module.chat_stream_channel_maxsize takes precedence when present)
# chat_stream_channel_maxsize = 100

# Outbound HTTP client: timeouts, retries, connection limits, redirects, proxies, and TLS.
[HTTP]
# Centralized outbound HTTP client configuration
# Env overrides: see [HTTP-Client] in Config_Files/README.md

# Core timeouts (seconds)
connect_timeout = 5.0
read_timeout = 30.0
write_timeout = 30.0
pool_timeout = 30.0

# Retries & backoff
# Units differ between the two backoff keys: base is in milliseconds, cap is in seconds.
retry_attempts = 3
backoff_base_ms = 250
backoff_cap_s = 30

# Connection limits
max_connections = 100
max_keepalive_connections = 20

# Redirect behavior for simple GET/HEAD helpers
allow_redirects = true
max_redirects = 5

# Redirect safety controls
allow_cross_host_redirects = false
allow_scheme_downgrade = false

# Trust system environment/proxies (HTTP_TRUST_ENV)
trust_env = false

# Optional override for the default User-Agent
default_user_agent =

# Optional maximum JSON payload size (bytes) for helpers that opt-in
# NOTE(review): blank presumably means no limit — confirm.
json_max_bytes =

# Optional HTTP/3 toggle (currently a no-op; reserved for future use)
http3_enabled = false

# Proxy host allowlist (csv of hostnames/domains or URLs); deny-by-default when empty
proxy_allowlist =

# TLS minimum version enforcement
# tls_min_version is only relevant when enforce_tls_min_version is true (off as shipped).
enforce_tls_min_version = false
tls_min_version = 1.2

# Env-driven certificate pins (HTTP_CERT_PINS); format: host=pin1|pin2[,host2=pin3]
cert_pins =

# Outbound egress policy: host allow/deny lists, allowed ports, and private-range blocking.
[Egress]
# Global egress allow/deny lists for outbound HTTP (including workflows and the HTTP client)
# Comma-separated hostnames or domains; empty means "no constraint" in permissive profile.
egress_allowlist =
egress_denylist =

# Workflows-specific allow/deny overrides (unioned with global lists)
workflows_allowlist =
workflows_denylist =

# Allowed outbound ports (comma-separated integers; default: 80,443)
allowed_ports = 80,443

# Block private/reserved IP ranges by default (true/false)
block_private = true

# Egress profile: strict|permissive|custom (see Security/egress.py)
# NOTE(review): blank as shipped; which profile applies when unset is not stated here — confirm in
# Security/egress.py, since the meaning of the empty lists above depends on it.
profile =

# Embedding provider/model selection, embedding chunk sizes, and contextual chunking.
[Embeddings]
embedding_provider = huggingface
embedding_model = Qwen/Qwen3-Embedding-0.6B
onnx_model_path = ./App_Function_Libraries/models/onnx_models/
model_dir = ./App_Function_Libraries/models/embedding_models
# Dev default — override via .env for deployment
embedding_api_url = http://localhost:8080/v1/embeddings
chunk_size = 400
overlap = 200
# Comma-separated media types that may legitimately produce zero embeddings
# (e.g., audio/video items without transcripts). When matched, media-embeddings
# jobs complete successfully even if no vectors are stored.
allow_zero_embeddings_media_types = audio,video
enable_contextual_chunking = false
# [Claims] also defines contextual_llm_model with the same value.
contextual_llm_model = gpt-3.5-turbo
# NOTE(review): INI values are strings, so this is the literal text "None", not a null. An integer
# parse of this value would fail — confirm the loader special-cases it, or leave the value blank.
# [Chunking] defines a separate context_window_size (500).
context_window_size = None
context_strategy = auto
context_token_budget = 6000

# Claims extraction during ingestion, claims job budgets, rebuild scheduling, and FVA verification.
[Claims]
ENABLE_INGESTION_CLAIMS = false
CLAIM_EXTRACTOR_MODE = heuristic
CLAIMS_MAX_PER_CHUNK = 3
CLAIMS_EMBED = false
# Empty quoted string.
# NOTE(review): Python's configparser would read this as the two characters "" rather than an empty
# value — confirm the loader strips quotes.
CLAIMS_EMBED_MODEL_ID = ""
CLAIMS_LLM_PROVIDER = ollama
CLAIMS_LLM_TEMPERATURE = 0.1
CLAIMS_LLM_MODEL =
# Job budget controls; the cost/token caps are blank as shipped and budgeting is disabled.
CLAIMS_JOB_BUDGET_ENABLED = false
CLAIMS_JOB_MAX_COST_USD =
CLAIMS_JOB_MAX_TOKENS =
CLAIMS_JOB_BUDGET_STRICT = false
# Rebuild scheduling; the interval is in seconds per the key name.
CLAIMS_REBUILD_ENABLED = false
CLAIMS_REBUILD_INTERVAL_SEC = 3600
CLAIMS_REBUILD_POLICY = missing
CLAIMS_STALE_DAYS = 7
# NOTE(review): the next six keys look like embedding/model-cache settings rather than claims
# settings (contextual_llm_model is also defined under [Embeddings]). If the embedding code reads
# them from [Embeddings], the values here are never seen — confirm which section the loader reads
# before moving them.
contextual_llm_model = gpt-3.5-turbo
contextual_chunk_method = situate_context
trusted_hf_remote_code_models = NovaSearch/stella_en_400M_v5, BAAI/bge
max_models_in_memory = 3
max_model_memory_gb = 8
model_lru_ttl_seconds = 3600

# FVA (Falsification-Verification Alignment) Settings
# Enable FVA for enhanced claim verification with counter-evidence retrieval
FVA_ENABLED = true
# Confidence threshold below which falsification is triggered (0.0-1.0)
FVA_CONFIDENCE_THRESHOLD = 0.7
# Threshold for CONTESTED status when evidence is balanced (0.0-0.5)
FVA_CONTESTED_THRESHOLD = 0.4
# Maximum concurrent falsification operations
FVA_MAX_CONCURRENT = 5
# Timeout for falsification operations in seconds
FVA_TIMEOUT_SECONDS = 30.0
# Maximum budget ratio for FVA operations (0.0-1.0)
FVA_MAX_BUDGET_RATIO = 0.3
# Claim types that always trigger falsification (comma-separated)
FVA_FORCE_CLAIM_TYPES =
# Minimum confidence to skip falsification entirely (0.0-1.0)
FVA_MIN_CONFIDENCE_FOR_SKIP = 0.9

# Claims monitoring: alert thresholds and adaptive throttling. Disabled by default.
[ClaimsMonitoring]
CLAIMS_MONITORING_ENABLED = false
CLAIMS_ALERT_THRESHOLD_DEFAULT = 0.2
CLAIMS_REBUILD_MAX_QUEUE_ALERT = 1000
# Seconds, per the key name.
CLAIMS_REBUILD_HEARTBEAT_WARN_SEC = 600
# Adaptive throttling is off and all of its thresholds are 0 as shipped.
# NOTE(review): whether 0 means "disabled" or "always trigger" for the thresholds below is not
# stated — confirm before enabling CLAIMS_ADAPTIVE_THROTTLE_ENABLED.
CLAIMS_ADAPTIVE_THROTTLE_ENABLED = false
CLAIMS_ADAPTIVE_THROTTLE_LATENCY_MS = 0
CLAIMS_ADAPTIVE_THROTTLE_ERROR_RATE = 0
CLAIMS_ADAPTIVE_THROTTLE_BUDGET_RATIO = 0
# NOTE(review): presumably a JSON object mapping provider name to a cost multiplier — confirm.
CLAIMS_PROVIDER_COST_MULTIPLIERS = {}

# RAG: vector store, context expansion, default LLMs, semantic cache, and reranker settings.
[RAG]
vector_store_type = chromadb
# pgvector connection options (optional; do not set defaults here)
# pgvector_dsn =
# pgvector_host =
# pgvector_port =
# pgvector_database =
# pgvector_user =
# pgvector_password =
# pgvector_sslmode =
# Parent/sibling context expansion; all three switches are off as shipped.
enable_parent_expansion = false
parent_expansion_size = 500
include_sibling_chunks = false
sibling_window = 1
include_parent_document = false
parent_max_tokens = 1200
default_llm_provider = openai
default_llm_model = gpt-4o-mini
hyde_provider = openai

hyde_model = gpt-4o-mini
semantic_cache_enabled = true
cache_similarity_threshold = 0.85
enable_reranking = true
rerank_top_k = 10
# FlashRank reranker model cache directory.
# Relative paths resolve from repo root in server code.
# Recommended: keep bundled FlashRank model files under models/flashrank/<model_name>/
flashrank_cache_dir = models/flashrank
# FlashRank model family name (directory name under cache_dir)
flashrank_model_name = ms-marco-TinyBERT-L-2-v2
llm_reranker_provider = openai
llm_reranker_model =
# llama.cpp-based reranker settings (the binary defaults to llama-embedding); model is blank as shipped.
llama_reranker_binary = llama-embedding
llama_reranker_model =
llama_reranker_ngl = 0
llama_reranker_separator = <#sep#>
llama_reranker_output = json+
llama_reranker_pooling = last
llama_reranker_normalize = -1
llama_reranker_max_doc_chars = 2000
llama_reranker_template_mode = auto
transformers_reranker_model =

# --- RAG Feature Toggles (Phased Rollout) ---
# Structure index: persist headings/paragraph offsets and surface section metadata in retrieval
# Env override: RAG_ENABLE_STRUCTURE_INDEX=true|false (default true)
enable_structure_index = true

# Strict extractive mode for standard pipeline: assemble answers only from retrieved spans
# Env override: RAG_STRICT_EXTRACTIVE=true|false (default false)
strict_extractive = false

# Require per-sentence hard citations coverage; gates answer when coverage < 1.0
# Env override: RAG_REQUIRE_HARD_CITATIONS=true|false (default false)
require_hard_citations = false

# Behavior on low confidence (insufficient evidence): continue|ask|decline
# Env override: RAG_LOW_CONFIDENCE_BEHAVIOR=continue|ask|decline (default continue)
low_confidence_behavior = continue

# Agentic cache backend for ephemeral chunks used by the RAG agentic pipeline.
# Env override: RAG_AGENTIC_CACHE_BACKEND=memory|sqlite (default memory)
agentic_cache_backend = memory

# TTL (seconds) for agentic cache entries used by RAG.
# Env override: RAG_AGENTIC_CACHE_TTL_SEC=<seconds> (default 600)
# 600 seconds (10 minutes) is chosen as a pragmatic default that roughly matches a
# typical focused user interaction window for agentic RAG flows: long enough to
# reuse expensive planning/results across follow-up questions in the same session,
# but short enough to avoid unbounded growth in ephemeral cache state for idle tabs
# and background jobs.
agentic_cache_ttl_sec = 600

# Precomputed span index caps (late-interaction spans stored per corpus)
# Env overrides:
# - RAG_PRECOMPUTED_SPANS_MAX_VECTORS_PER_CORPUS
# - RAG_PRECOMPUTED_SPANS_MAX_MB_PER_CORPUS
# - RAG_PRECOMPUTED_SPANS_RETENTION_DAYS
# Caps are per corpus: a vector count, a size in MB, and a retention period in days.
precomputed_spans_max_vectors_per_corpus = 200000
precomputed_spans_max_mb_per_corpus = 512
precomputed_spans_retention_days = 30

# Enable/disable implicit feedback capture (click/copy/expand/dwell/citation-used)
# Env override: IMPLICIT_FEEDBACK_ENABLED=true|false (default true)
implicit_feedback_enabled = true

# Search agent: query classification/reformulation, research loop, sources, and response features.
# Every value below matches the default stated in its own comment.
[Search-Agent]
# Enable LLM-based query classification before search (routes queries intelligently)
# Env override: SEARCH_QUERY_CLASSIFICATION=true|false (default false)
search_query_classification = false

# Default search depth mode: speed, balanced, quality
# Env override: SEARCH_DEFAULT_MODE=speed|balanced|quality (default balanced)
search_default_mode = balanced

# Enable standalone query reformulation for chat follow-ups
# Env override: SEARCH_QUERY_REFORMULATION=true|false (default true)
search_query_reformulation = true

# Enable iterative agentic research loop (for quality mode)
# Env override: SEARCH_RESEARCH_LOOP=true|false (default false)
search_research_loop = false

# Enable discussion/forum search as a search source
# Env override: SEARCH_DISCUSSIONS_ENABLED=true|false (default true)
search_discussions_enabled = true

# Discussion platforms to search (comma-separated)
# Env override: SEARCH_DISCUSSION_PLATFORMS=reddit,stackoverflow,hackernews
search_discussion_platforms = reddit,stackoverflow,hackernews

# Enable research progress streaming events
# Env override: SEARCH_PROGRESS_STREAMING=true|false (default true)
search_progress_streaming = true

# Enable URL scraping during research
# Env override: SEARCH_URL_SCRAPING=true|false (default true)
search_url_scraping = true

# LLM provider/model for query classification (uses default chat provider if empty)
# Env override: SEARCH_CLASSIFIER_PROVIDER, SEARCH_CLASSIFIER_MODEL
search_classifier_provider =
search_classifier_model =

# Max research iterations per mode (0 = use defaults: speed=2, balanced=6, quality=25)
# Env overrides: SEARCH_MAX_ITERATIONS_SPEED, SEARCH_MAX_ITERATIONS_BALANCED, SEARCH_MAX_ITERATIONS_QUALITY
search_max_iterations_speed = 0
search_max_iterations_balanced = 0
search_max_iterations_quality = 0

# Enable follow-up suggestion generation after RAG responses
# Env override: SEARCH_SUGGESTIONS=true|false (default false)
search_suggestions = false

# Enable structured response writer with XML-tagged context and citation enforcement
# Env override: SEARCH_STRUCTURED_RESPONSE=true|false (default false)
search_structured_response = false

# Enable image search as a research action
# Env override: SEARCH_IMAGE_SEARCH=true|false (default false)
search_image_search = false

# Enable video search as a research action
# Env override: SEARCH_VIDEO_SEARCH=true|false (default false)
search_video_search = false

# Resource Governor (RG): global enable flag, policy source, and backend selection.
[ResourceGovernor]
# Global Resource Governor enable flag (env: RG_ENABLED)
enabled = true
# Policy store: file-backed YAML or AuthNZ DB (env: RG_POLICY_STORE)
# NOTE(review): only the value "file" is shown here; the value that selects the AuthNZ DB store is
# not stated — confirm.
policy_store = file
# Backend implementation: memory | redis (env: RG_BACKEND)
backend = memory
# Redis failover behavior when RG backend is redis (env: RG_REDIS_FAIL_MODE)
# Not applicable with the default backend = memory.
redis_fail_mode = fallback_memory
# Default policy path (relative to the project root unless an absolute path is provided).
# The RG_POLICY_PATH environment variable overrides this when set; in containerized deployments,
# ensure the policy file path is mounted into the container (e.g., under /app/tldw_Server_API/Config_Files).
policy_path = tldw_Server_API/Config_Files/resource_governor_policies.yaml
# Deprecated: per-module RG enable flags were removed in v1.1; integrations are
# controlled by the global `enabled` flag. Kept for config compatibility only.
enable_audio = false

# Jobs maintenance: prune scheduler and per-status retention windows.
[Jobs]
# Jobs prune scheduler (env overrides: JOBS_PRUNE_*)
# Interval is in seconds per the key name (86400 s = 24 h).
prune_enforce = false
prune_interval_sec = 86400
prune_dry_run = false
# Optional prune scope filters; blank as shipped.
# NOTE(review): blank presumably means "no filter" — confirm.
prune_domain =
prune_queue =
prune_job_type =

# Retention days (env overrides: JOBS_RETENTION_DAYS_*)
# terminal (completed/failed/cancelled/quarantined) override
# Blank as shipped, so the per-status values below are the ones configured.
retention_days_terminal =
retention_days_completed = 30
retention_days_failed = 60
retention_days_cancelled = 60
retention_days_quarantined = 90
# non-terminal (queued/processing); 0 disables
retention_days_nonterminal = 0

# Speaker diarization: VAD, segmentation, speaker embeddings, clustering, post-processing, and memory.
[Diarization]
# VAD backend:
#   silero_hub (default): use the Silero PyTorch repo via torch.hub (and optional local checkout under models/).
#   onnx_silero: use a local Silero ONNX model via onnxruntime (no torch.hub or faster-whisper required).
vad_backend = silero_hub

# VAD thresholds and timing (used by diarization VAD path)
vad_threshold = 0.5
vad_min_speech_duration = 0.25
vad_min_silence_duration = 0.25
allow_vad_fallback = true
enable_torch_hub_fetch = true

# ONNX backend (onnx_silero) settings
# Path to silero_vad_v6.onnx (installed via Helper_Scripts/install_silero_vad_weights.py or manually).
onnx_model_path = models/silero_vad/silero_vad_v6.onnx

# Segmentation
# Keep min_segment_duration <= segment_duration <= max_segment_duration.
# segment_duration was 30.0, which exceeded max_segment_duration (3.0) and did not fit the 0.5
# overlap or the 1.0 minimum; 2.0 sits inside the configured bounds.
# NOTE(review): values are presumably seconds — confirm.
segment_duration = 2.0
segment_overlap = 0.5
min_segment_duration = 1.0
max_segment_duration = 3.0

# Embeddings & clustering
embedding_model = speechbrain/spkrec-ecapa-voxceleb
embedding_device = auto
embedding_local_only = false
embedding_batch_size = 32
clustering_method = spectral
similarity_threshold = 0.85
# Bounds on the number of speakers.
min_speakers = 1
max_speakers = 10

# Post-processing
merge_threshold = 0.5
min_speaker_duration = 3.0
detect_overlapping_speech = false
overlap_confidence_threshold = 0.7

# Memory
# max_memory_mb is in MB per the key name.
# NOTE(review): presumably only enforced when memory_efficient is true — confirm.
memory_efficient = false
max_memory_mb = 2048

[API]
# Per-provider defaults for hosted LLM APIs. Keys follow the pattern
# <provider>_<setting> (model, streaming, sampling parameters, max_tokens,
# timeout, retry, retry_delay). No API keys are defined in this section.
# NOTE(review): timeout/retry_delay units are presumably seconds — confirm.

# --- Anthropic ---
anthropic_model = claude-opus-4-20250514
anthropic_streaming = True
anthropic_temperature = 0.7
anthropic_top_p = 0.95
anthropic_min_p = 0.05
anthropic_max_tokens = 4096
anthropic_api_timeout = 90
anthropic_api_retry = 3
anthropic_api_retry_delay = 1
# --- Cohere ---
cohere_model = command-a-03-2025
cohere_streaming = True
cohere_temperature = 0.7
cohere_max_tokens = 4096
cohere_api_timeout = 90
cohere_api_retry = 3
cohere_api_retry_delay = 1
# --- DeepSeek ---
deepseek_model = deepseek-chat
deepseek_streaming = True
deepseek_temperature = 0.7
deepseek_max_tokens = 4096
deepseek_api_timeout = 90
deepseek_api_retry = 3
deepseek_api_retry_delay = 1
# --- Qwen (region and base URL are configured together) ---
qwen_model = qwen-plus
qwen_streaming = True
qwen_temperature = 0.7
qwen_top_p = 0.8
qwen_max_tokens = 4096
qwen_api_timeout = 90
qwen_api_retry = 3
qwen_api_retry_delay = 1
qwen_api_region = sg
qwen_api_base_url = https://dashscope-intl.aliyuncs.com/compatible-mode/v1
# --- Google ---
google_model = gemini-2.5-flash
google_streaming = True
google_temperature = 0.7
google_max_tokens = 4096
google_api_timeout = 90
google_api_retry = 3
google_api_retry_delay = 1
# --- Groq ---
groq_model = llama-3.3-70b-versatile
groq_streaming = True
groq_temperature = 0.7
groq_max_tokens = 4096
groq_api_timeout = 90
groq_api_retry = 3
groq_api_retry_delay = 1
# --- HuggingFace (router URL format toggle, base URLs and chat path) ---
huggingface_use_router_url_format = false
huggingface_router_base_url = https://router.huggingface.co/hf-inference
huggingface_api_base_url = https://router.huggingface.co/v1
huggingface_api_chat_path = chat/completions
huggingface_model = ServiceNow-AI/Apriel-1.6-15b-Thinker:together
huggingface_streaming = True
huggingface_temperature = 0.7
huggingface_max_tokens = 4096
huggingface_api_timeout = 90
huggingface_api_retry = 3
huggingface_api_retry_delay = 1
# --- Mistral ---
mistral_model = open-mistral-nemo
mistral_streaming = True
mistral_temperature = 0.7
mistral_max_tokens = 4096
mistral_api_timeout = 90
mistral_api_retry = 3
mistral_api_retry_delay = 1
# --- AWS Bedrock ---
bedrock_model = anthropic.claude-3-5-sonnet-20241022-v2:0
bedrock_streaming = True
bedrock_temperature = 0.7
bedrock_top_p = 0.95
bedrock_max_tokens = 4096
bedrock_api_timeout = 90
bedrock_api_retry = 3
bedrock_api_retry_delay = 1
# --- OpenAI (streaming is off by default here, unlike most providers above) ---
openai_model = gpt-4o
openai_streaming = False
openai_temperature = 0.7
openai_top_p = 0.95
openai_max_tokens = 4096
openai_api_timeout = 90
openai_api_retry = 3
openai_api_retry_delay = 1
model_for_summarization = gpt-4o
# --- OpenRouter ---
openrouter_model = openai/gpt-4o-mini
openrouter_max_tokens = 4096
openrouter_api_timeout = 90
openrouter_api_retry = 3
openrouter_api_retry_delay = 1
# --- Custom OpenAI-compatible endpoint #1 ---
# NOTE(review): the retry keys below contain a doubled `api_api`. They are the
# names the application reads, so do not rename them without updating the reader.
custom_openai_api_ip = https://api.openai.com/v1
custom_openai_api_model = gpt-4.1-2025-04-14
custom_openai_api_streaming = True
custom_openai_api_temperature = 0.7
custom_openai_api_top_p = 0.9
custom_openai_api_min_p = 0.05
custom_openai_api_max_tokens = 4096
custom_openai_api_timeout = 90
custom_openai_api_api_retry = 3
custom_openai_api_api_retry_delay = 1
# --- Custom OpenAI-compatible endpoint #2 (same key layout as #1) ---
custom_openai2_api_ip = https://api.openai.com/v1
custom_openai2_api_model = gpt-4.1-2025-04-14
custom_openai2_api_streaming = True
custom_openai2_api_temperature = 0.7
custom_openai2_api_top_p = 0.9
custom_openai2_api_min_p = 0.05
custom_openai2_api_max_tokens = 4096
custom_openai2_api_timeout = 90
custom_openai2_api_api_retry = 3
custom_openai2_api_api_retry_delay = 1
# --- Provider selection ---
default_api = openai
default_api_for_tasks = anthropic

# MLX (Apple Silicon) local provider (in-process)
[MLX]
# Path or repo id for the MLX model to load
mlx_model_path = Qwen/Qwen3-0.6B-MLX-4bit
# Optional overrides; defaults favor single-concurrency + warmup/compile on.
# Keys left blank below are unset in this template.
mlx_max_seq_len =
mlx_max_batch_size =
mlx_device = auto
mlx_dtype =
mlx_quantization =
mlx_compile = true
mlx_warmup = true
mlx_prompt_template =
mlx_revision =
mlx_tokenizer =
mlx_adapter =
mlx_adapter_weights =
mlx_max_kv_cache_size =
# Single-concurrency default (see note above).
mlx_max_concurrent = 1
mlx_trust_remote_code = false

# Local-API: OpenAI-compatible servers expect a 'model' in requests (per OpenAI spec + server docs).
# If your server does not expose a default, add a placeholder model name here so the UI/clients can send one.
[Local-API]
# Endpoints and defaults for locally hosted inference servers. Keys follow the
# pattern <backend>_<setting>.
# --- KoboldCpp: native generate endpoint plus its OpenAI-compatible endpoint ---
# Dev default — override via .env for deployment
kobold_api_IP = http://127.0.0.1:5001/api/v1/generate
# Dev default — override via .env for deployment
kobold_openai_api_IP = http://127.0.0.1:5001/v1/chat/completions
# Koboldcpp OpenAI endpoint follows OpenAI spec and expects a model id (README "OpenAI API" section).
# Uncomment and fill in to send an explicit model id:
# kobold_model = <model_name_for_openai_endpoint>
kobold_streaming = False
kobold_temperature = 0.7
kobold_top_p = 0.9
kobold_min_p = 0.05
kobold_top_k = 100
kobold_max_tokens = 4096
kobold_api_timeout = 90
kobold_api_retry = 3
kobold_api_retry_delay = 1
# --- llama.cpp (llama-server) ---
# Dev default — override via .env for deployment
llama_api_IP = http://127.0.0.1:8080/completion
# llama.cpp (llama-server) is OpenAI-compatible and expects model (OpenAI Chat API spec; llama-server docs).
# Uncomment and fill in to send an explicit model id:
# llama_model = <model_name_for_openai_endpoint>
llama_streaming = True
llama_temperature = 0.7
llama_top_p = 0.9
llama_min_p = 0.05
llama_top_k = 100
llama_max_tokens = 4096
llama_api_timeout = 90
llama_api_retry = 3
llama_api_retry_delay = 1
# --- text-generation-webui (ooba) ---
# Loopback default, matching the other local backends in this section; the
# previous value pointed at a specific private LAN host (192.168.2.235).
# Dev default — override via .env for deployment
ooba_api_IP = http://127.0.0.1:5000/v1/chat/completions
# text-generation-webui OpenAI routes accept the OpenAI payload; include model to be explicit
# (docs/12 - OpenAI API.md; server will fall back to the loaded model if omitted).
# ooba_model = <model_name_for_openai_endpoint>
ooba_streaming = False
ooba_temperature = 0.7
ooba_top_p = 0.9
ooba_min_p = 0.05
ooba_top_k = 100
ooba_max_tokens = 4096
ooba_api_timeout = 90
ooba_api_retry = 3
ooba_api_retry_delay = 1
# --- TabbyAPI ---
# Dev default — override via .env for deployment
tabby_api_IP = http://127.0.0.1:5000/v1/chat/completions
# TabbyAPI is OpenAI-compatible; send a model string per the OpenAI spec.
# Uncomment and fill in to send an explicit model id:
# tabby_model = <model_name_for_openai_endpoint>
tabby_streaming = False
tabby_temperature = 0.7
tabby_top_k = 100
tabby_max_tokens = 4096
tabby_api_timeout = 90
tabby_api_retry = 3
tabby_api_retry_delay = 1
# --- vLLM ---
# Dev default — override via .env for deployment
vllm_api_IP = http://127.0.0.1:8000/v1/chat/completions
# vLLM OpenAI server examples include model (docs: OpenAI-Compatible Server -> chat.completions).
# Left blank in this template; set it to the model name your vLLM server serves.
vllm_model =
vllm_streaming = False
vllm_temperature = 0.7
vllm_top_p = 0.9
vllm_min_p = 0.05
vllm_top_k = 100
vllm_max_tokens = 4096
vllm_api_timeout = 90
vllm_api_retry = 3
vllm_api_retry_delay = 1
# --- Ollama ---
# Ollama API requires model (docs/api.md: POST /api/generate -> model required).
# Loopback default, matching the other local backends in this section; the
# previous value pointed at a specific private LAN host (192.168.2.216).
# Dev default — override via .env for deployment
ollama_api_IP = http://127.0.0.1:11434/v1
ollama_model = gemma3:1b
ollama_streaming = False
ollama_temperature = 0.7
ollama_top_p = 0.9
ollama_max_tokens = 4096
# Longer than the 90 used by the other backends, to leave room for a slow first
# response. The previous value (9009) looked like a keystroke typo.
# NOTE(review): 900 is a judgement call — confirm the intended value.
ollama_api_timeout = 900
ollama_api_retry = 3
ollama_api_retry_delay = 1
# --- Aphrodite Engine ---
# NOTE(review): this URL is identical to llama_api_IP (same host, port and
# /completion path) — confirm it is the intended Aphrodite endpoint.
# Dev default — override via .env for deployment
aphrodite_api_IP = http://127.0.0.1:8080/completion
# Aphrodite Engine exposes an OpenAI-compatible server; include model (README run examples).
aphrodite_model =
aphrodite_streaming = False
aphrodite_temperature = 0.7
aphrodite_top_p = 0.9
aphrodite_min_p = 0.05
aphrodite_max_tokens = 4096
aphrodite_api_timeout = 90
aphrodite_api_retry = 3
aphrodite_api_retry_delay = 1
# --- Un-prefixed keys for this section ---
# NOTE(review): presumably fallbacks used when a backend-specific key is absent — confirm.
max_tokens = 4096
local_api_timeout = 90
local_api_retries = 3
local_api_retry_delay = 5
streaming = True
temperature = 0.7
top_p = 0.9
min_p = 0.05

[LlamaCpp]
# Enable the managed llama.cpp handler (llama-server binary) for /llamacpp endpoints
enabled = false
# Location of the server executable and of the GGUF model directory.
executable_path = vendor/llama.cpp/server
models_dir = models/gguf_models
# Dev default — override via .env for deployment
default_host = 127.0.0.1
default_port = 8080
# Launch defaults; default_threads is left blank in this template.
default_threads =
default_n_gpu_layers = 0
default_ctx_size = 2048
# Safety toggles, both off by default.
allow_unvalidated_args = false
allow_cli_secrets = false
# NOTE(review): presumably probes up to port_probe_max alternative ports when
# default_port is busy — confirm against the handler.
port_autoselect = true
port_probe_max = 10
# Comma-separated list of additional directories where models/logs may reside
allowed_paths =
# Comma-separated list of explicit local GGUF model files registered for inventory display
registered_model_paths =
# Optional path to a log file for llama.cpp server stdout/stderr
log_output_file =

[STT-Settings]
# Speech-to-text settings: default provider/models, chunking, custom vocabulary,
# transcript cache retention, Qwen3-ASR and audio validation.
default_transcriber = parakeet
default_stt_provider = parakeet
default_batch_transcription_model = parakeet-tdt-0.6b-v3-onnx
default_streaming_transcription_model = parakeet-tdt-0.6b-v3-onnx
nemo_model_variant = onnx
nemo_device = cpu
nemo_cache_dir = ./models/nemo
# NOTE(review): chunk/overlap durations are presumably seconds — confirm.
nemo_chunk_duration = 120
nemo_overlap_duration = 15
streaming_fallback_to_whisper = false
parakeet_onnx_model_id = istupakov/parakeet-tdt-0.6b-v3-onnx
# Pin a specific snapshot/commit for reproducibility; leave empty for repo default branch.
parakeet_onnx_revision =
# Optional: faster-whisper compute type override. When unset or set to "auto",
# the server uses float16 on CUDA and int8 on CPU. Examples of explicit
# overrides: float16, int8, int8_float16.
whisper_compute_type = auto
mlx_chunk_duration = 30.0
mlx_overlap_duration = 5.0
buffered_chunk_duration = 20.0
buffered_total_buffer = 25.0
buffered_merge_algo = lcs

# --- Custom Vocabulary (optional) ---
# Path to a file with domain terms (one per line) or JSON list.
# Used to prime Whisper via initial_prompt.
custom_vocab_terms_file =
# Path to a JSON mapping of misheard->correct replacements.
# Applied as post-processing across all STT providers.
custom_vocab_replacements_file =
# Enable/disable using the terms list to build a Whisper initial_prompt
custom_vocab_initial_prompt_enable = True
# Enable/disable post-processing replacements
custom_vocab_postprocess_enable = True
# Optional template for the initial prompt. "{terms}" is replaced with a comma-separated list.
custom_vocab_prompt_template = Domain terms: {terms}.
# When True, replacements are case-sensitive
custom_vocab_case_sensitive = False

# --- Transcript cache + retention ---
# Disable writing transcription cache files (env override: STT_DISABLE_TRANSCRIPT_CACHE)
disable_transcript_cache = false
# When true, keep writing transcripts but skip all age/size-based pruning (env: STT_DISABLE_TRANSCRIPT_CACHE_PRUNING).
disable_transcript_cache_pruning = false
# Max cached transcript files per source (newest kept). Leave empty for defaults
# or set 0/negative to disable this limit entirely.
transcript_cache_max_files_per_source =
# Delete cached transcripts older than this many days. Leave empty for defaults
# or set 0/negative to disable age-based pruning.
transcript_cache_max_age_days =
# Cap total transcript cache size (MB) per directory. Oldest files evicted first.
# Leave empty for defaults or set 0/negative to disable size-based pruning.
transcript_cache_max_total_mb =

# --- Qwen3-ASR Settings ---
# Qwen3-ASR offers state-of-the-art multilingual transcription.
# Models must be manually downloaded before use:
#   huggingface-cli download Qwen/Qwen3-ASR-1.7B --local-dir ./models/qwen3_asr/1.7B
#   huggingface-cli download Qwen/Qwen3-ASR-0.6B --local-dir ./models/qwen3_asr/0.6B
#   huggingface-cli download Qwen/Qwen3-ForcedAligner-0.6B --local-dir ./models/qwen3_asr/aligner
qwen3_asr_enabled = false
# LOCAL path to downloaded model (required if enabled, no auto-download)
qwen3_asr_model_path = ./models/qwen3_asr/1.7B
# Device: cuda or cpu
qwen3_asr_device = cuda
# Data type: bfloat16, float16, or float32
qwen3_asr_dtype = bfloat16
# Maximum batch size for inference
qwen3_asr_max_batch_size = 32
# Maximum new tokens for generation
qwen3_asr_max_new_tokens = 4096
# Auto-download from HuggingFace: false (default) enforces manual download;
# set to true to allow auto-download.
qwen3_asr_allow_download = false
# Target sample rate for audio (default 16000 Hz)
qwen3_asr_sample_rate = 16000

# --- Qwen3-ASR Forced Aligner (for word-level timestamps) ---
# Enable forced alignment for word-level timestamps (requires separate model)
qwen3_asr_aligner_enabled = false
# LOCAL path to forced aligner model
qwen3_asr_aligner_path = ./models/qwen3_asr/aligner

# --- Qwen3-ASR Backend ---
# Backend: transformers (default) or vllm (for streaming, optional)
qwen3_asr_backend = transformers
# GPU memory utilization when using vLLM backend (0.0-1.0)
qwen3_asr_vllm_gpu_memory_utilization = 0.7
# Optional: vLLM HTTP server URL for external vLLM delegation (P2 feature)
# When set, transcription requests are delegated to this vLLM server
# Example: http://localhost:8000
qwen3_asr_vllm_base_url =

# --- Audio validation ---
# When true, skip ffprobe-based audio validation and rely on ffmpeg + STT to
# surface bad files (env: STT_SKIP_AUDIO_PREVALIDATION).
skip_audio_prevalidation = false

[external_providers]
# No keys are defined for this section in this template.
# NOTE(review): presumably kept so the section exists for code that looks it up — confirm.

[TTS-Settings]
# Base directory for per-user databases and voice assets.
# Recommended for production: point this at a dedicated volume
# with sufficient capacity, backups, and appropriate filesystem ACLs.
# If unset, the server falls back to Databases/user_databases under
# the project root.
# Setup-time updates to USER_DB_BASE_DIR are constrained to an allowlist.
# Default allowlist includes the project Databases directory and the parent of the
# current USER_DB_BASE_DIR.
# Add extra roots via USER_DB_BASE_DIR_ALLOWED_ROOTS and/or
# TLDW_USER_DB_BASE_DIR_ALLOWED_ROOTS (comma- or colon-separated); values are merged.
USER_DB_BASE_DIR = Databases/user_databases

# TTS history settings
# For privacy, set TTS_HISTORY_HASH_KEY in the environment (recommended) or here.
tts_history_enabled = True
tts_history_store_text = True
tts_history_store_failed = True
tts_history_hash_key =
tts_history_retention_days = 90
tts_history_max_rows_per_user = 10000
tts_history_purge_interval_hours = 24

# Global TTS defaults
local_tts_device = cpu
default_tts_provider = kokoro
default_tts_voice = af_bella
default_tts_speed = 1
# OpenAI TTS defaults
default_openai_tts_voice = shimmer
default_openai_tts_speed = 1
default_openai_tts_model = tts-1-hd
default_openai_tts_output_format = mp3
default_openai_tts_streaming = False
# ElevenLabs TTS defaults (blank keys are unset in this template)
default_eleven_tts_voice = pNInz6obpgDQGcFmaJgB
default_eleven_tts_model =
default_eleven_tts_language_code =
default_eleven_tts_voice_stability =
# NOTE(review): "similiarity" is misspelled in the key name below. It is the
# name the application reads, so do not rename it without updating the reader.
default_eleven_tts_voice_similiarity_boost =
default_eleven_tts_voice_style =
default_eleven_tts_voice_use_speaker_boost =
default_eleven_tts_voice_pronunciation_dictionary_locators_dict_id =
default_eleven_tts_voice_pronunciation_dictionary_locators_version_id =
default_eleven_tts_speed = 1
default_eleven_tts_output_format = mp3_44100_128
# Google TTS defaults
default_google_tts_model =
default_google_tts_voice =
default_google_tts_speed = 1
# Edge TTS default voice
edge_tts_voice = en-US-AriaNeural
# AllTalk TTS defaults
default_alltalk_tts_speed = 1.0
default_alltalk_tts_voice = alloy
default_alltalk_tts_model = alltalk
default_alltalk_tts_output_format = mp3
# Dev default — override via .env for deployment
alltalk_api_ip = http://127.0.0.1:7851/v1/audio/speech
# Kokoro TTS defaults
kokoro_model_path = ./App_Function_Libraries/models/kokoro_models/
default_kokoro_tts_speed = 1.0
default_kokoro_tts_voice = af_sky
default_kokoro_tts_output_format = wav
# Custom OpenAI-compatible TTS defaults
default_custom_openai_tts_voice = shimmer
default_custom_openai_tts_speed = 1
default_custom_openai_tts_model = tts-1-hd
default_custom_openai_tts_output_format = mp3
# Placeholder; override with your custom OpenAI-compatible endpoint
# Dev default — override via .env for deployment
default_custom_openai_api_ip = http://localhost:8000/v1
default_custom_openai_api_streaming = True
# VibeVoice TTS Provider Configuration
# VibeVoice is a local, voice-cloning TTS engine. Configure model paths, inference parameters (CFG scale, diffusion steps),
# streaming behavior, and speaker context for optimal quality and performance.

[VibeVoice]
# Model selection
vibevoice_variant = 1.5B
vibevoice_model_path = microsoft/VibeVoice-1.5B
# Device: auto, cpu, cuda, or specific GPU name
vibevoice_device = auto
# Runtime / loading options
vibevoice_use_fp16 = True
vibevoice_batch_size = 1
vibevoice_use_quantization = False
vibevoice_auto_cleanup = True
vibevoice_auto_download = True
vibevoice_attention_type = auto
vibevoice_enable_sage = False
# CFG scale for classifier-free guidance (higher = more adherence to prompt, typically 1.0-2.0)
vibevoice_cfg_scale = 1.3
# Diffusion steps: more steps = higher quality but slower inference (typical: 10-30)
vibevoice_diffusion_steps = 20
# Sampling parameters
vibevoice_temperature = 1.0
vibevoice_top_p = 0.95
vibevoice_top_k = 50
# Streaming chunk size (seconds) and internal buffer size (samples) control latency vs. smoothness
vibevoice_stream_chunk_size = 0.25
vibevoice_stream_buffer_size = 4096
# NOTE(review): sample rate is presumably in Hz — confirm it matches the model's output rate.
vibevoice_sample_rate = 22050
# Speaker and context options
vibevoice_default_speaker = 1
vibevoice_context = True
vibevoice_context_window = 512
vibevoice_background_music = False
vibevoice_enable_singing = False
# Local directories for models, cache and voices
vibevoice_model_dir = ./models/vibevoice
vibevoice_cache_dir = ./cache/vibevoice
vibevoice_voices_dir = ./voices

# When true, enforce strict duration requirements for uploaded voice
# samples based on provider-specific ranges in VoiceManager. Out-of-range
# samples will be rejected with a VoiceDurationError instead of only
# emitting warnings. Default is false (advisory-only).
# NOTE(review): this key describes provider-generic behavior but sits in the
# [VibeVoice] section — confirm which section the application reads it from before moving it.
TTS_VOICE_STRICT_DURATION = false

# ---------------------------------------------
# Optional: Audio streaming quota configuration
# ---------------------------------------------
# When the quota backing store (DB/Redis) is temporarily unavailable during
# real-time transcription, the server can allow a bounded amount of audio per
# WebSocket connection ("fail-open") before closing with quota_exceeded.
#
# You can configure this bounded allowance here or via env var:
#   Env:  AUDIO_FAILOPEN_CAP_MINUTES=<positive float>
#   INI:  [Audio-Quota] failopen_cap_minutes = <positive float>
#         [Audio]       failopen_cap_minutes = <positive float>
#
# If both env and INI are set, the environment variable takes precedence.
# Default (if unset): 5.0 minutes per connection.

[Audio-Quota]
# Bounded fail-open allowance (minutes) per connection when quota store is down
# The key is left commented out, so the setting is unset in this file; uncomment
# it to set a positive float here.
# failopen_cap_minutes = 5.0

# Alternatively, place the same setting in [Audio] section if you prefer (optional; env var takes precedence)

# Audio Studio provider configuration is environment-variable only in the MVP.
# Use .env for AUDIO_STUDIO_EXTERNAL_ENDPOINT_ALLOWLIST,
# AUDIO_STUDIO_ALLOW_HTTP_ENDPOINTS, AUDIO_STUDIO_ACE_STEP_BASE_URL,
# AUDIO_STUDIO_ACE_STEP_TIMEOUT_SECONDS, and AUDIO_STUDIO_ACE_STEP_API_KEY.

# ---------------------------------------------
# API Route Toggles (optional)
# ---------------------------------------------
# Control which API route groups are included at startup.
# You can also override via env vars:
#   ROUTES_STABLE_ONLY=true|false
#   ROUTES_DISABLE="sandbox,connectors"
#   ROUTES_ENABLE="workflows"
#
# Known route keys include (not exhaustive):
#   health, moderation, monitoring, audit, auth, auth-enhanced, users, privileges,
#   admin, mcp-catalogs, media, audio, audio-jobs, audio-websocket, audiobooks, chat, characters,
#   character-chat-sessions, character-messages, metrics, chunking, chunking-templates,
#   outputs-templates, outputs, embeddings, vector-stores, connectors, claims,
#   media-embeddings, items, reading, watchlists, subscriptions-deprecated, notes, prompts,
#   reading-highlights, prompt-studio, rag-health, rag-unified, workflows, scheduler,
#   research, paper-search, evaluations, ocr, vlm, benchmarks, setup, config, jobs,
#   sync, tools, sandbox, flashcards, personalization, persona, mcp-unified, chatbooks,
#   llm, llamacpp, web-scraping

[API-Routes]
# Route-group toggles applied at startup. All list values are comma-separated route keys.
# When true, disables a curated set of in-development (experimental) routes unless explicitly enabled.
stable_only = false

# Comma-separated list of route keys to disable regardless of stability
disable =

# Comma-separated list of route keys to force-enable (useful when stable_only = true)
enable = tools, jobs, acp, workflows, scheduler, ingestion-sources, scheduled-tasks

# Optionally extend the curated experimental set (comma-separated)
experimental_routes =

[Search-Engines]
# Web search settings: general query/result behavior, then per-engine
# credentials and options. Values written as <...> are placeholders to be
# replaced with real credentials (preferably via environment/.env, not this file).

# --- General search behavior ---
search_provider_default = google
search_language_query = en
search_language_results = en
search_language_analysis = en
search_default_max_queries = 10
search_enable_subquery = True
search_enable_subquery_count_max = 5
search_result_rerank = True
search_result_max = 15
search_result_max_per_query = 10
search_result_blacklist = []
search_result_display_type = list
search_result_display_metadata = False
search_result_save_to_db = True
search_result_analysis_tone =
relevance_analysis_llm = openai
final_answer_llm = openai
# --- Baidu ---
# Placeholder in the same <...> form as the other engines (previously a
# credential-looking dummy hex string).
search_engine_api_key_baidu = <baidu_api_key>
# --- Brave ---
search_engine_api_key_brave_regular = <brave_api_key>
search_engine_api_key_brave_ai = <brave_ai_api_key>
search_engine_country_code_brave = US
# --- Google Custom Search ---
search_engine_api_key_google = <google_api_key>
search_engine_id_google = <google_search_engine_id>
enable_traditional_chinese = 0
limit_google_search_to_country = False
google_search_country_code = US
google_filter_setting = 1
google_user_geolocation = US
google_ui_language = en
google_limit_search_results_to_language =
google_default_search_results =
# Unquoted on purpose: INI parsers such as Python's configparser keep quote
# characters as part of the value, so "active" would be read with the quotes.
google_safe_search = active
google_enable_site_search =
google_site_search_include =
google_site_search_exclude =
google_sort_results_by =
# --- Kagi ---
search_engine_api_key_kagi = <kagi_api_key>
# --- SearX ---
search_engine_searx_api = https://search.rhscz.eu/
# --- Tavily ---
search_engine_api_key_tavily = <tavily_api_key>
# --- Exa ---
search_engine_api_key_exa = <exa_api_key>
search_engine_api_url_exa = https://api.exa.ai/search
# --- Firecrawl ---
search_engine_api_key_firecrawl = <firecrawl_api_key>
search_engine_api_url_firecrawl = https://api.firecrawl.dev/v2/search
# --- Yandex ---
# Placeholders in the same <...> form as the other engines (previously
# credential-looking dummy hex strings).
search_engine_api_key_yandex = <yandex_api_key>
search_engine_id_yandex = <yandex_search_engine_id>

[Web-Scraper]
# Web scraper settings: optional external scraper API, retry behavior,
# backend selection and preflight analysis toggles.
# External scraper API (key and URL are blank in this template).
web_scraper_api_key =
web_scraper_api_url =
# NOTE(review): timeout/retry_delay are presumably seconds — confirm.
web_scraper_api_timeout = 90
web_scraper_api_retry = 3
web_scraper_api_retry_delay = 1
web_scraper_retry_count = 3
web_scraper_stealth_playwright = False
custom_scrapers_yaml_path = tldw_Server_API/Config_Files/custom_scrapers.yaml
web_scraper_default_backend = auto
web_scraper_ua_mode = fixed
web_scraper_respect_robots = True
# Shared outbound safety mode for scraping and websearch data-plane callers: compat|strict
web_outbound_policy_mode = compat
# Preflight analysis options (all off by default).
web_scraper_preflight_analyzers = False
# NOTE(review): a value of 0 presumably means "no explicit timeout / use default" — confirm.
web_scraper_preflight_timeout_s = 0
web_scraper_preflight_scan_depth = default
web_scraper_preflight_find_all_waf = False
web_scraper_preflight_impersonate = False
web_scraper_preflight_include_results = False

[Logging]
# Application and metrics log destinations.
log_level = INFO
log_file = ./Logs/tldw_app_logs.json
log_metrics_file = ./Logs/tldw_metrics_logs.json
# Log rotation. max_bytes is blank in this template.
# NOTE(review): blank presumably falls back to a built-in size limit — confirm.
max_bytes =
backup_count = 5
# System log aggregation (admin system logs)
system_log_file_path = Databases/system_logs.jsonl
system_log_file_max_entries = 5000

[Moderation]
# Content moderation. `enabled` is False in this template; input_enabled and
# output_enabled are separate per-direction toggles.
enabled = False
input_enabled = True
output_enabled = True
# Actions: block | redact | warn
input_action = block
output_action = redact
# Replacement text configured for the redact action.
redact_replacement = [REDACTED]
# Optional files; can be overridden by env MODERATION_BLOCKLIST_FILE / MODERATION_USER_OVERRIDES_FILE
blocklist_file = tldw_Server_API/Config_Files/moderation_blocklist.txt
user_overrides_file = tldw_Server_API/Config_Files/moderation_user_overrides.json
per_user_overrides = True
# Optional: enable built-in PII rules and category filtering
pii_enabled = False
categories_enabled =
runtime_overrides_file = tldw_Server_API/Config_Files/moderation_runtime_overrides.json

[Redis]
# Redis connection settings; redis_enabled is False in this template.
redis_enabled = False
# Dev default — override via .env for deployment
redis_host = localhost
redis_port = 6379
redis_db = 0
# NOTE(review): TTL is presumably in seconds — confirm.
cache_ttl = 300

[Web-Scraping]
# Distinct from the [Web-Scraper] section elsewhere in this file; both exist.
# Wait time in milliseconds (per the _ms suffix).
stealth_wait_ms = 5000
# Ephemeral web-scraping result store tuning (env-only today):
# - EPHEMERAL_STORE_TTL_SECONDS (default: 900)
# - EPHEMERAL_STORE_MAX_ENTRIES (default: 256)
# - EPHEMERAL_STORE_MAX_BYTES (default: 0 = disabled)

# --- Feature Flags: Personalization & Persona Agent ---
[personalization]
# Feature flag plus three weights and a recency half-life (in days).
enabled = true
# The three weights below sum to 1.0.
# NOTE(review): what each weight scores is not stated here — confirm against the ranking code.
alpha = 0.2
beta = 0.6
gamma = 0.2
recency_half_life_days = 14

[persona]
# Persona agent feature flag and defaults.
enabled = true
default_persona = Research Assistant
voice = default
stt = faster_whisper
max_tool_steps = 3
# NOTE(review): only `legacy_only` is shown for the memory modes; other accepted values are not listed — confirm.
persona_memory_read_mode = legacy_only
persona_memory_write_mode = legacy_only
# Persona dialogue-tree eval/runtime controls.
# Environment overrides use the uppercase names in comments below.
# PERSONA_DIALOGUE_TREE_EVAL_ENABLED
dialogue_tree_eval_enabled = false
# PERSONA_RUNTIME_EXPLORER_ENABLED: runtime exploration is off by default.
runtime_explorer_enabled = false
# PERSONA_RUNTIME_EXPLORER_MAX_DEPTH
runtime_explorer_max_depth = 1
# PERSONA_RUNTIME_EXPLORER_MAX_BRANCHING
runtime_explorer_max_branching = 2
# PERSONA_RUNTIME_EXPLORER_MAX_PROVIDER_CALLS
runtime_explorer_max_provider_calls = 1
# PERSONA_RUNTIME_EXPLORER_TIMEOUT_MS (milliseconds)
runtime_explorer_timeout_ms = 750
# PERSONA_RUNTIME_EXPLORER_MAX_TOKENS
runtime_explorer_max_tokens = 256
# PERSONA_RUNTIME_EXPLORER_P95_ADDED_LATENCY_MS (milliseconds)
runtime_explorer_p95_added_latency_ms = 1000
# PERSONA_RUNTIME_EXPLORER_LLM_JUDGES_ENABLED: LLM judges cannot authorize actions.
runtime_explorer_llm_judges_enabled = false
# PERSONA_DIALOGUE_TREE_TRACE_RETENTION_DAYS
dialogue_tree_trace_retention_days = 7

[persona.rbac]
# Persona permission flags; both are off in this template.
# NOTE(review): the dotted section name is a flat name to most INI parsers, not a nested [persona] child — confirm how it is looked up.
allow_export = false
allow_delete = false
