chatterbox-api/.env.example.docker at master · progress44/chatterbox-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Chatterbox TTS API Configuration - Docker Version
# Copy this file to .env when deploying with Docker

# =============================================================================
# Server Configuration
# =============================================================================

# Port to run the API server on
PORT=4123

# Port exposed by the frontend/proxy in fullstack mode (the API then runs behind the proxy).
# Only used when running with --profile frontend
FRONTEND_PORT=4321

# Host to bind the server to (0.0.0.0 for all interfaces)
HOST=0.0.0.0

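# CORS origins (comma-separated list). "*" allows requests from any origin;
# list explicit origins instead when the API is publicly reachable.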
CORS_ORIGINS=*

# =============================================================================
# Voice and Model Configuration (Docker Paths)
# =============================================================================

# Device to use for inference (auto/cuda/mps/cpu)
# auto = automatically detect best available device
DEVICE=auto

# Path to the voice sample file for cloning (Docker internal path)
VOICE_SAMPLE_PATH=/app/voice-sample.mp3

# Directory to cache downloaded models (Docker internal path)
MODEL_CACHE_DIR=/cache

# Directory to store uploaded voice library (Docker internal path)
VOICE_LIBRARY_DIR=/voices

# =============================================================================
# TTS Model Settings
# =============================================================================

# Enable multilingual TTS model (true/false)
# true = Support 23 languages with multilingual model (default)
# false = English-only with standard model
USE_MULTILINGUAL_MODEL=true

# Emotion intensity/exaggeration level (0.25 - 2.0)
# 0.5 = neutral, higher values = more expressive
EXAGGERATION=0.5

# CFG weight for pace control (0.0 - 1.0)
# Lower values = faster speech, higher values = slower/more deliberate
CFG_WEIGHT=0.5

# Sampling temperature (0.05 - 5.0)
# Lower values = more deterministic, higher values = more random/creative
TEMPERATURE=0.8

# =============================================================================
# Text Processing
# =============================================================================

# Maximum characters per text chunk (recommended: 200-300)
MAX_CHUNK_LENGTH=280

# Maximum total characters for entire input (hard limit)
MAX_TOTAL_LENGTH=3000

# =============================================================================
# Long Text TTS Configuration (Docker Paths)
# =============================================================================

# Directory for long text job data and state persistence (Docker internal path)
LONG_TEXT_DATA_DIR=/data/long_text_jobs

# Maximum characters allowed for long text TTS (default: 100,000)
LONG_TEXT_MAX_LENGTH=100000

# Chunk size, in characters, for splitting long text (default: 2500; must be less than MAX_TOTAL_LENGTH)
LONG_TEXT_CHUNK_SIZE=2500

# Silence padding between chunks in milliseconds (default: 200ms)
LONG_TEXT_SILENCE_PADDING_MS=200

# Number of days to keep completed long text jobs (default: 7)
LONG_TEXT_JOB_RETENTION_DAYS=7

# Maximum number of concurrent long text jobs (default: 3)
LONG_TEXT_MAX_CONCURRENT_JOBS=3

# =============================================================================
# Docker Volume Configuration
# =============================================================================

# Host path to voice sample file (for Docker volume mounting)
VOICE_SAMPLE_HOST_PATH=./voice-sample.mp3

# Host directory containing voice samples (optional)
# VOICE_SAMPLES_DIR=./voice-samples

# =============================================================================
# Advanced Settings (you usually don't need to change these)
# =============================================================================

# Memory Management
# Clean up memory every N requests (default: 5)
MEMORY_CLEANUP_INTERVAL=5

# Clear CUDA cache every N requests (default: 3)
CUDA_CACHE_CLEAR_INTERVAL=3

# Enable detailed memory monitoring and logging (true/false)
ENABLE_MEMORY_MONITORING=true

# HuggingFace cache directory (Docker internal path)
# HF_HOME=/cache/huggingface

# Disable HuggingFace telemetry (true/false)
# HF_HUB_DISABLE_TELEMETRY=true

# PyTorch cache directory (Docker internal path)
# TORCH_HOME=/cache/torch

# =============================================================================
# Examples for different use cases:
# =============================================================================

# For more expressive/dramatic speech:
# EXAGGERATION=0.8
# CFG_WEIGHT=0.3
# TEMPERATURE=1.0

# For neutral/professional speech:
# EXAGGERATION=0.4
# CFG_WEIGHT=0.6
# TEMPERATURE=0.6

# For faster processing (less stable; chunk length is above the recommended 200-300 range):
# MAX_CHUNK_LENGTH=400
# TEMPERATURE=0.5

# For slower, more careful speech:
# CFG_WEIGHT=0.8
# TEMPERATURE=0.4