Documentation Index
Fetch the complete documentation index at: https://mintlify.com/BerriAI/litellm/llms.txt
Use this file to discover all available pages before exploring further.
Quick Start with Docker
The fastest way to run LiteLLM Proxy in production.
Pull the Image
docker pull ghcr.io/berriai/litellm:main-latest
Run with Docker
# Start the proxy detached, publish port 4000 and pass the provider key as an env var
docker run --detach \
  --name litellm-proxy \
  --publish 4000:4000 \
  --env OPENAI_API_KEY=sk-... \
  ghcr.io/berriai/litellm:main-latest
Docker Compose Setup
For production deployments with PostgreSQL and Prometheus.
Create docker-compose.yml
# docker-compose.yml: LiteLLM proxy + PostgreSQL + Prometheus
services:
  # LiteLLM proxy, published on host port 4000
  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    ports:
      - "4000:4000"
    volumes:
      # Proxy configuration; handed to the process through `command` below
      - ./config.yaml:/app/config.yaml
    environment:
      # Must match POSTGRES_USER / POSTGRES_PASSWORD / POSTGRES_DB of the `db` service.
      # Example credentials: change them before deploying.
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    env_file:
      - .env
    depends_on:
      # Do not start the proxy until the database health check passes
      db:
        condition: service_healthy
    healthcheck:
      test:
        - CMD-SHELL
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    command:
      - "--config=/app/config.yaml"

  # PostgreSQL database used by the proxy (see DATABASE_URL above)
  db:
    image: postgres:16
    restart: always
    # Fixed name so `docker exec litellm_db ...` works for backup/restore
    container_name: litellm_db
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

  # Prometheus, configured by ./prometheus.yml, UI on host port 9090
  prometheus:
    image: prom/prometheus
    volumes:
      - prometheus_data:/prometheus
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=15d"
    restart: always

# Named volumes so database and metrics data survive container recreation
volumes:
  prometheus_data:
    driver: local
  postgres_data:
    name: litellm_postgres_data
Create config.yaml
# config.yaml: LiteLLM proxy configuration
# Values written as os.environ/<NAME> refer to environment variables (see the .env file).
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480
      timeout: 300
  - model_name: claude-3-5-sonnet
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY

litellm_settings:
  drop_params: true
  success_callback: ["prometheus"]
  num_retries: 3
  request_timeout: 600
  telemetry: false

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  store_model_in_db: true
  database_url: os.environ/DATABASE_URL
Create .env File
# Provider API keys (referenced from config.yaml as os.environ/OPENAI_API_KEY and os.environ/ANTHROPIC_API_KEY)
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
# LiteLLM settings (referenced from general_settings in config.yaml).
# DATABASE_URL is also set under `environment` in docker-compose.yml; keep both values in sync.
LITELLM_MASTER_KEY=sk-1234-change-this
DATABASE_URL=postgresql://llmproxy:dbpassword9090@db:5432/litellm
# Optional: Redis connection settings (presumably only needed when Redis caching is enabled; confirm)
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_PASSWORD=your-redis-password
Never commit .env files to version control. Add .env to your .gitignore.
Create prometheus.yml
# prometheus.yml: scrape the LiteLLM proxy every 15 seconds
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'litellm'
    static_configs:
      # `litellm` is the compose service name; 4000 is the proxy port
      - targets: ['litellm:4000']
Start the Stack
docker-compose up -d
Check Logs
docker-compose logs -f litellm
Wait for the message: Uvicorn running on http://0.0.0.0:4000
Verify Health
curl http://localhost:4000/health
Access UI
Open browser to http://localhost:4000/ui
Dockerfile Reference
The LiteLLM Dockerfile uses a multi-stage build process:
# Base images for the build and runtime stages (both default to the same Wolfi base image)
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base
# Builder stage: builds the Admin UI, the LiteLLM wheel, and wheels for everything in requirements.txt
FROM $LITELLM_BUILD_IMAGE AS builder
WORKDIR /app
USER root
# Install build dependencies (compiler, Python + headers, pip, OpenSSL + headers) and the `build` frontend
RUN apk add --no-cache bash gcc py3-pip python3 python3-dev openssl openssl-dev
RUN python -m pip install build
# Copy the whole build context into /app
COPY . .
# Build Admin UI: strip trailing CR characters from the script, make it executable, then run it
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && \
chmod +x docker/build_admin_ui.sh && \
./docker/build_admin_ui.sh
# Build Python package: clear dist/, build into it, install the resulting wheel,
# and pre-build dependency wheels into /wheels/ (copied by the runtime stage)
RUN rm -rf dist/* && python -m build
RUN pip install dist/*.whl
RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
# Runtime stage: installs the wheels produced by the builder and runs the proxy
FROM $LITELLM_RUNTIME_IMAGE AS runtime
USER root
WORKDIR /app
# Install runtime dependencies
RUN apk add --no-cache bash openssl tzdata nodejs npm python3 py3-pip libsndfile
# Copy the source tree first: `prisma generate` below reads
# ./litellm/proxy/schema.prisma and the entrypoint scripts live under docker/,
# so both must exist in /app before those steps run.
COPY . .
# Copy built artifacts from the builder stage
COPY --from=builder /app/dist/*.whl .
COPY --from=builder /wheels/ /wheels/
# Install the package and its dependencies offline from the copied wheels, then delete the wheels
RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && \
    rm -f *.whl && rm -rf /wheels
# Generate Prisma client
RUN prisma generate --schema=./litellm/proxy/schema.prisma
# Setup entrypoints: strip trailing CR characters and make the scripts executable
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
EXPOSE 4000/tcp
RUN apk add --no-cache supervisor
COPY docker/supervisord.conf /etc/supervisord.conf
# Arguments given to `docker run` / compose `command` replace CMD and are passed to the entrypoint
ENTRYPOINT ["docker/prod_entrypoint.sh"]
CMD ["--port", "4000"]
Production Deployment
With Redis Cache
Add Redis to your docker-compose.yml:
# Merge into the existing `services:` and `volumes:` sections of docker-compose.yml
services:
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    # Append-only file persistence, stored on the redis_data volume
    command: redis-server --appendonly yes
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5

volumes:
  redis_data:
    driver: local
Update config.yaml:
# `redis` is the compose service name of the Redis container
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379

router_settings:
  redis_host: redis
  redis_port: 6379
Environment-Specific Configs
# docker-compose.prod.yml
# Override file: pins the stable image and adds replica/resource settings.
# NOTE(review): the base compose file publishes a fixed host port ("4000:4000");
# several replicas cannot all bind it, so adjust that mapping before using replicas.
services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    restart: always
    deploy:
      replicas: 3
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G
Kubernetes Deployment
# Deployment: three replicas of the LiteLLM proxy
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-proxy
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm-proxy
  template:
    metadata:
      labels:
        app: litellm-proxy
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:main-stable
          # Point the proxy at the mounted config file. `args` replaces the
          # image's default CMD (--port 4000), so the port is repeated here.
          args:
            - "--config"
            - "/app/config.yaml"
            - "--port"
            - "4000"
          ports:
            - containerPort: 4000
          env:
            # Secrets come from the `litellm-secrets` Secret, not from the manifest
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: database-url
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secrets
                  key: master-key
          volumeMounts:
            # Mount only the config.yaml key of the ConfigMap as a single file
            - name: config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 20
            periodSeconds: 5
      volumes:
        - name: config
          configMap:
            name: litellm-config
---
# Service: exposes pods labelled app=litellm-proxy on port 4000 through a load balancer
apiVersion: v1
kind: Service
metadata:
  name: litellm-proxy
spec:
  selector:
    app: litellm-proxy
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer
Monitoring & Logs
View Logs
# Stream logs from every service in the stack
docker-compose logs --follow
# Stream logs from the litellm service only
docker-compose logs --follow litellm
# Print only the most recent 100 lines of the litellm service
docker-compose logs --tail=100 litellm
Access Prometheus
Open http://localhost:9090 to view Prometheus metrics.
Useful queries:
litellm_proxy_total_requests_metric - Total requests
litellm_request_total_latency_metric - Request latency
litellm_spend_metric - Total spend
Health Checks
# Liveness: is the proxy process up and answering HTTP?
curl http://localhost:4000/health/liveliness
# Readiness: can the proxy accept traffic?
curl http://localhost:4000/health/readiness
# Full health check
curl http://localhost:4000/health
Maintenance
Update to Latest Version
# Fetch newer images for the services defined in the compose file
docker-compose pull
# Recreate containers whose image or configuration changed, detached
docker-compose up -d
Backup Database
docker exec litellm_db pg_dump -U llmproxy litellm > backup.sql
Restore Database
# Feed the dump to psql inside the database container via stdin
docker exec -i litellm_db psql -U llmproxy litellm < backup.sql
Scale Services
# Scale to 3 replicas
# NOTE: the example compose file publishes a fixed host port ("4000:4000"). Only one
# container can bind that host port, so change the mapping (for example, expose the
# port only inside the compose network behind a load balancer) before scaling.
docker-compose up -d --scale litellm=3
Troubleshooting
Container Won’t Start
Check logs:
docker-compose logs litellm
Common issues:
- Database not ready: Wait for DB health check
- Port conflict: Change port mapping
- Invalid config: Validate YAML syntax
High Memory Usage
Increase memory limits:
services:
  litellm:
    deploy:
      resources:
        limits:
          # Upper bound on memory for each litellm container
          memory: 4G
Database Connection Issues
Verify DATABASE_URL:
# Address the proxy by its compose service name; no container is literally named "litellm".
# -T disables TTY allocation so the output pipes cleanly into grep.
docker-compose exec -T litellm env | grep DATABASE_URL
Test connection:
docker exec litellm_db psql -U llmproxy -d litellm -c "SELECT 1;"