Batch processing allows you to send asynchronous requests at significantly reduced costs. LiteLLM supports batch APIs across providers including OpenAI and Anthropic.
import json

from litellm import batch_completion_create

# NOTE(review): confirm the `batch_completion_create` helper name and its
# `input_file_path` argument against the installed LiteLLM release — TODO verify.

# One chat-completion request per prompt, keyed by the custom_id that will
# identify it in the batch.
prompts = {
    "request-1": "What is AI?",
    "request-2": "What is ML?",
}
requests = [
    {
        "custom_id": custom_id,
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 100,
        },
    }
    for custom_id, prompt in prompts.items()
]

# Serialize as JSON Lines: one request object per line.
with open("batch_requests.jsonl", "w") as f:
    f.writelines(json.dumps(req) + "\n" for req in requests)

# Submit the request file as a batch and report the ID of the returned object.
batch = batch_completion_create(
    input_file_path="batch_requests.jsonl",
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(f"Batch ID: {batch.id}")
from litellm import batch_completion_retrieve

# Look up a previously created batch by its ID and report where it stands.
batch = batch_completion_retrieve(batch_id="batch_abc123")

print(f"Status: {batch.status}")
print(f"Request counts: {batch.request_counts}")

# Status values a batch can report:
#   validating   - checking requests
#   in_progress  - processing
#   finalizing   - completing
#   completed    - done
#   failed       - error occurred
#   expired      - took too long
#   cancelled    - manually cancelled
import json

from litellm import batch_completion_retrieve

# Fetch the batch once and act on the status it reports right now.
# This snippet does not poll: if the status is anything other than
# "completed", nothing below runs.
batch = batch_completion_retrieve(batch_id="batch_abc123")

if batch.status == "completed":
    # Presumably the identifier of the file that holds this batch's results.
    # NOTE(review): nothing in this snippet downloads that file; the loop
    # below assumes its contents were already saved locally as
    # batch_results.jsonl — TODO confirm / add the download step.
    output_file_id = batch.output_file_id

    # JSON Lines: one JSON object per line. Decode as UTF-8 explicitly so
    # non-ASCII text is read the same way on every platform instead of
    # depending on the locale's default encoding.
    with open("batch_results.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            # A blank line is not valid JSON; skip it instead of crashing.
            if not line.strip():
                continue
            result = json.loads(line)
            print(f"Request {result['custom_id']}: {result['response']}")
from litellm import batch_completion_cancel

# Ask for the batch to be cancelled, then report the status carried by the
# object that the call returns.
batch = batch_completion_cancel(batch_id="batch_abc123")

print(f"Status: {batch.status}")
import json

from litellm import batch_completion_retrieve

# Fetch the batch once; the branches below handle the "completed" and
# "failed" statuses and ignore every other status.
batch = batch_completion_retrieve(batch_id="batch_abc123")

if batch.status == "completed":
    # Walk the results file, one JSON object per line. Decode as UTF-8
    # explicitly so non-ASCII text does not depend on the locale default.
    # NOTE(review): this snippet does not create results.jsonl; it assumes
    # the batch output was already saved under that name — TODO confirm.
    with open("results.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            # A blank line is not valid JSON; skip it instead of crashing.
            if not line.strip():
                continue
            result = json.loads(line)

            # Test the VALUE of "error", not mere key presence: OpenAI-format
            # batch output includes an "error" key on every line and sets it
            # to null for requests that succeeded, so `"error" in result`
            # would report every successful request as an error.
            error = result.get("error")
            if error is not None:
                print(f"Error in {result['custom_id']}: {error}")
            else:
                # Process successful result: the body under "response" is
                # the payload to work with from here on.
                response = result["response"]["body"]
                print(f"Success: {result['custom_id']}")
elif batch.status == "failed":
    # The batch as a whole failed; report the errors attached to it.
    print(f"Batch failed: {batch.errors}")