Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/BerriAI/litellm/llms.txt

Use this file to discover all available pages before exploring further.

Overview

LiteLLM’s prompt management system lets you store and version prompts in an external prompt management tool and inject them dynamically into your requests. This enables:
  • Centralized prompt storage and versioning
  • A/B testing different prompt versions
  • Dynamic prompt updates without code changes
  • Team collaboration on prompt engineering
  • Integration with prompt management platforms

Supported Platforms

  • Langfuse: Full-featured prompt management with versioning
  • Custom: Build your own prompt management integration

Quick Start

Step 1: Configure Prompt Management

Set up your prompt management integration:
import litellm
from litellm.integrations.langfuse import LangfusePromptManagement

# Initialize Langfuse prompt management
litellm.prompt_management = LangfusePromptManagement(
    langfuse_public_key="pk_...",
    langfuse_secret_key="sk_...",
    langfuse_host="https://cloud.langfuse.com"
)
Step 2: Use Prompts in Completions

Reference prompts by ID:
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "What is AI?"}],
    prompt_id="my-prompt-template",
    prompt_variables={
        "topic": "artificial intelligence",
        "detail_level": "beginner"
    }
)

print(response.choices[0].message.content)
Step 3: Version Your Prompts

Specify prompt versions or labels:
# Use specific version
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_version=3
)

# Use labeled version
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_label="production"
)

Creating Custom Prompt Management

Implement the Base Class

from litellm.integrations.prompt_management_base import PromptManagementBase
from litellm.types.prompts.init_prompts import PromptSpec
from typing import Optional, List, Dict, Tuple

class CustomPromptManagement(PromptManagementBase):
    """Example prompt-management integration backed by a custom HTTP API.

    Prompts are fetched from ``{_PROMPTS_URL}/{prompt_id}`` and ``{variable}``
    placeholders in string message content are filled in from
    ``prompt_variables``.

    The compile helpers in this example look prompts up by ``prompt_id`` only;
    ``prompt_spec`` is accepted for interface compatibility but is not read.
    """

    # Placeholder endpoint for the example; point it at your own backend.
    _PROMPTS_URL = "https://your-api.com/prompts"

    @property
    def integration_name(self) -> str:
        """Unique identifier for your integration."""
        return "my-prompt-manager"

    def should_run_prompt_management(
        self,
        prompt_id: Optional[str],
        prompt_spec: Optional[PromptSpec],
        dynamic_callback_params,
    ) -> bool:
        """
        Determine if prompt management should be activated.

        Returns:
            True if prompt_id is provided or prompt_spec exists
        """
        return prompt_id is not None or prompt_spec is not None

    def _compile_prompt_helper(
        self,
        prompt_id: Optional[str],
        prompt_spec: Optional[PromptSpec],
        prompt_variables: Optional[dict],
        dynamic_callback_params,
        prompt_label: Optional[str] = None,
        prompt_version: Optional[int] = None,
    ) -> dict:
        """
        Fetch and compile prompt from your backend.

        Returns:
            Dict with keys:
            - prompt_id: str
            - prompt_template: List[Message]
            - prompt_template_model: Optional[str]
            - prompt_template_optional_params: Optional[Dict]
            - completed_messages: Optional[List[Message]]

        Raises:
            ValueError: if prompt_id is None.
            httpx.HTTPStatusError: if the backend answers with a 4xx/5xx status.
            KeyError: if the backend payload has no "messages" entry.
        """
        prompt_data = self._fetch_prompt(
            prompt_id=prompt_id,
            version=prompt_version,
            label=prompt_label,
        )
        return self._build_compiled_prompt(prompt_id, prompt_data, prompt_variables)

    async def async_compile_prompt_helper(
        self,
        prompt_id: Optional[str],
        prompt_variables: Optional[dict],
        dynamic_callback_params,
        prompt_spec: Optional[PromptSpec] = None,
        prompt_label: Optional[str] = None,
        prompt_version: Optional[int] = None,
    ) -> dict:
        """
        Async version of _compile_prompt_helper.

        Returns the same dict shape and raises the same exceptions; only the
        backend fetch is awaited.
        """
        prompt_data = await self._async_fetch_prompt(
            prompt_id=prompt_id,
            version=prompt_version,
            label=prompt_label,
        )
        return self._build_compiled_prompt(prompt_id, prompt_data, prompt_variables)

    def _build_compiled_prompt(
        self,
        prompt_id: Optional[str],
        prompt_data: dict,
        prompt_variables: Optional[dict],
    ) -> dict:
        """Turn a fetched prompt payload into the compile-helper result dict.

        Shared by the sync and async helpers so both return the same shape.
        """
        compiled_messages = self._replace_variables(
            template=prompt_data["messages"],
            variables=prompt_variables or {},
        )

        return {
            "prompt_id": prompt_id,
            "prompt_template": compiled_messages,
            "prompt_template_model": prompt_data.get("model"),
            "prompt_template_optional_params": prompt_data.get("params"),
            "completed_messages": None,  # Will be merged with client messages
        }

    def _prompt_request(self, prompt_id, version, label) -> Tuple[str, dict]:
        """Build the (url, query params) pair used by both fetch methods.

        Raises:
            ValueError: if prompt_id is None, since the URL is keyed by ID.
        """
        from urllib.parse import quote

        if prompt_id is None:
            raise ValueError("prompt_id is required to fetch a prompt")

        # Escape the ID so characters such as "/" or "?" cannot alter the path.
        escaped_id = quote(str(prompt_id), safe="")
        url = f"{self._PROMPTS_URL}/{escaped_id}"

        # Only send the selectors that were actually provided.
        candidates = {"version": version, "label": label}
        params = {name: value for name, value in candidates.items() if value is not None}
        return url, params

    def _fetch_prompt(self, prompt_id, version=None, label=None):
        """Sync fetch from your backend; returns the decoded JSON payload."""
        # Implement your API call
        import httpx

        url, params = self._prompt_request(prompt_id, version, label)
        response = httpx.get(url, params=params)
        # Fail loudly on 4xx/5xx instead of treating an error body as a prompt.
        response.raise_for_status()

        return response.json()

    async def _async_fetch_prompt(self, prompt_id, version=None, label=None):
        """Async fetch from your backend; returns the decoded JSON payload."""
        import httpx

        url, params = self._prompt_request(prompt_id, version, label)
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=params)
            # Fail loudly on 4xx/5xx instead of treating an error body as a prompt.
            response.raise_for_status()

            return response.json()

    def _replace_variables(self, template: List[dict], variables: dict) -> List[dict]:
        """Replace {variable} placeholders in template.

        Args:
            template: Messages whose string ``content`` may contain
                ``{name}`` placeholders.
            variables: Mapping of placeholder name to value; values are
                converted with ``str()``.

        Returns:
            A new list of new message dicts. Keys other than ``content`` are
            carried over unchanged, and messages whose content is not a string
            (e.g. ``None`` or a list of content parts) are copied as-is.
        """
        import re

        if not variables:
            return [dict(message) for message in template]

        # Map each literal placeholder to its replacement text.
        replacements: Dict[str, str] = {}
        for key, value in variables.items():
            replacements.setdefault(f"{{{key}}}", str(value))

        # A single regex pass means substituted text is never re-scanned, so a
        # value that itself contains "{other}" is inserted verbatim.
        placeholder_pattern = re.compile(
            "|".join(
                re.escape(placeholder)
                for placeholder in sorted(replacements, key=len, reverse=True)
            )
        )

        compiled = []
        for message in template:
            content = message.get("content")
            if isinstance(content, str):
                content = placeholder_pattern.sub(
                    lambda match: replacements[match.group(0)], content
                )
                compiled.append({**message, "content": content})
            else:
                compiled.append(dict(message))

        return compiled

# Register your prompt management
import litellm

litellm.prompt_management = CustomPromptManagement()

Using Your Custom Integration

# Use prompts from your system
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Additional context"}],
    prompt_id="customer-support-template",
    prompt_variables={
        "customer_name": "John Doe",
        "issue_type": "billing",
        "priority": "high"
    }
)

Advanced Features

Override Model from Prompt

# Prompt template can specify the model
response = await litellm.acompletion(
    model="gpt-3.5-turbo",  # Fallback model
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="special-model-prompt",  # May override to gpt-4
    ignore_prompt_manager_model=False  # Allow override (default)
)

# Force use of specified model
response = await litellm.acompletion(
    model="gpt-3.5-turbo",  # Always use this
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="special-model-prompt",
    ignore_prompt_manager_model=True  # Ignore prompt's model
)

Override Parameters from Prompt

# Prompt template can include parameters (temperature, max_tokens, etc.)
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="creative-prompt",  # May set temperature=0.9
    temperature=0.5,  # Overridden by prompt
    ignore_prompt_manager_optional_params=False  # Allow override
)

# Keep your parameters
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    prompt_id="creative-prompt",
    temperature=0.5,  # Use this instead
    ignore_prompt_manager_optional_params=True  # Ignore prompt's params
)

Message Merging

The compiled prompt template (with variables filled in) is prepended to the messages you pass in the request:
# Prompt template:
# [
#   {"role": "system", "content": "You are a helpful assistant."},
#   {"role": "user", "content": "Context: {context}"}
# ]

response = await litellm.acompletion(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "What is the answer?"}
    ],
    prompt_id="template-with-context",
    prompt_variables={"context": "Paris is the capital of France."}
)

# Final messages sent to model:
# [
#   {"role": "system", "content": "You are a helpful assistant."},
#   {"role": "user", "content": "Context: Paris is the capital of France."},
#   {"role": "user", "content": "What is the answer?"}
# ]

Langfuse Integration

import litellm
from litellm.integrations.langfuse import LangfusePromptManagement

# Initialize
litellm.prompt_management = LangfusePromptManagement(
    langfuse_public_key="pk_lf_...",
    langfuse_secret_key="sk_lf_...",
    langfuse_host="https://cloud.langfuse.com"  # or your self-hosted URL
)

# Use with versioning
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_version=2,  # Specific version
    prompt_variables={"var1": "value1"}
)

# Use with labels
response = await litellm.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Query"}],
    prompt_id="my-prompt",
    prompt_label="production",  # Use labeled version
    prompt_variables={"var1": "value1"}
)

Using with LiteLLM Proxy

Configure prompt management in your proxy config:
config.yaml
prompt_management:
  provider: langfuse
  langfuse_public_key: pk_lf_...
  langfuse_secret_key: sk_lf_...
  langfuse_host: https://cloud.langfuse.com
Then call the proxy through the OpenAI SDK, passing the prompt fields in extra_body:
import openai

client = openai.OpenAI(
    api_key="proxy-key",
    base_url="http://localhost:4000"
)

# Prompt management via extra_body
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    extra_body={
        "prompt_id": "customer-support",
        "prompt_variables": {
            "customer_name": "Jane",
            "issue": "login problem"
        }
    }
)

Best Practices

Always use versioning for production prompts:
# Good: Explicit version
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt",
    prompt_version=5  # Locked version
)

# Or use labels
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt",
    prompt_label="stable"  # Points to tested version
)

# Risky: Always latest
response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="prod-prompt"  # Gets latest, may change unexpectedly
)
Check that all required variables are provided:
def validate_variables(prompt_id: str, variables: dict) -> bool:
    """Return True when every variable required by ``prompt_id`` is in ``variables``.

    The required names come from ``get_required_variables(prompt_id)``; the
    check stops at the first missing name.
    """
    for name in get_required_variables(prompt_id):
        if name not in variables:
            return False
    return True

if validate_variables("my-prompt", prompt_variables):
    response = await litellm.acompletion(
        model="gpt-4",
        messages=messages,
        prompt_id="my-prompt",
        prompt_variables=prompt_variables
    )
else:
    raise ValueError("Missing required prompt variables")
Test different prompt versions:
import random

# Random A/B test
prompt_version = random.choice([1, 2])

response = await litellm.acompletion(
    model="gpt-4",
    messages=messages,
    prompt_id="experiment-prompt",
    prompt_version=prompt_version,
    metadata={"experiment": f"version_{prompt_version}"}  # Track in logs
)
Reduce API calls by caching:
from functools import lru_cache

@lru_cache(maxsize=100)
def get_cached_prompt(prompt_id: str, version: int):
    """Compile a static (variable-free) prompt once per (prompt_id, version).

    Args:
        prompt_id: ID of the prompt to fetch.
        version: Prompt version to pin; part of the cache key.

    Returns:
        The dict produced by the registered integration's
        ``_compile_prompt_helper``.

    NOTE: ``lru_cache`` hands every caller the same dict object, so copy the
    result (e.g. ``copy.deepcopy``) before mutating it.
    """
    return litellm.prompt_management._compile_prompt_helper(
        prompt_id=prompt_id,
        # prompt_spec is a required parameter of _compile_prompt_helper;
        # omitting it raises TypeError.
        prompt_spec=None,
        prompt_variables={},
        dynamic_callback_params={},
        prompt_version=version,
    )

# Use cached version for static prompts
prompt = get_cached_prompt("static-prompt", 3)

Reference

Source Code

  • Base class: litellm/integrations/prompt_management_base.py:22
  • Custom prompt management: litellm/proxy/custom_prompt_management.py:10
  • Langfuse integration: litellm/integrations/langfuse/langfuse_prompt_management.py

Response Format

The compile helpers (`_compile_prompt_helper` / `async_compile_prompt_helper`) return a dict with the following shape:
{
    "prompt_id": str,
    "prompt_template": List[AllMessageValues],
    "prompt_template_model": Optional[str],
    "prompt_template_optional_params": Optional[Dict[str, Any]],
    "completed_messages": Optional[List[AllMessageValues]]
}

Build docs developers (and LLMs) love