Documentation Index
Fetch the complete documentation index at: https://mintlify.com/BerriAI/litellm/llms.txt
Use this file to discover all available pages before exploring further.
Overview
LiteLLM provides comprehensive guardrails to ensure safe and policy-compliant LLM usage. Guardrails can inspect and moderate content before and after LLM calls, block inappropriate requests, and enforce custom policies.
What are Guardrails?
Guardrails are hooks that:
- Pre-call: Validate inputs before sending to LLM
- Post-call: Validate outputs before returning to user
- During call: Monitor streaming responses in real-time
They can:
- Block requests/responses
- Modify content
- Log policy violations
- Alert on issues
Built-in Guardrails
LiteLLM includes several pre-built guardrail integrations:
Azure Content Safety
from litellm import Router
router = Router(
model_list=[...],
guardrail_list=[
{
"guardrail_name": "azure-content-safety",
"litellm_params": {
"api_key": "your-azure-key",
"api_base": "https://<resource>.cognitiveservices.azure.com",
"api_version": "2023-10-01"
},
"mode": "pre_call", # Check before LLM call
"default_on": True
}
]
)
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}]
)
Azure Prompt Shield
Protect against prompt injection attacks:
router = Router(
model_list=[...],
guardrail_list=[
{
"guardrail_name": "azure-prompt-shield",
"litellm_params": {
"api_key": "your-azure-key",
"api_base": "https://<resource>.cognitiveservices.azure.com"
},
"mode": "pre_call"
}
]
)
Aporia AI Guardrails
router = Router(
model_list=[...],
guardrail_list=[
{
"guardrail_name": "aporia",
"litellm_params": {
"api_key": "your-aporia-key",
"api_base": "https://api.aporia.com"
},
"mode": "during_call", # Real-time monitoring
"default_on": True
}
]
)
LlamaGuard
router = Router(
model_list=[...],
guardrail_list=[
{
"guardrail_name": "llamaguard",
"litellm_params": {
"model": "llamaguard-7b"
},
"mode": "post_call" # Check after LLM response
}
]
)
Custom Guardrails
Create your own guardrail logic:
Basic Custom Guardrail
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm import Router
class ProfanityGuardrail(CustomGuardrail):
    """Pre-call guardrail that rejects requests containing a banned word.

    Matching is a case-insensitive substring test, so a banned word that is
    embedded in a longer, harmless word is also rejected.
    """

    def __init__(self):
        super().__init__(
            guardrail_name="profanity-filter",
            supported_event_hooks=["pre_call", "post_call"],
            event_hook="pre_call"
        )
        self.banned_words = ["badword1", "badword2"]

    async def async_pre_call_hook(
        self,
        user_api_key_dict,
        cache,
        call_type,
        data,
        **kwargs
    ):
        """Scan every message in ``data["messages"]`` for banned words.

        Returns:
            ``data`` unchanged when no banned word is found.

        Raises:
            ValueError: naming the first banned word that was found.
        """
        for message in data.get("messages") or []:
            content = message.get("content")
            if isinstance(content, list):
                # Multimodal messages carry a list of parts; only the text
                # parts can contain words, so join those and skip the rest.
                content = " ".join(
                    part["text"]
                    for part in content
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )
            if not isinstance(content, str):
                # e.g. content is None on tool-call messages: nothing to scan.
                continue
            lowered = content.lower()  # lower-case once, not once per word
            for word in self.banned_words:
                if word.lower() in lowered:
                    raise ValueError(f"Profanity detected: {word}")
        return data
# Use the guardrail
profanity_guardrail = ProfanityGuardrail()
router = Router(
model_list=[...]
)
router.callbacks = [profanity_guardrail]
# This will be blocked
try:
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "This contains badword1"}]
)
except ValueError as e:
print(f"Blocked: {e}")
Post-Call Guardrail
class PIIDetectionGuardrail(CustomGuardrail):
    """Post-call guardrail that masks email addresses in the model output."""

    def __init__(self):
        super().__init__(
            guardrail_name="pii-detection",
            event_hook="post_call"
        )

    async def async_post_call_success_hook(
        self,
        data,
        user_api_key_dict,
        response,
        **kwargs
    ):
        """Replace each email address in the response with ``[EMAIL REDACTED]``.

        Every choice is masked, not only the first, so requests with ``n > 1``
        do not leak addresses through the remaining choices.

        Returns:
            The same ``response`` object, mutated in place.
        """
        import re

        # The TLD class is [A-Za-z]; a "|" inside a character class is a
        # literal pipe, not alternation, and would wrongly match "user@x.a|b".
        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        for choice in getattr(response, "choices", None) or []:
            message = getattr(choice, "message", None)
            content = getattr(message, "content", None)
            if isinstance(content, str):  # content is None for tool calls
                message.content = re.sub(email_pattern, "[EMAIL REDACTED]", content)
        return response
pii_guardrail = PIIDetectionGuardrail()
router.callbacks = [pii_guardrail]
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "What's your email?"}]
)
# Emails in response are automatically masked
Streaming Guardrail
class StreamingModerationGuardrail(CustomGuardrail):
    """Guardrail that aborts a streamed response when a banned phrase appears.

    NOTE(review): confirm that ``async_during_call_hook`` is invoked with the
    response stream in your LiteLLM version; the published CustomGuardrail
    docs describe ``async_post_call_streaming_iterator_hook`` for streamed
    output -- TODO verify before relying on this example.
    """

    def __init__(self):
        super().__init__(
            guardrail_name="streaming-moderation",
            event_hook="during_call"
        )

    async def async_during_call_hook(
        self,
        data,
        user_api_key_dict,
        call_type,
        **kwargs
    ):
        """Re-yield chunks from ``data`` until a banned phrase is seen.

        Chunks yielded before the phrase completes have already been passed
        on; only the rest of the stream is cut off.

        Raises:
            ValueError: when a banned phrase appears in the streamed text.
        """
        banned_phrases = ["inappropriate content"]
        # A phrase can straddle two chunks, so carry over the last
        # (longest phrase - 1) characters and search tail + new text.
        tail_len = max(len(phrase) for phrase in banned_phrases) - 1
        tail = ""
        async for chunk in data:
            choices = getattr(chunk, "choices", None)
            # Some chunks carry no choices; treat them as empty text.
            text = (choices[0].delta.content or "") if choices else ""
            window = tail + text.lower()
            for phrase in banned_phrases:
                if phrase in window:
                    raise ValueError(f"Inappropriate content detected: {phrase}")
            tail = window[-tail_len:] if tail_len > 0 else ""
            yield chunk
moderation_guardrail = StreamingModerationGuardrail()
router.callbacks = [moderation_guardrail]
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Tell me a story"}],
stream=True
)
Guardrail Modes
Pre-Call Mode
Validate before sending to LLM:
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
event_hook="pre_call"
)
Use cases:
- Input validation
- Prompt injection detection
- PII detection in prompts
- Rate limiting by content
Post-Call Mode
Validate after LLM response:
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
event_hook="post_call"
)
Use cases:
- Output content filtering
- PII redaction
- Fact checking
- Citation verification
During-Call Mode
Monitor streaming responses:
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
event_hook="during_call"
)
Use cases:
- Real-time content moderation
- Stop generation early
- Token-by-token filtering
Guardrail Configuration
Default On/Off
# Guardrail always active
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
default_on=True
)
# Guardrail opt-in only
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
default_on=False
)
# Enable per request
response = router.completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hello"}],
metadata={"guardrails": ["my-guardrail"]}
)
Content Masking
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
mask_request_content=True, # Mask in logs
mask_response_content=True # Mask in logs
)
Violation Messages
guardrail = CustomGuardrail(
guardrail_name="my-guardrail",
violation_message_template="Blocked by {guardrail_name}: {default_message}"
)
Advanced Guardrail Features
Modify Response Exception
Return a synthetic response instead of blocking the request:
from litellm.integrations.custom_guardrail import ModifyResponseException
class PolicyGuardrail(CustomGuardrail):
    """Pre-call guardrail that answers restricted prompts with a fixed reply."""

    async def async_pre_call_hook(self, user_api_key_dict, cache, call_type, data, **kwargs):
        """Inspect the last message and short-circuit on a restricted topic.

        Returns:
            ``data`` unchanged when the last message is allowed, or when
            there is no message or no text content to inspect.

        Raises:
            ModifyResponseException: carrying the reply to return in place of
                calling the LLM.
        """
        messages = data.get("messages") or []
        if not messages:
            # Nothing to inspect; messages[-1] would raise IndexError.
            return data
        content = messages[-1].get("content")
        if isinstance(content, str) and "sensitive topic" in content.lower():
            # Return custom response instead of calling LLM
            raise ModifyResponseException(
                message="I cannot discuss that topic.",
                model=data.get("model"),
                request_data=data,
                guardrail_name=self.guardrail_name
            )
        return data
Session-Based Guardrails
Track violations across a session:
class SessionGuardrail(CustomGuardrail):
    """Guardrail that refuses requests from sessions with too many violations."""

    # Single source of truth for the limit, used both in the base-class
    # configuration and in the check below.
    MAX_VIOLATIONS = 3

    def __init__(self):
        super().__init__(
            guardrail_name="session-guardrail",
            end_session_after_n_fails=self.MAX_VIOLATIONS,  # End after 3 violations
            on_violation="warn"  # or "end_session"
        )
        self.violations = {}  # session_id -> number of recorded violations

    def record_violation(self, session_id):
        """Add one violation for ``session_id`` and return the new count.

        Call this from whichever check detects a violation; without it the
        counter never grows and the pre-call check can never fire.
        """
        count = self.violations.get(session_id, 0) + 1
        self.violations[session_id] = count
        return count

    async def async_pre_call_hook(self, user_api_key_dict, cache, call_type, data, **kwargs):
        """Reject the request once its session has reached the limit.

        The session id is read from ``kwargs["session_id"]``; requests
        without one are never blocked.
        NOTE(review): confirm the caller actually passes ``session_id`` here.

        Raises:
            ValueError: when the session has ``MAX_VIOLATIONS`` or more
                recorded violations.
        """
        session_id = kwargs.get("session_id")
        if session_id and self.violations.get(session_id, 0) >= self.MAX_VIOLATIONS:
            raise ValueError("Session terminated due to policy violations")
        return data
Logging Guardrail Events
import litellm
from litellm.integrations.custom_logger import CustomLogger


class GuardrailLogger(CustomLogger):
    """Callback that prints guardrail results for each LLM call."""

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        """Print name, status and duration of each guardrail entry.

        NOTE(review): entries are read from ``kwargs["guardrail_info"]``;
        confirm this key against the logging payload of your LiteLLM version.
        """
        # "or []" also covers the key being present with a None value.
        for info in kwargs.get("guardrail_info") or []:
            print(f"Guardrail: {info.get('name')}")
            print(f"Status: {info.get('status')}")
            print(f"Duration: {info.get('duration')}ms")

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """Print the exception when its text mentions a guardrail."""
        exception = kwargs.get("exception")
        if exception is not None and "guardrail" in str(exception).lower():
            print(f"Blocked by guardrail: {exception}")


litellm.callbacks = [GuardrailLogger()]
Best Practices
Guardrail Recommendations
- Layer multiple guardrails - Combine different detection methods
- Use pre_call for speed - Block bad inputs early
- Use post_call for accuracy - Inspect actual LLM output
- Monitor performance - Track guardrail latency
- Test thoroughly - Verify guardrails don’t block valid requests
- Log violations - Track what’s being blocked and why
- Set appropriate thresholds - Balance safety vs. false positives
Common Patterns
Multi-Layer Safety
router = Router(
model_list=[...],
guardrail_list=[
# Layer 1: Prompt injection detection
{
"guardrail_name": "azure-prompt-shield",
"litellm_params": {...},
"mode": "pre_call"
},
# Layer 2: Content moderation
{
"guardrail_name": "azure-content-safety",
"litellm_params": {...},
"mode": "pre_call"
},
# Layer 3: Output filtering
{
"guardrail_name": "llamaguard",
"litellm_params": {...},
"mode": "post_call"
}
]
)
PII Protection Pipeline
class PIIPreCallGuardrail(CustomGuardrail):
    """Input-side PII guardrail, registered as "pii-input" on the pre_call hook.

    Placeholder: only the registration is shown; no detection logic is
    defined in this class.
    """

    def __init__(self):
        init_kwargs = {"guardrail_name": "pii-input", "event_hook": "pre_call"}
        super().__init__(**init_kwargs)
class PIIPostCallGuardrail(CustomGuardrail):
    """Output-side PII guardrail, registered as "pii-output" on the post_call hook.

    Placeholder: only the registration is shown; no redaction logic is
    defined in this class.
    """

    def __init__(self):
        init_kwargs = {"guardrail_name": "pii-output", "event_hook": "post_call"}
        super().__init__(**init_kwargs)
router = Router(model_list=[...])
router.callbacks = [PIIPreCallGuardrail(), PIIPostCallGuardrail()]
Industry Compliance
class HIPAAGuardrail(CustomGuardrail):
    """HIPAA compliance checks: block requests that contain PHI."""

    def __init__(self):
        super().__init__(
            guardrail_name="hipaa-compliance",
            event_hook="pre_call",
            mask_request_content=True,
            mask_response_content=True
        )

    async def async_pre_call_hook(self, user_api_key_dict, cache, call_type, data, **kwargs):
        """Reject the request if any message contains PHI.

        Returns:
            ``data`` unchanged when no PHI is found.

        Raises:
            ValueError: when ``contains_phi`` matches any message text.
        """
        # Detect PHI (Protected Health Information)
        for message in data.get("messages") or []:
            content = message.get("content")
            if isinstance(content, list):
                # Multimodal content: scan only the text parts.
                content = " ".join(
                    part["text"]
                    for part in content
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )
            if self.contains_phi(content):
                raise ValueError("PHI detected in request")
        return data

    def contains_phi(self, text):
        """Return True when ``text`` contains an SSN-shaped number (NNN-NN-NNNN).

        Non-string input (e.g. ``None`` content) is treated as containing no
        PHI rather than raising ``TypeError`` inside ``re.search``.
        """
        # Implement PHI detection logic
        import re

        if not isinstance(text, str):
            return False
        # Check for SSN, medical record numbers, etc.
        ssn_pattern = r'\b\d{3}-\d{2}-\d{4}\b'
        return bool(re.search(ssn_pattern, text))
Async Guardrails
Implement the async hook methods so that guardrail checks do not block the event loop while they wait on I/O:
class FastGuardrail(CustomGuardrail):
    """Skeleton guardrail showing the coroutine form of the pre-call hook."""

    async def async_pre_call_hook(self, *args, **kwargs):
        """Placeholder hook: performs no check and returns ``None``."""
        return None
Parallel Guardrails
Multiple guardrails run in parallel when possible:
router = Router(
model_list=[...],
guardrail_list=[
{"guardrail_name": "guardrail-1", "mode": "pre_call"},
{"guardrail_name": "guardrail-2", "mode": "pre_call"},
# Both run concurrently
]
)
Caching Guardrail Results
class CachedGuardrail(CustomGuardrail):
    """Guardrail that remembers which message lists already passed validation."""

    # Upper bound on remembered verdicts so the cache cannot grow forever.
    MAX_CACHE_ENTRIES = 1024

    def __init__(self):
        super().__init__(guardrail_name="cached-guardrail")
        self.cache = {}  # sha256(messages) -> True once validation has passed

    async def async_pre_call_hook(self, user_api_key_dict, cache, call_type, data, **kwargs):
        """Validate ``data["messages"]``, skipping work for content seen before.

        Only the verdict is cached. The current ``data`` is always returned,
        so two requests with identical messages but a different model or
        different parameters never receive each other's request dict.
        """
        import hashlib

        # Create cache key from content
        content = str(data.get("messages", []))
        cache_key = hashlib.sha256(content.encode()).hexdigest()
        if self.cache.get(cache_key):
            return data

        # Run guardrail logic
        passed = True  # Your validation logic here (raise to block the request)

        if len(self.cache) >= self.MAX_CACHE_ENTRIES:
            # dicts keep insertion order, so the first key is the oldest entry.
            self.cache.pop(next(iter(self.cache)))
        self.cache[cache_key] = passed
        return data
Troubleshooting
Guardrail Not Triggering
# Ensure guardrail is registered
router = Router(model_list=[...])
router.callbacks = [my_guardrail] # Add to callbacks
# Or use guardrail_list
router = Router(
model_list=[...],
guardrail_list=[{"guardrail_name": "my-guardrail", "default_on": True}]
)
High Latency
# Use async methods
# Minimize external API calls
# Cache results when possible
# Use pre_call to fail fast
False Positives
# Adjust thresholds
# Add whitelisting
# Log blocked requests for review