Basic usage
Create a simple chat completion with a model and messages:import os
from dedalus_labs import Dedalus
client = Dedalus(
api_key=os.environ.get("DEDALUS_API_KEY")
)
completion = client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[
{
"role": "system",
"content": "You are Stephen Dedalus. Respond in morose Joycean malaise.",
},
{
"role": "user",
"content": "Hello, how are you today?",
},
],
)
print(completion.choices[0].message.content)
print(f"Tokens used: {completion.usage.total_tokens}")
Async usage
Use the async client for concurrent operations:import os
import asyncio
from dedalus_labs import AsyncDedalus
client = AsyncDedalus(
api_key=os.environ.get("DEDALUS_API_KEY")
)
async def main():
completion = await client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[
{
"role": "system",
"content": "You are Stephen Dedalus. Respond in morose Joycean malaise.",
},
{
"role": "user",
"content": "Hello, how are you today?",
},
],
)
print(completion.choices[0].message.content)
asyncio.run(main())
Streaming responses
- Sync
- Async
from dedalus_labs import Dedalus

client = Dedalus()

# Pass stream=True to receive the response as incremental chunks
# instead of a single completed message.
stream = client.chat.completions.create(
    model="openai/gpt-5-nano",
    stream=True,
    messages=[
        {
            "role": "system",
            "content": "You are Stephen Dedalus. Respond in morose Joycean malaise.",
        },
        {
            "role": "user",
            "content": "What do you think of artificial intelligence?",
        },
    ],
)

# Print each fragment as it arrives; guard on delta.content because
# some chunks carry no text (e.g. role-only or terminal chunks).
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
import asyncio

from dedalus_labs import AsyncDedalus

client = AsyncDedalus()


# `await` and `async for` are only valid inside a coroutine, so wrap the
# streaming call in an async entry point (mirrors the async usage example).
async def main():
    stream = await client.chat.completions.create(
        model="openai/gpt-5-nano",
        stream=True,
        messages=[
            {
                "role": "system",
                "content": "You are Stephen Dedalus. Respond in morose Joycean malaise.",
            },
            {
                "role": "user",
                "content": "What do you think of artificial intelligence?",
            },
        ],
    )
    # Guard on delta.content because some chunks carry no text.
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())
Common parameters
Temperature and sampling
Control the randomness and creativity of responses:completion = client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[{"role": "user", "content": "Write a creative story."}],
temperature=0.9, # Higher = more creative (0-2)
top_p=0.95, # Nucleus sampling threshold
max_tokens=500, # Maximum tokens in response
)
Reasoning effort
For reasoning models, control the computational effort:completion = client.chat.completions.create(
model="openai/gpt-5.1",
messages=[{"role": "user", "content": "Solve this complex problem..."}],
reasoning_effort="high", # none, minimal, low, medium, high, xhigh
)
Response format
Request structured JSON output:completion = client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[{"role": "user", "content": "List three colors."}],
response_format={
"type": "json_schema",
"json_schema": {
"name": "colors",
"strict": True,
"schema": {
"type": "object",
"properties": {
"colors": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["colors"],
"additionalProperties": False
}
}
}
)
Tool calling
Define tools for the model to use:tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"]
}
},
"required": ["location"]
}
}
}
]
completion = client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[{"role": "user", "content": "What's the weather in Boston?"}],
tools=tools,
tool_choice="auto" # auto, none, required, or specific function
)
# Check if the model wants to call a tool
if completion.choices[0].message.tool_calls:
tool_call = completion.choices[0].message.tool_calls[0]
print(f"Function: {tool_call.function.name}")
print(f"Arguments: {tool_call.function.arguments}")
Error handling
Handle different error scenarios:import dedalus_labs
from dedalus_labs import Dedalus
client = Dedalus()
try:
completion = client.chat.completions.create(
model="openai/gpt-5-nano",
messages=[{"role": "user", "content": "Hello!"}],
)
except dedalus_labs.APIConnectionError as e:
print("The server could not be reached")
print(e.__cause__) # underlying Exception
except dedalus_labs.RateLimitError as e:
print("A 429 status code was received; we should back off a bit.")
except dedalus_labs.APIStatusError as e:
print("Another non-200-range status code was received")
print(f"Status code: {e.status_code}")
print(f"Response: {e.response}")
Advanced options
Retries and timeouts
from dedalus_labs import Dedalus
import httpx

# Configure default retries and timeout
client = Dedalus(
    max_retries=5,
    timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0),
)

# Override per request
completion = client.with_options(timeout=30.0, max_retries=3).chat.completions.create(
    model="openai/gpt-5-nano",
    messages=[{"role": "user", "content": "Hello!"}],
)
Accessing raw responses
from dedalus_labs import Dedalus

client = Dedalus()

# with_raw_response wraps the call so HTTP-level details stay accessible.
response = client.chat.completions.with_raw_response.create(
    model="openai/gpt-5-nano",
    messages=[{"role": "user", "content": "Hello!"}],
)

print(response.headers.get('X-Request-ID'))

completion = response.parse()  # Get the parsed ChatCompletion object
print(completion.choices[0].message.content)
The Dedalus API automatically handles:
- Token usage metering
- Provider routing
- Automatic retries on transient failures
- Response caching (when configured)
When using `stream=True`, make sure to consume the entire stream to avoid connection leaks. The async client handles this automatically with async context managers.