Convert text into natural-sounding speech audio using OpenAI’s text-to-speech models.
Basic usage
Generate speech from text and save to a file:
import os
from pathlib import Path
from dedalus_labs import Dedalus
# Create a client; the API key is read from the DEDALUS_API_KEY environment variable.
client = Dedalus(
    api_key=os.environ.get("DEDALUS_API_KEY"),
)

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Hello! This is a test of the text-to-speech system.",
)

# Save the audio to a file
output_path = Path("speech.mp3")
with output_path.open("wb") as f:
    # Write chunk by chunk as the response yields bytes.
    for chunk in response.iter_bytes():
        f.write(chunk)

print(f"Audio saved to {output_path}")
Async usage
Generate speech asynchronously:
import os
import asyncio
from pathlib import Path
from dedalus_labs import AsyncDedalus
client = AsyncDedalus(
api_key = os.environ.get( "DEDALUS_API_KEY" )
)
async def main():
    """Generate speech with the async client and save it to async_speech.mp3."""
    response = await client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="nova",
        input="Hello from async!",
    )

    output_path = Path("async_speech.mp3")
    with output_path.open("wb") as f:
        # NOTE(review): confirm that iter_bytes() on the async response is
        # directly async-iterable; if the SDK defines it as a coroutine it
        # must be awaited before iterating.
        async for chunk in response.iter_bytes():
            f.write(chunk)

    print(f"Audio saved to {output_path}")
asyncio.run(main())
Available models
tts-1
tts-1-hd
gpt-4o-mini-tts
Fast and cost-effective model optimized for real-time applications:
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here"
)
Higher quality model with improved audio fidelity:
response = client.audio.speech.create(
model = "tts-1-hd" ,
voice = "alloy" ,
input = "Your text here"
)
Advanced model with instruction-following capabilities:
response = client.audio.speech.create(
model = "gpt-4o-mini-tts" ,
voice = "nova" ,
input = "Your text here" ,
instructions = "Speak in a cheerful, upbeat tone"
)
Available voices
Choose from multiple built-in voices with different characteristics:
Alloy
Echo
Fable
Nova
Onyx
Shimmer
# Neutral, balanced voice
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here"
)
Standard voices:
alloy - Neutral and balanced
echo - Male voice
fable - British accent
onyx - Deep male voice
nova - Female voice
shimmer - Soft female voice
Additional voices:
ash - Clear and articulate
ballad - Storytelling quality
coral - Warm and friendly
sage - Wise and measured
verse - Poetic delivery
marin - Professional tone
cedar - Natural cadence
Custom voices
Use a custom voice by providing a voice ID:
response = client.audio.speech.create(
model = "gpt-4o-mini-tts" ,
voice = { "id" : "voice_1234567890" },
input = "Your text here"
)
Generate audio in different formats using the response_format parameter:
MP3 (default)
Opus
AAC
FLAC
WAV
PCM
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "mp3" # Default
)
# Best for internet streaming and low latency
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "opus"
)
# Good compression for mobile devices
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "aac"
)
# Lossless audio quality
response = client.audio.speech.create(
model = "tts-1-hd" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "flac"
)
# Uncompressed, best compatibility
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "wav"
)
# Raw audio data
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
response_format = "pcm"
)
Speech speed
Adjust the playback speed:
response = client.audio.speech.create(
model = "tts-1" ,
voice = "alloy" ,
input = "Your text here" ,
speed = 1.25 # Range: 0.25 to 4.0 (1.0 is default)
)
0.25 - Slowest (1/4 speed)
1.0 - Normal speed (default)
2.0 - Double speed
4.0 - Fastest (4x speed)
Voice instructions
Control voice characteristics with instructions (works with gpt-4o-mini-tts models):
response = client.audio.speech.create(
model = "gpt-4o-mini-tts" ,
voice = "nova" ,
input = "Welcome to our customer service line." ,
instructions = "Speak in a professional, friendly tone with a slight smile in your voice."
)
The instructions parameter does not work with tts-1 or tts-1-hd models. Use gpt-4o-mini-tts or newer models for instruction-following capabilities.
Streaming audio
Stream audio for real-time playback:
from dedalus_labs import Dedalus
client = Dedalus()

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="This is a streaming example.",
)

# Stream audio chunks
for chunk in response.iter_bytes(chunk_size=1024):
    # Process or play chunk in real-time
    pass
Complete example
Generate speech with all options:
from pathlib import Path
from dedalus_labs import Dedalus
client = Dedalus()

long_text = """
Welcome to our automated system.
Please listen carefully as our menu options have changed.
For customer service, press 1.
For technical support, press 2.
For billing inquiries, press 3.
"""

# Every optional parameter shown together: instructions, format and speed.
response = client.audio.speech.create(
    model="gpt-4o-mini-tts",
    voice="nova",
    input=long_text,
    instructions="Speak clearly and professionally with appropriate pauses.",
    response_format="mp3",
    speed=0.9,  # Slightly slower for clarity
)

# Save to file
output_path = Path("menu_audio.mp3")
with output_path.open("wb") as f:
    for chunk in response.iter_bytes():
        f.write(chunk)

print(f"Audio saved to {output_path}")
print(f"Text length: {len(long_text)} characters")
Error handling
import dedalus_labs
from pathlib import Path
from dedalus_labs import Dedalus
client = Dedalus()

try:
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input="Your text here",
    )
    Path("output.mp3").write_bytes(response.content)
except dedalus_labs.BadRequestError as e:
    print(f"Invalid request: {e.message}")
    # Common causes: text too long (>4096 chars), invalid voice
except dedalus_labs.APIConnectionError as e:
    print("Network error occurred")
    print(e.__cause__)
except dedalus_labs.APIStatusError as e:
    print(f"API error: {e.status_code}")
Text length limits
Maximum text length: 4,096 characters. For longer text, split it into multiple requests and concatenate the resulting audio files.
Processing long text
from pathlib import Path
from dedalus_labs import Dedalus
import math
client = Dedalus()
def split_text(text, max_length=4000):
    """Split text into chunks of at most ``max_length`` characters.

    Chunks break at sentence boundaries (a period followed by a space)
    whenever possible. A single sentence longer than ``max_length`` is cut
    into ``max_length``-sized slices so that no chunk exceeds the limit.
    Sentences are stripped of surrounding whitespace and rejoined with a
    single space; the text's own punctuation is kept, and no extra period
    is appended.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-empty chunk strings in their original order. Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``max_length`` is less than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # str.split consumes the ". " separator, so put the period back on every
    # piece except the last, which keeps whatever ending the text had.
    pieces = text.split(". ")
    sentences = [piece + "." for piece in pieces[:-1]] + pieces[-1:]

    chunks = []
    current_chunk = ""
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        # A sentence that cannot fit in any chunk is hard-split; flush the
        # pending chunk first so the output order matches the input order.
        while len(sentence) > max_length:
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = ""
            chunks.append(sentence[:max_length].rstrip())
            sentence = sentence[max_length:].lstrip()

        # Measure the chunk as it would actually be emitted, separator included.
        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_length:
            current_chunk = candidate
        else:
            chunks.append(current_chunk)
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)
    return chunks
long_text = """[Your very long text here...]"""
chunks = split_text(long_text)

# Generate one audio file per chunk, keeping the filenames in order so they
# can be concatenated afterwards.
audio_files = []
for i, chunk in enumerate(chunks):
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=chunk,
    )
    # Zero-padded index keeps the files in order when sorted by name.
    filename = f"chunk_{i:03d}.mp3"
    Path(filename).write_bytes(response.content)
    audio_files.append(filename)
    print(f"Generated {filename}")

print(f"\nCreated {len(audio_files)} audio files")
Consider using an audio tool such as ffmpeg to concatenate the generated audio files:
ffmpeg -i "concat:chunk_000.mp3|chunk_001.mp3|chunk_002.mp3" -acodec copy output.mp3