Skip to main content
Convert text into natural-sounding speech audio using OpenAI’s text-to-speech models.

Basic usage

Generate speech from text and save to a file:
import os
from pathlib import Path
from dedalus_labs import Dedalus

# Build the client; the API key is read from the DEDALUS_API_KEY environment variable.
client = Dedalus(api_key=os.environ.get("DEDALUS_API_KEY"))

# Request speech audio for a short test sentence.
response = client.audio.speech.create(
    input="Hello! This is a test of the text-to-speech system.",
    voice="alloy",
    model="tts-1",
)

# Save the audio to a file, writing each chunk of the response in binary mode.
output_path = Path("speech.mp3")
with output_path.open("wb") as audio_file:
    audio_file.writelines(response.iter_bytes())

print(f"Audio saved to {output_path}")

Async usage

Generate speech asynchronously:
import os
import asyncio
from pathlib import Path
from dedalus_labs import AsyncDedalus

client = AsyncDedalus(
    api_key=os.environ.get("DEDALUS_API_KEY")
)

async def main():
    """Generate speech with the async client and save it to async_speech.mp3."""
    speech = await client.audio.speech.create(
        input="Hello from async!",
        voice="nova",
        model="gpt-4o-mini-tts",
    )

    # Write the audio chunk by chunk as it is iterated.
    destination = Path("async_speech.mp3")
    with destination.open("wb") as audio_file:
        async for piece in speech.iter_bytes():
            audio_file.write(piece)

    print(f"Audio saved to {destination}")

asyncio.run(main())

Available models

Fast and cost-effective model optimized for real-time applications:
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Your text here"
)

Available voices

Choose from multiple built-in voices with different characteristics:
# Neutral, balanced voice
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Your text here"
)
Standard voices:
  • alloy - Neutral and balanced
  • echo - Male voice
  • fable - British accent
  • onyx - Deep male voice
  • nova - Female voice
  • shimmer - Soft female voice
Additional voices:
  • ash - Clear and articulate
  • ballad - Storytelling quality
  • coral - Warm and friendly
  • sage - Wise and measured
  • verse - Poetic delivery
  • marin - Professional tone
  • cedar - Natural cadence

Custom voices

Use a custom voice by providing a voice ID:
response = client.audio.speech.create(
    model="gpt-4o-mini-tts",
    voice={"id": "voice_1234567890"},
    input="Your text here"
)

Output formats

Generate audio in different formats:
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Your text here",
    response_format="mp3"  # Default
)

Speech speed

Adjust the playback speed:
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Your text here",
    speed=1.25  # Range: 0.25 to 4.0 (1.0 is default)
)
  • 0.25 - Slowest (1/4 speed)
  • 1.0 - Normal speed (default)
  • 2.0 - Double speed
  • 4.0 - Fastest (4x speed)

Voice instructions

Control voice characteristics with instructions (works with gpt-4o-mini-tts models):
response = client.audio.speech.create(
    model="gpt-4o-mini-tts",
    voice="nova",
    input="Welcome to our customer service line.",
    instructions="Speak in a professional, friendly tone with a slight smile in your voice."
)
The instructions parameter does not work with tts-1 or tts-1-hd models. Use gpt-4o-mini-tts or newer models for instruction-following capabilities.

Streaming audio

Stream audio for real-time playback:
from dedalus_labs import Dedalus

client = Dedalus()

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="This is a streaming example."
)

# Stream audio chunks
for chunk in response.iter_bytes(chunk_size=1024):
    # Process or play chunk in real-time
    pass

Complete example

Generate speech with all options:
from pathlib import Path
from dedalus_labs import Dedalus

client = Dedalus()

long_text = """
Welcome to our automated system. 
Please listen carefully as our menu options have changed.
For customer service, press 1.
For technical support, press 2.
For billing inquiries, press 3.
"""

# Request the speech with every optional parameter set explicitly.
response = client.audio.speech.create(
    input=long_text,
    model="gpt-4o-mini-tts",
    voice="nova",
    speed=0.9,  # Slightly slower for clarity
    response_format="mp3",
    instructions="Speak clearly and professionally with appropriate pauses.",
)

# Save to file
output_path = Path("menu_audio.mp3")
with output_path.open("wb") as audio_file:
    audio_file.writelines(response.iter_bytes())

print(f"Audio saved to {output_path}")
print(f"Text length: {len(long_text)} characters")

Error handling

import dedalus_labs
from pathlib import Path
from dedalus_labs import Dedalus

client = Dedalus()

try:
    response = client.audio.speech.create(
        input="Your text here",
        voice="alloy",
        model="tts-1",
    )
    audio_bytes = response.content
except dedalus_labs.BadRequestError as bad_request:
    # Common causes: text too long (>4096 chars), invalid voice
    print(f"Invalid request: {bad_request.message}")
except dedalus_labs.APIConnectionError as connection_error:
    print("Network error occurred")
    print(connection_error.__cause__)
except dedalus_labs.APIStatusError as status_error:
    print(f"API error: {status_error.status_code}")
else:
    # Only reached when the request succeeded and the audio bytes were obtained.
    Path("output.mp3").write_bytes(audio_bytes)

Text length limits

Maximum text length: 4,096 characters. For longer text, split it into multiple requests and concatenate the resulting audio files.

Processing long text

from pathlib import Path
from dedalus_labs import Dedalus
import math

client = Dedalus()

def split_text(text, max_length=4000):
    """Split text into chunks of at most ``max_length`` characters.

    Chunks break at sentence boundaries (``.``, ``!`` or ``?`` followed by
    whitespace) whenever possible. Sentences keep their own punctuation and
    nothing is appended to the text. Whitespace between sentences is
    normalized to a single space inside a chunk. A single sentence longer
    than ``max_length`` is cut at the last space that fits, or mid-word when
    it contains no usable space.

    Args:
        text: The text to split.
        max_length: Maximum number of characters allowed in each chunk.

    Returns:
        A list of non-empty chunks in their original order. Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    import re  # Local import keeps this snippet self-contained.

    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    chunks = []
    current_chunk = ""

    # The lookbehind keeps each terminator attached to its own sentence, so
    # no punctuation has to be re-added (and none gets duplicated).
    for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
        if not sentence:
            continue

        # An oversized sentence can never fit in one chunk: flush what is
        # pending to preserve order, then cut the sentence into pieces.
        while len(sentence) > max_length:
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = ""
            cut = sentence.rfind(" ", 0, max_length + 1)
            if cut <= 0:
                cut = max_length  # No space to break on; cut mid-word.
            chunks.append(sentence[:cut].rstrip())
            sentence = sentence[cut:].lstrip()

        # Measure the chunk as it would actually be emitted, separator included.
        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_length:
            current_chunk = candidate
        else:
            chunks.append(current_chunk)
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

long_text = """[Your very long text here...]"""

# Synthesize each chunk separately and remember the files that were written.
chunks = split_text(long_text)
audio_files = []

for i, piece in enumerate(chunks):
    # Zero-padded index keeps the files in order when sorted by name.
    filename = f"chunk_{i:03d}.mp3"
    speech = client.audio.speech.create(
        input=piece,
        voice="alloy",
        model="tts-1",
    )
    Path(filename).write_bytes(speech.content)
    audio_files.append(filename)
    print(f"Generated {filename}")

print(f"\nCreated {len(audio_files)} audio files")
Consider using audio editing tools like ffmpeg to concatenate multiple audio files:
ffmpeg -i "concat:chunk_000.mp3|chunk_001.mp3|chunk_002.mp3" -acodec copy output.mp3

Build docs developers (and LLMs) love