TTSKit Configuration
TTSKit provides comprehensive configuration through TTSKitConfig.

Basic Configuration

import TTSKit

// Default configuration (auto-loads 0.6B model)
let tts = try await TTSKit()

// Custom configuration
let config = TTSKitConfig(
    model: .qwen3TTS_1_7b,
    verbose: true,
    load: true
)
let tts = try await TTSKit(config)

Model Variants

TTSKit supports two Qwen3 TTS model variants:
public enum TTSModelVariant {
    case qwen3TTS_0_6b  // ~1 GB download, fast, all platforms
    case qwen3TTS_1_7b  // ~2.2 GB download, higher quality, macOS only, supports style instructions
}

Selecting a Variant

// Fast model (default)
let config = TTSKitConfig(model: .qwen3TTS_0_6b)
let tts = try await TTSKit(config)

// High-quality model
let config = TTSKitConfig(model: .qwen3TTS_1_7b)
let tts = try await TTSKit(config)

Platform Defaults

TTSKit can recommend a default model for the current platform:
let recommended = TTSKit.recommendedModels()
// Returns .qwen3TTS_0_6b on all platforms

Compute Units

Configure which hardware accelerators each model component uses:
var config = TTSKitConfig()
config.computeOptions = ComputeOptions(
    embedderComputeUnits: .cpuOnly,              // Embeddings run on CPU
    codeDecoderComputeUnits: .cpuAndNeuralEngine,      // Decoder on CPU + ANE
    multiCodeDecoderComputeUnits: .cpuAndNeuralEngine, // Multi-decoder on CPU + ANE
    speechDecoderComputeUnits: .cpuAndNeuralEngine     // Speech decoder on CPU + ANE
)

let tts = try await TTSKit(config)

Available Compute Units

- cpuOnly (MLComputeUnits): Run on CPU only. Lower power consumption, slower inference.
- cpuAndNeuralEngine (MLComputeUnits): Run on CPU and Neural Engine (ANE). Best performance on Apple Silicon.
- cpuAndGPU (MLComputeUnits): Run on CPU and GPU. Useful when ANE is unavailable or saturated.
- all (MLComputeUnits): Use all available compute units (CPU, GPU, ANE).
// Default (recommended for most use cases)
let options = ComputeOptions(
    embedderComputeUnits: .cpuOnly,              // Simple table lookups
    codeDecoderComputeUnits: .cpuAndNeuralEngine,      // Heavy computation
    multiCodeDecoderComputeUnits: .cpuAndNeuralEngine, // Heavy computation
    speechDecoderComputeUnits: .cpuAndNeuralEngine     // Heavy computation
)

// Low power (slower but more energy efficient)
let lowPowerOptions = ComputeOptions(
    embedderComputeUnits: .cpuOnly,
    codeDecoderComputeUnits: .cpuOnly,
    multiCodeDecoderComputeUnits: .cpuOnly,
    speechDecoderComputeUnits: .cpuOnly
)

Model Loading

Control when and how models are loaded:
// Auto-load on init (default)
let tts = try await TTSKit()

// Defer loading
let config = TTSKitConfig(load: false)
let tts = try await TTSKit(config)
try await tts.loadModels()

// Prewarm: compile models sequentially to cap peak memory
let config = TTSKitConfig(prewarm: true, load: false)
let tts = try await TTSKit(config)
try await tts.loadModels()

Prewarm vs Load

Load Mode (the default, load: true): compile and load all models into memory so they are immediately ready for generation. Prewarm Mode: compile models one at a time and discard the weights, limiting peak memory during compilation.
let config = TTSKitConfig(prewarm: true, load: false)
let tts = try await TTSKit(config)
Use prewarm on first launch or after a model update to serialize Core ML compilation.

Model Repository

By default, models are downloaded from the official HuggingFace repo:
let config = TTSKitConfig(
    model: .qwen3TTS_0_6b,
    modelRepo: "argmaxinc/ttskit-coreml",
    download: true
)
let tts = try await TTSKit(config)

Custom Model Repository

You can point to a custom HuggingFace repo:
let config = TTSKitConfig(
    model: .qwen3TTS_0_6b,
    modelRepo: "username/custom-ttskit-models",
    modelToken: "hf_..."  // Optional HuggingFace token for private repos
)
let tts = try await TTSKit(config)

Local Models

Skip download and use a local model folder:
let localFolder = URL(fileURLWithPath: "/path/to/models")
let config = TTSKitConfig(
    modelFolder: localFolder,
    download: false
)
let tts = try await TTSKit(config)

Model Discovery

Fetch available models from HuggingFace:
let models = try await TTSKit.fetchAvailableModels(
    from: "argmaxinc/ttskit-coreml",
    matching: ["*"]  // Glob patterns
)
print("Available models: \(models)")

Download a Model

Explicitly download a model without initializing TTSKit:
let folder = try await TTSKit.download(
    variant: .qwen3TTS_0_6b,
    from: "argmaxinc/ttskit-coreml"
) { progress in
    print("Download progress: \(Int(progress.fractionCompleted * 100))%")
}
print("Model cached at: \(folder.path)")

Seed for Reproducibility

Set a seed for deterministic generation:
let config = TTSKitConfig(seed: 42)
let tts = try await TTSKit(config)

let result1 = try await tts.generate(text: "Hello")
let result2 = try await tts.generate(text: "Hello")
// result1.audio == result2.audio (same seed, same input)
The seed is XORed with a per-task counter to ensure different samples across calls while maintaining reproducibility within a session.

Logging

Enable verbose logging for debugging:
var config = TTSKitConfig()
config.verbose = true
config.logLevel = .debug  // .debug, .info, .warning, .error

let tts = try await TTSKit(config)

Custom Log Handler

Redirect logs to your own handler:
tts.loggingCallback { level, message in
    print("[\(level)] \(message)")
}

// Restore default print-based logger
tts.loggingCallback(nil)

Component Overrides

Replace any model component with a custom implementation:
var config = TTSKitConfig(load: false)
let tts = try await TTSKit(config)

// Swap the code decoder
tts.codeDecoder = MyCustomCodeDecoder()

// Load models with the custom component
try await tts.loadModels()

Available Components

public class TTSKit {
    public var textProjector: any TextProjecting
    public var codeEmbedder: any CodeEmbedding
    public var multiCodeEmbedder: any MultiCodeEmbedding
    public var codeDecoder: any CodeDecoding
    public var multiCodeDecoder: any MultiCodeDecoding
    public var speechDecoder: any SpeechDecoding
    public var tokenizer: (any Tokenizer)?
}
Each component can be swapped independently. See the protocol definitions in the source code for implementation details.

Convenience Initializer

TTSKit provides a convenience initializer that exposes all config fields as parameters:
let tts = try await TTSKit(
    model: .qwen3TTS_1_7b,
    modelFolder: nil,
    downloadBase: nil,
    modelRepo: "argmaxinc/ttskit-coreml",
    tokenizerFolder: nil,
    modelToken: nil,
    computeOptions: ComputeOptions(),
    textProjector: nil,
    codeEmbedder: nil,
    multiCodeEmbedder: nil,
    codeDecoder: nil,
    multiCodeDecoder: nil,
    speechDecoder: nil,
    verbose: true,
    logLevel: .debug,
    prewarm: nil,
    load: true,
    download: true,
    useBackgroundDownloadSession: false,
    seed: nil
)

Background Downloads

Use a background URLSession for model downloads:
var config = TTSKitConfig()
config.useBackgroundDownloadSession = true
let tts = try await TTSKit(config)
Useful for letting large model downloads continue while the app is suspended or running in the background.

Model State

Track the model lifecycle with modelState:
public enum ModelState {
    case unloaded
    case downloading
    case downloaded
    case loading
    case loaded
    case prewarming
    case prewarmed
    case unloading
}

// Monitor state changes
let tts = try await TTSKit()
tts.modelStateCallback = { oldState, newState in
    print("Model state: \(oldState) -> \(newState)")
}

Memory Management

Unload models to free memory:
// Load models
let tts = try await TTSKit()

// Generate speech
let result = try await tts.generate(text: "Hello")

// Unload models to free ~1-2 GB
await tts.unloadModels()

// Reload when needed
try await tts.loadModels()

Timings

Access detailed timing information:
let tts = try await TTSKit()

print("Model load time: \(tts.modelLoadTime)s")
print("Tokenizer load time: \(tts.tokenizerLoadTime)s")

let result = try await tts.generate(text: "Hello")
print("Full pipeline: \(result.timings.fullPipeline)s")
print("Time to first buffer: \(result.timings.timeToFirstBuffer)s")
print("Total decoding loops: \(result.timings.totalDecodingLoops)")

Reset State

Clear timing statistics between runs:
tts.clearState()

Example: Full Configuration

Here’s a complete example with all configuration options:
import TTSKit
import CoreML

Task {
    var config = TTSKitConfig()
    
    // Model selection
    config.model = .qwen3TTS_1_7b
    config.modelRepo = "argmaxinc/ttskit-coreml"
    config.download = true
    
    // Compute units
    config.computeOptions = ComputeOptions(
        embedderComputeUnits: .cpuOnly,
        codeDecoderComputeUnits: .cpuAndNeuralEngine,
        multiCodeDecoderComputeUnits: .cpuAndNeuralEngine,
        speechDecoderComputeUnits: .cpuAndNeuralEngine
    )
    
    // Loading behavior
    config.load = true
    config.prewarm = nil  // Skip prewarm if models are already compiled
    
    // Logging
    config.verbose = true
    config.logLevel = .info
    
    // Seed for reproducibility
    config.seed = 42
    
    // Initialize TTSKit
    let tts = try await TTSKit(config)
    
    // Monitor state changes
    tts.modelStateCallback = { oldState, newState in
        print("Model state: \(oldState) -> \(newState)")
    }
    
    // Generate speech
    var options = GenerationOptions()
    options.temperature = 0.9
    options.instruction = "Speak warmly and slowly"
    
    let result = try await tts.generate(
        text: "Hello from TTSKit!",
        speaker: .ryan,
        language: .english,
        options: options
    )
    
    print("Generated \(result.audioDuration)s of audio")
    print("Model load: \(tts.modelLoadTime)s")
    print("Pipeline: \(result.timings.fullPipeline)s")
}

Next Steps

Overview

Return to TTSKit overview

Generation

Learn about generation options

Build docs developers (and LLMs) love