TTSKit provides comprehensive configuration through TTSKitConfig.
Basic Configuration
import TTSKit

// Default configuration (auto-loads 0.6B model)
let tts = try await TTSKit()

// Custom configuration
let config = TTSKitConfig(
    model: .qwen3TTS_1_7b,
    verbose: true,
    load: true
)
let tts = try await TTSKit(config)
Model Variants
TTSKit supports two Qwen3 TTS model variants:
public enum TTSModelVariant {
    case qwen3TTS_0_6b  // ~1 GB download, fast, all platforms
    case qwen3TTS_1_7b  // ~2.2 GB download, higher quality, macOS only, supports style instructions
}
Selecting a Variant
// Fast model (default)
let config = TTSKitConfig(model: .qwen3TTS_0_6b)
let tts = try await TTSKit(config)

// High-quality model
let config = TTSKitConfig(model: .qwen3TTS_1_7b)
let tts = try await TTSKit(config)

TTSKit can also suggest a model for the current platform:

let recommended = TTSKit.recommendedModels()
// Returns .qwen3TTS_0_6b on all platforms
Compute Units
Configure which hardware accelerators each model component uses:
var config = TTSKitConfig()
config.computeOptions = ComputeOptions(
    embedderComputeUnits: .cpuOnly,                     // Embeddings run on CPU
    codeDecoderComputeUnits: .cpuAndNeuralEngine,       // Decoder on CPU + ANE
    multiCodeDecoderComputeUnits: .cpuAndNeuralEngine,  // Multi-decoder on CPU + ANE
    speechDecoderComputeUnits: .cpuAndNeuralEngine      // Speech decoder on CPU + ANE
)
let tts = try await TTSKit(config)
Available Compute Units
.cpuOnly — Run on CPU only. Lower power consumption, slower inference.
.cpuAndNeuralEngine — Run on CPU and Neural Engine (ANE). Best performance on Apple Silicon.
.cpuAndGPU — Run on CPU and GPU. Useful when ANE is unavailable or saturated.
.all — Use all available compute units (CPU, GPU, ANE).
Recommended Settings
// Default (recommended for most use cases)
let options = ComputeOptions(
    embedderComputeUnits: .cpuOnly,                     // Simple table lookups
    codeDecoderComputeUnits: .cpuAndNeuralEngine,       // Heavy computation
    multiCodeDecoderComputeUnits: .cpuAndNeuralEngine,  // Heavy computation
    speechDecoderComputeUnits: .cpuAndNeuralEngine      // Heavy computation
)

// Low power (slower but more energy efficient)
let lowPowerOptions = ComputeOptions(
    embedderComputeUnits: .cpuOnly,
    codeDecoderComputeUnits: .cpuOnly,
    multiCodeDecoderComputeUnits: .cpuOnly,
    speechDecoderComputeUnits: .cpuOnly
)
Model Loading
Control when and how models are loaded:
// Auto-load on init (default)
let tts = try await TTSKit()

// Defer loading
let config = TTSKitConfig(load: false)
let tts = try await TTSKit(config)
try await tts.loadModels()

// Prewarm: compile models sequentially to cap peak memory
let config = TTSKitConfig(prewarm: true, load: false)
let tts = try await TTSKit(config)
try await tts.loadModels()
Prewarm vs Load
Prewarm Mode: Compiles models one at a time and discards their weights to limit peak memory.

let config = TTSKitConfig(prewarm: true, load: false)
let tts = try await TTSKit(config)

Use prewarm on first launch or after a model update to serialize Core ML compilation.

Load Mode: Loads all models concurrently (default).

let config = TTSKitConfig(load: true)
let tts = try await TTSKit(config)

Load mode is faster than prewarm once models are already compiled (cached .mlmodelc bundles).
Model Repository
By default, models are downloaded from the official HuggingFace repo:

let config = TTSKitConfig(
    model: .qwen3TTS_0_6b,
    modelRepo: "argmaxinc/ttskit-coreml",
    download: true
)
let tts = try await TTSKit(config)
Custom Model Repository
You can point to a custom HuggingFace repo:
let config = TTSKitConfig(
    model: .qwen3TTS_0_6b,
    modelRepo: "username/custom-ttskit-models",
    modelToken: "hf_..."  // Optional HuggingFace token for private repos
)
let tts = try await TTSKit(config)
Local Models
Skip download and use a local model folder:
let localFolder = URL(fileURLWithPath: "/path/to/models")
let config = TTSKitConfig(
    modelFolder: localFolder,
    download: false
)
let tts = try await TTSKit(config)
Model Discovery
Fetch available models from HuggingFace:
let models = try await TTSKit.fetchAvailableModels(
    from: "argmaxinc/ttskit-coreml",
    matching: ["*"]  // Glob patterns
)
print("Available models: \(models)")
Download a Model
Explicitly download a model without initializing TTSKit:
let folder = try await TTSKit.download(
    variant: .qwen3TTS_0_6b,
    from: "argmaxinc/ttskit-coreml"
) { progress in
    print("Download progress: \(Int(progress.fractionCompleted * 100))%")
}
print("Model cached at: \(folder.path)")
Seed for Reproducibility
Set a seed for deterministic generation:
let config = TTSKitConfig(seed: 42)
let tts = try await TTSKit(config)

let result1 = try await tts.generate(text: "Hello")
let result2 = try await tts.generate(text: "Hello")
// result1.audio == result2.audio (same seed, same input)
The seed is XORed with a per-task counter to ensure different samples across calls while maintaining reproducibility within a session.
Logging
Enable verbose logging for debugging:
var config = TTSKitConfig()
config.verbose = true
config.logLevel = .debug  // .debug, .info, .warning, .error
let tts = try await TTSKit(config)
Custom Log Handler
Redirect logs to your own handler:
tts.loggingCallback { level, message in
    print("[\(level)] \(message)")
}

// Restore default print-based logger
tts.loggingCallback(nil)
Component Overrides
Replace any model component with a custom implementation:
var config = TTSKitConfig(load: false)
let tts = try await TTSKit(config)

// Swap the code decoder
tts.codeDecoder = MyCustomCodeDecoder()

// Load models with the custom component
try await tts.loadModels()
Available Components
public class TTSKit {
    public var textProjector: any TextProjecting
    public var codeEmbedder: any CodeEmbedding
    public var multiCodeEmbedder: any MultiCodeEmbedding
    public var codeDecoder: any CodeDecoding
    public var multiCodeDecoder: any MultiCodeDecoding
    public var speechDecoder: any SpeechDecoding
    public var tokenizer: (any Tokenizer)?
}
Each component can be swapped independently. See the protocol definitions in the source code for implementation details.
Convenience Initializer
TTSKit provides a convenience initializer that exposes all config fields as parameters:
let tts = try await TTSKit(
    model: .qwen3TTS_1_7b,
    modelFolder: nil,
    downloadBase: nil,
    modelRepo: "argmaxinc/ttskit-coreml",
    tokenizerFolder: nil,
    modelToken: nil,
    computeOptions: ComputeOptions(),
    textProjector: nil,
    codeEmbedder: nil,
    multiCodeEmbedder: nil,
    codeDecoder: nil,
    multiCodeDecoder: nil,
    speechDecoder: nil,
    verbose: true,
    logLevel: .debug,
    prewarm: nil,
    load: true,
    download: true,
    useBackgroundDownloadSession: false,
    seed: nil
)
Background Downloads
Use a background URLSession for model downloads:
var config = TTSKitConfig()
config.useBackgroundDownloadSession = true
let tts = try await TTSKit(config)
Useful for downloading large models without blocking the app.
Model State
Track the model lifecycle with modelState:
public enum ModelState {
    case unloaded
    case downloading
    case downloaded
    case loading
    case loaded
    case prewarming
    case prewarmed
    case unloading
}

// Monitor state changes
let tts = try await TTSKit()
tts.modelStateCallback = { oldState, newState in
    print("Model state: \(oldState) -> \(newState)")
}
Memory Management
Unload models to free memory:
// Load models
let tts = try await TTSKit()

// Generate speech
let result = try await tts.generate(text: "Hello")

// Unload models to free ~1-2 GB
await tts.unloadModels()

// Reload when needed
try await tts.loadModels()
Timings
Access detailed timing information:
let tts = try await TTSKit()
print("Model load time: \(tts.modelLoadTime)s")
print("Tokenizer load time: \(tts.tokenizerLoadTime)s")

let result = try await tts.generate(text: "Hello")
print("Full pipeline: \(result.timings.fullPipeline)s")
print("Time to first buffer: \(result.timings.timeToFirstBuffer)s")
print("Total decoding loops: \(result.timings.totalDecodingLoops)")
Reset State
Clear timing statistics between runs:
Example: Full Configuration
Here’s a complete example with all configuration options:
import TTSKit
import CoreML
Task {
    var config = TTSKitConfig()

    // Model selection
    config.model = .qwen3TTS_1_7b
    config.modelRepo = "argmaxinc/ttskit-coreml"
    config.download = true

    // Compute units
    config.computeOptions = ComputeOptions(
        embedderComputeUnits: .cpuOnly,
        codeDecoderComputeUnits: .cpuAndNeuralEngine,
        multiCodeDecoderComputeUnits: .cpuAndNeuralEngine,
        speechDecoderComputeUnits: .cpuAndNeuralEngine
    )

    // Loading behavior
    config.load = true
    config.prewarm = nil  // Skip prewarm if models are already compiled

    // Logging
    config.verbose = true
    config.logLevel = .info

    // Seed for reproducibility
    config.seed = 42

    // Initialize TTSKit
    let tts = try await TTSKit(config)

    // Monitor state changes
    tts.modelStateCallback = { oldState, newState in
        print("Model state: \(oldState) -> \(newState)")
    }

    // Generate speech
    var options = GenerationOptions()
    options.temperature = 0.9
    options.instruction = "Speak warmly and slowly"

    let result = try await tts.generate(
        text: "Hello from TTSKit!",
        speaker: .ryan,
        language: .english,
        options: options
    )

    print("Generated \(result.audioDuration)s of audio")
    print("Model load: \(tts.modelLoadTime)s")
    print("Pipeline: \(result.timings.fullPipeline)s")
}
Next Steps
Overview Return to TTSKit overview
Generation Learn about generation options