Moonshine Voice uses several data structures to represent transcription results, intent matches, and model configuration.

TranscriptLine

Represents a single segment of speech (roughly equivalent to a phrase or sentence).
from moonshine_voice import TranscriptLine

Fields

text (str)
    UTF-8 encoded transcription text
start_time (float)
    Time offset in seconds from session start when speech began
duration (float)
    Duration of the speech segment in seconds
line_id (int)
    Unique 64-bit identifier for this line (stable across updates)
is_complete (bool)
    True when the segment is finalized, False while still being spoken
is_updated (bool)
    True if any field changed since the last transcript update
is_new (bool)
    True if this line was just added in the latest update
has_text_changed (bool)
    True if the text field specifically changed (subset of is_updated)
has_speaker_id (bool)
    True if speaker identification has completed for this line
speaker_id (int)
    Unique 64-bit identifier for the speaker (stable across sessions for the same voice)
speaker_index (int)
    Sequential speaker number (0, 1, 2…) in order of first appearance
audio_data (List[float] or None)
    Raw 16 kHz mono PCM audio for this segment as floats in the range -1.0 to 1.0. None if the return_audio_data option is disabled.

Example

for line in transcript.lines:
    print(f"[{line.start_time:.2f}s] {line.text}")
    if line.has_speaker_id:
        print(f"  Speaker: {line.speaker_index}")
    print(f"  Duration: {line.duration:.2f}s")
    print(f"  Complete: {line.is_complete}")

Transcript

Container for a list of TranscriptLine objects, representing a complete transcription session.
from moonshine_voice import Transcript

Fields

lines (List[TranscriptLine])
    List of transcript lines in chronological order

Common Operations

Transcript exposes its lines as a plain list, so the usual operations are standard list patterns:

# Get full text
full_text = " ".join(line.text for line in transcript.lines)

# Filter completed lines only
completed = [line for line in transcript.lines if line.is_complete]

# Get lines from specific speaker
speaker_0_lines = [
    line for line in transcript.lines 
    if line.has_speaker_id and line.speaker_index == 0
]

Example

transcript = transcriber.update_transcription()

print(f"Total lines: {len(transcript.lines)}")
for i, line in enumerate(transcript.lines):
    status = "[FINAL]" if line.is_complete else "[ACTIVE]"
    print(f"{i+1}. {status} {line.text}")

IntentMatch

Represents a matched voice command.
from moonshine_voice import IntentMatch

Fields

trigger_phrase (str)
    The registered command phrase that was matched
utterance (str)
    The user's actual words that triggered the match
similarity (float)
    Confidence score between 0.0 and 1.0 indicating how well the utterance matches the intent

Example

def on_intent(trigger, utterance, similarity):
    match = IntentMatch(
        trigger_phrase=trigger,
        utterance=utterance,
        similarity=similarity
    )
    print(f"Matched '{match.trigger_phrase}'")
    print(f"User said: '{match.utterance}'")
    print(f"Confidence: {match.similarity:.0%}")

ModelArch

Enum for ASR model architectures.
from moonshine_voice import ModelArch

Values

TINY (int)
    26M parameters, smallest model, fastest inference
BASE (int)
    58M parameters, balanced accuracy/speed
TINY_STREAMING (int)
    34M parameters, supports streaming with caching
SMALL_STREAMING (int)
    123M parameters, high-accuracy streaming
MEDIUM_STREAMING (int)
    245M parameters, highest accuracy (better than Whisper Large V3)

Example

from moonshine_voice import Transcriber, ModelArch

# Use the tiny model for fastest processing
transcriber = Transcriber(
    model_path="/path/to/tiny/models",
    model_arch=ModelArch.TINY
)

# Use streaming model for live audio
streaming_transcriber = Transcriber(
    model_path="/path/to/streaming/models",
    model_arch=ModelArch.SMALL_STREAMING
)

EmbeddingModelArch

Enum for embedding model architectures used in intent recognition.
from moonshine_voice import EmbeddingModelArch

Values

GEMMA_300M (int)
    Gemma-based 300M-parameter sentence embedding model

Example

from moonshine_voice import IntentRecognizer, EmbeddingModelArch

recognizer = IntentRecognizer(
    model_path="/path/to/embedding/models",
    model_arch=EmbeddingModelArch.GEMMA_300M,
    model_variant="q4"
)

Helper Functions

load_wav_file

Load a WAV file and return its audio samples and sample rate.
from moonshine_voice import load_wav_file

audio_data, sample_rate = load_wav_file("speech.wav")
filepath (str, required)
    Path to WAV file
Returns: Tuple of (audio_data: List[float], sample_rate: int)
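
Because the return value is a plain sample list plus a rate, basic properties fall out directly. For example:

from moonshine_voice import load_wav_file

audio_data, sample_rate = load_wav_file("speech.wav")
duration_seconds = len(audio_data) / sample_rate  # samples / (samples per second)
print(f"{duration_seconds:.2f}s of audio at {sample_rate} Hz")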

get_model_for_language

Download the ASR model for a language and return its path and architecture.
from moonshine_voice import get_model_for_language

model_path, model_arch = get_model_for_language("en")

language (str, required)
    Language code: "en", "es", "ar", "ja", "ko", "zh", "uk", "vi"
model_arch (int, default None)
    Specific model architecture (0-5). If None, uses the highest quality available.
Returns: Tuple of (model_path: str, model_arch: int)
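
The returned tuple is shaped to pass straight into Transcriber (see the ModelArch example above). A sketch:

from moonshine_voice import Transcriber, get_model_for_language

# Fetch the best available Spanish model and wire it up
model_path, model_arch = get_model_for_language("es")
transcriber = Transcriber(
    model_path=model_path,
    model_arch=model_arch
)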

get_embedding_model

Download the embedding model for intent recognition and return its path and architecture.
from moonshine_voice import get_embedding_model

model_path, model_arch = get_embedding_model(
    model_name="gemma-300m",
    variant="q4"
)

model_name (str, default "gemma-300m")
    Embedding model name. Currently only "gemma-300m" is supported.
variant (str, default "q4")
    Model quantization: "fp32", "fp16", "q8", "q4", "q4f16"
Returns: Tuple of (model_path: str, model_arch: int)
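
The result pairs with IntentRecognizer from the EmbeddingModelArch example; note that model_variant is passed separately, so keep it consistent with the variant you downloaded. A sketch:

from moonshine_voice import IntentRecognizer, get_embedding_model

variant = "q4"
model_path, model_arch = get_embedding_model(variant=variant)
recognizer = IntentRecognizer(
    model_path=model_path,
    model_arch=model_arch,
    model_variant=variant
)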

supported_languages

Get list of supported language codes.
from moonshine_voice import supported_languages

languages = supported_languages()  # Returns: ['en', 'es', 'ar', 'ja', 'ko', 'zh', 'uk', 'vi']
Returns: List of language codes

supported_languages_friendly

Get dictionary of language codes to full names.
from moonshine_voice import supported_languages_friendly

langs = supported_languages_friendly()
# Returns: {'en': 'English', 'es': 'Spanish', 'ar': 'Arabic', ...}
Returns: Dictionary mapping language codes to display names
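
The two language helpers combine naturally for input validation or a UI menu. A sketch:

from moonshine_voice import (
    get_model_for_language,
    supported_languages,
    supported_languages_friendly,
)

requested = "ja"
if requested not in supported_languages():
    choices = supported_languages_friendly()
    raise ValueError(f"Unsupported language {requested!r}; choose from {choices}")

model_path, model_arch = get_model_for_language(requested)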

model_arch_to_string

Convert ModelArch enum to string name.
from moonshine_voice import ModelArch, model_arch_to_string

name = model_arch_to_string(ModelArch.BASE)  # Returns: "base"

string_to_model_arch

Convert string name to ModelArch enum.
from moonshine_voice import string_to_model_arch

arch = string_to_model_arch("base")  # Returns: ModelArch.BASE
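
These converters are useful when the architecture comes from a config file or environment variable rather than code. A sketch; the MOONSHINE_ARCH variable name is illustrative, not part of the library:

import os

from moonshine_voice import Transcriber, string_to_model_arch

# Hypothetical env var, falling back to "base" when unset
arch = string_to_model_arch(os.environ.get("MOONSHINE_ARCH", "base"))
transcriber = Transcriber(model_path="/path/to/models", model_arch=arch)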

get_assets_path

Get path to bundled assets directory.
from moonshine_voice import get_assets_path

path = get_assets_path()
Returns: Path to assets directory

get_model_path

Get standard model cache path.
from moonshine_voice import get_model_path

path = get_model_path()
Returns: Path to model cache directory

Error Classes

MoonshineError

Base exception class for all Moonshine errors.
from moonshine_voice import MoonshineError, Transcriber

try:
    transcriber = Transcriber(model_path="invalid/path")
except MoonshineError as e:
    print(f"Error: {e}")

MoonshineUnknownError

Raised when an unknown error occurs.

MoonshineInvalidHandleError

Raised when an invalid transcriber or recognizer handle is used.

MoonshineInvalidArgumentError

Raised when invalid arguments are passed to a function.
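
Because the specific errors are subclasses of MoonshineError, catch them before the base class when they need different handling. A sketch:

from moonshine_voice import (
    MoonshineError,
    MoonshineInvalidArgumentError,
    MoonshineInvalidHandleError,
    Transcriber,
)

try:
    transcriber = Transcriber(model_path="/path/to/models")
except MoonshineInvalidArgumentError as e:
    print(f"Bad argument: {e}")
except MoonshineInvalidHandleError as e:
    print(f"Invalid handle: {e}")
except MoonshineError as e:
    print(f"Other Moonshine error: {e}")  # base class catches everything else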
