Skip to main content

Overview

The embeddings API generates dense vector representations for:
  • Text (transformer models)
  • Images (vision encoders)
  • Audio (acoustic encoders)
Embeddings enable semantic search, retrieval-augmented generation (RAG), and similarity comparison.

cactus_embed

Generate text embeddings.
/**
 * Generate a text embedding.
 *
 * @param model             Embedding model handle from cactus_init.
 * @param text              Input text to embed.
 * @param embeddings_buffer Output buffer for the embedding vector.
 * @param buffer_size       Size of embeddings_buffer in bytes (not floats).
 * @param embedding_dim     Output parameter: actual embedding dimension.
 * @param normalize         Whether to L2-normalize the output vector.
 * @return Number of float values written on success, -1 on error,
 *         -2 if the buffer is too small.
 */
int cactus_embed(
    cactus_model_t model,
    const char* text,
    float* embeddings_buffer,
    size_t buffer_size,
    size_t* embedding_dim,
    bool normalize
);
model
cactus_model_t
required
Embedding model handle from cactus_init
text
string
required
Input text to embed
embeddings_buffer
float*
required
Output buffer for embedding vector
buffer_size
size_t
required
Size of buffer in bytes
embedding_dim
size_t*
Output parameter: actual embedding dimension
normalize
bool
required
Whether to L2-normalize the output vector
return
int
Number of float values written on success, -1 on error, -2 if buffer too small

cactus_image_embed

Generate image embeddings.
/**
 * Generate an image embedding.
 *
 * @param model             Vision model handle (e.g., Siglip2).
 * @param image_path        Path to the image file.
 * @param embeddings_buffer Output buffer for the embedding vector.
 * @param buffer_size       Size of embeddings_buffer in bytes (not floats).
 * @param embedding_dim     Output parameter: actual embedding dimension.
 * @return Number of float values written on success, -1 on error,
 *         -2 if the buffer is too small.
 */
int cactus_image_embed(
    cactus_model_t model,
    const char* image_path,
    float* embeddings_buffer,
    size_t buffer_size,
    size_t* embedding_dim
);
model
cactus_model_t
required
Vision model handle (e.g., Siglip2)
image_path
string
required
Path to image file
embeddings_buffer
float*
required
Output buffer for embedding vector
buffer_size
size_t
required
Size of buffer in bytes
embedding_dim
size_t*
Output parameter: actual embedding dimension
return
int
Number of float values written on success, -1 on error, -2 if buffer too small

cactus_audio_embed

Generate audio embeddings.
/**
 * Generate an audio embedding.
 *
 * @param model             ASR model handle (Whisper, Parakeet).
 * @param audio_path        Path to a WAV file.
 * @param embeddings_buffer Output buffer for the embedding vector.
 * @param buffer_size       Size of embeddings_buffer in bytes (not floats).
 * @param embedding_dim     Output parameter: actual embedding dimension.
 * @return Number of float values written on success, -1 on error,
 *         -2 if the buffer is too small.
 */
int cactus_audio_embed(
    cactus_model_t model,
    const char* audio_path,
    float* embeddings_buffer,
    size_t buffer_size,
    size_t* embedding_dim
);
model
cactus_model_t
required
ASR model handle (Whisper, Parakeet)
audio_path
string
required
Path to WAV file
embeddings_buffer
float*
required
Output buffer for embedding vector
buffer_size
size_t
required
Size of buffer in bytes
embedding_dim
size_t*
Output parameter: actual embedding dimension
return
int
Number of float values written on success, -1 on error, -2 if buffer too small

Example: Text Embeddings

#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>

int main() {
    // Initialize embedding model
    cactus_model_t model = cactus_init("/path/to/nomic-embed", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }
    
    // Allocate buffer (typical dimension: 768 or 1024)
    float embeddings[1024];
    size_t actual_dim = 0;
    
    int result = cactus_embed(
        model,
        "The quick brown fox jumps over the lazy dog",
        embeddings,
        sizeof(embeddings),
        &actual_dim,
        true  // normalize
    );
    
    if (result > 0) {
        printf("Embedding dimension: %zu\n", actual_dim);
        printf("First 5 values: ");
        for (size_t i = 0; i < 5 && i < actual_dim; i++) {
            printf("%.4f ", embeddings[i]);
        }
        printf("\n");
    } else if (result == -2) {
        printf("Buffer too small, need %zu bytes\n", actual_dim * sizeof(float));
    } else {
        printf("Error: %s\n", cactus_get_last_error());
    }
    
    cactus_destroy(model);
    return 0;
}

Example: Image Embeddings

#include "cactus_ffi.h"
#include <math.h>
#include <stdio.h>

float cosine_similarity(const float* a, const float* b, size_t dim) {
    float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
    for (size_t i = 0; i < dim; i++) {
        dot += a[i] * b[i];
        norm_a += a[i] * a[i];
        norm_b += b[i] * b[i];
    }
    return dot / (sqrtf(norm_a) * sqrtf(norm_b));
}

/*
 * Embed two images with a vision model and print their cosine similarity.
 *
 * Returns 0 on success, 1 when the model cannot be loaded, an embedding
 * call fails, or the two embeddings have different dimensions.
 */
int main(void) {
    cactus_model_t model = cactus_init("/path/to/siglip2", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }

    // Capacity in floats. The dimension table in this document lists 1152
    // for Siglip2; a smaller buffer makes cactus_image_embed return -2.
    enum { EMBED_CAPACITY = 1152 };
    float embed1[EMBED_CAPACITY], embed2[EMBED_CAPACITY];
    size_t dim1 = 0, dim2 = 0;
    int exit_code = 1;

    // buffer_size is in bytes, hence sizeof on the arrays.
    int r1 = cactus_image_embed(model, "image1.jpg", embed1, sizeof(embed1), &dim1);
    int r2 = (r1 > 0)
        ? cactus_image_embed(model, "image2.jpg", embed2, sizeof(embed2), &dim2)
        : r1;

    if (r1 <= 0 || r2 <= 0) {
        // Never compare buffers that were not (fully) written.
        fprintf(stderr, "Image embedding failed (code %d): %s\n",
                (r1 <= 0) ? r1 : r2, cactus_get_last_error());
    } else if (dim1 != dim2) {
        fprintf(stderr, "Embedding dimensions differ: %zu vs %zu\n", dim1, dim2);
    } else {
        float similarity = cosine_similarity(embed1, embed2, dim1);
        printf("Image similarity: %.4f\n", similarity);
        exit_code = 0;
    }

    cactus_destroy(model);
    return exit_code;
}

Example: Audio Embeddings

#include "cactus_ffi.h"
#include <stdio.h>

/*
 * Extract an embedding from a WAV file with an ASR model and report its
 * dimension.
 *
 * Returns 0 on success, 1 when the model cannot be loaded or the embedding
 * call fails.
 */
int main(void) {
    cactus_model_t model = cactus_init("/path/to/whisper", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load model\n");
        return 1;
    }

    // Whisper encoder output. Capacity is in floats and must be at least the
    // model's embedding dimension, otherwise the call returns -2.
    float embeddings[1500];
    size_t dim = 0;
    int exit_code = 0;

    int result = cactus_audio_embed(
        model,
        "speech.wav",
        embeddings,
        sizeof(embeddings),  // buffer_size is expressed in bytes
        &dim
    );

    if (result > 0) {
        printf("Audio embedding extracted: %zu dimensions\n", dim);
        // Use embeddings for similarity search, classification, etc.
    } else {
        fprintf(stderr, "Audio embedding failed (code %d): %s\n",
                result, cactus_get_last_error());
        exit_code = 1;
    }

    cactus_destroy(model);
    return exit_code;
}

Model-Specific Dimensions

| Model | Embedding Dimension | Notes |
|---|---|---|
| nomic-embed-text | 768 | Text embeddings, normalized |
| Qwen2-1.5B | 1536 | Last hidden state |
| Siglip2 | 1152 | Vision encoder output |
| Whisper-small | 768 | Encoder features |
| Parakeet-TDT | 512 | Acoustic features |

Normalization

For similarity search and RAG, always set normalize=true for text embeddings. This ensures cosine similarity can be computed efficiently using dot products:
// Plain dot product of two embeddings of length dim. This equals the cosine
// similarity only when both vectors were L2-normalized (normalize=true);
// for unnormalized vectors, divide by the product of their norms.
float similarity = 0.0f;
for (size_t i = 0; i < dim; i++) {
    similarity += embed_a[i] * embed_b[i];
}

Integration with Vector Index

Embeddings are typically stored in a vector index for fast retrieval:
// Sketch: embed two documents, add them to a vector index, then query it.
// `model` is an embedding model handle created elsewhere (not shown).
// Return values are not checked in this snippet; real code should check
// every call before using its outputs.

// Generate embeddings (normalize=true so scores can be compared directly)
// NOTE(review): 768 floats matches nomic-embed-text in the dimension table;
// other models need larger buffers -- confirm for the model in use.
float embed1[768], embed2[768];
size_t dim = 0;
cactus_embed(model, "document 1", embed1, sizeof(embed1), &dim, true);
cactus_embed(model, "document 2", embed2, sizeof(embed2), &dim, true);

// Create index, using the dimension reported by the embedding calls
cactus_index_t index = cactus_index_init("/path/to/index", dim);

// Add documents: parallel arrays of ids, texts and embedding pointers
int ids[] = {1, 2};
const char* docs[] = {"document 1", "document 2"};
const float* embeds[] = {embed1, embed2};
cactus_index_add(index, ids, docs, NULL, embeds, 2, dim);

// Query: embed the search text with the same model and settings
float query_embed[768];
cactus_embed(model, "search query", query_embed, sizeof(query_embed), &dim, true);

const float* query_embeds[] = {query_embed};
int* result_ids = NULL;
float* scores = NULL;
size_t result_count = 0;

// NOTE(review): the same result_count variable is passed for both the id
// and the score outputs, and result_ids/scores start out NULL. Whether the
// call allocates these buffers (and who frees them) is not shown here --
// confirm against the cactus_index_query reference.
cactus_index_query(
    index,
    query_embeds, 1, dim,
    "{\"top_k\":5}",
    &result_ids, &result_count,
    &scores, &result_count
);

See Also

Vector Index

Store and query embeddings

Python SDK

Python embeddings API

RAG Guide

Build RAG systems

Semantic Search

Implement semantic search

Build docs developers (and LLMs) love