Overview
The vector index API provides fast similarity search over embeddings with:
Persistent on-disk storage (memory-mapped files)
INT16 quantization for 2x memory savings
Batch operations
Document metadata storage
cactus_index_init
Create or open a vector index.
cactus_index_t cactus_index_init (
const char * index_dir ,
size_t embedding_dim
);
Directory path for index files (created if it doesn't exist)
Dimensionality of embedding vectors
Index handle, or NULL on error
cactus_index_add
Add documents to index.
int cactus_index_add (
cactus_index_t index ,
const int * ids ,
const char ** documents ,
const char ** metadatas ,
const float ** embeddings ,
size_t count ,
size_t embedding_dim
);
Index handle from cactus_index_init
Array of unique document IDs
Array of document text strings
Optional array of metadata JSON strings (can be NULL)
Array of embedding vectors
Number of documents to add
Dimensionality of embeddings
0 on success, -1 on error
cactus_index_query
Find nearest neighbors.
int cactus_index_query (
cactus_index_t index ,
const float ** embeddings ,
size_t embeddings_count ,
size_t embedding_dim ,
const char * options_json ,
int ** id_buffers ,
size_t * id_buffer_sizes ,
float ** score_buffers ,
size_t * score_buffer_sizes
);
Array of query embedding vectors
Optional JSON: {"top_k":10,"score_threshold":0.5}
Output: array of result ID arrays (caller must free)
Output: sizes of each result array
Output: array of similarity score arrays (caller must free)
Output: sizes of each score array
0 on success, -1 on error
Query Options
{
"top_k" : 10 ,
"score_threshold" : 0.5
}
Maximum number of results per query
Minimum similarity score (cosine similarity, range [-1, 1]). -1.0 disables filtering
cactus_index_get
Retrieve documents by ID.
int cactus_index_get (
cactus_index_t index ,
const int * ids ,
size_t ids_count ,
char ** document_buffers ,
size_t * document_buffer_sizes ,
char ** metadata_buffers ,
size_t * metadata_buffer_sizes ,
float ** embedding_buffers ,
size_t * embedding_buffer_sizes
);
cactus_index_delete
Delete documents by ID.
int cactus_index_delete (
cactus_index_t index ,
const int * ids ,
size_t ids_count
);
Array of document IDs to delete
cactus_index_compact
Remove deleted documents and rebuild index.
int cactus_index_compact ( cactus_index_t index );
cactus_index_destroy
Close index and free resources.
void cactus_index_destroy ( cactus_index_t index );
Example: Build Index
#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: build a vector index from a small set of documents.
 *
 * Loads an embedding model, creates/opens an index, embeds each document,
 * adds the batch to the index, then releases all resources.
 * Returns 0 on success, 1 on any failure.
 */
int main(void) {
    int rc = 1;  // pessimistic default; flipped to 0 only on full success

    // Load embedding model.
    cactus_model_t model = cactus_init("/path/to/nomic-embed", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load embedding model\n");
        return 1;
    }

    // Create or open the on-disk index for 768-dimensional embeddings.
    cactus_index_t index = cactus_index_init("/path/to/index", 768);
    if (!index) {
        fprintf(stderr, "Failed to create index\n");
        cactus_destroy(model);
        return 1;
    }

    // Documents to store alongside their embeddings.
    const char *texts[] = {
        "The quick brown fox jumps over the lazy dog",
        "A journey of a thousand miles begins with a single step",
        "To be or not to be, that is the question"
    };
    const size_t num_docs = sizeof texts / sizeof texts[0];

    // Generate one 768-dim embedding per document.
    // calloc zeroes the pointer array so cleanup can free a partial fill safely.
    float **embeddings = calloc(num_docs, sizeof *embeddings);
    if (!embeddings) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }
    for (size_t i = 0; i < num_docs; i++) {
        embeddings[i] = malloc(768 * sizeof(float));
        if (!embeddings[i]) {
            fprintf(stderr, "Out of memory\n");
            goto cleanup;
        }
        size_t dim = 0;
        cactus_embed(model, texts[i], embeddings[i], 768 * sizeof(float), &dim, true);
    }

    // Add the batch; cactus_index_add returns 0 on success, -1 on error.
    int ids[] = {1, 2, 3};
    if (cactus_index_add(index,
                         ids,
                         texts,
                         NULL,  // no metadata
                         (const float **)embeddings,
                         num_docs,
                         768) != 0) {
        fprintf(stderr, "Failed to add documents\n");
        goto cleanup;
    }
    printf("Added %zu documents to index\n", num_docs);
    rc = 0;

cleanup:
    if (embeddings) {
        for (size_t i = 0; i < num_docs; i++) {
            free(embeddings[i]);  // free(NULL) is a no-op for unfilled slots
        }
        free(embeddings);
    }
    cactus_index_destroy(index);
    cactus_destroy(model);
    return rc;
}
Example: Search Index
#include "cactus_ffi.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: query the index with a single embedded question.
 *
 * Embeds the query text, runs a top-k similarity search with a score
 * threshold, prints (id, score) pairs, and frees the result buffers
 * the API allocated on the caller's behalf.
 * Returns 0 on success, 1 on any failure.
 */
int main(void) {
    cactus_model_t model = cactus_init("/path/to/nomic-embed", NULL, false);
    if (!model) {
        fprintf(stderr, "Failed to load embedding model\n");
        return 1;
    }
    cactus_index_t index = cactus_index_init("/path/to/index", 768);
    if (!index) {
        fprintf(stderr, "Failed to open index\n");
        cactus_destroy(model);
        return 1;
    }

    // Generate the query embedding on the stack (768 floats).
    const char *query = "What is the meaning of life?";
    float query_embed[768];
    size_t dim = 0;
    cactus_embed(model, query, query_embed, sizeof(query_embed), &dim, true);

    // Search. One result buffer per query; we issue a single query.
    const float *query_embeds[] = {query_embed};
    int *result_ids = NULL;
    float *scores = NULL;
    size_t id_count = 0;
    size_t score_count = 0;  // equals id_count, but the API reports both separately
    // NOTE: the JSON keys must contain no stray whitespace inside the quotes.
    const char *options = "{\"top_k\":5,\"score_threshold\":0.3}";
    int status = cactus_index_query(
        index,
        query_embeds,
        1,    // single query
        768,
        options,
        &result_ids,
        &id_count,
        &scores,
        &score_count
    );
    if (status == 0) {
        printf("Found %zu results:\n", id_count);
        for (size_t i = 0; i < id_count; i++) {
            printf("  ID: %d, Score: %.4f\n", result_ids[i], scores[i]);
        }
        // Caller owns the buffers the query allocated.
        free(result_ids);
        free(scores);
    } else {
        fprintf(stderr, "Query failed\n");
    }
    cactus_index_destroy(index);
    cactus_destroy(model);
    return status == 0 ? 0 : 1;
}
Example: Batch Queries
const char * queries [] = {
"What is machine learning?" ,
"How do neural networks work?" ,
"Explain deep learning"
};
size_t num_queries = 3 ;
// Generate query embeddings
float ** query_embeds = malloc (num_queries * sizeof ( float * ));
for ( size_t i = 0 ; i < num_queries; i ++ ) {
query_embeds [i] = malloc ( 768 * sizeof ( float ));
size_t dim = 0 ;
cactus_embed (model, queries [i], query_embeds [i], 768 * sizeof ( float ), & dim, true );
}
// Batch search
int ** result_ids = malloc (num_queries * sizeof ( int * ));
float ** scores = malloc (num_queries * sizeof ( float * ));
size_t * result_counts = malloc (num_queries * sizeof ( size_t ));
cactus_index_query (
index,
( const float ** ) query_embeds ,
num_queries,
768 ,
"{ \" top_k \" :3}" ,
result_ids,
result_counts,
scores,
result_counts
);
// Process results
for ( size_t q = 0 ; q < num_queries; q ++ ) {
printf ( "Query %zu results: \n " , q);
for ( size_t i = 0 ; i < result_counts [q]; i ++ ) {
printf ( " ID: %d , Score: %.4f \n " , result_ids [q][i], scores [q][i]);
}
free ( result_ids [q]);
free ( scores [q]);
free ( query_embeds [q]);
}
free (result_ids);
free (scores);
free (result_counts);
free (query_embeds);
The index stores two memory-mapped files:
index.bin: Quantized embeddings (INT16) + scales
data.bin: Document text + metadata
This design enables:
Fast startup (no deserialization)
Low memory usage (OS manages paging)
Incremental updates (append-only writes)
Operation                  | Throughput
---------------------------|-------------------
Add (768-dim)              | ~50k docs/sec
Query (768-dim, 10k docs)  | ~1000 queries/sec
Query (768-dim, 100k docs) | ~200 queries/sec
Performance assumes normalized embeddings and cosine similarity. The index uses brute-force search with SIMD acceleration.
See Also
Embeddings API Generate embeddings
Python SDK Python vector index API
RAG Guide Build RAG systems
C FFI Complete FFI reference