Skip to main content
Embeddings convert text into numerical vectors that capture semantic meaning. Use them for search, recommendations, clustering, and similarity comparisons.

Basic usage

Generate embeddings for a single text string:
import os
from dedalus_labs import Dedalus

# Read the API key from the environment rather than hard-coding it.
client = Dedalus(api_key=os.environ.get("DEDALUS_API_KEY"))

result = client.embeddings.create(
    model="text-embedding-3-small",
    input="The quick brown fox jumps over the lazy dog",
)

# The vector for a single-string request is the first (and only) data entry.
vector = result.data[0].embedding
print(f"Embedding dimension: {len(vector)}")
print(f"First few values: {vector[:5]}")

Async usage

Generate embeddings asynchronously:
import os
import asyncio
from dedalus_labs import AsyncDedalus

client = AsyncDedalus(api_key=os.environ.get("DEDALUS_API_KEY"))


async def main():
    """Request one embedding asynchronously and report its dimensionality."""
    result = await client.embeddings.create(
        model="text-embedding-3-small",
        input="The quick brown fox jumps over the lazy dog",
    )
    vector = result.data[0].embedding
    print(f"Generated {len(vector)}-dimensional embedding")


asyncio.run(main())

Batch processing

Generate embeddings for multiple texts in a single request:
from dedalus_labs import Dedalus

client = Dedalus()

# Passing a list of strings embeds them all in a single round trip.
texts = [
    "The quick brown fox jumps over the lazy dog",
    "Machine learning is a subset of artificial intelligence",
    "Python is a popular programming language",
]

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts,
)

# response.data preserves the order of the input list.
for index, item in enumerate(response.data):
    print(f"Text {index}: dimension {len(item.embedding)}")
    print(f"  First few values: {item.embedding[:3]}")

print(f"\nTotal tokens used: {response.usage.total_tokens}")

Available models

text-embedding-3-small — best for most use cases, offering a balance of performance and cost.
# Request an embedding from text-embedding-3-small (assumes a `client` is
# already constructed, as in the earlier examples).
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="Your text here"
)
# Default: 1536 dimensions

Custom dimensions

Reduce dimensionality for storage efficiency (text-embedding-3 models only):
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="Your text here",
    # Shrink the vector from the model's default 1536 dimensions to 512.
    dimensions=512,
)

shrunk = response.data[0].embedding
print(f"Reduced to {len(shrunk)} dimensions")
Reducing dimensions can save storage space and improve search performance with minimal quality loss. The model is trained to produce meaningful representations at various dimensionalities.

Encoding formats

Choose between float and base64 encoding:
# encoding_format selects how vectors are serialized in the response:
# "float" (the default) yields a plain list of floats; "base64" is the
# alternative format mentioned above.
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="Your text here",
    encoding_format="float"  # Default
)

# Returns list of floats
embedding = response.data[0].embedding
print(type(embedding))  # <class 'list'>
print(type(embedding[0]))  # <class 'float'>

Common use cases

import numpy as np
from dedalus_labs import Dedalus

client = Dedalus()

# Corpus to search over.
documents = [
    "Python is a programming language",
    "JavaScript runs in browsers",
    "Machine learning uses neural networks",
    "Databases store structured data",
]

doc_response = client.embeddings.create(
    model="text-embedding-3-small",
    input=documents,
)
doc_embeddings = [item.embedding for item in doc_response.data]

# Embed the query with the same model so the vectors are comparable.
query = "Tell me about programming"
query_response = client.embeddings.create(
    model="text-embedding-3-small",
    input=query,
)
query_embedding = query_response.data[0].embedding

# Calculate cosine similarity
def cosine_similarity(a, b):
    """Cosine similarity of vectors a and b: dot(a, b) / (|a| * |b|)."""
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator

# Rank every document against the query and report the best match.
similarities = [
    cosine_similarity(query_embedding, candidate)
    for candidate in doc_embeddings
]

best = np.argmax(similarities)
print(f"Most similar document: {documents[best]}")
print(f"Similarity score: {similarities[best]:.4f}")

Clustering

from sklearn.cluster import KMeans
import numpy as np
from dedalus_labs import Dedalus

client = Dedalus()

texts = [
    "apple fruit red delicious",
    "banana fruit yellow tropical",
    "car vehicle transportation auto",
    "truck vehicle transportation heavy",
    "orange fruit citrus vitamin",
]

embedding_response = client.embeddings.create(
    model="text-embedding-3-small",
    input=texts,
)
vectors = np.array([item.embedding for item in embedding_response.data])

# Partition into two clusters; a fixed random_state keeps the grouping
# reproducible across runs.
model = KMeans(n_clusters=2, random_state=42)
labels = model.fit_predict(vectors)

for text, label in zip(texts, labels):
    print(f"Cluster {label}: {text}")

Token limits and batching

The embeddings API has the following limits:
  • Maximum 8,192 tokens per input string
  • Maximum 300,000 tokens total per request
  • Empty strings are not allowed
  • Maximum 2,048 input items per array request
Process large batches efficiently:
from dedalus_labs import Dedalus

client = Dedalus()


def batch_embed(texts, batch_size=100):
    """Embed *texts* in chunks of *batch_size*, preserving input order.

    Splitting a large list across several requests keeps each call within
    the API's per-request limits.
    """
    vectors = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        result = client.embeddings.create(
            model="text-embedding-3-small",
            input=chunk,
        )
        vectors.extend(item.embedding for item in result.data)
    return vectors


# Process 500 texts in batches of 100
large_text_list = [f"Document {i} content..." for i in range(500)]
embeddings = batch_embed(large_text_list)
print(f"Generated {len(embeddings)} embeddings")

Error handling

import dedalus_labs
from dedalus_labs import Dedalus

client = Dedalus()

# Handler order matters: the generic APIStatusError comes last so the more
# specific handlers run first (presumably RateLimitError/BadRequestError are
# its subclasses — confirm against the SDK's exception hierarchy).
try:
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input="Your text here"
    )
except dedalus_labs.APIConnectionError as e:
    # Transport-level failure: no HTTP response was received.
    print("Network error occurred")
    print(e.__cause__)  # the underlying exception, e.g. a socket error
except dedalus_labs.RateLimitError as e:
    print("Rate limit exceeded, retry with backoff")
except dedalus_labs.BadRequestError as e:
    print(f"Invalid request: {e.response}")
except dedalus_labs.APIStatusError as e:
    print(f"API error: {e.status_code}")
Embeddings are deterministic - the same input will always produce the same output vector for a given model and dimension setting.

Build docs developers (and LLMs) love