Documentation Index

Fetch the complete documentation index at: https://mintlify.com/MemoriLabs/Memori/llms.txt

Use this file to discover all available pages before exploring further.

Overview

The Embeddings API converts text into vector representations (embeddings) that capture semantic meaning. These embeddings power Memori’s semantic search and memory recall capabilities.

embed_texts()

Generate embeddings for one or more text inputs.

Parameters

texts
str | list[str]
required
Single text string or list of text strings to embed. Examples:
  • "Hello world"
  • ["Hello world", "Goodbye world"]
  • ["Short text", "A much longer piece of text with more content"]

async_
bool
default: False
If True, returns an awaitable that runs the embedding in a threadpool executor. Useful for async frameworks. Note: the parameter name is async_ (with a trailing underscore) to avoid clashing with Python's async keyword.

Returns

return
list[list[float]] | Awaitable[list[list[float]]]
List of embedding vectors, one per input text. Each embedding is a list of floats.
  • Synchronous mode (async_=False): Returns list[list[float]]
  • Asynchronous mode (async_=True): Returns Awaitable[list[list[float]]]
The length of each embedding vector depends on the model:
  • all-MiniLM-L6-v2 (default): 384 dimensions
  • Other models: varies (typically 384-1536 dimensions)

Usage Examples

Basic Embedding

from memori import Memori

mem = Memori()

# Embed a single text
embedding = mem.embed_texts("Hello world")
print(len(embedding))       # 1 (one text)
print(len(embedding[0]))    # 384 (embedding dimension)
print(type(embedding[0]))   # <class 'list'>
print(type(embedding[0][0])) # <class 'float'>

Multiple Texts

from memori import Memori

mem = Memori()

texts = [
    "The quick brown fox",
    "jumps over the lazy dog",
    "Machine learning is fascinating"
]

embeddings = mem.embed_texts(texts)
print(len(embeddings))      # 3 (three texts)
print(len(embeddings[0]))   # 384 (each embedding)

# Access individual embeddings
for i, text in enumerate(texts):
    print(f"Text: {text}")
    print(f"Embedding: {embeddings[i][:5]}...")  # Show first 5 dimensions

Asynchronous Embedding

import asyncio
from memori import Memori

mem = Memori()

async def embed_async():
    # Returns awaitable
    embeddings = await mem.embed_texts(
        ["First text", "Second text"],
        async_=True
    )
    print(f"Generated {len(embeddings)} embeddings")
    return embeddings

# Run async function
embeddings = asyncio.run(embed_async())
print(len(embeddings))  # 2

FastAPI Integration

from fastapi import FastAPI
from memori import Memori
from pydantic import BaseModel

app = FastAPI()
mem = Memori()

class EmbedRequest(BaseModel):
    texts: list[str]

class EmbedResponse(BaseModel):
    embeddings: list[list[float]]
    count: int
    dimensions: int

@app.post("/embed", response_model=EmbedResponse)
async def embed_endpoint(request: EmbedRequest):
    # Use async embedding
    embeddings = await mem.embed_texts(request.texts, async_=True)
    
    return EmbedResponse(
        embeddings=embeddings,
        count=len(embeddings),
        dimensions=len(embeddings[0]) if embeddings else 0
    )
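
For reference, a client call against this endpoint might look like the sketch below (the localhost URL and port are assumptions about your deployment, not part of Memori):
import requests

response = requests.post(
    "http://localhost:8000/embed",
    json={"texts": ["Hello world", "Goodbye world"]},
)
data = response.json()
print(data["count"])       # 2
print(data["dimensions"])  # 384 with the default model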

Custom Embedding Model

import os
from memori import Memori

# Set model via environment variable
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()

# Or set via config
mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"

embeddings = mem.embed_texts("Hello world")

Direct API Usage

You can also use the embed_texts function directly:
from memori.embeddings import embed_texts

# Synchronous
embeddings = embed_texts(
    texts=["Hello", "World"],
    model="all-MiniLM-L6-v2"
)

print(len(embeddings))  # 2

Semantic Similarity

import numpy as np
from memori import Memori

mem = Memori()

def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors."""
    a = np.array(a)
    b = np.array(b)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Embed texts
texts = [
    "I love programming",
    "Coding is my passion",
    "The weather is nice today"
]

embeddings = mem.embed_texts(texts)

# Calculate similarity
sim_1_2 = cosine_similarity(embeddings[0], embeddings[1])
sim_1_3 = cosine_similarity(embeddings[0], embeddings[2])

print(f"Similarity between text 1 and 2: {sim_1_2:.3f}")  # High (similar meaning)
print(f"Similarity between text 1 and 3: {sim_1_3:.3f}")  # Low (different topics)

Batch Processing

from memori import Memori

mem = Memori()

def embed_large_dataset(texts: list[str], batch_size: int = 32):
    """Embed texts in batches."""
    all_embeddings = []
    
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        embeddings = mem.embed_texts(batch)
        all_embeddings.extend(embeddings)
        
        print(f"Processed {min(i + batch_size, len(texts))}/{len(texts)} texts")
    
    return all_embeddings

# Process 1000 texts in batches of 32
large_dataset = [f"Text {i}" for i in range(1000)]
embeddings = embed_large_dataset(large_dataset, batch_size=32)
print(f"Generated {len(embeddings)} embeddings")

Advanced Usage

TEI (Text Embeddings Inference)

Memori supports TEI for high-performance embedding generation:
from memori.embeddings import embed_texts, TEI

# Configure TEI endpoint
tei = TEI(url="http://localhost:8080")

embeddings = embed_texts(
    texts=["Hello world"],
    model="all-MiniLM-L6-v2",
    tei=tei,
    chunk_size=128
)

Format Embeddings for Database

When storing embeddings in databases, use the formatting utility:
from memori.embeddings import embed_texts, format_embedding_for_db

embedding = embed_texts("Hello world", model="all-MiniLM-L6-v2")[0]

# Format for PostgreSQL
pg_embedding = format_embedding_for_db(embedding, dialect="postgresql")

# Format for MongoDB
mongo_embedding = format_embedding_for_db(embedding, dialect="mongodb")

# Format for OceanBase
ocean_embedding = format_embedding_for_db(embedding, dialect="oceanbase")
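
To persist the formatted value, bind it as an ordinary query parameter. The sketch below uses psycopg2 against a hypothetical facts table with a pgvector column; the table, column names, and connection string are illustrative, and it assumes the PostgreSQL formatting above yields a value suitable for parameter binding:
import psycopg2
from memori.embeddings import embed_texts, format_embedding_for_db

embedding = embed_texts("Hello world", model="all-MiniLM-L6-v2")[0]
pg_embedding = format_embedding_for_db(embedding, dialect="postgresql")

conn = psycopg2.connect("dbname=memori_demo")
with conn, conn.cursor() as cur:
    # Hypothetical schema: facts(content text, embedding vector(384))
    cur.execute(
        "INSERT INTO facts (content, embedding) VALUES (%s, %s)",
        ("Hello world", pg_embedding),
    )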

Direct Recall API Usage

The embed_texts function is used internally by the recall API:
from memori import Memori

mem = Memori()
mem.attribution(entity_id="user-123")

# This internally:
# 1. Calls mem.embed_texts("user preferences")
# 2. Searches for similar embeddings in storage
# 3. Returns matching facts
facts = mem.recall("user preferences", limit=5)

Configuration

Embedding Model

import os
from memori import Memori

# Via environment variable
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()

# Via config
mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"

print(mem.config.embeddings.model)  # "all-MiniLM-L6-v2"

Supported Models

Memori uses sentence-transformers models. Popular options:
  • all-MiniLM-L6-v2 (default) - 384 dimensions, fast, good quality
  • all-mpnet-base-v2 - 768 dimensions, higher quality
  • multi-qa-MiniLM-L6-cos-v1 - 384 dimensions, optimized for Q&A
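
If you are deciding between models, a quick sanity check is to embed the same text under each and compare dimensions. This sketch assumes the per-instance config shown above takes effect before the first call, and that each model is available to sentence-transformers (models download on first use):
from memori import Memori

for model_name in [
    "all-MiniLM-L6-v2",
    "all-mpnet-base-v2",
    "multi-qa-MiniLM-L6-cos-v1",
]:
    mem = Memori()
    mem.config.embeddings.model = model_name
    embedding = mem.embed_texts("Hello world")[0]
    # Expected: 384, 768, and 384 dimensions respectively
    print(f"{model_name}: {len(embedding)} dimensions")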

Performance

Embedding Time

Embedding performance depends on:
  • Model size (larger = slower but better quality)
  • Text length (longer = slower)
  • Number of texts (batch processing is efficient)
  • Hardware (GPU vs CPU)
A quick timing check:
import time
from memori import Memori

mem = Memori()

texts = [f"Sample text {i}" for i in range(100)]

start = time.time()
embeddings = mem.embed_texts(texts)
end = time.time()

print(f"Embedded {len(texts)} texts in {end - start:.2f} seconds")
print(f"Average: {(end - start) / len(texts) * 1000:.2f} ms per text")

Async for Better Performance

import asyncio
from memori import Memori

mem = Memori()

async def embed_multiple_batches():
    batches = [
        ["Text 1", "Text 2"],
        ["Text 3", "Text 4"],
        ["Text 5", "Text 6"]
    ]
    
    # Process batches concurrently
    tasks = [
        mem.embed_texts(batch, async_=True)
        for batch in batches
    ]
    
    results = await asyncio.gather(*tasks)
    return results

# Run
all_embeddings = asyncio.run(embed_multiple_batches())
print(f"Generated {len(all_embeddings)} batches")

Error Handling

from memori import Memori

mem = Memori()

try:
    # Empty input
    embeddings = mem.embed_texts([])
    print(embeddings)  # Returns []
    
    # None input raises an error
    embeddings = mem.embed_texts(None)
except Exception as e:
    print(f"Error: {e}")

Best Practices

1. Batch Processing

Process multiple texts at once for efficiency:
# Good: Batch processing
texts = ["Text 1", "Text 2", "Text 3"]
embeddings = mem.embed_texts(texts)

# Avoid: One call per text (repeated per-call overhead)
embeddings = [mem.embed_texts(text)[0] for text in texts]

2. Use Async in Web Applications

# Good: Async in FastAPI/async frameworks
embeddings = await mem.embed_texts(texts, async_=True)

# Avoid: Blocking in async context
embeddings = mem.embed_texts(texts)  # Blocks event loop

3. Choose Appropriate Model

# For speed: all-MiniLM-L6-v2
mem.config.embeddings.model = "all-MiniLM-L6-v2"

# For quality: all-mpnet-base-v2
mem.config.embeddings.model = "all-mpnet-base-v2"

4. Cache Embeddings

If embedding the same texts repeatedly:
from functools import lru_cache
from memori import Memori

mem = Memori()

@lru_cache(maxsize=1000)
def cached_embed(text: str) -> tuple[float, ...]:
    """Cache embeddings for repeated texts."""
    embedding = mem.embed_texts(text)[0]
    return tuple(embedding)  # Tuples are hashable

# Reuse cached results
embed1 = cached_embed("Hello world")
embed2 = cached_embed("Hello world")  # Returns cached result
