
Overview

The Recall API allows you to retrieve relevant memories based on semantic search. It uses embeddings to find facts that match your query, enabling context-aware AI applications.

recall()

Retrieve relevant memories based on a query string.

Parameters

query (str, required)
The search query to find relevant memories. The query is embedded and compared against stored memory embeddings using semantic similarity.
Examples:
  • "What is the user's favorite color?"
  • "user preferences"
  • "previous issues reported by this customer"

limit (int | None, default: None)
Maximum number of facts to return. If not specified, uses config.recall_facts_limit (default: 5).
Range: typically 1-100.
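
For illustration, a minimal sketch of how limit interacts with the configured default (the entity ID and query are placeholders):

from memori import Memori

mem = Memori()
mem.attribution(entity_id="user-123")

# No limit argument: falls back to config.recall_facts_limit (default: 5)
facts = mem.recall("user preferences")

# An explicit limit overrides the configured default for this call
facts = mem.recall("user preferences", limit=3)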

Returns

list[FactSearchResult | Mapping[str, object] | str]

A list of relevant memory facts matching the query. The format depends on the storage backend:
  • Local storage (PostgreSQL, etc.): FactSearchResult objects with fields like fact, score, and metadata
  • Cloud storage: dictionaries or strings containing fact data
Results are sorted by relevance (highest similarity first).
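
Putting the return types together, a sketch of backend-agnostic result handling (assumes an attributed Memori instance; the FactSearchResult attribute names mirror the fields listed above and may differ in practice):

facts = mem.recall("user preferences", limit=5)
for result in facts:
    if isinstance(result, str):
        # Cloud storage may return plain strings
        print(result)
    elif isinstance(result, dict):
        # Cloud storage may return dictionaries containing fact data
        print(result.get("fact", result))
    else:
        # Local storage returns FactSearchResult objects; attribute access
        # here assumes the fields named above (fact, score)
        print(result.fact, result.score)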

Usage Examples

Basic Recall

from memori import Memori

mem = Memori()
mem.attribution(entity_id="user-123")  # attribution is required before recall

facts = mem.recall("What are my preferences?", limit=10)
for fact in facts:
    print(fact)

Recall with OpenAI

import os
from openai import OpenAI
from memori import Memori
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

engine = create_engine(os.getenv("DATABASE_CONNECTION_STRING"))
Session = sessionmaker(bind=engine)

mem = Memori(conn=Session).llm.register(client)
mem.attribution(entity_id="user-123", process_id="my-app")
mem.config.storage.build()

# First conversation - create memories
response1 = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{
        "role": "user",
        "content": "My favorite color is blue and I live in Paris"
    }]
)

# Manual recall to check what was stored (facts are extracted in the
# background, so call mem.augmentation.wait() first if nothing appears yet)
facts = mem.recall("user's favorite color", limit=5)
print(f"Found {len(facts)} relevant facts:")
for i, fact in enumerate(facts, 1):
    print(f"{i}. {fact}")

# Later conversation - automatic recall happens via LLM integration
response2 = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{
        "role": "user",
        "content": "What's my favorite color?"
    }]
)
print(response2.choices[0].message.content)  # "Your favorite color is blue"

mem.augmentation.wait()  # let background memory processing finish before exiting

Recall with Custom Limit

from memori import Memori

mem = Memori()
mem.attribution(entity_id="customer-456")

# Get top 3 most relevant facts
top_facts = mem.recall("customer issues", limit=3)

# Get many facts for comprehensive context
all_facts = mem.recall("customer history", limit=50)

Cloud Storage Recall

import os
from memori import Memori

os.environ["MEMORI_API_KEY"] = "your-api-key"

mem = Memori()  # Uses cloud storage
mem.attribution(entity_id="user-789", process_id="my-app")

facts = mem.recall("user preferences", limit=10)
for fact in facts:
    print(fact)

Processing Recall Results

from memori import Memori

mem = Memori()
mem.attribution(entity_id="user-123")

facts = mem.recall("user preferences", limit=10)

if not facts:
    print("No relevant memories found")
else:
    print(f"Found {len(facts)} relevant memories:")
    
    for i, fact in enumerate(facts, 1):
        # Cloud storage returns dicts or strings
        if isinstance(fact, dict):
            print(f"{i}. {fact.get('fact', fact)}")
        elif isinstance(fact, str):
            print(f"{i}. {fact}")
        else:
            # Local storage returns FactSearchResult objects
            print(f"{i}. {fact}")

Building Context for LLM

from openai import OpenAI
from memori import Memori

client = OpenAI()
mem = Memori()
mem.attribution(entity_id="user-123")

user_query = "What products have I shown interest in?"

# Retrieve relevant memories
facts = mem.recall(user_query, limit=5)

# Build context from facts
context = "\n".join([str(fact) for fact in facts])

# Use context in LLM prompt
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": f"Relevant context:\n{context}"
        },
        {
            "role": "user",
            "content": user_query
        }
    ]
)

print(response.choices[0].message.content)

Customer Support Agent

from memori import Memori
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///support.db")
Session = sessionmaker(bind=engine)

model = OpenAIChat(id="gpt-4o-mini")
mem = Memori(conn=Session).llm.register(openai_chat=model)
mem.config.storage.build()

def handle_customer(customer_id: str, query: str):
    mem.attribution(entity_id=customer_id, process_id="support")
    
    # Recall customer history
    facts = mem.recall(
        f"customer issues and preferences: {query}",
        limit=10
    )
    
    print(f"Recalled {len(facts)} relevant facts for customer {customer_id}")
    
    # Agent uses recalled context automatically
    agent = Agent(
        model=model,
        instructions=[
            "You are a helpful customer support agent.",
            "Use customer history to provide personalized support."
        ]
    )
    
    response = agent.run(query)
    return response.content

# Usage
response = handle_customer("customer-123", "I need help with my order")
print(response)

Configuration

Recall Limits

from memori import Memori

mem = Memori()

# Set default fact limit
mem.config.recall_facts_limit = 10

# Set embeddings search limit (affects search performance)
mem.config.recall_embeddings_limit = 2000

# Set relevance threshold (0.0 to 1.0)
mem.config.recall_relevance_threshold = 0.1

facts = mem.recall("query")  # Uses recall_facts_limit=10

Environment Variables

# Shell: set the embeddings limit via an environment variable
export MEMORI_RECALL_EMBEDDINGS_LIMIT=5000

# Python: the value is picked up automatically at construction
from memori import Memori

mem = Memori()
print(mem.config.recall_embeddings_limit)  # 5000

Embedding Model

# Change the embedding model
from memori import Memori

mem = Memori()
mem.config.embeddings.model = "all-MiniLM-L6-v2"

# Or via environment variable
import os
os.environ["MEMORI_EMBEDDINGS_MODEL"] = "all-MiniLM-L6-v2"
mem = Memori()

How Recall Works

  1. Query Embedding: Your query is converted to an embedding vector using the configured model
  2. Similarity Search: The embedding is compared against stored fact embeddings using cosine similarity
  3. Filtering: Results are filtered by entity_id (from attribution)
  4. Ranking: Facts are ranked by similarity score
  5. Limit: Top N facts are returned based on the limit parameter

# Internally, recall does roughly the following (illustrative sketch):
query = "user preferences"
query_embedding = embed_texts(query, model="all-MiniLM-L6-v2")[0]
facts = search_facts(
    driver=storage.driver.entity_fact,
    entity_id=entity_id,
    query_embedding=query_embedding,
    limit=5,
    embeddings_limit=1000
)

Recall Class (Internal)

The Recall class is used internally by mem.recall(). You typically don’t instantiate it directly, but it’s available if needed:
from memori.memory.recall import Recall

recall = Recall(mem.config)
facts = recall.search_facts(
    query="user preferences",
    limit=10,
    entity_id=None,  # Uses config.entity_id
    cloud=False      # Uses config.cloud
)

Performance Considerations

1. Limit Results

Only fetch what you need:
# Good: Focused recall
facts = mem.recall("recent user activity", limit=5)

# Avoid: Fetching too many facts
facts = mem.recall("everything", limit=1000)

2. Embeddings Limit

Adjust recall_embeddings_limit for performance vs. accuracy:
# Fast search, might miss some relevant facts
mem.config.recall_embeddings_limit = 500

# Slower but more comprehensive
mem.config.recall_embeddings_limit = 5000

3. Query Specificity

Use specific queries for better results:
# Good: Specific query
facts = mem.recall("user's favorite programming language")

# Less effective: Vague query
facts = mem.recall("user info")

Error Handling

from memori import Memori

mem = Memori()

try:
    # Recall without attribution returns an empty list (entity_id not set)
    facts = mem.recall("query")
    print(f"Found {len(facts)} facts")
except Exception as e:
    print(f"Recall error: {e}")

Best Practices

1. Set Attribution Before Recall

mem = Memori()
mem.attribution(entity_id="user-123")  # Required!
facts = mem.recall("user preferences")

2. Use Appropriate Limits

# For LLM context injection
facts = mem.recall(query, limit=5)

# For comprehensive analysis
facts = mem.recall(query, limit=20)

3. Check for Empty Results

facts = mem.recall("query")
if not facts:
    print("No relevant memories found")
else:
    # Process facts
    pass

4. Combine with LLM Integration

Let Memori handle recall automatically:
from openai import OpenAI
from memori import Memori

client = OpenAI()
mem = Memori().llm.register(client)
mem.attribution(entity_id="user-123")

# Recall happens automatically during chat
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What's my favorite color?"}]
)
