Documentation Index
Fetch the complete documentation index at: https://mintlify.com/topk-io/topk/llms.txt
Use this file to discover all available pages before exploring further.
The query module provides functions to build expressive search queries combining semantic similarity, keyword matching, and filters.
Overview
TopK queries are built using a fluent API:
from topk_sdk.query import select, field, filter, fn
results = client.collection("books").query(
select(
"title", "year",
similarity=fn.semantic_similarity("title", "classic novel")
)
.filter(field("year") > 1900)
.topk(field("similarity"), 10)
)
Query Stages
select()
Create a query with a select stage. Specify which fields to return and which additional expressions to compute.
from topk_sdk.query import select, field, fn
# Select specific fields
query = select("title", "author")
# Select fields and compute expressions
query = select(
"title",
year=field("published_year"),
similarity=fn.semantic_similarity("title", "animal")
)
Field names to include in the results.
**kwargs
LogicalExpr | FunctionExpr
Named expressions to compute. The name becomes the field name in results.
A new query with the select stage.
filter()
Create a query with a filter stage. Only documents matching the filter are returned.
from topk_sdk.query import filter, field
# Filter by field value
query = filter(field("published_year") > 1980)
# Combine filters
query = filter(
(field("year") >= 1900) & (field("year") <= 2000)
)
expr
LogicalExpr | TextExpr
required
The filter expression. Documents must satisfy this expression to be included.
A new query with the filter stage.
Query.filter()
Add a filter stage to an existing query.
query = select("title").filter(field("year") > 2000)
expr
LogicalExpr | TextExpr
required
The filter expression.
The query with the filter stage added.
Query.sort()
Add a sort stage to sort results by an expression.
from topk_sdk.query import select, field
# Sort ascending (default)
query = select("title", "year").sort(field("year"))
# Sort descending
query = select("title", "year").sort(field("year"), asc=False)
The expression to sort by.
Whether to sort in ascending order (asc=True, the default) or descending order (asc=False).
The query with the sort stage added.
Query.topk()
Add a top-k stage to return the top k results by a scoring expression.
from topk_sdk.query import select, field, fn
query = select(
"title",
score=fn.semantic_similarity("title", "dystopian")
).topk(field("score"), 10)
The scoring expression. Results are ranked by this value.
The number of top results to return.
If True, return the k results with the lowest scores. If False (default), return the k results with the highest scores.
The query with the top-k stage added.
Query.limit()
Add a limit stage to restrict the number of results.
query = select("title").limit(10)
The maximum number of results to return.
The query with the limit stage added.
Query.count()
Add a count stage to return the count of matching documents instead of the documents themselves.
count_query = filter(field("year") > 2000).count()
The query with the count stage added.
Query.rerank()
Add a rerank stage to re-score results using a reranking model.
query = select("title", "description").rerank(
model="cohere/rerank-english-v3.0",
query="best science fiction books",
fields=["title", "description"],
topk_multiple=3
)
The reranking model to use.
The query text to rerank against.
fields
Sequence[str]
default:"[]"
Fields to consider during reranking.
Retrieve this multiple of k candidates before reranking.
The query with the rerank stage added.
Field References
field()
Reference a field from the document.
from topk_sdk.query import field, filter, select, fn
# Use in filters
filter(field("year") > 2000)
# Use in select expressions
select("title", publication_year=field("year"))
# Use in topk
select("title", score=fn.semantic_similarity("title", "novel")).topk(field("score"), 10)
The name of the field to reference.
An expression referencing the field.
literal()
Create a literal expression from a value.
from topk_sdk.query import literal, field, filter
# Usually not needed, values are auto-converted
filter(field("year").eq(literal(2000)))
# This is equivalent:
filter(field("year") == 2000)
Logical Expressions
LogicalExpr objects support rich operations for building filters and computed fields.
Comparison Operators
from topk_sdk.query import field
# Equality
field("genre").eq("fiction") # or field("genre") == "fiction"
field("genre").ne("non-fiction") # or field("genre") != "non-fiction"
# Ordering
field("year").lt(2000) # or field("year") < 2000
field("year").lte(2000) # or field("year") <= 2000
field("year").gt(2000) # or field("year") > 2000
field("year").gte(2000) # or field("year") >= 2000
Arithmetic Operators
from topk_sdk.query import field, select
# Addition, subtraction, multiplication, division
select(
"title",
discounted_price=field("price") * 0.8,
price_diff=field("price") - field("cost"),
total=field("price") + field("tax")
)
Logical Operators
from topk_sdk.query import field, filter
# AND
filter(field("year").gt(1900) & field("year").lt(2000))
# OR
filter(field("genre").eq("fiction") | field("genre").eq("mystery"))
String Operators
from topk_sdk.query import field, filter
# Check if string starts with prefix
filter(field("title").starts_with("The"))
# Check if list/string contains value
filter(field("tags").contains("bestseller"))
# Check if value is in list
filter(field("genre").in_(["fiction", "mystery", "thriller"]))
# Regular expression matching
filter(field("title").regexp_match(r"^The.*", flags="i"))
Keyword Matching
from topk_sdk.query import field, filter
# Match any term (OR)
filter(field("title").match_any(["science", "fiction"]))
# Match all terms (AND)
filter(field("title").match_all(["science", "fiction"]))
Null Handling
from topk_sdk.query import field, filter, select
# Check for null
filter(field("description").is_null())
filter(field("description").is_not_null())
# Coalesce null values
select("title", rating=field("rating").coalesce(0.0))
Math Functions
from topk_sdk.query import field, select
select(
"title",
abs_diff=field("price").sub(field("cost")).abs(),
sqrt_price=field("price").sqrt(),
squared=field("rating").square(),
log_price=field("price").ln(),
exp_value=field("value").exp()
)
Conditional Expressions
from topk_sdk.query import field, fn, select
# Choose between two values based on condition
select(
"title",
status=field("in_stock").choose("Available", "Out of Stock")
)
# Boost scoring based on condition
select(
"title",
score=fn.semantic_similarity("title", "novel").boost(
field("is_bestseller"),
2.0 # 2x boost for bestsellers
)
)
# Min/max
select(
"title",
min_value=field("price").min(field("msrp")),
max_value=field("rating").max(4.5)
)
Keyword Search
match()
Perform keyword search for documents containing specific keywords or phrases. Use with fields that have a keyword_index().
from topk_sdk.query import field, match, filter, select, fn
# Simple keyword match
query = filter(match("science"))
# Match on specific field
query = filter(match("fiction", field="genre"))
# Match with weight
query = filter(match("bestseller", weight=2.0))
# Match all terms (AND)
query = filter(match("science fiction", all=True))
# Combine with BM25 scoring
query = select(
"title",
text_score=fn.bm25_score()
).filter(
match("dystopian") | match("future")
).topk(field("text_score"), 10)
The keyword or phrase to search for.
The specific field to search in. If not provided, searches all keyword-indexed fields.
Weight for this term in scoring.
If True, all tokens must match (AND). If False, any token can match (OR).
A keyword match expression.
Logical Combinators
all()
Create a logical AND expression combining multiple conditions.
from topk_sdk.query import field, all, filter
query = filter(all([
field("published_year") >= 1900,
field("published_year") <= 2000,
field("title").is_not_null()
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with AND.
A logical AND expression.
any()
Create a logical OR expression combining multiple conditions.
from topk_sdk.query import field, any, filter
query = filter(any([
field("genre") == "fiction",
field("genre") == "mystery",
field("genre") == "thriller"
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with OR.
A logical OR expression.
not_()
Negate a logical expression.
from topk_sdk.query import field, not_, filter
query = filter(not_(field("title").contains("Catcher")))
The expression to negate.
Function Expressions
The fn class provides functions for semantic similarity, vector distance, and keyword scoring.
fn.semantic_similarity()
Calculate semantic similarity between a field and a query string. Requires a semantic_index() on the field.
from topk_sdk.query import select, field, fn
results = client.collection("books").query(
select(
"title",
similarity=fn.semantic_similarity("title", "animal story")
).topk(field("similarity"), 10)
)
The field name with a semantic index.
The query text to compare against.
A semantic similarity scoring function.
fn.vector_distance()
Calculate vector distance between a field and a query vector. Requires a vector_index() on the field.
from topk_sdk.query import select, field, fn
results = client.collection("books").query(
select(
"title",
distance=fn.vector_distance(
"title_embedding",
[0.1, 0.2, 0.3, ...] # Your embedding vector
)
).topk(field("distance"), 10)
)
The field name containing vectors with a vector index.
vector
list[int] | list[float] | dict[int, float] | dict[int, int] | SparseVector | List
required
The query vector. Can be:
- Dense vector:
[0.1, 0.2, 0.3, ...]
- Sparse vector:
{0: 0.5, 10: 0.8, 50: 0.3}
- SparseVector or List instance from topk_sdk.data
Skip the refinement step for approximate search.
A vector distance scoring function.
fn.multi_vector_distance()
Calculate multi-vector distance between a matrix field and a query matrix. Requires a multi_vector_index() on the field.
from topk_sdk.query import select, field, fn
results = client.collection("documents").query(
select(
"title",
distance=fn.multi_vector_distance(
"colbert_embeddings",
[[0.1, 0.2, ...], [0.3, 0.4, ...], ...],
candidates=100
)
).topk(field("distance"), 10)
)
The field name containing matrices with a multi-vector index.
matrix
Matrix | ndarray | list[list[float]] | list[list[int]]
required
The query matrix. Can be:
- List of lists:
[[0.1, 0.2], [0.3, 0.4]]
- Numpy array:
np.array([[0.1, 0.2], [0.3, 0.4]])
- Matrix instance from topk_sdk.data
Limit the number of candidate vectors considered during search.
A multi-vector distance scoring function.
fn.bm25_score()
Calculate BM25 score for keyword search. Use with match() filters.
from topk_sdk.query import select, field, fn, match
results = client.collection("books").query(
select(
"title",
text_score=fn.bm25_score()
)
.filter(match("dystopian") | match("future"))
.topk(field("text_score"), 10)
)
Helper Functions
min()
Compute the minimum of two expressions.
from topk_sdk.query import field, min, select
query = select(
"title",
best_price=min(field("price"), field("sale_price"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
max()
Compute the maximum of two expressions.
from topk_sdk.query import field, max, select
query = select(
"title",
max_rating=max(field("user_rating"), field("critic_rating"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
abs()
Compute the absolute value of an expression.
from topk_sdk.query import field, abs, filter
query = filter(abs(field("temperature")) > 30)
The expression to compute the absolute value of.
Complete Examples
Semantic Search
from topk_sdk.query import select, field, fn
results = client.collection("books").query(
select(
"title", "author", "year",
similarity=fn.semantic_similarity("title", "space exploration")
)
.filter(field("year") > 1950)
.topk(field("similarity"), 10)
)
Hybrid Search (Semantic + Keyword)
from topk_sdk.query import select, field, fn, match
results = client.collection("books").query(
select(
"title",
semantic_score=fn.semantic_similarity("title", "artificial intelligence"),
keyword_score=fn.bm25_score(),
hybrid_score=fn.semantic_similarity("title", "AI") + fn.bm25_score()
)
.filter(match("technology") | match("computer"))
.topk(field("hybrid_score"), 10)
)
Vector Search with Filters
from topk_sdk.query import select, field, fn
results = client.collection("products").query(
select(
"name", "price", "category",
similarity=fn.vector_distance("image_embedding", user_query_vector)
)
.filter(
(field("price") < 100) &
(field("in_stock") == True) &
field("category").in_(["electronics", "gadgets"])
)
.topk(field("similarity"), 20)
)
Multi-Vector Search (ColBERT)
from topk_sdk.query import select, field, fn
import numpy as np
query_matrix = np.random.rand(10, 128).astype(np.float32)
results = client.collection("documents").query(
select(
"title", "content",
relevance=fn.multi_vector_distance(
"colbert_embeddings",
query_matrix,
candidates=500
)
).topk(field("relevance"), 10)
)