Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/topk-io/topk/llms.txt

Use this file to discover all available pages before exploring further.

The query module provides functions to build expressive search queries combining semantic similarity, keyword matching, and filters.

Overview

TopK queries are built using a fluent API:
from topk_sdk.query import select, field, filter, fn

results = client.collection("books").query(
    select(
        "title", "year",
        similarity=fn.semantic_similarity("title", "classic novel")
    )
    .filter(field("year") > 1900)
    .topk(field("similarity"), 10)
)

Query Stages

select()

Create a query with a select stage. Specify which fields to return and compute additional expressions.
from topk_sdk.query import select, field, fn

# Select specific fields
query = select("title", "author")

# Select fields and compute expressions
query = select(
    "title",
    year=field("published_year"),
    similarity=fn.semantic_similarity("title", "animal")
)
*args
str
Field names to include in the results.
**kwargs
LogicalExpr | FunctionExpr
Named expressions to compute. The name becomes the field name in results.
return
Query
A new query with the select stage.

filter()

Create a query with a filter stage. Only documents matching the filter are returned.
from topk_sdk.query import filter, field

# Filter by field value
query = filter(field("published_year") > 1980)

# Combine filters
query = filter(
    (field("year") >= 1900) & (field("year") <= 2000)
)
expr
LogicalExpr | TextExpr
required
The filter expression. Documents must satisfy this expression to be included.
return
Query
A new query with the filter stage.

Query.filter()

Add a filter stage to an existing query.
query = select("title").filter(field("year") > 2000)
expr
LogicalExpr | TextExpr
required
The filter expression.
return
Query
The query with the filter stage added.

Query.sort()

Add a sort stage to sort results by an expression.
from topk_sdk.query import select, field

# Sort ascending (default)
query = select("title", "year").sort(field("year"))

# Sort descending
query = select("title", "year").sort(field("year"), asc=False)
expr
LogicalExpr
required
The expression to sort by.
asc
bool
default:"true"
Sort in ascending order if True, descending if False.
return
Query
The query with the sort stage added.

Query.topk()

Add a top-k stage to return the top k results by a scoring expression.
from topk_sdk.query import select, field, fn

query = select(
    "title",
    score=fn.semantic_similarity("title", "dystopian")
).topk(field("score"), 10)
expr
LogicalExpr
required
The scoring expression. Results are ranked by this value.
k
int
required
The number of top results to return.
asc
bool
default:"false"
If True, return the k results with the lowest scores. If False (default), return the k results with the highest scores.
return
Query
The query with the top-k stage added.

Query.limit()

Add a limit stage to restrict the number of results.
query = select("title").limit(10)
k
int
required
The maximum number of results to return.
return
Query
The query with the limit stage added.

Query.count()

Add a count stage to return the count of matching documents instead of the documents themselves.
count_query = filter(field("year") > 2000).count()
return
Query
The query with the count stage added.

Query.rerank()

Add a rerank stage to re-score results using a reranking model.
query = select("title", "description").rerank(
    model="cohere/rerank-english-v3.0",
    query="best science fiction books",
    fields=["title", "description"],
    topk_multiple=3
)
model
str
The reranking model to use.
query
str
The query text to rerank against.
fields
Sequence[str]
default:"[]"
Fields to consider during reranking.
topk_multiple
int
Number of candidates to retrieve before reranking, expressed as a multiple of k.
return
Query
The query with the rerank stage added.

Field References

field()

Reference a field from the document.
from topk_sdk.query import field, filter, fn, select

# Use in filters
filter(field("year") > 2000)

# Use in select expressions
select("title", publication_year=field("year"))

# Use in topk
select("title", score=fn.semantic_similarity("title", "novel")).topk(field("score"), 10)
name
str
required
The name of the field to reference.
return
LogicalExpr
An expression referencing the field.

literal()

Create a literal expression from a value.
from topk_sdk.query import literal, field, filter

# Usually not needed: plain Python values are converted to literals automatically
filter(field("year").eq(literal(2000)))

# This is equivalent:
filter(field("year") == 2000)
value
Any
required
The literal value.
return
LogicalExpr
A literal expression.

Logical Expressions

LogicalExpr objects support rich operations for building filters and computed fields.

Comparison Operators

from topk_sdk.query import field

# Equality
field("genre").eq("fiction")      # or field("genre") == "fiction"
field("genre").ne("non-fiction")  # or field("genre") != "non-fiction"

# Ordering
field("year").lt(2000)   # or field("year") < 2000
field("year").lte(2000)  # or field("year") <= 2000
field("year").gt(2000)   # or field("year") > 2000
field("year").gte(2000)  # or field("year") >= 2000

Arithmetic Operators

from topk_sdk.query import field, select

# Addition, subtraction, multiplication, division
select(
    "title",
    discounted_price=field("price") * 0.8,
    price_diff=field("price") - field("cost"),
    total=field("price") + field("tax")
)

Logical Operators

from topk_sdk.query import field, filter

# AND
filter(field("year").gt(1900) & field("year").lt(2000))

# OR
filter(field("genre").eq("fiction") | field("genre").eq("mystery"))

String Operators

from topk_sdk.query import field, filter

# Check if string starts with prefix
filter(field("title").starts_with("The"))

# Check if list/string contains value
filter(field("tags").contains("bestseller"))

# Check if value is in list
filter(field("genre").in_(["fiction", "mystery", "thriller"]))

# Regular expression matching
filter(field("title").regexp_match(r"^The.*", flags="i"))

Keyword Matching

from topk_sdk.query import field, filter

# Match any term (OR)
filter(field("title").match_any(["science", "fiction"]))

# Match all terms (AND)
filter(field("title").match_all(["science", "fiction"]))

Null Handling

from topk_sdk.query import field, filter, select

# Check for null
filter(field("description").is_null())
filter(field("description").is_not_null())

# Coalesce null values
select("title", rating=field("rating").coalesce(0.0))

Math Functions

from topk_sdk.query import field, select

select(
    "title",
    abs_diff=field("price").sub(field("cost")).abs(),
    sqrt_price=field("price").sqrt(),
    squared=field("rating").square(),
    log_price=field("price").ln(),
    exp_value=field("value").exp()
)

Conditional Expressions

from topk_sdk.query import field, fn, select

# Choose between two values based on condition
select(
    "title",
    status=field("in_stock").choose("Available", "Out of Stock")
)

# Boost scoring based on condition
select(
    "title",
    score=fn.semantic_similarity("title", "novel").boost(
        field("is_bestseller"),
        2.0  # 2x boost for bestsellers
    )
)

# Min/max
select(
    "title",
    min_value=field("price").min(field("msrp")),
    max_value=field("rating").max(4.5)
)

match()

Perform keyword search for documents containing specific keywords or phrases. Use with fields that have a keyword_index().
from topk_sdk.query import match, field, filter, select, fn

# Simple keyword match
query = filter(match("science"))

# Match on specific field
query = filter(match("fiction", field="genre"))

# Match with weight
query = filter(match("bestseller", weight=2.0))

# Match all terms (AND)
query = filter(match("science fiction", all=True))

# Combine with BM25 scoring
query = select(
    "title",
    text_score=fn.bm25_score()
).filter(
    match("dystopian") | match("future")
).topk(field("text_score"), 10)
token
str
required
The keyword or phrase to search for.
field
str
The specific field to search in. If not provided, searches all keyword-indexed fields.
weight
float
default:"1.0"
Weight for this term in scoring.
all
bool
default:"false"
If True, all tokens must match (AND). If False, any token can match (OR).
return
LogicalExpr
A keyword match expression.

Logical Combinators

all()

Create a logical AND expression combining multiple conditions.
from topk_sdk.query import field, all, filter

query = filter(all([
    field("published_year") >= 1900,
    field("published_year") <= 2000,
    field("title").is_not_null()
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with AND.
return
LogicalExpr
A logical AND expression.

any()

Create a logical OR expression combining multiple conditions.
from topk_sdk.query import field, any, filter

query = filter(any([
    field("genre") == "fiction",
    field("genre") == "mystery",
    field("genre") == "thriller"
]))
exprs
Sequence[LogicalExpr]
required
List of expressions to combine with OR.
return
LogicalExpr
A logical OR expression.

not_()

Negate a logical expression.
from topk_sdk.query import field, not_, filter

query = filter(not_(field("title").contains("Catcher")))
expr
LogicalExpr
required
The expression to negate.
return
LogicalExpr
The negated expression.

Function Expressions

The fn class provides functions for semantic similarity, vector distance, and keyword scoring.

fn.semantic_similarity()

Calculate semantic similarity between a field and a query string. Requires a semantic_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title",
        similarity=fn.semantic_similarity("title", "animal story")
    ).topk(field("similarity"), 10)
)
field
str
required
The field name with a semantic index.
query
str
required
The query text to compare against.
return
FunctionExpr
A semantic similarity scoring function.

fn.vector_distance()

Calculate vector distance between a field and a query vector. Requires a vector_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title",
        distance=fn.vector_distance(
            "title_embedding",
            [0.1, 0.2, 0.3, ...]  # Your embedding vector
        )
    ).topk(field("distance"), 10)
)
field
str
required
The field name containing vectors with a vector index.
vector
list[int] | list[float] | dict[int, float] | dict[int, int] | SparseVector | List
required
The query vector. Can be:
  • Dense vector: [0.1, 0.2, 0.3, ...]
  • Sparse vector: {0: 0.5, 10: 0.8, 50: 0.3}
  • SparseVector or List instance from topk_sdk.data
skip_refine
bool
default:"false"
Skip the refinement step for approximate search.
return
FunctionExpr
A vector distance scoring function.

fn.multi_vector_distance()

Calculate multi-vector distance between a matrix field and a query matrix. Requires a multi_vector_index() on the field.
from topk_sdk.query import select, field, fn

results = client.collection("documents").query(
    select(
        "title",
        distance=fn.multi_vector_distance(
            "colbert_embeddings",
            [[0.1, 0.2, ...], [0.3, 0.4, ...], ...],
            candidates=100
        )
    ).topk(field("distance"), 10)
)
field
str
required
The field name containing matrices with a multi-vector index.
matrix
Matrix | ndarray | list[list[float]] | list[list[int]]
required
The query matrix. Can be:
  • List of lists: [[0.1, 0.2], [0.3, 0.4]]
  • Numpy array: np.array([[0.1, 0.2], [0.3, 0.4]])
  • Matrix instance from topk_sdk.data
candidates
int
Limit the number of candidate vectors considered during search.
return
FunctionExpr
A multi-vector distance scoring function.

fn.bm25_score()

Calculate BM25 score for keyword search. Use with match() filters.
from topk_sdk.query import select, field, fn, match

results = client.collection("books").query(
    select(
        "title",
        text_score=fn.bm25_score()
    )
    .filter(match("dystopian") | match("future"))
    .topk(field("text_score"), 10)
)
return
FunctionExpr
A BM25 scoring function.

Helper Functions

min()

Compute the minimum of two expressions.
from topk_sdk.query import field, min, select

query = select(
    "title",
    best_price=min(field("price"), field("sale_price"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
return
LogicalExpr
The minimum value.

max()

Compute the maximum of two expressions.
from topk_sdk.query import field, max, select

query = select(
    "title",
    max_rating=max(field("user_rating"), field("critic_rating"))
)
left
int | float | str | LogicalExpr
required
First value to compare.
right
int | float | str | LogicalExpr
required
Second value to compare.
return
LogicalExpr
The maximum value.

abs()

Compute the absolute value of an expression.
from topk_sdk.query import field, abs, filter

query = filter(abs(field("temperature")) > 30)
expr
LogicalExpr
required
The expression to compute the absolute value of.
return
LogicalExpr
The absolute value.

Complete Examples

from topk_sdk.query import select, field, fn

results = client.collection("books").query(
    select(
        "title", "author", "year",
        similarity=fn.semantic_similarity("title", "space exploration")
    )
    .filter(field("year") > 1950)
    .topk(field("similarity"), 10)
)

Hybrid Search (Semantic + Keyword)

from topk_sdk.query import select, field, fn, match

results = client.collection("books").query(
    select(
        "title",
        semantic_score=fn.semantic_similarity("title", "artificial intelligence"),
        keyword_score=fn.bm25_score(),
        hybrid_score=fn.semantic_similarity("title", "AI") + fn.bm25_score()
    )
    .filter(match("technology") | match("computer"))
    .topk(field("hybrid_score"), 10)
)

Vector Search with Filters

from topk_sdk.query import select, field, fn

results = client.collection("products").query(
    select(
        "name", "price", "category",
        similarity=fn.vector_distance("image_embedding", user_query_vector)
    )
    .filter(
        (field("price") < 100) & 
        (field("in_stock") == True) &
        field("category").in_(["electronics", "gadgets"])
    )
    .topk(field("similarity"), 20)
)

Multi-Vector Search (ColBERT)

from topk_sdk.query import select, field, fn
import numpy as np

query_matrix = np.random.rand(10, 128).astype(np.float32)

results = client.collection("documents").query(
    select(
        "title", "content",
        relevance=fn.multi_vector_distance(
            "colbert_embeddings",
            query_matrix,
            candidates=500
        )
    ).topk(field("relevance"), 10)
)

Build docs developers (and LLMs) love