// Project to a whitelist of fields. The `_id` field is always included
// in the output even when not listed.
let results = users
    .aggregate()
    .match_("age > 25")
    .project(&["name", "email", "city"])
    .execute()?; // Only returns name, email, city, and _id
// Exclude a blacklist of sensitive fields; every other field passes
// through to the result unchanged.
let results = users
    .aggregate()
    .match_("status is \"active\"")
    .exclude(&["password", "ssn"])
    .execute()?; // Returns all fields except password and ssn
use jasonisnthappy::Database;
use serde_json::json;

let db = Database::open("analytics.db")?;
let orders = db.collection("orders");

// Analyze sales by region for premium customers
let results = orders
    .aggregate()
    // Stage 1: Filter to completed orders from premium customers
    .match_("status is \"completed\" and customer_tier is \"premium\"")
    // Stage 2: Group by region and calculate metrics
    .group_by("region")
    .count("num_orders")
    .sum("total_amount", "revenue")
    .avg("total_amount", "avg_order_value")
    // Stage 3: Sort by revenue (highest first)
    .sort("revenue", false)
    // Stage 4: Top 10 regions only
    .limit(10)
    // Stage 5: Clean output (exclude internal fields)
    .exclude(&["_internal"])
    .execute()?;

// Each group result carries the grouping key in "_id" plus the
// accumulator fields defined above.
for region in results {
    println!("Region: {}", region["_id"]);
    println!(" Orders: {}", region["num_orders"]);
    println!(" Revenue: ${:.2}", region["revenue"].as_f64().unwrap());
    println!(" Avg Order: ${:.2}", region["avg_order_value"].as_f64().unwrap());
    println!();
}
// Daily sales: completed orders since 2024-01-01, one group per day.
let daily_sales = orders
    .aggregate()
    .match_("created_at >= \"2024-01-01\" and status is \"completed\"")
    .group_by("date") // Assuming date field
    .count("orders")
    .sum("amount", "revenue")
    .avg("amount", "avg_order")
    .sort("date", true) // ascending: oldest day first
    .execute()?;
// Active users by signup monthlet signups = users.aggregate() .match_("status is \"active\"") .group_by("signup_month") .count("new_users") .sort("signup_month", true) .execute()?;// User distribution by age grouplet age_distribution = users.aggregate() .group_by("age_group") // e.g., "18-24", "25-34", etc. .count("users") .sort("users", false) .execute()?;
let events = db.collection("events");

// Most common event types in the window
let event_summary = events
    .aggregate()
    // NOTE(review): 1704067200 is the fixed epoch 2024-01-01T00:00:00Z,
    // not a rolling "last 24 hours"; compute the cutoff at runtime if a
    // rolling window is intended.
    .match_("timestamp > 1704067200")
    .group_by("event_type")
    .count("occurrences")
    .sort("occurrences", false) // descending: most frequent first
    .limit(10)
    .execute()?;
Use `match_` early: filter documents at the start of the pipeline, before expensive stages such as grouping and sorting, so later stages process fewer documents.
// Good: filter firstlet results = users.aggregate() .match_("status is \"active\"") .group_by("city") .count("total") .execute()?;// Bad: group all documents then filterlet results = users.aggregate() .group_by("city") .count("total") .match_("total > 10") // This doesn't work - match before group! .execute()?;
Create an index on each field you group by, so the grouping stage can use the index instead of scanning every document:
// Index the field you group bydb.create_index("users", "city_idx", "city", false)?;// Now grouping by city is fasterlet results = users.aggregate() .group_by("city") .count("total") .execute()?;
Use projection to reduce memory:
// Only load needed fieldslet results = users.aggregate() .project(&["city", "age"]) // Don't load other fields .group_by("city") .avg("age", "avg_age") .execute()?;