-- Use event_time instead of _tp_timeSELECT window_start, window_end, sum(amount) as total_amountFROM tumble(transactions, event_time, INTERVAL 1 HOUR)GROUP BY window_start, window_end;
-- Analyze user sessionsSELECT user_id, window_start, window_end, count() as page_views, count(DISTINCT page_url) as unique_pages, sum(CASE WHEN action = 'purchase' THEN 1 ELSE 0 END) as purchasesFROM session(user_activity, event_time, INTERVAL 5 MINUTE)PARTITION BY user_idGROUP BY user_id, window_start, window_end;
Windows extend as long as events arrive within gap threshold
New window starts after inactivity gap
Must use PARTITION BY to define session scope
Gap: 5sEvents: A B C D E FTime: 0s 2s 4s 12s 13s 15sSession1: [--------) (ends at 9s, 5s after event C)Session2: [----------) (ends at 20s, 5s after event F)
-- Track user sessions with 5-minute timeoutSELECT user_id, window_start as session_start, window_end as session_end, count() as actions, count(DISTINCT page_url) as pages_visited, date_diff('minute', window_start, window_end) as session_minutesFROM session(page_views, view_time, INTERVAL 5 MINUTE)PARTITION BY user_idGROUP BY user_id, window_start, window_endHAVING count() > 5; -- Filter short sessions
-- Compare 1-minute and 5-minute windowsWITH one_min AS ( SELECT device_id, window_start, avg(value) as avg_1min FROM tumble(metrics, timestamp, INTERVAL 1 MINUTE) GROUP BY device_id, window_start, window_end ), five_min AS ( SELECT device_id, window_start, avg(value) as avg_5min FROM tumble(metrics, timestamp, INTERVAL 5 MINUTE) GROUP BY device_id, window_start, window_end )SELECT one_min.device_id, one_min.window_start, one_min.avg_1min, five_min.avg_5min, abs(one_min.avg_1min - five_min.avg_5min) as varianceFROM one_minLEFT JOIN five_min ON one_min.device_id = five_min.device_id AND one_min.window_start >= five_min.window_start AND one_min.window_start < five_min.window_end;
-- Find top auctions by bid count per windowWITH auction_counts AS ( SELECT auction_id, count(*) as bid_count, window_start, window_end FROM hop(bids, bid_time, INTERVAL 2 SECOND, INTERVAL 10 SECOND) GROUP BY auction_id, window_start, window_end),max_counts AS ( SELECT max(bid_count) as max_bids, window_start, window_end FROM auction_counts GROUP BY window_start, window_end)SELECT ac.auction_id, ac.bid_count, ac.window_start, ac.window_endFROM auction_counts acINNER JOIN max_counts mc ON ac.window_start = mc.window_start AND ac.window_end = mc.window_endWHERE ac.bid_count >= mc.max_bids;
-- Too small: High overhead, many windowstumble(events, _tp_time, INTERVAL 1 SECOND)-- Too large: High latency, delayed insights tumble(events, _tp_time, INTERVAL 1 DAY)-- Balanced for real-time monitoringtumble(events, _tp_time, INTERVAL 1 MINUTE)
Always GROUP BY window columns
Include window_start and window_end in GROUP BY:
SELECT device_id, window_start, window_end, count() as cntFROM tumble(metrics, timestamp, INTERVAL 5 MINUTE)GROUP BY device_id, window_start, window_end;
Use event time, not ingestion time
When available, use event timestamp for accurate windowing:
-- Good: Use event timestamptumble(events, event_timestamp, INTERVAL 1 HOUR)-- May be inaccurate: Use ingestion timetumble(events, _tp_time, INTERVAL 1 HOUR)
Consider timezone for daily/hourly windows
Specify timezone for proper alignment:
-- Aligns to UTC hourstumble(events, timestamp, INTERVAL 1 HOUR)-- Aligns to New York hourstumble(events, timestamp, INTERVAL 1 HOUR, 'America/New_York')
Use EMIT STREAM for controlled updates
Control when window results are emitted:
SELECT window_start, count() as cntFROM tumble(events, _tp_time, INTERVAL 5 MINUTE)GROUP BY window_start, window_endEMIT STREAM PERIODIC INTERVAL 5 SECOND;