Documentation Index
Fetch the complete documentation index at: https://mintlify.com/TracingInsights/tif1/llms.txt
Use this file to discover all available pages before exploring further.
Overview
The tif1 library supports two DataFrame backends:
- Pandas (default): Widely used, excellent compatibility, rich ecosystem
- Polars: High performance, 2-4x faster for large datasets, modern API
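Both backends expose the same tif1 API, making it easy to switch between them. The DataFrames you get back follow each library's own API, as the backend-specific examples below show.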
Why Choose Polars?
Polars offers significant performance advantages:
- 2-4x faster data loading and processing
- Lower memory usage through efficient internal representation
- Better parallelization using a Rust-based execution engine
- Lazy evaluation for query optimization
- Modern API with method chaining
Setting the Backend
Per-Session
Specify the backend when loading a session:
import tif1
# Both sessions below request the same event; only the backend differs.
session_args = (2025, "Abu Dhabi Grand Prix", "Practice 1")

# pandas is the default backend; it is spelled out here for clarity
session_pandas = tif1.get_session(*session_args, lib="pandas")

# polars backend
session_polars = tif1.get_session(*session_args, lib="polars")

# Each session reports the backend it was loaded with
print(f"Pandas session: {session_pandas.lib}")
print(f"Polars session: {session_polars.lib}")
Global Configuration
Set the default backend in a .tif1rc file:
Or via environment variable:
# Export TIF1_LIB so the script started below inherits it
export TIF1_LIB=polars
python your_script.py
Runtime Configuration
import tif1
from tif1.config import get_config

# Change the default backend for the running process
config = get_config()
config.set("lib", "polars")

# No lib= argument here: all sessions will now use polars by default
session = tif1.get_session(
    2025,
    "Monaco Grand Prix",
    "Race",
)
print(f"Backend: {session.lib}")
Compare backends with the same data:
import tif1
import time
# Time loading the same session with each backend.
# Intervals are measured with time.perf_counter(), which is monotonic and
# high-resolution; time.time() can jump and has coarse resolution on some
# platforms.

# Clear cache for fair comparison
cache = tif1.get_cache()
cache.clear()

print("BACKEND PERFORMANCE COMPARISON")
print("=" * 60)

# Test with Polars
print("\n1. POLARS BACKEND")
start = time.perf_counter()
session_polars = tif1.get_session(
    2025,
    "Abu Dhabi Grand Prix",
    "Practice 1",
    lib="polars",
)
laps_polars = session_polars.laps
polars_time = time.perf_counter() - start
print(f" Load time: {polars_time:.2f}s")
print(f" Total laps: {len(laps_polars)}")
print(f" DataFrame type: {type(laps_polars).__name__}")

# Clear cache again so the pandas run starts from the same state
cache.clear()

# Test with Pandas
print("\n2. PANDAS BACKEND")
start = time.perf_counter()
session_pandas = tif1.get_session(
    2025,
    "Abu Dhabi Grand Prix",
    "Practice 1",
    lib="pandas",
)
laps_pandas = session_pandas.laps
pandas_time = time.perf_counter() - start
print(f" Load time: {pandas_time:.2f}s")
print(f" Total laps: {len(laps_pandas)}")
print(f" DataFrame type: {type(laps_pandas).__name__}")

# Performance comparison
print("\n3. COMPARISON")
# Guard against dividing by a zero-length interval
if polars_time > 0:
    speedup = pandas_time / polars_time
    print(f" Speedup: {speedup:.1f}x faster with Polars")
Typical results:
1. POLARS BACKEND
Load time: 1.23s
Total laps: 342
DataFrame type: DataFrame
2. PANDAS BACKEND
Load time: 2.87s
Total laps: 342
DataFrame type: DataFrame
3. COMPARISON
Speedup: 2.3x faster with Polars
Working with Polars
Basic Operations
Polars DataFrames work similarly to pandas:
import tif1
session = tif1.get_session(
    2025,
    "Monaco Grand Prix",
    "Race",
    lib="polars",
)
laps = session.laps

# Dimensions and column names
print(f"Shape: {laps.shape}")
print(f"Columns: {laps.columns}")

# First ten and last five rows
print(laps.head(10))
print(laps.tail(5))

# Keep only the columns of interest
wanted_columns = ["Driver", "LapTime", "Compound"]
subset = laps.select(wanted_columns)

# Row filters: build each boolean mask first, then apply it
is_fast = laps["LapTime"] < 90.0
is_ver = laps["Driver"] == "VER"
fast_laps = laps.filter(is_fast)
ver_laps = laps.filter(is_ver)
Polars-Specific Operations
Take advantage of Polars’ powerful API:
import polars as pl
# Per-driver summary, sorted by each driver's fastest lap
driver_summary_exprs = [
    pl.col("LapTime").min().alias("fastest_time"),
    pl.col("LapNumber").count().alias("lap_count"),
    pl.col("Compound").first().alias("tire_compound"),
]
fastest_by_driver = (
    laps.group_by("Driver").agg(driver_summary_exprs).sort("fastest_time")
)
print(fastest_by_driver.head(10))

# Per-stint summary built as one chained pipeline
stint_keys = ["Driver", "Stint"]
stint_summary_exprs = [
    pl.col("LapTime").mean().alias("avg_time"),
    pl.col("LapTime").min().alias("best_time"),
    pl.col("TyreLife").max().alias("stint_length"),
]
timed_laps = laps.filter(pl.col("LapTime").is_not_null())
analysis = timed_laps.group_by(stint_keys).agg(stint_summary_exprs).sort(stint_keys)
print(analysis)
Lazy Evaluation
Use lazy evaluation for complex queries:
import polars as pl
# Switch to the lazy API
lazy_laps = laps.lazy()

# Name the predicates and aggregations, then assemble the query
green_flag = pl.col("TrackStatus") == 1  # Green flag only
has_lap_time = pl.col("LapTime").is_not_null()
lap_time_stats = [
    pl.col("LapTime").min().alias("fastest"),
    pl.col("LapTime").mean().alias("average"),
]
query = (
    lazy_laps.filter(green_flag)
    .filter(has_lap_time)
    .group_by("Driver")
    .agg(lap_time_stats)
    .sort("fastest")
)

# collect() executes the query (optimized by Polars)
result = query.collect()
print(result)
Working with Pandas
Basic Operations
import tif1
session = tif1.get_session(
    2025,
    "Monaco Grand Prix",
    "Race",
    lib="pandas",
)
laps = session.laps

# Dimensions and column names
print(f"Shape: {laps.shape}")
print(f"Columns: {laps.columns}")

# First ten and last five rows
print(laps.head(10))
print(laps.tail(5))

# Keep only the columns of interest
wanted_columns = ["Driver", "LapTime", "Compound"]
subset = laps[wanted_columns]

# Row filters: build each boolean mask first, then index with it
is_fast = laps["LapTime"] < 90.0
is_ver = laps["Driver"] == "VER"
fast_laps = laps[is_fast]
ver_laps = laps[is_ver]
Pandas-Specific Operations
import pandas as pd
# Per-driver summary, sorted by each driver's fastest lap
driver_aggregations = {
    "LapTime": "min",
    "LapNumber": "count",
    "Compound": "first",
}
summary_names = {
    "LapTime": "fastest_time",
    "LapNumber": "lap_count",
    "Compound": "tire_compound",
}
per_driver = laps.groupby("Driver", observed=True).agg(driver_aggregations)
fastest_by_driver = per_driver.rename(columns=summary_names).sort_values(
    "fastest_time"
)
print(fastest_by_driver.head(10))

# Combine two conditions into a single boolean mask
is_green_flag = laps["TrackStatus"] == 1
has_lap_time = laps["LapTime"].notna()
green_flag_laps = laps[is_green_flag & has_lap_time]

# Best lap per driver (rows) and stint (columns)
pivot = pd.pivot_table(
    data=laps,
    values="LapTime",
    index="Driver",
    columns="Stint",
    aggfunc="min",
)
print(pivot)
Converting Between Backends
Polars to Pandas
import tif1
# Start from a session loaded with the polars backend
session = tif1.get_session(
    2025,
    "Monaco Grand Prix",
    "Race",
    lib="polars",
)
laps_polars = session.laps
print(f"Polars type: {type(laps_polars).__name__}")

# to_pandas() returns the same data as a pandas DataFrame
laps_pandas = laps_polars.to_pandas()
print(f"Pandas type: {type(laps_pandas).__name__}")
print(f"Shape: {laps_pandas.shape}")
Pandas to Polars
import polars as pl
import tif1
# Start from a session loaded with the pandas backend
session = tif1.get_session(
    2025,
    "Monaco Grand Prix",
    "Race",
    lib="pandas",
)
laps_pandas = session.laps
print(f"Pandas type: {type(laps_pandas).__name__}")

# pl.from_pandas() builds a polars DataFrame from the pandas one
laps_polars = pl.from_pandas(laps_pandas)
print(f"Polars type: {type(laps_polars).__name__}")
print(f"Shape: {laps_polars.shape}")
Backend-Agnostic Code
Write code that works with both backends:
import tif1
def analyze_session(year, event, session_type, lib="pandas"):
    """Load a session and print the fastest lap per driver.

    The shared steps use only operations available on both pandas and
    polars DataFrames; only the aggregation is backend-specific.

    Args:
        year: Season year, passed through to ``tif1.get_session``.
        event: Event name, passed through to ``tif1.get_session``.
        session_type: Session name, passed through to ``tif1.get_session``.
        lib: Backend name. ``"polars"`` selects the polars aggregation;
            any other value takes the pandas path.

    Returns:
        A DataFrame with each driver's minimum ``LapTime`` in a ``fastest``
        column, sorted ascending. With polars, ``Driver`` is a regular
        column; with pandas, ``Driver`` is the index.
    """
    session = tif1.get_session(year, event, session_type, lib=lib)
    laps = session.laps

    # These operations work with both backends
    total_laps = len(laps)
    drivers = laps["Driver"].unique()
    print(f"Backend: {lib}")
    print(f"Total laps: {total_laps}")
    print(f"Drivers: {len(drivers)}")

    # Backend-specific aggregation
    if lib == "polars":
        # Imported here so the pandas path does not need polars installed
        import polars as pl

        fastest = (
            laps.group_by("Driver")
            .agg(pl.col("LapTime").min().alias("fastest"))
            .sort("fastest")
        )
    else:  # pandas
        fastest = (
            laps.groupby("Driver", observed=True)["LapTime"]
            .min()
            .sort_values()
            .to_frame("fastest")
        )

    print("\nFastest laps:")
    print(fastest.head(5))
    return fastest
# Run the same analysis once per backend, with a rule before each run
rule = "=" * 60
print(rule)
analyze_session(2025, "Monaco Grand Prix", "Race", lib="pandas")
print("\n" + rule)
analyze_session(2025, "Monaco Grand Prix", "Race", lib="polars")
When to Use Each Backend
Use Pandas When:
- You need maximum compatibility with existing pandas code
- You’re using libraries that integrate most smoothly with pandas DataFrames (matplotlib, seaborn, etc.)
- You’re working with small datasets (<10,000 rows)
- You need pandas-specific features (MultiIndex, etc.)
- You’re teaching or learning (pandas has more tutorials)
Use Polars When:
- You need maximum performance
- You’re working with large datasets (>10,000 rows)
- You’re doing complex aggregations or transformations
- Memory usage is a concern
- You want modern, consistent API design
- You’re starting a new project
Complete Comparison Example
import tif1
import time
def benchmark_backend(lib):
    """Time a session load and a per-driver aggregation for one backend.

    The cache is cleared first so the load is not served from a previous
    run. Both backends compute the same aggregation (min and mean lap time,
    lap count, sorted by fastest lap) so the timings are comparable.

    Args:
        lib: Backend name passed to ``tif1.get_session``. ``"polars"`` runs
            the polars aggregation; any other value runs the pandas one.

    Returns:
        A ``(load_time, agg_time)`` tuple of elapsed seconds.
    """
    print(f"\nTesting {lib.upper()} backend:")
    print("-" * 60)

    # Clear cache
    cache = tif1.get_cache()
    cache.clear()

    # Load session; perf_counter() is the clock meant for short intervals
    start = time.perf_counter()
    session = tif1.get_session(
        2025,
        "Abu Dhabi Grand Prix",
        "Practice 1",
        lib=lib,
    )
    laps = session.laps
    load_time = time.perf_counter() - start
    print(f"Load time: {load_time:.2f}s")
    print(f"Laps loaded: {len(laps)}")
    print(f"DataFrame type: {type(laps).__module__}.{type(laps).__name__}")

    # Benchmark aggregation; the result is computed only to be timed
    start = time.perf_counter()
    if lib == "polars":
        import polars as pl

        result = (
            laps.group_by("Driver")
            .agg([
                pl.col("LapTime").min().alias("fastest"),
                pl.col("LapTime").mean().alias("average"),
                pl.col("LapNumber").count().alias("count"),
            ])
            .sort("fastest")
        )
    else:
        # Named aggregation mirrors the polars branch column for column,
        # including the final sort
        result = (
            laps.groupby("Driver", observed=True)
            .agg(
                fastest=("LapTime", "min"),
                average=("LapTime", "mean"),
                count=("LapNumber", "count"),
            )
            .sort_values("fastest")
        )
    agg_time = time.perf_counter() - start
    print(f"Aggregation time: {agg_time:.3f}s")
    return load_time, agg_time
# Benchmark both backends, then report pandas time / polars time
print("BACKEND COMPARISON BENCHMARK")
print("=" * 60)
pandas_load, pandas_agg = benchmark_backend("pandas")
polars_load, polars_agg = benchmark_backend("polars")

print("\n" + "=" * 60)
print("RESULTS")
print("=" * 60)

# A very fast step can time as 0.0 s; guard each ratio so the report
# never fails with ZeroDivisionError
if polars_load > 0:
    print(f"Load time speedup: {pandas_load / polars_load:.2f}x")
else:
    print("Load time speedup: n/a (polars load time measured as 0s)")

if polars_agg > 0:
    print(f"Aggregation speedup: {pandas_agg / polars_agg:.2f}x")
else:
    print("Aggregation speedup: n/a (polars aggregation time measured as 0s)")
Next Steps