Tif1’s architecture is designed for performance and reliability. Data flows through multiple layers with caching, validation, and lazy loading to ensure fast access while maintaining data integrity.
# Creating a session doesn't load any data
session = tif1.get_session(2024, "Monaco", "Race")  # Instant

# Data is loaded on first access
laps = session.laps  # Triggers: check cache → fetch from CDN → process
weather = session.weather  # Separate data flow for weather
tel = laps.iloc[0].telemetry  # Separate data flow for telemetry
From core.py:3491-3547 - the laps property:
@property
def laps(self) -> DataFrame:
    """Get all laps data for the session (auto-async for 4-5x faster loading).

    The result is memoized on ``self._laps``; the body below only runs while
    that attribute is still ``None``.

    Returns:
        The laps object for this session: either the entry found in the
        backend lap cache, or a freshly built ``Laps`` wrapping the frame
        returned by ``laps_async()``.
    """
    if self._laps is None:
        # Check in-memory cache
        cache_key = f"{self.year}_{self.gp}_{self.session}_laps"
        lap_cache = _get_backend_lap_cache(self.lib) if self.enable_cache else None
        if lap_cache is not None:
            cached_laps = lap_cache.get(cache_key)
            if cached_laps is not None:
                logger.info(f"Lap cache hit ({self.lib}): {cache_key}")
                # NOTE(review): the cached object is used as-is; its `session`
                # attribute is not rebound to this instance here - confirm
                # that this is intended.
                self._laps = cached_laps
                return self._laps

        # Cache miss - load async
        logger.info(f"Loading laps async ({self.lib}): {cache_key}")
        # asyncio.run() drives the coroutine to completion from sync code.
        laps_df = asyncio.run(self.laps_async())
        self._laps = Laps(laps_df)
        self._laps.session = self

        # Store in cache
        if lap_cache is not None:
            lap_cache.set(cache_key, self._laps)
    return self._laps
Tif1 fetches data from TracingInsights GitHub repositories served through jsDelivr CDN:
# CDN URL structure
https://cdn.jsdelivr.net/gh/tracinginsights/{year}@main/{gp}/{session}/{path}

# Example URLs:
# Drivers data:
https://cdn.jsdelivr.net/gh/tracinginsights/2024@main/Monaco_Grand_Prix/Race/drivers.json
# Lap times for VER:
https://cdn.jsdelivr.net/gh/tracinginsights/2024@main/Monaco_Grand_Prix/Race/VER/laptimes.json
# Telemetry for VER lap 45:
https://cdn.jsdelivr.net/gh/tracinginsights/2024@main/Monaco_Grand_Prix/Race/VER/45_tel.json
# From core_utils/json_utils.py
def parse_response_json(response) -> dict:
    """Parse JSON with validation.

    Args:
        response: Object exposing the raw response body as ``response.content``.

    Returns:
        The decoded payload, which is always a ``dict``.

    Raises:
        InvalidDataError: If the decoded top-level value is not a dict.
            Decoding errors from ``orjson.loads`` are not caught here and
            propagate to the caller.
    """
    import orjson  # Fast JSON parser

    # Parse JSON (orjson is 2-3x faster than stdlib json)
    data = orjson.loads(response.content)

    # Validate structure
    if not isinstance(data, dict):
        raise InvalidDataError("Expected dict payload")
    return data
# From core.py:1167-1249def _apply_laps_dtypes(df: pd.DataFrame) -> pd.DataFrame: """Enforce dtype contract on pandas laps DataFrame.""" # Timedelta columns (lap times, sector times) for col in ('LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time'): if col in df.columns: df[col] = pd.to_timedelta(df[col], unit='s') # Float64 columns (lap number, position, speeds) for col in ('LapNumber', 'Position', 'SpeedI1', 'SpeedI2'): if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce').astype('float64') # Categorical columns (driver, team, compound) - 50% memory reduction for col in ('Driver', 'Team', 'Compound'): if col in df.columns: df[col] = df[col].astype('category') return df
Type Optimization Benefits:
Categorical types: 50% memory reduction for repeated strings
# User code
fastest_tels = session.get_fastest_laps_tels(by_driver=True)

# Internal flow:
# 1. Get fastest lap for each driver (from laps DataFrame)
# 2. Check which telemetry is cached
# 3. Build list of missing telemetry files
# 4. Fetch ALL missing telemetry in parallel
#    - 20 drivers, 20 parallel requests
#    - Uses asyncio.gather() for concurrency
# 5. Process all telemetry DataFrames
# 6. Concatenate into single DataFrame
# 7. Store each in cache
# 8. Return combined DataFrame
# Typical cache hit rates in a workflow
import tif1

# First run (cold start)
session1 = tif1.get_session(2024, "Monaco", "Race")
laps1 = session1.laps  # 400ms - CDN fetch

# Second run (same process)
laps2 = session1.laps  # <1ms - memory cache hit

# Third run (different process, same machine)
session2 = tif1.get_session(2024, "Monaco", "Race")
laps3 = session2.laps  # 10ms - SQLite cache hit
import os

# Set request timeout (seconds)
os.environ['TIF1_TIMEOUT'] = '60'

# Set max retries
os.environ['TIF1_MAX_RETRIES'] = '5'

# Set retry backoff factor
os.environ['TIF1_RETRY_BACKOFF_FACTOR'] = '1.5'