- Primary Objective
- Secondary Objectives
- Combined Loss
Tweet Click Prediction
The primary label is whether the user clicked on a tweet:
Powered by Mintlify
Auto-generate your docs
Legacy TensorFlow-based machine learning framework used for training light ranker models in X’s recommendation pipeline
Documentation Index
Fetch the complete documentation index at: https://mintlify.com/twitter/the-algorithm/llms.txt
Use this file to discover all available pages before exploring further.
src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/
The light ranker is a critical component that pre-filters candidates from the search index before heavy ranking.
The DataRecordTrainer class contains the core training logic for TWML models:
# Simplified example from train.py
class DataRecordTrainer:
    """Core trainer for TWML models using DataRecord format.

    Simplified illustration: the helpers ``_build_optimizer``,
    ``_parse_data_record``, ``_train_epoch``, ``_evaluate`` and
    ``_save_checkpoint`` are called below but are not shown in this example.
    """

    def __init__(self, params):
        """Store the configuration, then build the model and the optimizer.

        Args:
            params: Configuration object. This class reads ``hidden_size``
                and ``num_epochs`` from it.
        """
        self.params = params
        self.model = self._build_model()
        self.optimizer = self._build_optimizer()

    def _build_model(self):
        """Construct the TensorFlow graph for the model.

        Returns:
            The output of the final single-unit dense layer (ranking logits).
        """
        # Define input features from DataRecord
        features = self._parse_data_record()

        # Build model layers
        hidden = tf.layers.dense(
            features,
            units=self.params.hidden_size,
            activation=tf.nn.relu,
        )

        # Output layer for ranking score
        logits = tf.layers.dense(hidden, units=1)
        return logits

    def train(self, train_data, eval_data):
        """Run the training loop for ``params.num_epochs`` epochs.

        Each epoch trains on ``train_data``, evaluates on ``eval_data`` and
        then saves a checkpoint together with that epoch's metrics.

        Args:
            train_data: Passed unchanged to ``_train_epoch``.
            eval_data: Passed unchanged to ``_evaluate``.

        Returns:
            None. This simplified loop does not return a model or metrics.
        """
        for epoch in range(self.params.num_epochs):
            # Training step (the returned loss is not consumed in this
            # simplified loop; it is kept to show what the step yields).
            loss = self._train_epoch(train_data)

            # Evaluation
            metrics = self._evaluate(eval_data)

            # Checkpoint saving
            self._save_checkpoint(epoch, metrics)
Data Collection
Feature Engineering
# Example features for light ranker
features = {
    # --- User-level signals ---
    'user_followers_count': user.followers,
    'user_reputation': user.tweepcred_score,

    # --- Tweet-level signals ---
    # NOTE(review): presumably `now` and `tweet.created_at` are both epoch
    # seconds, so the difference is already in seconds — confirm.
    'tweet_age_seconds': now - tweet.created_at,
    # Flags are cast to int before being used as features.
    'tweet_has_media': int(tweet.has_media),
    'tweet_has_url': int(tweet.has_url),

    # --- Engagement signals ---
    'author_engagement_rate': author.avg_engagement,
    'tweet_early_engagement': tweet.engagement_1hr,
}
Model Training
# Training command (simplified): runs train.py for 10 epochs at a 0.001
# learning rate. The data and model paths below are placeholders.
python train.py \
    --train_data_path=/path/to/train/data \
    --eval_data_path=/path/to/eval/data \
    --model_dir=/path/to/model/output \
    --num_epochs=10 \
    --learning_rate=0.001
# DataRecord structure (conceptual)
class DataRecord:
    """Container for training examples.

    Conceptual sketch: the fields are declared as annotations only, with no
    default values and no constructor. The annotations use ``Set``, ``Dict``
    and ``List`` from the standard ``typing`` module, which must be imported
    for this class to be defined.
    """

    # Sparse binary features (feature ID present/absent)
    binary_features: Set[int]

    # Sparse continuous features (feature ID -> value)
    continuous_features: Dict[int, float]

    # Dense features (fixed-size vector)
    dense_features: List[float]

    # Labels for supervised learning (label name -> value)
    labels: Dict[str, float]
# Train light ranker with TWML
# NOTE(review): the simplified DataRecordTrainer.train() shown in this
# document has no return statement, so with that definition `model` would be
# None and the export call below would raise AttributeError. Confirm what the
# real train() returns before relying on this snippet.
trainer = DataRecordTrainer(params)
model = trainer.train(train_data, eval_data)
model.export_for_serving()
# Simplified light ranker architecture
def build_light_ranker(features, params, hidden_units=(256, 128)):
    """Lightweight neural network for fast candidate scoring.

    Args:
        features: Object exposing ``binary_features`` (sparse) and
            ``dense_features``.
        params: Configuration object; ``embedding_size`` is read from it.
        hidden_units: Width of each ReLU hidden layer, in order. Defaults to
            the original two-layer ``(256, 128)`` stack; pass three values
            for a three-layer ranker.

    Returns:
        The output of the final single-unit dense layer (ranking logits).
    """
    # Input layer - sparse and dense features
    sparse_features = embed_sparse_features(
        features.binary_features,
        embedding_size=params.embedding_size,
    )
    dense_features = features.dense_features

    # Concatenate all features
    combined = tf.concat([sparse_features, dense_features], axis=1)

    # Hidden layers (typically 2-3 layers): each layer feeds the next.
    hidden = combined
    for units in hidden_units:
        hidden = tf.layers.dense(hidden, units=units, activation=tf.nn.relu)

    # Output layer - ranking score
    logits = tf.layers.dense(hidden, units=1)
    return logits
# Primary objective: binary classification (sigmoid cross-entropy) on the
# click label.
click_loss = tf.losses.sigmoid_cross_entropy(
    labels=labels['click'],
    logits=model_output,
)

# Secondary objective: regression (mean squared error) on video watch time.
# NOTE(review): the same `model_output` is used as logits above and as raw
# predictions here — confirm this is intended.
watch_time_loss = tf.losses.mean_squared_error(
    labels=labels['video_watch_time'],
    predictions=model_output,
)

# Combined loss: alpha and beta weight the two objectives.
total_loss = alpha * click_loss + beta * watch_time_loss
Heavy Ranker
the-algorithm-ml/projects/home/recap/
# Main training script
src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/train.py
# Model configuration
src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/README.md