Documentation Index
Fetch the complete documentation index at: https://mintlify.com/autorope/donkeycar/llms.txt
Use this file to discover all available pages before exploring further.
PyTorch model parts provide an alternative deep learning framework to Keras for building autonomous driving models.
Overview
Donkeycar supports PyTorch models through a modular architecture similar to Keras parts. PyTorch models can be used interchangeably with Keras models in your vehicle.
Location: donkeycar/parts/pytorch/
Getting Started
Installation
# Install PyTorch (CPU version)
pip install torch torchvision
# For NVIDIA Jetson (GPU)
# NOTE(review): pick the index that matches your JetPack release; v461 presumably
# corresponds to JetPack 4.6.1 - confirm against NVIDIA's Jetson PyTorch guide.
pip install torch torchvision --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v461
# For Raspberry Pi (CPU optimized)
pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
Basic Usage
from donkeycar.parts.pytorch.torch_utils import get_model_by_type

# Create model (weights are read from checkpoint_path)
model = get_model_by_type(
    model_type='resnet18',
    cfg=cfg,
    checkpoint_path='./models/my_model.pth',
)

# Add to vehicle: camera frame in, steering angle and throttle out,
# gated by the 'run_pilot' run condition
V.add(
    model,
    inputs=['cam/image_array'],
    outputs=['pilot/angle', 'pilot/throttle'],
    run_condition='run_pilot',
)
Available Models
ResNet18
Transfer learning model based on ImageNet pre-trained ResNet18.
Location: donkeycar/parts/pytorch/ResNet18.py
from donkeycar.parts.pytorch.ResNet18 import ResNet18

# Shape is ordered (BatchSize, Channels, Height, Width)
batch_shape = (cfg.BATCH_SIZE, 3, 224, 224)
model = ResNet18(input_shape=batch_shape, num_outputs=2)  # angle, throttle
Architecture:
class ResNet18(nn.Module):
    """Regression model built on an ImageNet pre-trained ResNet18 backbone.

    Args:
        input_shape: Accepted as part of the constructor signature; this
            constructor does not read it.
        num_outputs: Width of the final linear layer (default 2).
    """

    def __init__(self, input_shape, num_outputs=2):
        super().__init__()
        # Load pre-trained ResNet18
        resnet = models.resnet18(pretrained=True)
        # Replace the original final layer with a small two-layer head
        head_inputs = resnet.fc.in_features
        resnet.fc = nn.Sequential(
            nn.Linear(head_inputs, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_outputs),
        )
        self.backbone = resnet

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.backbone(x)
Features:
- Transfer Learning: Leverages ImageNet pre-trained weights
- Fixed Input Size: Requires 224x224x3 images
- High Accuracy: Better feature extraction than training from scratch
- GPU Recommended: Computationally intensive
Configuration:
In myconfig.py:
# PyTorch settings: model type to build and the path used for its weights
DEFAULT_MODEL_TYPE = 'resnet18'
MODEL_PATH = './models/resnet18.pth'
# Image size (ResNet18 requires 224x224)
IMAGE_W = 224
IMAGE_H = 224
IMAGE_DEPTH = 3  # presumably the number of colour channels - confirm
# Training hyperparameters
BATCH_SIZE = 16 # Adjust based on GPU memory
LEARNING_RATE = 0.0001
EPOCHS = 10
Model Utilities
get_model_by_type
Location: donkeycar/parts/pytorch/torch_utils.py:4
from donkeycar.parts.pytorch.torch_utils import get_model_by_type

model = get_model_by_type(
    model_type='resnet18',  # Model architecture
    cfg=cfg,                # Configuration object
    checkpoint_path=None,   # Optional: path to saved weights
)
Supported Model Types:
- 'resnet18' - ResNet18 transfer learning model
- (Additional models can be added)
Training PyTorch Models
Training Script
Location: donkeycar/parts/pytorch/torch_train.py
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import donkeycar as dk
from donkeycar.parts.pytorch.torch_data import DonkeyDataset
from donkeycar.parts.pytorch.ResNet18 import ResNet18

# Load configuration
cfg = dk.load_config()

# torch.save does not create missing directories, so make sure the
# output folder exists before the first checkpoint is written
os.makedirs('./models', exist_ok=True)

# Create datasets
train_dataset = DonkeyDataset(
    tub_paths=['./data/tub_1', './data/tub_2'],
    config=cfg,
    mode='train',
)
val_dataset = DonkeyDataset(
    tub_paths=['./data/tub_3'],
    config=cfg,
    mode='val',
)

# Create data loaders (only the training data is shuffled)
train_loader = DataLoader(
    train_dataset,
    batch_size=cfg.BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=cfg.BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

# Create model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet18(input_shape=(cfg.BATCH_SIZE, 3, 224, 224))
model = model.to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=cfg.LEARNING_RATE)

# Training loop
for epoch in range(cfg.EPOCHS):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation (no gradients, eval mode)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    # Report per-batch averages
    print(f'Epoch {epoch+1}/{cfg.EPOCHS}')
    print(f'Train Loss: {train_loss/len(train_loader):.4f}')
    print(f'Val Loss: {val_loss/len(val_loader):.4f}')

    # Save a resumable checkpoint; 'loss' is the training loss summed over batches
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': train_loss,
    }, f'./models/checkpoint_epoch_{epoch}.pth')

# Save final model as a bare state_dict at the path the inference and export
# examples (and MODEL_PATH) load from
torch.save(model.state_dict(), './models/resnet18.pth')
Training Command
cd ~/mycar
python -m donkeycar.parts.pytorch.torch_train --tub ./data --model ./models/resnet18.pth
Dataset Handling
DonkeyDataset
Location: donkeycar/parts/pytorch/torch_data.py
PyTorch Dataset class for loading Donkeycar tub data.
import torch
from torch.utils.data import DataLoader

from donkeycar.parts.pytorch.torch_data import DonkeyDataset

# Create dataset
dataset = DonkeyDataset(
    tub_paths=['./data/tub_1', './data/tub_2'],
    config=cfg,
    mode='train',    # 'train' or 'val'
    transform=None,  # Optional transforms
)

# Create data loader
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=True,  # For GPU training
)

# Iterate over batches
for images, labels in loader:
    # images: (batch_size, channels, height, width)
    # labels: (batch_size, 2) - angle, throttle
    pass
Data Augmentation:
import torchvision.transforms as transforms

# NOTE: RandomHorizontalFlip is deliberately not part of this pipeline.
# Assuming DonkeyDataset applies `transform` to the image only (the usual
# convention), mirroring a frame would leave its steering angle pointing the
# wrong way. If you want flips, mirror the image and negate the angle together.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet stats
        std=[0.229, 0.224, 0.225],
    ),
])

dataset = DonkeyDataset(
    tub_paths=['./data'],
    config=cfg,
    transform=transform,
)
Model Inference
Loading Saved Models
import torch
from donkeycar.parts.pytorch.ResNet18 import ResNet18

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create model
model = ResNet18(input_shape=(1, 3, 224, 224))

# Load weights; map_location lets GPU-trained weights load on a CPU-only machine
checkpoint = torch.load('./models/resnet18.pth', map_location=device)
# Accept both a training checkpoint (dict with 'model_state_dict') and a
# bare state_dict saved with torch.save(model.state_dict(), ...)
if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
    state_dict = checkpoint['model_state_dict']
else:
    state_dict = checkpoint
model.load_state_dict(state_dict)

# Move to the target device and set to evaluation mode
model = model.to(device)
model.eval()
Running Inference
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms

# Preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Load and preprocess image.
# convert('RGB') forces 3 channels: the Normalize stats above are 3-channel,
# so a grayscale or RGBA file would otherwise fail.
img = Image.open('test_image.jpg').convert('RGB')
img_tensor = transform(img).unsqueeze(0)  # Add batch dimension
img_tensor = img_tensor.to(device)

# Inference (`model` and `device` come from the loading step)
with torch.no_grad():
    output = model(img_tensor)

angle, throttle = output[0].cpu().numpy()
print(f"Predicted - Angle: {angle:.3f}, Throttle: {throttle:.3f}")
In Vehicle Loop
class PyTorchPilot:
    """Vehicle part that maps a camera frame to (angle, throttle) with ResNet18.

    Args:
        model_path: File holding either a bare state_dict or a training
            checkpoint dict with a 'model_state_dict' entry.
        device: Torch device string the model runs on (default 'cpu').
    """

    def __init__(self, model_path, device='cpu'):
        self.device = torch.device(device)
        self.model = ResNet18(input_shape=(1, 3, 224, 224))
        # map_location: weights saved on a GPU must still load on a CPU-only car
        state = torch.load(model_path, map_location=self.device)
        if isinstance(state, dict) and 'model_state_dict' in state:
            state = state['model_state_dict']
        self.model.load_state_dict(state)
        self.model.to(self.device)
        self.model.eval()
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def run(self, img_arr):
        """Return (angle, throttle) predicted for one image array."""
        # Preprocess and add the batch dimension
        img_tensor = self.transform(img_arr).unsqueeze(0)
        img_tensor = img_tensor.to(self.device)
        # Inference
        with torch.no_grad():
            output = self.model(img_tensor)
        angle, throttle = output[0].cpu().numpy()
        return angle, throttle

    def shutdown(self):
        """No resources to release."""
        pass


# In manage.py
pilot = PyTorchPilot('./models/resnet18.pth', device='cuda')
V.add(pilot,
      inputs=['cam/image_array'],
      outputs=['pilot/angle', 'pilot/throttle'],
      run_condition='run_pilot')
Model Export
TorchScript (JIT Compilation)
import torch
from donkeycar.parts.pytorch.ResNet18 import ResNet18

# Load model on the CPU so the export also works on a machine without the
# GPU the weights were trained on
model = ResNet18(input_shape=(1, 3, 224, 224))
model.load_state_dict(torch.load('./models/resnet18.pth', map_location='cpu'))
model.eval()

# Create example input
example_input = torch.rand(1, 3, 224, 224)

# Trace and export
traced_model = torch.jit.trace(model, example_input)
traced_model.save('./models/resnet18_traced.pt')

# Load traced model
loaded_model = torch.jit.load('./models/resnet18_traced.pt')
ONNX Export
import torch
import torch.onnx
from donkeycar.parts.pytorch.ResNet18 import ResNet18

# Load model on the CPU so the export does not depend on a GPU being present
model = ResNet18(input_shape=(1, 3, 224, 224))
model.load_state_dict(torch.load('./models/resnet18.pth', map_location='cpu'))
model.eval()

# Create example input
dummy_input = torch.randn(1, 3, 224, 224)

# Export to ONNX; dynamic_axes marks dimension 0 of input and output as a
# named, variable 'batch_size' axis
torch.onnx.export(
    model,
    dummy_input,
    './models/resnet18.onnx',
    export_params=True,
    opset_version=11,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)
Configuration
In myconfig.py:
import torch  # needed for the CUDA availability check below

# PyTorch Model
DEFAULT_MODEL_TYPE = 'resnet18'
MODEL_PATH = './models/resnet18.pth'

# Device
USE_CUDA = True  # Use GPU if available
DEVICE = 'cuda' if USE_CUDA and torch.cuda.is_available() else 'cpu'

# Training
BATCH_SIZE = 16
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 1e-5
EPOCHS = 10

# Image preprocessing (ResNet18 requirements)
IMAGE_W = 224
IMAGE_H = 224
IMAGE_DEPTH = 3

# Data augmentation
AUG_FLIP_HORIZONTAL = True
AUG_BRIGHTNESS = 0.2
AUG_CONTRAST = 0.2
AUG_SATURATION = 0.2

# Training/validation split
TRAIN_TEST_SPLIT = 0.8
Creating Custom PyTorch Models
import torch
import torch.nn as nn


class CustomPilot(nn.Module):
    """Small convolutional pilot: three conv layers followed by three linear layers.

    Args:
        input_shape: (batch, channels, height, width) of the expected input.
            The channel count sizes the first convolution, and the full
            shape is used once to measure the flattened feature size.
        num_outputs: Width of the final linear layer (default 2: angle, throttle).
    """

    def __init__(self, input_shape=(1, 3, 120, 160), num_outputs=2):
        super().__init__()
        in_channels = input_shape[1]
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels, 24, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.Conv2d(24, 32, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Flatten(),
        )
        # Calculate flattened size by pushing a dummy batch through the convs
        with torch.no_grad():
            dummy_input = torch.zeros(input_shape)
            conv_output_size = self.conv_layers(dummy_input).shape[1]
        self.fc_layers = nn.Sequential(
            nn.Linear(conv_output_size, 100),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(100, 50),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(50, num_outputs),  # angle, throttle by default
        )

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


# Usage
model = CustomPilot(input_shape=(1, 3, 120, 160))
print(model)
Mixed Precision Training
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()

    # Mixed precision: forward pass and loss run under autocast
    with autocast():
        predictions = model(images)
        loss = criterion(predictions, labels)

    # Scaled backpropagation, then the optimizer step and scaler update
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
Model Pruning
import torch.nn as nn
import torch.nn.utils.prune as prune

# Prune 20% of weights in every Conv2d and Linear layer of `model`
for module in model.modules():
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        prune.l1_unstructured(module, name='weight', amount=0.2)
        prune.remove(module, 'weight')  # Make pruning permanent
Common Issues
CUDA Out of Memory
- Reduce batch size
- Use gradient accumulation
- Enable mixed precision training
- Clear cache:
torch.cuda.empty_cache()
Model Not Learning
- Check learning rate (try 1e-4 to 1e-3)
- Verify data preprocessing
- Check for label errors
- Monitor gradients for vanishing/exploding
Inference Too Slow
- Convert to TorchScript
- Use GPU if available
- Batch multiple frames
- Optimize image preprocessing
Next Steps