Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/autorope/donkeycar/llms.txt

Use this file to discover all available pages before exploring further.

Keras model parts use deep learning to predict steering and throttle from camera images and other sensor inputs.

Base Class: KerasPilot

All Keras models inherit from KerasPilot. Location: donkeycar/parts/keras.py:49
from donkeycar.parts.keras import KerasPilot
from donkeycar.parts.interpreter import KerasInterpreter

class MyPilot(KerasPilot):
    """Skeleton for a custom pilot built on top of KerasPilot.

    Fill in the three hooks below: build the network, compile it, and
    translate the interpreter's raw output into driving commands.
    """

    def __init__(self):
        super().__init__(
            interpreter=KerasInterpreter(),
            input_shape=(120, 160, 3)  # Height, Width, Channels
        )

    def create_model(self):
        """Build the neural network for this pilot."""
        # Define your model architecture
        pass

    def compile(self):
        """Configure the loss and optimizer used for training."""
        # Compile with loss and optimizer
        pass

    def interpreter_to_output(self, interpreter_out):
        """Convert the interpreter's raw output to ``(angle, throttle)``."""
        # Assumes interpreter_out is a two-item sequence ordered
        # (angle, throttle) -- TODO adjust the unpacking to match the
        # outputs of the model you define in create_model().
        angle, throttle = interpreter_out
        return angle, throttle
Key Methods:
  • create_model() - Define neural network architecture
  • compile() - Set optimizer, loss, and metrics
  • run(img_arr, *other_arr) - Inference during driving
  • train() - Train the model
  • load(model_path) - Load saved model
  • interpreter_to_output() - Convert raw output to control values

Model Types

KerasCategorical

Discretizes steering and throttle into bins and treats each as a classification problem trained with categorical cross-entropy. Location: donkeycar/parts/keras.py:256
from donkeycar.parts.keras import KerasCategorical

model = KerasCategorical(
    input_shape=(120, 160, 3),
    throttle_range=0.5  # Throttle range for binning
)
Architecture (default_categorical):
def default_categorical(input_shape=(120, 160, 3)):
    """Build the default categorical pilot network.

    Five ReLU convolutions are flattened and passed through two dense
    layers, each followed by dropout, before splitting into two 15-way
    softmax heads: one for the steering angle and one for the throttle.

    Args:
        input_shape: Shape passed to the image ``Input`` layer.

    Returns:
        A Keras ``Model`` with input ``img_in`` and outputs
        ``[angle_out, throttle_out]``.
    """
    from tensorflow.keras.layers import Input, Conv2D, Dropout, Flatten, Dense
    from tensorflow.keras.models import Model

    # (layer name, filters, square kernel size, square stride), in order.
    conv_specs = (
        ('conv1', 24, 5, 2),
        ('conv2', 32, 5, 2),
        ('conv3', 64, 5, 2),
        ('conv4', 64, 3, 1),
        ('conv5', 64, 3, 1),
    )
    # (layer name, units) for the fully connected layers.
    dense_specs = (('dense1', 100), ('dense2', 50))
    # Both heads share the same number of bins.
    num_bins = 15

    img_in = Input(shape=input_shape, name='img_in')

    features = img_in
    for layer_name, filters, kernel, stride in conv_specs:
        features = Conv2D(
            filters,
            (kernel, kernel),
            strides=(stride, stride),
            activation='relu',
            name=layer_name,
        )(features)

    features = Flatten(name='flattened')(features)
    for layer_name, units in dense_specs:
        features = Dense(units, activation='relu', name=layer_name)(features)
        features = Dropout(0.1)(features)

    # Categorical outputs, created angle first and then throttle.
    heads = [
        Dense(num_bins, activation='softmax', name=head_name)(features)
        for head_name in ('angle_out', 'throttle_out')
    ]

    return Model(inputs=[img_in], outputs=heads)
Features:
  • Converts continuous values to discrete bins
  • Better gradient flow for categorical data
  • Provides confidence distribution over choices
  • Default: 15 bins for steering, 15 for throttle
Usage:
# Training
model = KerasCategorical()
model.create_model()
model.train(
    model_path='./models/my_categorical.h5',
    train_data=train_dataset,
    train_steps=100,
    batch_size=32,
    validation_data=val_dataset,
    validation_steps=20,
    epochs=10
)

# Inference
model.load('./models/my_categorical.h5')
angle, throttle = model.run(img_array)

KerasLinear

Regresses directly to continuous steering and throttle values. Location: donkeycar/parts/keras.py (the same module as KerasCategorical)
from donkeycar.parts.keras import KerasLinear

model = KerasLinear(
    input_shape=(120, 160, 3),
    num_outputs=2  # angle, throttle
)
Architecture (default_n_linear):
def default_n_linear(input_shape=(120, 160, 3), num_outputs=2):
    """Build the default linear-regression pilot network.

    Five ReLU convolutions are flattened and passed through two dense
    layers, each followed by dropout, ending in a single linear layer
    named ``outputs``.

    Args:
        input_shape: Shape passed to the image ``Input`` layer.
        num_outputs: Number of units in the final linear layer.

    Returns:
        A Keras ``Model`` with input ``img_in`` and one output tensor.
    """
    from tensorflow.keras.layers import Input, Conv2D, Dropout, Flatten, Dense
    from tensorflow.keras.models import Model

    img_in = Input(shape=input_shape, name='img_in')

    # Convolutional stack: (filters, square kernel size, square stride).
    net = img_in
    for filters, kernel, stride in (
        (24, 5, 2),
        (32, 5, 2),
        (64, 5, 2),
        (64, 3, 1),
        (64, 3, 1),
    ):
        net = Conv2D(
            filters,
            (kernel, kernel),
            strides=(stride, stride),
            activation='relu',
        )(net)

    # Fully connected layers, each followed by 10% dropout.
    net = Flatten()(net)
    for units in (100, 50):
        net = Dense(units, activation='relu')(net)
        net = Dropout(0.1)(net)

    outputs = Dense(num_outputs, activation='linear', name='outputs')(net)

    return Model(inputs=[img_in], outputs=[outputs])
Features:
  • Direct regression to continuous values
  • Simpler output interpretation
  • MSE or MAE loss functions
  • Good for smooth control

KerasIMU

Multi-input model using camera + IMU data. Location: donkeycar/parts/keras.py
from donkeycar.parts.keras import KerasIMU

model = KerasIMU(
    input_shape=(120, 160, 3),
    num_imu_inputs=6  # accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z
)
Architecture:
def default_imu_model(img_shape=(120, 160, 3), num_imu=6, num_outputs=2):
    """Build a two-input pilot network that fuses camera and IMU data.

    The image branch applies two strided ReLU convolutions and flattens
    the result. The IMU branch applies two dense layers. Both branches are
    concatenated and passed through two more dense layers before a linear
    output layer.

    Args:
        img_shape: Shape passed to the image ``Input`` layer.
        num_imu: Length of the IMU input vector.
        num_outputs: Number of units in the final linear layer.

    Returns:
        A Keras ``Model`` with inputs ``[img_in, imu_in]`` and one output
        tensor.
    """
    from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, concatenate
    from tensorflow.keras.models import Model

    # Image input branch
    img_in = Input(shape=img_shape, name='img_in')
    img_features = img_in
    for filters in (24, 32):
        img_features = Conv2D(
            filters, (5, 5), strides=(2, 2), activation='relu'
        )(img_features)
    img_features = Flatten()(img_features)

    # IMU input branch
    imu_in = Input(shape=(num_imu,), name='imu_in')
    imu_features = imu_in
    for units in (64, 32):
        imu_features = Dense(units, activation='relu')(imu_features)

    # Merge branches
    fused = concatenate([img_features, imu_features])
    for units in (100, 50):
        fused = Dense(units, activation='relu')(fused)

    outputs = Dense(num_outputs, activation='linear')(fused)

    return Model(inputs=[img_in, imu_in], outputs=[outputs])
Usage:
# Add to vehicle
V.add(model,
      inputs=['cam/image_array', 
              'imu/accel_x', 'imu/accel_y', 'imu/accel_z',
              'imu/gyro_x', 'imu/gyro_y', 'imu/gyro_z'],
      outputs=['pilot/angle', 'pilot/throttle'])

KerasLSTM / KerasRNN

Recurrent models that consider temporal sequences. Location: donkeycar/parts/keras.py
from donkeycar.parts.keras import KerasLSTM

model = KerasLSTM(
    input_shape=(120, 160, 3),
    seq_length=3  # Number of frames to consider
)
Architecture:
def default_lstm(input_shape=(120, 160, 3), seq_length=3, num_outputs=2):
    """Build the default recurrent pilot network over a sequence of frames.

    Each frame passes through the same two strided ReLU convolutions via
    ``TimeDistributed``. The per-frame features are flattened, reduced by
    an LSTM that returns only its final state, and mapped through a dense
    layer to a linear output.

    Args:
        input_shape: Shape of a single frame.
        seq_length: Number of frames in each input sequence.
        num_outputs: Number of units in the final linear layer. The
            default of 2 keeps the previous fixed output width, and the
            parameter matches the other linear model builders.

    Returns:
        A Keras ``Model`` whose input ``img_in`` has shape
        ``(seq_length, *input_shape)`` and whose single output has
        ``num_outputs`` units.
    """
    from tensorflow.keras.layers import Input, Conv2D, Flatten, LSTM, Dense
    from tensorflow.keras.layers import TimeDistributed as TD
    from tensorflow.keras.models import Model

    # Input is a sequence of images
    img_seq_in = Input(shape=(seq_length, *input_shape), name='img_in')

    # TimeDistributed applies Conv layers to each frame
    x = TD(Conv2D(24, (5, 5), strides=(2, 2), activation='relu'))(img_seq_in)
    x = TD(Conv2D(32, (5, 5), strides=(2, 2), activation='relu'))(x)
    x = TD(Flatten())(x)

    # LSTM processes the sequence; only the last step's output is kept
    x = LSTM(128, return_sequences=False)(x)
    x = Dense(50, activation='relu')(x)

    outputs = Dense(num_outputs, activation='linear')(x)

    model = Model(inputs=[img_seq_in], outputs=[outputs])
    return model
Features:
  • Considers multiple frames (temporal context)
  • Better for handling motion blur, occlusions
  • Requires sequence buffering
  • Higher computational cost

Model Training

Training Configuration

In myconfig.py:
# Model type
DEFAULT_MODEL_TYPE = 'linear'  # Options: linear, categorical, lstm, imu

# Training parameters
BATCH_SIZE = 128
TRAIN_TEST_SPLIT = 0.8
EPOCHS = 10
VERBOSE_TRAIN = 1

# Early stopping
MIN_DELTA = 0.0005
PATIENCE = 5

# Optimizer
LEARNING_RATE = 0.001
LEARNING_RATE_DECAY = 0.0
OPTIMIZER = 'adam'  # Options: adam, sgd, rmsprop

# Image processing
IMAGE_W = 160
IMAGE_H = 120
IMAGE_DEPTH = 3

# Augmentation
AUG_MULTIPLY_FACTOR = 1.0
AUG_BRIGHTNESS_RANGE = (0.5, 1.5)
AUG_CROP_PERCENT_RANGE = (0, 0.2)
AUG_BLUR_RANGE = (0.0, 1.1)

Training Command

donkey train --tub ./data --model ./models/my_model.h5
Training Script Example:
from donkeycar.parts.keras import KerasLinear
from donkeycar.pipeline.training import train

# Create model
model = KerasLinear()
model.create_model()

# Train
history = train(
    cfg=cfg,
    tub_paths=['./data/tub_1', './data/tub_2'],
    model=model,
    model_type='linear',
    transfer_model=None,
    comment='my_first_model'
)

Model Inference

Loading Models

from donkeycar.parts.keras import KerasLinear

# Create model instance
model = KerasLinear()

# Load weights
model.load('./models/my_model.h5')

# Or for TFLite
from donkeycar.parts.interpreter import TfLiteInterpreter
model = KerasLinear(interpreter=TfLiteInterpreter())
model.load('./models/my_model.tflite')

Using in Vehicle

# In manage.py
from donkeycar.parts.keras import get_model_by_type

# Load model
kl = get_model_by_type(cfg.DEFAULT_MODEL_TYPE, cfg=cfg)
kl.load(cfg.MODEL_PATH)

# Add to vehicle
V.add(kl,
      inputs=['cam/image_array'],
      outputs=['pilot/angle', 'pilot/throttle'],
      run_condition='run_pilot')

Model Architectures

Available Architectures

Location: donkeycar/parts/keras.py
  • default_categorical - Standard categorical model
  • default_n_linear - Standard linear regression
  • default_imu - Multi-input with IMU
  • default_lstm - LSTM for sequences
  • default_3d_conv - 3D convolutions for video
  • default_latent - Variational autoencoder

Custom Architecture

from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense
from tensorflow.keras.models import Model

def my_custom_model(input_shape=(120, 160, 3)):
    """Build a small example network with separate angle and throttle heads.

    Two 3x3 ReLU convolutions are flattened and passed through one dense
    layer, which then feeds two single-unit linear outputs.

    Args:
        input_shape: Shape passed to the image ``Input`` layer.

    Returns:
        A Keras ``Model`` with input ``img_in`` and outputs
        ``[angle, throttle]``.
    """
    img_in = Input(shape=input_shape, name='img_in')

    # Custom architecture
    hidden = img_in
    for filters in (16, 32):
        hidden = Conv2D(filters, (3, 3), activation='relu')(hidden)
    hidden = Dense(64, activation='relu')(Flatten()(hidden))

    # Outputs, created angle first and then throttle
    angle, throttle = (
        Dense(1, activation='linear', name=head_name)(hidden)
        for head_name in ('angle', 'throttle')
    )

    return Model(inputs=[img_in], outputs=[angle, throttle])

# Use custom model
from donkeycar.parts.keras import KerasLinear

class MyCustomPilot(KerasLinear):
    """KerasLinear pilot that swaps in the custom architecture."""

    def create_model(self):
        """Return the custom network built for this pilot's input shape."""
        custom_net = my_custom_model(self.input_shape)
        return custom_net

Model Export

TensorFlow Lite

For edge deployment:
import tensorflow as tf

# Load Keras model
model = tf.keras.models.load_model('./models/my_model.h5')

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save
with open('./models/my_model.tflite', 'wb') as f:
    f.write(tflite_model)

SavedModel Format

import tensorflow as tf

model = tf.keras.models.load_model('./models/my_model.h5')
model.save('./models/my_model_saved', save_format='tf')

Performance Optimization

Model Quantization

import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()

Mixed Precision Training

from tensorflow.keras import mixed_precision

# Enable mixed precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Train as usual
model.compile(...)

Common Issues

Model Overfitting

  • Reduce model complexity
  • Add more dropout layers
  • Increase training data
  • Use data augmentation
  • Add L2 regularization

Poor Performance

  • Check training/validation loss curves
  • Verify data quality (clean bad examples)
  • Try different learning rates
  • Use transfer learning
  • Ensure proper train/test split

Memory Issues

  • Reduce batch size
  • Use model quantization
  • Convert to TFLite
  • Use gradient checkpointing

Next Steps

Build docs developers (and LLMs) love