Documentation Index
Fetch the complete documentation index at: https://mintlify.com/MilesONerd/neurenix/llms.txt
Use this file to discover all available pages before exploring further.
Overview
Neurenix provides FPGA (Field-Programmable Gate Array) support for specialized hardware acceleration of AI workloads. FPGAs offer:
- Customizable hardware - Program logic gates for specific operations
- Low latency - Deterministic execution with minimal overhead
- Power efficiency - Optimized power consumption for inference
- Flexibility - Reconfigurable for different models and workloads
Supported Frameworks
- OpenCL - Industry-standard framework for heterogeneous computing
- Xilinx Vitis - High-level synthesis and optimization for Xilinx FPGAs
- Intel OpenVINO - Inference acceleration on Intel FPGAs
Supported Hardware
Xilinx FPGAs
- Alveo U250, U280, U50, U55C
- Versal AI Engine series
- Kria KV260, KR260 (edge)
Intel FPGAs
- Arria 10 GX, SX
- Stratix 10 DX, GX, SX
- Agilex F-Series
Installation
OpenCL Framework
# Install the OpenCL ICD loader and development files
sudo apt-get install ocl-icd-opencl-dev
# Install vendor-specific runtime (Xilinx example)
wget -O xrt_installer.deb "https://www.xilinx.com/bin/public/openDownload?filename=xrt_installer.deb"
sudo apt install ./xrt_installer.deb
# Install Neurenix with FPGA support
pip install neurenix-fpga
Xilinx Vitis
# Install Vitis (requires license)
wget https://www.xilinx.com/support/download.html
# Follow Xilinx installation instructions
# Source Vitis environment
source /tools/Xilinx/Vitis/2023.2/settings64.sh
# Install Neurenix with Vitis support
export NEURENIX_WITH_VITIS=1
pip install -e .
Intel OpenVINO
# Install OpenVINO
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.2/linux/l_openvino_toolkit_ubuntu22_2023.2.0.tar.gz
tar -xf l_openvino_toolkit_ubuntu22_2023.2.0.tar.gz
cd l_openvino_toolkit_ubuntu22_2023.2.0
sudo ./install_openvino_dependencies.sh
# Source OpenVINO environment
source /opt/intel/openvino_2023/setupvars.sh
# Install Neurenix with OpenVINO support
pip install neurenix-openvino
FPGA Management
Initialize FPGA
import neurenix as nx
from neurenix.hardware import FPGAManager
# Initialize with OpenCL
fpga = FPGAManager(framework='opencl')
fpga.initialize()
# Get FPGA information
info = fpga.get_fpga_info()
print(f"Framework: {info['framework']}")
print(f"Device: {info['name']}")
print(f"Memory: {info['global_memory']}")
print(f"Compute units: {info['compute_units']}")
// C++ FPGA initialization
#include "hardware/fpga.h"
using namespace phynexus::hardware;
FPGAConfig config;
config.framework = FPGAFramework::OpenCL;
config.device_id = 0;
config.platform_id = 0;
config.num_compute_units = 4;
if (FPGABackend::initialize(config)) {
auto device_info = FPGABackend::get_device_info();
std::cout << "FPGA Device: " << device_info["name"] << std::endl;
}
Device Selection
# List available FPGAs
fpga_count = fpga.get_fpga_count()
print(f"Available FPGAs: {fpga_count}")
# Initialize specific device
fpga = FPGAManager(framework='opencl', device_id=0)
fpga.initialize()
OpenCL Framework
Overview
OpenCL provides a portable programming model for FPGAs:
from neurenix.hardware import OpenCLManager
# Initialize OpenCL
opencl = OpenCLManager(device_id=0, platform_id=0)
opencl.initialize()
# Get platform info
platforms = opencl.get_platforms()
for i, platform in enumerate(platforms):
print(f"Platform {i}: {platform['name']}")
print(f" Vendor: {platform['vendor']}")
print(f" Version: {platform['version']}")
# Get device info
devices = opencl.get_devices(platform_id=0)
for i, device in enumerate(devices):
print(f"Device {i}: {device['name']}")
print(f" Type: {device['type']}")
print(f" Memory: {device['global_memory'] / (1024**3):.2f} GB")
Custom Kernels
# Define OpenCL kernel
kernel_source = """
__kernel void vector_add(
__global const float* a,
__global const float* b,
__global float* c,
const int n
) {
int gid = get_global_id(0);
if (gid < n) {
c[gid] = a[gid] + b[gid];
}
}
"""
# Create kernel
kernel = opencl.create_kernel('vector_add', kernel_source)
# Execute kernel
import numpy as np
a = np.random.randn(1024).astype(np.float32)
b = np.random.randn(1024).astype(np.float32)
c = np.zeros(1024, dtype=np.float32)
opencl.execute_kernel(
kernel,
global_size=(1024,),
local_size=(64,),
args=[a, b, c, 1024]
)
// C++ OpenCL kernel execution
const char* kernel_source = R"(
__kernel void vector_add(
__global const float* a,
__global const float* b,
__global float* c,
const int n
) {
int gid = get_global_id(0);
if (gid < n) {
c[gid] = a[gid] + b[gid];
}
}
)";
void* bitstream = FPGABackend::load_bitstream("kernel.aocx");
void* kernel = FPGABackend::create_kernel(bitstream, "vector_add");
std::vector<void*> args = {a_buffer, b_buffer, c_buffer, &n};
std::vector<size_t> global_size = {1024};
std::vector<size_t> local_size = {64};
FPGABackend::execute_kernel(kernel, args, global_size, local_size);
Xilinx Vitis Framework
Overview
Vitis provides high-level synthesis and optimization for Xilinx FPGAs:
from neurenix.hardware import VitisManager
# Initialize Vitis
vitis = VitisManager(
device_id=0,
target_device='u250',
xclbin_path='model.xclbin'
)
vitis.initialize()
Load Bitstream
# Load XCLBIN (Xilinx binary)
vitis.load_xclbin('model.xclbin')
# Get available kernels
kernels = vitis.get_kernels()
print(f"Available kernels: {kernels}")
Compile Model
from neurenix import nn
# Define model
model = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.ReLU(),
nn.MaxPool2d(2),
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.ReLU(),
nn.AdaptiveAvgPool2d(1)
)
# Compile for Vitis
input_shape = (1, 3, 224, 224)
compiled_model = vitis.compile_model(model, input_shape)
# Execute on FPGA
input_tensor = nx.randn(1, 3, 224, 224)
output = vitis.execute_model(compiled_model, input_tensor)
Vitis HLS
// Vitis HLS kernel example
#include <hls_stream.h>
#include <ap_fixed.h>
extern "C" {
void conv2d_kernel(
const float* input,
const float* weights,
float* output,
int height,
int width,
int channels
) {
#pragma HLS INTERFACE m_axi port=input offset=slave bundle=gmem0
#pragma HLS INTERFACE m_axi port=weights offset=slave bundle=gmem1
#pragma HLS INTERFACE m_axi port=output offset=slave bundle=gmem2
#pragma HLS INTERFACE s_axilite port=height
#pragma HLS INTERFACE s_axilite port=width
#pragma HLS INTERFACE s_axilite port=channels
#pragma HLS INTERFACE s_axilite port=return
// Kernel implementation with optimizations
for (int c = 0; c < channels; c++) {
#pragma HLS PIPELINE II=1
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
// Convolution operation
}
}
}
}
}
Intel OpenVINO Framework
Overview
OpenVINO optimizes inference on Intel FPGAs:
from neurenix.hardware import OpenVINOManager
# Initialize OpenVINO
openvino = OpenVINOManager(
device_id=0,
precision='FP16',
optimize_for='throughput'
)
openvino.initialize()
Convert and Optimize Model
import neurenix as nx
# Load PyTorch model
model = nx.load('model.pt')
# Convert to OpenVINO IR
from neurenix.openvino import convert_to_ir
ir_model = convert_to_ir(
model,
input_shape=(1, 3, 224, 224),
output_path='model.xml'
)
# Load IR model for FPGA
fpga_model = openvino.load_model('model.xml', 'model.bin')
Run Inference
# Get input/output info
input_info = openvino.get_input_info(fpga_model)
output_info = openvino.get_output_info(fpga_model)
print(f"Input shape: {input_info['shape']}")
print(f"Output shape: {output_info['shape']}")
# Run inference
input_tensor = nx.randn(1, 3, 224, 224)
result = openvino.infer(fpga_model, {'input': input_tensor})
output = result['output']
Memory Management
Allocate FPGA Memory
# Allocate buffer on FPGA
buffer = fpga.allocate_memory(size=1024*1024, memory_bank=0)
# Copy data to FPGA
data = np.random.randn(1024, 1024).astype(np.float32)
fpga.copy_to_fpga(buffer, data)
# Copy data from FPGA
result = np.zeros((1024, 1024), dtype=np.float32)
fpga.copy_from_fpga(result, buffer)
# Free memory
fpga.free_memory(buffer)
// C++ FPGA memory management
void* buffer = FPGABackend::allocate_memory(1024 * 1024 * sizeof(float), 0);
// Copy to FPGA
float* host_data = new float[1024 * 1024];
FPGABackend::copy_to_fpga(buffer, host_data, 1024 * 1024 * sizeof(float));
// Copy from FPGA
float* result = new float[1024 * 1024];
FPGABackend::copy_from_fpga(result, buffer, 1024 * 1024 * sizeof(float));
// Free memory
FPGABackend::free_memory(buffer);
Memory Banks
# Allocate on different memory banks for parallel access
buffer0 = fpga.allocate_memory(size=1024*1024, memory_bank=0)
buffer1 = fpga.allocate_memory(size=1024*1024, memory_bank=1)
# Kernel can access both banks simultaneously
fpga.execute_kernel(kernel, args=[buffer0, buffer1, output])
Profiling
# Enable profiling
fpga = FPGAManager(framework='opencl')
fpga.config.enable_profiling = True
fpga.initialize()
# Run operations
output = fpga.execute_model(model, input)
# Get profiling results
profile_data = fpga.get_profile_data()
print(f"Kernel execution time: {profile_data['kernel_time_ms']} ms")
print(f"Data transfer time: {profile_data['transfer_time_ms']} ms")
Optimization Techniques
# Enable optimizations
fpga.config.enable_optimization = True
fpga.config.num_compute_units = 4 # Parallel execution units
fpga.config.enable_memory_bank_mapping = True
# Configure memory bank mapping
fpga.config.memory_bank_mapping = {
'input': 0,
'weights': 1,
'output': 2
}
fpga.initialize()
Batch Processing
# Process multiple inputs in parallel
batch_size = 8
inputs = [nx.randn(1, 3, 224, 224) for _ in range(batch_size)]
# Execute batch on FPGA
outputs = fpga.batch_execute(model, inputs)
Model Deployment
Export for FPGA
# Optimize model for FPGA deployment
from neurenix.fpga import optimize_for_fpga
optimized_model = optimize_for_fpga(
model,
input_shape=(1, 3, 224, 224),
target='xilinx_u250',
precision='int8',
optimization_level=3
)
# Export bitstream
optimized_model.export('model.xclbin')
Quantization
from neurenix.quantization import quantize_for_fpga
# Quantize model for FPGA
quantized_model = quantize_for_fpga(
model,
calibration_data=calibration_loader,
quantization_scheme='int8',
target_fpga='intel_arria10'
)
Environment Variables
# OpenCL settings (Xilinx XRT runtime)
export XILINX_XRT=/opt/xilinx/xrt
export LD_LIBRARY_PATH=$XILINX_XRT/lib:$LD_LIBRARY_PATH
# Vitis settings
export VITIS_PATH=/tools/Xilinx/Vitis/2023.2
source $VITIS_PATH/settings64.sh
# OpenVINO settings
export INTEL_OPENVINO_DIR=/opt/intel/openvino_2023
source $INTEL_OPENVINO_DIR/setupvars.sh
# FPGA device selection
export NEURENIX_FPGA_DEVICE=0
See Also