Documentation Index
Fetch the complete documentation index at: https://mintlify.com/microsoft/onnxruntime/llms.txt
Use this file to discover all available pages before exploring further.
Session Management
The OrtSession object represents a loaded ONNX model and provides inference capabilities.
Creating Sessions
CreateSession
OrtStatus* (*CreateSession)(const OrtEnv* env,
const ORTCHAR_T* model_path,
const OrtSessionOptions* options,
OrtSession** out);
Create a session from a model file.
Parameters:
env: Environment object
model_path: Path to the ONNX model file (wchar_t* on Windows, char* elsewhere)
options: Session options (configuration)
out: Newly created session (must be freed with ReleaseSession)
Returns: NULL on success, error status otherwise
Example:
OrtEnv* env;
OrtSessionOptions* session_options;
OrtSession* session;
OrtStatus* status = api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env);
status = api->CreateSessionOptions(&session_options);
status = api->CreateSession(env, model_path, session_options, &session);
if (status != NULL) {
const char* msg = api->GetErrorMessage(status);
printf("Error: %s\n", msg);
api->ReleaseStatus(status);
}
CreateSessionFromArray
OrtStatus* (*CreateSessionFromArray)(const OrtEnv* env,
const void* model_data,
size_t model_data_length,
const OrtSessionOptions* options,
OrtSession** out);
Create a session from an in-memory model.
Parameters:
env: Environment object
model_data: Pointer to model data in memory
model_data_length: Size of model data in bytes
options: Session options
out: Newly created session
Example:
const void* model_data = /* loaded model bytes */;
size_t model_size = /* size in bytes */;
OrtSession* session;
OrtStatus* status = api->CreateSessionFromArray(
env, model_data, model_size, session_options, &session);
CreateSessionWithPrepackedWeightsContainer
OrtStatus* (*CreateSessionWithPrepackedWeightsContainer)(
const OrtEnv* env,
const ORTCHAR_T* model_path,
const OrtSessionOptions* options,
OrtPrepackedWeightsContainer* prepacked_weights_container,
OrtSession** out);
Create a session that shares pre-packed weights between multiple sessions for memory efficiency.
Parameters:
env: Environment object
model_path: Path to model file
options: Session options
prepacked_weights_container: Container for sharing pre-packed weights
out: Newly created session
Example:
OrtPrepackedWeightsContainer* prepacked_container;
api->CreatePrepackedWeightsContainer(&prepacked_container);
OrtSession* session1;
api->CreateSessionWithPrepackedWeightsContainer(
env, model_path, options, prepacked_container, &session1);
OrtSession* session2; // Shares weights with session1
api->CreateSessionWithPrepackedWeightsContainer(
env, model_path, options, prepacked_container, &session2);
ReleaseSession
void (*ReleaseSession)(OrtSession* session);
Free a session object.
Parameters:
session: Session to free (can be NULL)
Warning: Do not call from DllMain on Windows as the session owns a thread pool.
SessionGetInputCount
OrtStatus* (*SessionGetInputCount)(const OrtSession* session, size_t* out);
Get the number of inputs required by the model.
Parameters:
session: Session to query
out: Number of inputs
Example:
size_t num_inputs;
api->SessionGetInputCount(session, &num_inputs);
printf("Model has %zu inputs\n", num_inputs);
SessionGetOutputCount
OrtStatus* (*SessionGetOutputCount)(const OrtSession* session, size_t* out);
Get the number of outputs produced by the model.
Parameters:
session: Session to query
out: Number of outputs
SessionGetInputName
OrtStatus* (*SessionGetInputName)(const OrtSession* session,
size_t index,
OrtAllocator* allocator,
char** value);
Get the name of an input.
Parameters:
session: Session to query
index: Input index (0 to num_inputs - 1)
allocator: Allocator to use for the returned string
value: UTF-8 null-terminated input name (must be freed using allocator)
Example:
OrtAllocator* allocator;
api->GetAllocatorWithDefaultOptions(&allocator);
for (size_t i = 0; i < num_inputs; i++) {
char* input_name;
api->SessionGetInputName(session, i, allocator, &input_name);
printf("Input %zu: %s\n", i, input_name);
allocator->Free(allocator, input_name);
}
SessionGetOutputName
OrtStatus* (*SessionGetOutputName)(const OrtSession* session,
size_t index,
OrtAllocator* allocator,
char** value);
Get the name of an output.
Parameters:
session: Session to query
index: Output index (0 to num_outputs - 1)
allocator: Allocator to use for the returned string
value: UTF-8 null-terminated output name (must be freed using allocator)
SessionGetInputTypeInfo
OrtStatus* (*SessionGetInputTypeInfo)(const OrtSession* session,
size_t index,
OrtTypeInfo** type_info);
Get type information for an input.
Parameters:
session: Session to query
index: Input index
type_info: Type information (must be freed with ReleaseTypeInfo)
Example:
OrtTypeInfo* type_info;
api->SessionGetInputTypeInfo(session, 0, &type_info);
const OrtTensorTypeAndShapeInfo* tensor_info;
api->CastTypeInfoToTensorInfo(type_info, &tensor_info);
ONNXTensorElementDataType elem_type;
api->GetTensorElementType(tensor_info, &elem_type);
size_t num_dims;
api->GetDimensionsCount(tensor_info, &num_dims);
int64_t dims[num_dims];
api->GetDimensions(tensor_info, dims, num_dims);
api->ReleaseTypeInfo(type_info);
SessionGetOutputTypeInfo
OrtStatus* (*SessionGetOutputTypeInfo)(const OrtSession* session,
size_t index,
OrtTypeInfo** type_info);
Get type information for an output.
SessionGetOverridableInitializerCount
OrtStatus* (*SessionGetOverridableInitializerCount)(
const OrtSession* session,
size_t* out);
Get the count of overridable initializers in the model.
Parameters:
session: Session to query
out: Number of overridable initializers
Running Inference
Run
OrtStatus* (*Run)(OrtSession* session,
const OrtRunOptions* run_options,
const char* const* input_names,
const OrtValue* const* inputs,
size_t input_len,
const char* const* output_names,
size_t output_names_len,
OrtValue** outputs);
Run the model with the given inputs.
Parameters:
session: Session to run
run_options: Run options (can be NULL for defaults)
input_names: Array of null-terminated UTF-8 input names
inputs: Array of input OrtValue objects
input_len: Number of inputs
output_names: Array of null-terminated UTF-8 output names
output_names_len: Number of outputs
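outputs: Array of output_names_len OrtValue* entries that receives the outputs. The array itself must not be NULL; each entry may be NULL (the runtime allocates the output OrtValue) or point to a pre-allocated OrtValue to write into. Outputs allocated by the runtime must be freed with ReleaseValue.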
Returns: NULL on success, error status otherwise
Example:
// Prepare inputs
const char* input_names[] = {"input"};
OrtValue* input_tensor = /* create input tensor */;
const OrtValue* inputs[] = {input_tensor};
// Prepare outputs
const char* output_names[] = {"output"};
OrtValue* output_tensor = NULL;
// Run inference
OrtStatus* status = api->Run(
session,
NULL, // use default run options
input_names,
inputs,
1, // number of inputs
output_names,
1, // number of outputs
&output_tensor
);
if (status == NULL) {
// Process output_tensor
float* output_data;
api->GetTensorMutableData(output_tensor, (void**)&output_data);
// Use output_data...
api->ReleaseValue(output_tensor);
}
RunAsync (Callback-based)
For asynchronous inference, use the callback-based API:
typedef void (*RunAsyncCallbackFn)(
void* user_data,
OrtValue** outputs,
size_t num_outputs,
OrtStatusPtr status);
OrtStatus* (*RunAsync)(OrtSession* session,
const OrtRunOptions* run_options,
const char* const* input_names,
const OrtValue* const* inputs,
size_t input_len,
const char* const* output_names,
size_t output_names_len,
RunAsyncCallbackFn run_async_callback,
void* user_data);
Run Options
CreateRunOptions
OrtStatus* (*CreateRunOptions)(OrtRunOptions** out);
Create run options for per-run configuration.
Parameters:
out: Newly created run options (must be freed with ReleaseRunOptions)
RunOptionsSetRunLogVerbosityLevel
OrtStatus* (*RunOptionsSetRunLogVerbosityLevel)(
OrtRunOptions* options,
int log_verbosity_level);
Set per-run log verbosity level.
RunOptionsSetRunLogSeverityLevel
OrtStatus* (*RunOptionsSetRunLogSeverityLevel)(
OrtRunOptions* options,
int log_severity_level);
Set per-run log severity level.
RunOptionsSetRunTag
OrtStatus* (*RunOptionsSetRunTag)(OrtRunOptions* options,
const char* run_tag);
Set a tag for the run (used in logging).
RunOptionsSetTerminate
OrtStatus* (*RunOptionsSetTerminate)(OrtRunOptions* options);
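Set the terminate flag on these run options. This can be called from another thread to force a Run call that is currently executing with these options to stop; the terminated Run call returns an error status.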
RunOptionsUnsetTerminate
OrtStatus* (*RunOptionsUnsetTerminate)(OrtRunOptions* options);
Clear the terminate flag so the run options can be reused.
AddRunConfigEntry
OrtStatus* (*AddRunConfigEntry)(OrtRunOptions* options,
const char* config_key,
const char* config_value);
Add a run configuration entry.
See onnxruntime_run_options_config_keys.h for valid keys.
ReleaseRunOptions
void (*ReleaseRunOptions)(OrtRunOptions* options);
Free run options.
IO Binding (Advanced)
IO Binding allows binding pre-allocated memory for inputs and outputs to avoid copies.
CreateIoBinding
OrtStatus* (*CreateIoBinding)(OrtSession* session, OrtIoBinding** out);
Create an IO binding for a session.
Parameters:
session: Session to create binding for
out: Newly created IO binding (must be freed with ReleaseIoBinding)
BindInput
OrtStatus* (*BindInput)(OrtIoBinding* binding_ptr,
const char* name,
const OrtValue* val_ptr);
Bind an input value.
Parameters:
binding_ptr: IO binding
name: Input name
val_ptr: Input tensor value
BindOutput
OrtStatus* (*BindOutput)(OrtIoBinding* binding_ptr,
const char* name,
const OrtValue* val_ptr);
Bind an output value.
BindOutputToDevice
OrtStatus* (*BindOutputToDevice)(OrtIoBinding* binding_ptr,
const char* name,
const OrtMemoryInfo* mem_info_ptr);
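Bind an output to a device rather than to a pre-allocated value. Use this when the output has a dynamic shape, so its size is not known before the run and memory cannot be pre-allocated; the runtime allocates the output on the device described by mem_info_ptr.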
Parameters:
binding_ptr: IO binding
name: Output name
mem_info_ptr: Memory location for output allocation
RunWithBinding
OrtStatus* (*RunWithBinding)(OrtSession* session,
const OrtRunOptions* run_options,
const OrtIoBinding* binding_ptr);
Run inference using IO binding.
Example:
OrtIoBinding* io_binding;
api->CreateIoBinding(session, &io_binding);
// Bind inputs
api->BindInput(io_binding, "input", input_tensor);
// Bind outputs (can use pre-allocated memory on GPU)
api->BindOutput(io_binding, "output", output_tensor);
// Or bind to device for dynamic shapes
api->BindOutputToDevice(io_binding, "output", gpu_mem_info);
// Run
api->RunWithBinding(session, NULL, io_binding);
// Get outputs
size_t output_count;
OrtValue** outputs;
api->GetBoundOutputValues(io_binding, allocator, &outputs, &output_count);
api->ReleaseIoBinding(io_binding);
ClearBoundInputs
void (*ClearBoundInputs)(OrtIoBinding* binding_ptr);
Clear all bound inputs.
ClearBoundOutputs
void (*ClearBoundOutputs)(OrtIoBinding* binding_ptr);
Clear all bound outputs.
ReleaseIoBinding
void (*ReleaseIoBinding)(OrtIoBinding* binding);
Free an IO binding.
SessionGetModelMetadata
OrtStatus* (*SessionGetModelMetadata)(const OrtSession* session,
OrtModelMetadata** out);
Get model metadata.
Parameters:
session: Session to query
out: Model metadata (must be freed with ReleaseModelMetadata)
ModelMetadataGetProducerName
OrtStatus* (*ModelMetadataGetProducerName)(
const OrtModelMetadata* model_metadata,
OrtAllocator* allocator,
char** value);
Get the producer name from model metadata.
ModelMetadataGetGraphName
OrtStatus* (*ModelMetadataGetGraphName)(
const OrtModelMetadata* model_metadata,
OrtAllocator* allocator,
char** value);
Get the graph name.
ModelMetadataGetVersion
OrtStatus* (*ModelMetadataGetVersion)(
const OrtModelMetadata* model_metadata,
int64_t* value);
Get the model version number.
ModelMetadataLookupCustomMetadataMap
OrtStatus* (*ModelMetadataLookupCustomMetadataMap)(
const OrtModelMetadata* model_metadata,
OrtAllocator* allocator,
const char* key,
char** value);
Lookup a value in the custom metadata map.
Parameters:
model_metadata: Metadata object
allocator: Allocator for returned string
key: Metadata key to lookup
value: Retrieved value (NULL if key not found, must be freed with allocator)
ReleaseModelMetadata
void (*ReleaseModelMetadata)(OrtModelMetadata* metadata);
Free model metadata.
Profiling
SessionEndProfiling
OrtStatus* (*SessionEndProfiling)(OrtSession* session,
OrtAllocator* allocator,
char** out);
End profiling and get the profile data filename.
Parameters:
session: Session being profiled
allocator: Allocator for returned string
out: Filename where profile data was written (must be freed with allocator)
Note: Profiling must be enabled via EnableProfiling in session options.
SessionGetProfilingStartTimeNs
OrtStatus* (*SessionGetProfilingStartTimeNs)(
const OrtSession* session,
uint64_t* out);
Get the profiling start time in nanoseconds.
See Also