The Tracing feature collects detailed information during agent execution and sends it to configured message processors for logging, file storage, or remote analysis. This is essential for debugging, evaluation, and understanding agent behavior.
What is Tracing?
Tracing captures comprehensive data about:
LLM interactions : All prompts sent and responses received
Tool execution : Tool calls, arguments, and results
Graph navigation : Node visits and execution flow
Agent lifecycle : Start, finish, and error events
Strategy execution : Strategy-level events and results
Streaming : Frame-by-frame streaming data
Installation
import ai.koog.agents.features.tracing.feature.Tracing
import ai.koog.agents.features.tracing.writer.TraceFeatureMessageLogWriter
import ai.koog.agents.features.tracing.writer.TraceFeatureMessageFileWriter
val agent = AIAgent (
executor = myExecutor,
strategy = myStrategy
) {
install (Tracing) {
// Log to console
addMessageProcessor ( TraceFeatureMessageLogWriter (logger))
// Write to file
val fileWriter = TraceFeatureMessageFileWriter (
outputFile = Path ( "traces/agent-trace.jsonl" ),
sinkProvider = { path -> SystemFileSystem. sink (path). buffered () }
)
addMessageProcessor (fileWriter)
}
}
Message Processors
Tracing uses message processors to handle captured events:
Log Writer
Write trace events to logs:
import io.github.oshai.kotlinlogging.KotlinLogging
val logger = KotlinLogging. logger {}
addMessageProcessor ( TraceFeatureMessageLogWriter (logger))
File Writer
Write trace events to a file in JSONL format:
import okio.Path.Companion.toPath
import okio.FileSystem
val fileWriter = TraceFeatureMessageFileWriter (
outputFile = "traces/run-${runId}.jsonl".toPath(),
sinkProvider = { path ->
FileSystem.SYSTEM. sink (path). buffered ()
}
)
addMessageProcessor (fileWriter)
Remote Writer
Send trace events to a remote endpoint:
import ai.koog.agents.features.tracing.writer.TraceFeatureMessageRemoteWriter
import io.ktor.client.HttpClient
val remoteWriter = TraceFeatureMessageRemoteWriter (
httpClient = HttpClient (),
endpoint = "https://analytics.example.com/traces"
)
addMessageProcessor (remoteWriter)
Custom Processor
Implement your own message processor:
import ai.koog.agents.core.feature.message.FeatureMessageProcessor
import ai.koog.agents.core.feature.message.FeatureMessage
class MyTraceProcessor : FeatureMessageProcessor {
override suspend fun processMessage (message: FeatureMessage ) {
when (message) {
is LLMCallStartingEvent -> {
// Handle LLM call events
println("LLM called with prompt: ${message.prompt}")
}
is ToolCallCompletedEvent -> {
// Handle tool events
database.saveToolCall(message)
}
// Handle other event types...
}
}
}
addMessageProcessor ( MyTraceProcessor ())
Filtering Events
Filter which events are processed:
val fileWriter = TraceFeatureMessageFileWriter (outputFile, sinkProvider)
// Only trace LLM and tool events
fileWriter. setMessageFilter { message ->
message is LLMCallStartingEvent ||
message is LLMCallCompletedEvent ||
message is ToolCallStartingEvent ||
message is ToolCallCompletedEvent
}
addMessageProcessor (fileWriter)
Trace Event Types
Agent Events
// Agent starting
AgentStartingEvent (
agentId: String ,
runId: String ,
timestamp: Long
)
// Agent completed
AgentCompletedEvent (
agentId: String ,
runId: String ,
result: String ?,
timestamp: Long
)
// Agent failed
AgentExecutionFailedEvent (
agentId: String ,
runId: String ,
error: AgentError ,
timestamp: Long
)
// Agent closing
AgentClosingEvent (
agentId: String ,
timestamp: Long
)
Strategy Events
// Strategy starting
GraphStrategyStartingEvent (
runId: String ,
strategyName: String ,
graph: Graph , // Complete strategy graph structure
timestamp: Long
)
// Strategy completed
StrategyCompletedEvent (
runId: String ,
strategyName: String ,
result: String ?,
timestamp: Long
)
Node Events
// Node execution starting
NodeExecutionStartingEvent (
runId: String ,
nodeName: String ,
input: JsonElement ,
timestamp: Long
)
// Node execution completed
NodeExecutionCompletedEvent (
runId: String ,
nodeName: String ,
input: JsonElement ,
output: JsonElement ,
timestamp: Long
)
// Node execution failed
NodeExecutionFailedEvent (
runId: String ,
nodeName: String ,
input: JsonElement ,
error: AgentError ,
timestamp: Long
)
Subgraph Events
// Subgraph starting
SubgraphExecutionStartingEvent (
runId: String ,
subgraphName: String ,
input: JsonElement ,
timestamp: Long
)
// Subgraph completed
SubgraphExecutionCompletedEvent (
runId: String ,
subgraphName: String ,
input: JsonElement ,
output: JsonElement ,
timestamp: Long
)
// Subgraph failed
SubgraphExecutionFailedEvent (
runId: String ,
subgraphName: String ,
input: JsonElement ,
error: AgentError ,
timestamp: Long
)
LLM Events
// LLM call starting
LLMCallStartingEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
tools: List < String >,
timestamp: Long
)
// LLM call completed
LLMCallCompletedEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
responses: List < Message . Response >,
moderationResponse: ModerationResponse ?,
timestamp: Long
)
Streaming Events
// Streaming starting
LLMStreamingStartingEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
tools: List < String >,
timestamp: Long
)
// Frame received
LLMStreamingFrameReceivedEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
frame: StreamFrame ,
timestamp: Long
)
// Streaming completed
LLMStreamingCompletedEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
tools: List < String >,
timestamp: Long
)
// Streaming failed
LLMStreamingFailedEvent (
runId: String ,
prompt: Prompt ,
model: ModelInfo ,
error: AgentError ,
timestamp: Long
)
// Tool call starting
ToolCallStartingEvent (
runId: String ,
toolCallId: String ,
toolName: String ,
toolArgs: String ,
timestamp: Long
)
// Tool validation failed
ToolValidationFailedEvent (
runId: String ,
toolCallId: String ,
toolName: String ,
toolArgs: String ,
toolDescription: String ,
message: String ,
error: String ?,
timestamp: Long
)
// Tool call failed
ToolCallFailedEvent (
runId: String ,
toolCallId: String ,
toolName: String ,
toolArgs: String ,
toolDescription: String ,
error: String ,
timestamp: Long
)
// Tool call completed
ToolCallCompletedEvent (
runId: String ,
toolCallId: String ,
toolName: String ,
toolArgs: String ,
toolDescription: String ,
result: String ,
timestamp: Long
)
Example Trace Output
Example of events in a trace file:
{ "eventType" : "AgentStartingEvent" , "agentId" : "agent-123" , "runId" : "run-456" , "timestamp" : 1678901234567 }
{ "eventType" : "GraphStrategyStartingEvent" , "runId" : "run-456" , "strategyName" : "code-analyzer" , "graph" :{ ... }}
{ "eventType" : "NodeExecutionStartingEvent" , "runId" : "run-456" , "nodeName" : "analyzeCode" , "input" : "fun main() {...}" }
{ "eventType" : "LLMCallStartingEvent" , "runId" : "run-456" , "model" :{ "id" : "gpt-4o" }, "prompt" :{ ... }}
{ "eventType" : "LLMCallCompletedEvent" , "runId" : "run-456" , "responses" :[{ "content" : "Analysis: ..." }]}
{ "eventType" : "NodeExecutionCompletedEvent" , "runId" : "run-456" , "nodeName" : "analyzeCode" , "output" : "Analysis: ..." }
{ "eventType" : "StrategyCompletedEvent" , "runId" : "run-456" , "result" : "Success" }
{ "eventType" : "AgentCompletedEvent" , "agentId" : "agent-123" , "runId" : "run-456" , "result" : "Analysis: ..." }
Use Cases
Debugging Agent Execution
install (Tracing) {
val debugLogger = KotlinLogging. logger { }
addMessageProcessor ( TraceFeatureMessageLogWriter (debugLogger))
// Only log errors and failures
addMessageProcessor ( object : FeatureMessageProcessor {
override suspend fun processMessage (message: FeatureMessage ) {
when (message) {
is NodeExecutionFailedEvent -> {
debugLogger.error { "Node ${message.nodeName} failed: ${message.error}" }
}
is ToolCallFailedEvent -> {
debugLogger.error { "Tool ${message.toolName} failed: ${message.error}" }
}
is AgentExecutionFailedEvent -> {
debugLogger.error { "Agent failed: ${message.error}" }
}
}
}
})
}
class PerformanceAnalyzer : FeatureMessageProcessor {
private val nodeTimings = mutableMapOf < String , Long >()
private val nodeStarts = mutableMapOf < String , Long >()
override suspend fun processMessage (message: FeatureMessage ) {
when (message) {
is NodeExecutionStartingEvent -> {
nodeStarts[message.nodeName] = message.timestamp
}
is NodeExecutionCompletedEvent -> {
val start = nodeStarts[message.nodeName] ?: return
val duration = message.timestamp - start
nodeTimings[message.nodeName] = duration
}
is AgentCompletedEvent -> {
println("\nPerformance Report:")
nodeTimings. forEach { (name, duration) ->
println("  $name: ${duration}ms")
}
}
}
}
}
install (Tracing) {
addMessageProcessor ( PerformanceAnalyzer ())
}
Execution Replay
Save traces for later replay:
// Save during execution
install (Tracing) {
addMessageProcessor (
TraceFeatureMessageFileWriter (
outputFile = Path("traces/execution-${runId}.jsonl"),
sinkProvider = { path -> FileSystem.SYSTEM. sink (path). buffered () }
)
)
}
// Later: replay the execution
val traces = File ( "traces/execution-123.jsonl" ). readLines ()
traces. forEach { line ->
val event = Json. decodeFromString < FeatureMessage >(line)
replayEvent (event)
}
Complete Example
import ai.koog.agents.core.dsl.graphStrategy
import ai.koog.agents.features.tracing.feature.Tracing
import ai.koog.agents.features.tracing.writer. *
import io.github.oshai.kotlinlogging.KotlinLogging
val logger = KotlinLogging. logger {}
val runId = "run-${System.currentTimeMillis()}"
val agent = AIAgent (
executor = openAIExecutor,
llmModel = OpenAIModels.Chat.GPT4o,
strategy = graphStrategy {
val analyze by node < String , String > { code ->
requestLLM("Analyze this code: $code")
}
edges {
start goesTo analyze
analyze goesTo finish
}
}
) {
install (Tracing) {
// Console logging
addMessageProcessor ( TraceFeatureMessageLogWriter (logger))
// File storage
val fileWriter = TraceFeatureMessageFileWriter (
outputFile = Path("traces/$runId.jsonl"),
sinkProvider = { FileSystem.SYSTEM. sink (it). buffered () }
)
// Filter: only save LLM and tool events to file
fileWriter. setMessageFilter { message ->
message is LLMCallStartingEvent ||
message is LLMCallCompletedEvent ||
message is ToolCallStartingEvent ||
message is ToolCallCompletedEvent
}
addMessageProcessor (fileWriter)
// Custom analytics
addMessageProcessor ( object : FeatureMessageProcessor {
override suspend fun processMessage (message: FeatureMessage ) {
when (message) {
is LLMCallCompletedEvent -> {
analyticsService. trackLLMCall (
model = message.model.id,
tokenCount = message.responses. sumOf { it.content.length }
)
}
}
}
})
}
}
val result = agent.run("fun main() { println(\"Hello\") }")
Best Practices
Use filtering in production
Don’t trace everything in production. Filter to only capture important events to reduce overhead and storage.
Separate trace files by run
Use unique file names per run (include runId or timestamp) to avoid overwriting traces.
Set up log rotation for trace files to prevent disk space issues.
Use tracing for detailed debugging and metrics for high-level monitoring.
Filter or redact sensitive information from traces before writing to storage.
Performance : Comprehensive tracing can generate large volumes of data and impact performance. Use filtering and sampling appropriately.
Event Handlers — Lightweight event hooks for custom logic
OpenTelemetry — Industry-standard distributed tracing and observability