import json


def execute_function(name, arguments):
    """Execute the tool the model asked for and return its result.

    Parameters:
        name: function name from the model's function call.
        arguments: dict of arguments parsed from the model's function call.

    Returns:
        A JSON-serializable result dict, or {"error": ...} when the
        function name is not recognized.
    """
    if name == "get_weather":
        # Stubbed response; a real implementation would call a weather API.
        return {
            "location": arguments["location"],
            "temperature": 22,
            "unit": arguments.get("unit", "celsius"),
            "conditions": "sunny",
        }
    return {"error": "Unknown function"}


conversation = [
    {"role": "user", "content": "What's the weather in Tokyo and Paris?"},
]

# Agent loop: keep calling the model, executing any requested tools and
# feeding their results back, until the model answers without tool calls.
while True:
    messages = json.dumps(conversation)
    result = json.loads(cactus_complete(model, messages, None, tools, None))

    # No function calls - we're done
    if not result["function_calls"]:
        print(f"Assistant: {result['response']}")
        break

    # Execute each function call
    for call in result["function_calls"]:
        print(f"Calling {call['name']}({call['arguments']})...")

        # Execute function
        function_result = execute_function(call["name"], call["arguments"])

        # Add function result to conversation
        conversation.append({
            "role": "function",
            "name": call["name"],
            "content": json.dumps(function_result),
        })
Some models can return multiple function calls in a single response:
result = json.loads(cactus_complete(model, messages, None, tools, None))

if len(result["function_calls"]) > 1:
    print("Executing functions in parallel...")
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit all calls up front so they run concurrently.
        futures = [
            executor.submit(execute_function, call["name"], call["arguments"])
            for call in result["function_calls"]
        ]
        results = [future.result() for future in futures]

    # Feed each tool result back into the conversation.
    # NOTE: use a distinct loop variable here — the original rebound
    # `result`, clobbering the model response for any later code.
    for call, function_result in zip(result["function_calls"], results):
        conversation.append({
            "role": "function",
            "name": call["name"],
            "content": json.dumps(function_result),
        })
For advanced use cases, manually set tool constraints:
# In C++model->set_tool_constraints({"function1", "function2"});// During generation, only these tools can be calleduint32_t token = model->decode(tokens, temperature, top_p, top_k);// Clear constraintsmodel->clear_tool_constraints();