Use this file to discover all available pages before exploring further.
Streaming lets your application display or process model output progressively — useful for chat interfaces, long-form generation, and real-time feedback.
Use ai.generateStream() instead of ai.generate(). It returns an object with two properties:
stream — an async iterable of GenerateResponseChunk objects.
response — a Promise<GenerateResponse> that resolves when generation is complete.
TypeScript
Python
Go
import { genkit } from 'genkit';
import { googleAI } from '@genkit-ai/google-genai';

// Configure a Genkit instance with a default model so calls below need no per-call model.
const ai = genkit({
  plugins: [googleAI()],
  model: 'googleai/gemini-2.0-flash',
});

// generateStream() hands back both the chunk iterable and the final-response promise.
const { stream, response } = ai.generateStream({
  prompt: 'Write a short story about a robot learning to paint.',
});

// Consume chunks as they arrive
for await (const chunk of stream) {
  process.stdout.write(chunk.text);
}

// Await the final response for metadata (usage, finish reason, etc.)
const finalResponse = await response;
console.log('\n\nFinish reason:', finalResponse.finishReason);
console.log('Total tokens:', finalResponse.usage.totalTokens);
import asyncio

from genkit import Genkit
from genkit.plugins.google_genai import GoogleAI

# A default model is set here, mirroring the TypeScript and Go examples, so
# generate_stream() does not need a per-call model.
ai = Genkit(plugins=[GoogleAI()], model='googleai/gemini-2.0-flash')


async def main():
    """Stream a generated story to stdout, printing each chunk as it arrives."""
    # NOTE(review): confirm against the installed SDK whether generate_stream()
    # is directly async-iterable or returns a (stream, response) pair that must
    # be unpacked before iterating.
    async for chunk in ai.generate_stream(
        prompt='Write a short story about a robot learning to paint.'
    ):
        # end='' keeps the chunks contiguous; flush=True shows them immediately.
        print(chunk.text, end='', flush=True)


asyncio.run(main())
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/firebase/genkit/go/ai"
	"github.com/firebase/genkit/go/genkit"
	"github.com/firebase/genkit/go/plugins/googlegenai"
)

// main streams a generated story to stdout and prints the finish reason once
// the final result arrives.
func main() {
	ctx := context.Background()

	// Register the Google AI plugin and a default model for all requests.
	g := genkit.Init(ctx,
		genkit.WithPlugins(&googlegenai.GoogleAI{}),
		genkit.WithDefaultModel("googleai/gemini-2.0-flash"),
	)

	// NOTE(review): the return shape of GenerateStream (channel vs. iterator,
	// and the field names on each result) differs between SDK releases —
	// confirm against the version in go.mod.
	streamCh, err := genkit.GenerateStream(ctx, g,
		ai.WithPrompt("Write a short story about a robot learning to paint."),
	)
	if err != nil {
		log.Fatal(err)
	}

	for result := range streamCh {
		if result.Err != nil {
			log.Fatal(result.Err)
		}
		if result.Done {
			// The final value carries the complete response.
			fmt.Printf("\n\nFinish reason: %s\n", result.Output.FinishReason)
		} else {
			// Intermediate values carry only the newly generated text.
			fmt.Print(result.Stream.Text())
		}
	}
}
Each chunk delivered by the stream is a GenerateResponseChunk with the following properties:
| Property | Type | Description |
| --- | --- | --- |
| `chunk.text` | `string` | Text content in this chunk only. |
| `chunk.accumulatedText` | `string` | All text received up to and including this chunk. |
| `chunk.content` | `Part[]` | Raw content parts in this chunk. |
| `chunk.toolRequests` | `ToolRequestPart[]` | Tool call requests included in this chunk. |
| `chunk.output` | `T \| null` | Partial structured output (when using an output schema). |
| `chunk.index` | `number` | Message index this chunk belongs to (starts at 0). |
// Only the chunk iterable is needed here; the final response is not used.
const { stream } = ai.generateStream({
  prompt: 'Summarize the French Revolution in three bullet points.',
});

for await (const chunk of stream) {
  // chunk.text — new text in this chunk
  // chunk.accumulatedText — everything so far
  process.stdout.write(chunk.text);
}
As an alternative to async iteration, you can pass an onChunk callback directly to generate(). This is useful when you want the final response object but also want to react to chunks:
// generate() still resolves to the complete response; onChunk is a side channel
// for reacting to partial output while generation is in progress.
const response = await ai.generate({
  prompt: 'Explain quantum entanglement simply.',
  onChunk: (chunk) => {
    // called for each chunk as it arrives
    process.stdout.write(chunk.text);
  },
});

// response is the fully assembled GenerateResponse
console.log('\n\nTotal tokens:', response.usage.totalTokens);
onChunk and generateStream() both stream the same underlying chunks. Use generateStream() when you want async iteration syntax; use onChunk when you only need a side effect and still want the final Promise<GenerateResponse>.
You can stream structured output by combining generateStream() with an output.schema. Each chunk’s output property contains the partial JSON parsed so far:
import { z } from 'genkit';

// Shape of the structured output the model is asked to produce.
const ItemSchema = z.object({
  title: z.string(),
  description: z.string(),
  price: z.number(),
});

const { stream, response } = ai.generateStream({
  prompt: 'Generate a product listing for a mechanical keyboard.',
  output: { schema: ItemSchema },
});

for await (const chunk of stream) {
  // chunk.output is the partial object built from JSON received so far
  if (chunk.output) {
    console.log('Partial output:', chunk.output);
  }
}

// The resolved response carries the complete structured output.
const finalResponse = await response;
console.log('Final product:', finalResponse.output);
For streaming structured output with the jsonl format, each chunk contains a complete JSON object on its own line. This is useful for streaming arrays of items one element at a time.
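When deploying flows as HTTP endpoints, you can stream the response to the client using server-sent events (SSE) or chunked transfer encoding. The Genkit flow server handles this automatically when a client sends a streaming request. If you serve requests from your own HTTP handler instead, you can forward chunks as SSE yourself, as in the following Express example: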
import express from 'express';
import { genkit, z } from 'genkit';
import { googleAI } from '@genkit-ai/google-genai';

// Genkit instance used by the handler below.
const ai = genkit({
  plugins: [googleAI()],
  model: 'googleai/gemini-2.0-flash',
});

const app = express();

// The prompt comes from the query string, so validate it before use:
// it may be missing, empty, or an array when the parameter is repeated.
const StreamQuerySchema = z.object({
  prompt: z.string().min(1),
});

/**
 * GET /stream?prompt=...
 *
 * Streams generated text to the client as server-sent events. Each event is
 * `data: {"text": "..."}`; a final `data: [DONE]` event marks successful
 * completion. Invalid requests receive a 400 JSON response before any
 * event-stream headers are set.
 */
app.get('/stream', async (req, res) => {
  const query = StreamQuerySchema.safeParse(req.query);
  if (!query.success) {
    res
      .status(400)
      .json({ error: 'Query parameter "prompt" must be a non-empty string.' });
    return;
  }

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');

  try {
    const { stream, response } = ai.generateStream({
      prompt: query.data.prompt,
    });

    for await (const chunk of stream) {
      res.write(`data: ${JSON.stringify({ text: chunk.text })}\n\n`);
    }

    // Wait for generation to finish before signalling completion.
    await response;
    res.write('data: [DONE]\n\n');
  } catch (err) {
    // The status line may already be sent, so report the failure in-band
    // instead of leaving the connection hanging.
    console.error('Streaming generation failed:', err);
    res.write(
      `event: error\ndata: ${JSON.stringify({ message: 'Generation failed.' })}\n\n`,
    );
  } finally {
    // Always close the stream, on success or failure.
    res.end();
  }
});