Some language models that support vision capabilities accept images as part of the prompt. The examples below show some of the formats you can use to include an image as input, such as a URL or a base64-encoded string read from a local file.
import { streamObject } from 'ai';import { openai } from '@ai-sdk/openai';import dotenv from 'dotenv';import { z } from 'zod';
// Load variables from a local .env file into process.env before any model call.
// NOTE(review): presumably this is how the OpenAI API key is supplied — confirm.
dotenv.config();
/**
 * Asks the model to list the stamps visible in a passport image that is
 * referenced by URL, and prints every partial result as it streams in.
 *
 * The console is cleared before each print, so only the most recent partial
 * object remains visible.
 *
 * @returns {Promise<void>} Settles once the partial-object stream is exhausted.
 */
async function main() {
  const model = openai('gpt-4.1');

  // Shape the streamed object must follow: a list of { country, date } stamps.
  const stampsSchema = z.object({
    stamps: z.array(
      z.object({
        country: z.string(),
        date: z.string(),
      }),
    ),
  });

  // The image is handed over as a URL object rather than as raw bytes.
  const passportImage = new URL(
    'https://upload.wikimedia.org/wikipedia/commons/thumb/c/c5/WW2_Spanish_official_passport.jpg/1498px-WW2_Spanish_official_passport.jpg',
  );

  const userMessage = {
    role: 'user',
    content: [
      { type: 'text', text: 'list all the stamps in these passport pages?' },
      { type: 'image', image: passportImage },
    ],
  };

  const result = streamObject({
    model,
    maxOutputTokens: 512,
    schema: stampsSchema,
    messages: [userMessage],
  });

  for await (const snapshot of result.partialObjectStream) {
    console.clear();
    console.log(snapshot);
  }
}
// Run the example defined above. The rejection is handled explicitly so a
// failure is always reported and reflected in the exit code, instead of
// depending on the runtime's unhandled-rejection behavior.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

// Imports for the example that follows, which reads the image from disk.
import { streamObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import dotenv from 'dotenv';
import { z } from 'zod';
import fs from 'fs';

// Load variables from a local .env file into process.env before any model call.
// NOTE(review): presumably this is how the OpenAI API key is supplied — confirm.
dotenv.config();
/**
 * Asks the model to list the stamps visible in a passport image that is read
 * from the local file system and passed as a base64-encoded string, and
 * prints every partial result as it streams in.
 *
 * The console is cleared before each print, so only the most recent partial
 * object remains visible.
 *
 * @returns {Promise<void>} Settles once the partial-object stream is exhausted.
 */
async function main() {
  const model = openai('gpt-4.1');

  // Shape the streamed object must follow: a list of { country, date } stamps.
  const stampsSchema = z.object({
    stamps: z.array(
      z.object({
        country: z.string(),
        date: z.string(),
      }),
    ),
  });

  // Synchronous read; the file content is returned as a base64 string.
  const passportImageBase64 = fs.readFileSync('./data/passport.png', {
    encoding: 'base64',
  });

  const instruction = {
    type: 'text',
    text: 'list all the stamps in these passport pages?',
  };
  const picture = { type: 'image', image: passportImageBase64 };

  const result = streamObject({
    model,
    maxOutputTokens: 512,
    schema: stampsSchema,
    messages: [{ role: 'user', content: [instruction, picture] }],
  });

  for await (const snapshot of result.partialObjectStream) {
    console.clear();
    console.log(snapshot);
  }
}
// Run the example. The rejection is handled explicitly so a failure is always
// reported and reflected in the exit code, instead of depending on the
// runtime's unhandled-rejection behavior.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});