diff --git a/.changeset/easy-taxis-stop.md b/.changeset/easy-taxis-stop.md
new file mode 100644
index 00000000..36aeaf98
--- /dev/null
+++ b/.changeset/easy-taxis-stop.md
@@ -0,0 +1,6 @@
+---
+'@openai/agents-openai': minor
+'@openai/agents-core': minor
+---
+
+feat: fix #272 add memory feature
diff --git a/examples/mcp/hosted-mcp-human-in-the-loop.ts b/examples/mcp/hosted-mcp-human-in-the-loop.ts
index f43ec77e..db1b548e 100644
--- a/examples/mcp/hosted-mcp-human-in-the-loop.ts
+++ b/examples/mcp/hosted-mcp-human-in-the-loop.ts
@@ -16,8 +16,10 @@ async function confirm(item: RunToolApprovalItem): Promise<boolean> {
 async function main(verbose: boolean, stream: boolean): Promise<void> {
   // 'always' | 'never' | { never, always }
   const requireApproval = {
-    never: { toolNames: ['search_codex_code', 'fetch_codex_documentation'] },
-    always: { toolNames: ['fetch_generic_url_content'] },
+    never: { toolNames: ['search_codex_code'] },
+    always: {
+      toolNames: ['fetch_generic_url_content', 'fetch_codex_documentation'],
+    },
   };
   const agent = new Agent({
     name: 'MCP Assistant',
diff --git a/examples/mcp/hosted-mcp-on-approval.ts b/examples/mcp/hosted-mcp-on-approval.ts
index 3deb40af..5b74f1c4 100644
--- a/examples/mcp/hosted-mcp-on-approval.ts
+++ b/examples/mcp/hosted-mcp-on-approval.ts
@@ -16,12 +16,8 @@ async function promptApproval(item: RunToolApprovalItem): Promise<boolean> {
 async function main(verbose: boolean, stream: boolean): Promise<void> {
   // 'always' | 'never' | { never, always }
   const requireApproval = {
-    never: {
-      toolNames: ['fetch_codex_documentation', 'fetch_generic_url_content'],
-    },
-    always: {
-      toolNames: ['search_codex_code'],
-    },
+    never: { toolNames: ['fetch_generic_url_content'] },
+    always: { toolNames: ['fetch_codex_documentation', 'search_codex_code'] },
   };
   const agent = new Agent({
     name: 'MCP Assistant',
diff --git a/examples/memory/.gitignore b/examples/memory/.gitignore
new file mode 100644
index 00000000..9a1c3101
--- /dev/null
+++ b/examples/memory/.gitignore
@@ -0,0 +1,2 @@
+tmp/
+*.db
diff --git a/examples/memory/file-hitl.ts b/examples/memory/file-hitl.ts
new file mode 100644
index 00000000..66f0a667
--- /dev/null
+++ b/examples/memory/file-hitl.ts
@@ -0,0 +1,126 @@
+import readline from 'node:readline/promises';
+import { stdin as input, stdout as output } from 'node:process';
+import {
+  Agent,
+  RunResult,
+  RunToolApprovalItem,
+  run,
+  withTrace,
+} from '@openai/agents';
+
+import type { Interface as ReadlineInterface } from 'node:readline/promises';
+import { FileSession } from './sessions';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const customerDirectory: Record<string, string> = {
+  '101':
+    'Customer Kaz S. (tier gold) can be reached at +1-415-555-AAAA. Notes: Prefers SMS follow ups and values concise summaries.',
+  '104':
+    'Customer Yu S. (tier platinum) can be reached at +1-415-555-BBBB. Notes: Recently reported sync issues. Flagged for a proactive onboarding call.',
+  '205':
+    'Customer Ken S. (tier standard) can be reached at +1-415-555-CCCC. Notes: Interested in automation tutorials sent last week.',
+};
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory: customerDirectory,
+  transientErrorMessage:
+    'Simulated CRM outage for the first lookup. Please retry the tool call.',
+});
+lookupCustomerProfile.needsApproval = async () => true;
+
+const instructions =
+  'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. Keep responses under three sentences.';
+
+function formatToolArguments(interruption: RunToolApprovalItem): string {
+  const args = interruption.rawItem.arguments;
+  if (!args) {
+    return '';
+  }
+  if (typeof args === 'string') {
+    return args;
+  }
+  try {
+    return JSON.stringify(args);
+  } catch {
+    return String(args);
+  }
+}
+
+async function promptYesNo(
+  rl: ReadlineInterface,
+  question: string,
+): Promise<boolean> {
+  const answer = await rl.question(`${question} (y/n): `);
+  const normalized = answer.trim().toLowerCase();
+  return normalized === 'y' || normalized === 'yes';
+}
+
+async function resolveInterruptions<TContext, TAgent extends Agent<TContext, any>>(
+  rl: ReadlineInterface,
+  agent: TAgent,
+  initialResult: RunResult<TContext, TAgent>,
+  session: FileSession,
+): Promise<RunResult<TContext, TAgent>> {
+  let result = initialResult;
+  while (result.interruptions?.length) {
+    for (const interruption of result.interruptions) {
+      const args = formatToolArguments(interruption);
+      const approved = await promptYesNo(
+        rl,
+        `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`,
+      );
+      if (approved) {
+        result.state.approve(interruption);
+        console.log('Approved tool call.');
+      } else {
+        result.state.reject(interruption);
+        console.log('Rejected tool call.');
+      }
+    }
+
+    result = await run(agent, result.state, { session });
+  }
+
+  return result;
+}
+
+async function main() {
+  await withTrace('memory:file-hitl:main', async () => {
+    const agent = new Agent({
+      name: 'File HITL assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new FileSession({ dir: './tmp' });
+    const sessionId = await session.getSessionId();
+    const rl = readline.createInterface({ input, output });
+
+    console.log(`Session id: ${sessionId}`);
+    console.log(
+      'Enter a message to chat with the agent. Submit an empty line to exit.',
+    );
+
+    while (true) {
+      const userMessage = await rl.question('You: ');
+      if (!userMessage.trim()) {
+        break;
+      }
+
+      let result = await run(agent, userMessage, { session });
+      result = await resolveInterruptions(rl, agent, result, session);
+
+      const reply = result.finalOutput ?? '[No final output produced]';
+      console.log(`Assistant: ${reply}`);
+      console.log();
+    }
+
+    rl.close();
+  });
+}
+
+main().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/file.ts b/examples/memory/file.ts
new file mode 100644
index 00000000..b0d738a0
--- /dev/null
+++ b/examples/memory/file.ts
@@ -0,0 +1,105 @@
+import { Agent, run, withTrace } from '@openai/agents';
+import { FileSession } from './sessions';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const directory: Record<string, string> = {
+  '1': 'Customer 1 (tier gold). Notes: Prefers concise replies.',
+  '2': 'Customer 2 (tier standard). Notes: Interested in tutorials.',
+};
+
+const instructions =
+  'You are a helpful assistant. For every user turn you must call lookup_customer_profile and fetch_image_data before responding.';
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory,
+  transientErrorMessage:
+    'Simulated transient CRM outage. Please retry the tool call.',
+});
+
+async function main() {
+  await withTrace('memory:file:main', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new FileSession({ dir: './tmp/' });
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      { session },
+    );
+    console.log(result.finalOutput); // e.g., Brazil
+
+    result = await run(agent, 'What is the capital of that country?', {
+      session,
+    });
+    console.log(result.finalOutput); // e.g., Brasilia
+  });
+}
+
+async function mainStream() {
+  await withTrace('memory:file:mainStream', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new FileSession({ dir: './tmp/' });
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      {
+        stream: true,
+        session,
+      },
+    );
+
+    for await (const event of result) {
+      if (
+        event.type === 'raw_model_stream_event' &&
+        event.data.type === 'output_text_delta'
+      )
+        process.stdout.write(event.data.delta);
+    }
+    console.log();
+
+    result = await run(agent, 'What is the capital of that country?', {
+      stream: true,
+      session,
+    });
+
+    // toTextStream() automatically returns a readable stream of strings intended to be displayed
+    // to the user
+    for await (const event of result.toTextStream()) {
+      process.stdout.write(event);
+    }
+    console.log();
+
+    // Additional tool invocations happen earlier in the turn.
+  });
+}
+
+async function promptAndRun() {
+  const readline = await import('node:readline/promises');
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  const isStream = await rl.question('Run in stream mode? (y/n): ');
+  rl.close();
+  if (isStream.trim().toLowerCase() === 'y') {
+    await mainStream();
+  } else {
+    await main();
+  }
+}
+
+promptAndRun().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/memory-hitl.ts b/examples/memory/memory-hitl.ts
new file mode 100644
index 00000000..58e4f55a
--- /dev/null
+++ b/examples/memory/memory-hitl.ts
@@ -0,0 +1,126 @@
+import readline from 'node:readline/promises';
+import { stdin as input, stdout as output } from 'node:process';
+import {
+  Agent,
+  MemorySession,
+  RunResult,
+  RunToolApprovalItem,
+  run,
+  withTrace,
+} from '@openai/agents';
+
+import type { Interface as ReadlineInterface } from 'node:readline/promises';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const customerDirectory: Record<string, string> = {
+  '101':
+    'Customer Kaz S. (tier gold) can be reached at +1-415-555-AAAA. Notes: Prefers SMS follow ups and values concise summaries.',
+  '104':
+    'Customer Yu S. (tier platinum) can be reached at +1-415-555-BBBB. Notes: Recently reported sync issues. Flagged for a proactive onboarding call.',
+  '205':
+    'Customer Ken S. (tier standard) can be reached at +1-415-555-CCCC. Notes: Interested in automation tutorials sent last week.',
+};
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory: customerDirectory,
+  transientErrorMessage:
+    'Simulated CRM outage for the first lookup. Please retry the tool call.',
+});
+lookupCustomerProfile.needsApproval = async () => true;
+
+const instructions =
+  'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. Keep responses under three sentences.';
+
+function formatToolArguments(interruption: RunToolApprovalItem): string {
+  const args = interruption.rawItem.arguments;
+  if (!args) {
+    return '';
+  }
+  if (typeof args === 'string') {
+    return args;
+  }
+  try {
+    return JSON.stringify(args);
+  } catch {
+    return String(args);
+  }
+}
+
+async function promptYesNo(
+  rl: ReadlineInterface,
+  question: string,
+): Promise<boolean> {
+  const answer = await rl.question(`${question} (y/n): `);
+  const normalized = answer.trim().toLowerCase();
+  return normalized === 'y' || normalized === 'yes';
+}
+
+async function resolveInterruptions<TContext, TAgent extends Agent<TContext, any>>(
+  rl: ReadlineInterface,
+  agent: TAgent,
+  initialResult: RunResult<TContext, TAgent>,
+  session: MemorySession,
+): Promise<RunResult<TContext, TAgent>> {
+  let result = initialResult;
+  while (result.interruptions?.length) {
+    for (const interruption of result.interruptions) {
+      const args = formatToolArguments(interruption);
+      const approved = await promptYesNo(
+        rl,
+        `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`,
+      );
+      if (approved) {
+        result.state.approve(interruption);
+        console.log('Approved tool call.');
+      } else {
+        result.state.reject(interruption);
+        console.log('Rejected tool call.');
+      }
+    }
+
+    result = await run(agent, result.state, { session });
+  }
+
+  return result;
+}
+
+async function main() {
+  await withTrace('memory:memory-hitl:main', async () => {
+    const agent = new Agent({
+      name: 'Memory HITL assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new MemorySession();
+    const sessionId = await session.getSessionId();
+    const rl = readline.createInterface({ input, output });
+
+    console.log(`Session id: ${sessionId}`);
+    console.log(
+      'Enter a message to chat with the agent. Submit an empty line to exit.',
+    );
+
+    while (true) {
+      const userMessage = await rl.question('You: ');
+      if (!userMessage.trim()) {
+        break;
+      }
+
+      let result = await run(agent, userMessage, { session });
+      result = await resolveInterruptions(rl, agent, result, session);
+
+      const reply = result.finalOutput ?? '[No final output produced]';
+      console.log(`Assistant: ${reply}`);
+      console.log();
+    }
+
+    rl.close();
+  });
+}
+
+main().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/memory.ts b/examples/memory/memory.ts
new file mode 100644
index 00000000..902416bd
--- /dev/null
+++ b/examples/memory/memory.ts
@@ -0,0 +1,109 @@
+import {
+  Agent,
+  getLogger,
+  MemorySession,
+  run,
+  withTrace,
+} from '@openai/agents';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const directory: Record<string, string> = {
+  '1': 'Customer 1 (tier gold). Notes: Prefers concise replies.',
+  '2': 'Customer 2 (tier standard). Notes: Interested in tutorials.',
+};
+
+const instructions =
+  'You are a helpful assistant. For every user turn you must call lookup_customer_profile and fetch_image_data before responding.';
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory,
+  transientErrorMessage:
+    'Simulated transient CRM outage. Please retry the tool call.',
+});
+
+async function main() {
+  await withTrace('memory:memorySession:main', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new MemorySession({
+      logger: getLogger('memory:memory'),
+    });
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      { session },
+    );
+    console.log(result.finalOutput); // e.g., Brazil.
+
+    result = await run(agent, 'What is the capital of that country?', {
+      session,
+    });
+    console.log(result.finalOutput); // e.g., Brasilia.
+  });
+}
+
+async function mainStream() {
+  await withTrace('memory:memorySession:mainStream', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new MemorySession();
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      {
+        stream: true,
+        session,
+      },
+    );
+
+    for await (const event of result) {
+      if (
+        event.type === 'raw_model_stream_event' &&
+        event.data.type === 'output_text_delta'
+      ) {
+        process.stdout.write(event.data.delta);
+      }
+    }
+    console.log();
+
+    result = await run(agent, 'What is the capital of that country?', {
+      stream: true,
+      session,
+    });
+
+    for await (const event of result.toTextStream()) {
+      process.stdout.write(event);
+    }
+    console.log();
+  });
+}
+
+async function promptAndRun() {
+  const readline = await import('node:readline/promises');
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  const isStream = await rl.question('Run in stream mode? (y/n): ');
+  rl.close();
+  if (isStream.trim().toLowerCase() === 'y') {
+    await mainStream();
+  } else {
+    await main();
+  }
+}
+
+promptAndRun().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/oai-hitl.ts b/examples/memory/oai-hitl.ts
new file mode 100644
index 00000000..39cbc38b
--- /dev/null
+++ b/examples/memory/oai-hitl.ts
@@ -0,0 +1,124 @@
+import readline from 'node:readline/promises';
+import { stdin as input, stdout as output } from 'node:process';
+import {
+  Agent,
+  OpenAIConversationsSession,
+  RunResult,
+  RunToolApprovalItem,
+  run,
+  withTrace,
+} from '@openai/agents';
+
+import type { Interface as ReadlineInterface } from 'node:readline/promises';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const customerDirectory: Record<string, string> = {
+  '101':
+    'Customer Kaz S. (tier gold) can be reached at +1-415-555-AAAA. Notes: Prefers SMS follow ups and values concise summaries.',
+  '104':
+    'Customer Yu S. (tier platinum) can be reached at +1-415-555-BBBB. Notes: Recently reported sync issues. Flagged for a proactive onboarding call.',
+  '205':
+    'Customer Ken S. (tier standard) can be reached at +1-415-555-CCCC. Notes: Interested in automation tutorials sent last week.',
+};
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory: customerDirectory,
+  transientErrorMessage:
+    'Simulated CRM outage for the first lookup. Please retry the tool call.',
+});
+lookupCustomerProfile.needsApproval = async () => true;
+
+const instructions =
+  'You assist support agents. For every user turn you must call lookup_customer_profile and fetch_image_data before responding so replies include stored notes and the sample image. If a tool reports a transient failure, request approval and retry the same call once before responding. Keep responses under three sentences.';
+
+function formatToolArguments(interruption: RunToolApprovalItem): string {
+  const args = interruption.rawItem.arguments;
+  if (!args) {
+    return '';
+  }
+  if (typeof args === 'string') {
+    return args;
+  }
+  try {
+    return JSON.stringify(args);
+  } catch {
+    return String(args);
+  }
+}
+
+async function promptYesNo(
+  rl: ReadlineInterface,
+  question: string,
+): Promise<boolean> {
+  const answer = await rl.question(`${question} (y/n): `);
+  const normalized = answer.trim().toLowerCase();
+  return normalized === 'y' || normalized === 'yes';
+}
+
+async function resolveInterruptions<TContext, TAgent extends Agent<TContext, any>>(
+  rl: ReadlineInterface,
+  agent: TAgent,
+  initialResult: RunResult<TContext, TAgent>,
+  session: OpenAIConversationsSession,
+): Promise<RunResult<TContext, TAgent>> {
+  let result = initialResult;
+  while (result.interruptions?.length) {
+    for (const interruption of result.interruptions) {
+      const args = formatToolArguments(interruption);
+      const approved = await promptYesNo(
+        rl,
+        `Agent ${interruption.agent.name} wants to call ${interruption.rawItem.name} with ${args || 'no arguments'}`,
+      );
+      if (approved) {
+        result.state.approve(interruption);
+        console.log('Approved tool call.');
+      } else {
+        result.state.reject(interruption);
+        console.log('Rejected tool call.');
+      }
+    }
+
+    result = await run(agent, result.state, { session });
+  }
+
+  return result;
+}
+
+async function main() {
+  await withTrace('memory:oai-hitl:main', async () => {
+    const agent = new Agent({
+      name: 'Memory HITL assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new OpenAIConversationsSession();
+    const rl = readline.createInterface({ input, output });
+
+    console.log(
+      'Enter a message to chat with the agent. Submit an empty line to exit.',
+    );
+
+    while (true) {
+      const userMessage = await rl.question('You: ');
+      if (!userMessage.trim()) {
+        break;
+      }
+
+      let result = await run(agent, userMessage, { session });
+      result = await resolveInterruptions(rl, agent, result, session);
+
+      const reply = result.finalOutput ?? '[No final output produced]';
+      console.log(`Assistant: ${reply}`);
+      console.log();
+    }
+
+    rl.close();
+  });
+}
+
+main().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/oai.ts b/examples/memory/oai.ts
new file mode 100644
index 00000000..bb84eeef
--- /dev/null
+++ b/examples/memory/oai.ts
@@ -0,0 +1,109 @@
+import {
+  Agent,
+  OpenAIConversationsSession,
+  run,
+  withTrace,
+} from '@openai/agents';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const directory: Record<string, string> = {
+  '1': 'Customer 1 (tier gold). Notes: Prefers concise replies.',
+  '2': 'Customer 2 (tier standard). Notes: Interested in tutorials.',
+};
+
+const instructions =
+  'You are a helpful assistant. For every user turn you must call lookup_customer_profile and fetch_image_data before responding.';
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory,
+  transientErrorMessage:
+    'Simulated transient CRM outage. Please retry the tool call.',
+});
+
+async function main() {
+  await withTrace('memory:oai:main', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new OpenAIConversationsSession();
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      { session },
+    );
+    console.log(result.finalOutput); // e.g., Brazil
+
+    result = await run(agent, 'What is the capital of that country?', {
+      session,
+    });
+    console.log(result.finalOutput); // e.g., Brasilia
+  });
+}
+
+async function mainStream() {
+  await withTrace('memory:oai:mainStream', async () => {
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions,
+      modelSettings: { toolChoice: 'required' },
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    const session = new OpenAIConversationsSession();
+    let result = await run(
+      agent,
+      'What is the largest country in South America?',
+      {
+        stream: true,
+        session,
+      },
+    );
+
+    for await (const event of result) {
+      if (
+        event.type === 'raw_model_stream_event' &&
+        event.data.type === 'output_text_delta'
+      )
+        process.stdout.write(event.data.delta);
+    }
+    console.log();
+
+    result = await run(agent, 'What is the capital of that country?', {
+      stream: true,
+      session,
+    });
+
+    // toTextStream() automatically returns a readable stream of strings intended to be displayed
+    // to the user
+    for await (const event of result.toTextStream()) {
+      process.stdout.write(event);
+    }
+    console.log();
+
+    // Additional tool invocations happen earlier in the turn.
+  });
+}
+
+async function promptAndRun() {
+  const readline = await import('node:readline/promises');
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  const isStream = await rl.question('Run in stream mode? (y/n): ');
+  rl.close();
+  if (isStream.trim().toLowerCase() === 'y') {
+    await mainStream();
+  } else {
+    await main();
+  }
+}
+
+promptAndRun().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/package.json b/examples/memory/package.json
new file mode 100644
index 00000000..a46a1d19
--- /dev/null
+++ b/examples/memory/package.json
@@ -0,0 +1,21 @@
+{
+  "private": true,
+  "name": "memory",
+  "dependencies": {
+    "@openai/agents": "workspace:*",
+    "@prisma/client": "^6.18.0"
+  },
+  "scripts": {
+    "build-check": "tsc --noEmit",
+    "start:memory": "tsx memory.ts",
+    "start:memory-hitl": "tsx memory-hitl.ts",
+    "start:oai": "tsx oai.ts",
+    "start:oai-hitl": "tsx oai-hitl.ts",
+    "start:file": "tsx file.ts",
+    "start:file-hitl": "tsx file-hitl.ts",
+    "start:prisma": "pnpm prisma db push --schema ./prisma/schema.prisma && pnpm prisma generate --schema ./prisma/schema.prisma && tsx prisma.ts"
+  },
+  "devDependencies": {
+    "prisma": "^6.18.0"
+  }
+}
diff --git a/examples/memory/prisma-client-stub.d.ts b/examples/memory/prisma-client-stub.d.ts
new file mode 100644
index 00000000..ac70b448
--- /dev/null
+++ b/examples/memory/prisma-client-stub.d.ts
@@ -0,0 +1,57 @@
+// Minimal fallback types so the example can compile when @prisma/client has not
+// been generated yet (e.g., in CI where Prisma migrations are not run). When the
+// real client is available, these declarations merge with the generated ones.
+
+declare module '@prisma/client' {
+  export namespace Prisma {
+    interface SessionItemCreateManyInput {
+      sessionId: string;
+      position: number;
+      item: string;
+    }
+
+    type TransactionClient = PrismaClient;
+  }
+
+  type UpsertArgs = {
+    where: { id: string };
+    create: { id: string };
+    update: Record<string, never>;
+  };
+
+  type SessionItemSelect = { id?: true; position?: true; item?: true };
+
+  type SessionItemQueryArgs = {
+    where: { sessionId: string };
+    orderBy: { position: 'asc' | 'desc' };
+    select?: SessionItemSelect;
+    take?: number;
+  };
+
+  interface SessionDelegate {
+    upsert(args: UpsertArgs): Promise<unknown>;
+    delete(args: { where: { id: string } }): Promise<unknown>;
+  }
+
+  interface SessionItemDelegate {
+    findMany(
+      args: SessionItemQueryArgs,
+    ): Promise<Array<{ id?: string; position?: number; item?: unknown }>>;
+    findFirst(
+      args: SessionItemQueryArgs,
+    ): Promise<{ id?: string; position?: number; item?: unknown } | null>;
+    createMany(args: {
+      data: Prisma.SessionItemCreateManyInput[];
+    }): Promise<unknown>;
+    delete(args: { where: { id: string } }): Promise<unknown>;
+  }
+
+  export class PrismaClient {
+    session: SessionDelegate;
+    sessionItem: SessionItemDelegate;
+    $transaction<T>(
+      fn: (client: Prisma.TransactionClient) => Promise<T>,
+    ): Promise<T>;
+    $disconnect(): Promise<void>;
+  }
+}
diff --git a/examples/memory/prisma.ts b/examples/memory/prisma.ts
new file mode 100644
index 00000000..8a6098fd
--- /dev/null
+++ b/examples/memory/prisma.ts
@@ -0,0 +1,115 @@
+// Prisma-backed Session implementation example. To try it out:
+//   pnpm add @prisma/client prisma
+//   npx prisma migrate dev --name init --schema ./examples/memory/prisma/schema.prisma
+//   npx prisma generate --schema ./examples/memory/prisma/schema.prisma
+//   pnpm start:prisma
+
+import { Agent, run, withTrace } from '@openai/agents';
+import { createPrismaSession } from './sessions';
+import { createLookupCustomerProfileTool, fetchImageData } from './tools';
+
+const directory: Record<string, string> = {
+  '1': 'Customer 1 (tier gold). Notes: Prefers concise replies.',
+  '2': 'Customer 2 (tier standard). Notes: Interested in tutorials.',
+};
+
+const lookupCustomerProfile = createLookupCustomerProfileTool({
+  directory,
+  transientErrorMessage:
+    'Simulated transient CRM outage. Please retry the tool call.',
+});
+
+async function main() {
+  await withTrace('memory:prisma:main', async () => {
+    const { session, prisma } = await createPrismaSession();
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions:
+        'You are a helpful assistant. Be VERY concise. For every user turn you must call lookup_customer_profile and fetch_image_data before responding.',
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    try {
+      let result = await run(
+        agent,
+        'What is the largest country in South America?',
+        { session },
+      );
+      console.log(result.finalOutput);
+
+      result = await run(agent, 'What is the capital of that country?', {
+        session,
+      });
+      console.log(result.finalOutput);
+    } finally {
+      await prisma.$disconnect().catch(() => {});
+    }
+  });
+}
+
+async function mainStream() {
+  await withTrace('memory:prisma:mainStream', async () => {
+    const { session, prisma } = await createPrismaSession();
+    const agent = new Agent({
+      name: 'Assistant',
+      instructions:
+        'You are a helpful assistant. Be VERY concise. For every user turn you must call lookup_customer_profile and fetch_image_data before responding.',
+      tools: [lookupCustomerProfile, fetchImageData],
+    });
+
+    try {
+      let result = await run(
+        agent,
+        'What is the largest country in South America?',
+        {
+          stream: true,
+          session,
+        },
+      );
+
+      for await (const event of result) {
+        if (
+          event.type === 'raw_model_stream_event' &&
+          event.data.type === 'output_text_delta'
+        ) {
+          process.stdout.write(event.data.delta);
+        }
+      }
+      console.log();
+
+      result = await run(agent, 'What is the capital of that country?', {
+        stream: true,
+        session,
+      });
+
+      for await (const event of result.toTextStream()) {
+        process.stdout.write(event);
+      }
+      console.log();
+
+      // Additional tool invocations happen earlier in the turn.
+    } finally {
+      await prisma.$disconnect().catch(() => {});
+    }
+  });
+}
+
+async function promptAndRun() {
+  const readline = await import('node:readline/promises');
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  const isStream = await rl.question('Run in stream mode? (y/n): ');
+  rl.close();
+  if (isStream.trim().toLowerCase() === 'y') {
+    await mainStream();
+  } else {
+    await main();
+  }
+}
+
+promptAndRun().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/examples/memory/prisma/schema.prisma b/examples/memory/prisma/schema.prisma
new file mode 100644
index 00000000..65b208e5
--- /dev/null
+++ b/examples/memory/prisma/schema.prisma
@@ -0,0 +1,35 @@
+// Example schema for the Prisma-backed session store.
+// Run the following to get started:
+//   pnpm add -D prisma
+//   pnpm add @prisma/client
+//   npx prisma migrate dev --name init --schema ./examples/memory/prisma/schema.prisma
+//   npx prisma generate --schema ./examples/memory/prisma/schema.prisma
+
+generator client {
+  provider = "prisma-client-js"
+}
+
+datasource db {
+  provider = "sqlite"
+  url      = env("DATABASE_URL")
+}
+
+model Session {
+  id        String        @id
+  createdAt DateTime      @default(now())
+  updatedAt DateTime      @updatedAt
+  items     SessionItem[]
+}
+
+model SessionItem {
+  id        Int      @id @default(autoincrement())
+  sessionId String
+  position  Int
+  item      Json
+  createdAt DateTime @default(now())
+
+  session Session @relation(fields: [sessionId], references: [id], onDelete: Cascade)
+
+  @@index([sessionId, position])
+  @@unique([sessionId, position])
+}
diff --git a/examples/memory/sessions/file.ts b/examples/memory/sessions/file.ts
new file mode 100644
index 00000000..58829a88
--- /dev/null
+++ b/examples/memory/sessions/file.ts
@@ -0,0 +1,108 @@
+import type { AgentInputItem, Session } from '@openai/agents';
+import * as fs from 'node:fs/promises';
+import * as path from 'node:path';
+import { randomUUID } from 'node:crypto';
+
+export type FileSessionOptions = {
+  dir?: string;
+  sessionId?: string;
+};
+
+export class FileSession implements Session {
+  #dir: string;
+  #sessionId?: string;
+
+  constructor(options: FileSessionOptions = {}) {
+    this.#dir =
+      options.dir ?? path.resolve(process.cwd(), '.agents-sessions');
+    this.#sessionId = options.sessionId;
+  }
+
+  async getSessionId(): Promise<string> {
+    if (!this.#sessionId) {
+      this.#sessionId = randomUUID().replace(/-/g, '').slice(0, 24);
+    }
+    await this.#ensureDir();
+    const file = this.#filePath(this.#sessionId);
+    try {
+      await fs.access(file);
+    } catch {
+      await fs.writeFile(file, '[]', 'utf8');
+    }
+    return this.#sessionId;
+  }
+
+  async getItems(limit?: number): Promise<AgentInputItem[]> {
+    const sessionId = await this.getSessionId();
+    const items = await this.#readItems(sessionId);
+    if (typeof limit === 'number' && limit >= 0) {
+      return items.slice(-limit);
+    }
+    return items;
+  }
+
+  async addItems(items: AgentInputItem[]): Promise<void> {
+    if (!items.length) {
+      return;
+    }
+    const sessionId = await this.getSessionId();
+    const current = await this.#readItems(sessionId);
+    // Store a structured clone so we don't accidentally persist references that can mutate.
+    const serialized = items.map((item) => JSON.parse(JSON.stringify(item)));
+    const next = current.concat(serialized as AgentInputItem[]);
+    await this.#writeItems(sessionId, next);
+  }
+
+  async popItem(): Promise<AgentInputItem | undefined> {
+    const sessionId = await this.getSessionId();
+    const items = await this.#readItems(sessionId);
+    if (items.length === 0) {
+      return undefined;
+    }
+    const popped = items.pop();
+    await this.#writeItems(sessionId, items);
+    return popped;
+  }
+
+  async clearSession(): Promise<void> {
+    if (!this.#sessionId) {
+      return;
+    }
+    const file = this.#filePath(this.#sessionId);
+    try {
+      await fs.unlink(file);
+    } catch {
+      // ignore missing file
+    }
+    this.#sessionId = undefined;
+  }
+
+  async #ensureDir(): Promise<void> {
+    await fs.mkdir(this.#dir, { recursive: true });
+  }
+
+  #filePath(sessionId: string): string {
+    return path.join(this.#dir, `${sessionId}.json`);
+  }
+
+  async #readItems(sessionId: string): Promise<AgentInputItem[]> {
+    const file = this.#filePath(sessionId);
+    try {
+      const data = await fs.readFile(file, 'utf8');
+      const parsed = JSON.parse(data);
+      return Array.isArray(parsed) ? (parsed as AgentInputItem[]) : [];
+    } catch (err: any) {
+      if (err && (err.code === 'ENOENT' || err.code === 'ENOTDIR')) {
+        return [];
+      }
+      throw err;
+    }
+  }
+
+  async #writeItems(sessionId: string, items: AgentInputItem[]): Promise<void> {
+    await this.#ensureDir();
+    const file = this.#filePath(sessionId);
+    await fs.writeFile(file, JSON.stringify(items, null, 2), 'utf8');
+  }
+}
+
+export type { AgentInputItem } from '@openai/agents';
diff --git a/examples/memory/sessions/index.ts b/examples/memory/sessions/index.ts
new file mode 100644
index 00000000..36e9a597
--- /dev/null
+++ b/examples/memory/sessions/index.ts
@@ -0,0 +1,4 @@
+export { FileSession } from './file';
+export type { FileSessionOptions } from './file';
+export { PrismaSession, createPrismaSession } from './prisma';
+export type { PrismaSessionOptions } from './prisma';
diff --git a/examples/memory/sessions/prisma.ts b/examples/memory/sessions/prisma.ts
new file mode 100644
index 00000000..bc6586d6
--- /dev/null
+++ b/examples/memory/sessions/prisma.ts
@@ -0,0 +1,168 @@
+import type { AgentInputItem, Session } from '@openai/agents';
+import { protocol } from '@openai/agents';
+import { randomUUID } from 'node:crypto';
+import type { Prisma } from '@prisma/client';
+import { PrismaClient } from '@prisma/client';
+import * as process from 'node:process';
+
+export type PrismaSessionOptions = {
+  client: PrismaClient;
+  sessionId?: string;
+  useTransactions?: boolean;
+};
+
+export class PrismaSession implements Session {
+  #client: PrismaClient;
+  #sessionId?: string;
+  #useTransactions: boolean;
+
+  constructor(options: PrismaSessionOptions) {
+    this.#client = options.client;
+    this.#sessionId = options.sessionId;
+    this.#useTransactions = options.useTransactions ?? true;
+  }
+
+  async getSessionId(): Promise<string> {
+    if (!this.#sessionId) {
+      this.#sessionId = randomUUID().replace(/-/g, '').slice(0, 24);
+    }
+    const sessionId = this.#sessionId;
+    await this.#client.session.upsert({
+      where: { id: sessionId },
+      create: { id: sessionId },
+      update: {},
+    });
+    return sessionId;
+  }
+
+  async getItems(limit?: number): Promise<AgentInputItem[]> {
+    const sessionId = await this.getSessionId();
+    const take = typeof limit === 'number' && limit >= 0 ? limit : undefined;
+    const records = await this.#client.sessionItem.findMany({
+      where: { sessionId },
+      orderBy: { position: take ? 'desc' : 'asc' },
+      take,
+    });
+    const ordered = take ? [...records].reverse() : records;
+    const result: AgentInputItem[] = [];
+    for (const record of ordered) {
+      const raw =
+        typeof record.item === 'string' ? JSON.parse(record.item) : record.item;
+      const item = coerceAgentItem(raw);
+      if (item) {
+        result.push(item);
+      }
+    }
+    return result;
+  }
+
+  async addItems(items: AgentInputItem[]): Promise<void> {
+    if (!items.length) {
+      return;
+    }
+    const sessionId = await this.getSessionId();
+    await this.#withClient(async (client) => {
+      const last = await client.sessionItem.findFirst({
+        where: { sessionId },
+        select: { position: true },
+        orderBy: { position: 'desc' },
+      });
+      let position = last?.position ?? 0;
+      const payload: Prisma.SessionItemCreateManyInput[] = [];
+      for (const raw of items) {
+        const item = coerceAgentItem(raw);
+        if (!item) continue;
+        position += 1;
+        payload.push({
+          sessionId,
+          position,
+          item: JSON.stringify(item),
+        });
+      }
+      if (payload.length === 0) {
+        return;
+      }
+      await client.sessionItem.createMany({ data: payload });
+    });
+  }
+
+  async popItem(): Promise<AgentInputItem | undefined> {
+    const sessionId = await this.getSessionId();
+    return await this.#withClient(async (client) => {
+      const latest = await client.sessionItem.findFirst({
+        where: { sessionId },
+        select: { id: true, item: true },
+        orderBy: { position: 'desc' },
+      });
+      if (!latest?.id) {
+        return undefined;
+      }
+      await client.sessionItem.delete({ where: { id: latest.id } });
+      const raw =
+        typeof latest.item === 'string' ? JSON.parse(latest.item) : latest.item;
+      return coerceAgentItem(raw) ?? undefined;
+    });
+  }
+
+  async clearSession(): Promise<void> {
+    if (!this.#sessionId) {
+      return;
+    }
+    try {
+      await this.#client.session.delete({ where: { id: this.#sessionId } });
+    } catch {
+      // ignore missing sessions
+    }
+    this.#sessionId = undefined;
+  }
+
+  async #withClient<T>(
+    fn: (client: PrismaClient | Prisma.TransactionClient) => Promise<T>,
+  ): Promise<T> {
+    if (
+      this.#useTransactions &&
+      typeof this.#client.$transaction === 'function'
+    ) {
+      return this.#client.$transaction((tx) => fn(tx));
+    }
+    return fn(this.#client);
+  }
+}
+
+export async function createPrismaSession(
+  options: {
+    sessionId?: string;
+    useTransactions?: boolean;
+    client?: PrismaClient;
+    databaseUrl?: string;
+  } = {},
+): Promise<{ session: PrismaSession; prisma: PrismaClient }> {
+  if (!options.client) {
+    if (!process.env.DATABASE_URL && options.databaseUrl) {
+      process.env.DATABASE_URL = options.databaseUrl;
+    }
+    if (!process.env.DATABASE_URL) {
+      process.env.DATABASE_URL = 'file:./dev.db';
+      console.warn(
+        'DATABASE_URL was not set. Defaulting to sqlite db at file:./dev.db',
+      );
+    }
+  }
+  const prisma = options.client ?? new PrismaClient();
+  const session = new PrismaSession({
+    client: prisma,
+    sessionId: options.sessionId,
+    useTransactions: options.useTransactions,
+  });
+  return { session, prisma };
+}
+
+function coerceAgentItem(raw: unknown): AgentInputItem | undefined {
+  const parsed = protocol.ModelItem.safeParse(raw);
+  if (!parsed.success) {
+    return undefined;
+  }
+  return parsed.data as AgentInputItem;
+}
+
+export type { PrismaClient } from '@prisma/client';
diff --git a/examples/memory/tools.ts b/examples/memory/tools.ts
new file mode 100644
index 00000000..1f4748d3
--- /dev/null
+++ b/examples/memory/tools.ts
@@ -0,0 +1,101 @@
+import { tool } from '@openai/agents';
+import { readFileSync } from 'node:fs';
+import { z } from 'zod';
+
+const SAMPLE_IMAGE_URL = new URL(
+  '../basic/media/image_bison.jpg',
+  import.meta.url,
+);
+const SAMPLE_IMAGE_BASE64 = readFileSync(SAMPLE_IMAGE_URL).toString('base64');
+const SAMPLE_IMAGE_MEDIA_TYPE = 'image/jpeg';
+
+const LookupCustomerProfileParameters = z.object({
+  id: z
+    .string()
+    .describe('The internal identifier for the customer to retrieve.'),
+});
+
+type LookupCustomerProfileOptions = {
+  directory: Record<string, string>;
+  name?: string;
+  description?: string;
+  transientErrorMessage?: string;
+  missingCustomerMessage?: (id: string) => string;
+};
+
+export function createLookupCustomerProfileTool(
+  options: LookupCustomerProfileOptions,
+) {
+  const {
+    directory,
+    name = 'lookup_customer_profile',
+    description = 'Look up stored profile details for a customer by their internal id.',
+    transientErrorMessage = 'Simulated transient CRM outage. Please retry the tool call.',
+    missingCustomerMessage = (id: string) => `No customer found for id ${id}.`,
+  } = options;
+
+  let hasSimulatedLookupFailure = false;
+
+  return tool({
+    name,
+    description,
+    parameters: LookupCustomerProfileParameters,
+    async execute({ id }: z.infer<typeof LookupCustomerProfileParameters>) {
+      if (!hasSimulatedLookupFailure) {
+        hasSimulatedLookupFailure = true;
+        throw new Error(transientErrorMessage);
+      }
+      return directory[id] ?? missingCustomerMessage(id);
+    },
+  });
+}
+
+const FetchImageDataParameters = z.object({
+  label: z
+    .string()
+    .max(32)
+    .optional()
+    .nullable()
+    .describe('An optional short label to echo back in the response.'),
+});
+
+/**
+ * Fetches a reusable image so downstream samples can verify binary persistence.
+ */
+export const fetchImageData = tool({
+  name: 'fetch_image_data',
+  description:
+    'Returns a JPEG sample image as raw bytes so you can confirm image persistence.',
+  parameters: FetchImageDataParameters,
+  async execute({ label }: z.infer<typeof FetchImageDataParameters>) {
+    const filename = label
+      ? `sample-image-${sanitizeFilenameFragment(label)}.jpg`
+      : 'sample-image.jpg';
+    const imageDataUrl = `data:${SAMPLE_IMAGE_MEDIA_TYPE};base64,${SAMPLE_IMAGE_BASE64}`;
+
+    return [
+      {
+        type: 'text' as const,
+        text: label
+          ? `Fetched the sample image for "${label}".`
+          : 'Fetched the default sample image.',
+      },
+      {
+        type: 'image' as const,
+        image: imageDataUrl,
+        providerData: { filename },
+      },
+    ];
+  },
+});
+
+function sanitizeFilenameFragment(value: string): string {
+  return (
+    value
+      .replace(/[^a-z0-9-_]+/gi, '-')
+      .replace(/-+/g, '-')
+      .replace(/^-|-$/g, '')
+      .toLowerCase()
+      .slice(0, 32) || 'label'
+  );
+}
diff --git a/examples/memory/tsconfig.json b/examples/memory/tsconfig.json
new file mode 100644
index 00000000..150a0961
--- /dev/null
+++ b/examples/memory/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../tsconfig.examples.json"
+}
diff --git a/examples/realtime-next/src/app/server/backendAgent.action.tsx b/examples/realtime-next/src/app/server/backendAgent.action.tsx
index 963eeb67..2306d15d 100644
--- a/examples/realtime-next/src/app/server/backendAgent.action.tsx
+++ b/examples/realtime-next/src/app/server/backendAgent.action.tsx
@@ -8,7 +8,7 @@ const backendAgent = new Agent({
   name: 'Refund Agent',
   instructions:
     'You are a specialist on handling refund requests and detect fraud. You are given a request and you need to determine if the request is valid and if it is, you need to handle it.',
-  model: 'o4-mini',
+  model: 'gpt-5-mini',
   outputType: z.object({
     refundApproved: z.boolean(),
     refundReason: z.string(),
diff --git a/packages/agents-core/src/index.ts b/packages/agents-core/src/index.ts
index 88c6190b..72e71ffa 100644
--- a/packages/agents-core/src/index.ts
+++ b/packages/agents-core/src/index.ts
@@ -112,6 +112,11 @@ export {
   Runner,
   StreamRunOptions,
 } from './run';
+export type {
+  ModelInputData,
+  CallModelInputFilter,
+  CallModelInputFilterArgs,
+} from './run';
 export { RunContext } from './runContext';
 export { RunState } from './runState';
 export {
@@ -161,6 +166,8 @@ export type {
   StreamEventGenericItem,
 } from './types';
 export { Usage } from './usage';
+export type { Session, SessionInputCallback } from './memory/session';
+export { MemorySession } from './memory/memorySession';
 
 /**
  * Exporting the whole protocol as an object here. This contains both the types
diff --git a/packages/agents-core/src/memory/memorySession.ts b/packages/agents-core/src/memory/memorySession.ts
new file mode 100644
index 00000000..1ae244f3
--- /dev/null
+++ b/packages/agents-core/src/memory/memorySession.ts
@@ -0,0 +1,85 @@
+import { randomUUID } from '@openai/agents-core/_shims';
+
+import type { AgentInputItem } from '../types';
+import type { Session } from './session';
+import { logger, Logger } from '../logger';
+
+export type MemorySessionOptions = {
+  sessionId?: string;
+  initialItems?: AgentInputItem[];
+  logger?: Logger;
+};
+
+/**
+ * Simple in-memory session store intended for demos or tests. Not recommended for production use.
+ */
+export class MemorySession implements Session {
+  private readonly sessionId: string;
+  private readonly logger: Logger;
+
+  private items: AgentInputItem[];
+
+  constructor(options: MemorySessionOptions = {}) {
+    this.sessionId = options.sessionId ?? randomUUID();
+    this.items = options.initialItems
+      ? options.initialItems.map(cloneAgentItem)
+      : [];
+    this.logger = options.logger ?? logger;
+  }
+
+  async getSessionId(): Promise<string> {
+    return this.sessionId;
+  }
+
+  async getItems(limit?: number): Promise<AgentInputItem[]> {
+    if (limit === undefined) {
+      const cloned = this.items.map(cloneAgentItem);
+      this.logger.debug(
+        `Getting items from memory session (${this.sessionId}): ${JSON.stringify(cloned)}`,
+      );
+      return cloned;
+    }
+    if (limit <= 0) {
+      return [];
+    }
+    const start = Math.max(this.items.length - limit, 0);
+    const items = this.items.slice(start).map(cloneAgentItem);
+    this.logger.debug(
+      `Getting items from memory session (${this.sessionId}): ${JSON.stringify(items)}`,
+    );
+    return items;
+  }
+
+  async addItems(items: AgentInputItem[]): Promise<void> {
+    if (items.length === 0) {
+      return;
+    }
+    const cloned = items.map(cloneAgentItem);
+    this.logger.debug(
+      `Adding items to memory session (${this.sessionId}): ${JSON.stringify(cloned)}`,
+    );
+    this.items = [...this.items, ...cloned];
+  }
+
+  async popItem(): Promise<AgentInputItem | undefined> {
+    if (this.items.length === 0) {
+      return undefined;
+    }
+    const item = this.items[this.items.length - 1];
+    const cloned = cloneAgentItem(item);
+    this.logger.debug(
+      `Popping item from memory session (${this.sessionId}): ${JSON.stringify(cloned)}`,
+    );
+    this.items = this.items.slice(0, -1);
+    return cloned;
+  }
+
+  async clearSession(): Promise<void> {
+    this.logger.debug(`Clearing memory session (${this.sessionId})`);
+    this.items = [];
+  }
+}
+
+function cloneAgentItem<T extends AgentInputItem>(item: T): T {
+  return structuredClone(item);
+}
diff --git a/packages/agents-core/src/memory/session.ts b/packages/agents-core/src/memory/session.ts
new file mode 100644
index 00000000..9afbf14f
--- /dev/null
+++ b/packages/agents-core/src/memory/session.ts
@@ -0,0 +1,45 @@
+import type { AgentInputItem } from '../types';
+
+/**
+ * A function that combines session history with new input items before the model call.
+ */
+export type SessionInputCallback = (
+  historyItems: AgentInputItem[],
+  newItems: AgentInputItem[],
+) => AgentInputItem[] | Promise<AgentInputItem[]>;
+
+/**
+ * Interface representing a persistent session store for conversation history.
+ */
+export interface Session {
+  /**
+   * Ensure and return the identifier for this session.
+   */
+  getSessionId(): Promise<string>;
+
+  /**
+   * Retrieve items from the conversation history.
+   *
+   * @param limit - The maximum number of items to return. When provided the most
+   * recent {@link limit} items should be returned in chronological order.
+   */
+  getItems(limit?: number): Promise<AgentInputItem[]>;
+
+  /**
+   * Append new items to the conversation history.
+   *
+   * @param items - Items to add to the session history.
+   */
+  addItems(items: AgentInputItem[]): Promise<void>;
+
+  /**
+   * Remove and return the most recent item from the conversation history if it
+   * exists.
+   */
+  popItem(): Promise<AgentInputItem | undefined>;
+
+  /**
+   * Remove all items that belong to the session and reset its state.
+ */ + clearSession(): Promise; +} diff --git a/packages/agents-core/src/run.ts b/packages/agents-core/src/run.ts index 577e8145..6101fed3 100644 --- a/packages/agents-core/src/run.ts +++ b/packages/agents-core/src/run.ts @@ -16,6 +16,9 @@ import { ModelResponse, ModelSettings, ModelTracing, + Prompt, + SerializedHandoff, + SerializedTool, } from './model'; import { getDefaultModelProvider } from './providers'; import { RunContext } from './runContext'; @@ -34,16 +37,22 @@ import { } from './errors'; import { addStepToRunResult, - executeInterruptedToolsAndSideEffects, - executeToolsAndSideEffects, + resolveInterruptedTurn, + resolveTurnAfterModelResponse, maybeResetToolChoice, ProcessedResponse, processModelResponse, streamStepItemsToRunResult, + saveStreamInputToSession, + saveStreamResultToSession, + saveToSession, + prepareInputItemsWithSession, } from './runImplementation'; import { RunItem } from './items'; +import { Tool } from './tool'; import { getOrCreateTrace, + addErrorToCurrentSpan, resetCurrentSpan, setCurrentSpan, withNewSpanContext, @@ -56,8 +65,17 @@ import { RunState } from './runState'; import { StreamEventResponseCompleted } from './types/protocol'; import { convertAgentOutputTypeToSerializable } from './utils/tools'; import { gpt5ReasoningSettingsRequired, isGpt5Default } from './defaultModel'; +import type { Session, SessionInputCallback } from './memory/session'; +import { encodeUint8ArrayToBase64 } from './utils/base64'; +import { + isArrayBufferView, + isNodeBuffer, + isSerializedBufferSnapshot, +} from './utils/smartString'; -const DEFAULT_MAX_TURNS = 10; +// -------------------------------------------------------------- +// Configuration +// -------------------------------------------------------------- /** * Configures settings for the entire agent run. @@ -130,16 +148,37 @@ export type RunConfig = { * An optional dictionary of additional metadata to include with the trace. */ traceMetadata?: Record; + + /** + * Customizes how session history is combined with the current turn's input. + * When omitted, history items are appended before the new input. + */ + sessionInputCallback?: SessionInputCallback; + + /** + * Invoked immediately before calling the model, allowing callers to edit the + * system instructions or input items that will be sent to the model. + */ + callModelInputFilter?: CallModelInputFilter; }; +/** + * Common run options shared between streaming and non-streaming execution pathways. + */ type SharedRunOptions = { context?: TContext | RunContext; maxTurns?: number; signal?: AbortSignal; previousResponseId?: string; conversationId?: string; + session?: Session; + sessionInputCallback?: SessionInputCallback; + callModelInputFilter?: CallModelInputFilter; }; +/** + * Options for runs that stream incremental events as the model responds. + */ export type StreamRunOptions = SharedRunOptions & { /** @@ -148,205 +187,71 @@ export type StreamRunOptions = stream: true; }; +/** + * Options for runs that collect the full model response before returning. + */ export type NonStreamRunOptions = SharedRunOptions & { /** - * Whether to stream the run. If true, the run will emit events as the model responds. + * Run to completion without streaming incremental events; leave undefined or set to `false`. */ stream?: false; }; +/** + * Options polymorphic over streaming or non-streaming execution modes. 
+ */ export type IndividualRunOptions = | StreamRunOptions | NonStreamRunOptions; -/** - * @internal - */ -export function getTracing( - tracingDisabled: boolean, - traceIncludeSensitiveData: boolean, -): ModelTracing { - if (tracingDisabled) { - return false; - } - - if (traceIncludeSensitiveData) { - return true; - } - - return 'enabled_without_data'; -} - -function toAgentInputList( - originalInput: string | AgentInputItem[], -): AgentInputItem[] { - if (typeof originalInput === 'string') { - return [{ type: 'message', role: 'user', content: originalInput }]; - } - - return [...originalInput]; -} +// -------------------------------------------------------------- +// Runner +// -------------------------------------------------------------- /** - * Internal module for tracking the items in turns and ensuring that we don't send duplicate items. - * This logic is vital for properly handling the items to send during multiple turns - * when you use either `conversationId` or `previousResponseId`. - * Both scenarios expect an agent loop to send only new items for each Responses API call. + * Executes an agent workflow with the shared default `Runner` instance. * - * see also: https://platform.openai.com/docs/guides/conversation-state?api-mode=responses + * @param agent - The entry agent to invoke. + * @param input - A string utterance, structured input items, or a resumed `RunState`. + * @param options - Controls streaming mode, context, session handling, and turn limits. + * @returns A `RunResult` when `stream` is false, otherwise a `StreamedRunResult`. */ -class ServerConversationTracker { - // Conversation ID: - // - https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#using-the-conversations-api - // - https://platform.openai.com/docs/api-reference/conversations/create - public conversationId?: string; - - // Previous Response ID: - // https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#passing-context-from-the-previous-response - public previousResponseId?: string; - - // Using this flag because WeakSet does not provide a way to check its size - private sentInitialInput = false; - // The items already sent to the model; using WeakSet for memory efficiency - private sentItems = new WeakSet(); - // The items received from the server; using WeakSet for memory efficiency - private serverItems = new WeakSet(); - - constructor({ - conversationId, - previousResponseId, - }: { - conversationId?: string; - previousResponseId?: string; - }) { - this.conversationId = conversationId ?? undefined; - this.previousResponseId = previousResponseId ?? undefined; - } - - /** - * Pre-populates tracker caches from an existing RunState when resuming server-managed runs. 
- */ - primeFromState({ - originalInput, - generatedItems, - modelResponses, - }: { - originalInput: string | AgentInputItem[]; - generatedItems: RunItem[]; - modelResponses: ModelResponse[]; - }) { - if (this.sentInitialInput) { - return; - } - - for (const item of toAgentInputList(originalInput)) { - if (item && typeof item === 'object') { - this.sentItems.add(item); - } - } - - this.sentInitialInput = true; - - const latestResponse = modelResponses[modelResponses.length - 1]; - for (const response of modelResponses) { - for (const item of response.output) { - if (item && typeof item === 'object') { - this.serverItems.add(item); - } - } - } - - if (!this.conversationId && latestResponse?.responseId) { - this.previousResponseId = latestResponse.responseId; - } - - for (const item of generatedItems) { - const rawItem = item.rawItem; - if (!rawItem || typeof rawItem !== 'object') { - continue; - } - if (this.serverItems.has(rawItem)) { - this.sentItems.add(rawItem); - } - } - } - - trackServerItems(modelResponse: ModelResponse | undefined) { - if (!modelResponse) { - return; - } - for (const item of modelResponse.output) { - if (item && typeof item === 'object') { - this.serverItems.add(item); - } - } - if ( - !this.conversationId && - this.previousResponseId !== undefined && - modelResponse.responseId - ) { - this.previousResponseId = modelResponse.responseId; - } - } - - prepareInput( - originalInput: string | AgentInputItem[], - generatedItems: RunItem[], - ): AgentInputItem[] { - const inputItems: AgentInputItem[] = []; - - if (!this.sentInitialInput) { - const initialItems = toAgentInputList(originalInput); - for (const item of initialItems) { - inputItems.push(item); - if (item && typeof item === 'object') { - this.sentItems.add(item); - } - } - this.sentInitialInput = true; - } - - for (const item of generatedItems) { - if (item.type === 'tool_approval_item') { - continue; - } - const rawItem = item.rawItem; - if (!rawItem || typeof rawItem !== 'object') { - continue; - } - if (this.sentItems.has(rawItem) || this.serverItems.has(rawItem)) { - continue; - } - inputItems.push(rawItem as AgentInputItem); - this.sentItems.add(rawItem); - } - - return inputItems; +export async function run, TContext = undefined>( + agent: TAgent, + input: string | AgentInputItem[] | RunState, + options?: NonStreamRunOptions, +): Promise>; +export async function run, TContext = undefined>( + agent: TAgent, + input: string | AgentInputItem[] | RunState, + options?: StreamRunOptions, +): Promise>; +export async function run, TContext = undefined>( + agent: TAgent, + input: string | AgentInputItem[] | RunState, + options?: StreamRunOptions | NonStreamRunOptions, +): Promise | StreamedRunResult> { + const runner = getDefaultRunner(); + if (options?.stream) { + return await runner.run(agent, input, options); + } else { + return await runner.run(agent, input, options); } } -export function getTurnInput( - originalInput: string | AgentInputItem[], - generatedItems: RunItem[], -): AgentInputItem[] { - const rawItems = generatedItems - .filter((item) => item.type !== 'tool_approval_item') // don't include approval items to avoid double function calls - .map((item) => item.rawItem); - return [...toAgentInputList(originalInput), ...rawItems]; -} - /** - * A Runner is responsible for running an agent workflow. + * Orchestrates agent execution, including guardrails, tool calls, session persistence, and + * tracing. Reuse a `Runner` instance when you want consistent configuration across multiple runs. 
  */
 export class Runner extends RunHooks<any, AgentOutputType<unknown>> {
   public readonly config: RunConfig;
   private readonly inputGuardrailDefs: InputGuardrailDefinition[];
   private readonly outputGuardrailDefs: OutputGuardrailDefinition<
     OutputGuardrailMetadata,
     AgentOutputType
   >[];
 
+  /**
+   * Creates a runner with optional defaults that apply to every subsequent run invocation.
+   *
+   * @param config - Overrides for models, guardrails, tracing, or session behavior.
+   */
   constructor(config: Partial<RunConfig> = {}) {
     super();
     this.config = {
       traceId: config.traceId,
       groupId: config.groupId,
       traceMetadata: config.traceMetadata,
+      sessionInputCallback: config.sessionInputCallback,
+      callModelInputFilter: config.callModelInputFilter,
     };
     this.inputGuardrailDefs = (config.inputGuardrails ?? []).map(
       defineInputGuardrail,
     );
     this.outputGuardrailDefs = (config.outputGuardrails ?? []).map(
       defineOutputGuardrail,
     );
   }
 
   /**
-   * @internal
+   * Run a workflow starting at the given agent. The agent will run in a loop until a final
+   * output is generated. The loop runs like so:
+   * 1. The agent is invoked with the given input.
+   * 2. If there is a final output (i.e. the agent produces something of type
+   *    `agent.outputType`, the loop terminates.
+   * 3. If there's a handoff, we run the loop again, with the new agent.
+   * 4. Else, we run tool calls (if any), and re-run the loop.
+   *
+   * In two cases, the agent may raise an exception:
+   * 1. If the maxTurns is exceeded, a MaxTurnsExceeded exception is raised.
+   * 2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
+   *
+   * Note that only the first agent's input guardrails are run.
+   *
+   * @param agent - The starting agent to run.
+   * @param input - The initial input to the agent. You can pass a string or an array of
+   *   `AgentInputItem`.
+   * @param options - Options for the run, including streaming behavior, execution context, and the
+   *   maximum number of turns.
+   * @returns The result of the run.
    */
-  async #runIndividualNonStream<
-    TContext,
-    TAgent extends Agent<any, any>,
-    _THandoffs extends (Agent<any, any> | Handoff)[] = any[],
-  >(
-    startingAgent: TAgent,
-    input: string | AgentInputItem[] | RunState<TContext, TAgent>,
-    options: NonStreamRunOptions<TContext>,
-  ): Promise<RunResult<TContext, TAgent>> {
-    return withNewSpanContext(async () => {
-      // if we have a saved state we use that one, otherwise we create a new one
-      const isResumedState = input instanceof RunState;
-      const state = isResumedState
-        ? input
-        : new RunState(
-            options.context instanceof RunContext
-              ? options.context
-              : new RunContext(options.context),
-            input,
-            startingAgent,
-            options.maxTurns ?? DEFAULT_MAX_TURNS,
-          );
-
-      const serverConversationTracker =
-        options.conversationId || options.previousResponseId
-          ? new ServerConversationTracker({
-              conversationId: options.conversationId,
-              previousResponseId: options.previousResponseId,
-            })
-          : undefined;
-
-      if (serverConversationTracker && isResumedState) {
+  run<TAgent extends Agent<any, any>, TContext = undefined>(
+    agent: TAgent,
+    input: string | AgentInputItem[] | RunState<TContext, TAgent>,
+    options?: NonStreamRunOptions<TContext>,
+  ): Promise<RunResult<TContext, TAgent>>;
+  run<TAgent extends Agent<any, any>, TContext = undefined>(
+    agent: TAgent,
+    input: string | AgentInputItem[] | RunState<TContext, TAgent>,
+    options?: StreamRunOptions<TContext>,
+  ): Promise<StreamedRunResult<TContext, TAgent>>;
+  async run<TAgent extends Agent<any, any>, TContext = undefined>(
+    agent: TAgent,
+    input: string | AgentInputItem[] | RunState<TContext, TAgent>,
+    options: IndividualRunOptions<TContext> = {
+      stream: false,
+      context: undefined,
+    } as IndividualRunOptions<TContext>,
+  ): Promise<
+    RunResult<TContext, TAgent> | StreamedRunResult<TContext, TAgent>
+  > {
+    const resolvedOptions = options ?? { stream: false, context: undefined };
+    // Per-run options take precedence over runner defaults for session memory behavior.
+    const sessionInputCallback =
+      resolvedOptions.sessionInputCallback ?? this.config.sessionInputCallback;
+    // Likewise allow callers to override callModelInputFilter on individual runs.
+    const callModelInputFilter =
+      resolvedOptions.callModelInputFilter ?? this.config.callModelInputFilter;
+    const hasCallModelInputFilter = Boolean(callModelInputFilter);
+    const effectiveOptions = {
+      ...resolvedOptions,
+      sessionInputCallback,
+      callModelInputFilter,
+    };
+    const serverManagesConversation =
+      Boolean(effectiveOptions.conversationId) ||
+      Boolean(effectiveOptions.previousResponseId);
+    // When the server tracks conversation history we defer to it for previous turns so local session
+    // persistence can focus solely on the new delta being generated in this process.
+    const session = effectiveOptions.session;
+    const resumingFromState = input instanceof RunState;
+    let sessionInputOriginalSnapshot: AgentInputItem[] | undefined =
+      session && resumingFromState ? [] : undefined;
+    let sessionInputFilteredSnapshot: AgentInputItem[] | undefined = undefined;
+    // Tracks remaining persistence slots per AgentInputItem key so resumed sessions only write each original occurrence once.
+    let sessionInputPendingWriteCounts: Map<string, number> | undefined =
+      session && resumingFromState ? new Map() : undefined;
+    // Keeps track of which inputs should be written back to session memory. `sourceItems` reflects
+    // the original objects (so we can respect resume counts) while `filteredItems`, when present,
+    // contains the filtered/redacted clones that must be persisted for history.
+    // The helper reconciles the filtered copies produced by callModelInputFilter with their original
+    // counterparts so resume-from-state bookkeeping stays consistent and duplicate references only
+    // consume a single persistence slot.
+    const recordSessionItemsForPersistence = (
+      sourceItems: (AgentInputItem | undefined)[],
+      filteredItems?: AgentInputItem[],
+    ) => {
+      const pendingWriteCounts = sessionInputPendingWriteCounts;
+      if (filteredItems !== undefined) {
+        if (!pendingWriteCounts) {
+          sessionInputFilteredSnapshot = filteredItems.map((item) =>
+            structuredClone(item),
+          );
+          return;
+        }
+        const persistableItems: AgentInputItem[] = [];
+        const sourceOccurrenceCounts = new WeakMap<object, number>();
+        // Track how many times each original object appears so duplicate references only consume one persistence slot.
+        for (const source of sourceItems) {
+          if (!source || typeof source !== 'object') {
+            continue;
+          }
+          const nextCount = (sourceOccurrenceCounts.get(source) ?? 0) + 1;
+          sourceOccurrenceCounts.set(source, nextCount);
+        }
+        // Let filtered items without a one-to-one source match claim any remaining persistence count.
+        const consumeAnyPendingWriteSlot = () => {
+          for (const [key, remaining] of pendingWriteCounts) {
+            if (remaining > 0) {
+              pendingWriteCounts.set(key, remaining - 1);
+              return true;
+            }
+          }
+          return false;
+        };
+        for (let i = 0; i < filteredItems.length; i++) {
+          const filteredItem = filteredItems[i];
+          if (!filteredItem) {
+            continue;
+          }
+          let allocated = false;
+          const source = sourceItems[i];
+          if (source && typeof source === 'object') {
+            const pendingOccurrences =
+              (sourceOccurrenceCounts.get(source) ?? 0) - 1;
+            sourceOccurrenceCounts.set(source, pendingOccurrences);
+            if (pendingOccurrences > 0) {
+              continue;
+            }
+            const sourceKey = getAgentInputItemKey(source);
+            const remaining = pendingWriteCounts.get(sourceKey) ?? 0;
+            if (remaining > 0) {
+              pendingWriteCounts.set(sourceKey, remaining - 1);
+              persistableItems.push(structuredClone(filteredItem));
+              allocated = true;
+              continue;
+            }
+          }
+          const filteredKey = getAgentInputItemKey(filteredItem);
+          const filteredRemaining = pendingWriteCounts.get(filteredKey) ?? 0;
+          if (filteredRemaining > 0) {
+            pendingWriteCounts.set(filteredKey, filteredRemaining - 1);
+            persistableItems.push(structuredClone(filteredItem));
+            allocated = true;
+            continue;
+          }
+          if (!source && consumeAnyPendingWriteSlot()) {
+            persistableItems.push(structuredClone(filteredItem));
+            allocated = true;
+          }
+          if (
+            !allocated &&
+            !source &&
+            sessionInputFilteredSnapshot === undefined
+          ) {
+            // Preserve at least one copy so later persistence resolves even when no counters remain.
+            persistableItems.push(structuredClone(filteredItem));
+          }
+        }
+        if (
+          persistableItems.length > 0 ||
+          sessionInputFilteredSnapshot === undefined
+        ) {
+          sessionInputFilteredSnapshot = persistableItems;
+        }
+        return;
+      }
+      const filtered: AgentInputItem[] = [];
+      if (!pendingWriteCounts) {
+        for (const item of sourceItems) {
+          if (!item) {
+            continue;
+          }
+          filtered.push(structuredClone(item));
+        }
+      } else {
+        for (const item of sourceItems) {
+          if (!item) {
+            continue;
+          }
+          const key = getAgentInputItemKey(item);
+          const remaining = pendingWriteCounts.get(key) ?? 0;
+          if (remaining <= 0) {
+            continue;
+          }
+          pendingWriteCounts.set(key, remaining - 1);
+          filtered.push(structuredClone(item));
+        }
+      }
+      if (filtered.length > 0) {
+        sessionInputFilteredSnapshot = filtered;
+      } else if (sessionInputFilteredSnapshot === undefined) {
+        sessionInputFilteredSnapshot = [];
+      }
+    };
+
+    // Determine which items should be committed to session memory for this turn.
+    // Filters take precedence because they reflect the exact payload delivered to the model.
+    const resolveSessionItemsForPersistence = () => {
+      if (sessionInputFilteredSnapshot !== undefined) {
+        return sessionInputFilteredSnapshot;
+      }
+      if (hasCallModelInputFilter) {
+        return undefined;
+      }
+      return sessionInputOriginalSnapshot;
+    };
+
+    let preparedInput: typeof input = input;
+    if (!(preparedInput instanceof RunState)) {
+      if (session && Array.isArray(preparedInput) && !sessionInputCallback) {
+        throw new UserError(
+          'RunConfig.sessionInputCallback must be provided when using session history with list inputs.',
+        );
+      }
+
+      const prepared = await prepareInputItemsWithSession(
+        preparedInput,
+        session,
+        sessionInputCallback,
+        {
+          // When the server tracks conversation state we only send the new turn inputs;
+          // previous messages are recovered via conversationId/previousResponseId.
+          includeHistoryInPreparedInput: !serverManagesConversation,
+          preserveDroppedNewItems: serverManagesConversation,
+        },
+      );
+      if (serverManagesConversation && session) {
+        // When the server manages memory we only persist the new turn inputs locally so the
+        // conversation service stays the single source of truth for prior exchanges.
+        const sessionItems = prepared.sessionItems;
+        if (sessionItems && sessionItems.length > 0) {
+          preparedInput = sessionItems;
+        } else {
+          preparedInput = prepared.preparedInput;
+        }
+      } else {
+        preparedInput = prepared.preparedInput;
+      }
+      if (session) {
+        const items = prepared.sessionItems ??
[];
+        // Clone the items that will be persisted so later mutations (filters, hooks) cannot desync history.
+        sessionInputOriginalSnapshot = items.map((item) =>
+          structuredClone(item),
+        );
+        // Reset pending counts so each prepared item reserves exactly one write slot until filters resolve matches.
+        sessionInputPendingWriteCounts = new Map();
+        for (const item of items) {
+          const key = getAgentInputItemKey(item);
+          sessionInputPendingWriteCounts.set(
+            key,
+            (sessionInputPendingWriteCounts.get(key) ?? 0) + 1,
+          );
+        }
+      }
+    }
+
+    // Streaming runs persist the input asynchronously, so track a one-shot helper
+    // that can be awaited from multiple branches without double-writing.
+    let ensureStreamInputPersisted: (() => Promise<void>) | undefined;
+    // Sessions remain usable alongside server-managed conversations (e.g., OpenAIConversationsSession)
+    // so callers can reuse callbacks, resume-from-state logic, and other helpers without duplicating
+    // remote history; persistence is therefore gated on serverManagesConversation.
+    if (session && !serverManagesConversation) {
+      let persisted = false;
+      ensureStreamInputPersisted = async () => {
+        if (persisted) {
+          return;
+        }
+        const itemsToPersist = resolveSessionItemsForPersistence();
+        if (!itemsToPersist || itemsToPersist.length === 0) {
+          return;
+        }
+        persisted = true;
+        await saveStreamInputToSession(session, itemsToPersist);
+      };
+    }
+
+    const executeRun = async () => {
+      if (effectiveOptions.stream) {
+        const streamResult = await this.#runIndividualStream(
+          agent,
+          preparedInput,
+          effectiveOptions,
+          ensureStreamInputPersisted,
+          recordSessionItemsForPersistence,
+        );
+        return streamResult;
+      }
+      const runResult = await this.#runIndividualNonStream(
+        agent,
+        preparedInput,
+        effectiveOptions,
+        recordSessionItemsForPersistence,
+      );
+      // See note above: allow sessions to run for callbacks/state but skip writes when the server
+      // is the source of truth for transcript history.
+      if (session && !serverManagesConversation) {
+        await saveToSession(
+          session,
+          resolveSessionItemsForPersistence(),
+          runResult,
+        );
+      }
+      return runResult;
+    };
+
+    if (preparedInput instanceof RunState && preparedInput._trace) {
+      return withTrace(preparedInput._trace, async () => {
+        if (preparedInput._currentAgentSpan) {
+          setCurrentSpan(preparedInput._currentAgentSpan);
+        }
+        return executeRun();
+      });
+    }
+    return getOrCreateTrace(async () => executeRun(), {
+      traceId: this.config.traceId,
+      name: this.config.workflowName,
+      groupId: this.config.groupId,
+      metadata: this.config.traceMetadata,
+    });
+  }
+
+  // --------------------------------------------------------------
+  // Internals
+  // --------------------------------------------------------------
+
+  private readonly inputGuardrailDefs: InputGuardrailDefinition[];
+
+  private readonly outputGuardrailDefs: OutputGuardrailDefinition<
+    OutputGuardrailMetadata,
+    AgentOutputType
+  >[];
+
+  /**
+   * @internal
+   * Resolves the effective model once so both run loops obey the same precedence rules.
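+   * Agent-level models win over the runner-wide default, and string names are resolved
+   * through the configured `modelProvider`.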
+ */ + async #resolveModelForAgent( + agent: Agent, + ): Promise<{ model: Model; explictlyModelSet: boolean }> { + const explictlyModelSet = + (agent.model !== undefined && + agent.model !== Agent.DEFAULT_MODEL_PLACEHOLDER) || + (this.config.model !== undefined && + this.config.model !== Agent.DEFAULT_MODEL_PLACEHOLDER); + let resolvedModel = selectModel(agent.model, this.config.model); + if (typeof resolvedModel === 'string') { + resolvedModel = await this.config.modelProvider.getModel(resolvedModel); + } + return { model: resolvedModel, explictlyModelSet }; + } + + /** + * @internal + */ + async #runIndividualNonStream< + TContext, + TAgent extends Agent, + _THandoffs extends (Agent | Handoff)[] = any[], + >( + startingAgent: TAgent, + input: string | AgentInputItem[] | RunState, + options: NonStreamRunOptions, + // sessionInputUpdate lets the caller adjust queued session items after filters run so we + // persist exactly what we send to the model (e.g., after redactions or truncation). + sessionInputUpdate?: ( + sourceItems: (AgentInputItem | undefined)[], + filteredItems?: AgentInputItem[], + ) => void, + ): Promise> { + return withNewSpanContext(async () => { + // if we have a saved state we use that one, otherwise we create a new one + const isResumedState = input instanceof RunState; + const state = isResumedState + ? input + : new RunState( + options.context instanceof RunContext + ? options.context + : new RunContext(options.context), + input, + startingAgent, + options.maxTurns ?? DEFAULT_MAX_TURNS, + ); + + const serverConversationTracker = + options.conversationId || options.previousResponseId + ? new ServerConversationTracker({ + conversationId: options.conversationId, + previousResponseId: options.previousResponseId, + }) + : undefined; + + if (serverConversationTracker && isResumedState) { serverConversationTracker.primeFromState({ originalInput: state._originalInput, generatedItems: state._generatedItems, @@ -415,16 +672,6 @@ export class Runner extends RunHooks> { try { while (true) { - const explictlyModelSet = - (state._currentAgent.model !== undefined && - state._currentAgent.model !== '') || - (this.config.model !== undefined && this.config.model !== ''); - let model = selectModel(state._currentAgent.model, this.config.model); - - if (typeof model === 'string') { - model = await this.config.modelProvider.getModel(model); - } - // if we don't have a current step, we treat this as a new run state._currentStep = state._currentStep ?? 
{ type: 'next_step_run_again', @@ -439,16 +686,15 @@ export class Runner extends RunHooks> { ); } - const turnResult = - await executeInterruptedToolsAndSideEffects( - state._currentAgent, - state._originalInput, - state._generatedItems, - state._lastTurnResponse, - state._lastProcessedResponse as ProcessedResponse, - this, - state, - ); + const turnResult = await resolveInterruptedTurn( + state._currentAgent, + state._originalInput, + state._generatedItems, + state._lastTurnResponse, + state._lastProcessedResponse as ProcessedResponse, + this, + state, + ); state._toolUseTracker.addToolUse( state._currentAgent, @@ -457,6 +703,9 @@ export class Runner extends RunHooks> { state._originalInput = turnResult.originalInput; state._generatedItems = turnResult.generatedItems; + if (turnResult.nextStep.type === 'next_step_run_again') { + state._currentTurnPersistedItemCount = 0; + } state._currentStep = turnResult.nextStep; if (turnResult.nextStep.type === 'next_step_interruption') { @@ -468,31 +717,10 @@ export class Runner extends RunHooks> { } if (state._currentStep.type === 'next_step_run_again') { - const handoffs = await state._currentAgent.getEnabledHandoffs( - state._context, - ); - - if (!state._currentAgentSpan) { - const handoffNames = handoffs.map((h) => h.agentName); - state._currentAgentSpan = createAgentSpan({ - data: { - name: state._currentAgent.name, - handoffs: handoffNames, - output_type: state._currentAgent.outputSchemaName, - }, - }); - state._currentAgentSpan.start(); - setCurrentSpan(state._currentAgentSpan); - } - - const tools = await state._currentAgent.getAllTools(state._context); - const serializedTools = tools.map((t) => serializeTool(t)); - const serializedHandoffs = handoffs.map((h) => serializeHandoff(h)); - if (state._currentAgentSpan) { - state._currentAgentSpan.spanData.tools = tools.map((t) => t.name); - } + const artifacts = await prepareAgentArtifacts(state); state._currentTurn++; + state._currentTurnPersistedItemCount = 0; if (state._currentTurn > state._maxTurns) { state._currentAgentSpan?.setError({ @@ -530,45 +758,31 @@ export class Runner extends RunHooks> { this.emit('agent_start', state._context, state._currentAgent); } - let modelSettings = { - ...this.config.modelSettings, - ...state._currentAgent.modelSettings, - }; - const agentModelSettings = state._currentAgent.modelSettings; - modelSettings = adjustModelSettingsForNonGPT5RunnerModel( - explictlyModelSet, - agentModelSettings, - model, - modelSettings, - ); - modelSettings = maybeResetToolChoice( - state._currentAgent, - state._toolUseTracker, - modelSettings, + const preparedCall = await this.#prepareModelCall( + state, + options, + artifacts, + turnInput, + serverConversationTracker, + sessionInputUpdate, ); - const previousResponseId = - serverConversationTracker?.previousResponseId ?? - options.previousResponseId; - const conversationId = - serverConversationTracker?.conversationId ?? - options.conversationId; - - state._lastTurnResponse = await model.getResponse({ - systemInstructions: await state._currentAgent.getSystemPrompt( - state._context, - ), - prompt: await state._currentAgent.getPrompt(state._context), + + state._lastTurnResponse = await preparedCall.model.getResponse({ + systemInstructions: preparedCall.modelInput.instructions, + prompt: preparedCall.prompt, // Explicit agent/run config models should take precedence over prompt defaults. - ...(explictlyModelSet ? 
{ overridePromptModel: true } : {}), - input: turnInput, - previousResponseId, - conversationId, - modelSettings, - tools: serializedTools, + ...(preparedCall.explictlyModelSet + ? { overridePromptModel: true } + : {}), + input: preparedCall.modelInput.input, + previousResponseId: preparedCall.previousResponseId, + conversationId: preparedCall.conversationId, + modelSettings: preparedCall.modelSettings, + tools: preparedCall.serializedTools, outputType: convertAgentOutputTypeToSerializable( state._currentAgent.outputType, ), - handoffs: serializedHandoffs, + handoffs: preparedCall.serializedHandoffs, tracing: getTracing( this.config.tracingDisabled, this.config.traceIncludeSensitiveData, @@ -579,6 +793,8 @@ export class Runner extends RunHooks> { state._context.usage.add(state._lastTurnResponse.usage); state._noActiveAgentRun = false; + // After each turn record the items echoed by the server so future requests only + // include the incremental inputs that have not yet been acknowledged. serverConversationTracker?.trackServerItems( state._lastTurnResponse, ); @@ -586,12 +802,12 @@ export class Runner extends RunHooks> { const processedResponse = processModelResponse( state._lastTurnResponse, state._currentAgent, - tools, - handoffs, + preparedCall.tools, + preparedCall.handoffs, ); state._lastProcessedResponse = processedResponse; - const turnResult = await executeToolsAndSideEffects( + const turnResult = await resolveTurnAfterModelResponse( state._currentAgent, state._originalInput, state._generatedItems, @@ -608,6 +824,9 @@ export class Runner extends RunHooks> { state._originalInput = turnResult.originalInput; state._generatedItems = turnResult.generatedItems; + if (turnResult.nextStep.type === 'next_step_run_again') { + state._currentTurnPersistedItemCount = 0; + } state._currentStep = turnResult.nextStep; } @@ -672,157 +891,53 @@ export class Runner extends RunHooks> { }); } - async #runInputGuardrails< + /** + * @internal + */ + async #runStreamLoop< TContext, TAgent extends Agent, - >(state: RunState) { - const guardrails = this.inputGuardrailDefs.concat( - state._currentAgent.inputGuardrails.map(defineInputGuardrail), - ); - if (guardrails.length > 0) { - const guardrailArgs = { - agent: state._currentAgent, - input: state._originalInput, - context: state._context, - }; - try { - const results = await Promise.all( - guardrails.map(async (guardrail) => { - return withGuardrailSpan( - async (span) => { - const result = await guardrail.run(guardrailArgs); - span.spanData.triggered = result.output.tripwireTriggered; - return result; - }, - { data: { name: guardrail.name } }, - state._currentAgentSpan, - ); - }), - ); - for (const result of results) { - if (result.output.tripwireTriggered) { - if (state._currentAgentSpan) { - state._currentAgentSpan.setError({ - message: 'Guardrail tripwire triggered', - data: { guardrail: result.guardrail.name }, - }); - } - throw new InputGuardrailTripwireTriggered( - `Input guardrail triggered: ${JSON.stringify(result.output.outputInfo)}`, - result, - state, - ); - } - } - } catch (e) { - if (e instanceof InputGuardrailTripwireTriggered) { - throw e; - } - // roll back the current turn to enable reruns - state._currentTurn--; - throw new GuardrailExecutionError( - `Input guardrail failed to complete: ${e}`, - e as Error, - state, - ); + >( + result: StreamedRunResult, + options: StreamRunOptions, + isResumedState: boolean, + ensureStreamInputPersisted?: () => Promise, + sessionInputUpdate?: ( + sourceItems: (AgentInputItem | undefined)[], + 
filteredItems?: AgentInputItem[], + ) => void, + ): Promise { + const serverManagesConversation = + Boolean(options.conversationId) || Boolean(options.previousResponseId); + const serverConversationTracker = serverManagesConversation + ? new ServerConversationTracker({ + conversationId: options.conversationId, + previousResponseId: options.previousResponseId, + }) + : undefined; + + let handedInputToModel = false; + let streamInputPersisted = false; + const persistStreamInputIfNeeded = async () => { + if (streamInputPersisted || !ensureStreamInputPersisted) { + return; } - } - } - - async #runOutputGuardrails< - TContext, - TOutput extends AgentOutputType, - TAgent extends Agent, - >(state: RunState, output: string) { - const guardrails = this.outputGuardrailDefs.concat( - state._currentAgent.outputGuardrails.map(defineOutputGuardrail), - ); - if (guardrails.length > 0) { - const agentOutput = state._currentAgent.processFinalOutput(output); - const guardrailArgs: OutputGuardrailFunctionArgs = { - agent: state._currentAgent, - agentOutput, - context: state._context, - details: { modelResponse: state._lastTurnResponse }, - }; - try { - const results = await Promise.all( - guardrails.map(async (guardrail) => { - return withGuardrailSpan( - async (span) => { - const result = await guardrail.run(guardrailArgs); - span.spanData.triggered = result.output.tripwireTriggered; - return result; - }, - { data: { name: guardrail.name } }, - state._currentAgentSpan, - ); - }), - ); - for (const result of results) { - if (result.output.tripwireTriggered) { - if (state._currentAgentSpan) { - state._currentAgentSpan.setError({ - message: 'Guardrail tripwire triggered', - data: { guardrail: result.guardrail.name }, - }); - } - throw new OutputGuardrailTripwireTriggered( - `Output guardrail triggered: ${JSON.stringify(result.output.outputInfo)}`, - result, - state, - ); - } - } - } catch (e) { - if (e instanceof OutputGuardrailTripwireTriggered) { - throw e; - } - throw new GuardrailExecutionError( - `Output guardrail failed to complete: ${e}`, - e as Error, - state, - ); - } - } - } - - /** - * @internal - */ - async #runStreamLoop< - TContext, - TAgent extends Agent, - >( - result: StreamedRunResult, - options: StreamRunOptions, - isResumedState: boolean, - ): Promise { - const serverConversationTracker = - options.conversationId || options.previousResponseId - ? new ServerConversationTracker({ - conversationId: options.conversationId, - previousResponseId: options.previousResponseId, - }) - : undefined; - - if (serverConversationTracker && isResumedState) { - serverConversationTracker.primeFromState({ - originalInput: result.state._originalInput, - generatedItems: result.state._generatedItems, - modelResponses: result.state._modelResponses, - }); + // Both success and error paths call this helper, so guard against multiple writes. 
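+      // ensureStreamInputPersisted keeps its own one-shot flag too, so repeat calls stay idempotent.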
+ await ensureStreamInputPersisted(); + streamInputPersisted = true; + }; + + if (serverConversationTracker && isResumedState) { + serverConversationTracker.primeFromState({ + originalInput: result.state._originalInput, + generatedItems: result.state._generatedItems, + modelResponses: result.state._modelResponses, + }); } try { while (true) { const currentAgent = result.state._currentAgent; - const handoffs = await currentAgent.getEnabledHandoffs( - result.state._context, - ); - const tools = await currentAgent.getAllTools(result.state._context); - const serializedTools = tools.map((t) => serializeTool(t)); - const serializedHandoffs = handoffs.map((h) => serializeHandoff(h)); result.state._currentStep = result.state._currentStep ?? { type: 'next_step_run_again', @@ -840,16 +955,15 @@ export class Runner extends RunHooks> { ); } - const turnResult = - await executeInterruptedToolsAndSideEffects( - result.state._currentAgent, - result.state._originalInput, - result.state._generatedItems, - result.state._lastTurnResponse, - result.state._lastProcessedResponse as ProcessedResponse, - this, - result.state, - ); + const turnResult = await resolveInterruptedTurn( + result.state._currentAgent, + result.state._originalInput, + result.state._generatedItems, + result.state._lastTurnResponse, + result.state._lastProcessedResponse as ProcessedResponse, + this, + result.state, + ); addStepToRunResult(result, turnResult); @@ -860,6 +974,9 @@ export class Runner extends RunHooks> { result.state._originalInput = turnResult.originalInput; result.state._generatedItems = turnResult.generatedItems; + if (turnResult.nextStep.type === 'next_step_run_again') { + result.state._currentTurnPersistedItemCount = 0; + } result.state._currentStep = turnResult.nextStep; if (turnResult.nextStep.type === 'next_step_interruption') { // we are still in an interruption, so we need to avoid an infinite loop @@ -869,21 +986,10 @@ export class Runner extends RunHooks> { } if (result.state._currentStep.type === 'next_step_run_again') { - if (!result.state._currentAgentSpan) { - const handoffNames = handoffs.map((h) => h.agentName); - result.state._currentAgentSpan = createAgentSpan({ - data: { - name: currentAgent.name, - handoffs: handoffNames, - tools: tools.map((t) => t.name), - output_type: currentAgent.outputSchemaName, - }, - }); - result.state._currentAgentSpan.start(); - setCurrentSpan(result.state._currentAgentSpan); - } + const artifacts = await prepareAgentArtifacts(result.state); result.state._currentTurn++; + result.state._currentTurnPersistedItemCount = 0; if (result.state._currentTurn > result.state._maxTurns) { result.state._currentAgentSpan?.setError({ @@ -900,36 +1006,10 @@ export class Runner extends RunHooks> { `Running agent ${currentAgent.name} (turn ${result.state._currentTurn})`, ); - const explictlyModelSet = - (currentAgent.model !== undefined && currentAgent.model !== '') || - (this.config.model !== undefined && this.config.model !== ''); - let model = selectModel(currentAgent.model, this.config.model); - - if (typeof model === 'string') { - model = await this.config.modelProvider.getModel(model); - } - if (result.state._currentTurn === 1) { await this.#runInputGuardrails(result.state); } - let modelSettings = { - ...this.config.modelSettings, - ...currentAgent.modelSettings, - }; - const agentModelSettings = currentAgent.modelSettings; - modelSettings = adjustModelSettingsForNonGPT5RunnerModel( - explictlyModelSet, - agentModelSettings, - model, - modelSettings, - ); - modelSettings = 
maybeResetToolChoice( - currentAgent, - result.state._toolUseTracker, - modelSettings, - ); - const turnInput = serverConversationTracker ? serverConversationTracker.prepareInput( result.input, @@ -948,25 +1028,31 @@ export class Runner extends RunHooks> { let finalResponse: ModelResponse | undefined = undefined; - const previousResponseId = - serverConversationTracker?.previousResponseId ?? - options.previousResponseId; - const conversationId = - serverConversationTracker?.conversationId ?? options.conversationId; + const preparedCall = await this.#prepareModelCall( + result.state, + options, + artifacts, + turnInput, + serverConversationTracker, + sessionInputUpdate, + ); + + handedInputToModel = true; + await persistStreamInputIfNeeded(); - for await (const event of model.getStreamedResponse({ - systemInstructions: await currentAgent.getSystemPrompt( - result.state._context, - ), - prompt: await currentAgent.getPrompt(result.state._context), + for await (const event of preparedCall.model.getStreamedResponse({ + systemInstructions: preparedCall.modelInput.instructions, + prompt: preparedCall.prompt, // Streaming requests should also honor explicitly chosen models. - ...(explictlyModelSet ? { overridePromptModel: true } : {}), - input: turnInput, - previousResponseId, - conversationId, - modelSettings, - tools: serializedTools, - handoffs: serializedHandoffs, + ...(preparedCall.explictlyModelSet + ? { overridePromptModel: true } + : {}), + input: preparedCall.modelInput.input, + previousResponseId: preparedCall.previousResponseId, + conversationId: preparedCall.conversationId, + modelSettings: preparedCall.modelSettings, + tools: preparedCall.serializedTools, + handoffs: preparedCall.serializedHandoffs, outputType: convertAgentOutputTypeToSerializable( currentAgent.outputType, ), @@ -1002,14 +1088,15 @@ export class Runner extends RunHooks> { } result.state._lastTurnResponse = finalResponse; + // Keep the tracker in sync with the streamed response so reconnections remain accurate. serverConversationTracker?.trackServerItems(finalResponse); result.state._modelResponses.push(result.state._lastTurnResponse); const processedResponse = processModelResponse( result.state._lastTurnResponse, currentAgent, - tools, - handoffs, + preparedCall.tools, + preparedCall.handoffs, ); result.state._lastProcessedResponse = processedResponse; @@ -1021,7 +1108,7 @@ export class Runner extends RunHooks> { streamStepItemsToRunResult(result, processedResponse.newItems); } - const turnResult = await executeToolsAndSideEffects( + const turnResult = await resolveTurnAfterModelResponse( currentAgent, result.state._originalInput, result.state._generatedItems, @@ -1042,6 +1129,9 @@ export class Runner extends RunHooks> { result.state._originalInput = turnResult.originalInput; result.state._generatedItems = turnResult.generatedItems; + if (turnResult.nextStep.type === 'next_step_run_again') { + result.state._currentTurnPersistedItemCount = 0; + } result.state._currentStep = turnResult.nextStep; } @@ -1050,6 +1140,11 @@ export class Runner extends RunHooks> { result.state, result.state._currentStep.output, ); + await persistStreamInputIfNeeded(); + // Guardrails must succeed before persisting session memory to avoid storing blocked outputs. 
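+          // When the server owns the conversation, the transcript already lives remotely; skip local writes.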
+ if (!serverManagesConversation) { + await saveStreamResultToSession(options.session, result); + } this.emit( 'agent_end', result.state._context, @@ -1066,6 +1161,10 @@ export class Runner extends RunHooks> { result.state._currentStep.type === 'next_step_interruption' ) { // we are done for now. Don't run any output guardrails + await persistStreamInputIfNeeded(); + if (!serverManagesConversation) { + await saveStreamResultToSession(options.session, result); + } return; } else if (result.state._currentStep.type === 'next_step_handoff') { result.state._currentAgent = result.state._currentStep @@ -1089,6 +1188,9 @@ export class Runner extends RunHooks> { } } } catch (error) { + if (handedInputToModel && !streamInputPersisted) { + await persistStreamInputIfNeeded(); + } if (result.state._currentAgentSpan) { result.state._currentAgentSpan.setError({ message: 'Error in agent run', @@ -1116,6 +1218,11 @@ export class Runner extends RunHooks> { agent: TAgent, input: string | AgentInputItem[] | RunState, options?: StreamRunOptions, + ensureStreamInputPersisted?: () => Promise, + sessionInputUpdate?: ( + sourceItems: (AgentInputItem | undefined)[], + filteredItems?: AgentInputItem[], + ) => void, ): Promise> { options = options ?? ({} as StreamRunOptions); return withNewSpanContext(async () => { @@ -1146,6 +1253,8 @@ export class Runner extends RunHooks> { result, options, isResumedState, + ensureStreamInputPersisted, + sessionInputUpdate, ).then( () => { result._done(); @@ -1162,95 +1271,279 @@ export class Runner extends RunHooks> { }); } - /** - * Run a workflow starting at the given agent. The agent will run in a loop until a final - * output is generated. The loop runs like so: - * 1. The agent is invoked with the given input. - * 2. If there is a final output (i.e. the agent produces something of type - * `agent.outputType`, the loop terminates. - * 3. If there's a handoff, we run the loop again, with the new agent. - * 4. Else, we run tool calls (if any), and re-run the loop. - * - * In two cases, the agent may raise an exception: - * 1. If the maxTurns is exceeded, a MaxTurnsExceeded exception is raised. - * 2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised. - * - * Note that only the first agent's input guardrails are run. - * - * @param agent - The starting agent to run. - * @param input - The initial input to the agent. You can pass a string or an array of - * `AgentInputItem`. - * @param options - Options for the run, including streaming behavior, execution context, and the - * maximum number of turns. - * @returns The result of the run. 
- */ - run, TContext = undefined>( - agent: TAgent, - input: string | AgentInputItem[] | RunState, - options?: NonStreamRunOptions, - ): Promise>; - run, TContext = undefined>( - agent: TAgent, - input: string | AgentInputItem[] | RunState, - options?: StreamRunOptions, - ): Promise>; - run, TContext = undefined>( - agent: TAgent, - input: string | AgentInputItem[] | RunState, - options: IndividualRunOptions = { - stream: false, - context: undefined, - } as IndividualRunOptions, - ): Promise< - RunResult | StreamedRunResult - > { - if (input instanceof RunState && input._trace) { - return withTrace(input._trace, async () => { - if (input._currentAgentSpan) { - setCurrentSpan(input._currentAgentSpan); + async #runInputGuardrails< + TContext, + TAgent extends Agent, + >(state: RunState) { + const guardrails = this.inputGuardrailDefs.concat( + state._currentAgent.inputGuardrails.map(defineInputGuardrail), + ); + if (guardrails.length > 0) { + const guardrailArgs = { + agent: state._currentAgent, + input: state._originalInput, + context: state._context, + }; + try { + const results = await Promise.all( + guardrails.map(async (guardrail) => { + return withGuardrailSpan( + async (span) => { + const result = await guardrail.run(guardrailArgs); + span.spanData.triggered = result.output.tripwireTriggered; + return result; + }, + { data: { name: guardrail.name } }, + state._currentAgentSpan, + ); + }), + ); + for (const result of results) { + if (result.output.tripwireTriggered) { + if (state._currentAgentSpan) { + state._currentAgentSpan.setError({ + message: 'Guardrail tripwire triggered', + data: { guardrail: result.guardrail.name }, + }); + } + throw new InputGuardrailTripwireTriggered( + `Input guardrail triggered: ${JSON.stringify(result.output.outputInfo)}`, + result, + state, + ); + } } - - if (options?.stream) { - return this.#runIndividualStream(agent, input, options); - } else { - return this.#runIndividualNonStream(agent, input, options); + } catch (e) { + if (e instanceof InputGuardrailTripwireTriggered) { + throw e; } - }); + // roll back the current turn to enable reruns + state._currentTurn--; + throw new GuardrailExecutionError( + `Input guardrail failed to complete: ${e}`, + e as Error, + state, + ); + } } + } - return getOrCreateTrace( - async () => { - if (options?.stream) { - return this.#runIndividualStream(agent, input, options); - } else { - return this.#runIndividualNonStream(agent, input, options); - } - }, - { - traceId: this.config.traceId, - name: this.config.workflowName, - groupId: this.config.groupId, - metadata: this.config.traceMetadata, - }, + async #runOutputGuardrails< + TContext, + TOutput extends AgentOutputType, + TAgent extends Agent, + >(state: RunState, output: string) { + const guardrails = this.outputGuardrailDefs.concat( + state._currentAgent.outputGuardrails.map(defineOutputGuardrail), ); + if (guardrails.length > 0) { + const agentOutput = state._currentAgent.processFinalOutput(output); + const guardrailArgs: OutputGuardrailFunctionArgs = { + agent: state._currentAgent, + agentOutput, + context: state._context, + details: { modelResponse: state._lastTurnResponse }, + }; + try { + const results = await Promise.all( + guardrails.map(async (guardrail) => { + return withGuardrailSpan( + async (span) => { + const result = await guardrail.run(guardrailArgs); + span.spanData.triggered = result.output.tripwireTriggered; + return result; + }, + { data: { name: guardrail.name } }, + state._currentAgentSpan, + ); + }), + ); + for (const result of results) { + if 
(result.output.tripwireTriggered) { + if (state._currentAgentSpan) { + state._currentAgentSpan.setError({ + message: 'Guardrail tripwire triggered', + data: { guardrail: result.guardrail.name }, + }); + } + throw new OutputGuardrailTripwireTriggered( + `Output guardrail triggered: ${JSON.stringify(result.output.outputInfo)}`, + result, + state, + ); + } + } + } catch (e) { + if (e instanceof OutputGuardrailTripwireTriggered) { + throw e; + } + throw new GuardrailExecutionError( + `Output guardrail failed to complete: ${e}`, + e as Error, + state, + ); + } + } } -} -let _defaultRunner: Runner | undefined = undefined; -function getDefaultRunner() { - if (_defaultRunner) { - return _defaultRunner; - } - _defaultRunner = new Runner(); - return _defaultRunner; + /** + * @internal + * Applies call-level filters and merges session updates so the model request mirrors exactly + * what we persisted for history. + */ + async #prepareModelCall< + TContext, + TAgent extends Agent, + >( + state: RunState, + options: SharedRunOptions, + artifacts: AgentArtifacts, + turnInput: AgentInputItem[], + serverConversationTracker?: ServerConversationTracker, + sessionInputUpdate?: ( + sourceItems: (AgentInputItem | undefined)[], + filteredItems?: AgentInputItem[], + ) => void, + ): Promise> { + const { model, explictlyModelSet } = await this.#resolveModelForAgent( + state._currentAgent, + ); + + let modelSettings = { + ...this.config.modelSettings, + ...state._currentAgent.modelSettings, + }; + modelSettings = adjustModelSettingsForNonGPT5RunnerModel( + explictlyModelSet, + state._currentAgent.modelSettings, + model, + modelSettings, + ); + modelSettings = maybeResetToolChoice( + state._currentAgent, + state._toolUseTracker, + modelSettings, + ); + + const systemInstructions = await state._currentAgent.getSystemPrompt( + state._context, + ); + const prompt = await state._currentAgent.getPrompt(state._context); + + const { modelInput, sourceItems, persistedItems, filterApplied } = + await applyCallModelInputFilter( + state._currentAgent, + options.callModelInputFilter, + state._context, + turnInput, + systemInstructions, + ); + + // Inform the tracker which exact original objects made it to the provider so future turns + // only send the delta that has not yet been acknowledged by the server. + serverConversationTracker?.markInputAsSent(sourceItems); + // Provide filtered clones whenever filters run so session history mirrors the model payload. + // Returning an empty array is intentional: it tells the session layer to persist "nothing" + // instead of falling back to the unfiltered originals when the filter redacts everything. + sessionInputUpdate?.( + sourceItems, + filterApplied ? persistedItems : undefined, + ); + + const previousResponseId = + serverConversationTracker?.previousResponseId ?? + options.previousResponseId; + const conversationId = + serverConversationTracker?.conversationId ?? options.conversationId; + + return { + ...artifacts, + model, + explictlyModelSet, + modelSettings, + modelInput, + prompt, + previousResponseId, + conversationId, + }; + } } +// -------------------------------------------------------------- +// Other types and functions +// -------------------------------------------------------------- + +/** + * Mutable view of the instructions + input items that the model will receive. + * Filters always see a copy so they can edit without side effects. 
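+ * Edits apply only to the outgoing request for the current turn; the stored run state is untouched.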
+ */
+export type ModelInputData = {
+  input: AgentInputItem[];
+  instructions?: string;
+};
+
+/**
+ * Shape of the payload given to `callModelInputFilter`. Mirrored in the Python SDK so filters can
+ * share the same implementation across languages.
+ */
+export type CallModelInputFilterArgs<TContext = unknown> = {
+  modelData: ModelInputData;
+  agent: Agent;
+  context: TContext | undefined;
+};
+
+/**
+ * Hook invoked immediately before a model call is issued, allowing callers to adjust the
+ * instructions or input array. Returning a new array enables redaction, truncation, or
+ * augmentation of the payload that will be sent to the provider.
+ */
+export type CallModelInputFilter<TContext = unknown> = (
+  args: CallModelInputFilterArgs<TContext>,
+) => ModelInputData | Promise<ModelInputData>;
+
+/**
+ * Constructs the model input array for the current turn by combining the original turn input with
+ * any new run items (excluding tool approval placeholders). This helps ensure that repeated calls
+ * to the Responses API only send newly generated content.
+ *
+ * See: https://platform.openai.com/docs/guides/conversation-state?api-mode=responses.
+ */
+export function getTurnInput(
+  originalInput: string | AgentInputItem[],
+  generatedItems: RunItem[],
+): AgentInputItem[] {
+  const rawItems = generatedItems
+    .filter((item) => item.type !== 'tool_approval_item') // don't include approval items to avoid double function calls
+    .map((item) => item.rawItem);
+  return [...toAgentInputList(originalInput), ...rawItems];
+}
+
+// --------------------------------------------------------------
+// Internal helpers
+// --------------------------------------------------------------
+
+const DEFAULT_MAX_TURNS = 10;
+
+let _defaultRunner: Runner | undefined = undefined;
+
+function getDefaultRunner() {
+  if (_defaultRunner) {
+    return _defaultRunner;
+  }
+  _defaultRunner = new Runner();
+  return _defaultRunner;
+}
+
+/**
+ * Resolves the effective model for the next turn by giving precedence to the agent-specific
+ * configuration when present, otherwise falling back to the runner-level default.
+ */
 export function selectModel(
   agentModel: string | Model,
   runConfigModel: string | Model | undefined,
 ): string | Model {
   // When initializing an agent without model name, the model property is set to an empty string. So,
-  // * agentModel === '' & runConfigModel exists, runConfigModel will be used
+  // * agentModel === Agent.DEFAULT_MODEL_PLACEHOLDER & runConfigModel exists, runConfigModel will be used
   // * agentModel is set, the agentModel will be used over runConfigModel
   if (
     (typeof agentModel === 'string' &&
@@ -1262,26 +1555,346 @@ export function selectModel(
   return runConfigModel ?? agentModel ?? Agent.DEFAULT_MODEL_PLACEHOLDER;
 }
 
-export async function run<TAgent extends Agent<any, any>, TContext = undefined>(
-  agent: TAgent,
-  input: string | AgentInputItem[] | RunState<TContext, TAgent>,
-  options?: NonStreamRunOptions<TContext>,
-): Promise<RunResult<TContext, TAgent>>;
-export async function run<TAgent extends Agent<any, any>, TContext = undefined>(
-  agent: TAgent,
-  input: string | AgentInputItem[] | RunState<TContext, TAgent>,
-  options?: StreamRunOptions<TContext>,
-): Promise<StreamedRunResult<TContext, TAgent>>;
-export async function run<TAgent extends Agent<any, any>, TContext = undefined>(
-  agent: TAgent,
-  input: string | AgentInputItem[] | RunState<TContext, TAgent>,
-  options?: StreamRunOptions<TContext> | NonStreamRunOptions<TContext>,
-): Promise<RunResult<TContext, TAgent> | StreamedRunResult<TContext, TAgent>> {
-  const runner = getDefaultRunner();
-  if (options?.stream) {
-    return await runner.run(agent, input, options);
-  } else {
-    return await runner.run(agent, input, options);
+/**
+ * Normalizes tracing configuration into the format expected by model providers.
+ * Returns `false` to disable tracing, `true` to include full payload data, or
+ * `'enabled_without_data'` to omit sensitive content while still emitting spans.
+ */
+export function getTracing(
+  tracingDisabled: boolean,
+  traceIncludeSensitiveData: boolean,
+): ModelTracing {
+  if (tracingDisabled) {
+    return false;
+  }
+
+  if (traceIncludeSensitiveData) {
+    return true;
+  }
+
+  return 'enabled_without_data';
+}
+
+/**
+ * Result of applying a `callModelInputFilter`.
+ * - `modelInput` is the payload that goes to the model.
+ * - `sourceItems` maps each filtered item back to the original turn item (or `undefined` when none).
+ *   This lets the conversation tracker know which originals reached the model.
+ * - `persistedItems` are the filtered clones we should commit to session memory so the stored
+ *   history reflects any redactions or truncation introduced by the filter.
+ * - `filterApplied` signals whether a filter ran so callers can distinguish empty filtered results
+ *   from the filter being skipped entirely.
+ */
+type FilterApplicationResult = {
+  modelInput: ModelInputData;
+  sourceItems: (AgentInputItem | undefined)[];
+  persistedItems: AgentInputItem[];
+  filterApplied: boolean;
+};
+
+/**
+ * @internal
+ */
+async function applyCallModelInputFilter(
+  agent: Agent,
+  callModelInputFilter: CallModelInputFilter | undefined,
+  context: RunContext,
+  inputItems: AgentInputItem[],
+  systemInstructions: string | undefined,
+): Promise<FilterApplicationResult> {
+  const cloneInputItems = (
+    items: AgentInputItem[],
+    map?: WeakMap<object, AgentInputItem>,
+  ) =>
+    items.map((item) => {
+      const cloned = structuredClone(item) as AgentInputItem;
+      if (map && cloned && typeof cloned === 'object') {
+        map.set(cloned as object, item);
+      }
+      return cloned;
+    });
+
+  // Record the relationship between the cloned array passed to filters and the original inputs.
+  const cloneMap = new WeakMap<object, AgentInputItem>();
+  const originalPool = buildAgentInputPool(inputItems);
+  const fallbackOriginals: AgentInputItem[] = [];
+  // Track any original object inputs so filtered replacements can still mark them as delivered.
+  for (const item of inputItems) {
+    if (item && typeof item === 'object') {
+      fallbackOriginals.push(item);
+    }
+  }
+  const removeFromFallback = (candidate: AgentInputItem | undefined) => {
+    if (!candidate || typeof candidate !== 'object') {
+      return;
+    }
+    const index = fallbackOriginals.findIndex(
+      (original) => original === candidate,
+    );
+    if (index !== -1) {
+      fallbackOriginals.splice(index, 1);
+    }
+  };
+  const takeFallbackOriginal = (): AgentInputItem | undefined => {
+    const next = fallbackOriginals.shift();
+    if (next) {
+      removeAgentInputFromPool(originalPool, next);
+    }
+    return next;
+  };
+
+  // Always create a deep copy so downstream mutations inside filters cannot affect
+  // the cached turn state.
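+  // Each item is cloned with structuredClone, so filters may mutate nested content freely.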
+ const clonedBaseInput = cloneInputItems(inputItems, cloneMap); + const base: ModelInputData = { + input: clonedBaseInput, + instructions: systemInstructions, + }; + if (!callModelInputFilter) { + return { + modelInput: base, + sourceItems: [...inputItems], + persistedItems: [], + filterApplied: false, + }; + } + + try { + const result = await callModelInputFilter({ + modelData: base, + agent, + context: context.context, + } as CallModelInputFilterArgs); + + if (!result || !Array.isArray(result.input)) { + throw new UserError( + 'callModelInputFilter must return a ModelInputData object with an input array.', + ); + } + + // Preserve a pointer to the original object backing each filtered clone so downstream + // trackers can keep their bookkeeping consistent even after redaction. + const sourceItems = result.input.map((item) => { + if (!item || typeof item !== 'object') { + return undefined; + } + const original = cloneMap.get(item as object); + if (original) { + removeFromFallback(original); + removeAgentInputFromPool(originalPool, original); + return original; + } + const key = getAgentInputItemKey(item as AgentInputItem); + const matchedByContent = takeAgentInputFromPool(originalPool, key); + if (matchedByContent) { + removeFromFallback(matchedByContent); + return matchedByContent; + } + const fallback = takeFallbackOriginal(); + if (fallback) { + return fallback; + } + return undefined; + }); + + const clonedFilteredInput = cloneInputItems(result.input); + return { + modelInput: { + input: clonedFilteredInput, + instructions: + typeof result.instructions === 'undefined' + ? systemInstructions + : result.instructions, + }, + sourceItems, + persistedItems: clonedFilteredInput.map((item) => structuredClone(item)), + filterApplied: true, + }; + } catch (error) { + addErrorToCurrentSpan({ + message: 'Error in callModelInputFilter', + data: { error: String(error) }, + }); + throw error; + } +} + +// Tracks which items have already been sent to or received from the Responses API when the caller +// supplies `conversationId`/`previousResponseId`. This ensures we only send the delta each turn. +class ServerConversationTracker { + // Conversation ID: + // - https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#using-the-conversations-api + // - https://platform.openai.com/docs/api-reference/conversations/create + public conversationId?: string; + + // Previous Response ID: + // https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#passing-context-from-the-previous-response + public previousResponseId?: string; + + // Using this flag because WeakSet does not provide a way to check its size + private sentInitialInput = false; + // The items already sent to the model; using WeakSet for memory efficiency + private sentItems = new WeakSet(); + // The items received from the server; using WeakSet for memory efficiency + private serverItems = new WeakSet(); + // Track initial input items that have not yet been sent so they can be retried on later turns. + private remainingInitialInput: AgentInputItem[] | null = null; + + constructor({ + conversationId, + previousResponseId, + }: { + conversationId?: string; + previousResponseId?: string; + }) { + this.conversationId = conversationId ?? undefined; + this.previousResponseId = previousResponseId ?? undefined; + } + + /** + * Pre-populates tracker caches from an existing RunState when resuming server-managed runs. 
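+   * Without this priming, a resumed run would resend items the server has already acknowledged.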
+ */ + primeFromState({ + originalInput, + generatedItems, + modelResponses, + }: { + originalInput: string | AgentInputItem[]; + generatedItems: RunItem[]; + modelResponses: ModelResponse[]; + }) { + if (this.sentInitialInput) { + return; + } + + for (const item of toAgentInputList(originalInput)) { + if (item && typeof item === 'object') { + this.sentItems.add(item); + } + } + + this.sentInitialInput = true; + this.remainingInitialInput = null; + + const latestResponse = modelResponses[modelResponses.length - 1]; + for (const response of modelResponses) { + for (const item of response.output) { + if (item && typeof item === 'object') { + this.serverItems.add(item); + } + } + } + + if (!this.conversationId && latestResponse?.responseId) { + this.previousResponseId = latestResponse.responseId; + } + + for (const item of generatedItems) { + const rawItem = item.rawItem; + if (!rawItem || typeof rawItem !== 'object') { + continue; + } + if (this.serverItems.has(rawItem)) { + this.sentItems.add(rawItem); + } + } + } + + /** + * Records the raw items returned by the server so future delta calculations skip them. + * Also captures the latest response identifier to chain follow-up calls when possible. + */ + trackServerItems(modelResponse: ModelResponse | undefined) { + if (!modelResponse) { + return; + } + for (const item of modelResponse.output) { + if (item && typeof item === 'object') { + this.serverItems.add(item); + } + } + if (!this.conversationId && modelResponse.responseId) { + this.previousResponseId = modelResponse.responseId; + } + } + + /** + * Returns the minimum set of items that still need to be delivered to the server for the + * current turn. This includes the original turn inputs (until acknowledged) plus any + * newly generated items that have not yet been echoed back by the API. + */ + prepareInput( + originalInput: string | AgentInputItem[], + generatedItems: RunItem[], + ): AgentInputItem[] { + const inputItems: AgentInputItem[] = []; + + if (!this.sentInitialInput) { + const initialItems = toAgentInputList(originalInput); + // Preserve the full initial payload so a filter can drop items without losing their originals. + inputItems.push(...initialItems); + this.remainingInitialInput = initialItems.filter( + (item): item is AgentInputItem => + Boolean(item) && typeof item === 'object', + ); + this.sentInitialInput = true; + } else if ( + this.remainingInitialInput && + this.remainingInitialInput.length > 0 + ) { + // Re-queue prior initial items until the tracker confirms they were delivered to the API. + inputItems.push(...this.remainingInitialInput); + } + + for (const item of generatedItems) { + if (item.type === 'tool_approval_item') { + continue; + } + const rawItem = item.rawItem; + if (!rawItem || typeof rawItem !== 'object') { + continue; + } + if (this.sentItems.has(rawItem) || this.serverItems.has(rawItem)) { + continue; + } + inputItems.push(rawItem as AgentInputItem); + } + + return inputItems; + } + + /** + * Marks the provided originals as delivered so future turns do not resend them and any + * pending initial inputs can be dropped once the server acknowledges receipt. + */ + markInputAsSent(items: (AgentInputItem | undefined)[]) { + if (!items.length) { + return; + } + + const delivered = new Set(); + for (const item of items) { + if (!item || typeof item !== 'object' || delivered.has(item)) { + continue; + } + // Some inputs may be repeated in the filtered list; only mark unique originals once. 
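+      // Membership in sentItems is exactly what prepareInput consults when building the next delta.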
+      delivered.add(item);
+      this.sentItems.add(item);
+    }
+
+    if (
+      !this.remainingInitialInput ||
+      this.remainingInitialInput.length === 0
+    ) {
+      return;
+    }
+
+    this.remainingInitialInput = this.remainingInitialInput.filter(
+      (item) => !delivered.has(item),
+    );
+    if (this.remainingInitialInput.length === 0) {
+      this.remainingInitialInput = null;
+    }
  }
}
@@ -1325,3 +1938,174 @@ function adjustModelSettingsForNonGPT5RunnerModel(
  }
  return modelSettings;
}
+
+// Package turn metadata so both run loops share identical serialization.
+// Each field mirrors the information we ship to the model for the current agent turn.
+type AgentArtifacts<TContext> = {
+  handoffs: Handoff[];
+  tools: Tool<TContext>[];
+  serializedHandoffs: SerializedHandoff[];
+  serializedTools: SerializedTool[];
+};
+
+/**
+ * @internal
+ * Collects tools/handoffs early so we can annotate spans before model execution begins.
+ */
+async function prepareAgentArtifacts<
+  TContext,
+  TAgent extends Agent,
+>(state: RunState<TContext, TAgent>): Promise<AgentArtifacts<TContext>> {
+  const handoffs = await state._currentAgent.getEnabledHandoffs(state._context);
+  const tools = await state._currentAgent.getAllTools(state._context);
+
+  if (!state._currentAgentSpan) {
+    const handoffNames = handoffs.map((h) => h.agentName);
+    state._currentAgentSpan = createAgentSpan({
+      data: {
+        name: state._currentAgent.name,
+        handoffs: handoffNames,
+        tools: tools.map((t) => t.name),
+        output_type: state._currentAgent.outputSchemaName,
+      },
+    });
+    state._currentAgentSpan.start();
+    setCurrentSpan(state._currentAgentSpan);
+  } else {
+    state._currentAgentSpan.spanData.tools = tools.map((t) => t.name);
+  }
+
+  return {
+    handoffs,
+    tools,
+    serializedHandoffs: handoffs.map((handoff) => serializeHandoff(handoff)),
+    serializedTools: tools.map((tool) => serializeTool(tool)),
+  };
+}
+
+// Captures everything required to call the model once so we avoid recomputing precedence or filters.
+// The values here are the "final say" for a turn; every loop simply consumes the structure rather
+// than attempting to rebuild model settings, filters, or metadata on its own.
+type PreparedModelCall<TContext> = AgentArtifacts<TContext> & {
+  model: Model;
+  explictlyModelSet: boolean;
+  modelSettings: ModelSettings;
+  modelInput: ModelInputData;
+  prompt?: Prompt;
+  previousResponseId?: string;
+  conversationId?: string;
+};
+
+type AgentInputItemPool = Map<string, AgentInputItem[]>;
+
+function getAgentInputItemKey(item: AgentInputItem): string {
+  // Deep serialization keeps binary inputs comparable after filters clone them.
+  return JSON.stringify(item, agentInputSerializationReplacer);
+}
+
+function buildAgentInputPool(items: AgentInputItem[]): AgentInputItemPool {
+  // Track every original object so filters can safely return cloned copies.
+  const pool: AgentInputItemPool = new Map();
+  for (const item of items) {
+    const key = getAgentInputItemKey(item);
+    const existing = pool.get(key);
+    if (existing) {
+      existing.push(item);
+    } else {
+      pool.set(key, [item]);
+    }
+  }
+  return pool;
+}
+
+function takeAgentInputFromPool(
+  pool: AgentInputItemPool,
+  key: string,
+): AgentInputItem | undefined {
+  // Prefer reusing the earliest untouched original to keep ordering stable.
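+  // Keys come from getAgentInputItemKey, so byte-identical payloads share a single bucket.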
+ const candidates = pool.get(key); + if (!candidates || candidates.length === 0) { + return undefined; + } + const [first] = candidates; + candidates.shift(); + if (candidates.length === 0) { + pool.delete(key); + } + return first; +} + +function removeAgentInputFromPool( + pool: AgentInputItemPool, + item: AgentInputItem, +) { + // Remove exactly the matched instance so duplicate payloads remain available. + const key = getAgentInputItemKey(item); + const candidates = pool.get(key); + if (!candidates || candidates.length === 0) { + return; + } + const index = candidates.findIndex((candidate) => candidate === item); + if (index === -1) { + return; + } + candidates.splice(index, 1); + if (candidates.length === 0) { + pool.delete(key); + } +} + +function agentInputSerializationReplacer( + _key: string, + value: unknown, +): unknown { + // Mirror runImplementation serialization so buffer snapshots round-trip. + if (value instanceof ArrayBuffer) { + return { + __type: 'ArrayBuffer', + data: encodeUint8ArrayToBase64(new Uint8Array(value)), + }; + } + + if (isArrayBufferView(value)) { + const view = value as ArrayBufferView; + return { + __type: view.constructor.name, + data: encodeUint8ArrayToBase64( + new Uint8Array(view.buffer, view.byteOffset, view.byteLength), + ), + }; + } + + if (isNodeBuffer(value)) { + const view = value as Uint8Array; + return { + __type: 'Buffer', + data: encodeUint8ArrayToBase64( + new Uint8Array(view.buffer, view.byteOffset, view.byteLength), + ), + }; + } + + if (isSerializedBufferSnapshot(value)) { + return { + __type: 'Buffer', + data: encodeUint8ArrayToBase64(Uint8Array.from(value.data)), + }; + } + + return value; +} + +// Normalizes user-provided input into the structure the model expects. Strings become user messages, +// arrays are kept as-is so downstream loops can treat both scenarios uniformly. +function toAgentInputList( + originalInput: string | AgentInputItem[], +): AgentInputItem[] { + // Allow callers to pass plain strings while preserving original item order. 
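+  // A string becomes a single user message; arrays are shallow-copied so the caller's array is never mutated.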
+ if (typeof originalInput === 'string') { + return [{ type: 'message', role: 'user', content: originalInput }]; + } + + return [...originalInput]; +} diff --git a/packages/agents-core/src/runImplementation.ts b/packages/agents-core/src/runImplementation.ts index 8b13bb38..1667be19 100644 --- a/packages/agents-core/src/runImplementation.ts +++ b/packages/agents-core/src/runImplementation.ts @@ -39,38 +39,50 @@ import { getLastTextFromOutputMessage } from './utils/messages'; import { withFunctionSpan, withHandoffSpan } from './tracing/createSpans'; import { getSchemaAndParserFromInputType } from './utils/tools'; import { encodeUint8ArrayToBase64 } from './utils/base64'; +import { + isArrayBufferView, + isNodeBuffer, + isSerializedBufferSnapshot, + toSmartString, +} from './utils/smartString'; import { safeExecute } from './utils/safeExecute'; import { addErrorToCurrentSpan } from './tracing/context'; import { RunItemStreamEvent, RunItemStreamEventName } from './events'; -import { StreamedRunResult } from './result'; +import { RunResult, StreamedRunResult } from './result'; import { z } from 'zod'; -import { toSmartString } from './utils/smartString'; import * as protocol from './types/protocol'; import { Computer } from './computer'; import { RunState } from './runState'; import { isZodObject } from './utils'; import * as ProviderData from './types/providerData'; +import type { Session, SessionInputCallback } from './memory/session'; +// Represents a single handoff function call that still needs to be executed after the model turn. type ToolRunHandoff = { toolCall: protocol.FunctionCallItem; handoff: Handoff; }; +// Captures a function tool invocation emitted by the model along with the concrete tool to run. type ToolRunFunction = { toolCall: protocol.FunctionCallItem; tool: FunctionTool; }; +// Holds a pending computer-use action so we can dispatch to the configured computer tool. type ToolRunComputer = { toolCall: protocol.ComputerUseCallItem; computer: ComputerTool; }; +// Tracks hosted MCP approval requests awaiting either automatic or user-driven authorization. type ToolRunMCPApprovalRequest = { requestItem: RunToolApprovalItem; mcpTool: HostedMCPTool; }; +// Aggregates everything the model produced in a single turn. Downstream logic consumes this +// structure to decide which follow-up work (tools, handoffs, MCP approvals, computer calls) must run. 
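+// A hedged sketch of how a run loop might consume the processed response
+// (variable names here are illustrative, not part of this module):
+//
+//   const processed = processModelResponse(/* modelResponse, agent, tools, handoffs */);
+//   if (processed.hasToolsOrApprovalsToRun()) {
+//     // dispatch processed.functions, processed.computerActions,
+//     // processed.handoffs and processed.mcpApprovalRequests
+//   } else {
+//     // only messages were produced; check for a final output instead
+//   }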
export type ProcessedResponse = { newItems: RunItem[]; handoffs: ToolRunHandoff[]; @@ -81,8 +93,72 @@ export type ProcessedResponse = { hasToolsOrApprovalsToRun(): boolean; }; +type ApprovalItemLike = + | RunToolApprovalItem + | { + rawItem?: protocol.FunctionCallItem | protocol.HostedToolCallItem; + agent?: Agent; + }; + +function isApprovalItemLike(value: unknown): value is ApprovalItemLike { + if (!value || typeof value !== 'object') { + return false; + } + + if (!('rawItem' in value)) { + return false; + } + + const rawItem = (value as { rawItem?: unknown }).rawItem; + if (!rawItem || typeof rawItem !== 'object') { + return false; + } + + const itemType = (rawItem as { type?: unknown }).type; + return itemType === 'function_call' || itemType === 'hosted_tool_call'; +} + +function getApprovalIdentity(approval: ApprovalItemLike): string | undefined { + const rawItem = approval.rawItem; + if (!rawItem) { + return undefined; + } + + if (rawItem.type === 'function_call' && rawItem.callId) { + return `function_call:${rawItem.callId}`; + } + + if ('callId' in rawItem && rawItem.callId) { + return `${rawItem.type}:${rawItem.callId}`; + } + + const id = 'id' in rawItem ? rawItem.id : undefined; + if (id) { + return `${rawItem.type}:${id}`; + } + + const providerData = + typeof rawItem.providerData === 'object' && rawItem.providerData + ? (rawItem.providerData as { id?: string }) + : undefined; + if (providerData?.id) { + return `${rawItem.type}:provider:${providerData.id}`; + } + + const agentName = + 'agent' in approval && approval.agent ? approval.agent.name : ''; + + try { + return `${agentName}:${rawItem.type}:${JSON.stringify(rawItem)}`; + } catch { + return `${agentName}:${rawItem.type}`; + } +} + /** * @internal + * Walks a raw model response and classifies each item so the runner can schedule follow-up work. + * Returns both the serializable RunItems (for history/streaming) and the actionable tool metadata. */ export function processModelResponse( modelResponse: ModelResponse, @@ -97,6 +173,7 @@ export function processModelResponse( const runMCPApprovalRequests: ToolRunMCPApprovalRequest[] = []; const toolsUsed: string[] = []; const handoffMap = new Map(handoffs.map((h) => [h.toolName, h])); + // Resolve tools upfront so we can look up the concrete handler in O(1) while iterating outputs. const functionMap = new Map( tools.filter((t) => t.type === 'function').map((t) => [t.name, t]), ); @@ -256,10 +333,14 @@ export const nextStepSchema = z.discriminatedUnion('type', [ export type NextStep = z.infer; +/** + * Internal convenience wrapper that groups the outcome of a single agent turn. It lets the caller + * update the RunState in one shot and decide which step to execute next. + */ class SingleStepResult { constructor( /** - * The input items i.e. the items before run() was called. May be muted by handoff input filters + * The input items (i.e., the items before run() was called). May be mutated by handoff input filters. */ public originalInput: string | AgentInputItem[], /** @@ -290,6 +371,8 @@ class SingleStepResult { /** * @internal + * Resets the tool choice when the agent is configured to prefer a fresh tool selection after + * any tool usage. This prevents the provider from reusing stale tool hints across turns. */ export function maybeResetToolChoice( agent: Agent, @@ -304,8 +387,10 @@ export function maybeResetToolChoice( /** * @internal + * Continues a turn that was previously interrupted waiting for tool approval. 
Executes the now + * approved tools and returns the resulting step transition. */ -export async function executeInterruptedToolsAndSideEffects( +export async function resolveInterruptedTurn( agent: Agent, originalInput: string | AgentInputItem[], originalPreStepItems: RunItem[], @@ -323,6 +408,58 @@ export async function executeInterruptedToolsAndSideEffects( item.rawItem.type === 'function_call', ) .map((item) => (item.rawItem as protocol.FunctionCallItem).callId); + + // We already persisted the turn once when the approval interrupt was raised, so the + // counter reflects the approval items as "flushed". When we resume the same turn we need + // to rewind it so the eventual tool output for this call is still written to the session. + const pendingApprovalItems = state + .getInterruptions() + .filter(isApprovalItemLike); + + if (pendingApprovalItems.length > 0) { + const pendingApprovalIdentities = new Set(); + for (const approval of pendingApprovalItems) { + const identity = getApprovalIdentity(approval); + if (identity) { + pendingApprovalIdentities.add(identity); + } + } + + if (pendingApprovalIdentities.size > 0) { + let rewindCount = 0; + for (let index = originalPreStepItems.length - 1; index >= 0; index--) { + const item = originalPreStepItems[index]; + if (!(item instanceof RunToolApprovalItem)) { + continue; + } + + const identity = getApprovalIdentity(item); + if (!identity) { + continue; + } + + if (!pendingApprovalIdentities.has(identity)) { + continue; + } + + rewindCount++; + pendingApprovalIdentities.delete(identity); + + if (pendingApprovalIdentities.size === 0) { + break; + } + } + + // Persisting the approval request already advanced the counter once, so undo the increment + // to make sure we write the final tool output back to the session when the turn resumes. + if (rewindCount > 0) { + state._currentTurnPersistedItemCount = Math.max( + 0, + state._currentTurnPersistedItemCount - rewindCount, + ); + } + } + } // Run function tools that require approval after they get their approval results const functionToolRuns = processedResponse.functions.filter((run) => { return functionCallIds.includes(run.toolCall.callId); @@ -335,8 +472,37 @@ export async function executeInterruptedToolsAndSideEffects( state, ); - // Create the initial set of the output items - const newItems: RunItem[] = functionResults.map((r) => r.runItem); + // There is no built-in HITL approval surface for computer tools today, so every pending action + // is executed immediately when the turn resumes. + const computerResults = + processedResponse.computerActions.length > 0 + ? await executeComputerActions( + agent, + processedResponse.computerActions, + runner, + state._context, + ) + : []; + + // When resuming we receive the original RunItem references; suppress duplicates so history and streaming do not double-emit the same items. 
+ const originalPreStepItemSet = new Set(originalPreStepItems); + const newItems: RunItem[] = []; + const newItemsSet = new Set(); + const appendIfNew = (item: RunItem) => { + if (originalPreStepItemSet.has(item) || newItemsSet.has(item)) { + return; + } + newItems.push(item); + newItemsSet.add(item); + }; + + for (const result of functionResults) { + appendIfNew(result.runItem); + } + + for (const result of computerResults) { + appendIfNew(result); + } // Run MCP tools that require approval after they get their approval results const mcpApprovalRuns = processedResponse.mcpApprovalRequests.filter( @@ -348,6 +514,10 @@ export async function executeInterruptedToolsAndSideEffects( ); }, ); + // Hosted MCP approvals may still be waiting on a human decision when the turn resumes. + const pendingHostedMCPApprovals = new Set(); + const pendingHostedMCPApprovalIds = new Set(); + // Keep track of approvals we still need to surface next turn so HITL flows can resume cleanly. for (const run of mcpApprovalRuns) { // the approval_request_id "mcpr_123..." const approvalRequestId = run.requestItem.rawItem.id!; @@ -363,63 +533,66 @@ export async function executeInterruptedToolsAndSideEffects( reason: undefined, }; // Tell Responses API server the approval result in the next turn - newItems.push( - new RunToolCallItem( - { - type: 'hosted_tool_call', - name: 'mcp_approval_response', - providerData, - }, - agent as Agent, - ), + const responseItem = new RunToolCallItem( + { + type: 'hosted_tool_call', + name: 'mcp_approval_response', + providerData, + }, + agent as Agent, ); + appendIfNew(responseItem); + } else { + pendingHostedMCPApprovals.add(run.requestItem); + pendingHostedMCPApprovalIds.add(approvalRequestId); + functionResults.push({ + type: 'hosted_mcp_tool_approval', + tool: run.mcpTool, + runItem: run.requestItem, + }); + appendIfNew(run.requestItem); } } - const checkToolOutput = await checkForFinalOutputFromTools( - agent, - functionResults, - state, - ); - - // Exclude the tool approval items, which should not be sent to Responses API, - // from the SingleStepResult's preStepItems + // Server-managed conversations rely on preStepItems to re-surface pending approvals. + // Keep unresolved hosted MCP approvals in place so HITL flows still have something to approve next turn. + // Drop resolved approval placeholders so they are not replayed on the next turn, but keep + // pending approvals in place to signal the outstanding work to the UI and session store. 
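+ // Illustrative: with originalPreStepItems = [message, resolvedApproval,
+ // pendingApproval], the filter below yields [message, pendingApproval] so the
+ // unresolved request is re-surfaced while the resolved one is not replayed.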
const preStepItems = originalPreStepItems.filter((item) => { - return !(item instanceof RunToolApprovalItem); + if (!(item instanceof RunToolApprovalItem)) { + return true; + } + + if ( + item.rawItem.type === 'hosted_tool_call' && + item.rawItem.providerData?.type === 'mcp_approval_request' + ) { + if (pendingHostedMCPApprovals.has(item)) { + return true; + } + const approvalRequestId = item.rawItem.id; + if (approvalRequestId) { + return pendingHostedMCPApprovalIds.has(approvalRequestId); + } + return false; + } + + return false; }); - if (checkToolOutput.isFinalOutput) { - runner.emit( - 'agent_end', - state._context, - agent, - checkToolOutput.finalOutput, - ); - agent.emit('agent_end', state._context, checkToolOutput.finalOutput); + const completedStep = await maybeCompleteTurnFromToolResults({ + agent, + runner, + state, + functionResults, + originalInput, + newResponse, + preStepItems, + newItems, + }); - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { - type: 'next_step_final_output', - output: checkToolOutput.finalOutput, - }, - ); - } else if (checkToolOutput.isInterrupted) { - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { - type: 'next_step_interruption', - data: { - interruptions: checkToolOutput.interruptions, - }, - }, - ); + if (completedStep) { + return completedStep; } // we only ran new tools and side effects. We need to run the rest of the agent @@ -434,8 +607,10 @@ export async function executeInterruptedToolsAndSideEffects( /** * @internal + * Executes every follow-up action the model requested (function tools, computer actions, MCP flows), + * appends their outputs to the run history, and determines the next step for the agent loop. */ -export async function executeToolsAndSideEffects( +export async function resolveTurnAfterModelResponse( agent: Agent, originalInput: string | AgentInputItem[], originalPreStepItems: RunItem[], @@ -444,9 +619,24 @@ export async function executeToolsAndSideEffects( runner: Runner, state: RunState>, ): Promise { + // Reuse the same array reference so we can compare object identity when deciding whether to + // append new items, ensuring we never double-stream existing RunItems. const preStepItems = originalPreStepItems; - let newItems = processedResponse.newItems; + const seenItems = new Set(originalPreStepItems); + const newItems: RunItem[] = []; + const appendIfNew = (item: RunItem) => { + if (seenItems.has(item)) { + return; + } + newItems.push(item); + seenItems.add(item); + }; + for (const item of processedResponse.newItems) { + appendIfNew(item); + } + + // Run function tools and computer actions in parallel; neither depends on the other's side effects. 
const [functionResults, computerResults] = await Promise.all([ executeFunctionToolCalls( agent, @@ -462,8 +652,12 @@ export async function executeToolsAndSideEffects( ), ]); - newItems = newItems.concat(functionResults.map((r) => r.runItem)); - newItems = newItems.concat(computerResults); + for (const result of functionResults) { + appendIfNew(result.runItem); + } + for (const item of computerResults) { + appendIfNew(item); + } // run hosted MCP approval requests if (processedResponse.mcpApprovalRequests.length > 0) { @@ -531,44 +725,19 @@ export async function executeToolsAndSideEffects( ); } - const checkToolOutput = await checkForFinalOutputFromTools( + const completedStep = await maybeCompleteTurnFromToolResults({ agent, - functionResults, + runner, state, - ); - - if (checkToolOutput.isFinalOutput) { - runner.emit( - 'agent_end', - state._context, - agent, - checkToolOutput.finalOutput, - ); - agent.emit('agent_end', state._context, checkToolOutput.finalOutput); + functionResults, + originalInput, + newResponse, + preStepItems, + newItems, + }); - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { - type: 'next_step_final_output', - output: checkToolOutput.finalOutput, - }, - ); - } else if (checkToolOutput.isInterrupted) { - return new SingleStepResult( - originalInput, - newResponse, - preStepItems, - newItems, - { - type: 'next_step_interruption', - data: { - interruptions: checkToolOutput.interruptions, - }, - }, - ); + if (completedStep) { + return completedStep; } // If the model issued any tool calls or handoffs in this turn, @@ -612,6 +781,7 @@ export async function executeToolsAndSideEffects( ); } + // Keep looping if any tool output placeholders still require an approval follow-up. const hasPendingToolsOrApprovals = functionResults.some( (result) => result.runItem instanceof RunToolApprovalItem, ); @@ -666,8 +836,73 @@ export async function executeToolsAndSideEffects( ); } +type TurnFinalizationParams = { + agent: Agent; + runner: Runner; + state: RunState>; + functionResults: FunctionToolResult[]; + originalInput: string | AgentInputItem[]; + newResponse: ModelResponse; + preStepItems: RunItem[]; + newItems: RunItem[]; +}; + +// Consolidates the logic that determines whether tool results yielded a final answer, +// triggered an interruption, or require the agent loop to continue running. +async function maybeCompleteTurnFromToolResults({ + agent, + runner, + state, + functionResults, + originalInput, + newResponse, + preStepItems, + newItems, +}: TurnFinalizationParams): Promise { + const toolOutcome = await checkForFinalOutputFromTools( + agent, + functionResults, + state, + ); + + if (toolOutcome.isFinalOutput) { + runner.emit('agent_end', state._context, agent, toolOutcome.finalOutput); + agent.emit('agent_end', state._context, toolOutcome.finalOutput); + + return new SingleStepResult( + originalInput, + newResponse, + preStepItems, + newItems, + { + type: 'next_step_final_output', + output: toolOutcome.finalOutput, + }, + ); + } + + if (toolOutcome.isInterrupted) { + return new SingleStepResult( + originalInput, + newResponse, + preStepItems, + newItems, + { + type: 'next_step_interruption', + data: { + interruptions: toolOutcome.interruptions, + }, + }, + ); + } + + return null; +} + /** * @internal + * Normalizes tool outputs once so downstream code works with fully structured protocol items. + * Doing this here keeps API surface stable even when providers add new shapes. 
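+ *
+ * @example
+ * // Hedged sketch of the structured path: a tool returning
+ * // { type: 'text', text: 'hi' } is normalized and converted to the protocol
+ * // item { type: 'input_text', text: 'hi' } before being attached to the
+ * // function_call_result for this callId (see the helpers below).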
*/ export function getToolCallOutputItem( toolCall: protocol.FunctionCallItem, @@ -701,299 +936,42 @@ export function getToolCallOutputItem( }; } -type StructuredToolOutput = - | ToolOutputText - | ToolOutputImage - | ToolOutputFileContent; +function normalizeFileValue( + value: Record, +): FileReferenceValue | null { + const directFile = value.file; + if (typeof directFile === 'string' && directFile.length > 0) { + return directFile; + } -/** - * Accepts whatever the tool returned and attempts to coerce it into the structured protocol - * shapes we expose to downstream model adapters (input_text/input_image/input_file). Tools are - * allowed to return either a single structured object or an array of them; anything else falls - * back to the legacy string pipeline. - */ -function normalizeStructuredToolOutputs( - output: unknown, -): StructuredToolOutput[] | null { - if (Array.isArray(output)) { - const structured: StructuredToolOutput[] = []; - for (const item of output) { - const normalized = normalizeStructuredToolOutput(item); - if (!normalized) { - return null; - } - structured.push(normalized); - } - return structured; + const normalizedObject = normalizeFileObjectCandidate(directFile); + if (normalizedObject) { + return normalizedObject; } - const normalized = normalizeStructuredToolOutput(output); - return normalized ? [normalized] : null; + + const legacyValue = normalizeLegacyFileValue(value); + if (legacyValue) { + return legacyValue; + } + + return null; } -/** - * Best-effort normalization of a single tool output item. If the object already matches the - * protocol shape we simply cast it; otherwise we copy the recognised fields into the canonical - * structure. Returning null lets the caller know we should revert to plain-string handling. - */ -function normalizeStructuredToolOutput( +function normalizeFileObjectCandidate( value: unknown, -): StructuredToolOutput | null { +): FileReferenceValue | null { if (!isRecord(value)) { return null; } - const type = value.type; - if (type === 'text' && typeof value.text === 'string') { - const output: ToolOutputText = { type: 'text', text: value.text }; - if (isRecord(value.providerData)) { - output.providerData = value.providerData; - } - return output; - } - if (type === 'image') { - const output: ToolOutputImage = { type: 'image' }; - - let imageString: string | undefined; - let imageFileId: string | undefined; - const fallbackImageMediaType = isNonEmptyString((value as any).mediaType) - ? (value as any).mediaType - : undefined; - - const imageField = value.image; - if (typeof imageField === 'string' && imageField.length > 0) { - imageString = imageField; - } else if (isRecord(imageField)) { - const imageObj = imageField as Record; - const inlineMediaType = isNonEmptyString(imageObj.mediaType) - ? 
imageObj.mediaType - : fallbackImageMediaType; - if (isNonEmptyString(imageObj.url)) { - imageString = imageObj.url; - } else if (isNonEmptyString(imageObj.data)) { - imageString = toInlineImageString(imageObj.data, inlineMediaType); - } else if ( - imageObj.data instanceof Uint8Array && - imageObj.data.length > 0 - ) { - imageString = toInlineImageString(imageObj.data, inlineMediaType); - } - - if (!imageString) { - const candidateId = - (isNonEmptyString(imageObj.fileId) && imageObj.fileId) || - (isNonEmptyString(imageObj.id) && imageObj.id) || - undefined; - if (candidateId) { - imageFileId = candidateId; - } - } - } - - if ( - !imageString && - typeof value.imageUrl === 'string' && - value.imageUrl.length > 0 - ) { - imageString = value.imageUrl; - } - if ( - !imageFileId && - typeof value.fileId === 'string' && - value.fileId.length > 0 - ) { - imageFileId = value.fileId; - } - - if ( - !imageString && - typeof value.data === 'string' && - value.data.length > 0 - ) { - imageString = fallbackImageMediaType - ? toInlineImageString(value.data, fallbackImageMediaType) - : value.data; - } else if ( - !imageString && - value.data instanceof Uint8Array && - value.data.length > 0 - ) { - imageString = toInlineImageString(value.data, fallbackImageMediaType); - } - if (typeof value.detail === 'string' && value.detail.length > 0) { - output.detail = value.detail; - } - - if (imageString) { - output.image = imageString; - } else if (imageFileId) { - output.image = { fileId: imageFileId }; - } else { - return null; - } - - if (isRecord(value.providerData)) { - output.providerData = value.providerData; - } - return output; - } - - if (type === 'file') { - const fileValue = normalizeFileValue(value); - if (!fileValue) { - return null; - } - - const output: ToolOutputFileContent = { type: 'file', file: fileValue }; - - if (isRecord(value.providerData)) { - output.providerData = value.providerData; - } - return output; - } - - return null; -} - -/** - * Translates the normalized tool output into the protocol `input_*` items. This is the last hop - * before we hand the data to model-specific adapters, so we generate the exact schema expected by - * the protocol definitions. - */ -function convertStructuredToolOutputToInputItem( - output: StructuredToolOutput, -): ToolCallStructuredOutput { - if (output.type === 'text') { - const result: protocol.InputText = { - type: 'input_text', - text: output.text, - }; - if (output.providerData) { - result.providerData = output.providerData; - } - return result; - } - if (output.type === 'image') { - const result: protocol.InputImage = { type: 'input_image' }; - if (typeof output.detail === 'string' && output.detail.length > 0) { - result.detail = output.detail; - } - if (typeof output.image === 'string' && output.image.length > 0) { - result.image = output.image; - } else if (isRecord(output.image)) { - const imageObj = output.image as Record; - const inlineMediaType = isNonEmptyString(imageObj.mediaType) - ? imageObj.mediaType - : undefined; - if (isNonEmptyString(imageObj.url)) { - result.image = imageObj.url; - } else if (isNonEmptyString(imageObj.data)) { - result.image = - inlineMediaType && !imageObj.data.startsWith('data:') - ? 
asDataUrl(imageObj.data, inlineMediaType) - : imageObj.data; - } else if ( - imageObj.data instanceof Uint8Array && - imageObj.data.length > 0 - ) { - const base64 = encodeUint8ArrayToBase64(imageObj.data); - result.image = asDataUrl(base64, inlineMediaType); - } else { - const referencedId = - (isNonEmptyString(imageObj.fileId) && imageObj.fileId) || - (isNonEmptyString(imageObj.id) && imageObj.id) || - undefined; - if (referencedId) { - result.image = { id: referencedId }; - } - } - } - if (output.providerData) { - result.providerData = output.providerData; - } - return result; - } - - if (output.type === 'file') { - const result: protocol.InputFile = { type: 'input_file' }; - const fileValue = output.file; - if (typeof fileValue === 'string') { - result.file = fileValue; - } else if (fileValue && typeof fileValue === 'object') { - const record = fileValue as Record; - if ('data' in record && record.data) { - const mediaType = record.mediaType ?? 'text/plain'; - if (typeof record.data === 'string') { - result.file = asDataUrl(record.data, mediaType); - } else { - const base64 = encodeUint8ArrayToBase64(record.data); - result.file = asDataUrl(base64, mediaType); - } - } else if (typeof record.url === 'string' && record.url.length > 0) { - result.file = { url: record.url }; - } else { - const referencedId = - (typeof record.id === 'string' && - record.id.length > 0 && - record.id) || - (typeof record.fileId === 'string' && record.fileId.length > 0 - ? record.fileId - : undefined); - if (referencedId) { - result.file = { id: referencedId }; - } - } - - if (typeof record.filename === 'string' && record.filename.length > 0) { - result.filename = record.filename; - } - } - if (output.providerData) { - result.providerData = output.providerData; - } - return result; - } - const exhaustiveCheck: never = output; - return exhaustiveCheck; -} - -type FileReferenceValue = ToolOutputFileContent['file']; - -function normalizeFileValue( - value: Record, -): FileReferenceValue | null { - const directFile = value.file; - if (typeof directFile === 'string' && directFile.length > 0) { - return directFile; - } - - const normalizedObject = normalizeFileObjectCandidate(directFile); - if (normalizedObject) { - return normalizedObject; - } - - const legacyValue = normalizeLegacyFileValue(value); - if (legacyValue) { - return legacyValue; - } - - return null; -} - -function normalizeFileObjectCandidate( - value: unknown, -): FileReferenceValue | null { - if (!isRecord(value)) { - return null; - } - - if ('data' in value && value.data !== undefined) { - const dataValue = value.data; - const hasStringData = typeof dataValue === 'string' && dataValue.length > 0; - const hasBinaryData = - dataValue instanceof Uint8Array && dataValue.length > 0; - if (!hasStringData && !hasBinaryData) { - return null; - } + if ('data' in value && value.data !== undefined) { + const dataValue = value.data; + const hasStringData = typeof dataValue === 'string' && dataValue.length > 0; + const hasBinaryData = + dataValue instanceof Uint8Array && dataValue.length > 0; + if (!hasStringData && !hasBinaryData) { + return null; + } if ( !isNonEmptyString(value.mediaType) || @@ -1105,6 +1083,8 @@ function asDataUrl(base64: string, mediaType?: string): string { /** * @internal + * Runs every function tool call requested by the model and returns their outputs alongside + * the `RunItem` instances that should be appended to history. 
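+ *
+ * @example
+ * // Minimal sketch (arguments elided): callers append each returned runItem
+ * // to the turn history and inspect the outputs for final-output checks.
+ * // const results = await executeFunctionToolCalls(agent, toolRuns, runner, state);
+ * // for (const r of results) appendIfNew(r.runItem);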
*/ export async function executeFunctionToolCalls( agent: Agent, @@ -1121,6 +1101,7 @@ export async function executeFunctionToolCalls( parsedArgs = JSON.parse(parsedArgs); } } + // Some tools require a human or policy check before execution; defer until approval is recorded. const needsApproval = await toolRun.tool.needsApproval( state._context, parsedArgs, @@ -1334,6 +1315,8 @@ async function _runComputerActionAndScreenshot( /** * @internal + * Executes any computer-use actions emitted by the model and returns the resulting items so the + * run history reflects the computer session. */ export async function executeComputerActions( agent: Agent, @@ -1375,7 +1358,7 @@ export async function executeComputerActions( }); } - // Always return a screenshot as a base64 data URL + // Return the screenshot as a data URL when available; fall back to an empty string on failures. const imageUrl = output ? `data:image/png;base64,${output}` : ''; const rawItem: protocol.ComputerCallResultItem = { type: 'computer_call_result', @@ -1389,6 +1372,8 @@ export async function executeComputerActions( /** * @internal + * Drives handoff calls by invoking the downstream agent and capturing any generated items so + * the current agent can continue with the new context. */ export async function executeHandoffCalls< TContext, @@ -1522,6 +1507,8 @@ const NOT_FINAL_OUTPUT: ToolsToFinalOutputResult = { /** * @internal + * Determines whether tool executions produced a final agent output, triggered an interruption, + * or whether the agent loop should continue collecting more responses. */ export async function checkForFinalOutputFromTools< TContext, @@ -1686,3 +1673,730 @@ export class AgentToolUseTracker { ); } } + +/** + * @internal + * Convert a user-provided input into a list of input items. + */ +export function toInputItemList( + input: string | AgentInputItem[], +): AgentInputItem[] { + if (typeof input === 'string') { + return [ + { + type: 'message', + role: 'user', + content: input, + }, + ]; + } + return [...input]; +} + +/** + * @internal + * Extract model output items from run items, excluding tool approval items. + */ +export function extractOutputItemsFromRunItems( + items: RunItem[], +): AgentInputItem[] { + return items + .filter((item) => item.type !== 'tool_approval_item') + .map((item) => item.rawItem as AgentInputItem); +} + +// Carries metadata while recursively sanitizing nested payloads so binary blobs can share the +// appropriate media type when converted into durable data URLs. +type SessionBinaryContext = { + mediaType?: string; +}; + +function normalizeItemsForSessionPersistence( + items: AgentInputItem[], +): AgentInputItem[] { + // Persisted sessions must avoid raw binary so we convert every item into a JSON-safe shape before writing to storage. + return items.map((item) => + sanitizeValueForSession(stripTransientCallIds(item)), + ); +} + +function sanitizeValueForSession( + value: AgentInputItem, + context?: SessionBinaryContext, +): AgentInputItem; +// Nested fields such as providerData may hold arbitrary shapes, so we keep an unknown-based overload for recursive traversal. +function sanitizeValueForSession( + value: unknown, + context?: SessionBinaryContext, +): unknown; +function sanitizeValueForSession( + value: unknown, + context: SessionBinaryContext = {}, +): unknown { + if (value === null || value === undefined) { + return value; + } + + // Convert supported binary payloads into ArrayBuffer views before serialization. 
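+ // e.g. a Uint8Array of [0x68, 0x69] with an inherited mediaType of
+ // 'text/plain' becomes 'data:text/plain;base64,aGk=' via toDataUrlFromBytes.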
+ const binary = toUint8ArrayIfBinary(value); + if (binary) { + return toDataUrlFromBytes(binary, context.mediaType); + } + + if (Array.isArray(value)) { + return value.map((entry) => sanitizeValueForSession(entry, context)); + } + + if (!isPlainObject(value)) { + return value; + } + + const record = value as Record; + const result: Record = {}; + + const mediaType = + typeof record.mediaType === 'string' && record.mediaType.length > 0 + ? (record.mediaType as string) + : context.mediaType; + + for (const [key, entry] of Object.entries(record)) { + // Propagate explicit media type only when walking into binary payload containers. + const nextContext = + key === 'data' || key === 'fileData' ? { mediaType } : context; + result[key] = sanitizeValueForSession(entry, nextContext); + } + + return result; +} + +function toUint8ArrayIfBinary(value: unknown): Uint8Array | undefined { + // Normalize the diverse binary containers we may receive into a shared Uint8Array view. + if (value instanceof ArrayBuffer) { + return new Uint8Array(value); + } + if (isArrayBufferView(value)) { + const view = value as ArrayBufferView; + return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + } + if (isNodeBuffer(value)) { + const view = value as Uint8Array; + return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + } + if (isSerializedBufferSnapshot(value)) { + const snapshot = value as { data: number[] }; + return Uint8Array.from(snapshot.data); + } + return undefined; +} + +function toDataUrlFromBytes(bytes: Uint8Array, mediaType?: string): string { + // Convert binary payloads into a durable data URL so session files remain self-contained. + const base64 = encodeUint8ArrayToBase64(bytes); + // Note that OpenAI Responses API never accepts application/octet-stream as a media type, + // so we fall back to text/plain; that said, tools are supposed to return a valid media type when this utility is used. + const type = + mediaType && !mediaType.startsWith('data:') ? mediaType : 'text/plain'; + return `data:${type};base64,${base64}`; +} + +function isPlainObject(value: unknown): value is Record { + if (typeof value !== 'object' || value === null) { + return false; + } + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +// Drop IDs from transient function call items (fc_***) so replayed histories do not reuse generated IDs. +function stripTransientCallIds(value: AgentInputItem): AgentInputItem; +function stripTransientCallIds(value: unknown): unknown; +function stripTransientCallIds(value: unknown): unknown { + if (value === null || value === undefined) { + return value; + } + if (Array.isArray(value)) { + return value.map((entry) => stripTransientCallIds(entry)); + } + if (!isPlainObject(value)) { + return value; + } + const record = value as Record; + const result: Record = {}; + const isProtocolItem = + typeof record.type === 'string' && record.type.length > 0; + const shouldStripId = + isProtocolItem && shouldStripIdForType(record.type as string); + for (const [key, entry] of Object.entries(record)) { + if (shouldStripId && key === 'id') { + continue; + } + result[key] = stripTransientCallIds(entry); + } + return result; +} + +function shouldStripIdForType(type: string): boolean { + switch (type) { + case 'function_call': + case 'function_call_result': + return true; + default: + return false; + } +} + +/** + * @internal + * Persist full turn (input + outputs) for non-streaming runs. 
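+ *
+ * @example
+ * // Hedged sketch: only the delta beyond the persisted counter is written.
+ * // With _currentTurnPersistedItemCount = 2 and result.newItems.length = 5,
+ * // the three newest run items (plus any new input items) are passed to
+ * // session.addItems() and the counter advances to 5.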
+ */ +// Persists the combination of user inputs (possibly filtered) and model outputs for a completed turn. +export async function saveToSession( + session: Session | undefined, + sessionInputItems: AgentInputItem[] | undefined, + result: RunResult, +): Promise { + if (!session) { + return; + } + const inputItems = sessionInputItems ?? []; + const state = result.state; + const alreadyPersisted = state._currentTurnPersistedItemCount ?? 0; + // Persist only the portion of _generatedItems that has not yet been stored for this turn. + const newRunItems = result.newItems.slice(alreadyPersisted); + if (process.env.OPENAI_AGENTS__DEBUG_SAVE_SESSION) { + console.debug( + 'saveToSession:newRunItems', + newRunItems.map((item) => item.type), + ); + } + const outputItems = extractOutputItemsFromRunItems(newRunItems); + const itemsToSave = [...inputItems, ...outputItems]; + if (itemsToSave.length === 0) { + state._currentTurnPersistedItemCount = + alreadyPersisted + newRunItems.length; + return; + } + const sanitizedItems = normalizeItemsForSessionPersistence(itemsToSave); + await session.addItems(sanitizedItems); + state._currentTurnPersistedItemCount = alreadyPersisted + newRunItems.length; +} + +/** + * @internal + * Persist only the user input for streaming runs at start. + */ +// For streaming runs we persist user input as soon as it is sent so reconnections can resume. +export async function saveStreamInputToSession( + session: Session | undefined, + sessionInputItems: AgentInputItem[] | undefined, +): Promise { + if (!session) { + return; + } + if (!sessionInputItems || sessionInputItems.length === 0) { + return; + } + const sanitizedInput = normalizeItemsForSessionPersistence(sessionInputItems); + await session.addItems(sanitizedInput); +} + +/** + * @internal + * Persist only the model outputs for streaming runs at the end of a turn. + */ +// Complements saveStreamInputToSession by recording the streaming outputs at the end of the turn. +export async function saveStreamResultToSession( + session: Session | undefined, + result: StreamedRunResult, +): Promise { + if (!session) { + return; + } + const state = result.state; + const alreadyPersisted = state._currentTurnPersistedItemCount ?? 0; + const newRunItems = result.newItems.slice(alreadyPersisted); + const itemsToSave = extractOutputItemsFromRunItems(newRunItems); + if (itemsToSave.length === 0) { + state._currentTurnPersistedItemCount = + alreadyPersisted + newRunItems.length; + return; + } + const sanitizedItems = normalizeItemsForSessionPersistence(itemsToSave); + await session.addItems(sanitizedItems); + state._currentTurnPersistedItemCount = alreadyPersisted + newRunItems.length; +} + +/** + * @internal + * If a session is provided, expands the input with session history; otherwise returns the input. + */ +export type PreparedInputWithSessionResult = { + preparedInput: string | AgentInputItem[]; + sessionItems?: AgentInputItem[]; +}; + +export async function prepareInputItemsWithSession( + input: string | AgentInputItem[], + session?: Session, + sessionInputCallback?: SessionInputCallback, + options?: { + /** + * When true (default), the returned `preparedInput` includes both the persisted session history + * and the new turn items. Set to false when upstream code already provides history to the model + * (e.g. server-managed conversations) to avoid sending duplicated messages each turn. 
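+ * For example, a runner driving a server-managed conversation (conversationId)
+ * would pass { includeHistoryInPreparedInput: false } so only the new turn
+ * items are sent alongside the server-side history.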
+ */ + includeHistoryInPreparedInput?: boolean; + /** + * When true, ensures new turn inputs are still provided to the model even if the session input + * callback drops them from persistence (used for server-managed conversations that redact + * writes). + */ + preserveDroppedNewItems?: boolean; + }, +): Promise { + if (!session) { + return { + preparedInput: input, + sessionItems: undefined, + }; + } + + const includeHistoryInPreparedInput = + options?.includeHistoryInPreparedInput ?? true; + const preserveDroppedNewItems = options?.preserveDroppedNewItems ?? false; + + const history = await session.getItems(); + const newInputItems = Array.isArray(input) + ? [...input] + : toInputItemList(input); + + if (!sessionInputCallback) { + return { + preparedInput: includeHistoryInPreparedInput + ? [...history, ...newInputItems] + : newInputItems, + sessionItems: newInputItems, + }; + } + + // Capture snapshots before invoking the callback so we can reason about the original state even + // if the callback mutates the history array in-place. + const historySnapshot = history.slice(); + const newInputSnapshot = newInputItems.slice(); + + // Delegate history reconciliation to the user-supplied callback. It must return a concrete list + // to keep downstream model requests well-typed. + const combined = await sessionInputCallback(history, newInputItems); + if (!Array.isArray(combined)) { + throw new UserError( + 'Session input callback must return an array of AgentInputItem objects.', + ); + } + + const historyCounts = buildItemFrequencyMap(historySnapshot); + const newInputCounts = buildItemFrequencyMap(newInputSnapshot); + const historyRefs = buildItemReferenceMap(historySnapshot); + const newInputRefs = buildItemReferenceMap(newInputSnapshot); + + const appended: AgentInputItem[] = []; + for (const item of combined) { + const key = sessionItemKey(item); + if (consumeReference(newInputRefs, key, item)) { + decrementCount(newInputCounts, key); + appended.push(item); + continue; + } + + // Prioritize exact history matches before payload-based counts so callbacks that surface + // history ahead of identical new inputs keep previously persisted items out of the new queue. + if (consumeReference(historyRefs, key, item)) { + decrementCount(historyCounts, key); + continue; + } + + const historyRemaining = historyCounts.get(key) ?? 0; + if (historyRemaining > 0) { + historyCounts.set(key, historyRemaining - 1); + continue; + } + + const newRemaining = newInputCounts.get(key) ?? 0; + if (newRemaining > 0) { + newInputCounts.set(key, newRemaining - 1); + appended.push(item); + continue; + } + + appended.push(item); + } + + // Preserve redacted inputs for model delivery when requested (e.g. server-managed histories). + const preparedItems = includeHistoryInPreparedInput + ? combined + : appended.length > 0 + ? appended + : preserveDroppedNewItems + ? newInputSnapshot + : []; + + return { + preparedInput: preparedItems, + // Respect callbacks that intentionally drop the latest inputs (e.g. to redact sensitive + // values) by persisting only the items they kept in the combined array. + sessionItems: appended, + }; +} + +// Internal helpers kept near the end so the main execution path reads top-to-bottom. +type StructuredToolOutput = + | ToolOutputText + | ToolOutputImage + | ToolOutputFileContent; + +/** + * Accepts whatever the tool returned and attempts to coerce it into the structured protocol + * shapes we expose to downstream model adapters (input_text/input_image/input_file). 
Tools are + * allowed to return either a single structured object or an array of them; anything else falls + * back to the legacy string pipeline. + */ +function normalizeStructuredToolOutputs( + output: unknown, +): StructuredToolOutput[] | null { + if (Array.isArray(output)) { + const structured: StructuredToolOutput[] = []; + for (const item of output) { + const normalized = normalizeStructuredToolOutput(item); + if (!normalized) { + return null; + } + structured.push(normalized); + } + return structured; + } + const normalized = normalizeStructuredToolOutput(output); + return normalized ? [normalized] : null; +} + +/** + * Best-effort normalization of a single tool output item. If the object already matches the + * protocol shape we simply cast it; otherwise we copy the recognised fields into the canonical + * structure. Returning null lets the caller know we should revert to plain-string handling. + */ +function normalizeStructuredToolOutput( + value: unknown, +): StructuredToolOutput | null { + if (!isRecord(value)) { + return null; + } + const type = value.type; + if (type === 'text' && typeof value.text === 'string') { + const output: ToolOutputText = { type: 'text', text: value.text }; + if (isRecord(value.providerData)) { + output.providerData = value.providerData; + } + return output; + } + + if (type === 'image') { + const output: ToolOutputImage = { type: 'image' }; + + let imageString: string | undefined; + let imageFileId: string | undefined; + const fallbackImageMediaType = isNonEmptyString((value as any).mediaType) + ? (value as any).mediaType + : undefined; + + const imageField = value.image; + if (typeof imageField === 'string' && imageField.length > 0) { + imageString = imageField; + } else if (isRecord(imageField)) { + const imageObj = imageField as Record; + const inlineMediaType = isNonEmptyString(imageObj.mediaType) + ? imageObj.mediaType + : fallbackImageMediaType; + if (isNonEmptyString(imageObj.url)) { + imageString = imageObj.url; + } else if (isNonEmptyString(imageObj.data)) { + imageString = toInlineImageString(imageObj.data, inlineMediaType); + } else if ( + imageObj.data instanceof Uint8Array && + imageObj.data.length > 0 + ) { + imageString = toInlineImageString(imageObj.data, inlineMediaType); + } + + if (!imageString) { + const candidateId = + (isNonEmptyString(imageObj.fileId) && imageObj.fileId) || + (isNonEmptyString(imageObj.id) && imageObj.id) || + undefined; + if (candidateId) { + imageFileId = candidateId; + } + } + } + + if ( + !imageString && + typeof value.imageUrl === 'string' && + value.imageUrl.length > 0 + ) { + imageString = value.imageUrl; + } + if ( + !imageFileId && + typeof value.fileId === 'string' && + value.fileId.length > 0 + ) { + imageFileId = value.fileId; + } + + if ( + !imageString && + typeof value.data === 'string' && + value.data.length > 0 + ) { + imageString = fallbackImageMediaType + ? 
toInlineImageString(value.data, fallbackImageMediaType) + : value.data; + } else if ( + !imageString && + value.data instanceof Uint8Array && + value.data.length > 0 + ) { + imageString = toInlineImageString(value.data, fallbackImageMediaType); + } + if (typeof value.detail === 'string' && value.detail.length > 0) { + output.detail = value.detail; + } + + if (imageString) { + output.image = imageString; + } else if (imageFileId) { + output.image = { fileId: imageFileId }; + } else { + return null; + } + + if (isRecord(value.providerData)) { + output.providerData = value.providerData; + } + return output; + } + + if (type === 'file') { + const fileValue = normalizeFileValue(value); + if (!fileValue) { + return null; + } + + const output: ToolOutputFileContent = { type: 'file', file: fileValue }; + + if (isRecord(value.providerData)) { + output.providerData = value.providerData; + } + return output; + } + + return null; +} + +/** + * Translates the normalized tool output into the protocol `input_*` items. This is the last hop + * before we hand the data to model-specific adapters, so we generate the exact schema expected by + * the protocol definitions. + */ +function convertStructuredToolOutputToInputItem( + output: StructuredToolOutput, +): ToolCallStructuredOutput { + if (output.type === 'text') { + const result: protocol.InputText = { + type: 'input_text', + text: output.text, + }; + if (output.providerData) { + result.providerData = output.providerData; + } + return result; + } + if (output.type === 'image') { + const result: protocol.InputImage = { type: 'input_image' }; + if (typeof output.detail === 'string' && output.detail.length > 0) { + result.detail = output.detail; + } + if (typeof output.image === 'string' && output.image.length > 0) { + result.image = output.image; + } else if (isRecord(output.image)) { + const imageObj = output.image as Record; + const inlineMediaType = isNonEmptyString(imageObj.mediaType) + ? imageObj.mediaType + : undefined; + if (isNonEmptyString(imageObj.url)) { + result.image = imageObj.url; + } else if (isNonEmptyString(imageObj.data)) { + result.image = + inlineMediaType && !imageObj.data.startsWith('data:') + ? asDataUrl(imageObj.data, inlineMediaType) + : imageObj.data; + } else if ( + imageObj.data instanceof Uint8Array && + imageObj.data.length > 0 + ) { + const base64 = encodeUint8ArrayToBase64(imageObj.data); + result.image = asDataUrl(base64, inlineMediaType); + } else { + const referencedId = + (isNonEmptyString(imageObj.fileId) && imageObj.fileId) || + (isNonEmptyString(imageObj.id) && imageObj.id) || + undefined; + if (referencedId) { + result.image = { id: referencedId }; + } + } + } + if (output.providerData) { + result.providerData = output.providerData; + } + return result; + } + + if (output.type === 'file') { + const result: protocol.InputFile = { type: 'input_file' }; + const fileValue = output.file; + if (typeof fileValue === 'string') { + result.file = fileValue; + } else if (fileValue && typeof fileValue === 'object') { + const record = fileValue as Record; + if ('data' in record && record.data) { + const mediaType = record.mediaType ?? 
'text/plain'; + if (typeof record.data === 'string') { + result.file = asDataUrl(record.data, mediaType); + } else { + const base64 = encodeUint8ArrayToBase64(record.data); + result.file = asDataUrl(base64, mediaType); + } + } else if (typeof record.url === 'string' && record.url.length > 0) { + result.file = { url: record.url }; + } else { + const referencedId = + (typeof record.id === 'string' && + record.id.length > 0 && + record.id) || + (typeof record.fileId === 'string' && record.fileId.length > 0 + ? record.fileId + : undefined); + if (referencedId) { + result.file = { id: referencedId }; + } + } + + if (typeof record.filename === 'string' && record.filename.length > 0) { + result.filename = record.filename; + } + } + if (output.providerData) { + result.providerData = output.providerData; + } + return result; + } + const exhaustiveCheck: never = output; + return exhaustiveCheck; +} + +type FileReferenceValue = ToolOutputFileContent['file']; + +function buildItemFrequencyMap(items: AgentInputItem[]): Map { + const counts = new Map(); + for (const item of items) { + const key = sessionItemKey(item); + counts.set(key, (counts.get(key) ?? 0) + 1); + } + return counts; +} + +function buildItemReferenceMap( + items: AgentInputItem[], +): Map { + const refs = new Map(); + for (const item of items) { + const key = sessionItemKey(item); + const list = refs.get(key); + if (list) { + list.push(item); + } else { + refs.set(key, [item]); + } + } + return refs; +} + +function consumeReference( + refs: Map, + key: string, + target: AgentInputItem, +): boolean { + const candidates = refs.get(key); + if (!candidates || candidates.length === 0) { + return false; + } + const index = candidates.findIndex((candidate) => candidate === target); + if (index === -1) { + return false; + } + candidates.splice(index, 1); + if (candidates.length === 0) { + refs.delete(key); + } + return true; +} + +function decrementCount(map: Map, key: string) { + const remaining = (map.get(key) ?? 
0) - 1; + if (remaining <= 0) { + map.delete(key); + } else { + map.set(key, remaining); + } +} + +function sessionItemKey(item: AgentInputItem): string { + return JSON.stringify(item, sessionSerializationReplacer); +} + +function sessionSerializationReplacer(_key: string, value: unknown): unknown { + if (value instanceof ArrayBuffer) { + return { + __type: 'ArrayBuffer', + data: encodeUint8ArrayToBase64(new Uint8Array(value)), + }; + } + + if (isArrayBufferView(value)) { + const view = value as ArrayBufferView; + return { + __type: view.constructor.name, + data: encodeUint8ArrayToBase64( + new Uint8Array(view.buffer, view.byteOffset, view.byteLength), + ), + }; + } + + if (isNodeBuffer(value)) { + const view = value as Uint8Array; + return { + __type: 'Buffer', + data: encodeUint8ArrayToBase64( + new Uint8Array(view.buffer, view.byteOffset, view.byteLength), + ), + }; + } + + if (isSerializedBufferSnapshot(value)) { + return { + __type: 'Buffer', + data: encodeUint8ArrayToBase64(Uint8Array.from(value.data)), + }; + } + + return value; +} diff --git a/packages/agents-core/src/runState.ts b/packages/agents-core/src/runState.ts index baa83b1a..9c95da5c 100644 --- a/packages/agents-core/src/runState.ts +++ b/packages/agents-core/src/runState.ts @@ -231,6 +231,7 @@ export const SerializedRunState = z.object({ lastModelResponse: modelResponseSchema.optional(), generatedItems: z.array(itemSchema), lastProcessedResponse: serializedProcessedResponseSchema.optional(), + currentTurnPersistedItemCount: z.number().int().min(0).optional(), trace: serializedTraceSchema.nullable(), }); @@ -275,6 +276,17 @@ export class RunState> { * Items generated by the agent during the run. */ public _generatedItems: RunItem[]; + /** + * Number of `_generatedItems` already flushed to session storage for the current turn. + * + * Persisting the entire turn on every save would duplicate responses and tool outputs. + * Instead, `saveToSession` appends only the delta since the previous write. This counter + * tracks how many generated run items from *this turn* were already written so the next + * save can slice off only the new entries. When a turn is interrupted (e.g., awaiting tool + * approval) and later resumed, we rewind the counter before continuing so the pending tool + * output still gets stored. + */ + public _currentTurnPersistedItemCount: number; /** * Maximum allowed turns before forcing termination. */ @@ -322,6 +334,7 @@ export class RunState> { this._currentAgent = startingAgent; this._toolUseTracker = new AgentToolUseTracker(); this._generatedItems = []; + this._currentTurnPersistedItemCount = 0; this._maxTurns = maxTurns; this._inputGuardrailResults = []; this._outputGuardrailResults = []; @@ -427,6 +440,7 @@ export class RunState> { currentStep: this._currentStep as any, lastModelResponse: this._lastTurnResponse as any, generatedItems: this._generatedItems.map((item) => item.toJSON() as any), + currentTurnPersistedItemCount: this._currentTurnPersistedItemCount, lastProcessedResponse: this._lastProcessedResponse as any, trace: this._trace ? (this._trace.toJSON() as any) : null, }; @@ -563,6 +577,8 @@ export class RunState> { state._generatedItems = stateJson.generatedItems.map((item) => deserializeItem(item, agentMap), ); + state._currentTurnPersistedItemCount = + stateJson.currentTurnPersistedItemCount ?? 0; state._lastProcessedResponse = stateJson.lastProcessedResponse ? 
await deserializeProcessedResponse( agentMap, diff --git a/packages/agents-core/src/types/protocol.ts b/packages/agents-core/src/types/protocol.ts index 9694e82b..676f1ebc 100644 --- a/packages/agents-core/src/types/protocol.ts +++ b/packages/agents-core/src/types/protocol.ts @@ -606,6 +606,7 @@ export const OutputModelItem = z.discriminatedUnion('type', [ HostedToolCallItem, FunctionCallItem, ComputerUseCallItem, + FunctionCallResultItem, ReasoningItem, UnknownItem, ]); diff --git a/packages/agents-core/src/utils/smartString.ts b/packages/agents-core/src/utils/smartString.ts index c032f5f2..918a8b77 100644 --- a/packages/agents-core/src/utils/smartString.ts +++ b/packages/agents-core/src/utils/smartString.ts @@ -1,6 +1,7 @@ const BYTE_PREVIEW_LIMIT = 20; export function toSmartString(value: unknown): string { + // Produce a human-friendly string representation while preserving enough detail for debugging workflows. if (value === null || value === undefined) { return String(value); } @@ -31,7 +32,8 @@ export function toSmartString(value: unknown): string { return String(value); } -function isArrayBufferLike(value: unknown): value is ArrayBufferLike { +export function isArrayBufferLike(value: unknown): value is ArrayBufferLike { + // Detect raw ArrayBuffer-backed payloads so callers can generate full previews rather than truncated hashes. if (value instanceof ArrayBuffer) { return true; } @@ -47,13 +49,15 @@ function isArrayBufferLike(value: unknown): value is ArrayBufferLike { ); } -function isArrayBufferView(value: unknown): value is ArrayBufferView { +export function isArrayBufferView(value: unknown): value is ArrayBufferView { + // Treat typed array views as binary data for consistent serialization. return typeof ArrayBuffer !== 'undefined' && ArrayBuffer.isView(value); } -function isSerializedBufferSnapshot( +export function isSerializedBufferSnapshot( value: unknown, ): value is { type: 'Buffer'; data: number[] } { + // Support serialized Buffer snapshots (e.g., from JSON.parse) emitted by some tool outputs. return ( typeof value === 'object' && value !== null && @@ -62,6 +66,22 @@ function isSerializedBufferSnapshot( ); } +export function isNodeBuffer( + value: unknown, +): value is Uint8Array & { toString(encoding: string): string } { + // Detect runtime Buffers without importing node-specific shims, handling browser builds gracefully. 
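+ // e.g. isNodeBuffer(Buffer.from('hi')) is true under Node.js and false in
+ // browser builds, where globalThis.Buffer is typically undefined.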
+ const bufferCtor = ( + globalThis as { + Buffer?: { isBuffer(input: unknown): boolean }; + } + ).Buffer; + return Boolean( + bufferCtor && + typeof bufferCtor.isBuffer === 'function' && + bufferCtor.isBuffer(value), + ); +} + function formatByteArray(bytes: Uint8Array): string { if (bytes.length === 0) { return '[byte array (0 bytes)]'; diff --git a/packages/agents-core/test/memorySession.test.ts b/packages/agents-core/test/memorySession.test.ts new file mode 100644 index 00000000..94be3a4f --- /dev/null +++ b/packages/agents-core/test/memorySession.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, test } from 'vitest'; + +import { MemorySession } from '../src/memory/memorySession'; +import type { AgentInputItem } from '../src/types'; + +const createUserMessage = (text: string): AgentInputItem => ({ + role: 'user', + content: [ + { + type: 'input_text', + text, + }, + ], +}); + +describe('MemorySession', () => { + test('stores and retrieves items in memory', async () => { + const initialItems = [createUserMessage('hello')]; + const session = new MemorySession({ + sessionId: 'session-1', + initialItems, + }); + + expect(await session.getSessionId()).toBe('session-1'); + expect(await session.getItems()).toEqual(initialItems); + + const newItems = [createUserMessage('one'), createUserMessage('two')]; + await session.addItems(newItems); + expect(await session.getItems()).toEqual([...initialItems, ...newItems]); + + expect(await session.getItems(2)).toEqual(newItems); + + expect(await session.popItem()).toEqual(newItems[1]); + expect(await session.getItems()).toEqual([...initialItems, newItems[0]]); + + await session.clearSession(); + expect(await session.getItems()).toEqual([]); + expect(await session.getItems(3)).toEqual([]); + expect(await session.popItem()).toBeUndefined(); + }); + + test('returns clones so external mutations do not persist', async () => { + const initial = createUserMessage('start'); + const session = new MemorySession({ + sessionId: 'session-2', + initialItems: [initial], + }); + + const items = await session.getItems(); + expect(items[0]).not.toBe(initial); + (items[0] as any).content = 'mutated'; + expect(await session.getItems()).toEqual([createUserMessage('start')]); + + const next = createUserMessage('next'); + await session.addItems([next]); + (next as any).content = 'mutated'; + expect(await session.getItems()).toEqual([ + createUserMessage('start'), + createUserMessage('next'), + ]); + + const popped = await session.popItem(); + expect(popped).toEqual(createUserMessage('next')); + if (popped) { + (popped as any).content = 'mutated'; + } + expect(await session.getItems()).toEqual([createUserMessage('start')]); + }); +}); diff --git a/packages/agents-core/test/run.stream.test.ts b/packages/agents-core/test/run.stream.test.ts index 911e1170..13dc4963 100644 --- a/packages/agents-core/test/run.stream.test.ts +++ b/packages/agents-core/test/run.stream.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeAll, vi } from 'vitest'; +import { describe, it, expect, beforeAll, afterEach, vi } from 'vitest'; import { z } from 'zod'; import { Agent, @@ -19,9 +19,27 @@ import { StreamEvent, FunctionCallItem, tool, + user, + Session, + InputGuardrailTripwireTriggered, } from '../src'; import { FakeModel, FakeModelProvider, fakeModelMessage } from './stubs'; import * as protocol from '../src/types/protocol'; +import * as runImplementation from '../src/runImplementation'; + +function getFirstTextContent(item: AgentInputItem): string | undefined { + if (item.type !== 
'message') { + return undefined; + } + if (typeof item.content === 'string') { + return item.content; + } + if (Array.isArray(item.content)) { + const first = item.content[0] as { text?: string }; + return first?.text; + } + return undefined; +} // Test for unhandled rejection when stream loop throws @@ -31,6 +49,10 @@ describe('Runner.run (streaming)', () => { setDefaultModelProvider(new FakeModelProvider()); }); + afterEach(() => { + vi.restoreAllMocks(); + }); + it('does not emit unhandled rejection when stream loop fails', async () => { const agent = new Agent({ name: 'StreamFail', model: new FakeModel() }); @@ -546,6 +568,75 @@ describe('Runner.run (streaming)', () => { }); }); + it('keeps server tracker aligned with filtered inputs when streaming', async () => { + const model = new TrackingStreamingModel([ + buildTurn( + [fakeModelMessage('call the tool'), buildToolCall('call-1', 'value')], + 'resp-1', + ), + buildTurn([fakeModelMessage('all done')], 'resp-2'), + ]); + + let filterCalls = 0; + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => { + filterCalls += 1; + if (filterCalls === 1) { + return { + instructions: modelData.instructions, + input: modelData.input + .slice(1) + .map((item) => structuredClone(item)), + }; + } + return modelData; + }, + }); + + const agent = new Agent({ + name: 'StreamTrackerFilter', + model, + tools: [serverTool], + }); + + const result = await runner.run( + agent, + [user('First input'), user('Second input')], + { + stream: true, + conversationId: 'conv-filter-stream', + }, + ); + + await drain(result); + + expect(result.finalOutput).toBe('all done'); + expect(filterCalls).toBe(2); + expect(model.requests).toHaveLength(2); + + const firstInput = model.requests[0].input as AgentInputItem[]; + expect(Array.isArray(firstInput)).toBe(true); + expect(firstInput).toHaveLength(1); + expect(getFirstTextContent(firstInput[0])).toBe('Second input'); + + const secondInput = model.requests[1].input as AgentInputItem[]; + expect(Array.isArray(secondInput)).toBe(true); + expect( + secondInput.some( + (item) => + item.type === 'message' && + getFirstTextContent(item) === 'First input', + ), + ).toBe(true); + expect( + secondInput.some( + (item) => + item.type === 'function_call_result' && + (item as protocol.FunctionCallResultItem).callId === 'call-1', + ), + ).toBe(true); + }); + it('only sends new items and updates previousResponseId across turns', async () => { const model = new TrackingStreamingModel([ buildTurn( @@ -681,4 +772,266 @@ describe('Runner.run (streaming)', () => { }); }); }); + + it('persists streaming input only after the run completes successfully', async () => { + const saveInputSpy = vi + .spyOn(runImplementation, 'saveStreamInputToSession') + .mockResolvedValue(); + + const session = createSessionMock(); + + const agent = new Agent({ + name: 'StreamSuccess', + model: new ImmediateStreamingModel({ + output: [fakeModelMessage('done')], + usage: new Usage(), + }), + }); + + const runner = new Runner(); + + const result = await runner.run(agent, 'hello world', { + stream: true, + session, + }); + + await result.completed; + + expect(saveInputSpy).toHaveBeenCalledTimes(1); + const [sessionArg, persistedItems] = saveInputSpy.mock.calls[0]; + expect(sessionArg).toBe(session); + if (!Array.isArray(persistedItems)) { + throw new Error('Expected persisted session items to be an array.'); + } + expect(persistedItems).toHaveLength(1); + expect(persistedItems[0]).toMatchObject({ + role: 'user', + content: 'hello world', + }); + }); + 
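+  // The next case exercises the failure path: persisting the user's input is
+  // decoupled from whether the model stream completes successfully.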
+ it('persists streaming input when the model stream rejects before completion', async () => { + const saveInputSpy = vi + .spyOn(runImplementation, 'saveStreamInputToSession') + .mockResolvedValue(); + + const session = createSessionMock(); + const streamError = new Error('model stream failed'); + + const agent = new Agent({ + name: 'StreamFailurePersistsInput', + model: new RejectingStreamingModel(streamError), + }); + + const runner = new Runner(); + + const result = await runner.run(agent, 'save me please', { + stream: true, + session, + }); + + await expect(result.completed).rejects.toThrow('model stream failed'); + + expect(saveInputSpy).toHaveBeenCalledTimes(1); + const [, persistedItems] = saveInputSpy.mock.calls[0]; + if (!Array.isArray(persistedItems)) { + throw new Error('Expected persisted session items to be an array.'); + } + expect(persistedItems).toHaveLength(1); + expect(persistedItems[0]).toMatchObject({ + role: 'user', + content: 'save me please', + }); + }); + + it('persists filtered streaming input instead of the raw turn payload', async () => { + const saveInputSpy = vi + .spyOn(runImplementation, 'saveStreamInputToSession') + .mockResolvedValue(); + + const session = createSessionMock(); + + const agent = new Agent({ + name: 'StreamFiltered', + model: new ImmediateStreamingModel({ + output: [fakeModelMessage('done')], + usage: new Usage(), + }), + }); + + const runner = new Runner(); + + const secretInput = 'super secret'; + const redactedContent = '[filtered]'; + + const result = await runner.run(agent, secretInput, { + stream: true, + session, + callModelInputFilter: ({ modelData }) => { + const sanitizedInput = modelData.input.map((item) => { + if ( + item.type === 'message' && + 'role' in item && + item.role === 'user' + ) { + return { + ...item, + content: redactedContent, + }; + } + return item; + }); + + return { + ...modelData, + input: sanitizedInput, + }; + }, + }); + + await result.completed; + + expect(saveInputSpy).toHaveBeenCalledTimes(1); + const [, persistedItems] = saveInputSpy.mock.calls[0]; + if (!Array.isArray(persistedItems)) { + throw new Error('Expected persisted session items to be an array.'); + } + expect(persistedItems).toHaveLength(1); + expect(persistedItems[0]).toMatchObject({ + role: 'user', + content: redactedContent, + }); + expect(JSON.stringify(persistedItems)).not.toContain(secretInput); + }); + + it('skips streaming session persistence when the server manages the conversation', async () => { + const saveInputSpy = vi + .spyOn(runImplementation, 'saveStreamInputToSession') + .mockResolvedValue(); + const saveResultSpy = vi + .spyOn(runImplementation, 'saveStreamResultToSession') + .mockResolvedValue(); + + const session = createSessionMock(); + + const agent = new Agent({ + name: 'StreamServerManaged', + model: new ImmediateStreamingModel({ + output: [fakeModelMessage('done')], + usage: new Usage(), + }), + }); + + const runner = new Runner(); + + // Session is still supplied alongside conversationId to confirm we suppress duplicate persistence while preserving session-based hooks. 
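+    // (With a server-managed conversation, the transcript already lives on the
+    // server under conversationId, so mirroring items into the local session
+    // would persist each turn twice.)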
+ const result = await runner.run(agent, 'hello world', { + stream: true, + session, + conversationId: 'conv-server-managed', + }); + + await result.completed; + + expect(saveInputSpy).not.toHaveBeenCalled(); + expect(saveResultSpy).not.toHaveBeenCalled(); + }); + + it('skips persisting streaming input when an input guardrail triggers', async () => { + const saveInputSpy = vi + .spyOn(runImplementation, 'saveStreamInputToSession') + .mockResolvedValue(); + + const guardrail = { + name: 'block', + execute: vi.fn().mockResolvedValue({ + tripwireTriggered: true, + outputInfo: { reason: 'blocked' }, + }), + }; + + const session = createSessionMock(); + + const agent = new Agent({ + name: 'StreamGuardrail', + model: new ImmediateStreamingModel({ + output: [fakeModelMessage('should not run')], + usage: new Usage(), + }), + }); + + const runner = new Runner({ inputGuardrails: [guardrail] }); + + const result = await runner.run(agent, 'blocked input', { + stream: true, + session, + }); + + await expect(result.completed).rejects.toBeInstanceOf( + InputGuardrailTripwireTriggered, + ); + + expect(saveInputSpy).not.toHaveBeenCalled(); + }); }); + +class ImmediateStreamingModel implements Model { + constructor(private readonly response: ModelResponse) {} + + async getResponse(_request: ModelRequest): Promise { + return this.response; + } + + async *getStreamedResponse( + _request: ModelRequest, + ): AsyncIterable { + const usage = this.response.usage; + const output = this.response.output.map((item) => + protocol.OutputModelItem.parse(item), + ); + yield { + type: 'response_done', + response: { + id: 'r', + usage: { + requests: usage.requests, + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + totalTokens: usage.totalTokens, + }, + output, + }, + } satisfies StreamEvent; + } +} + +class RejectingStreamingModel implements Model { + constructor(private readonly error: Error) {} + + async getResponse(_request: ModelRequest): Promise { + throw this.error; + } + + getStreamedResponse(_request: ModelRequest): AsyncIterable { + const error = this.error; + return { + [Symbol.asyncIterator]() { + return { + async next() { + throw error; + }, + } satisfies AsyncIterator; + }, + } satisfies AsyncIterable; + } +} + +function createSessionMock(): Session { + return { + getSessionId: vi.fn().mockResolvedValue('session-id'), + getItems: vi.fn().mockResolvedValue([]), + addItems: vi.fn().mockResolvedValue(undefined), + popItem: vi.fn().mockResolvedValue(undefined), + clearSession: vi.fn().mockResolvedValue(undefined), + }; +} diff --git a/packages/agents-core/test/run.test.ts b/packages/agents-core/test/run.test.ts index 66168c22..51eb6d2e 100644 --- a/packages/agents-core/test/run.test.ts +++ b/packages/agents-core/test/run.test.ts @@ -1,3 +1,4 @@ +import { Buffer } from 'node:buffer'; import { beforeAll, beforeEach, @@ -11,22 +12,27 @@ import { import { z } from 'zod'; import { Agent, - AgentInputItem, MaxTurnsExceededError, ModelResponse, OutputGuardrailTripwireTriggered, + Session, + ModelInputData, + type AgentInputItem, run, Runner, setDefaultModelProvider, setTraceProcessors, setTracingDisabled, BatchTraceProcessor, + user, + assistant, } from '../src'; import { RunStreamEvent } from '../src/events'; import { handoff } from '../src/handoff'; import { RunMessageOutputItem as MessageOutputItem, RunToolApprovalItem as ToolApprovalItem, + RunToolCallOutputItem as ToolCallOutputItem, } from '../src/items'; import { getTurnInput, selectModel } from '../src/run'; import { RunContext } from 
'../src/runContext'; @@ -50,6 +56,20 @@ import { ModelSettings, } from '../src/model'; +function getFirstTextContent(item: AgentInputItem): string | undefined { + if (item.type !== 'message') { + return undefined; + } + if (typeof item.content === 'string') { + return item.content; + } + if (Array.isArray(item.content)) { + const first = item.content[0] as { text?: string }; + return first?.text; + } + return undefined; +} + describe('Runner.run', () => { beforeAll(() => { setTracingDisabled(true); @@ -576,6 +596,1238 @@ describe('Runner.run', () => { expect(spy.mock.instances[0]).toBe(spy.mock.instances[1]); spy.mockRestore(); }); + + describe('sessions', () => { + class MemorySession implements Session { + #history: AgentInputItem[]; + #added: AgentInputItem[][] = []; + sessionId?: string; + + constructor(history: AgentInputItem[] = []) { + this.#history = [...history]; + } + + get added(): AgentInputItem[][] { + return this.#added; + } + + async getSessionId(): Promise { + if (!this.sessionId) { + this.sessionId = 'conv_test'; + } + return this.sessionId; + } + + async getItems(limit?: number): Promise { + if (limit == null) { + return [...this.#history]; + } + return this.#history.slice(-limit); + } + + async addItems(items: AgentInputItem[]): Promise { + this.#added.push(items); + this.#history.push(...items); + } + + async popItem(): Promise { + return this.#history.pop(); + } + + async clearSession(): Promise { + this.#history = []; + this.sessionId = undefined; + } + } + + class RecordingModel extends FakeModel { + lastRequest: ModelRequest | undefined; + + override async getResponse( + request: ModelRequest, + ): Promise { + this.lastRequest = request; + return super.getResponse(request); + } + } + + it('uses session history and stores run results', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'SessionAgent', model }); + const historyItem = fakeModelMessage( + 'earlier message', + ) as AgentInputItem; + const session = new MemorySession([historyItem]); + const runner = new Runner(); + + await runner.run(agent, 'How are you?', { session }); + + const recordedInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(recordedInput)).toBe(true); + expect(recordedInput[0]).toEqual(historyItem); + expect(recordedInput[1]).toMatchObject({ + role: 'user', + content: 'How are you?', + }); + + expect(session.added).toHaveLength(1); + expect(session.added[0][0]).toMatchObject({ + role: 'user', + content: 'How are you?', + }); + expect(session.added[0][1]).toMatchObject({ role: 'assistant' }); + const savedAssistant = session + .added[0][1] as protocol.AssistantMessageItem; + const firstPart = Array.isArray(savedAssistant.content) + ? 
(savedAssistant.content[0] as { providerData?: unknown }) + : undefined; + expect(firstPart?.providerData).toEqual({ annotations: [] }); + }); + + it('rejects list inputs when using session history', async () => { + const runner = new Runner(); + const agent = new Agent({ name: 'ListSession' }); + const session = new MemorySession(); + + await expect( + runner.run(agent, [user('Hello')], { + session, + }), + ).rejects.toThrow('RunConfig.sessionInputCallback'); + }); + + it('allows list inputs when session input callback is provided', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'SessionCallbackAgent', model }); + const sessionHistory: AgentInputItem[] = [ + user('Keep this history item'), + assistant('Drop this assistant reply'), + ]; + const session = new MemorySession([...sessionHistory]); + const runner = new Runner({ + sessionInputCallback: (history, newItems) => { + return history + .filter( + (item) => + item.type === 'message' && + 'role' in item && + item.role === 'user', + ) + .concat(newItems); + }, + }); + + await runner.run(agent, [user('New message')], { session }); + + const recordedInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(recordedInput)).toBe(true); + expect(recordedInput).toHaveLength(2); + expect( + recordedInput[0].type === 'message' && + 'role' in recordedInput[0] && + recordedInput[0].role, + ).toBe('user'); + expect(getFirstTextContent(recordedInput[0])).toBe( + 'Keep this history item', + ); + expect( + recordedInput[1].type === 'message' && + 'role' in recordedInput[1] && + recordedInput[1].role, + ).toBe('user'); + expect(getFirstTextContent(recordedInput[1])).toBe('New message'); + }); + + it('supports async session input callback', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'AsyncSessionCallback', model }); + const session = new MemorySession([ + user('Older message'), + user('Newest history'), + ]); + const runner = new Runner(); + + await runner.run(agent, [user('Fresh input')], { + session, + sessionInputCallback: async (history, newItems) => { + await Promise.resolve(); + return history.slice(-1).concat(newItems); + }, + }); + + const recordedInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(recordedInput)).toBe(true); + expect(recordedInput).toHaveLength(2); + expect(getFirstTextContent(recordedInput[0])).toBe('Newest history'); + expect(getFirstTextContent(recordedInput[1])).toBe('Fresh input'); + }); + + it('persists transformed session input from callback', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('session response')], + }, + ]); + const agent = new Agent({ name: 'SessionTransform', model }); + const session = new MemorySession(); + const runner = new Runner(); + const original = 'Sensitive payload'; + const redacted = '[redacted]'; + + await runner.run(agent, original, { + session, + sessionInputCallback: (history, newItems) => { + expect(history).toHaveLength(0); + if (newItems[0] && typeof newItems[0] === 'object') { + (newItems[0] as protocol.UserMessageItem).content = redacted; + } + return history.concat(newItems); + }, + }); + + const recordedInput = model.lastRequest?.input as AgentInputItem[]; + expect(recordedInput[recordedInput.length - 1]).toMatchObject({ 
+ role: 'user', + content: redacted, + }); + + expect(session.added).toHaveLength(1); + const persistedTurn = session.added[0]; + expect(persistedTurn[0]).toMatchObject({ + role: 'user', + content: redacted, + }); + }); + + it('does not duplicate history when callback clones entries', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('clone response')], + }, + ]); + const history = [user('Existing history item')]; + const session = new MemorySession(history); + const agent = new Agent({ name: 'CloneSession', model }); + const runner = new Runner(); + + await runner.run(agent, [user('Fresh input')], { + session, + sessionInputCallback: (incomingHistory, newItems) => { + const clonedHistory = incomingHistory.map((item) => + structuredClone(item), + ); + const clonedNewItems = newItems.map((item) => + structuredClone(item), + ); + return clonedHistory.concat(clonedNewItems); + }, + }); + + expect(session.added).toHaveLength(1); + const [persistedItems] = session.added; + const persistedUsers = persistedItems.filter( + (item): item is protocol.UserMessageItem => + item.type === 'message' && 'role' in item && item.role === 'user', + ); + expect(persistedUsers).toHaveLength(1); + expect(getFirstTextContent(persistedUsers[0])).toBe('Fresh input'); + }); + + it('persists reordered new items ahead of matching history', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('reordered response')], + }, + ]); + const historyMessage = user('Repeatable message'); + const newMessage = user('Repeatable message'); + const session = new MemorySession([historyMessage]); + const agent = new Agent({ name: 'ReorderedSession', model }); + const runner = new Runner({ + sessionInputCallback: (history, newItems) => newItems.concat(history), + }); + + await runner.run(agent, [newMessage], { session }); + + expect(session.added).toHaveLength(1); + const [persisted] = session.added; + const persistedUsers = persisted.filter( + (item): item is protocol.UserMessageItem => + item.type === 'message' && 'role' in item && item.role === 'user', + ); + expect(persistedUsers).toHaveLength(1); + expect(getFirstTextContent(persistedUsers[0])).toBe( + 'Repeatable message', + ); + }); + + it('persists binary payloads that share prefixes with history', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('binary response')], + }, + ]); + const historyPayload = new Uint8Array(32); + const newPayload = new Uint8Array(32); + for (let i = 0; i < 32; i++) { + const value = i < 20 ? 
0xaa : i; + historyPayload[i] = value; + newPayload[i] = value; + } + historyPayload[31] = 0xbb; + newPayload[31] = 0xcc; + + const session = new MemorySession([ + user('History with binary', { payload: historyPayload }), + ]); + const agent = new Agent({ name: 'BinarySession', model }); + const runner = new Runner(); + + await runner.run(agent, [user('Binary input')], { + session, + sessionInputCallback: (history, newItems) => { + const clonedHistory = history.map((item) => structuredClone(item)); + const updatedNewItems = newItems.map((item) => { + const cloned = structuredClone(item); + cloned.providerData = { payload: newPayload }; + return cloned; + }); + return clonedHistory.concat(updatedNewItems); + }, + }); + + expect(session.added).toHaveLength(1); + const [persistedItems] = session.added; + const persistedPayloads = persistedItems + .filter( + (item): item is protocol.UserMessageItem => + item.type === 'message' && + 'role' in item && + item.role === 'user' && + item.providerData?.payload, + ) + .map((item) => item.providerData?.payload); + const expectedNewPayload = `data:text/plain;base64,${Buffer.from(newPayload).toString('base64')}`; + const expectedHistoryPayload = `data:text/plain;base64,${Buffer.from(historyPayload).toString('base64')}`; + expect(persistedPayloads).toContain(expectedNewPayload); + expect(persistedPayloads).not.toContain(expectedHistoryPayload); + }); + + it('throws when session input callback returns invalid data', async () => { + const model = new RecordingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'InvalidCallback', model }); + const session = new MemorySession([user('history')]); + const runner = new Runner(); + + await expect( + runner.run(agent, 'Hello', { + session, + sessionInputCallback: () => + 'not-an-array' as unknown as AgentInputItem[], + }), + ).rejects.toThrow( + 'Session input callback must return an array of AgentInputItem objects.', + ); + }); + + it('stores function tool call and structured output in session', async () => { + const functionCall: protocol.FunctionCallItem = { + type: 'function_call', + callId: 'call-weather', + name: 'weather_lookup', + status: 'completed', + arguments: JSON.stringify({ city: 'San Francisco' }), + providerData: { source: 'openai' }, + } as protocol.FunctionCallItem; + + const model = new FakeModel([ + { + output: [functionCall], + usage: new Usage(), + }, + { + output: [fakeModelMessage('Weather retrieved.')], + usage: new Usage(), + }, + ]); + + const weatherTool = tool({ + name: 'weather_lookup', + description: 'Looks up weather information', + parameters: z.object({ city: z.string() }), + execute: async ({ city }) => [ + { + type: 'text', + text: `Weather for ${city}`, + }, + ], + }); + + const agent = new Agent({ + name: 'FunctionToolAgent', + model, + tools: [weatherTool], + }); + + const session = new MemorySession(); + const runner = new Runner(); + + await runner.run(agent, 'What is the weather in San Francisco?', { + session, + }); + + expect(session.added).toHaveLength(1); + const savedItems = session.added[0]; + expect(savedItems).toHaveLength(4); + const savedFunctionCall = savedItems[1] as protocol.FunctionCallItem; + expect(savedFunctionCall.providerData).toEqual({ source: 'openai' }); + expect(savedFunctionCall.arguments).toBe( + JSON.stringify({ city: 'San Francisco' }), + ); + const savedResult = savedItems[2] as protocol.FunctionCallResultItem & { + output: protocol.ToolCallStructuredOutput[]; + }; + 
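+      // The tool returned structured content parts, so the persisted result is
+      // expected to hold an array of typed outputs rather than a flattened string.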
expect(Array.isArray(savedResult.output)).toBe(true); + expect(savedResult.output[0]).toMatchObject({ + type: 'input_text', + text: 'Weather for San Francisco', + }); + }); + + it('stores hosted tool call metadata when approval is required', async () => { + const hostedCall: protocol.HostedToolCallItem = { + type: 'hosted_tool_call', + id: 'approval-1', + name: 'mcp_approval_request', + status: 'completed', + providerData: { + type: 'mcp_approval_request', + server_label: 'demo_server', + name: 'file_search', + id: 'approval-1', + arguments: '{"query":"invoices"}', + }, + } as protocol.HostedToolCallItem; + + const model = new FakeModel([ + { + output: [hostedCall], + usage: new Usage(), + }, + ]); + + const hostedTool = hostedMcpTool({ + serverLabel: 'demo_server', + serverUrl: 'https://example.com', + requireApproval: { + always: { toolNames: ['file_search'] }, + }, + }); + + const agent = new Agent({ + name: 'HostedToolAgent', + model, + tools: [hostedTool], + }); + + const session = new MemorySession(); + const runner = new Runner(); + + const result = await runner.run(agent, 'Find latest invoices', { + session, + }); + + expect(result.interruptions).toHaveLength(1); + expect(session.added).toHaveLength(1); + const savedItems = session.added[0]; + expect(savedItems).toHaveLength(2); + const savedHostedCall = savedItems[1] as protocol.HostedToolCallItem & { + providerData: Record; + }; + expect(savedHostedCall.providerData).toEqual(hostedCall.providerData); + expect(savedHostedCall.id).toBe('approval-1'); + }); + }); + }); + + describe('callModelInputFilter', () => { + class FilterTrackingModel extends FakeModel { + lastRequest?: ModelRequest; + + override async getResponse( + request: ModelRequest, + ): Promise { + this.lastRequest = request; + return await super.getResponse(request); + } + } + + class FilterStreamingModel implements Model { + lastRequest?: ModelRequest; + + constructor(private readonly response: ModelResponse) {} + + async getResponse(request: ModelRequest): Promise { + this.lastRequest = request; + return this.response; + } + + async *getStreamedResponse( + request: ModelRequest, + ): AsyncIterable { + this.lastRequest = request; + yield { + type: 'response_done', + response: { + id: 'stream-filter', + usage: { + requests: 1, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }, + output: this.response.output, + }, + } as protocol.StreamEvent; + } + } + + it('modifies model input for non-streaming runs', async () => { + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('filtered result')], + }, + ]); + const agent = new Agent({ + name: 'FilterAgent', + instructions: 'Base instructions', + model, + }); + + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => { + return { + instructions: `${modelData.instructions ?? 
''} ::filtered`, + input: modelData.input.slice(-1), + }; + }, + }); + + await runner.run(agent, [user('First input'), user('Second input')]); + + expect(model.lastRequest?.systemInstructions).toBe( + 'Base instructions ::filtered', + ); + const sentInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(sentInput)).toBe(true); + expect(sentInput).toHaveLength(1); + expect( + sentInput[0].type === 'message' && + 'role' in sentInput[0] && + sentInput[0].role, + ).toBe('user'); + expect(getFirstTextContent(sentInput[0])).toBe('Second input'); + }); + + it('supports async filters for streaming runs', async () => { + const streamingModel = new FilterStreamingModel({ + output: [fakeModelMessage('stream response')], + usage: new Usage(), + }); + const agent = new Agent({ + name: 'StreamFilterAgent', + instructions: 'Stream instructions', + model: streamingModel, + }); + + const runner = new Runner({ + callModelInputFilter: async ({ modelData }) => { + await Promise.resolve(); + return { + instructions: `${modelData.instructions ?? ''} ::stream`, + input: modelData.input.slice(0, 1), + }; + }, + }); + + const result = await runner.run(agent, [user('Alpha'), user('Beta')], { + stream: true, + }); + + const events: RunStreamEvent[] = []; + for await (const e of result.toStream()) { + events.push(e); + } + await result.completed; + + expect(streamingModel.lastRequest?.systemInstructions).toBe( + 'Stream instructions ::stream', + ); + const streamInput = streamingModel.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(streamInput)).toBe(true); + expect(streamInput).toHaveLength(1); + expect( + streamInput[0].type === 'message' && + 'role' in streamInput[0] && + streamInput[0].role, + ).toBe('user'); + expect(getFirstTextContent(streamInput[0])).toBe('Alpha'); + }); + + it('does not mutate run history when filter mutates input items', async () => { + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'HistoryFilterAgent', + model, + }); + + const originalText = 'Top secret message'; + const redactedText = '[redacted]'; + + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => { + const first = modelData.input[0]; + if ( + first?.type === 'message' && + Array.isArray(first.content) && + first.content.length > 0 + ) { + const firstChunk = first.content[0] as { text?: string }; + if (firstChunk) { + firstChunk.text = redactedText; + } + } + return modelData; + }, + }); + + const result = await runner.run(agent, [user(originalText)]); + + const sentInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(sentInput)).toBe(true); + expect(getFirstTextContent(sentInput[0])).toBe(redactedText); + + const history = result.history; + expect(getFirstTextContent(history[0])).toBe(originalText); + }); + + it('does not duplicate existing session history when filters run', async () => { + class TrackingSession implements Session { + #history: AgentInputItem[]; + added: AgentInputItem[][] = []; + sessionId?: string; + + constructor(history: AgentInputItem[]) { + this.#history = [...history]; + this.sessionId = 'filter-session'; + } + + async getSessionId(): Promise { + if (!this.sessionId) { + this.sessionId = 'filter-session'; + } + return this.sessionId; + } + + async getItems(): Promise { + return [...this.#history]; + } + + async addItems(items: AgentInputItem[]): Promise { + this.added.push(items); + this.#history.push(...items); + } + + async popItem(): Promise { + 
return this.#history.pop(); + } + + async clearSession(): Promise { + this.#history = []; + this.sessionId = undefined; + } + } + + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'FilterSessionAgent', + model, + }); + const historyMessage = user('Persisted history'); + const session = new TrackingSession([historyMessage]); + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => ({ + instructions: modelData.instructions, + input: modelData.input, + }), + }); + + await runner.run(agent, 'Fresh input', { session }); + + expect(session.added).toHaveLength(1); + const [persisted] = session.added; + const persistedUsers = persisted.filter( + (item) => 'role' in item && item.role === 'user', + ); + expect(persistedUsers).toHaveLength(1); + const persistedTexts = persistedUsers + .map((item) => { + if ('content' in item && typeof item.content === 'string') { + return item.content; + } + return getFirstTextContent(item); + }) + .filter((text): text is string => typeof text === 'string'); + expect(persistedTexts).toContain('Fresh input'); + expect(persistedTexts).not.toContain('Persisted history'); + }); + + it('does not persist raw inputs when filters drop every item', async () => { + class RecordingSession implements Session { + #history: AgentInputItem[] = []; + added: AgentInputItem[][] = []; + #sessionId: string | undefined = 'empty-filter-session'; + + async getSessionId(): Promise { + if (!this.#sessionId) { + this.#sessionId = 'empty-filter-session'; + } + return this.#sessionId; + } + + async getItems(): Promise { + return [...this.#history]; + } + + async addItems(items: AgentInputItem[]): Promise { + this.added.push(items); + this.#history.push(...items); + } + + async popItem(): Promise { + return this.#history.pop(); + } + + async clearSession(): Promise { + this.#history = []; + this.#sessionId = undefined; + } + } + + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'EmptyFilterAgent', + model, + }); + const session = new RecordingSession(); + + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => ({ + instructions: modelData.instructions, + input: [], + }), + }); + + const secret = 'sensitive payload'; + const result = await runner.run(agent, secret, { session }); + + expect(result.finalOutput).toBe('Hello World'); + expect(model.lastRequest?.input).toEqual([]); + + expect(session.added).toHaveLength(1); + const persisted = session.added[0]; + const persistedTexts = persisted + .map((item) => getFirstTextContent(item)) + .filter((text): text is string => typeof text === 'string'); + expect(persistedTexts).not.toContain(secret); + const userItems = persisted.filter( + (item) => 'role' in item && item.role === 'user', + ); + expect(userItems).toHaveLength(0); + }); + + it('keeps original inputs when filters prepend new items', async () => { + class RecordingSession implements Session { + #history: AgentInputItem[] = []; + added: AgentInputItem[][] = []; + #sessionId: string | undefined = 'prepended-filter-session'; + + async getSessionId(): Promise { + if (!this.#sessionId) { + this.#sessionId = 'prepended-filter-session'; + } + return this.#sessionId; + } + + async getItems(): Promise { + return [...this.#history]; + } + + async addItems(items: AgentInputItem[]): Promise { + this.added.push(items); + this.#history.push(...items); + } + + async popItem(): Promise { + return this.#history.pop(); + } + + 
async clearSession(): Promise { + this.#history = []; + this.#sessionId = undefined; + } + } + + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'PrependedFilterAgent', + model, + }); + const session = new RecordingSession(); + + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => ({ + instructions: modelData.instructions, + input: [assistant('primer'), ...modelData.input], + }), + }); + + await runner.run(agent, 'Persist me', { session }); + + expect(session.added).toHaveLength(1); + const [persisted] = session.added; + const persistedTexts = persisted + .map((item) => getFirstTextContent(item)) + .filter((text): text is string => typeof text === 'string'); + expect(persistedTexts).toContain('Persist me'); + }); + + it('throws when filter returns invalid data', async () => { + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'InvalidFilterAgent', + model, + }); + const runner = new Runner({ + callModelInputFilter: () => + ({ + instructions: 'invalid', + }) as unknown as ModelInputData, + }); + + await expect(runner.run(agent, 'Hello')).rejects.toThrow( + 'ModelInputData', + ); + }); + + it('prefers per-run callModelInputFilter over runner config', async () => { + const model = new FilterTrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + }, + ]); + const agent = new Agent({ + name: 'OverrideFilterAgent', + model, + }); + + const defaultFilter = vi.fn(({ modelData }) => ({ + instructions: `${modelData.instructions ?? ''} default`, + input: modelData.input, + })); + const overrideFilter = vi.fn((payload) => ({ + instructions: 'override instructions', + input: payload.modelData.input, + })); + + const runner = new Runner({ + callModelInputFilter: defaultFilter, + }); + + const context = { tenant: 'acme' }; + + await runner.run(agent, 'Hello override', { + callModelInputFilter: overrideFilter, + context, + }); + + expect(defaultFilter).not.toHaveBeenCalled(); + expect(overrideFilter).toHaveBeenCalledTimes(1); + const args = overrideFilter.mock.calls[0][0]; + expect(args.context).toEqual(context); + + expect(model.lastRequest?.systemInstructions).toBe( + 'override instructions', + ); + const sentInput = model.lastRequest?.input as AgentInputItem[]; + expect(Array.isArray(sentInput)).toBe(true); + expect(sentInput).toHaveLength(1); + expect(getFirstTextContent(sentInput[0])).toBe('Hello override'); + }); + + it('keeps server conversation tracking aligned with filtered inputs', async () => { + class ConversationTrackingModel implements Model { + requests: ModelRequest[] = []; + + constructor(private readonly responses: ModelResponse[]) {} + + async getResponse(request: ModelRequest): Promise { + const cloned: ModelRequest = { + ...request, + input: Array.isArray(request.input) + ? 
(JSON.parse(JSON.stringify(request.input)) as AgentInputItem[]) + : request.input, + }; + this.requests.push(cloned); + const response = this.responses.shift(); + if (!response) { + throw new Error('No response configured'); + } + return response; + } + + getStreamedResponse( + _request: ModelRequest, + ): AsyncIterable { + throw new Error('Not implemented'); + } + } + + const model = new ConversationTrackingModel([ + { + output: [ + fakeModelMessage('call the tool'), + { + id: 'call-1', + type: 'function_call', + name: 'filterTool', + callId: 'call-1', + status: 'completed', + arguments: JSON.stringify({ test: 'value' }), + } as protocol.FunctionCallItem, + ], + usage: new Usage(), + responseId: 'resp-1', + }, + { + output: [fakeModelMessage('all done')], + usage: new Usage(), + responseId: 'resp-2', + }, + ]); + + const filterTool = tool({ + name: 'filterTool', + description: 'test tool', + parameters: z.object({ test: z.string() }), + execute: async ({ test }) => `result:${test}`, + }); + + let filterCalls = 0; + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => { + filterCalls += 1; + if (filterCalls === 1) { + return { + instructions: modelData.instructions, + input: modelData.input + .slice(1) + .map((item) => structuredClone(item)), + }; + } + return modelData; + }, + }); + + const agent = new Agent({ + name: 'TrackerFilterAgent', + model, + tools: [filterTool], + }); + + const result = await runner.run( + agent, + [user('First input'), user('Second input')], + { conversationId: 'conv-filter-tracker' }, + ); + + expect(result.finalOutput).toBe('all done'); + expect(filterCalls).toBe(2); + expect(model.requests).toHaveLength(2); + + const firstInput = model.requests[0].input as AgentInputItem[]; + expect(Array.isArray(firstInput)).toBe(true); + expect(firstInput).toHaveLength(1); + expect(getFirstTextContent(firstInput[0])).toBe('Second input'); + + const secondInput = model.requests[1].input as AgentInputItem[]; + expect(Array.isArray(secondInput)).toBe(true); + const secondMessages = secondInput.filter( + (item) => item.type === 'message', + ); + expect(secondMessages).toHaveLength(1); + expect(getFirstTextContent(secondMessages[0])).toBe('First input'); + + expect( + secondInput.some( + (item) => + item.type === 'function_call_result' && + (item as protocol.FunctionCallResultItem).callId === 'call-1', + ), + ).toBe(true); + }); + + it('stops requeuing sanitized inputs when filters replace them', async () => { + class RedactionTrackingModel implements Model { + requests: ModelRequest[] = []; + + constructor(private readonly responses: ModelResponse[]) {} + + async getResponse(request: ModelRequest): Promise { + const cloned: ModelRequest = { + ...request, + input: Array.isArray(request.input) + ? 
(JSON.parse(JSON.stringify(request.input)) as AgentInputItem[]) + : request.input, + }; + this.requests.push(cloned); + const response = this.responses.shift(); + if (!response) { + throw new Error('No response configured'); + } + return response; + } + + getStreamedResponse( + _request: ModelRequest, + ): AsyncIterable { + throw new Error('Not implemented'); + } + } + + const model = new RedactionTrackingModel([ + { + output: [ + fakeModelMessage('call the tool'), + { + id: 'call-1', + type: 'function_call', + name: 'filterTool', + callId: 'call-1', + status: 'completed', + arguments: JSON.stringify({ test: 'value' }), + } as protocol.FunctionCallItem, + ], + usage: new Usage(), + responseId: 'resp-redact-1', + }, + { + output: [fakeModelMessage('all done')], + usage: new Usage(), + responseId: 'resp-redact-2', + }, + ]); + + const filterTool = tool({ + name: 'filterTool', + description: 'test tool', + parameters: z.object({ test: z.string() }), + execute: async ({ test }) => `result:${test}`, + }); + + let filterCalls = 0; + const runner = new Runner({ + callModelInputFilter: ({ modelData }) => { + filterCalls += 1; + if (filterCalls === 1) { + return { + instructions: modelData.instructions, + input: modelData.input.map((item) => { + if ( + item?.type === 'message' && + 'role' in item && + item.role === 'user' + ) { + const clone = structuredClone(item); + if (typeof clone.content === 'string') { + clone.content = '[redacted]'; + } else if (Array.isArray(clone.content)) { + const firstChunk = clone.content[0] as { text?: string }; + if (firstChunk) { + firstChunk.text = '[redacted]'; + } + } + return clone; + } + return structuredClone(item); + }), + }; + } + return modelData; + }, + }); + + const agent = new Agent({ + name: 'RedactionFilterAgent', + model, + tools: [filterTool], + }); + + const result = await runner.run(agent, [user('Sensitive payload')], { + conversationId: 'conv-filter-redact', + }); + + expect(result.finalOutput).toBe('all done'); + expect(filterCalls).toBe(2); + expect(model.requests).toHaveLength(2); + + const firstInput = model.requests[0].input as AgentInputItem[]; + expect(Array.isArray(firstInput)).toBe(true); + expect(getFirstTextContent(firstInput[0])).toBe('[redacted]'); + + const secondInput = model.requests[1].input as AgentInputItem[]; + const secondTexts = secondInput + .map((item) => getFirstTextContent(item)) + .filter((text): text is string => typeof text === 'string'); + expect(secondTexts).not.toContain('[redacted]'); + expect(secondTexts).not.toContain('Sensitive payload'); + }); + + it('preserves providerData when saving streaming session items', async () => { + class MetadataStreamingModel implements Model { + constructor(private readonly response: ModelResponse) {} + + async getResponse(_request: ModelRequest): Promise { + return this.response; + } + + async *getStreamedResponse( + _request: ModelRequest, + ): AsyncIterable { + yield { + type: 'response_done', + response: { + id: 'meta-stream', + usage: { + requests: 1, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }, + output: this.response.output, + }, + } as protocol.StreamEvent; + } + } + + const assistantMessage: protocol.AssistantMessageItem = { + ...fakeModelMessage('assistant with metadata'), + providerData: { annotations: ['keep-me'] }, + }; + const model = new MetadataStreamingModel({ + output: [assistantMessage], + usage: new Usage(), + }); + + const agent = new Agent({ + name: 'StreamSessionMetadata', + model, + }); + + class RecordingSession implements Session { + 
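+        // Minimal in-memory Session double: it records each addItems batch so
+        // the assertions below can inspect exactly what the runner persisted.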
#history: AgentInputItem[] = []; + added: AgentInputItem[][] = []; + sessionId = 'stream-session'; + + async getSessionId(): Promise { + return this.sessionId; + } + + async getItems(): Promise { + return [...this.#history]; + } + + async addItems(items: AgentInputItem[]): Promise { + this.added.push(items); + this.#history.push(...items); + } + + async popItem(): Promise { + return this.#history.pop(); + } + + async clearSession(): Promise { + this.#history = []; + } + } + + const session = new RecordingSession(); + const runner = new Runner(); + + const result = await runner.run(agent, 'Hi stream', { + stream: true, + session, + }); + + for await (const _event of result.toStream()) { + // exhaust stream so the run finishes + } + await result.completed; + + expect(session.added).toHaveLength(2); + const streamedItems = session.added[1]; + expect(streamedItems).toHaveLength(1); + const savedAssistant = streamedItems[0] as protocol.AssistantMessageItem; + expect(savedAssistant.providerData).toEqual({ annotations: ['keep-me'] }); + expect(getFirstTextContent(savedAssistant)).toBe( + 'assistant with metadata', + ); + }); }); describe('gpt-5 default model adjustments', () => { @@ -699,6 +1951,30 @@ describe('Runner.run', () => { describe('server-managed conversation state', () => { type TurnResponse = ModelResponse; + class RecordingSession implements Session { + public added: AgentInputItem[][] = []; + + async getSessionId(): Promise { + return 'server-managed-session'; + } + + async getItems(): Promise { + return []; + } + + async addItems(items: AgentInputItem[]): Promise { + this.added.push(items); + } + + async popItem(): Promise { + return undefined; + } + + async clearSession(): Promise { + this.added = []; + } + } + class TrackingModel implements Model { public requests: ModelRequest[] = []; public firstRequest: ModelRequest | undefined; @@ -766,6 +2042,91 @@ describe('Runner.run', () => { execute: async ({ test }) => `result:${test}`, }); + it('skips persisting turns when the server manages conversation history via conversationId', async () => { + const model = new TrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'ServerManagedConversation', model }); + // Deliberately combine session with conversationId to ensure callbacks and state helpers remain usable without duplicating remote history. + const session = new RecordingSession(); + const runner = new Runner(); + + await runner.run(agent, 'Hello there', { + session, + conversationId: 'conv-server-managed', + }); + + expect(session.added).toHaveLength(0); + expect(model.lastRequest?.conversationId).toBe('conv-server-managed'); + }); + + it('skips persisting turns when the server manages conversation history via previousResponseId', async () => { + const model = new TrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'ServerManagedPrevious', model }); + // Deliberately combine session with previousResponseId to ensure we honor server-side transcripts while keeping session utilities available. 
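+      // (Like conversationId above, previousResponseId marks the transcript as
+      // server-managed, so the session should receive no addItems calls.)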
+ const session = new RecordingSession(); + const runner = new Runner(); + + await runner.run(agent, 'Hi again', { + session, + previousResponseId: 'resp-existing', + }); + + expect(session.added).toHaveLength(0); + expect(model.lastRequest?.previousResponseId).toBe('resp-existing'); + }); + + it('preserves user input when the session callback only reuses history with conversationId', async () => { + const model = new TrackingModel([ + { + ...TEST_MODEL_RESPONSE_BASIC, + output: [fakeModelMessage('response')], + }, + ]); + const agent = new Agent({ name: 'ServerManagedReuse', model }); + const persistedHistory: AgentInputItem[] = [ + assistant('Persisted reply from history'), + ]; + const session: Session = { + async getSessionId() { + return 'server-managed-session'; + }, + async getItems() { + return persistedHistory; + }, + async addItems(items) { + persistedHistory.push(...items); + }, + async popItem() { + return persistedHistory.pop(); + }, + async clearSession() { + persistedHistory.length = 0; + }, + }; + const runner = new Runner(); + + await runner.run(agent, 'Latest user input', { + session, + conversationId: 'conv-history-only', + sessionInputCallback: (historyItems) => historyItems, + }); + + const firstInput = model.firstRequest?.input; + expect(Array.isArray(firstInput)).toBe(true); + const sentItems = firstInput as AgentInputItem[]; + expect(sentItems).toHaveLength(1); + expect(getFirstTextContent(sentItems[0])).toBe('Latest user input'); + }); + it('only sends new items when using conversationId across turns', async () => { const model = new TrackingModel([ buildResponse( @@ -1131,14 +2492,23 @@ describe('Runner.run', () => { conversationId: 'conv-mixed', }); - expect(model.requests).toHaveLength(2); - const secondItems = model.requests[1].input as AgentInputItem[]; - expect(secondItems).toHaveLength(1); - expect(secondItems[0]).toMatchObject({ - type: 'function_call_result', - callId: 'call-mixed', + expect(model.requests).toHaveLength(1); + + const toolOutputs = secondResult.newItems.filter( + (item) => + item instanceof ToolCallOutputItem && + item.rawItem.type === 'function_call_result' && + item.rawItem.callId === 'call-mixed', + ); + expect(toolOutputs).toHaveLength(1); + + expect(secondResult.interruptions).toHaveLength(1); + expect(secondResult.interruptions[0].rawItem).toMatchObject({ + providerData: { id: 'approval-id', type: 'mcp_approval_request' }, }); - expect(secondResult.finalOutput).toBe('still waiting'); + expect(secondResult.state._currentStep?.type).toBe( + 'next_step_interruption', + ); }); it('sends full history when no server-managed state is provided', async () => { diff --git a/packages/agents-core/test/runImplementation.test.ts b/packages/agents-core/test/runImplementation.test.ts index eb3a8c4f..9205cd38 100644 --- a/packages/agents-core/test/runImplementation.test.ts +++ b/packages/agents-core/test/runImplementation.test.ts @@ -17,6 +17,7 @@ import { ModelResponse } from '../src/model'; import { RunResult, StreamedRunResult } from '../src/result'; import { getTracing } from '../src/run'; import { RunState } from '../src/runState'; +import type { ProcessedResponse } from '../src/runImplementation'; import { addStepToRunResult, AgentToolUseTracker, @@ -24,11 +25,15 @@ import { getToolCallOutputItem, maybeResetToolChoice, processModelResponse, + prepareInputItemsWithSession, executeFunctionToolCalls, executeComputerActions, executeHandoffCalls, - executeToolsAndSideEffects, + resolveTurnAfterModelResponse, streamStepItemsToRunResult, + 
saveToSession, + resolveInterruptedTurn, + toInputItemList, } from '../src/runImplementation'; import { FunctionTool, @@ -57,6 +62,9 @@ import { Runner } from '../src/run'; import { RunContext } from '../src/runContext'; import { setDefaultModelProvider } from '../src'; import { Logger } from '../src/logger'; +import type { UnknownContext } from '../src/types'; +import type { Session } from '../src/memory/session'; +import type { AgentInputItem } from '../src/types'; beforeAll(() => { setTracingDisabled(true); @@ -134,6 +142,506 @@ describe('maybeResetToolChoice', () => { }); }); +describe('saveToSession', () => { + class MemorySession implements Session { + items: AgentInputItem[] = []; + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return [...this.items]; + } + + async addItems(items: AgentInputItem[]): Promise { + this.items.push(...items); + } + + async popItem(): Promise { + return this.items.pop(); + } + + async clearSession(): Promise { + this.items = []; + } + } + + it('persists tool outputs when resuming a turn after approvals', async () => { + const textAgent = new Agent({ + name: 'Hitl Agent', + outputType: 'text', + instructions: 'test', + }); + const agent = textAgent as unknown as Agent< + UnknownContext, + AgentOutputType + >; + const session = new MemorySession(); + const context = new RunContext(undefined as UnknownContext); + const state = new RunState< + UnknownContext, + Agent + >(context, 'hello', agent, 10); + + const functionCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_1', + callId: 'call_1', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '1' }), + providerData: {}, + }; + + const approvalItem = new ToolApprovalItem(functionCall, textAgent); + state._generatedItems = [approvalItem]; + state._currentStep = { + type: 'next_step_interruption', + data: { + interruptions: [approvalItem], + }, + }; + + const preApprovalResult = new RunResult(state); + await saveToSession( + session, + toInputItemList(state._originalInput), + preApprovalResult, + ); + + expect(session.items).toEqual([ + { + type: 'message', + role: 'user', + content: 'hello', + }, + ]); + expect(state._currentTurnPersistedItemCount).toBe(1); + + const toolDefinition = tool({ + name: 'lookup_customer_profile', + description: 'mock lookup', + parameters: z.object({ id: z.string() }), + async execute({ id }) { + return `No customer found for id ${id}.`; + }, + }) as unknown as FunctionTool; + + const assistantMessage: protocol.AssistantMessageItem = { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text: 'Ready to help.', + }, + ], + providerData: {}, + }; + + const processedResponse: ProcessedResponse = { + newItems: [new MessageOutputItem(assistantMessage, textAgent)], + handoffs: [], + functions: [ + { + toolCall: functionCall, + tool: toolDefinition, + }, + ], + computerActions: [], + mcpApprovalRequests: [], + toolsUsed: [], + hasToolsOrApprovalsToRun() { + return false; + }, + } as ProcessedResponse; + + const runner = new Runner(); + const resumedResponse: ModelResponse = { + usage: new Usage({ + requests: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }), + output: [], + }; + + const turnResult = await withTrace('hitl-test-trace', async () => { + return resolveInterruptedTurn( + textAgent, + state._originalInput, + state._generatedItems, + resumedResponse, + processedResponse, + runner, + state, + ); + 
}); + + state._originalInput = turnResult.originalInput; + state._generatedItems = turnResult.generatedItems; + state._currentStep = turnResult.nextStep; + + const resumedResult = new RunResult(state); + await saveToSession(session, [], resumedResult); + + expect(session.items).toHaveLength(2); + const last = session.items[ + session.items.length - 1 + ] as protocol.FunctionCallResultItem; + expect(last.type).toBe('function_call_result'); + expect(last.callId).toBe(functionCall.callId); + }); + + it('persists HITL tool outputs when approval items are not the last generated entries', async () => { + const textAgent = new Agent({ + name: 'Interleaved HITL Agent', + outputType: 'text', + instructions: 'test', + }); + const agent = textAgent as unknown as Agent< + UnknownContext, + AgentOutputType + >; + const session = new MemorySession(); + const context = new RunContext(undefined as UnknownContext); + const state = new RunState< + UnknownContext, + Agent + >(context, 'hello', agent, 10); + + const approvalCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_hitl', + callId: 'call_hitl', + name: 'lookup_customer_profile', + status: 'completed', + arguments: JSON.stringify({ id: '101' }), + providerData: {}, + }; + + const autoCall: protocol.FunctionCallItem = { + type: 'function_call', + id: 'fc_auto', + callId: 'call_auto', + name: 'fetch_image_data', + status: 'completed', + arguments: JSON.stringify({ id: '101' }), + providerData: {}, + }; + + const approvalToolCallItem = new ToolCallItem(approvalCall, textAgent); + const autoToolCallItem = new ToolCallItem(autoCall, textAgent); + const approvalItem = new ToolApprovalItem(approvalCall, textAgent); + const autoOutputRaw = getToolCallOutputItem(autoCall, 'Fetched image.'); + const autoOutputItem = new ToolCallOutputItem( + autoOutputRaw, + textAgent, + 'Fetched image.', + ); + + state._generatedItems = [ + approvalToolCallItem, + autoToolCallItem, + approvalItem, + autoOutputItem, + ]; + state._currentStep = { + type: 'next_step_interruption', + data: { + interruptions: [approvalItem], + }, + }; + + const preApprovalResult = new RunResult(state); + await saveToSession( + session, + toInputItemList(state._originalInput), + preApprovalResult, + ); + + expect(state._currentTurnPersistedItemCount).toBe(4); + expect(session.items).toHaveLength(4); + const preResumeResult = session.items[3] as protocol.FunctionCallResultItem; + expect(preResumeResult.type).toBe('function_call_result'); + expect(preResumeResult.callId).toBe(autoCall.callId); + + state.approve(approvalItem); + + const approvalTool = tool({ + name: approvalCall.name, + description: 'Approval tool', + parameters: z.object({ id: z.string() }), + needsApproval: async () => true, + async execute({ id }) { + return `Customer ${id} details.`; + }, + }) as unknown as FunctionTool; + + const autoTool = tool({ + name: autoCall.name, + description: 'Auto tool', + parameters: z.object({ id: z.string() }), + async execute({ id }) { + return `Image for ${id}.`; + }, + }) as unknown as FunctionTool; + + const processedResponse: ProcessedResponse = { + newItems: [ + approvalToolCallItem, + autoToolCallItem, + approvalItem, + autoOutputItem, + ], + handoffs: [], + functions: [ + { + toolCall: approvalCall, + tool: approvalTool, + }, + { + toolCall: autoCall, + tool: autoTool, + }, + ], + computerActions: [], + mcpApprovalRequests: [], + toolsUsed: [approvalCall.name, autoCall.name], + hasToolsOrApprovalsToRun() { + return false; + }, + } as ProcessedResponse; + + const runner = 
new Runner(); + const resumedResponse: ModelResponse = { + usage: new Usage({ + requests: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + }), + output: [], + }; + + const turnResult = await withTrace('interleaved-hitl', async () => { + return resolveInterruptedTurn( + textAgent, + state._originalInput, + state._generatedItems, + resumedResponse, + processedResponse, + runner, + state, + ); + }); + + state._originalInput = turnResult.originalInput; + state._generatedItems = turnResult.generatedItems; + state._currentStep = turnResult.nextStep; + + const resumedResult = new RunResult(state); + await saveToSession(session, [], resumedResult); + + expect(session.items).toHaveLength(5); + const latest = session.items[4] as protocol.FunctionCallResultItem; + expect(latest.type).toBe('function_call_result'); + expect(latest.callId).toBe(approvalCall.callId); + }); +}); + +describe('prepareInputItemsWithSession', () => { + class StubSession implements Session { + constructor(private history: AgentInputItem[]) {} + + async getSessionId(): Promise { + return 'session'; + } + + async getItems(): Promise { + return [...this.history]; + } + + async addItems(_items: AgentInputItem[]): Promise {} + + async popItem(): Promise { + return undefined; + } + + async clearSession(): Promise {} + } + + it('concatenates session history with array inputs when no callback is provided', async () => { + const historyItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'history', + id: 'history-1', + }; + const newItems: AgentInputItem[] = [ + { + type: 'message', + role: 'user', + content: 'fresh text', + id: 'new-1', + }, + { + type: 'function_call_result', + name: 'foo-func', + callId: 'new-2', + output: [ + { + type: 'input_image', + image: 'https://example.com/image.png', + }, + ], + status: 'completed', + }, + ]; + const session = new StubSession([historyItem]); + + const result = await prepareInputItemsWithSession(newItems, session); + + expect(result.preparedInput).toEqual([historyItem, ...newItems]); + const sessionItems = result.sessionItems; + if (!sessionItems) { + throw new Error('Expected sessionItems to be defined.'); + } + expect(sessionItems).toEqual(newItems); + expect(sessionItems[0]).toBe(newItems[0]); + expect(sessionItems[1]).toBe(newItems[1]); + }); + + it('only persists new inputs when callbacks prepend history duplicates', async () => { + const historyItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'ok', + id: 'history-1', + }; + const newItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'ok', + id: 'new-1', + }; + const session = new StubSession([historyItem]); + + const result = await prepareInputItemsWithSession( + [newItem], + session, + (history, newItems) => { + expect(history).toHaveLength(1); + expect(history[0]).toBe(historyItem); + expect(newItems).toHaveLength(1); + expect(newItems[0]).toBe(newItem); + return [...history.slice(-1), ...newItems]; + }, + ); + + expect(result.preparedInput).toEqual([historyItem, newItem]); + const sessionItems = result.sessionItems; + if (!sessionItems) { + throw new Error('Expected sessionItems to be defined.'); + } + expect(sessionItems).toEqual([newItem]); + expect(sessionItems[0]).toBe(newItem); + }); + + it('respects callbacks that intentionally drop new inputs', async () => { + const historyItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'previous', + id: 'history-1', + }; + const newItem: AgentInputItem = { + type: 'message', + role: 'user', + 
content: 'fresh', + id: 'new-1', + }; + const session = new StubSession([historyItem]); + + const result = await prepareInputItemsWithSession( + [newItem], + session, + (history) => history.slice(), + { includeHistoryInPreparedInput: false }, + ); + + expect(result.preparedInput).toEqual([]); + const sessionItems = result.sessionItems; + if (!sessionItems) { + throw new Error('Expected sessionItems to be defined.'); + } + expect(sessionItems).toEqual([]); + }); + + it('persists appended copies when callbacks mutate history in place', async () => { + const historyItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'past', + id: 'history-1', + }; + const newItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'fresh', + id: 'new-1', + }; + const session = new StubSession([historyItem]); + + let appendedItems: AgentInputItem[] = []; + const result = await prepareInputItemsWithSession( + [newItem], + session, + (history, newItems) => { + appendedItems = newItems.map((item) => ({ + ...item, + providerData: { annotated: true }, + })); + history.push(...appendedItems); + return history; + }, + ); + + expect(appendedItems).toHaveLength(1); + expect(result.preparedInput).toEqual([historyItem, ...appendedItems]); + const sessionItems = result.sessionItems; + if (!sessionItems) { + throw new Error('Expected sessionItems to be defined.'); + } + expect(sessionItems).toEqual(appendedItems); + expect(sessionItems[0]).toBe(appendedItems[0]); + expect(sessionItems[0]).not.toBe(newItem); + }); + + it('omits session history from prepared input when includeHistoryInPreparedInput is false', async () => { + const historyItem: AgentInputItem = { + type: 'message', + role: 'user', + content: 'past', + id: 'history-1', + }; + const session = new StubSession([historyItem]); + const result = await prepareInputItemsWithSession( + 'fresh input', + session, + undefined, + { includeHistoryInPreparedInput: false }, + ); + + expect(result.preparedInput).toEqual(toInputItemList('fresh input')); + expect(result.sessionItems).toEqual(toInputItemList('fresh input')); + }); +}); + describe('getToolCallOutputItem', () => { it('produces a correctly shaped function_call_output item', () => { const output = getToolCallOutputItem(TEST_MODEL_FUNCTION_CALL, 'hi'); @@ -1254,7 +1762,7 @@ describe('hasToolsOrApprovalsToRun method', () => { }); }); -describe('executeToolsAndSideEffects', () => { +describe('resolveTurnAfterModelResponse', () => { let runner: Runner; let state: RunState; @@ -1275,7 +1783,7 @@ describe('executeToolsAndSideEffects', () => { expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(true); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( textAgent, 'test input', [], @@ -1322,7 +1830,7 @@ describe('executeToolsAndSideEffects', () => { ); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( structuredAgent, 'test input', [], @@ -1347,7 +1855,7 @@ describe('executeToolsAndSideEffects', () => { expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(false); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( textAgent, 'test input', [], @@ -1390,7 +1898,7 @@ describe('executeToolsAndSideEffects', () => { expect(processedResponse.hasToolsOrApprovalsToRun()).toBe(false); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( textAgent, 'test input', [], @@ 
-1456,7 +1964,7 @@ describe('executeToolsAndSideEffects', () => { ); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( computerAgent, 'test input', [], @@ -1470,6 +1978,55 @@ describe('executeToolsAndSideEffects', () => { expect(result.nextStep.type).toBe('next_step_run_again'); }); + it('does not duplicate previously persisted model items when resuming after approvals', async () => { + const toolCall = { + ...TEST_MODEL_FUNCTION_CALL, + id: 'call-resume', + callId: 'call-resume', + }; + const message = fakeModelMessage('Tool approval pending'); + message.id = 'message-resume'; + const response: ModelResponse = { + output: [toolCall, message], + usage: new Usage(), + } as any; + + const processedResponse = processModelResponse( + response, + TEST_AGENT, + [TEST_TOOL], + [], + ); + + const priorItems = [...processedResponse.newItems]; + state._generatedItems = priorItems; + + const result = await withTrace('test', () => + resolveTurnAfterModelResponse( + TEST_AGENT, + 'test input', + priorItems, + response, + processedResponse, + runner, + state, + ), + ); + + const persistedToolCalls = result.generatedItems.filter((item) => { + return item instanceof ToolCallItem && item.rawItem.id === 'call-resume'; + }); + expect(persistedToolCalls).toHaveLength(1); + + const persistedMessages = result.generatedItems.filter((item) => { + return ( + item instanceof MessageOutputItem && + item.rawItem.id === 'message-resume' + ); + }); + expect(persistedMessages).toHaveLength(1); + }); + it('does not finalize when hosted MCP approval happens in the same turn; runs again', async () => { const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' }); const mcpTool = hostedMcpTool({ @@ -1515,7 +2072,7 @@ describe('executeToolsAndSideEffects', () => { ); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( approvalAgent, 'test input', [], @@ -1573,7 +2130,7 @@ describe('executeToolsAndSideEffects', () => { ); const result = await withTrace('test', () => - executeToolsAndSideEffects( + resolveTurnAfterModelResponse( approvalAgent, 'test input', [], @@ -1592,4 +2149,229 @@ describe('executeToolsAndSideEffects', () => { }); } }); + + it('preserves pending hosted MCP approvals when resuming an interrupted turn', async () => { + const approvalAgent = new Agent({ name: 'MCPAgent', outputType: 'text' }); + const mcpTool = hostedMcpTool({ + serverLabel: 'demo_server', + serverUrl: 'https://example.com', + requireApproval: { + always: { toolNames: ['demo_tool'] }, + }, + }); + + const approvalRequest: protocol.HostedToolCallItem = { + type: 'hosted_tool_call', + id: 'approval1', + name: 'demo_tool', + status: 'in_progress', + providerData: { + type: 'mcp_approval_request', + server_label: 'demo_server', + name: 'demo_tool', + id: 'approval1', + arguments: '{}', + }, + } as protocol.HostedToolCallItem; + + const approvalItem = new ToolApprovalItem(approvalRequest, approvalAgent); + const originalPreStepItems = [approvalItem]; + + const processedResponse: ProcessedResponse = { + newItems: [], + handoffs: [], + functions: [], + computerActions: [], + mcpApprovalRequests: [ + { + requestItem: approvalItem, + mcpTool, + }, + ], + toolsUsed: [], + hasToolsOrApprovalsToRun() { + return true; + }, + }; + + const resumedResponse: ModelResponse = { + output: [], + usage: new Usage(), + } as any; + + const resumedState = new RunState( + new RunContext(), + 'test input', + approvalAgent, + 1, + ); + + const 
runner = new Runner(); + + const result = await resolveInterruptedTurn( + approvalAgent, + 'test input', + originalPreStepItems, + resumedResponse, + processedResponse, + runner, + resumedState, + ); + + expect(result.nextStep.type).toBe('next_step_interruption'); + if (result.nextStep.type === 'next_step_interruption') { + expect(result.nextStep.data.interruptions).toContain(approvalItem); + } + expect(result.preStepItems).toContain(approvalItem); + expect(result.newStepItems).not.toContain(approvalItem); + }); +}); + +describe('resolveInterruptedTurn', () => { + it('rewinds persisted count only for pending approval placeholders', async () => { + const textAgent = new Agent({ + name: 'SequentialApprovalsAgent', + outputType: 'text', + }); + const agent = textAgent as unknown as Agent< + UnknownContext, + AgentOutputType + >; + const firstCall: protocol.FunctionCallItem = { + ...TEST_MODEL_FUNCTION_CALL, + id: 'call-first', + callId: 'call-first', + }; + const secondCall: protocol.FunctionCallItem = { + ...TEST_MODEL_FUNCTION_CALL, + id: 'call-second', + callId: 'call-second', + }; + + const firstApproval = new ToolApprovalItem(firstCall, agent); + const firstOutputRaw = getToolCallOutputItem(firstCall, 'done'); + const firstOutput = new ToolCallOutputItem(firstOutputRaw, agent, 'done'); + const secondApproval = new ToolApprovalItem(secondCall, agent); + + const generatedItems = [firstApproval, firstOutput, secondApproval]; + const state = new RunState(new RunContext(), 'hello', agent, 5); + state._generatedItems = generatedItems; + state._currentTurnPersistedItemCount = generatedItems.length; + state._currentStep = { + type: 'next_step_interruption', + data: { + interruptions: [secondApproval], + }, + }; + + const processedResponse: ProcessedResponse = { + newItems: [], + handoffs: [], + functions: [], + computerActions: [], + mcpApprovalRequests: [], + toolsUsed: [], + hasToolsOrApprovalsToRun() { + return false; + }, + }; + + const runner = new Runner({ tracingDisabled: true }); + const modelResponse: ModelResponse = { + output: [], + usage: new Usage(), + } as any; + + const result = await resolveInterruptedTurn( + agent, + 'hello', + generatedItems, + modelResponse, + processedResponse, + runner, + state, + ); + + expect(state._currentTurnPersistedItemCount).toBe( + generatedItems.length - 1, + ); + expect(result.preStepItems).toEqual([firstOutput]); + }); + + it('dispatches approved computer actions when resuming an interruption', async () => { + const fakeComputer: Computer = { + environment: 'mac', + dimensions: [1, 1], + screenshot: vi.fn().mockResolvedValue('img'), + click: vi.fn(async (_x: number, _y: number, _button: any) => {}), + doubleClick: vi.fn(async (_x: number, _y: number) => {}), + drag: vi.fn(async (_path: [number, number][]) => {}), + keypress: vi.fn(async (_keys: string[]) => {}), + move: vi.fn(async (_x: number, _y: number) => {}), + scroll: vi.fn( + async (_x: number, _y: number, _sx: number, _sy: number) => {}, + ), + type: vi.fn(async (_text: string) => {}), + wait: vi.fn(async () => {}), + }; + const computer = computerTool({ computer: fakeComputer }); + const agent = new Agent({ name: 'ComputerAgent', tools: [computer] }); + const computerCall: protocol.ComputerUseCallItem = { + type: 'computer_call', + id: 'comp1', + callId: 'comp1', + status: 'in_progress', + action: { type: 'screenshot' } as any, + }; + const processedResponse: ProcessedResponse = { + newItems: [new ToolCallItem(computerCall, agent)], + handoffs: [], + functions: [], + computerActions: [{ 
toolCall: computerCall, computer }],
+      mcpApprovalRequests: [],
+      toolsUsed: ['computer_use'],
+      hasToolsOrApprovalsToRun() {
+        return true;
+      },
+    };
+
+    const runner = new Runner({ tracingDisabled: true });
+    const state = new RunState(new RunContext(), 'hello', agent, 1);
+    const approvalSpy = vi
+      .spyOn(state._context, 'isToolApproved')
+      .mockImplementation(({ toolName, callId }) => {
+        if (toolName === computer.name && callId === computerCall.callId) {
+          return true as any;
+        }
+        return undefined as any;
+      });
+
+    const originalItems = [new ToolCallItem(computerCall, agent)];
+    const resumedResponse: ModelResponse = {
+      output: [],
+      usage: new Usage(),
+    } as any;
+
+    const result = await resolveInterruptedTurn(
+      agent,
+      'hello',
+      originalItems,
+      resumedResponse,
+      processedResponse,
+      runner,
+      state,
+    );
+
+    approvalSpy.mockRestore();
+
+    const toolOutputs = result.newStepItems.filter(
+      (item): item is ToolCallOutputItem => item instanceof ToolCallOutputItem,
+    );
+
+    expect(toolOutputs).toHaveLength(1);
+    expect(
+      (toolOutputs[0].rawItem as protocol.ComputerCallResultItem).callId,
+    ).toBe(computerCall.callId);
+    expect(fakeComputer.screenshot).toHaveBeenCalledTimes(1);
+  });
 });
diff --git a/packages/agents-openai/src/index.ts b/packages/agents-openai/src/index.ts
index c62d7aeb..175893cc 100644
--- a/packages/agents-openai/src/index.ts
+++ b/packages/agents-openai/src/index.ts
@@ -18,3 +18,7 @@ export {
   codeInterpreterTool,
   imageGenerationTool,
 } from './tools';
+export {
+  OpenAIConversationsSession,
+  startOpenAIConversationsSession,
+} from './memory/openaiConversationsSession';
diff --git a/packages/agents-openai/src/memory/openaiConversationsSession.ts b/packages/agents-openai/src/memory/openaiConversationsSession.ts
new file mode 100644
index 00000000..d7a04d45
--- /dev/null
+++ b/packages/agents-openai/src/memory/openaiConversationsSession.ts
@@ -0,0 +1,261 @@
+import OpenAI from 'openai';
+import type { AgentInputItem, Session } from '@openai/agents-core';
+import { getDefaultOpenAIClient, getDefaultOpenAIKey } from '../defaults';
+import { convertToOutputItem, getInputItems } from '../openaiResponsesModel';
+import { protocol } from '@openai/agents-core';
+import type { ConversationItem as APIConversationItem } from 'openai/resources/conversations/items';
+import type { Message as APIConversationMessage } from 'openai/resources/conversations/conversations';
+
+export type OpenAIConversationsSessionOptions = {
+  conversationId?: string;
+  client?: OpenAI;
+  apiKey?: string;
+  baseURL?: string;
+  organization?: string;
+  project?: string;
+};
+
+export async function startOpenAIConversationsSession(
+  client?: OpenAI,
+): Promise<string> {
+  const resolvedClient = client ??
resolveClient({});
+  const response = await resolvedClient.conversations.create({ items: [] });
+  return response.id;
+}
+
+export class OpenAIConversationsSession implements Session {
+  #client: OpenAI;
+  #conversationId?: string;
+
+  constructor(options: OpenAIConversationsSessionOptions = {}) {
+    this.#client = resolveClient(options);
+    this.#conversationId = options.conversationId;
+  }
+
+  get sessionId(): string | undefined {
+    return this.#conversationId;
+  }
+
+  async getSessionId(): Promise<string> {
+    if (!this.#conversationId) {
+      this.#conversationId = await startOpenAIConversationsSession(
+        this.#client,
+      );
+    }
+
+    return this.#conversationId;
+  }
+
+  async getItems(limit?: number): Promise<AgentInputItem[]> {
+    const conversationId = await this.getSessionId();
+    // Convert each API item into the Agent SDK's input shape. Some API payloads expand into multiple items.
+    const toAgentItems = (item: APIConversationItem): AgentInputItem[] => {
+      if (item.type === 'message' && item.role === 'user') {
+        const message = item as APIConversationMessage;
+        return [
+          {
+            id: item.id,
+            type: 'message',
+            role: 'user',
+            content: (message.content ?? [])
+              .map((c) => {
+                if (c.type === 'input_text') {
+                  return { type: 'input_text', text: c.text };
+                } else if (c.type === 'input_image') {
+                  if (c.image_url) {
+                    return { type: 'input_image', image: c.image_url };
+                  } else if (c.file_id) {
+                    return { type: 'input_image', image: { id: c.file_id } };
+                  }
+                } else if (c.type === 'input_file') {
+                  if (c.file_data) {
+                    const fileItem: protocol.InputFile = {
+                      type: 'input_file',
+                      file: c.file_data,
+                    };
+                    if (c.filename) {
+                      fileItem.filename = c.filename;
+                    }
+                    return fileItem;
+                  }
+                  if (c.file_url) {
+                    const fileItem: protocol.InputFile = {
+                      type: 'input_file',
+                      file: c.file_url,
+                    };
+                    if (c.filename) {
+                      fileItem.filename = c.filename;
+                    }
+                    return fileItem;
+                  } else if (c.file_id) {
+                    const fileItem: protocol.InputFile = {
+                      type: 'input_file',
+                      file: { id: c.file_id },
+                    };
+                    if (c.filename) {
+                      fileItem.filename = c.filename;
+                    }
+                    return fileItem;
+                  }
+                }
+                // Add more content types here when they're added
+                return null;
+              })
+              .filter((c) => c !== null) as protocol.UserContent[],
+          },
+        ];
+      }
+
+      const outputItems = (item as APIConversationItem & { output?: unknown })
+        .output;
+
+      if (isResponseOutputItemArray(outputItems)) {
+        return convertToOutputItem(outputItems);
+      }
+
+      return convertToOutputItem([item as OpenAI.Responses.ResponseOutputItem]);
+    };
+
+    if (limit === undefined) {
+      const items: AgentInputItem[] = [];
+      const iterator = this.#client.conversations.items.list(conversationId, {
+        order: 'asc' as const,
+      });
+      for await (const item of iterator) {
+        items.push(...toAgentItems(item));
+      }
+      return items;
+    }
+
+    if (limit <= 0) {
+      return [];
+    }
+
+    const itemGroups: AgentInputItem[][] = [];
+    let total = 0;
+    const iterator = this.#client.conversations.items.list(conversationId, {
+      limit,
+      order: 'desc' as const,
+    });
+
+    for await (const item of iterator) {
+      const group = toAgentItems(item);
+      if (!group.length) {
+        continue;
+      }
+
+      itemGroups.push(group);
+      total += group.length;
+
+      if (total >= limit) {
+        break;
+      }
+    }
+
+    // Iterate in reverse because the API returned items in descending order.
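+    // Each conversation item can expand into several agent items, so the
+    // loop above may collect more than `limit` entries; after restoring
+    // ascending order, the splice below trims the oldest extras.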
+    const orderedItems: AgentInputItem[] = [];
+    for (let index = itemGroups.length - 1; index >= 0; index -= 1) {
+      orderedItems.push(...itemGroups[index]);
+    }
+
+    if (orderedItems.length > limit) {
+      orderedItems.splice(0, orderedItems.length - limit);
+    }
+
+    return orderedItems;
+  }
+
+  async addItems(items: AgentInputItem[]): Promise<void> {
+    if (!items.length) {
+      return;
+    }
+
+    const conversationId = await this.getSessionId();
+    await this.#client.conversations.items.create(conversationId, {
+      items: getInputItems(items),
+    });
+  }
+
+  async popItem(): Promise<AgentInputItem | undefined> {
+    const conversationId = await this.getSessionId();
+    const [latest] = await this.getItems(1);
+    if (!latest) {
+      return undefined;
+    }
+
+    const itemId = (latest as { id?: string }).id;
+    if (itemId) {
+      await this.#client.conversations.items.delete(itemId, {
+        conversation_id: conversationId,
+      });
+    }
+
+    return latest;
+  }
+
+  async clearSession(): Promise<void> {
+    if (!this.#conversationId) {
+      return;
+    }
+
+    await this.#client.conversations.delete(this.#conversationId);
+    this.#conversationId = undefined;
+  }
+}
+
+// --------------------------------------------------------------
+// Internals
+// --------------------------------------------------------------
+
+const INPUT_CONTENT_TYPES = new Set([
+  'input_text',
+  'input_image',
+  'input_file',
+  'input_audio',
+]);
+
+function isObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null;
+}
+
+// Treats a value as ResponseOutputItem[] only when each entry resembles an output item rather than raw input content.
+function isResponseOutputItemArray(
+  value: unknown,
+): value is OpenAI.Responses.ResponseOutputItem[] {
+  if (!Array.isArray(value) || value.length === 0) {
+    return false;
+  }
+
+  return value.every((entry) => {
+    if (!isObject(entry)) {
+      return false;
+    }
+    const type = (entry as { type?: unknown }).type;
+    if (typeof type !== 'string') {
+      return false;
+    }
+
+    if (INPUT_CONTENT_TYPES.has(type)) {
+      return false;
+    }
+
+    // Fallback: pre-emptively exclude future input_* variants so they never masquerade as response outputs.
+    return !type.startsWith('input_');
+  });
+}
+
+function resolveClient(options: OpenAIConversationsSessionOptions): OpenAI {
+  if (options.client) {
+    return options.client;
+  }
+
+  return (
+    getDefaultOpenAIClient() ??
+    new OpenAI({
+      apiKey: options.apiKey ??
getDefaultOpenAIKey(),
+      baseURL: options.baseURL,
+      organization: options.organization,
+      project: options.project,
+    })
+  );
+}
diff --git a/packages/agents-openai/src/openaiResponsesModel.ts b/packages/agents-openai/src/openaiResponsesModel.ts
index 107d96cb..c01ff871 100644
--- a/packages/agents-openai/src/openaiResponsesModel.ts
+++ b/packages/agents-openai/src/openaiResponsesModel.ts
@@ -51,16 +51,16 @@ type ResponseFunctionCallOutputListItem =
     }
   | {
       type: 'input_image';
-      image_url?: string;
-      file_id?: string;
-      detail?: 'low' | 'high' | 'auto';
+      image_url?: string | null;
+      file_id?: string | null;
+      detail?: 'low' | 'high' | 'auto' | null;
     }
   | {
       type: 'input_file';
-      file_data?: string;
-      file_url?: string;
-      file_id?: string;
-      filename?: string;
+      file_data?: string | null;
+      file_url?: string | null;
+      file_id?: string | null;
+      filename?: string | null;
     };
 
 type ExtendedFunctionCallOutput = Omit<
@@ -70,6 +70,13 @@
   output: string | ResponseFunctionCallOutputListItem[];
 };
 
+type ResponseOutputItemWithFunctionResult =
+  | OpenAI.Responses.ResponseOutputItem
+  | (OpenAI.Responses.ResponseFunctionToolCallOutputItem & {
+      name?: string;
+      function_name?: string;
+    });
+
 const HostedToolChoice = z.enum([
   'file_search',
   'web_search',
@@ -394,6 +401,82 @@ function convertStructuredOutputToRequestItem(
   );
 }
 
+function convertResponseFunctionCallOutputItemToStructured(
+  item: ResponseFunctionCallOutputListItem,
+): protocol.ToolCallStructuredOutput | null {
+  if (item.type === 'input_text') {
+    return {
+      type: 'input_text',
+      text: item.text,
+    };
+  }
+
+  if (item.type === 'input_image') {
+    const structured: protocol.InputImage = { type: 'input_image' };
+
+    if (typeof item.image_url === 'string' && item.image_url.length > 0) {
+      structured.image = item.image_url;
+    } else if (typeof item.file_id === 'string' && item.file_id.length > 0) {
+      structured.image = { id: item.file_id };
+    } else {
+      // As of 2025-10-30, the Conversations retrieval API may not include a
+      // data URL in the image_url property, so we skip this pattern here.
+      logger.debug(
+        `Skipped the "input_image" output item from a tool call result because the OpenAI Conversations API response didn't include the required property (image_url or file_id).`,
+      );
+      return null;
+    }
+
+    if (item.detail) {
+      structured.detail = item.detail;
+    }
+
+    return structured;
+  }
+
+  if (item.type === 'input_file') {
+    const structured: protocol.InputFile = { type: 'input_file' };
+
+    if (typeof item.file_id === 'string' && item.file_id.length > 0) {
+      structured.file = { id: item.file_id };
+    } else if (typeof item.file_url === 'string' && item.file_url.length > 0) {
+      structured.file = { url: item.file_url };
+    } else if (
+      typeof item.file_data === 'string' &&
+      item.file_data.length > 0
+    ) {
+      structured.file = item.file_data;
+    }
+
+    if (item.filename) {
+      structured.filename = item.filename;
+    }
+
+    return structured;
+  }
+
+  const exhaustive: never = item;
+  throw new UserError(
+    `Unsupported structured tool output: ${JSON.stringify(exhaustive)}`,
+  );
+}
+
+function convertFunctionCallOutputToProtocol(
+  output: OpenAI.Responses.ResponseFunctionToolCallOutputItem['output'],
+): protocol.FunctionCallResultItem['output'] {
+  if (typeof output === 'string') {
+    return output;
+  }
+
+  if (Array.isArray(output)) {
+    return output
+      .map(convertResponseFunctionCallOutputItemToStructured)
+      .filter((s) => s !== null);
+  }
+
+  return '';
+}
+
 function normalizeLegacyFileFromOutput(value: Record<string, unknown>): {
   file?:
protocol.InputFile['file']; filename?: string; @@ -862,7 +945,6 @@ function getInputItems( status: item.status, ...camelOrSnakeToSnakeCase(item.providerData), }; - return entry as unknown as OpenAI.Responses.ResponseInputItem.FunctionCallOutput; } @@ -1090,7 +1172,7 @@ function convertToMessageContentItem( } function convertToOutputItem( - items: OpenAI.Responses.ResponseOutputItem[], + items: ResponseOutputItemWithFunctionResult[], ): protocol.OutputModelItem[] { return items.map((item) => { if (item.type === 'message') { @@ -1137,6 +1219,28 @@ function convertToOutputItem( providerData, }; return output; + } else if (item.type === 'function_call_output') { + const { + call_id, + status, + output: rawOutput, + name: toolName, + function_name: functionName, + ...providerData + } = item as OpenAI.Responses.ResponseFunctionToolCallOutputItem & { + name?: string; + function_name?: string; + }; + const output: protocol.FunctionCallResultItem = { + type: 'function_call_result', + id: item.id, + callId: call_id, + name: toolName ?? functionName ?? call_id, + status: status ?? 'completed', + output: convertFunctionCallOutputToProtocol(rawOutput), + providerData, + }; + return output; } else if (item.type === 'computer_call') { const { call_id, status, action, ...providerData } = item; const output: protocol.ComputerUseCallItem = { @@ -1204,6 +1308,7 @@ function convertToOutputItem( return { type: 'unknown', + id: item.id, providerData: item, }; }); diff --git a/packages/agents-openai/test/openaiConversationsSession.test.ts b/packages/agents-openai/test/openaiConversationsSession.test.ts new file mode 100644 index 00000000..5d562402 --- /dev/null +++ b/packages/agents-openai/test/openaiConversationsSession.test.ts @@ -0,0 +1,438 @@ +import { describe, expect, it, beforeEach, vi } from 'vitest'; + +const { convertToOutputItemMock, getInputItemsMock } = vi.hoisted(() => ({ + convertToOutputItemMock: vi.fn(), + getInputItemsMock: vi.fn(), +})); + +vi.mock('../src/openaiResponsesModel', () => ({ + convertToOutputItem: convertToOutputItemMock, + getInputItems: getInputItemsMock, +})); + +import { OpenAIConversationsSession } from '../src/memory/openaiConversationsSession'; + +describe('OpenAIConversationsSession', () => { + beforeEach(() => { + convertToOutputItemMock.mockReset(); + getInputItemsMock.mockReset(); + }); + + it('converts response items using their output payload', async () => { + const responseOutput = [ + { + id: 'resp-1-msg-1', + type: 'message', + role: 'assistant', + content: [], + }, + ]; + const convertedItems = [ + { + id: 'converted-1', + type: 'message', + role: 'assistant', + content: [], + }, + ]; + + convertToOutputItemMock.mockReturnValue(convertedItems as any); + + const items = [ + { + type: 'response', + id: 'resp-1', + output: responseOutput, + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const result = await session.getItems(); + + expect(list).toHaveBeenCalledWith('conv-123', { order: 'asc' }); + expect(convertToOutputItemMock).toHaveBeenCalledWith(responseOutput); + expect(result).toEqual(convertedItems); + }); + + it('wraps string function_call_output payloads before converting', async () => { + const convertedItems = [ + { + id: 
'converted-output', + type: 'function_call_result', + }, + ]; + + convertToOutputItemMock.mockReturnValue(convertedItems as any); + + const items = [ + { + type: 'function_call_output', + id: 'resp-fn-output', + call_id: 'call-1', + output: 'Tool error message', + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const result = await session.getItems(); + + expect(convertToOutputItemMock).toHaveBeenCalledWith([ + expect.objectContaining({ + id: 'resp-fn-output', + type: 'function_call_output', + call_id: 'call-1', + output: 'Tool error message', + }), + ]); + expect(result).toEqual(convertedItems); + }); + + it('wraps function_call_output structured content arrays before converting', async () => { + const convertedItems = [ + { + id: 'converted-output-array', + type: 'function_call_result', + }, + ]; + + convertToOutputItemMock.mockReturnValue(convertedItems as any); + + const items = [ + { + type: 'function_call_output', + id: 'resp-fn-output-array', + call_id: 'call-2', + output: [ + { + type: 'input_text', + text: 'No customer found', + }, + ], + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const result = await session.getItems(); + + expect(convertToOutputItemMock).toHaveBeenCalledWith([ + expect.objectContaining({ + id: 'resp-fn-output-array', + type: 'function_call_output', + call_id: 'call-2', + output: [ + { + type: 'input_text', + text: 'No customer found', + }, + ], + }), + ]); + expect(result).toEqual(convertedItems); + }); + + it('enforces the item limit after converting response items', async () => { + convertToOutputItemMock.mockImplementation((raw) => { + const id = raw[0]?.id ?? 
'response'; + return [ + { + id: `${id}-msg-1`, + type: 'message', + role: 'assistant', + content: [], + }, + { + id: `${id}-msg-2`, + type: 'message', + role: 'assistant', + content: [], + }, + { + id: `${id}-msg-3`, + type: 'message', + role: 'assistant', + content: [], + }, + ] as any; + }); + + const items = [ + { + type: 'message', + role: 'assistant', + id: 'resp-1', + content: [], + }, + { + type: 'message', + role: 'assistant', + id: 'resp-0', + content: [], + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const result = await session.getItems(2); + + expect(list).toHaveBeenCalledWith('conv-123', { limit: 2, order: 'desc' }); + expect(convertToOutputItemMock).toHaveBeenCalledTimes(1); + expect(result).toHaveLength(2); + expect(result.map((item: any) => item.id)).toEqual([ + 'resp-1-msg-2', + 'resp-1-msg-3', + ]); + }); + + it('popItem deletes the newest converted item', async () => { + convertToOutputItemMock.mockReturnValue([ + { + id: 'resp-1-msg-1', + type: 'message', + role: 'assistant', + content: [], + }, + { + id: 'resp-1-msg-2', + type: 'message', + role: 'assistant', + content: [], + }, + { + id: 'resp-1-msg-3', + type: 'message', + role: 'assistant', + content: [], + }, + ] as any); + + const items = [ + { + type: 'message', + role: 'assistant', + id: 'resp-1', + content: [], + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const deleteMock = vi.fn(); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: deleteMock, + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const popped = await session.popItem(); + + expect(list).toHaveBeenCalledWith('conv-123', { limit: 1, order: 'desc' }); + expect(deleteMock).toHaveBeenCalledWith('resp-1-msg-3', { + conversation_id: 'conv-123', + }); + expect(popped?.id).toBe('resp-1-msg-3'); + }); + + it('preserves inline file data for user inputs', async () => { + const items = [ + { + type: 'message', + role: 'user', + id: 'user-1', + content: [ + { + type: 'input_file', + file_data: 'data:application/pdf;base64,SGVsbG8=', + filename: 'inline.pdf', + }, + ], + }, + ]; + + const list = vi.fn(() => ({ + async *[Symbol.asyncIterator]() { + for (const item of items) { + yield item as any; + } + }, + })); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list, + create: vi.fn(), + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + const result = await session.getItems(); + + expect(result).toEqual([ + { + id: 'user-1', + type: 'message', + role: 'user', + content: [ + { + type: 'input_file', + file: 'data:application/pdf;base64,SGVsbG8=', + filename: 'inline.pdf', + }, + ], + }, + ]); + }); + + it('adds items without requesting additional response includes', async () => { + const createMock = vi.fn(); + const inputItems = [ + { + id: 'user-1', + type: 'message', + role: 'user', + content: [], + }, + ]; + const converted = [ + { + id: 'payload-user-1', + type: 'message', + role: 
'user', + content: [], + }, + ]; + + getInputItemsMock.mockReturnValue(converted as any); + + const session = new OpenAIConversationsSession({ + client: { + conversations: { + items: { + list: vi.fn(), + create: createMock, + delete: vi.fn(), + }, + create: vi.fn(), + delete: vi.fn(), + }, + } as any, + conversationId: 'conv-123', + }); + + await session.addItems(inputItems as any); + + expect(getInputItemsMock).toHaveBeenCalledWith(inputItems); + expect(createMock).toHaveBeenCalledWith('conv-123', { + items: converted, + }); + }); +}); diff --git a/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts b/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts index 386bbc2e..2f418fda 100644 --- a/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts +++ b/packages/agents-openai/test/openaiResponsesModel.helpers.test.ts @@ -512,4 +512,70 @@ describe('convertToOutputItem', () => { ] as any), ).toThrow(); }); + + it('converts function_call_output items into function_call_result entries', () => { + const out = convertToOutputItem([ + { + type: 'function_call_output', + id: 'out-1', + call_id: 'call-1', + name: 'lookup', + output: 'done', + } as any, + ]); + + expect(out[0]).toMatchObject({ + type: 'function_call_result', + id: 'out-1', + callId: 'call-1', + name: 'lookup', + output: 'done', + status: 'completed', + }); + }); + + it('converts structured function_call_output payloads into structured outputs', () => { + const out = convertToOutputItem([ + { + type: 'function_call_output', + id: 'out-2', + call_id: 'call-2', + function_name: 'search', + status: 'in_progress', + output: [ + { type: 'input_text', text: 'hello' }, + { + type: 'input_image', + image_url: 'https://example.com/img.png', + detail: 'high', + }, + { + type: 'input_file', + file_url: 'https://example.com/file.txt', + filename: 'file.txt', + }, + ], + } as any, + ]); + + expect(out[0]).toMatchObject({ + type: 'function_call_result', + callId: 'call-2', + name: 'search', + status: 'in_progress', + output: [ + { type: 'input_text', text: 'hello' }, + { + type: 'input_image', + image: 'https://example.com/img.png', + detail: 'high', + }, + { + type: 'input_file', + file: { url: 'https://example.com/file.txt' }, + filename: 'file.txt', + }, + ], + }); + }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a3e5d007..be909751 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -227,6 +227,19 @@ importers: specifier: workspace:* version: link:../../packages/agents + examples/memory: + dependencies: + '@openai/agents': + specifier: workspace:* + version: link:../../packages/agents + '@prisma/client': + specifier: ^6.18.0 + version: 6.18.0(prisma@6.18.0(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3) + devDependencies: + prisma: + specifier: ^6.18.0 + version: 6.18.0(magicast@0.3.5)(typescript@5.9.3) + examples/model-providers: dependencies: '@openai/agents': @@ -1270,6 +1283,36 @@ packages: resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} engines: {node: '>=14'} + '@prisma/client@6.18.0': + resolution: {integrity: sha512-jnL2I9gDnPnw4A+4h5SuNn8Gc+1mL1Z79U/3I9eE2gbxJG1oSA+62ByPW4xkeDgwE0fqMzzpAZ7IHxYnLZ4iQA==} + engines: {node: '>=18.18'} + peerDependencies: + prisma: '*' + typescript: '>=5.1.0' + peerDependenciesMeta: + prisma: + optional: true + typescript: + optional: true + + '@prisma/config@6.18.0': + resolution: {integrity: 
sha512-rgFzspCpwsE+q3OF/xkp0fI2SJ3PfNe9LLMmuSVbAZ4nN66WfBiKqJKo/hLz3ysxiPQZf8h1SMf2ilqPMeWATQ==} + + '@prisma/debug@6.18.0': + resolution: {integrity: sha512-PMVPMmxPj0ps1VY75DIrT430MoOyQx9hmm174k6cmLZpcI95rAPXOQ+pp8ANQkJtNyLVDxnxVJ0QLbrm/ViBcg==} + + '@prisma/engines-version@6.18.0-8.34b5a692b7bd79939a9a2c3ef97d816e749cda2f': + resolution: {integrity: sha512-T7Af4QsJQnSgWN1zBbX+Cha5t4qjHRxoeoWpK4JugJzG/ipmmDMY5S+O0N1ET6sCBNVkf6lz+Y+ZNO9+wFU8pQ==} + + '@prisma/engines@6.18.0': + resolution: {integrity: sha512-i5RzjGF/ex6AFgqEe2o1IW8iIxJGYVQJVRau13kHPYEL1Ck8Zvwuzamqed/1iIljs5C7L+Opiz5TzSsUebkriA==} + + '@prisma/fetch-engine@6.18.0': + resolution: {integrity: sha512-TdaBvTtBwP3IoqVYoGIYpD4mWlk0pJpjTJjir/xLeNWlwog7Sl3bD2J0jJ8+5+q/6RBg+acb9drsv5W6lqae7A==} + + '@prisma/get-platform@6.18.0': + resolution: {integrity: sha512-uXNJCJGhxTCXo2B25Ta91Rk1/Nmlqg9p7G9GKh8TPhxvAyXCvMNQoogj4JLEUy+3ku8g59cpyQIKFhqY2xO2bg==} + '@radix-ui/primitive@1.1.2': resolution: {integrity: sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==} @@ -2312,6 +2355,14 @@ packages: resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} engines: {node: '>= 0.8'} + c12@3.1.0: + resolution: {integrity: sha512-uWoS8OU1MEIsOv8p/5a82c3H31LsWVR5qiyXVfBNOzfffjUWtPnhAb4BYI2uG2HfGmZmFjCtui5XNWaps+iFuw==} + peerDependencies: + magicast: ^0.3.5 + peerDependenciesMeta: + magicast: + optional: true + cac@6.7.14: resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} engines: {node: '>=8'} @@ -2400,6 +2451,9 @@ packages: resolution: {integrity: sha512-Wdy2Igu8OcBpI2pZePZ5oWjPC38tmDVx5WKUXKwlLYkA0ozo85sLsLvkBbBn/sZaSCMFOGZJ14fvW9t5/d7kdA==} engines: {node: '>=8'} + citty@0.1.6: + resolution: {integrity: sha512-tskPPKEs8D2KPafUypv2gxwJP8h/OaJmC82QQGGDQcHvXX43xF2VDACcJVmZ0EuSxkpO9Kc4MlrA3q0+FG58AQ==} + class-variance-authority@0.7.1: resolution: {integrity: sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==} @@ -2473,6 +2527,13 @@ packages: engines: {node: '>=18'} hasBin: true + confbox@0.2.2: + resolution: {integrity: sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ==} + + consola@3.4.2: + resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==} + engines: {node: ^14.18.0 || >=16.10.0} + content-disposition@0.5.4: resolution: {integrity: sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==} engines: {node: '>= 0.6'} @@ -2586,6 +2647,10 @@ packages: deep-is@0.1.4: resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==} + deepmerge-ts@7.1.5: + resolution: {integrity: sha512-HOJkrhaYsweh+W+e74Yn7YStZOilkoPb6fycpwNLKzSPtruFs48nYis0zy5yJz1+ktUhHxoRDJ27RQAWLIJVJw==} + engines: {node: '>=16.0.0'} + defer-to-connect@2.0.1: resolution: {integrity: sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==} engines: {node: '>=10'} @@ -2655,6 +2720,10 @@ packages: resolution: {integrity: sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==} engines: {node: '>=12'} + dotenv@16.6.1: + resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==} + engines: {node: '>=12'} + dset@3.1.4: 
resolution: {integrity: sha512-2QF/g9/zTaPDc3BjNcVTGoBbXBgYfMTTceLaYcFJ/W9kggFUkhxD/hMEeuLKbugyef9SqAx8cpgwlIP/jinUTA==} engines: {node: '>=4'} @@ -2681,6 +2750,9 @@ packages: ee-first@1.1.1: resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + effect@3.18.4: + resolution: {integrity: sha512-b1LXQJLe9D11wfnOKAk3PKxuqYshQ0Heez+y5pnkd3jLj1yx9QhM72zZ9uUrOQyNvrs2GZZd/3maL0ZV18YuDA==} + emoji-regex@10.6.0: resolution: {integrity: sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A==} @@ -2690,6 +2762,10 @@ packages: emoji-regex@9.2.2: resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==} + empathic@2.0.0: + resolution: {integrity: sha512-i6UzDscO/XfAcNYD75CfICkmfLedpyPDdozrLMmQc5ORaQcdMoc21OnlEylMIqI7U8eniKrPMxxtj8k0vhmJhA==} + engines: {node: '>=14'} + encodeurl@1.0.2: resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==} engines: {node: '>= 0.8'} @@ -2920,6 +2996,9 @@ packages: expressive-code@0.41.3: resolution: {integrity: sha512-YLnD62jfgBZYrXIPQcJ0a51Afv9h8VlWqEGK9uU2T5nL/5rb8SnA86+7+mgCZe5D34Tff5RNEA5hjNVJYHzrFg==} + exsolve@1.0.7: + resolution: {integrity: sha512-VO5fQUzZtI6C+vx4w/4BWJpg3s/5l+6pRQEHzFRM8WFi4XffSP1Z+4qi7GbjWbvRQEbdIco5mIMq+zX4rPuLrw==} + extend@3.0.2: resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} @@ -2930,6 +3009,10 @@ packages: resolution: {integrity: sha512-11Ndz7Nv+mvAC1j0ktTa7fAb0vLyGGX+rMHNBYQviQDGU0Hw7lhctJANqbPhu9nV9/izT/IntTgZ7Im/9LJs9g==} engines: {'0': node >=0.6.0} + fast-check@3.23.2: + resolution: {integrity: sha512-h5+1OzzfCC3Ef7VbtKdcv7zsstUQwUDlYpUTvjeUsJAssPgLn7QzbboPtL5ro04Mq0rPOsMzl7q5hIbRs2wD1A==} + engines: {node: '>=8.0.0'} + fast-decode-uri-component@1.0.1: resolution: {integrity: sha512-WKgKWg5eUxvRZGwW8FvfbaH7AXSh2cL+3j5fMGzUMCxWBJ3dV3a7Wz8y2f/uQ0e3B6WmodD3oS54jTQ9HVTIIg==} @@ -3121,6 +3204,10 @@ packages: getpass@0.1.7: resolution: {integrity: sha512-0fzj9JxOLfJ+XGLhR8ze3unN0KZCgZwiSSDz168VERjK8Wl8kVSdcu2kspd4s4wtAa1y/qrVRiAA0WclVsu0ng==} + giget@2.0.0: + resolution: {integrity: sha512-L5bGsVkxJbJgdnwyuheIunkGatUF/zssUoxxjACCseZYAVbaqdh9Tsmmlkl8vYan09H7sbvKt4pS8GqKLBrEzA==} + hasBin: true + github-slugger@2.0.0: resolution: {integrity: sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==} @@ -4152,6 +4239,11 @@ packages: nth-check@2.1.1: resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + nypm@0.6.2: + resolution: {integrity: sha512-7eM+hpOtrKrBDCh7Ypu2lJ9Z7PNZBdi/8AT3AX8xoCj43BBVHD0hPSTEvMtkMpfs8FCqBGhxB+uToIQimA111g==} + engines: {node: ^14.16.0 || >=16.10.0} + hasBin: true + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -4335,6 +4427,9 @@ packages: peek-stream@1.1.3: resolution: {integrity: sha512-FhJ+YbOSBb9/rIl2ZeE/QHEsWn7PqNYt8ARAY3kIgNGOk13g9FGyIY6JIl/xB/3TFRVoTv5as0l11weORrTekA==} + perfect-debounce@1.0.0: + resolution: {integrity: sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==} + performance-now@2.1.0: resolution: {integrity: sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==} @@ -4378,6 
+4473,9 @@ packages: resolution: {integrity: sha512-ueGLflrrnvwB3xuo/uGob5pd5FN7l0MsLf0Z87o/UQmRtwjvfylfc9MurIxRAWywCYTgrvpXBcqjV4OfCYGCIQ==} engines: {node: '>=16.20.0'} + pkg-types@2.3.0: + resolution: {integrity: sha512-SIqCzDRg0s9npO5XQ3tNZioRY1uK06lA41ynBC1YmFTmnY6FjUjVt6s4LoADmwoig1qqD0oK8h1p/8mlMx8Oig==} + playwright-core@1.55.1: resolution: {integrity: sha512-Z6Mh9mkwX+zxSlHqdr5AOcJnfp+xUWLCt9uKV18fhzA8eyxUd8NUWzAjxUh55RZKSYwDGX0cfaySdhZJGMoJ+w==} engines: {node: '>=18'} @@ -4438,6 +4536,16 @@ packages: resolution: {integrity: sha512-gjVS5hOP+M3wMm5nmNOucbIrqudzs9v/57bWRHQWLYklXqoXKrVfYW2W9+glfGsqtPgpiz5WwyEEB+ksXIx3gQ==} engines: {node: '>=18'} + prisma@6.18.0: + resolution: {integrity: sha512-bXWy3vTk8mnRmT+SLyZBQoC2vtV9Z8u7OHvEu+aULYxwiop/CPiFZ+F56KsNRNf35jw+8wcu8pmLsjxpBxAO9g==} + engines: {node: '>=18.18'} + hasBin: true + peerDependencies: + typescript: '>=5.1.0' + peerDependenciesMeta: + typescript: + optional: true + prismjs@1.30.0: resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==} engines: {node: '>=6'} @@ -4489,6 +4597,9 @@ packages: resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} engines: {node: '>=6'} + pure-rand@6.1.0: + resolution: {integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==} + qs@6.13.0: resolution: {integrity: sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==} engines: {node: '>=0.6'} @@ -4529,6 +4640,9 @@ packages: resolution: {integrity: sha512-9G8cA+tuMS75+6G/TzW8OtLzmBDMo8p1JRxN5AZ+LAp8uxGA8V8GZm4GQ4/N5QNQEnLmg6SS7wyuSmbKepiKqA==} engines: {node: '>= 0.10'} + rc9@2.1.2: + resolution: {integrity: sha512-btXCnMmRIBINM2LDZoEmOogIZU7Qe7zn4BpomSKZ/ykbLObuBdvG+mFq11DL6fjH1DRwHhrlgtYWG96bJiC7Cg==} + react-dom@19.1.0: resolution: {integrity: sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==} peerDependencies: @@ -5701,9 +5815,6 @@ packages: typescript: ^4.9.4 || ^5.0.2 zod: ^3 - zod@3.25.62: - resolution: {integrity: sha512-YCxsr4DmhPcrKPC9R1oBHQNlQzlJEyPAId//qTau/vBee9uO8K6prmRq4eMkOyxvBfH4wDPIPdLx9HVMWIY3xA==} - zod@3.25.76: resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} @@ -6480,8 +6591,8 @@ snapshots: express-rate-limit: 7.5.0(express@5.1.0) pkce-challenge: 5.0.0 raw-body: 3.0.0 - zod: 3.25.62 - zod-to-json-schema: 3.24.5(zod@3.25.62) + zod: 3.25.76 + zod-to-json-schema: 3.24.5(zod@3.25.76) transitivePeerDependencies: - supports-color @@ -6573,6 +6684,41 @@ snapshots: '@pkgjs/parseargs@0.11.0': optional: true + '@prisma/client@6.18.0(prisma@6.18.0(magicast@0.3.5)(typescript@5.9.3))(typescript@5.9.3)': + optionalDependencies: + prisma: 6.18.0(magicast@0.3.5)(typescript@5.9.3) + typescript: 5.9.3 + + '@prisma/config@6.18.0(magicast@0.3.5)': + dependencies: + c12: 3.1.0(magicast@0.3.5) + deepmerge-ts: 7.1.5 + effect: 3.18.4 + empathic: 2.0.0 + transitivePeerDependencies: + - magicast + + '@prisma/debug@6.18.0': {} + + '@prisma/engines-version@6.18.0-8.34b5a692b7bd79939a9a2c3ef97d816e749cda2f': {} + + '@prisma/engines@6.18.0': + dependencies: + '@prisma/debug': 6.18.0 + '@prisma/engines-version': 6.18.0-8.34b5a692b7bd79939a9a2c3ef97d816e749cda2f + '@prisma/fetch-engine': 6.18.0 + '@prisma/get-platform': 6.18.0 + + '@prisma/fetch-engine@6.18.0': + dependencies: + '@prisma/debug': 6.18.0 + 
'@prisma/engines-version': 6.18.0-8.34b5a692b7bd79939a9a2c3ef97d816e749cda2f + '@prisma/get-platform': 6.18.0 + + '@prisma/get-platform@6.18.0': + dependencies: + '@prisma/debug': 6.18.0 + '@radix-ui/primitive@1.1.2': {} '@radix-ui/react-compose-refs@1.1.2(@types/react@19.1.8)(react@19.1.0)': @@ -7752,6 +7898,23 @@ snapshots: bytes@3.1.2: {} + c12@3.1.0(magicast@0.3.5): + dependencies: + chokidar: 4.0.3 + confbox: 0.2.2 + defu: 6.1.4 + dotenv: 16.6.1 + exsolve: 1.0.7 + giget: 2.0.0 + jiti: 2.6.1 + ohash: 2.0.11 + pathe: 2.0.3 + perfect-debounce: 1.0.0 + pkg-types: 2.3.0 + rc9: 2.1.2 + optionalDependencies: + magicast: 0.3.5 + cac@6.7.14: {} cacheable-lookup@6.1.0: {} @@ -7825,6 +7988,10 @@ snapshots: ci-info@4.3.1: {} + citty@0.1.6: + dependencies: + consola: 3.4.2 + class-variance-authority@0.7.1: dependencies: clsx: 2.1.1 @@ -7898,6 +8065,10 @@ snapshots: tree-kill: 1.2.2 yargs: 17.7.2 + confbox@0.2.2: {} + + consola@3.4.2: {} + content-disposition@0.5.4: dependencies: safe-buffer: 5.2.1 @@ -7980,6 +8151,8 @@ snapshots: deep-is@0.1.4: {} + deepmerge-ts@7.1.5: {} + defer-to-connect@2.0.1: {} defu@6.1.4: {} @@ -8024,6 +8197,8 @@ snapshots: dotenv@16.5.0: {} + dotenv@16.6.1: {} + dset@3.1.4: {} dunder-proto@1.0.1: @@ -8059,12 +8234,19 @@ snapshots: ee-first@1.1.1: {} + effect@3.18.4: + dependencies: + '@standard-schema/spec': 1.0.0 + fast-check: 3.23.2 + emoji-regex@10.6.0: {} emoji-regex@8.0.0: {} emoji-regex@9.2.2: {} + empathic@2.0.0: {} + encodeurl@1.0.2: {} encodeurl@2.0.0: {} @@ -8404,12 +8586,18 @@ snapshots: '@expressive-code/plugin-shiki': 0.41.3 '@expressive-code/plugin-text-markers': 0.41.3 + exsolve@1.0.7: {} + extend@3.0.2: {} extendable-error@0.1.7: {} extsprintf@1.3.0: {} + fast-check@3.23.2: + dependencies: + pure-rand: 6.1.0 + fast-decode-uri-component@1.0.1: {} fast-deep-equal@3.1.3: {} @@ -8640,6 +8828,15 @@ snapshots: dependencies: assert-plus: 1.0.0 + giget@2.0.0: + dependencies: + citty: 0.1.6 + consola: 3.4.2 + defu: 6.1.4 + node-fetch-native: 1.6.7 + nypm: 0.6.2 + pathe: 2.0.3 + github-slugger@2.0.0: {} glob-parent@5.1.2: @@ -9998,6 +10195,14 @@ snapshots: dependencies: boolbase: 1.0.0 + nypm@0.6.2: + dependencies: + citty: 0.1.6 + consola: 3.4.2 + pathe: 2.0.3 + pkg-types: 2.3.0 + tinyexec: 1.0.1 + object-assign@4.1.1: {} object-inspect@1.13.4: {} @@ -10173,6 +10378,8 @@ snapshots: duplexify: 3.7.1 through2: 2.0.5 + perfect-debounce@1.0.0: {} + performance-now@2.1.0: {} picocolors@1.1.1: {} @@ -10226,6 +10433,12 @@ snapshots: pkce-challenge@5.0.0: {} + pkg-types@2.3.0: + dependencies: + confbox: 0.2.2 + exsolve: 1.0.7 + pathe: 2.0.3 + playwright-core@1.55.1: {} playwright-core@1.56.1: {} @@ -10280,6 +10493,15 @@ snapshots: dependencies: parse-ms: 4.0.0 + prisma@6.18.0(magicast@0.3.5)(typescript@5.9.3): + dependencies: + '@prisma/config': 6.18.0(magicast@0.3.5) + '@prisma/engines': 6.18.0 + optionalDependencies: + typescript: 5.9.3 + transitivePeerDependencies: + - magicast + prismjs@1.30.0: {} process-nextick-args@2.0.1: {} @@ -10326,6 +10548,8 @@ snapshots: punycode@2.3.1: {} + pure-rand@6.1.0: {} + qs@6.13.0: dependencies: side-channel: 1.1.0 @@ -10367,6 +10591,11 @@ snapshots: iconv-lite: 0.7.0 unpipe: 1.0.0 + rc9@2.1.2: + dependencies: + defu: 6.1.4 + destr: 2.0.5 + react-dom@19.1.0(react@19.1.0): dependencies: react: 19.1.0 @@ -11656,9 +11885,9 @@ snapshots: yoctocolors@2.1.2: {} - zod-to-json-schema@3.24.5(zod@3.25.62): + zod-to-json-schema@3.24.5(zod@3.25.76): dependencies: - zod: 3.25.62 + zod: 3.25.76 zod-to-json-schema@3.24.6(zod@3.25.76): dependencies: @@ 
-11669,8 +11898,6 @@ snapshots: typescript: 5.9.3 zod: 3.25.76 - zod@3.25.62: {} - zod@3.25.76: {} zwitch@2.0.4: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 23b9c60e..39187d3d 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -7,5 +7,9 @@ onlyBuiltDependencies: - '@tailwindcss/oxide' - esbuild - sharp + # used only in examples/memory + - '@prisma/client' + - '@prisma/engines' + - 'prisma' publishBranch: main