From 442b2990421af6c7bdb5065223bd691f3b3bbe01 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Mon, 2 Mar 2026 22:27:26 -0800 Subject: [PATCH 001/749] Count tokens by open ai endpoint --- .../token-count/__tests__/token-count.test.ts | 479 ++++++++++++++++++ web/src/app/api/v1/token-count/_post.ts | 186 ++++++- 2 files changed, 657 insertions(+), 8 deletions(-) diff --git a/web/src/app/api/v1/token-count/__tests__/token-count.test.ts b/web/src/app/api/v1/token-count/__tests__/token-count.test.ts index 903521b91f..22c89bf640 100644 --- a/web/src/app/api/v1/token-count/__tests__/token-count.test.ts +++ b/web/src/app/api/v1/token-count/__tests__/token-count.test.ts @@ -3,6 +3,8 @@ import { describe, expect, it } from 'bun:test' import { convertContentToAnthropic, convertToAnthropicMessages, + convertToResponsesApiInput, + countTokensViaOpenAI, formatToolContent, } from '../_post' @@ -433,6 +435,483 @@ describe('convertToAnthropicMessages', () => { }) }) +describe('convertToResponsesApiInput', () => { + it('converts a simple user message', () => { + const result = convertToResponsesApiInput([ + { role: 'user', content: 'Hello world' }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Hello world' }, + ]) + }) + + it('maps system messages to developer role', () => { + const result = convertToResponsesApiInput([ + { role: 'system', content: 'You are helpful' }, + { role: 'user', content: 'Hi' }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'developer', content: 'You are helpful' }, + { type: 'message', role: 'user', content: 'Hi' }, + ]) + }) + + it('converts tool messages to function_call_output', () => { + const result = convertToResponsesApiInput([ + { role: 'tool', toolCallId: 'call-1', content: 'File contents here' }, + ]) + expect(result).toEqual([ + { type: 'function_call_output', call_id: 'call-1', output: 'File contents here' }, + ]) + }) + + it('uses unknown call_id when toolCallId is missing', () => { + const result 
= convertToResponsesApiInput([ + { role: 'tool', content: 'Some output' }, + ]) + expect(result).toEqual([ + { type: 'function_call_output', call_id: 'unknown', output: 'Some output' }, + ]) + }) + + it('converts assistant messages', () => { + const result = convertToResponsesApiInput([ + { role: 'assistant', content: 'I can help with that.' }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'assistant', content: 'I can help with that.' }, + ]) + }) + + it('handles array content with text parts', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [{ type: 'text', text: 'What is TypeScript?' }], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'What is TypeScript?' }, + ]) + }) + + it('converts tool-call content to function_call items', () => { + const result = convertToResponsesApiInput([ + { + role: 'assistant', + content: [ + { + type: 'tool-call', + toolCallId: 'call-1', + toolName: 'read_file', + input: { path: 'src/index.ts' }, + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'function_call', + id: 'call-1', + name: 'read_file', + arguments: '{"path":"src/index.ts"}', + }, + ]) + }) + + it('splits assistant messages with text and tool-calls', () => { + const result = convertToResponsesApiInput([ + { + role: 'assistant', + content: [ + { type: 'text', text: 'Let me read that file.' }, + { + type: 'tool-call', + toolCallId: 'call-2', + toolName: 'read_file', + input: { path: 'test.ts' }, + }, + ], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'assistant', content: 'Let me read that file.' 
}, + { + type: 'function_call', + id: 'call-2', + name: 'read_file', + arguments: '{"path":"test.ts"}', + }, + ]) + }) + + it('handles json content parts', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [{ type: 'json', value: { key: 'value' } }], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: '{"key":"value"}' }, + ]) + }) + + it('converts a multi-turn conversation', () => { + const result = convertToResponsesApiInput([ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there!' }, + { role: 'user', content: 'How are you?' }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Hello' }, + { type: 'message', role: 'assistant', content: 'Hi there!' }, + { type: 'message', role: 'user', content: 'How are you?' }, + ]) + }) + + describe('image handling', () => { + it('converts user message with URL image to content array', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'What is in this image?' }, + { + type: 'image', + image: 'https://example.com/photo.png', + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_text', text: 'What is in this image?' 
}, + { type: 'input_image', image_url: 'https://example.com/photo.png' }, + ], + }, + ]) + }) + + it('converts base64 image to data: URI', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'Describe this' }, + { + type: 'image', + image: 'iVBORw0KGgoAAAANSUhEUg', + mediaType: 'image/png', + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_text', text: 'Describe this' }, + { type: 'input_image', image_url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUg' }, + ], + }, + ]) + }) + + it('uses default media type for base64 when not specified', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { + type: 'image', + image: 'base64data', + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_image', image_url: 'data:image/png;base64,base64data' }, + ], + }, + ]) + }) + + it('passes through data: URIs as-is', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { + type: 'image', + image: 'data:image/jpeg;base64,/9j/4AAQ', + mediaType: 'image/jpeg', + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_image', image_url: 'data:image/jpeg;base64,/9j/4AAQ' }, + ], + }, + ]) + }) + + it('handles http:// image URLs', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { + type: 'image', + image: 'http://example.com/image.jpg', + }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_image', image_url: 'http://example.com/image.jpg' }, + ], + }, + ]) + }) + + it('handles multiple images with text', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'Compare these images' }, + { type: 'image', image: 
'https://example.com/a.png' }, + { type: 'image', image: 'https://example.com/b.png' }, + ], + }, + ]) + expect(result).toEqual([ + { + type: 'message', + role: 'user', + content: [ + { type: 'input_text', text: 'Compare these images' }, + { type: 'input_image', image_url: 'https://example.com/a.png' }, + { type: 'input_image', image_url: 'https://example.com/b.png' }, + ], + }, + ]) + }) + + it('skips images with missing image field', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'Hello' }, + { type: 'image' }, + ], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Hello' }, + ]) + }) + + it('skips images with empty string image field', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'Hello' }, + { type: 'image', image: '' }, + ], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Hello' }, + ]) + }) + + it('uses plain string content when no valid images are present', () => { + const result = convertToResponsesApiInput([ + { + role: 'user', + content: [ + { type: 'text', text: 'Just text' }, + { type: 'image' }, + ], + }, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Just text' }, + ]) + }) + }) + + it('handles a full tool-use round trip', () => { + const result = convertToResponsesApiInput([ + { role: 'user', content: 'Read the file' }, + { + role: 'assistant', + content: [ + { + type: 'tool-call', + toolCallId: 'call-abc', + toolName: 'read_file', + input: { path: 'index.ts' }, + }, + ], + }, + { + role: 'tool', + toolCallId: 'call-abc', + content: 'console.log("hello")', + }, + { role: 'assistant', content: 'The file contains a log statement.' 
}, + ]) + expect(result).toEqual([ + { type: 'message', role: 'user', content: 'Read the file' }, + { + type: 'function_call', + id: 'call-abc', + name: 'read_file', + arguments: '{"path":"index.ts"}', + }, + { + type: 'function_call_output', + call_id: 'call-abc', + output: 'console.log("hello")', + }, + { + type: 'message', + role: 'assistant', + content: 'The file contains a log statement.', + }, + ]) + }) +}) + +describe('countTokensViaOpenAI', () => { + const mockLogger = { + info: () => {}, + error: () => {}, + warn: () => {}, + debug: () => {}, + } as any + + function createMockFetch(inputTokens: number) { + return (async () => + new Response(JSON.stringify({ object: 'response.input_tokens', input_tokens: inputTokens }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + })) as unknown as typeof globalThis.fetch + } + + it('returns token count from OpenAI API', async () => { + const result = await countTokensViaOpenAI({ + messages: [{ role: 'user', content: 'Hello world' }], + system: undefined, + model: 'openai/gpt-5.3-codex', + fetch: createMockFetch(42), + logger: mockLogger, + }) + expect(result).toBe(42) + }) + + it('passes system prompt as instructions', async () => { + let capturedBody: any + const mockFetch = async (_url: string, init: RequestInit) => { + capturedBody = JSON.parse(init.body as string) + return new Response( + JSON.stringify({ object: 'response.input_tokens', input_tokens: 10 }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ) + } + + await countTokensViaOpenAI({ + messages: [{ role: 'user', content: 'Hi' }], + system: 'You are a helpful assistant.', + model: 'openai/gpt-5.3', + fetch: mockFetch as any, + logger: mockLogger, + }) + + expect(capturedBody.instructions).toBe('You are a helpful assistant.') + expect(capturedBody.model).toBe('gpt-5.3') + }) + + it('strips openai/ prefix from model', async () => { + let capturedBody: any + const mockFetch = async (_url: string, init: RequestInit) 
=> { + capturedBody = JSON.parse(init.body as string) + return new Response( + JSON.stringify({ object: 'response.input_tokens', input_tokens: 5 }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ) + } + + await countTokensViaOpenAI({ + messages: [{ role: 'user', content: 'Test' }], + system: undefined, + model: 'openai/gpt-5.3-codex', + fetch: mockFetch as any, + logger: mockLogger, + }) + + expect(capturedBody.model).toBe('gpt-5.3-codex') + }) + + it('omits instructions when system is undefined', async () => { + let capturedBody: any + const mockFetch = async (_url: string, init: RequestInit) => { + capturedBody = JSON.parse(init.body as string) + return new Response( + JSON.stringify({ object: 'response.input_tokens', input_tokens: 5 }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ) + } + + await countTokensViaOpenAI({ + messages: [{ role: 'user', content: 'Test' }], + system: undefined, + model: 'openai/gpt-5.3', + fetch: mockFetch as any, + logger: mockLogger, + }) + + expect(capturedBody.instructions).toBeUndefined() + }) + + it('throws on API error', async () => { + const mockFetch = async () => + new Response('Internal Server Error', { status: 500 }) + + await expect( + countTokensViaOpenAI({ + messages: [{ role: 'user', content: 'Test' }], + system: undefined, + model: 'openai/gpt-5.3-codex', + fetch: mockFetch as any, + logger: mockLogger, + }), + ).rejects.toThrow('OpenAI API error: 500') + }) +}) + describe('formatToolContent', () => { it('returns string content as-is', () => { expect(formatToolContent('simple string')).toBe('simple string') diff --git a/web/src/app/api/v1/token-count/_post.ts b/web/src/app/api/v1/token-count/_post.ts index 9e2ce09cb1..616164ee39 100644 --- a/web/src/app/api/v1/token-count/_post.ts +++ b/web/src/app/api/v1/token-count/_post.ts @@ -1,4 +1,5 @@ import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' +import { isOpenAIProviderModel } from 
'@codebuff/common/constants/chatgpt-oauth' import { isClaudeModel, toAnthropicModelId, @@ -77,13 +78,16 @@ export async function postTokenCount(params: { const { messages, system, model } = bodyResult.data try { - const inputTokens = await countTokensViaAnthropic({ - messages, - system, - model, - fetch, - logger, - }) + const useOpenAI = model != null && isOpenAIProviderModel(model) + const inputTokens = useOpenAI + ? await countTokensViaOpenAI({ messages, system, model, fetch, logger }) + : await countTokensViaAnthropic({ + messages, + system, + model, + fetch, + logger, + }) logger.info({ userId, @@ -91,6 +95,7 @@ export async function postTokenCount(params: { hasSystem: !!system, model: model ?? DEFAULT_ANTHROPIC_MODEL, tokenCount: inputTokens, + provider: useOpenAI ? 'openai' : 'anthropic', }, `Token count: ${inputTokens}` ) @@ -99,7 +104,7 @@ export async function postTokenCount(params: { } catch (error) { logger.error( { error: getErrorObject(error), userId }, - 'Failed to count tokens via Anthropic API', + 'Failed to count tokens', ) return NextResponse.json( @@ -112,6 +117,171 @@ export async function postTokenCount(params: { // Buffer to add to token count for non-Anthropic models since tokenizers differ const NON_ANTHROPIC_TOKEN_BUFFER = 0.3 +export async function countTokensViaOpenAI(params: { + messages: TokenCountRequest['messages'] + system: string | undefined + model: string + fetch: typeof globalThis.fetch + logger: Logger +}): Promise { + const { messages, system, model, fetch, logger } = params + + const openaiModelId = model.startsWith('openai/') + ? 
model.slice('openai/'.length) + : model + + const input = convertToResponsesApiInput(messages) + + const response = await fetch( + 'https://api.openai.com/v1/responses/input_tokens', + { + method: 'POST', + headers: { + Authorization: `Bearer ${env.OPENAI_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: openaiModelId, + input, + ...(system && { instructions: system }), + }), + }, + ) + + if (!response.ok) { + const errorText = await response.text() + logger.error( + { status: response.status, errorText, model }, + 'OpenAI token count API error', + ) + throw new Error(`OpenAI API error: ${response.status} - ${errorText}`) + } + + const data = await response.json() + return data.input_tokens +} + +export type ResponsesApiContentPart = + | { type: 'input_text'; text: string } + | { type: 'input_image'; image_url: string } + +export type ResponsesApiInputItem = + | { type: 'message'; role: 'user' | 'assistant' | 'developer'; content: string | ResponsesApiContentPart[] } + | { type: 'function_call'; id: string; name: string; arguments: string } + | { type: 'function_call_output'; call_id: string; output: string } + +export function convertToResponsesApiInput( + messages: TokenCountRequest['messages'], +): ResponsesApiInputItem[] { + const input: ResponsesApiInputItem[] = [] + + for (const message of messages) { + if (message.role === 'system') { + const content = buildMessageContent(message.content) + if (content) { + input.push({ type: 'message', role: 'developer', content }) + } + continue + } + + if (message.role === 'tool') { + input.push({ + type: 'function_call_output', + call_id: message.toolCallId ?? 
'unknown', + output: formatToolContent(message.content), + }) + continue + } + + if (message.role === 'user') { + const content = buildMessageContent(message.content) + if (content) { + input.push({ type: 'message', role: 'user', content }) + } + continue + } + + if (message.role === 'assistant') { + const content = buildMessageContent(message.content) + if (content) { + input.push({ type: 'message', role: 'assistant', content }) + } + if (Array.isArray(message.content)) { + for (const part of message.content) { + if (part.type === 'tool-call') { + input.push({ + type: 'function_call', + id: part.toolCallId ?? 'unknown', + name: part.toolName, + arguments: JSON.stringify(part.input ?? {}), + }) + } + } + } + } + } + + return input +} + +function buildMessageContent( + content: unknown, +): string | ResponsesApiContentPart[] | null { + if (typeof content === 'string') return content || null + if (!Array.isArray(content)) { + const text = JSON.stringify(content) + return text || null + } + + const hasImages = content.some( + (part) => part.type === 'image' && typeof part.image === 'string' && part.image, + ) + + if (!hasImages) { + const text = extractTextParts(content) + return text || null + } + + const parts: ResponsesApiContentPart[] = [] + for (const part of content) { + if (part.type === 'text' && typeof part.text === 'string' && part.text) { + parts.push({ type: 'input_text', text: part.text }) + } else if (part.type === 'json') { + const text = typeof part.value === 'string' ? part.value : JSON.stringify(part.value) + if (text) { + parts.push({ type: 'input_text', text }) + } + } else if (part.type === 'image') { + const imageUrl = toImageUrl(part.image, part.mediaType) + if (imageUrl) { + parts.push({ type: 'input_image', image_url: imageUrl }) + } + } + } + + return parts.length > 0 ? 
parts : null +} + +function toImageUrl(image: unknown, mediaType?: string): string | null { + if (typeof image !== 'string' || !image) return null + if (image.startsWith('http://') || image.startsWith('https://') || image.startsWith('data:')) { + return image + } + return `data:${mediaType ?? 'image/png'};base64,${image}` +} + +function extractTextParts(content: Array>): string { + const parts: string[] = [] + for (const part of content) { + if (part.type === 'text' && typeof part.text === 'string') { + parts.push(part.text) + } else if (part.type === 'json') { + parts.push(typeof part.value === 'string' ? part.value : JSON.stringify(part.value)) + } + } + return parts.join('\n') +} + async function countTokensViaAnthropic(params: { messages: TokenCountRequest['messages'] system: string | undefined From 84166f379d08e874be742523fa1f1448623e1048 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 3 Mar 2026 09:24:26 -0800 Subject: [PATCH 002/749] base-deep-evals --- agents/base2/base-deep-evals.ts | 8 ++++++ agents/base2/base-deep.ts | 48 +++++++++++++++++++-------------- evals/buffbench/main.ts | 2 +- 3 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 agents/base2/base-deep-evals.ts diff --git a/agents/base2/base-deep-evals.ts b/agents/base2/base-deep-evals.ts new file mode 100644 index 0000000000..d51c4ed38e --- /dev/null +++ b/agents/base2/base-deep-evals.ts @@ -0,0 +1,8 @@ +import { createBaseDeep } from './base-deep' + +const definition = { + ...createBaseDeep({ noAskUser: true }), + id: 'base-deep-evals', + displayName: 'Buffy the Codex Evals Orchestrator', +} +export default definition diff --git a/agents/base2/base-deep.ts b/agents/base2/base-deep.ts index cb125813e6..ad9d1f4705 100644 --- a/agents/base2/base-deep.ts +++ b/agents/base2/base-deep.ts @@ -1,10 +1,13 @@ +import { buildArray } from '@codebuff/common/util/array' + import { publisher } from '../constants' import { PLACEHOLDER, type SecretAgentDefinition, } from 
'../types/secret-agent-definition' -const SYSTEM_PROMPT = `You are Buffy, a strategic assistant that orchestrates complex coding tasks through specialized sub-agents. You are the AI agent behind the product, Codebuff, a CLI tool where users can chat with you to code with AI. +function buildDeepSystemPrompt(noAskUser: boolean): string { + return `You are Buffy, a strategic assistant that orchestrates complex coding tasks through specialized sub-agents. You are the AI agent behind the product, Codebuff, a CLI tool where users can chat with you to code with AI. # Core Mandates @@ -14,8 +17,8 @@ const SYSTEM_PROMPT = `You are Buffy, a strategic assistant that orchestrates co - **Spawn mentioned agents:** If the user uses "@AgentName" in their message, you must spawn that agent. - **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing. - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions. -- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it. -- **Ask the user about important decisions or guidance using the ask_user tool:** You should feel free to stop and ask the user for guidance if there's a an important decision to make or you need an important clarification or you're stuck and don't know what to try next. Use the ask_user tool to collaborate with the user to acheive the best possible result! Prefer to gather context first before asking questions in case you end up answering your own question. +- **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.${noAskUser ? 
'' : ` +- **Ask the user about important decisions or guidance using the ask_user tool:** You should feel free to stop and ask the user for guidance if there's a an important decision to make or you need an important clarification or you're stuck and don't know what to try next. Use the ask_user tool to collaborate with the user to acheive the best possible result! Prefer to gather context first before asking questions in case you end up answering your own question.`} - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. @@ -96,8 +99,10 @@ The following is the state of the git repository at the start of the conversatio ${PLACEHOLDER.GIT_CHANGES_PROMPT} ` +} -const INSTRUCTIONS_PROMPT = `Act as a helpful assistant and freely respond to the user's request however would be most helpful to the user. Use your judgement to orchestrate the completion of the user's request using your specialized sub-agents and tools as needed. Take your time and be comprehensive. Don't surprise the user. For example, don't modify files if the user has not asked you to do so at least implicitly. +function buildDeepInstructionsPrompt(noAskUser: boolean): string { + return `Act as a helpful assistant and freely respond to the user's request however would be most helpful to the user. Use your judgement to orchestrate the completion of the user's request using your specialized sub-agents and tools as needed. Take your time and be comprehensive. Don't surprise the user. 
For example, don't modify files if the user has not asked you to do so at least implicitly. Follow this 7-phase workflow for implementation tasks. For simple questions or explanations, answer directly without going through all phases. @@ -138,7 +143,7 @@ Draft a spec first, then refine it with the user: - **Technical Approach**: How the implementation will work at a high level - **Files to Create/Modify**: List of files that will be touched - **Out of Scope**: Anything explicitly excluded - - The spec defines WHAT to build and WHY — it should NOT include detailed implementation steps or a plan. That belongs in Phase 3. + - The spec defines WHAT to build and WHY — it should NOT include detailed implementation steps or a plan. That belongs in Phase 3.${noAskUser ? '' : ` 3. Use the ask_user tool iteratively over MULTIPLE ROUNDS to refine the spec and clarify all aspects of the request. Ask ~2-5 focused questions per round. Continue until you have clarity on: - The exact scope and boundaries of the task - Key requirements and acceptance criteria @@ -148,13 +153,13 @@ Draft a spec first, then refine it with the user: - Any constraints or preferences on implementation approach 4. Between rounds, update SPEC.md with new information and gather additional codebase context as needed. 5. **Do NOT ask obvious questions.** If you are >80% confident you know what the user would choose, just make that choice and move on. Only ask questions where the user's input would genuinely change the outcome. -6. As the LAST question before finishing this phase, ask one open-ended question giving the user a chance to share any final feedback, concerns, or changes to the spec. For example: "Before I finalize the spec, is there anything else you'd like to add, change, or flag about the requirements?" -7. Iteratively critique the spec: +6. 
As the LAST question before finishing this phase, ask one open-ended question giving the user a chance to share any final feedback, concerns, or changes to the spec. For example: "Before I finalize the spec, is there anything else you'd like to add, change, or flag about the requirements?"`} +${noAskUser ? '3' : '7'}. Iteratively critique the spec: a. Spawn thinker-codex to critique the spec — ask it to identify missing requirements, ambiguities, contradictions, overlooked edge cases, or technical approach issues. b. If the thinker raises valid critiques, update SPEC.md to address them. c. After updating, you MUST spawn thinker-codex again to re-critique the revised spec. d. Repeat until the thinker finds no new substantive critiques. Do NOT skip the re-critique — every revision must be verified. -8. Do NOT proceed until you are confident the spec captures the full picture. +${noAskUser ? '4' : '8'}. Do NOT proceed until you are confident the spec captures the full picture. ## Phase 3 — Plan @@ -231,8 +236,8 @@ Capture learnings for future sessions: a. Spawn thinker-codex to critique your LESSONS.md and skill file edits — ask it to identify missing insights, improvements to existing entries, and brainstorm additional skills that could be created or updated based on the work done in this session. b. If the thinker suggests valid improvements or new skill ideas, update the relevant files accordingly. c. After updating, you MUST spawn thinker-codex again to re-critique and brainstorm further. - d. Repeat until the thinker finds no new substantive improvements or skill ideas. Do NOT skip the re-critique — every revision must be verified. -4. Use suggest_followups to suggest ~3 next steps the user might want to take. + d. Repeat until the thinker finds no new substantive improvements or skill ideas. Do NOT skip the re-critique — every revision must be verified.${noAskUser ? '' : ` +4. 
Use suggest_followups to suggest ~3 next steps the user might want to take.`} Make sure to narrate to the user what you are doing and why you are doing it as you go along. Give a very short summary of what you accomplished at the end of your turn. @@ -240,10 +245,13 @@ Make sure to narrate to the user what you are doing and why you are doing it as If the full 7-phase workflow has already been completed in this conversation and the user is asking for a followup change (e.g. "also add X" or "tweak Y"), you do NOT need to repeat the entire workflow. Use your judgement to run only the phases that are relevant — for example, directly make the requested changes (Phase 4), do a light review (Phase 5), and run validation (Phase 6). Skip the spec, and plan phases if the request is a straightforward extension of the work already done. Still update LESSONS.md and skills if you learn anything new. ` +} -export function createBaseDeep(): SecretAgentDefinition { +export function createBaseDeep(options?: { + noAskUser?: boolean +}): Omit { + const { noAskUser = false } = options ?? 
{} return { - id: 'base-deep', publisher, model: 'openai/gpt-5.3-codex', displayName: 'Buffy the Codex Orchestrator', @@ -266,18 +274,18 @@ export function createBaseDeep(): SecretAgentDefinition { }, outputMode: 'last_message', includeMessageHistory: true, - toolNames: [ + toolNames: buildArray( 'spawn_agents', 'read_files', 'read_subtree', - 'suggest_followups', + !noAskUser && 'suggest_followups', 'apply_patch', 'write_file', 'write_todos', - 'ask_user', + !noAskUser && 'ask_user', 'skill', 'set_output', - ], + ), spawnableAgents: [ 'file-picker', 'code-searcher', @@ -291,13 +299,13 @@ export function createBaseDeep(): SecretAgentDefinition { 'gpt-5-agent', 'context-pruner', ], - systemPrompt: SYSTEM_PROMPT, - instructionsPrompt: INSTRUCTIONS_PROMPT, + systemPrompt: buildDeepSystemPrompt(noAskUser), + instructionsPrompt: buildDeepInstructionsPrompt(noAskUser), stepPrompt: `Workflow phases reminder (7 phases): **Planning todos** (write at start): Phase 1 → Phase 2 → Phase 3 1. Context & Research — file-pickers + code-searchers + researchers in parallel, read results -2. Spec — draft SPEC.md, iterative ask_user to refine (skip obvious Qs), open-ended final Q, thinker-codex critique loop +2. Spec — draft SPEC.md, ${noAskUser ? '' : 'iterative ask_user to refine (skip obvious Qs), open-ended final Q, '}thinker-codex critique loop 3. 
Plan — write PLAN.md, thinker-codex critique loop **Implementation todos** (write after Plan): one todo per plan step + phases 5-7 @@ -326,5 +334,5 @@ export function createBaseDeep(): SecretAgentDefinition { } } -const definition = createBaseDeep() +const definition = { ...createBaseDeep(), id: 'base-deep' } export default definition diff --git a/evals/buffbench/main.ts b/evals/buffbench/main.ts index 7f22cd2c10..5c23fb980b 100644 --- a/evals/buffbench/main.ts +++ b/evals/buffbench/main.ts @@ -8,7 +8,7 @@ async function main() { // Use 'external:codex' for OpenAI Codex CLI await runBuffBench({ evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')], - agents: ['base-deep'], + agents: ['base-deep-evals'], taskConcurrency: 5, }) From 82ab4ea718d623309cc57c6146014678111766de Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 3 Mar 2026 09:28:10 -0800 Subject: [PATCH 003/749] Add no learning param --- agents/base2/base-deep-evals.ts | 2 +- agents/base2/base-deep.ts | 40 +++++++++++++++++---------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/agents/base2/base-deep-evals.ts b/agents/base2/base-deep-evals.ts index d51c4ed38e..ce458d71ec 100644 --- a/agents/base2/base-deep-evals.ts +++ b/agents/base2/base-deep-evals.ts @@ -1,7 +1,7 @@ import { createBaseDeep } from './base-deep' const definition = { - ...createBaseDeep({ noAskUser: true }), + ...createBaseDeep({ noAskUser: true, noLearning: true }), id: 'base-deep-evals', displayName: 'Buffy the Codex Evals Orchestrator', } diff --git a/agents/base2/base-deep.ts b/agents/base2/base-deep.ts index ad9d1f4705..9b3d7e1484 100644 --- a/agents/base2/base-deep.ts +++ b/agents/base2/base-deep.ts @@ -6,7 +6,7 @@ import { type SecretAgentDefinition, } from '../types/secret-agent-definition' -function buildDeepSystemPrompt(noAskUser: boolean): string { +function buildDeepSystemPrompt(noAskUser: boolean, noLearning: boolean): string { return `You are Buffy, a strategic assistant that orchestrates 
complex coding tasks through specialized sub-agents. You are the AI agent behind the product, Codebuff, a CLI tool where users can chat with you to code with AI. # Core Mandates @@ -70,9 +70,9 @@ For other questions, you can direct them to codebuff.com, or especially codebuff [ Phase 5 — Review Loop: You spawn code-reviewer-codex, fix any issues found, and re-run the reviewer until no new issues are found ] -[ Phase 6 — Validate: You run unit tests, add new tests, fix failures, and attempt E2E verification by running the application ] +[ Phase 6 — Validate: You run unit tests, add new tests, fix failures, and attempt E2E verification by running the application ]${noLearning ? '' : ` -[ Phase 7 — Lessons: You write LESSONS.md in the session directory and update/create skill files with key learnings ] +[ Phase 7 — Lessons: You write LESSONS.md in the session directory and update/create skill files with key learnings ]`} @@ -101,10 +101,11 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} ` } -function buildDeepInstructionsPrompt(noAskUser: boolean): string { +function buildDeepInstructionsPrompt(noAskUser: boolean, noLearning: boolean): string { + const totalPhases = noLearning ? 6 : 7 return `Act as a helpful assistant and freely respond to the user's request however would be most helpful to the user. Use your judgement to orchestrate the completion of the user's request using your specialized sub-agents and tools as needed. Take your time and be comprehensive. Don't surprise the user. For example, don't modify files if the user has not asked you to do so at least implicitly. -Follow this 7-phase workflow for implementation tasks. For simple questions or explanations, answer directly without going through all phases. +Follow this ${totalPhases}-phase workflow for implementation tasks. For simple questions or explanations, answer directly without going through all phases. 
## Two-Phase Todo Tracking @@ -119,8 +120,8 @@ These help the user understand what's about to happen before any code is written **Implementation todos** — Write these AFTER Phase 3 (Plan) is complete, replacing the planning todos: - One todo per implementation step from the finalized PLAN.md - Phase 5: Review loop -- Phase 6: Validate changes -- Phase 7: Capture lessons & update skills +- Phase 6: Validate changes${noLearning ? '' : ` +- Phase 7: Capture lessons & update skills`} Update these as you complete each step during implementation. ## Phase 1 — Codebase Context & Research @@ -174,7 +175,7 @@ Create a detailed implementation plan, iteratively critique it, and save it alon b. If the thinker raises valid critiques, update PLAN.md to address them. c. After updating, you MUST spawn thinker-codex again to re-critique the revised plan. d. Repeat until the thinker finds no new substantive critiques. Do NOT skip the re-critique — every revision must be verified. -3. Write implementation todos (the second phase of todos) — one todo per plan step, plus todos for phases 5-7. +3. Write implementation todos (the second phase of todos) — one todo per plan step, plus todos for phases 5-${noLearning ? '6' : '7'}. ## Phase 4 — Implement @@ -205,7 +206,7 @@ Thoroughly validate the changes: - For a CLI tool: run it with relevant arguments - For a library: write and run a small integration script - For config/infra changes: validate the configuration is correct -4. If E2E verification reveals issues, fix them and re-validate. +4. If E2E verification reveals issues, fix them and re-validate.${noLearning ? '' : ` ## Phase 7 — Lessons @@ -236,21 +237,22 @@ Capture learnings for future sessions: a. Spawn thinker-codex to critique your LESSONS.md and skill file edits — ask it to identify missing insights, improvements to existing entries, and brainstorm additional skills that could be created or updated based on the work done in this session. b. 
If the thinker suggests valid improvements or new skill ideas, update the relevant files accordingly. c. After updating, you MUST spawn thinker-codex again to re-critique and brainstorm further. - d. Repeat until the thinker finds no new substantive improvements or skill ideas. Do NOT skip the re-critique — every revision must be verified.${noAskUser ? '' : ` -4. Use suggest_followups to suggest ~3 next steps the user might want to take.`} + d. Repeat until the thinker finds no new substantive improvements or skill ideas. Do NOT skip the re-critique — every revision must be verified.`}${noAskUser ? '' : ` +${noLearning ? '1' : '4'}. Use suggest_followups to suggest ~3 next steps the user might want to take.`} Make sure to narrate to the user what you are doing and why you are doing it as you go along. Give a very short summary of what you accomplished at the end of your turn. ## Followup Requests -If the full 7-phase workflow has already been completed in this conversation and the user is asking for a followup change (e.g. "also add X" or "tweak Y"), you do NOT need to repeat the entire workflow. Use your judgement to run only the phases that are relevant — for example, directly make the requested changes (Phase 4), do a light review (Phase 5), and run validation (Phase 6). Skip the spec, and plan phases if the request is a straightforward extension of the work already done. Still update LESSONS.md and skills if you learn anything new. +If the full ${totalPhases}-phase workflow has already been completed in this conversation and the user is asking for a followup change (e.g. "also add X" or "tweak Y"), you do NOT need to repeat the entire workflow. Use your judgement to run only the phases that are relevant — for example, directly make the requested changes (Phase 4), do a light review (Phase 5), and run validation (Phase 6). Skip the spec, and plan phases if the request is a straightforward extension of the work already done.${noLearning ? 
'' : ' Still update LESSONS.md and skills if you learn anything new.'} ` } export function createBaseDeep(options?: { noAskUser?: boolean + noLearning?: boolean }): Omit { - const { noAskUser = false } = options ?? {} + const { noAskUser = false, noLearning = false } = options ?? {} return { publisher, model: 'openai/gpt-5.3-codex', @@ -299,20 +301,20 @@ export function createBaseDeep(options?: { 'gpt-5-agent', 'context-pruner', ], - systemPrompt: buildDeepSystemPrompt(noAskUser), - instructionsPrompt: buildDeepInstructionsPrompt(noAskUser), - stepPrompt: `Workflow phases reminder (7 phases): + systemPrompt: buildDeepSystemPrompt(noAskUser, noLearning), + instructionsPrompt: buildDeepInstructionsPrompt(noAskUser, noLearning), + stepPrompt: `Workflow phases reminder (${noLearning ? 6 : 7} phases): **Planning todos** (write at start): Phase 1 → Phase 2 → Phase 3 1. Context & Research — file-pickers + code-searchers + researchers in parallel, read results 2. Spec — draft SPEC.md, ${noAskUser ? '' : 'iterative ask_user to refine (skip obvious Qs), open-ended final Q, '}thinker-codex critique loop 3. Plan — write PLAN.md, thinker-codex critique loop -**Implementation todos** (write after Plan): one todo per plan step + phases 5-7 +**Implementation todos** (write after Plan): one todo per plan step + phases 5-${noLearning ? '6' : '7'} 4. Implement — fully build the spec using file editing tools 5. Review Loop — code-reviewer-codex → fix → re-review until clean -6. Validate — run tests + typechecks, add new tests, do E2E verification -7. Lessons — write LESSONS.md, update/create skills, iterative thinker-codex brainstorm loop`, +6. Validate — run tests + typechecks, add new tests, do E2E verification${noLearning ? '' : ` +7. Lessons — write LESSONS.md, update/create skills, iterative thinker-codex brainstorm loop`}`, handleSteps: function* ({ params }) { while (true) { // Run context-pruner before each step. 
From ef06634a03df209fb9fe570e4e28001354ce8b12 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 3 Mar 2026 09:29:10 -0800 Subject: [PATCH 004/749] turn off openai token count for now --- web/src/app/api/v1/token-count/_post.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/web/src/app/api/v1/token-count/_post.ts b/web/src/app/api/v1/token-count/_post.ts index 616164ee39..ceb3d71e4a 100644 --- a/web/src/app/api/v1/token-count/_post.ts +++ b/web/src/app/api/v1/token-count/_post.ts @@ -1,5 +1,4 @@ import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' -import { isOpenAIProviderModel } from '@codebuff/common/constants/chatgpt-oauth' import { isClaudeModel, toAnthropicModelId, @@ -78,7 +77,7 @@ export async function postTokenCount(params: { const { messages, system, model } = bodyResult.data try { - const useOpenAI = model != null && isOpenAIProviderModel(model) + const useOpenAI = model != null && false // isOpenAIProviderModel(model) const inputTokens = useOpenAI ? 
await countTokensViaOpenAI({ messages, system, model, fetch, logger }) : await countTokensViaAnthropic({ From 9e9f788948b65c562c0ec76a12a1167c40145dcb Mon Sep 17 00:00:00 2001 From: layla <111667698+04cb@users.noreply.github.com> Date: Wed, 4 Mar 2026 02:51:41 +0800 Subject: [PATCH 005/749] Fix docs: align markdown table in knowledge-files.mdx (#449) --- web/src/content/tips/knowledge-files.mdx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/web/src/content/tips/knowledge-files.mdx b/web/src/content/tips/knowledge-files.mdx index 5d20178d26..64df4be714 100644 --- a/web/src/content/tips/knowledge-files.mdx +++ b/web/src/content/tips/knowledge-files.mdx @@ -107,12 +107,12 @@ Then add your global preferences: ### When to Use Home Directory vs Project Knowledge Files -| Home Directory (`~/.knowledge.md`) | Project (`knowledge.md`) | -|-----------------------------------|------------------------------------| -| Personal coding preferences | Project-specific conventions | -| Preferred frameworks/tools | Architecture decisions | -| Communication style | Build and deploy commands | -| Global defaults | Team coding standards | +| Home Directory (`~/.knowledge.md`) | Project (`knowledge.md`) | +|-----------------------------------|-----------------------------| +| Personal coding preferences | Project-specific conventions | +| Preferred frameworks/tools | Architecture decisions | +| Communication style | Build and deploy commands | +| Global defaults | Team coding standards | Both files are loaded—project knowledge files add to (and can override) your home directory preferences. 
From 5d8d3cd8a4b236e67d9f861f0dea9200987538f8 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 3 Mar 2026 11:04:37 -0800 Subject: [PATCH 006/749] .md files from run --- .../03-03-09:09-add-console-log/LESSONS.md | 15 +++++++++++ .../03-03-09:09-add-console-log/PLAN.md | 16 ++++++++++++ .../03-03-09:09-add-console-log/SPEC.md | 25 +++++++++++++++++++ .agents/skills/meta/SKILL.md | 10 ++++++++ 4 files changed, 66 insertions(+) create mode 100644 .agents/sessions/03-03-09:09-add-console-log/LESSONS.md create mode 100644 .agents/sessions/03-03-09:09-add-console-log/PLAN.md create mode 100644 .agents/sessions/03-03-09:09-add-console-log/SPEC.md create mode 100644 .agents/skills/meta/SKILL.md diff --git a/.agents/sessions/03-03-09:09-add-console-log/LESSONS.md b/.agents/sessions/03-03-09:09-add-console-log/LESSONS.md new file mode 100644 index 0000000000..271cfead5b --- /dev/null +++ b/.agents/sessions/03-03-09:09-add-console-log/LESSONS.md @@ -0,0 +1,15 @@ +# LESSONS + +## What went well +- `git diff -- cli/src/index.tsx` immediately after editing made it easy to enforce exact scope for a one-line change. +- Validating with `bun run cli/src/index.tsx --help` gave a quick, non-effectful end-to-end check that startup output works. + +## What was tricky +- Bun script invocation shape from repo root was easy to misremember: `bun --cwd cli run typecheck` failed, while `bun run --cwd cli typecheck` succeeded. + +## Useful patterns +- Entrypoint logs placed at the top of `main()` apply to all command paths that enter `main()`; verify with a non-interactive path first. +- For tiny requests, combine: (1) minimal code edit, (2) scoped diff check, (3) one runtime smoke check, (4) one typecheck. + +## Future efficiency notes +- Put exact validation commands directly in `PLAN.md` to avoid command-syntax backtracking during validation. 
diff --git a/.agents/sessions/03-03-09:09-add-console-log/PLAN.md b/.agents/sessions/03-03-09:09-add-console-log/PLAN.md new file mode 100644 index 0000000000..5b27b95678 --- /dev/null +++ b/.agents/sessions/03-03-09:09-add-console-log/PLAN.md @@ -0,0 +1,16 @@ +# PLAN + +## Implementation Steps +1. Update `cli/src/index.tsx` by adding `console.log('Codebuff CLI starting')` as the first statement in `main()`. +2. Inspect the diff to confirm scope: exactly one new `console.log` line in `cli/src/index.tsx` and no unintended edits. +3. Run lightweight validation for CLI startup behavior: + - Run a non-interactive path (`--help`) and confirm the line appears once. + - Confirm the log sits before command branching in `main()` so it applies to all `main()` paths. + +## Dependencies / Ordering +- Step 1 must happen before Step 2 and Step 3. +- Step 2 should complete before Step 3 to ensure we validate the intended change only. + +## Risk Areas +- Low risk overall. +- Minor UX risk: the new stdout line appears for all command paths entering `main()` (including `--help`, `login`, and `publish`). This is intentional per spec. diff --git a/.agents/sessions/03-03-09:09-add-console-log/SPEC.md b/.agents/sessions/03-03-09:09-add-console-log/SPEC.md new file mode 100644 index 0000000000..69d397f76c --- /dev/null +++ b/.agents/sessions/03-03-09:09-add-console-log/SPEC.md @@ -0,0 +1,25 @@ +# SPEC + +## Overview +Add a single startup `console.log` to the CLI entrypoint so there is explicit stdout output when the CLI boots. + +## Requirements +1. Modify `cli/src/index.tsx` only for functional code changes. +2. Add exactly one `console.log(...)` statement. +3. Place the log at the start of `main()`. +4. Use a static message string (no timestamp or dynamic args). Chosen message: `Codebuff CLI starting`. +5. The log should print for any execution path that enters `main()` (including normal startup and command modes like `login`/`publish`). +6. 
Keep all existing behavior unchanged aside from the added stdout line. + +## Technical Approach +Insert one `console.log('Codebuff CLI starting')` call as the first statement inside `main()` so it prints once per process run before the rest of startup flow proceeds. + +## Files to Create/Modify +- `cli/src/index.tsx` (modify) +- `.agents/sessions/03-03-09:09-add-console-log/SPEC.md` (this spec) + +## Out of Scope +- Replacing existing logger usage with `console.log` +- Adding additional logs +- Refactoring startup flow or command handling +- Any server/web/API changes diff --git a/.agents/skills/meta/SKILL.md b/.agents/skills/meta/SKILL.md new file mode 100644 index 0000000000..7dd06229d2 --- /dev/null +++ b/.agents/skills/meta/SKILL.md @@ -0,0 +1,10 @@ +--- +name: meta +description: Broad project-level implementation and validation heuristics +--- + +# Meta + +- When validating CLI changes, run a non-effectful command path first (for example `--help`) before any command that could trigger external side effects. (from .agents/sessions/03-03-09:09-add-console-log) +- For tightly scoped edits, pair runtime smoke-checks with `git diff -- ` to verify no unintended spillover. (from .agents/sessions/03-03-09:09-add-console-log) +- From monorepo root, run workspace scripts as `bun run --cwd + + +
Top navigation should disappear
+
+
+

Important Answer

+

The web researcher should see this useful paragraph.

+

React 19 useActionState returns state, a form action, and pending state.

+
+
+
Footer boilerplate should disappear
+ + + `) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.title).toBe('Research Source') + expect(result.description).toBe('A concise source description.') + expect(result.text).toContain('Important Answer') + expect(result.text).toContain('useActionState returns state') + expect(result.text).not.toContain('.unused-') + expect(result.text).not.toContain('Top navigation') + }) + + it('prefers article content over a larger page main area', async () => { + const result = await successValue(` + + Repository Page + +
+
+

Folders and files

+ ${Array.from( + { length: 40 }, + (_, index) => `file-${index}.ts`, + ).join('')} +
+
+

Project README

+

This is the source content the researcher needs.

+
+
+ + + `) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.text).toContain('Project README') + expect(result.text).toContain('source content') + expect(result.text).not.toContain('Folders and files') + expect(result.text).not.toContain('file-39.ts') + }) + + it('does not add spaces between syntax-highlighted code tokens', async () => { + const result = await successValue(` +
+
const answer=42;
+
+ `) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.text).toContain('const answer=42;') + }) + + it('leaves invalid numeric HTML entities unchanged', async () => { + const result = await successValue( + '

Bad entity: �

', + ) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.text).toContain('Bad entity: �') + }) + + it('rejects non-http URLs', async () => { + const result = await readUrl({ + url: 'file:///etc/passwd', + fetch: async () => { + throw new Error('fetch should not be called') + }, + }) + + expect(result[0].value).toEqual({ + url: 'file:///etc/passwd', + errorMessage: 'Only http:// and https:// URLs are supported', + }) + }) + + it('rejects non-http URLs at the tool schema boundary', () => { + expect(() => + clientToolCallSchema.parse({ + toolName: 'read_url', + input: { url: 'file:///etc/passwd' }, + }), + ).toThrow() + }) + + it('truncates extracted text to max_chars', async () => { + const result = await readUrl({ + url: 'https://example.com/long', + max_chars: 1_000, + fetch: async () => + new Response(`

${'word '.repeat(1_000)}

`, { + status: 200, + headers: { 'content-type': 'text/html' }, + }), + }) + const value = result[0].value + + expect('errorMessage' in value).toBe(false) + if ('errorMessage' in value) return + + expect(value.truncated).toBe(true) + expect(value.text.length).toBeLessThanOrEqual(1_030) + expect(value.text).toContain('[Content truncated]') + }) + + it('returns pretty-printed JSON for JSON responses', async () => { + const result = await successValue('{"name":"Codebuff","answer":42}', { + contentType: 'application/json', + }) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.text).toContain('"name": "Codebuff"') + expect(result.text).toContain('"answer": 42') + }) + + it('supports vendor JSON content types', async () => { + const result = await successValue('{"type":"metadata"}', { + contentType: 'application/ld+json', + }) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.text).toContain('"type": "metadata"') + }) + + it('extracts markdown frontmatter into metadata and omits it from text', async () => { + const result = await successValue( + [ + '---', + 'title: "Readable Docs"', + "description: 'A useful docs page'", + '---', + '# First Heading', + 'Body with · entity.', + ].join('\n'), + { + contentType: 'text/markdown; charset=utf-8', + }, + ) + + expect('errorMessage' in result).toBe(false) + if ('errorMessage' in result) return + + expect(result.title).toBe('Readable Docs') + expect(result.description).toBe('A useful docs page') + expect(result.text.startsWith('# First Heading')).toBe(true) + expect(result.text).toContain('Body with * entity.') + expect(result.text).not.toContain('title:') + }) + + it('supports CRLF markdown frontmatter', async () => { + const result = await successValue( + '---\r\ntitle: CRLF Docs\r\n---\r\n# Body', + { + contentType: 'text/markdown; charset=utf-8', + }, + ) + + expect('errorMessage' in result).toBe(false) + if 
('errorMessage' in result) return + + expect(result.title).toBe('CRLF Docs') + expect(result.text).toBe('# Body') + }) +}) diff --git a/sdk/src/__tests__/researcher-web.integration.test.ts b/sdk/src/__tests__/researcher-web.integration.test.ts index d35498bec4..a5e981654a 100644 --- a/sdk/src/__tests__/researcher-web.integration.test.ts +++ b/sdk/src/__tests__/researcher-web.integration.test.ts @@ -63,6 +63,72 @@ function extractOutputText(output: AgentOutput): string { return assistantText.join('\n') } +function summarizeToolTrace(events: PrintModeEvent[]): { + readUrlCount: number + lines: string[] +} { + const lines: string[] = [] + let readUrlCount = 0 + + for (const event of events) { + if (event.type === 'tool_call') { + if (event.toolName === 'web_search') { + lines.push(`tool_call web_search query=${event.input.query}`) + } else if (event.toolName === 'read_url') { + readUrlCount += 1 + lines.push(`tool_call read_url url=${event.input.url}`) + } else { + lines.push(`tool_call ${event.toolName}`) + } + continue + } + + if (event.type !== 'tool_result') continue + + const output = event.output[0] + const value = output?.type === 'json' ? output.value : undefined + if (!value || typeof value !== 'object') { + lines.push(`tool_result ${event.toolName} empty`) + continue + } + + if (event.toolName === 'read_url') { + const result = value as { + url?: string + finalUrl?: string + status?: number + title?: string + text?: string + truncated?: boolean + errorMessage?: string + } + if (result.errorMessage) { + lines.push(`tool_result read_url error=${result.errorMessage}`) + } else { + lines.push( + [ + 'tool_result read_url', + `status=${result.status}`, + `finalUrl=${result.finalUrl}`, + `title=${JSON.stringify(result.title ?? '')}`, + `textChars=${result.text?.length ?? 0}`, + `truncated=${result.truncated ?? 
false}`, + ].join(' '), + ) + } + } else if (event.toolName === 'web_search') { + const result = value as { result?: string; errorMessage?: string } + lines.push( + result.errorMessage + ? `tool_result web_search error=${result.errorMessage}` + : `tool_result web_search chars=${result.result?.length ?? 0}`, + ) + } + } + + return { readUrlCount, lines } +} + describe('researcher-web SDK integration', () => { it( `runs researcher-web through the SDK and answers with ${EXPECTED_KEYWORD}`, @@ -98,13 +164,21 @@ describe('researcher-web SDK integration', () => { }, prompt: [ 'Use web search to answer this React docs question.', - 'After searching, fetch the most relevant React docs page with run_terminal_command before answering.', + 'After searching, fetch the most relevant React docs page with read_url before answering.', 'In React 19, which hook returns state, a form action, and an isPending value for form actions?', 'Answer with the exact hook name and one short sentence.', ].join(' '), }) const outputText = extractOutputText(result.output) + const trace = summarizeToolTrace(events) + console.log( + [ + 'researcher-web SDK trace:', + ...trace.lines.map((line) => ` ${line}`), + `read_url fetch count: ${trace.readUrlCount}`, + ].join('\n'), + ) console.log('researcher-web SDK output:', outputText) expect(result.output.type).not.toBe('error') @@ -119,8 +193,7 @@ describe('researcher-web SDK integration', () => { expect( events.some( (event) => - event.type === 'tool_call' && - event.toolName === 'run_terminal_command', + event.type === 'tool_call' && event.toolName === 'read_url', ), ).toBe(true) }, diff --git a/sdk/src/run.ts b/sdk/src/run.ts index b492443c39..4014e85449 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -29,6 +29,7 @@ import { glob } from './tools/glob' import { listDirectory } from './tools/list-directory' import { getProjectPathLookupKeys } from './tools/path-utils' import { getFiles } from './tools/read-files' +import { readUrl } from 
'./tools/read-url' import { runTerminalCommand } from './tools/run-terminal-command' import type { CustomToolDefinition } from './custom-tool' @@ -704,6 +705,8 @@ async function handleToolCall({ cwd: path.resolve(resolvedCwd, input.cwd ?? '.'), env, } as Parameters[0]) + } else if (toolName === 'read_url') { + result = await readUrl(input as Parameters[0]) } else if (toolName === 'code_search') { result = await codeSearch({ projectPath: requireCwd(cwd, 'code_search'), diff --git a/sdk/src/tools/read-url.ts b/sdk/src/tools/read-url.ts new file mode 100644 index 0000000000..9bd5c89f86 --- /dev/null +++ b/sdk/src/tools/read-url.ts @@ -0,0 +1,413 @@ +import type { CodebuffToolOutput } from '../../../common/src/tools/list' + +const DEFAULT_MAX_CHARS = 20_000 +const MAX_RESPONSE_BYTES = 2_000_000 +const FETCH_TIMEOUT_MS = 20_000 +const USER_AGENT = + 'Mozilla/5.0 (compatible; CodebuffResearchBot/1.0; +https://codebuff.com)' + +type ReadUrlOutput = CodebuffToolOutput<'read_url'> +type FetchLike = ( + input: string | URL | Request, + init?: RequestInit, +) => Promise + +function errorResult( + url: string | undefined, + errorMessage: string, +): ReadUrlOutput { + return [{ type: 'json', value: { ...(url ? { url } : {}), errorMessage } }] +} + +function isAllowedUrl(url: URL): boolean { + return url.protocol === 'http:' || url.protocol === 'https:' +} + +function getHeader(headers: Headers, name: string): string | undefined { + return headers.get(name) ?? 
undefined +} + +async function readResponseBody( + response: Response, + maxBytes: number, +): Promise { + const contentLength = getHeader(response.headers, 'content-length') + if (contentLength && Number(contentLength) > maxBytes) { + throw new Error(`Response is too large (${contentLength} bytes)`) + } + + if (!response.body) { + const buffer = await response.arrayBuffer() + if (buffer.byteLength > maxBytes) { + throw new Error(`Response is too large (${buffer.byteLength} bytes)`) + } + return new TextDecoder().decode(buffer) + } + + const reader = response.body.getReader() + const chunks: Uint8Array[] = [] + let totalBytes = 0 + + while (true) { + const { done, value } = await reader.read() + if (done) break + if (!value) continue + + totalBytes += value.byteLength + if (totalBytes > maxBytes) { + await reader.cancel() + throw new Error(`Response exceeded ${maxBytes} bytes`) + } + chunks.push(value) + } + + const body = new Uint8Array(totalBytes) + let offset = 0 + for (const chunk of chunks) { + body.set(chunk, offset) + offset += chunk.byteLength + } + + return new TextDecoder().decode(body) +} + +function decodeHtmlEntities(text: string): string { + const namedEntities: Record = { + amp: '&', + apos: "'", + copy: '(c)', + hellip: '...', + gt: '>', + lt: '<', + mdash: '-', + middot: '*', + nbsp: ' ', + ndash: '-', + quot: '"', + rsquo: "'", + } + + return text.replace(/&(#x?[0-9a-fA-F]+|[a-zA-Z]+);/g, (entity, body) => { + if (body[0] === '#') { + const isHex = body[1]?.toLowerCase() === 'x' + const value = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10) + return Number.isFinite(value) && value >= 0 && value <= 0x10ffff + ? String.fromCodePoint(value) + : entity + } + return namedEntities[body] ?? 
entity + }) +} + +function normalizeText(text: string): string { + return text + .replace(/\r/g, '') + .replace(/[ \t\f\v]+/g, ' ') + .replace(/ *\n */g, '\n') + .replace(/\n{3,}/g, '\n\n') + .split('\n') + .map((line) => line.trim()) + .filter(Boolean) + .join('\n') + .trim() +} + +function extractFirstMatch(html: string, pattern: RegExp): string | undefined { + const match = html.match(pattern) + if (!match?.[1]) return undefined + return normalizeText(decodeHtmlEntities(stripTags(match[1]))) +} + +function stripTags(html: string): string { + return html.replace(/<[^>]*>/g, ' ') +} + +function removeElement(html: string, tagName: string): string { + return html.replace( + new RegExp(`<${tagName}\\b[^>]*>[\\s\\S]*?<\\/${tagName}>`, 'gi'), + '\n', + ) +} + +function extractElementContents(html: string, tagName: string): string[] { + const matches = html.matchAll( + new RegExp(`<${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'gi'), + ) + return Array.from(matches, (match) => match[1]).filter(Boolean) +} + +function selectReadableHtml(html: string): string { + const articleCandidates = extractElementContents(html, 'article') + if (articleCandidates.length > 0) { + return articleCandidates.reduce((best, candidate) => + stripTags(candidate).length > stripTags(best).length ? candidate : best, + ) + } + + const mainCandidates = extractElementContents(html, 'main') + if (mainCandidates.length > 0) { + return mainCandidates.reduce((best, candidate) => + stripTags(candidate).length > stripTags(best).length ? 
candidate : best, + ) + } + + return html +} + +function extractMetaContent(html: string, name: string): string | undefined { + const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + const patterns = [ + new RegExp( + `]*(?:name|property)=["']${escapedName}["'])(?=[^>]*content=["']([^"']*)["'])[^>]*>`, + 'i', + ), + new RegExp( + `]*content=["']([^"']*)["'])(?=[^>]*(?:name|property)=["']${escapedName}["'])[^>]*>`, + 'i', + ), + ] + + for (const pattern of patterns) { + const match = html.match(pattern) + if (match?.[1]) return normalizeText(decodeHtmlEntities(match[1])) + } + return undefined +} + +function extractHtml(html: string): { + title?: string + description?: string + text: string +} { + const title = extractFirstMatch(html, /]*>([\s\S]*?)<\/title>/i) + const description = + extractMetaContent(html, 'description') ?? + extractMetaContent(html, 'og:description') + + let readable = html + .replace(//g, '\n') + .replace(/]*>/gi, '\n') + + for (const tagName of [ + 'script', + 'style', + 'svg', + 'canvas', + 'iframe', + 'noscript', + 'nav', + 'header', + 'footer', + 'form', + 'button', + 'select', + ]) { + readable = removeElement(readable, tagName) + } + + readable = selectReadableHtml(readable) + + readable = readable + .replace(//gi, '\n') + .replace( + /<\/(p|div|section|article|main|aside|li|tr|td|th|h[1-6]|blockquote|pre)>/gi, + '\n', + ) + .replace(/<(li|tr|h[1-6])\b[^>]*>/gi, '\n') + .replace(/<[^>]*>/g, '') + + const text = normalizeText(decodeHtmlEntities(readable)) + return { title, description, text } +} + +function extractMarkdownFrontmatter(body: string): { + title?: string + description?: string + text: string +} { + const match = body.match(/^---\s*\r?\n([\s\S]*?)\r?\n---\s*\r?\n?/) + if (!match) { + return { text: normalizeText(decodeHtmlEntities(body)) } + } + + const frontmatter = match[1] + const getValue = (key: 'title' | 'description') => { + const valueMatch = frontmatter.match( + new 
RegExp(`^${key}:\\s*(?:"([^"]*)"|'([^']*)'|(.+))\\s*$`, 'm'), + ) + return normalizeText( + decodeHtmlEntities( + valueMatch?.[1] ?? valueMatch?.[2] ?? valueMatch?.[3] ?? '', + ), + ) + } + + return { + title: getValue('title') || undefined, + description: getValue('description') || undefined, + text: normalizeText(decodeHtmlEntities(body.slice(match[0].length))), + } +} + +function isJsonContentType(contentType: string): boolean { + return ( + contentType.includes('application/json') || contentType.includes('+json') + ) +} + +function isMarkdownContentType(contentType: string): boolean { + return contentType.includes('text/markdown') +} + +function isSupportedContentType(contentType: string): boolean { + return /^(text\/|application\/(json|[^;\s/]+\+json|xhtml\+xml|xml|rss\+xml|atom\+xml)\b)/i.test( + contentType, + ) +} + +function extractTextByContentType( + contentType: string, + body: string, +): { + title?: string + description?: string + text: string +} { + const lowerContentType = contentType.toLowerCase() + + if ( + lowerContentType.includes('text/html') || + lowerContentType.includes('application/xhtml') + ) { + return extractHtml(body) + } + + if (isJsonContentType(lowerContentType)) { + try { + return { text: JSON.stringify(JSON.parse(body), null, 2) } + } catch { + return { text: normalizeText(body) } + } + } + + if (isMarkdownContentType(lowerContentType)) { + return extractMarkdownFrontmatter(body) + } + + if ( + lowerContentType.startsWith('text/') || + lowerContentType.includes('application/xml') || + lowerContentType.includes('application/rss+xml') || + lowerContentType.includes('application/atom+xml') + ) { + return { text: normalizeText(body) } + } + + return { text: normalizeText(body) } +} + +function truncateText( + text: string, + maxChars: number, +): { + text: string + truncated: boolean +} { + if (text.length <= maxChars) { + return { text, truncated: false } + } + return { + text: `${text.slice(0, maxChars).trimEnd()}\n\n[Content 
truncated]`, + truncated: true, + } +} + +export async function readUrl({ + url, + max_chars = DEFAULT_MAX_CHARS, + fetch: fetchImpl = globalThis.fetch, +}: { + url: string + max_chars?: number + fetch?: FetchLike +}): Promise { + let parsedUrl: URL + try { + parsedUrl = new URL(url) + } catch { + return errorResult(url, 'Invalid URL') + } + + if (!isAllowedUrl(parsedUrl)) { + return errorResult(url, 'Only http:// and https:// URLs are supported') + } + + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS) + + try { + const response = await fetchImpl(parsedUrl.toString(), { + redirect: 'follow', + signal: controller.signal, + headers: { + accept: + 'text/html,application/xhtml+xml,application/json,text/plain;q=0.9,*/*;q=0.8', + 'accept-language': 'en-US,en;q=0.9', + 'user-agent': USER_AGENT, + }, + }) + + if (!response.ok) { + return errorResult( + url, + `Failed to fetch URL: ${response.status} ${response.statusText}`, + ) + } + + const contentType = getHeader(response.headers, 'content-type') ?? '' + if (contentType && !isSupportedContentType(contentType)) { + return errorResult( + url, + `Unsupported content type: ${contentType || 'unknown'}`, + ) + } + + const body = await readResponseBody(response, MAX_RESPONSE_BYTES) + const extracted = extractTextByContentType(contentType, body) + const truncated = truncateText(extracted.text, max_chars) + + if (!truncated.text) { + return errorResult(url, 'No readable text found at URL') + } + + return [ + { + type: 'json', + value: { + url, + finalUrl: response.url || parsedUrl.toString(), + status: response.status, + ...(contentType ? { contentType } : {}), + ...(extracted.title ? { title: extracted.title } : {}), + ...(extracted.description + ? 
{ description: extracted.description } + : {}), + text: truncated.text, + truncated: truncated.truncated, + }, + }, + ] + } catch (error) { + const isAbort = error instanceof Error && error.name === 'AbortError' + return errorResult( + url, + isAbort + ? `Timed out after ${FETCH_TIMEOUT_MS} ms` + : error instanceof Error + ? error.message + : 'Unknown error', + ) + } finally { + clearTimeout(timeout) + } +} From 101c88ae1f89704b6d12e72d8c934fb45ba48ec2 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 21 May 2026 13:53:45 -0700 Subject: [PATCH 735/749] Add read_url to base2! --- agents/base2/base2.ts | 1 + cli/src/components/tools/read-url.tsx | 33 +++++++++++++++++++++++++++ cli/src/components/tools/registry.ts | 2 ++ 3 files changed, 36 insertions(+) create mode 100644 cli/src/components/tools/read-url.tsx diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 2245a81b48..924683e0ac 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -108,6 +108,7 @@ export function createBase2( !isFree && 'propose_str_replace', !isFree && 'propose_write_file', !noAskUser && 'ask_user', + 'read_url', 'skill', 'set_output', 'list_directory', diff --git a/cli/src/components/tools/read-url.tsx b/cli/src/components/tools/read-url.tsx new file mode 100644 index 0000000000..5cb972b367 --- /dev/null +++ b/cli/src/components/tools/read-url.tsx @@ -0,0 +1,33 @@ +import { SimpleToolCallItem } from './tool-call-item' +import { defineToolComponent } from './types' + +import type { ChatTheme } from '../../types/theme-system' +import type { ToolRenderConfig } from './types' + +/** + * UI component for read_url tool. + * Displays the URL being read in a compact format. + */ +export const ReadURLComponent = defineToolComponent({ + toolName: 'read_url', + + render(toolBlock, theme): ToolRenderConfig { + const input = toolBlock.input as { url?: string } | undefined + + const url = typeof input?.url === 'string' ? 
input.url.trim() : '' + + if (!url) { + return { content: null } + } + + return { + content: ( + + ), + } + }, +}) diff --git a/cli/src/components/tools/registry.ts b/cli/src/components/tools/registry.ts index 6eb94b7e22..1367d22e45 100644 --- a/cli/src/components/tools/registry.ts +++ b/cli/src/components/tools/registry.ts @@ -6,6 +6,7 @@ import { ListDirectoryComponent } from './list-directory' import { ReadDocsComponent } from './read-docs' import { ReadFilesComponent } from './read-files' import { ReadSubtreeComponent } from './read-subtree' +import { ReadURLComponent } from './read-url' import { RenderUIComponent } from './render-ui' import { RunTerminalCommandComponent } from './run-terminal-command' import { SkillComponent } from './skill' @@ -38,6 +39,7 @@ const toolComponentRegistry = new Map([ [ReadDocsComponent.toolName, ReadDocsComponent], [ReadFilesComponent.toolName, ReadFilesComponent], [ReadSubtreeComponent.toolName, ReadSubtreeComponent], + [ReadURLComponent.toolName, ReadURLComponent], [RenderUIComponent.toolName, RenderUIComponent], [WriteTodosComponent.toolName, WriteTodosComponent], [StrReplaceComponent.toolName, StrReplaceComponent], From 6d1e53bbfb7fea104dd3ff1807e001c98bd6a30c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 21 May 2026 20:54:22 +0000 Subject: [PATCH 736/749] Bump version to 1.0.678 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 27fea63949..168e2c8f92 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.677", + "version": "1.0.678", "description": "AI coding agent", "license": "MIT", "bin": { From 955df38564a51cf3b1b53fac866330e6c59a4407 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 21 May 2026 20:54:58 +0000 Subject: [PATCH 737/749] Bump Freebuff version to 0.0.95 --- freebuff/cli/release/package.json | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 3b44381529..26831a2d68 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.94", + "version": "0.0.95", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From dc3aad6b3659eeb2b2264d9d58d53302a08ee367 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 21 May 2026 16:49:07 -0700 Subject: [PATCH 738/749] Simple web search tool renderer --- cli/src/components/tools/registry.ts | 2 ++ cli/src/components/tools/web-search.tsx | 33 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 cli/src/components/tools/web-search.tsx diff --git a/cli/src/components/tools/registry.ts b/cli/src/components/tools/registry.ts index 1367d22e45..ed2ed33d4a 100644 --- a/cli/src/components/tools/registry.ts +++ b/cli/src/components/tools/registry.ts @@ -8,6 +8,7 @@ import { ReadFilesComponent } from './read-files' import { ReadSubtreeComponent } from './read-subtree' import { ReadURLComponent } from './read-url' import { RenderUIComponent } from './render-ui' +import { WebSearchComponent } from './web-search' import { RunTerminalCommandComponent } from './run-terminal-command' import { SkillComponent } from './skill' import { StrReplaceComponent } from './str-replace' @@ -41,6 +42,7 @@ const toolComponentRegistry = new Map([ [ReadSubtreeComponent.toolName, ReadSubtreeComponent], [ReadURLComponent.toolName, ReadURLComponent], [RenderUIComponent.toolName, RenderUIComponent], + [WebSearchComponent.toolName, WebSearchComponent], [WriteTodosComponent.toolName, WriteTodosComponent], [StrReplaceComponent.toolName, StrReplaceComponent], [SuggestFollowupsComponent.toolName, SuggestFollowupsComponent], diff --git a/cli/src/components/tools/web-search.tsx b/cli/src/components/tools/web-search.tsx new file mode 100644 index 
0000000000..37477220cc --- /dev/null +++ b/cli/src/components/tools/web-search.tsx @@ -0,0 +1,33 @@ +import { SimpleToolCallItem } from './tool-call-item' +import { defineToolComponent } from './types' + +import type { ChatTheme } from '../../types/theme-system' +import type { ToolRenderConfig } from './types' + +/** + * UI component for web_search tool. + * Displays the search query in a compact format. + */ +export const WebSearchComponent = defineToolComponent({ + toolName: 'web_search', + + render(toolBlock, theme): ToolRenderConfig { + const input = toolBlock.input as { query?: string } | undefined + + const query = typeof input?.query === 'string' ? input.query.trim() : '' + + if (!query) { + return { content: null } + } + + return { + content: ( + + ), + } + }, +}) From 6758b721752950f8abb0cd235d9df4aada482919 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 22 May 2026 14:55:48 -0700 Subject: [PATCH 739/749] Improve Freebuff session limit messaging (#721) --- cli/src/components/limited-landing-panel.tsx | 77 +++++++++++++------- cli/src/components/waiting-room-screen.tsx | 1 + freebuff/e2e/tests/help-command.e2e.test.ts | 26 ++++++- 3 files changed, 74 insertions(+), 30 deletions(-) diff --git a/cli/src/components/limited-landing-panel.tsx b/cli/src/components/limited-landing-panel.tsx index 0dc0f7753a..97fd082795 100644 --- a/cli/src/components/limited-landing-panel.tsx +++ b/cli/src/components/limited-landing-panel.tsx @@ -24,6 +24,8 @@ interface LimitedLandingPanelProps { sessionCounterText: string /** True when the shared per-day quota is fully spent. Disables the CTA. */ isQuotaExhausted: boolean + /** Plain-text explanation shown instead of the CTA when quota is exhausted. */ + exhaustedMessageText: string /** Max vertical rows the panel may occupy. When its content is taller the * panel scrolls (scrollbar shown) instead of letting flexbox compress the * bordered button onto its own border. 
*/ @@ -42,6 +44,7 @@ export const LimitedLandingPanel: React.FC = ({ sessionCounter, sessionCounterText, isQuotaExhausted, + exhaustedMessageText, maxHeight, }) => { const theme = useTheme() @@ -52,16 +55,22 @@ export const LimitedLandingPanel: React.FC = ({ // Rendered height of the panel, matching the JSX below row-for-row so the // scroll budget is exact: name + warning (each wrap-aware) + the counter - // line with its 1-row top/bottom margins + the 3-row bordered button. + // line with its 1-row top/bottom margins + either the 3-row bordered button + // or the exhausted-quota message. + const exhaustedTitleText = 'Daily session limit reached' const wrappedRows = (text: string) => Math.max(1, Math.ceil(text.length / contentMaxWidth)) + const BUTTON_ROWS = 3 // 2 border rows + label + const actionRows = isQuotaExhausted + ? wrappedRows(exhaustedTitleText) + wrappedRows(exhaustedMessageText) + : BUTTON_ROWS const contentHeight = wrappedRows(model.displayName) + (model.warning ? wrappedRows(model.warning) : 0) + 1 /* counter marginTop */ + wrappedRows(sessionCounterText) + 1 /* counter marginBottom */ + - 3 /* button: 2 border rows + label */ + actionRows const needsScroll = contentHeight > maxHeight const viewportHeight = Math.max(1, Math.min(contentHeight, maxHeight)) @@ -72,6 +81,9 @@ export const LimitedLandingPanel: React.FC = ({ // 'center'` on the parent can center the whole block again. const BUTTON_LABEL = 'Start session Enter' const BUTTON_CHROME = 6 // 2 border + 4 padding (paddingLeft/Right 2) + const actionWidth = isQuotaExhausted + ? Math.max(exhaustedTitleText.length, exhaustedMessageText.length) + : BUTTON_LABEL.length + BUTTON_CHROME const panelWidth = Math.min( contentMaxWidth, @@ -79,7 +91,7 @@ export const LimitedLandingPanel: React.FC = ({ model.displayName.length, model.warning?.length ?? 0, sessionCounterText.length, - BUTTON_LABEL.length + BUTTON_CHROME, + actionWidth, ), ) + (needsScroll ? 
1 : 0) /* scrollbar gutter */ @@ -159,30 +171,43 @@ export const LimitedLandingPanel: React.FC = ({ > {sessionCounter} - + + {pending ? ( + 'Starting…' + ) : ( + <> + Start session{' Enter'} + + )} + + + )} ) } diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index c07a171c1c..122e637be9 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -520,6 +520,7 @@ export const WaitingRoomScreen: React.FC = ({ {isLanding && accessTier === 'limited' && ( { describe('Freebuff: /help slash command', () => { let session: FreebuffSession | null = null + const openHelp = async (session: FreebuffSession): Promise => { + const initialOutput = await session.capture() + if (!initialOutput.includes('Enter a coding task')) { + console.log( + 'Skipping /help slash command assertion: Freebuff is not on the chat input screen.', + ) + return null + } + + await session.sendKey('C-u') + for (const key of ['/', 'h', 'e', 'l', 'p']) { + await session.sendKey(key) + } + await session.waitForText('/help', 10_000) + await session.sendKey('Enter') + return session.waitForText('Shortcuts', 10_000) + } + afterEach(async () => { if (session) { await session.stop() @@ -50,8 +68,8 @@ describe('Freebuff: /help slash command', () => { session = await FreebuffSession.start(binary) await session.waitForReady() - await session.send('/help') - const output = await session.capture(2) + const output = await openHelp(session) + if (!output) return // Should show shortcuts section expect(output).toMatch(/shortcut|ctrl|esc/i) @@ -66,8 +84,8 @@ describe('Freebuff: /help slash command', () => { session = await FreebuffSession.start(binary) await session.waitForReady() - await session.send('/help') - const output = await session.capture(2) + const output = await openHelp(session) + if (!output) return // Freebuff should NOT show these paid/subscription commands expect(output).not.toContain('/subscribe') From 
36c1c1a14ad73d08913fc214d2de44da0542bef7 Mon Sep 17 00:00:00 2001 From: brandon chen <9735006+brandonkachen@users.noreply.github.com> Date: Fri, 22 May 2026 16:15:52 -0700 Subject: [PATCH 740/749] Keep base2 current date prompt fresh (#722) Co-authored-by: James Grugett --- agents/base2/base2.ts | 10 +----- agents/types/secret-agent-definition.ts | 1 + .../src/templates/__tests__/strings.test.ts | 35 ++++++++++++++++++- .../agent-runtime/src/templates/strings.ts | 9 +++++ packages/agent-runtime/src/templates/types.ts | 1 + 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 924683e0ac..662cc2a775 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -17,14 +17,6 @@ import { type SecretAgentDefinition, } from '../types/secret-agent-definition' -function formatCurrentDate(date: Date): string { - return new Intl.DateTimeFormat('en-US', { - year: 'numeric', - month: 'long', - day: 'numeric', - }).format(date) -} - export function createBase2( mode: 'default' | 'free' | 'lite' | 'max' | 'fast', options?: { @@ -138,7 +130,7 @@ export function createBase2( systemPrompt: `You are Buffy, a strategic assistant that orchestrates complex coding tasks through specialized sub-agents. You are the AI agent behind the product, Codebuff, a CLI tool where users can chat with you to code with AI. -Current date: ${formatCurrentDate(new Date())}. +Current date: ${PLACEHOLDER.CURRENT_DATE}. 
# Core Mandates diff --git a/agents/types/secret-agent-definition.ts b/agents/types/secret-agent-definition.ts index fa0656f557..cab28c2669 100644 --- a/agents/types/secret-agent-definition.ts +++ b/agents/types/secret-agent-definition.ts @@ -23,6 +23,7 @@ export interface SecretAgentDefinition const placeholderNames = [ 'AGENT_NAME', 'AGENTS_PROMPT', + 'CURRENT_DATE', 'FILE_TREE_PROMPT_SMALL', 'FILE_TREE_PROMPT', 'FILE_TREE_PROMPT_LARGE', diff --git a/packages/agent-runtime/src/templates/__tests__/strings.test.ts b/packages/agent-runtime/src/templates/__tests__/strings.test.ts index 89a11a4aab..b77a115cec 100644 --- a/packages/agent-runtime/src/templates/__tests__/strings.test.ts +++ b/packages/agent-runtime/src/templates/__tests__/strings.test.ts @@ -1,7 +1,8 @@ import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' import { describe, test, expect, mock } from 'bun:test' -import { getAgentPrompt } from '../strings' +import { PLACEHOLDER } from '../types' +import { formatCurrentDate, getAgentPrompt } from '../strings' import type { AgentTemplate } from '../types' import type { AgentState } from '@codebuff/common/types/session-state' @@ -81,6 +82,38 @@ const createMockAgentTemplate = ( }) describe('getAgentPrompt', () => { + test('replaces CURRENT_DATE when formatting prompts', async () => { + const agentTemplate = createMockAgentTemplate({ + id: 'date-agent', + systemPrompt: `Today is ${PLACEHOLDER.CURRENT_DATE}.`, + }) + const agentTemplates: Record = { + 'date-agent': agentTemplate, + } + + const result = await getAgentPrompt({ + agentTemplate, + promptType: { type: 'systemPrompt' }, + fileContext: createMockFileContext(), + agentState: createMockAgentState('date-agent'), + agentTemplates, + additionalToolDefinitions: async () => ({}), + logger: createMockLogger(), + apiKey: TEST_AGENT_RUNTIME_IMPL.apiKey, + databaseAgentCache: TEST_AGENT_RUNTIME_IMPL.databaseAgentCache, + fetchAgentFromDatabase: 
TEST_AGENT_RUNTIME_IMPL.fetchAgentFromDatabase, + }) + + expect(result).toBe(`Today is ${formatCurrentDate(new Date())}.`) + expect(result).not.toContain(PLACEHOLDER.CURRENT_DATE) + }) + + test('formats current date for prompts', () => { + expect(formatCurrentDate(new Date(2026, 4, 22, 12))).toBe( + 'May 22, 2026', + ) + }) + describe('spawnerPrompt inclusion in instructionsPrompt', () => { test('includes spawnerPrompt for each spawnable agent with spawnerPrompt defined', async () => { const filePickerTemplate = createMockAgentTemplate({ diff --git a/packages/agent-runtime/src/templates/strings.ts b/packages/agent-runtime/src/templates/strings.ts index 6ac005a151..8391900fc1 100644 --- a/packages/agent-runtime/src/templates/strings.ts +++ b/packages/agent-runtime/src/templates/strings.ts @@ -29,6 +29,14 @@ import type { ProjectFileContext, } from '@codebuff/common/util/file' +export function formatCurrentDate(date: Date): string { + return new Intl.DateTimeFormat('en-US', { + year: 'numeric', + month: 'long', + day: 'numeric', + }).format(date) +} + export async function formatPrompt( params: { prompt: string @@ -85,6 +93,7 @@ export async function formatPrompt( const toInject: Record string | Promise> = { [PLACEHOLDER.AGENT_NAME]: () => agentTemplate ? 
agentTemplate.displayName || 'Unknown Agent' : 'Buffy', + [PLACEHOLDER.CURRENT_DATE]: () => formatCurrentDate(new Date()), [PLACEHOLDER.FILE_TREE_PROMPT_SMALL]: () => getProjectFileTreePrompt({ fileContext, diff --git a/packages/agent-runtime/src/templates/types.ts b/packages/agent-runtime/src/templates/types.ts index 6ce6739631..7131183991 100644 --- a/packages/agent-runtime/src/templates/types.ts +++ b/packages/agent-runtime/src/templates/types.ts @@ -13,6 +13,7 @@ export type { AgentTemplate, StepGenerator, StepHandler } const placeholderNames = [ 'AGENT_NAME', + 'CURRENT_DATE', 'FILE_TREE_PROMPT_SMALL', 'FILE_TREE_PROMPT', 'FILE_TREE_PROMPT_LARGE', From f9f63c89a2236f51e5c3e576452592caa93ec1dd Mon Sep 17 00:00:00 2001 From: James Grugett Date: Fri, 22 May 2026 16:30:02 -0700 Subject: [PATCH 741/749] [codex] Improve binary spawn errors (#724) --- cli/release/index.js | 83 ++++++++++++++++++++++++++++------- freebuff/cli/release/index.js | 83 ++++++++++++++++++++++++++++------- 2 files changed, 134 insertions(+), 32 deletions(-) diff --git a/cli/release/index.js b/cli/release/index.js index f84e6940c8..bf1eead545 100644 --- a/cli/release/index.js +++ b/cli/release/index.js @@ -490,10 +490,7 @@ async function checkForUpdates(runningProcess, exitListener) { await downloadBinary(latestVersion) - const newChild = spawn(CONFIG.binaryPath, process.argv.slice(2), { - stdio: 'inherit', - detached: false, - }) + const newChild = spawnInstalledBinary({ detached: false }) newChild.on('exit', (code, signal) => { resetTerminal() @@ -501,11 +498,6 @@ async function checkForUpdates(runningProcess, exitListener) { process.exit(signal ? 
1 : (code || 0)) }) - newChild.on('error', (err) => { - console.error('Failed to start codebuff:', err.message) - process.exit(1) - }) - return new Promise(() => {}) } } catch (error) { @@ -561,13 +553,77 @@ function printCrashDiagnostics(code, signal) { console.error('') } -async function main() { - await ensureBinaryExists() +function getInstalledBinaryStatus() { + try { + const stats = fs.statSync(CONFIG.binaryPath) + return stats.isFile() ? `yes (${formatBytes(stats.size)})` : 'no' + } catch { + return 'no' + } +} + +function printSpawnFailure(err) { + resetTerminal() + const code = err && err.code ? ` (${err.code})` : '' + + console.error(`Failed to start ${packageName}: ${err.message}${code}`) + console.error('') + console.error('System info:') + console.error(` Platform: ${process.platform} ${process.arch}`) + console.error(` Node: ${process.version}`) + console.error(` Binary: ${CONFIG.binaryPath}`) + console.error(` Exists: ${getInstalledBinaryStatus()}`) + + if (process.platform === 'win32') { + console.error('') + console.error( + 'On Windows, this can happen when Windows Security or antivirus blocks', + ) + console.error( + 'or quarantines the downloaded executable, or when the binary requires', + ) + console.error('CPU instructions that are not available on this machine.') + } + + console.error('') + console.error('Try deleting the downloaded files and running again:') + console.error(` ${CONFIG.configDir}`) + console.error('') +} + +function spawnInstalledBinary(options = {}) { + if (!fs.existsSync(CONFIG.binaryPath)) { + try { + if (fs.existsSync(CONFIG.metadataPath)) fs.unlinkSync(CONFIG.metadataPath) + } catch { + // best effort + } + const error = new Error( + `downloaded binary is missing at ${CONFIG.binaryPath}`, + ) + error.code = 'BINARY_MISSING' + printSpawnFailure(error) + process.exit(1) + } const child = spawn(CONFIG.binaryPath, process.argv.slice(2), { stdio: 'inherit', + ...options, + }) + + child.on('error', (err) => { + 
printSpawnFailure(err) + process.exit(1) }) + return child +} + +async function main() { + await ensureBinaryExists() + + const child = spawnInstalledBinary() + const exitListener = (code, signal) => { resetTerminal() printCrashDiagnostics(code, signal) @@ -576,11 +632,6 @@ async function main() { child.on('exit', exitListener) - child.on('error', (err) => { - console.error('Failed to start codebuff:', err.message) - process.exit(1) - }) - setTimeout(() => { checkForUpdates(child, exitListener) }, 100) diff --git a/freebuff/cli/release/index.js b/freebuff/cli/release/index.js index 044d86ebc5..ca853b83fb 100644 --- a/freebuff/cli/release/index.js +++ b/freebuff/cli/release/index.js @@ -477,10 +477,7 @@ async function checkForUpdates(runningProcess, exitListener) { await downloadBinary(latestVersion) - const newChild = spawn(CONFIG.binaryPath, process.argv.slice(2), { - stdio: 'inherit', - detached: false, - }) + const newChild = spawnInstalledBinary({ detached: false }) newChild.on('exit', (code, signal) => { resetTerminal() @@ -488,11 +485,6 @@ async function checkForUpdates(runningProcess, exitListener) { process.exit(signal ? 1 : (code || 0)) }) - newChild.on('error', (err) => { - console.error('Failed to start freebuff:', err.message) - process.exit(1) - }) - return new Promise(() => {}) } } catch (error) { @@ -548,13 +540,77 @@ function printCrashDiagnostics(code, signal) { console.error('') } -async function main() { - await ensureBinaryExists() +function getInstalledBinaryStatus() { + try { + const stats = fs.statSync(CONFIG.binaryPath) + return stats.isFile() ? `yes (${formatBytes(stats.size)})` : 'no' + } catch { + return 'no' + } +} + +function printSpawnFailure(err) { + resetTerminal() + const code = err && err.code ? 
` (${err.code})` : '' + + console.error(`Failed to start ${packageName}: ${err.message}${code}`) + console.error('') + console.error('System info:') + console.error(` Platform: ${process.platform} ${process.arch}`) + console.error(` Node: ${process.version}`) + console.error(` Binary: ${CONFIG.binaryPath}`) + console.error(` Exists: ${getInstalledBinaryStatus()}`) + + if (process.platform === 'win32') { + console.error('') + console.error( + 'On Windows, this can happen when Windows Security or antivirus blocks', + ) + console.error( + 'or quarantines the downloaded executable, or when the binary requires', + ) + console.error('CPU instructions that are not available on this machine.') + } + + console.error('') + console.error('Try deleting the downloaded files and running again:') + console.error(` ${CONFIG.configDir}`) + console.error('') +} + +function spawnInstalledBinary(options = {}) { + if (!fs.existsSync(CONFIG.binaryPath)) { + try { + if (fs.existsSync(CONFIG.metadataPath)) fs.unlinkSync(CONFIG.metadataPath) + } catch { + // best effort + } + const error = new Error( + `downloaded binary is missing at ${CONFIG.binaryPath}`, + ) + error.code = 'BINARY_MISSING' + printSpawnFailure(error) + process.exit(1) + } const child = spawn(CONFIG.binaryPath, process.argv.slice(2), { stdio: 'inherit', + ...options, + }) + + child.on('error', (err) => { + printSpawnFailure(err) + process.exit(1) }) + return child +} + +async function main() { + await ensureBinaryExists() + + const child = spawnInstalledBinary() + const exitListener = (code, signal) => { resetTerminal() printCrashDiagnostics(code, signal) @@ -563,11 +619,6 @@ async function main() { child.on('exit', exitListener) - child.on('error', (err) => { - console.error('Failed to start freebuff:', err.message) - process.exit(1) - }) - setTimeout(() => { checkForUpdates(child, exitListener) }, 100) From 83da0c2701103e77b21f3de89d5938b8519b6c0a Mon Sep 17 00:00:00 2001 From: brandon chen 
<9735006+brandonkachen@users.noreply.github.com> Date: Fri, 22 May 2026 22:39:51 -0700 Subject: [PATCH 742/749] [codex] Put VPN and proxy Freebuff users in limited mode (#726) Co-authored-by: James Grugett --- docs/environment-variables.md | 2 +- docs/freebuff-waiting-room.md | 2 +- .../completions/__tests__/completions.test.ts | 34 +++++++------------ web/src/app/api/v1/chat/completions/_post.ts | 7 ++-- .../session/__tests__/session.test.ts | 19 ++++++----- .../free-mode-country-access-cache.test.ts | 11 ++++-- .../__tests__/free-mode-country.test.ts | 4 +-- web/src/server/free-mode-country.ts | 9 ++--- 8 files changed, 42 insertions(+), 46 deletions(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 980272b6d9..8396b7ce79 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -6,7 +6,7 @@ - Server secrets: validated in `packages/internal/src/env-schema.ts` (used via `@codebuff/internal/env`). - Runtime/OS env: pass typed snapshots instead of reading `process.env` throughout the codebase. - `IPINFO_TOKEN` is required; free-mode country gating uses it to check IPinfo privacy signals for VPN/proxy/Tor/relay/hosting traffic. -- `SPUR_TOKEN` is required; hard VPN/proxy/Tor/residential-proxy free-mode blocks require Spur Context API corroboration. In allowlisted countries, a successful clean Spur result overrides IPinfo privacy signals back to full access, while a Spur lookup failure falls back to limited access. +- `SPUR_TOKEN` is required; VPN/proxy/Tor/residential-proxy privacy signals use Spur Context API corroboration. In allowlisted countries, a successful clean Spur result overrides IPinfo privacy signals back to full access, while suspicious or failed Spur lookups fall back to limited access. Cloudflare Tor country detection remains a hard block. - `CODEBUFF_FULL_TELEMETRY=true` or `CODEBUFF_FULL_TELEMETRY_IDS=user-id,email@example.com` disables client analytics sampling for targeted debugging. 
Use sparingly because it can send full CLI log payloads. diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index c0e38b3bf9..76af547f3d 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -181,7 +181,7 @@ All endpoints authenticate via the standard `Authorization: Bearer ` or - Existing active+unexpired row, **different model** → reject with `model_locked` (HTTP 409); `active_instance_id` is **not** rotated so the other CLI stays valid. Client must DELETE the session before switching. - Existing active+expired row → reset to queued with fresh `queued_at` and the requested `model` (re-queue at back). -Before any of those state transitions, the handler requires a resolved country and successful IPinfo/Spur privacy checks. Unsupported countries enter limited Freebuff access. In allowlisted countries, IPinfo privacy signals still receive full access when Spur returns clean context, fall back to limited access when Spur lookup fails, and hard-block only when Spur corroborates VPN/proxy/Tor/residential-proxy traffic. IPinfo lookup failures fail closed into limited access. +Before any of those state transitions, the handler requires a resolved country and IPinfo/Spur privacy classification. Unsupported countries enter limited Freebuff access. In allowlisted countries, IPinfo privacy signals still receive full access when Spur returns clean context, and fall back to limited access when Spur reports suspicious context or lookup fails. IPinfo lookup failures fail closed into limited access. Cloudflare Tor country detection remains a hard block. 
Response shapes: diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 7b97b4aad1..c8fdaa232a 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -628,7 +628,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { ) it( - 'blocks hard VPN/proxy privacy signals before the session gate', + 'puts VPN/proxy privacy signals in limited mode before the session gate', async () => { const req = new NextRequest( 'http://localhost:3000/api/v1/chat/completions', @@ -649,6 +649,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { ) const endFreebuffSession = mock(async () => {}) + const checkSessionAdmissible = mock(async (params) => { + expect(params.accessTier).toBe('limited') + return { ok: true, reason: 'active', remainingMs: 60_000 } as const + }) const response = await postChatCompletionsForTest({ req, getUserInfoFromApiKey: mockGetUserInfoFromApiKey, @@ -659,9 +663,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { fetch: mockFetch, insertMessageBigquery: mockInsertMessageBigquery, loggerWithContext: mockLoggerWithContext, - checkSessionAdmissible: mock(() => { - throw new Error('session gate should not be reached') - }), + checkSessionAdmissible, endFreebuffSession, resolveFreeModeCountryAccess: async () => ({ allowed: false, @@ -676,20 +678,10 @@ describe('/api/v1/chat/completions POST endpoint', () => { clientIpHash: 'test-ip-hash', }), }) - expect(endFreebuffSession).toHaveBeenCalledWith({ - userId: 'user-new-free', - userEmail: null, - }) - expect(response.status).toBe(403) - const body = await response.json() - expect(body).toMatchObject({ - error: 'free_mode_unavailable', - countryCode: 'US', - countryBlockReason: 'anonymous_network', - ipPrivacySignals: ['vpn', 'hosting'], - }) - expect(body.message).toContain('VPN') + 
expect(response.status).toBe(200) + expect(endFreebuffSession).not.toHaveBeenCalled() + expect(checkSessionAdmissible).toHaveBeenCalledTimes(1) const validationEvent = ( mockTrackEvent as ReturnType ).mock.calls @@ -697,18 +689,18 @@ describe('/api/v1/chat/completions POST endpoint', () => { .find( ({ event, properties }) => event === AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR && - properties?.error === 'free_mode_unavailable', + properties?.error === 'free_mode_not_available_in_country', ) expect(validationEvent?.properties).toMatchObject({ - accessStatus: 'blocked', + accessTier: 'limited', + accessStatus: 'limited', countryCode: 'US', ipPrivacySignals: ['vpn', 'hosting'], spurStatus: 'suspicious', privacyDecision: 'corroborated_block', privacyProviderDecision: 'corroborated_hard', - privacyHardBlocked: true, + privacyHardBlocked: false, }) - expect(validationEvent?.properties).not.toHaveProperty('accessTier') }, FETCH_PATH_TEST_TIMEOUT_MS, ) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index d40c30c576..b23e5fe1b7 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -337,9 +337,10 @@ export async function postChatCompletions(params: { ) } - // For free mode requests, classify the request into full, limited, or - // hard-blocked access. Most non-allowlist/privacy cases are limited to the - // cheap DeepSeek Flash path, but VPN/proxy/Tor traffic is rejected outright. + // For free mode requests, classify the request into full or limited + // access. Most non-allowlist/privacy cases, including VPN/proxy traffic, + // are limited to the cheap DeepSeek Flash path; Cloudflare Tor remains a + // hard block. 
if (isFreeModeRequest) { const countryAccess = await resolveCountryAccess(userId, req, { fetch, diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index b55a64add3..54dc6c90de 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -299,7 +299,7 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) - test('blocks VPN/proxy privacy signals before joining the queue', async () => { + test('puts VPN/proxy privacy signals in limited mode before joining the queue', async () => { const sessionDeps = makeSessionDeps() sessionDeps.rows.set('u1', { user_id: 'u1', @@ -329,13 +329,14 @@ describe('POST /api/v1/freebuff/session', () => { }), }), ) - expect(resp.status).toBe(403) + expect(resp.status).toBe(200) const body = await resp.json() - expect(body.status).toBe('country_blocked') - expect(body.message).toContain('VPN') + expect(body.status).toBe('queued') + expect(body.accessTier).toBe('limited') + expect(body.model).toBe(FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID) expect(body.countryBlockReason).toBe('anonymous_network') expect(body.ipPrivacySignals).toEqual(['vpn', 'hosting']) - expect(sessionDeps.rows.size).toBe(0) + expect(sessionDeps.rows.size).toBe(1) }) test('blocks Cloudflare Tor before joining the queue', async () => { @@ -464,7 +465,7 @@ describe('GET /api/v1/freebuff/session', () => { expect(body.ipPrivacySignals).toBeUndefined() }) - test('returns country_blocked on GET for VPN/proxy privacy signals', async () => { + test('returns limited mode on GET for VPN/proxy privacy signals', async () => { const sessionDeps = makeSessionDeps() sessionDeps.rows.set('u1', { user_id: 'u1', @@ -494,10 +495,10 @@ describe('GET /api/v1/freebuff/session', () => { }), }), ) - expect(resp.status).toBe(403) + expect(resp.status).toBe(200) const body = await resp.json() - 
expect(body.status).toBe('country_blocked') - expect(body.message).toContain('proxy') + expect(body.status).toBe('none') + expect(body.accessTier).toBe('limited') expect(body.countryBlockReason).toBe('anonymous_network') expect(body.ipPrivacySignals).toEqual(['res_proxy']) expect(sessionDeps.rows.size).toBe(0) diff --git a/web/src/server/__tests__/free-mode-country-access-cache.test.ts b/web/src/server/__tests__/free-mode-country-access-cache.test.ts index c0c81cfe46..005240d2ff 100644 --- a/web/src/server/__tests__/free-mode-country-access-cache.test.ts +++ b/web/src/server/__tests__/free-mode-country-access-cache.test.ts @@ -115,7 +115,7 @@ describe('free mode country access cache', () => { expect(fetch).toHaveBeenCalledTimes(1) }) - test('does not persist corroborated hard privacy blocks', async () => { + test('stores corroborated VPN/proxy limited decisions', async () => { const cacheStore: FreeModeCountryAccessCacheStore = { get: mock(async () => null), set: mock(async () => {}), @@ -141,7 +141,14 @@ describe('free mode country access cache', () => { expect(access.allowed).toBe(false) expect(access.spurIpPrivacy?.signals).toEqual(['vpn']) expect(access.spurStatus).toBe('suspicious') - expect(cacheStore.set).not.toHaveBeenCalled() + expect(cacheStore.set).toHaveBeenCalledWith({ + userId, + access, + now, + }) + expect(expiresAtForCountryAccess(access, now).getTime() - now.getTime()).toBe( + FREE_MODE_COUNTRY_CACHE_ANONYMOUS_NETWORK_TTL_MS, + ) }) test('stores transient limited decisions when Spur fails after hard IPinfo signals', async () => { diff --git a/web/src/server/__tests__/free-mode-country.test.ts b/web/src/server/__tests__/free-mode-country.test.ts index 14ad4c0ffc..b29b59536f 100644 --- a/web/src/server/__tests__/free-mode-country.test.ts +++ b/web/src/server/__tests__/free-mode-country.test.ts @@ -222,7 +222,7 @@ describe('free mode country access', () => { expect(shouldHardBlockFreeModeAccess(access)).toBe(false) }) - test('hard-blocks only VPN, 
proxy, Tor, or residential proxy signals', async () => { + test('keeps corroborated VPN/proxy privacy signals in limited mode', async () => { const vpnAccess = await getFreeModeCountryAccess( makeReq({ 'cf-ipcountry': 'US', @@ -241,7 +241,7 @@ describe('free mode country access', () => { ) expect(vpnAccess.allowed).toBe(false) expect(vpnAccess.spurStatus).toBe('suspicious') - expect(shouldHardBlockFreeModeAccess(vpnAccess)).toBe(true) + expect(shouldHardBlockFreeModeAccess(vpnAccess)).toBe(false) expect(getFreeModePrivacyDecision(vpnAccess)).toBe('corroborated_block') expect(getFreeModePrivacyProviderDecision(vpnAccess)).toBe( 'corroborated_hard', diff --git a/web/src/server/free-mode-country.ts b/web/src/server/free-mode-country.ts index e30f2700ae..1b0845e801 100644 --- a/web/src/server/free-mode-country.ts +++ b/web/src/server/free-mode-country.ts @@ -141,15 +141,10 @@ export function hasHardBlockedPrivacySignal( export function shouldHardBlockFreeModeAccess( countryAccess: Pick< FreeModeCountryAccess, - 'blockReason' | 'cfCountry' | 'ipPrivacy' | 'spurIpPrivacy' + 'cfCountry' >, ): boolean { - return ( - countryAccess.cfCountry === CLOUDFLARE_TOR_COUNTRY || - (countryAccess.blockReason === 'anonymous_network' && - hasHardBlockedPrivacySignal(countryAccess.ipPrivacy) && - hasHardBlockedPrivacySignal(countryAccess.spurIpPrivacy)) - ) + return countryAccess.cfCountry === CLOUDFLARE_TOR_COUNTRY } export function getFreeModePrivacyDecision( From cbd3fde333adf343286899fdd410f01727719aef Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 23 May 2026 13:53:40 -0700 Subject: [PATCH 743/749] Fix numpad input handling (#727) --- .../setup-bun-compile-runtime/action.yml | 16 ++- .../__tests__/multiline-input.test.tsx | 112 +++++++++++++++--- cli/src/components/ask-user/index.tsx | 5 +- cli/src/components/chat-history-screen.tsx | 12 +- cli/src/components/chat-input-bar.tsx | 41 ++----- cli/src/components/feedback-input-mode.tsx | 4 +- 
cli/src/components/multiline-input.tsx | 61 ++++++---- cli/src/components/project-picker-screen.tsx | 12 +- cli/src/components/publish-container.tsx | 12 +- cli/src/components/review-screen.tsx | 3 +- cli/src/components/session-ended-banner.tsx | 3 +- cli/src/hooks/use-chat-keyboard.ts | 6 +- cli/src/hooks/use-login-keyboard-handlers.ts | 8 +- .../chat-input-key-intercept.test.ts | 40 +++++++ .../utils/__tests__/keyboard-actions.test.ts | 40 +++++++ .../terminal-enter-detection.test.ts | 77 ++++++++++++ cli/src/utils/chat-input-key-intercept.ts | 52 ++++++++ cli/src/utils/keyboard-actions.ts | 8 +- cli/src/utils/keypad-keys.ts | 47 ++++++++ cli/src/utils/terminal-enter-detection.ts | 48 +++++++- 20 files changed, 505 insertions(+), 102 deletions(-) create mode 100644 cli/src/utils/__tests__/chat-input-key-intercept.test.ts create mode 100644 cli/src/utils/__tests__/terminal-enter-detection.test.ts create mode 100644 cli/src/utils/chat-input-key-intercept.ts create mode 100644 cli/src/utils/keypad-keys.ts diff --git a/.github/actions/setup-bun-compile-runtime/action.yml b/.github/actions/setup-bun-compile-runtime/action.yml index f1fa88dc68..74d1cf1bd0 100644 --- a/.github/actions/setup-bun-compile-runtime/action.yml +++ b/.github/actions/setup-bun-compile-runtime/action.yml @@ -14,11 +14,13 @@ runs: shell: bash run: echo "version=$(bun --version)" >> "$GITHUB_OUTPUT" - - name: Cache Bun compile runtime - uses: actions/cache@v5 + - name: Restore Bun compile runtime cache + id: compile-runtime-cache + uses: actions/cache/restore@v5 + continue-on-error: true with: path: ${{ runner.temp }}/bun-compile-runtimes/${{ inputs.target }}-v${{ steps.bun-version.outputs.version }} - key: ${{ runner.os }}-bun-compile-runtime-${{ inputs.target }}-v${{ steps.bun-version.outputs.version }} + key: ${{ runner.os }}-bun-compile-runtime-v2-${{ inputs.target }}-v${{ steps.bun-version.outputs.version }} - name: Prepare Bun compile runtime shell: pwsh @@ -49,3 +51,11 @@ runs: } 
"BUN_COMPILE_EXECUTABLE_PATH=$runtimePath" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Save Bun compile runtime cache + if: steps.compile-runtime-cache.outputs.cache-hit != 'true' + uses: actions/cache/save@v5 + continue-on-error: true + with: + path: ${{ runner.temp }}/bun-compile-runtimes/${{ inputs.target }}-v${{ steps.bun-version.outputs.version }} + key: ${{ runner.os }}-bun-compile-runtime-v2-${{ inputs.target }}-v${{ steps.bun-version.outputs.version }} diff --git a/cli/src/components/__tests__/multiline-input.test.tsx b/cli/src/components/__tests__/multiline-input.test.tsx index 97f2d3f78a..a024ad9d3e 100644 --- a/cli/src/components/__tests__/multiline-input.test.tsx +++ b/cli/src/components/__tests__/multiline-input.test.tsx @@ -1,5 +1,10 @@ import { describe, test, expect } from 'bun:test' +import { + getKeypadPrintableSequence, + isKeypadEnter, +} from '../../utils/keypad-keys' + /** * Tests for tab character cursor rendering in MultilineInput component. * @@ -13,23 +18,23 @@ import { describe, test, expect } from 'bun:test' /** * Check if a key event represents printable character input (not a special key). * This mirrors the function in multiline-input.tsx for testing. - * + * * Uses a positive heuristic based on key.name length rather than a brittle deny-list. * Special keys have descriptive multi-character names (like 'backspace', 'up', 'f1') * while regular printable characters either have no name or a single-character name. */ function isPrintableCharacterKey(key: { name?: string }): boolean { const name = key.name - + // No name = likely multi-byte input (Chinese, Japanese, Korean, etc.) if (!name) return true - + // Single character name = regular ASCII printable (a, b, 1, $, etc.) if (name.length === 1) return true - + // Special case: space key has name 'space' but is printable if (name === 'space') return true - + // Multi-char name = special key (up, f1, backspace, etc.) 
return false } @@ -256,27 +261,42 @@ describe('MultilineInput - Chinese/IME character input', () => { meta?: boolean option?: boolean }): boolean { + return getPrintableKeySequence(key) !== null + } + + function getPrintableKeySequence(key: { + sequence?: string + name?: string + ctrl?: boolean + meta?: boolean + option?: boolean + }): string | null { // Must have a sequence with at least one character if (!key.sequence || key.sequence.length < 1) { - return false + return null } // No modifier keys allowed if (key.ctrl || key.meta || key.option) { - return false + return null + } + + const keypadValue = getKeypadPrintableSequence(key) + if (keypadValue !== null) { + return keypadValue } // Must not be a control character if (CONTROL_CHAR_REGEX.test(key.sequence)) { - return false + return null } // Must be a printable character key (not a special key like arrows, function keys, etc.) if (!isPrintableCharacterKey(key)) { - return false + return null } - return true + return key.sequence } test('accepts single Chinese character (你)', () => { @@ -387,6 +407,42 @@ describe('MultilineInput - Chinese/IME character input', () => { expect(shouldAcceptCharacterInput(key)).toBe(true) }) + test('accepts Kitty keyboard numpad digit names', () => { + const key = { + sequence: '\x1b[57400u', + name: 'kp1', + ctrl: false, + meta: false, + option: false, + } + + expect(getPrintableKeySequence(key)).toBe('1') + }) + + test('accepts raw application keypad digit sequences', () => { + const key = { + sequence: '\x1bOq', + name: '', + ctrl: false, + meta: false, + option: false, + } + + expect(getPrintableKeySequence(key)).toBe('1') + }) + + test('accepts raw application keypad operator sequences', () => { + const key = { + sequence: '\x1bOk', + name: '', + ctrl: false, + meta: false, + option: false, + } + + expect(getPrintableKeySequence(key)).toBe('+') + }) + test('rejects arrow key (up)', () => { const key = { sequence: '\x1b[A', @@ -625,7 +681,9 @@ describe('MultilineInput - 
newline keyboard shortcuts', () => { hasBackslashBeforeCursor: boolean = false, ): 'newline' | 'submit' | 'ignore' { const lowerKeyName = (key.name ?? '').toLowerCase() - const isEnterKey = key.name === 'return' || key.name === 'enter' + const keypadEnter = isKeypadEnter(key) + const isEnterKey = + key.name === 'return' || key.name === 'enter' || keypadEnter // Ctrl+J is translated by the terminal to a linefeed character (0x0a) // So we detect it by checking for name === 'linefeed' rather than ctrl + j const isCtrlJ = @@ -651,13 +709,13 @@ describe('MultilineInput - newline keyboard shortcuts', () => { !key.meta && !key.option && !isAltLikeModifier && - !hasEscapePrefix && - key.sequence === '\r' && + (!hasEscapePrefix || keypadEnter) && + (key.sequence === '\r' || keypadEnter) && !hasBackslashBeforeCursor const isShiftEnter = isEnterKey && (Boolean(key.shift) || key.sequence === '\n') const isOptionEnter = - isEnterKey && (isAltLikeModifier || hasEscapePrefix) + isEnterKey && !keypadEnter && (isAltLikeModifier || hasEscapePrefix) const isBackslashEnter = isEnterKey && hasBackslashBeforeCursor const shouldInsertNewline = @@ -900,6 +958,32 @@ describe('MultilineInput - newline keyboard shortcuts', () => { expect(getEnterKeyAction(key, false)).toBe('submit') }) + test('keypad Enter submits with Kitty keyboard key name', () => { + const key = { + name: 'kpenter', + sequence: '\x1b[57414u', + ctrl: false, + meta: false, + shift: false, + option: false, + } + + expect(getEnterKeyAction(key, false)).toBe('submit') + }) + + test('keypad Enter submits with raw application keypad sequence', () => { + const key = { + name: '', + sequence: '\x1bOM', + ctrl: false, + meta: false, + shift: false, + option: false, + } + + expect(getEnterKeyAction(key, false)).toBe('submit') + }) + // --- Non-Enter key tests --- test('Regular J key (no ctrl) is ignored', () => { diff --git a/cli/src/components/ask-user/index.tsx b/cli/src/components/ask-user/index.tsx index b56b5cccd2..3743a55533 
100644 --- a/cli/src/components/ask-user/index.tsx +++ b/cli/src/components/ask-user/index.tsx @@ -16,6 +16,7 @@ import { import { getOptionLabel, KEYBOARD_HINTS, CUSTOM_OPTION_INDEX } from './constants' import { useTheme } from '../../hooks/use-theme' import { useChatStore } from '../../state/chat-store' +import { isPlainEnterKey } from '../../utils/terminal-enter-detection' import { BORDER_CHARS } from '../../utils/ui-constants' import { Button } from '../button' @@ -338,7 +339,7 @@ export const MultipleChoiceForm: React.FC = ({ } return } - if (key.name === 'return' || key.name === 'enter' || key.name === 'space') { + if (isPlainEnterKey(key) || key.name === 'space') { preventDefault() handleSubmit() return @@ -442,7 +443,7 @@ export const MultipleChoiceForm: React.FC = ({ return } - if (key.name === 'return' || key.name === 'enter' || key.name === 'space') { + if (isPlainEnterKey(key) || key.name === 'space') { preventDefault() if (expandedIndex === null) { diff --git a/cli/src/components/chat-history-screen.tsx b/cli/src/components/chat-history-screen.tsx index 01f3e03322..bf9c72ee51 100644 --- a/cli/src/components/chat-history-screen.tsx +++ b/cli/src/components/chat-history-screen.tsx @@ -12,6 +12,7 @@ import { formatRelativeTime, getAllChats, } from '../utils/chat-history' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import type { SelectableListItem } from './selectable-list' @@ -170,7 +171,14 @@ export const ChatHistoryScreen: React.FC = ({ // Handle keyboard input const handleKeyIntercept = useCallback( - (key: { name?: string; shift?: boolean; ctrl?: boolean }) => { + (key: { + name?: string + sequence?: string + shift?: boolean + ctrl?: boolean + meta?: boolean + option?: boolean + }) => { if (key.name === 'escape') { if (searchQuery.length > 0) { setSearchQuery('') @@ -189,7 +197,7 @@ export const ChatHistoryScreen: React.FC = ({ setFocusedIndex((prev) => Math.min(maxIndex, prev + 1)) return true } - if (key.name === 'return' || 
key.name === 'enter') { + if (isPlainEnterKey(key)) { const focused = filteredItems[focusedIndex] if (focused) { onSelectChat(focused.id) diff --git a/cli/src/components/chat-input-bar.tsx b/cli/src/components/chat-input-bar.tsx index cee0a296eb..a95b8cbfb4 100644 --- a/cli/src/components/chat-input-bar.tsx +++ b/cli/src/components/chat-input-bar.tsx @@ -11,8 +11,8 @@ import { SuggestionMenu, type SuggestionItem } from './suggestion-menu' import { useAskUserBridge } from '../hooks/use-ask-user-bridge' import { useEvent } from '../hooks/use-event' import { useChatStore } from '../state/chat-store' +import { shouldInterceptChatInputKey } from '../utils/chat-input-key-intercept' import { getInputModeConfig } from '../utils/input-modes' -import { isLinefeedActingAsEnter } from '../utils/terminal-enter-detection' import { BORDER_CHARS } from '../utils/ui-constants' import type { useTheme } from '../hooks/use-theme' @@ -133,38 +133,13 @@ export const ChatInputBar = ({ meta?: boolean option?: boolean }) => { - const isPlainEnter = - (key.name === 'return' || key.name === 'enter' || - (key.name === 'linefeed' && isLinefeedActingAsEnter())) && - !key.shift && - !key.ctrl && - !key.meta && - !key.option - const isTab = key.name === 'tab' && !key.ctrl && !key.meta && !key.option - const isUp = key.name === 'up' && !key.ctrl && !key.meta && !key.option - const isDown = key.name === 'down' && !key.ctrl && !key.meta && !key.option - const isUpDown = isUp || isDown - - const hasSuggestions = hasSlashSuggestions || hasMentionSuggestions - if (hasSuggestions) { - if (isUpDown && lastEditDueToNav) { - return true - } - if (isPlainEnter || isTab || isUpDown) { - return true - } - } - - const historyUpEnabled = lastEditDueToNav || cursorPosition === 0 - const historyDownEnabled = lastEditDueToNav || cursorPosition === inputValue.length - if (isUp && historyUpEnabled) { - return true - } - if (isDown && historyDownEnabled) { - return true - } - - return false + return 
shouldInterceptChatInputKey(key, { + hasSlashSuggestions, + hasMentionSuggestions, + lastEditDueToNav, + cursorPosition, + inputLength: inputValue.length, + }) }, ) diff --git a/cli/src/components/feedback-input-mode.tsx b/cli/src/components/feedback-input-mode.tsx index 48b709589f..0d47bdd6dc 100644 --- a/cli/src/components/feedback-input-mode.tsx +++ b/cli/src/components/feedback-input-mode.tsx @@ -8,6 +8,7 @@ import { useTheme } from '../hooks/use-theme' import { useChatStore } from '../state/chat-store' import { IS_FREEBUFF } from '../utils/constants' import { createTextPasteHandler } from '../utils/strings' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import { BORDER_CHARS } from '../utils/ui-constants' import type { FeedbackCategory } from '@codebuff/common/constants/feedback' @@ -120,8 +121,7 @@ const FeedbackTextSection: React.FC = ({ }} onSubmit={onSubmit} onKeyIntercept={(key) => { - const isEnter = key.name === 'return' || key.name === 'enter' - if (!isEnter) return false + if (!isPlainEnterKey(key)) return false // Just add newline on Enter const newText = value.slice(0, cursor) + '\n' + value.slice(cursor) onChange(newText) diff --git a/cli/src/components/multiline-input.tsx b/cli/src/components/multiline-input.tsx index f6f40b31db..a58fc8c5cb 100644 --- a/cli/src/components/multiline-input.tsx +++ b/cli/src/components/multiline-input.tsx @@ -16,8 +16,15 @@ import { import { InputCursor } from './input-cursor' import { useTheme } from '../hooks/use-theme' import { useChatStore } from '../state/chat-store' +import { + getKeypadPrintableSequence, + isKeypadEnter, +} from '../utils/keypad-keys' import { clamp } from '../utils/math' -import { isLinefeedActingAsEnter, markReturnKeySeen } from '../utils/terminal-enter-detection' +import { + isLinefeedActingAsEnter, + markReturnKeySeenForKey, +} from '../utils/terminal-enter-detection' import { supportsTruecolor } from '../utils/theme-system' import { calculateNewCursorPosition } from 
'../utils/word-wrap-utils' @@ -91,27 +98,41 @@ const TAB_WIDTH = 4 /** * Check if a key event represents printable character input (not a special key). * Uses a positive heuristic based on key.name length rather than a brittle deny-list. - * + * * The key insight is that OpenTUI's parser assigns descriptive multi-character names * to special keys (like 'backspace', 'up', 'f1') while regular printable characters * either have no name (multi-byte input like Chinese) or a single-character name. */ function isPrintableCharacterKey(key: KeyEvent): boolean { const name = key.name - + // No name = likely multi-byte input (Chinese, Japanese, Korean, etc.) - treat as printable if (!name) return true - + // Single character name = regular ASCII printable (a, b, 1, $, etc.) if (name.length === 1) return true - + // Special case: space key has name 'space' but is printable if (name === 'space') return true - + // Multi-char name = special key (up, f1, backspace, etc.) return false } +function getPrintableKeySequence(key: KeyEvent): string | null { + if (!key.sequence || key.sequence.length < 1) return null + if (key.ctrl || key.meta || key.option) return null + + const keypadValue = getKeypadPrintableSequence(key) + if (keypadValue !== null) return keypadValue + + if (!CONTROL_CHAR_REGEX.test(key.sequence) && isPrintableCharacterKey(key)) { + return key.sequence + } + + return null +} + // Helper to convert render position (in tab-expanded string) to original text position function renderPositionToOriginal(text: string, renderPos: number): number { let originalPos = 0 @@ -532,11 +553,11 @@ export const MultilineInput = forwardRef< const handleEnterKeys = useCallback( (key: KeyEvent): boolean => { const lowerKeyName = (key.name ?? 
'').toLowerCase() - const isReturnOrEnter = key.name === 'return' || key.name === 'enter' + const keypadEnter = isKeypadEnter(key) + const isReturnOrEnter = + key.name === 'return' || key.name === 'enter' || keypadEnter - if (isReturnOrEnter) { - markReturnKeySeen() - } + markReturnKeySeenForKey(key) const linefeedIsEnter = lowerKeyName === 'linefeed' && isLinefeedActingAsEnter() const isEnterKey = isReturnOrEnter || linefeedIsEnter @@ -567,12 +588,12 @@ export const MultilineInput = forwardRef< !key.meta && !key.option && !isAltLikeModifier && - !hasEscapePrefix && - (key.sequence === '\r' || key.sequence === '\n') && + (!hasEscapePrefix || keypadEnter) && + (key.sequence === '\r' || key.sequence === '\n' || keypadEnter) && !hasBackslashBeforeCursor const isShiftEnter = isEnterKey && Boolean(key.shift) const isOptionEnter = - isEnterKey && (isAltLikeModifier || hasEscapePrefix) + isEnterKey && !keypadEnter && (isAltLikeModifier || hasEscapePrefix) const isBackslashEnter = isEnterKey && hasBackslashBeforeCursor const shouldInsertNewline = @@ -1003,18 +1024,10 @@ export const MultilineInput = forwardRef< } // Character input (including multi-byte characters from IME like Chinese, Japanese, Korean) - // Check for printable input: has a sequence, no modifier keys, and not a control character - if ( - key.sequence && - key.sequence.length >= 1 && - !key.ctrl && - !key.meta && - !key.option && - !CONTROL_CHAR_REGEX.test(key.sequence) && - isPrintableCharacterKey(key) - ) { + const textToInsert = getPrintableKeySequence(key) + if (textToInsert !== null) { preventKeyDefault(key) - insertTextAtCursor(key.sequence) + insertTextAtCursor(textToInsert) return true } diff --git a/cli/src/components/project-picker-screen.tsx b/cli/src/components/project-picker-screen.tsx index 71fdb1cc1b..10db83a0ab 100644 --- a/cli/src/components/project-picker-screen.tsx +++ b/cli/src/components/project-picker-screen.tsx @@ -15,6 +15,7 @@ import { useTerminalLayout } from 
'../hooks/use-terminal-layout' import { useTheme } from '../hooks/use-theme' import { formatCwd } from '../utils/path-helpers' import { loadRecentProjects } from '../utils/recent-projects' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import { getLogoBlockColor, getLogoAccentColor } from '../utils/theme-system' import type { SelectableListItem } from './selectable-list' @@ -226,7 +227,14 @@ export const ProjectPickerScreen: React.FC = ({ // Handle search input keyboard intercept const handleSearchKeyIntercept = useCallback( - (key: { name?: string; shift?: boolean; ctrl?: boolean }) => { + (key: { + name?: string + sequence?: string + shift?: boolean + ctrl?: boolean + meta?: boolean + option?: boolean + }) => { if (key.name === 'escape') { if (searchQuery.length > 0) { setSearchQuery('') @@ -246,7 +254,7 @@ export const ProjectPickerScreen: React.FC = ({ ) return true } - if (key.name === 'return' || key.name === 'enter') { + if (isPlainEnterKey(key)) { // If search looks like a path, try to navigate there directly if (searchQuery.startsWith('/') || searchQuery.startsWith('~')) { if (tryNavigateToPath(searchQuery)) { diff --git a/cli/src/components/publish-container.tsx b/cli/src/components/publish-container.tsx index 729b5b14e7..73c2af5290 100644 --- a/cli/src/components/publish-container.tsx +++ b/cli/src/components/publish-container.tsx @@ -15,6 +15,7 @@ import { useTheme } from '../hooks/use-theme' import { useChatStore } from '../state/chat-store' import { usePublishStore } from '../state/publish-store' import { loadLocalAgents, loadAgentDefinitions } from '../utils/local-agent-registry' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import { BORDER_CHARS } from '../utils/ui-constants' @@ -110,7 +111,14 @@ export const PublishContainer: React.FC = ({ // Handle keyboard navigation in checklist const handleSearchKeyIntercept = useCallback( - (key: { name?: string; shift?: boolean }) => { + (key: { + name?: string + 
sequence?: string + shift?: boolean + ctrl?: boolean + meta?: boolean + option?: boolean + }) => { if (key.name === 'escape') { // Escape: clear input if there is any, otherwise exit publish mode if (searchQuery.length > 0) { @@ -129,7 +137,7 @@ export const PublishContainer: React.FC = ({ setFocusedIndex(Math.min(filteredAgents.length - 1, focusedIndex + 1)) return true } - if (key.name === 'return' || key.name === 'enter') { + if (isPlainEnterKey(key)) { // Enter: toggle selection const agent = filteredAgents[focusedIndex] if (agent) { diff --git a/cli/src/components/review-screen.tsx b/cli/src/components/review-screen.tsx index 98d8f7d160..d3a557871a 100644 --- a/cli/src/components/review-screen.tsx +++ b/cli/src/components/review-screen.tsx @@ -3,6 +3,7 @@ import React, { useCallback, useState } from 'react' import { buildReviewPrompt, REVIEW_BASE_PROMPT } from '../commands/prompt-builders' import { useTheme } from '../hooks/use-theme' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import { BORDER_CHARS } from '../utils/ui-constants' import type { KeyEvent } from '@opentui/core' @@ -61,7 +62,7 @@ export const ReviewScreen: React.FC = ({ setSelectedIndex((prev) => Math.min(REVIEW_OPTIONS.length - 1, prev + 1)) return } - if (key.name === 'return' || key.name === 'enter') { + if (isPlainEnterKey(key)) { const option = REVIEW_OPTIONS[selectedIndex] if (option) { handleSelect(option) diff --git a/cli/src/components/session-ended-banner.tsx b/cli/src/components/session-ended-banner.tsx index b99ac28536..78dd623f79 100644 --- a/cli/src/components/session-ended-banner.tsx +++ b/cli/src/components/session-ended-banner.tsx @@ -11,6 +11,7 @@ import { import { useTheme } from '../hooks/use-theme' import { useFreebuffSessionStore } from '../state/freebuff-session-store' import { formatSessionUnits } from '../utils/format-session-units' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' import { BORDER_CHARS } from 
'../utils/ui-constants' import type { KeyEvent } from '@opentui/core' @@ -89,7 +90,7 @@ export const SessionEndedBanner: React.FC = ({ useCallback( (key: KeyEvent) => { if (!canRestart) return - if (key.name === 'return' || key.name === 'enter') { + if (isPlainEnterKey(key)) { key.preventDefault?.() startSameChatSession() return diff --git a/cli/src/hooks/use-chat-keyboard.ts b/cli/src/hooks/use-chat-keyboard.ts index a2cc87daf9..1ad09d1772 100644 --- a/cli/src/hooks/use-chat-keyboard.ts +++ b/cli/src/hooks/use-chat-keyboard.ts @@ -12,7 +12,7 @@ import { type ChatKeyboardState, type ChatKeyboardAction, } from '../utils/keyboard-actions' -import { markReturnKeySeen } from '../utils/terminal-enter-detection' +import { markReturnKeySeenForKey } from '../utils/terminal-enter-detection' import type { KeyEvent } from '@opentui/core' @@ -305,9 +305,7 @@ export function useChatKeyboard({ reportActivity() } - if (key.name === 'return' || key.name === 'enter') { - markReturnKeySeen() - } + markReturnKeySeenForKey(key) const action = resolveChatKeyboardAction(key, state) const handled = dispatchAction(action, handlers) diff --git a/cli/src/hooks/use-login-keyboard-handlers.ts b/cli/src/hooks/use-login-keyboard-handlers.ts index 5d7d9cded9..16e74d73a2 100644 --- a/cli/src/hooks/use-login-keyboard-handlers.ts +++ b/cli/src/hooks/use-login-keyboard-handlers.ts @@ -1,6 +1,8 @@ import { useKeyboard } from '@opentui/react' import { useCallback } from 'react' +import { isPlainEnterKey } from '../utils/terminal-enter-detection' + import type { KeyEvent } from '@opentui/core' interface UseLoginKeyboardHandlersParams { @@ -27,11 +29,7 @@ export function useLoginKeyboardHandlers({ useKeyboard( useCallback( (key: KeyEvent) => { - const isEnter = - (key.name === 'return' || key.name === 'enter') && - !key.ctrl && - !key.meta && - !key.shift + const isEnter = isPlainEnterKey(key) const isCKey = key.name === 'c' && !key.ctrl && !key.meta && !key.shift const isCtrlC = key.ctrl && key.name 
=== 'c' diff --git a/cli/src/utils/__tests__/chat-input-key-intercept.test.ts b/cli/src/utils/__tests__/chat-input-key-intercept.test.ts new file mode 100644 index 0000000000..acced0445a --- /dev/null +++ b/cli/src/utils/__tests__/chat-input-key-intercept.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, test } from 'bun:test' + +import { shouldInterceptChatInputKey } from '../chat-input-key-intercept' + +const baseState = { + hasSlashSuggestions: false, + hasMentionSuggestions: false, + lastEditDueToNav: false, + cursorPosition: 1, + inputLength: 3, +} + +describe('shouldInterceptChatInputKey', () => { + test('intercepts keypad Enter while slash suggestions are visible', () => { + expect( + shouldInterceptChatInputKey( + { name: 'kpenter', sequence: '\x1b[57414u' }, + { ...baseState, hasSlashSuggestions: true }, + ), + ).toBe(true) + }) + + test('intercepts raw application keypad Enter while mention suggestions are visible', () => { + expect( + shouldInterceptChatInputKey( + { sequence: '\x1bOM' }, + { ...baseState, hasMentionSuggestions: true }, + ), + ).toBe(true) + }) + + test('does not intercept keypad Enter without visible suggestions', () => { + expect( + shouldInterceptChatInputKey( + { name: 'kpenter', sequence: '\x1b[57414u' }, + baseState, + ), + ).toBe(false) + }) +}) diff --git a/cli/src/utils/__tests__/keyboard-actions.test.ts b/cli/src/utils/__tests__/keyboard-actions.test.ts index c518b47ea7..59fd46f55f 100644 --- a/cli/src/utils/__tests__/keyboard-actions.test.ts +++ b/cli/src/utils/__tests__/keyboard-actions.test.ts @@ -26,6 +26,8 @@ const downKey = createKey({ name: 'down' }) const tabKey = createKey({ name: 'tab' }) const shiftTabKey = createKey({ name: 'tab', shift: true }) const enterKey = createKey({ name: 'return' }) +const keypadEnterKey = createKey({ name: 'kpenter', sequence: '\x1b[57414u' }) +const rawApplicationKeypadEnterKey = createKey({ sequence: '\x1bOM' }) const backspaceKey = createKey({ name: 'backspace' }) const defaultState 
= createDefaultChatKeyboardState() @@ -533,6 +535,44 @@ describe('resolveChatKeyboardAction', () => { }) }) + test('keypad enter without active menu does nothing', () => { + expect(resolveChatKeyboardAction(keypadEnterKey, defaultState)).toEqual({ + type: 'none', + }) + }) + + test('raw application keypad enter without active menu does nothing', () => { + expect( + resolveChatKeyboardAction(rawApplicationKeypadEnterKey, defaultState), + ).toEqual({ + type: 'none', + }) + }) + + test('keypad enter selects an active slash menu item', () => { + const state: ChatKeyboardState = { + ...defaultState, + slashMenuActive: true, + slashMatchesLength: 3, + } + expect(resolveChatKeyboardAction(keypadEnterKey, state)).toEqual({ + type: 'slash-menu-select', + }) + }) + + test('raw application keypad enter selects an active slash menu item', () => { + const state: ChatKeyboardState = { + ...defaultState, + slashMenuActive: true, + slashMatchesLength: 3, + } + expect( + resolveChatKeyboardAction(rawApplicationKeypadEnterKey, state), + ).toEqual({ + type: 'slash-menu-select', + }) + }) + test('shift+enter does nothing even in menu', () => { const shiftEnter = createKey({ name: 'return', shift: true }) const state: ChatKeyboardState = { diff --git a/cli/src/utils/__tests__/terminal-enter-detection.test.ts b/cli/src/utils/__tests__/terminal-enter-detection.test.ts new file mode 100644 index 0000000000..29e07bb2c1 --- /dev/null +++ b/cli/src/utils/__tests__/terminal-enter-detection.test.ts @@ -0,0 +1,77 @@ +import { afterEach, beforeEach, describe, expect, test } from 'bun:test' + +import { + isLinefeedActingAsEnter, + isPlainEnterKey, + markReturnKeySeenForKey, + resetReturnKeySeenForTests, + shouldMarkReturnKeySeen, +} from '../terminal-enter-detection' + +describe('terminal enter detection', () => { + beforeEach(() => { + resetReturnKeySeenForTests(false) + }) + + afterEach(() => { + resetReturnKeySeenForTests() + }) + + test('marks real carriage-return Enter as return seen', () => 
{ + expect(shouldMarkReturnKeySeen({ name: 'return', sequence: '\r' })).toBe( + true, + ) + + markReturnKeySeenForKey({ name: 'return', sequence: '\r' }) + + expect(isLinefeedActingAsEnter()).toBe(false) + }) + + test('marks Kitty CSI-u Return as return seen', () => { + expect( + shouldMarkReturnKeySeen({ name: 'return', sequence: '\x1b[13u' }), + ).toBe(true) + + markReturnKeySeenForKey({ name: 'return', sequence: '\x1b[13u' }) + + expect(isLinefeedActingAsEnter()).toBe(false) + }) + + test('does not mark keypad Enter escape sequences as return seen', () => { + expect( + shouldMarkReturnKeySeen({ name: 'kpenter', sequence: '\x1b[57414u' }), + ).toBe(false) + expect(shouldMarkReturnKeySeen({ name: '', sequence: '\x1bOM' })).toBe( + false, + ) + + markReturnKeySeenForKey({ name: 'kpenter', sequence: '\x1b[57414u' }) + markReturnKeySeenForKey({ name: '', sequence: '\x1bOM' }) + + expect(isLinefeedActingAsEnter()).toBe(true) + }) + + test('recognizes keypad Enter as plain Enter', () => { + expect( + isPlainEnterKey({ name: 'kpenter', sequence: '\x1b[57414u' }), + ).toBe(true) + expect(isPlainEnterKey({ name: '', sequence: '\x1bOM' })).toBe(true) + }) + + test('plain Enter detection records return before later linefeed checks', () => { + expect(isLinefeedActingAsEnter()).toBe(true) + expect(isPlainEnterKey({ name: 'return', sequence: '\r' })).toBe(true) + expect(isLinefeedActingAsEnter()).toBe(false) + expect(isPlainEnterKey({ name: 'linefeed', sequence: '\n' })).toBe(false) + }) + + test('does not recognize modified keypad Enter as plain Enter', () => { + expect( + isPlainEnterKey({ + name: 'kpenter', + sequence: '\x1b[57414u', + shift: true, + }), + ).toBe(false) + }) +}) diff --git a/cli/src/utils/chat-input-key-intercept.ts b/cli/src/utils/chat-input-key-intercept.ts new file mode 100644 index 0000000000..d0053946b2 --- /dev/null +++ b/cli/src/utils/chat-input-key-intercept.ts @@ -0,0 +1,52 @@ +import { isPlainEnterKey } from './terminal-enter-detection' + +type 
ChatInputKey = { + name?: string + sequence?: string + shift?: boolean + ctrl?: boolean + meta?: boolean + option?: boolean +} + +type ChatInputKeyInterceptState = { + hasSlashSuggestions: boolean + hasMentionSuggestions: boolean + lastEditDueToNav: boolean + cursorPosition: number + inputLength: number +} + +export function shouldInterceptChatInputKey( + key: ChatInputKey, + state: ChatInputKeyInterceptState, +): boolean { + const isPlainEnter = isPlainEnterKey(key) + const isTab = key.name === 'tab' && !key.ctrl && !key.meta && !key.option + const isUp = key.name === 'up' && !key.ctrl && !key.meta && !key.option + const isDown = key.name === 'down' && !key.ctrl && !key.meta && !key.option + const isUpDown = isUp || isDown + + const hasSuggestions = + state.hasSlashSuggestions || state.hasMentionSuggestions + if (hasSuggestions) { + if (isUpDown && state.lastEditDueToNav) { + return true + } + if (isPlainEnter || isTab || isUpDown) { + return true + } + } + + const historyUpEnabled = state.lastEditDueToNav || state.cursorPosition === 0 + const historyDownEnabled = + state.lastEditDueToNav || state.cursorPosition === state.inputLength + if (isUp && historyUpEnabled) { + return true + } + if (isDown && historyDownEnabled) { + return true + } + + return false +} diff --git a/cli/src/utils/keyboard-actions.ts b/cli/src/utils/keyboard-actions.ts index 8a11ba782c..39de9dda5b 100644 --- a/cli/src/utils/keyboard-actions.ts +++ b/cli/src/utils/keyboard-actions.ts @@ -1,5 +1,5 @@ import { getInputModeConfig, type InputMode } from './input-modes' -import { isLinefeedActingAsEnter } from './terminal-enter-detection' +import { isPlainEnterKey } from './terminal-enter-detection' import type { KeyEvent } from '@opentui/core' @@ -131,11 +131,7 @@ export function resolveChatKeyboardAction( const isTab = key.name === 'tab' && !hasModifier(key) const isShiftTab = key.name === 'tab' && key.shift && !key.ctrl && !key.meta && !key.option - const isEnter = - (key.name === 'return' || 
key.name === 'enter' || - (key.name === 'linefeed' && isLinefeedActingAsEnter())) && - !key.shift && - !hasModifier(key) + const isEnter = isPlainEnterKey(key) const isPageUp = key.name === 'pageup' && !hasModifier(key) const isPageDown = key.name === 'pagedown' && !hasModifier(key) diff --git a/cli/src/utils/keypad-keys.ts b/cli/src/utils/keypad-keys.ts new file mode 100644 index 0000000000..966e176972 --- /dev/null +++ b/cli/src/utils/keypad-keys.ts @@ -0,0 +1,47 @@ +type KeypadKey = { + name?: string + sequence?: string +} + +const APPLICATION_KEYPAD_DIGITS = 'pqrstuvwxy' +const KEYPAD_OPERATOR_NAMES: Record<string, string> = { + kpdecimal: '.', + kpdivide: '/', + kpmultiply: '*', + kpminus: '-', + kpplus: '+', + kpequal: '=', + kpseparator: ',', +} + +const APPLICATION_KEYPAD_OPERATORS: Record<string, string> = { + n: '.', + o: '/', + j: '*', + m: '-', + k: '+', + X: '=', + l: ',', +} + +export function isKeypadEnter(key: KeypadKey): boolean { + return key.name === 'kpenter' || key.sequence === '\x1bOM' +} + +export function getKeypadPrintableSequence(key: KeypadKey): string | null { + const kittyDigit = /^kp([0-9])$/.exec(key.name ?? '')?.[1] + if (kittyDigit !== undefined) return kittyDigit + + const kittyOperator = key.name ? KEYPAD_OPERATOR_NAMES[key.name] : undefined + if (kittyOperator !== undefined) return kittyOperator + + if (!key.sequence?.startsWith('\x1bO') || key.sequence.length !== 3) { + return null + } + + const applicationKey = key.sequence[2] ?? '' + const applicationDigit = APPLICATION_KEYPAD_DIGITS.indexOf(applicationKey) + if (applicationDigit >= 0) return String(applicationDigit) + + return APPLICATION_KEYPAD_OPERATORS[applicationKey] ?? 
null +} diff --git a/cli/src/utils/terminal-enter-detection.ts b/cli/src/utils/terminal-enter-detection.ts index d2f7d0a7aa..3b94bd3beb 100644 --- a/cli/src/utils/terminal-enter-detection.ts +++ b/cli/src/utils/terminal-enter-detection.ts @@ -1,3 +1,5 @@ +import { isKeypadEnter } from './keypad-keys' + /** * Most terminals send \r for Enter and \n for Ctrl+J. A few niche Linux * terminal emulators send \n for Enter instead, making the two @@ -5,13 +7,57 @@ * ever seen a \r ("return") key event. On macOS, Enter always sends \r. */ -let hasSeenReturnKey = process.platform === 'darwin' +type EnterDetectionKey = { + name?: string + sequence?: string + shift?: boolean + ctrl?: boolean + meta?: boolean + option?: boolean +} + +const defaultHasSeenReturnKey = process.platform === 'darwin' + +let hasSeenReturnKey = defaultHasSeenReturnKey + +export function shouldMarkReturnKeySeen(key: EnterDetectionKey): boolean { + return (key.name === 'return' || key.name === 'enter') && !isKeypadEnter(key) +} + +export function isPlainEnterKey(key: EnterDetectionKey): boolean { + // Some local interceptors consume Enter before the global keyboard hooks see + // it, so record non-keypad Return here before consulting the linefeed fallback. + markReturnKeySeenForKey(key) + + return ( + (key.name === 'return' || + key.name === 'enter' || + isKeypadEnter(key) || + (key.name === 'linefeed' && isLinefeedActingAsEnter())) && + !key.shift && + !key.ctrl && + !key.meta && + !key.option + ) +} export function markReturnKeySeen(): void { hasSeenReturnKey = true } +export function markReturnKeySeenForKey(key: EnterDetectionKey): void { + if (shouldMarkReturnKeySeen(key)) { + markReturnKeySeen() + } +} + /** True when a "linefeed" (\n) key event should be treated as Enter. 
*/ export function isLinefeedActingAsEnter(): boolean { return !hasSeenReturnKey } + +export function resetReturnKeySeenForTests( + hasSeenReturn: boolean = defaultHasSeenReturnKey, +): void { + hasSeenReturnKey = hasSeenReturn +} From a2884a2bd1bc3b61d95d33baa16b2075169bc20d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 23 May 2026 14:29:53 -0700 Subject: [PATCH 744/749] Split public LLM provider code from internal (#734) --- bun.lock | 13 +- common/src/testing/mocks/database.ts | 2 +- common/src/testing/setup.ts | 2 +- docs/architecture.md | 14 +- package.json | 2 +- .../src/__tests__/loop-agent-steps.test.ts | 8 +- .../__tests__/run-agent-step-tools.test.ts | 8 +- .../agent-runtime/src/prompt-agent-stream.ts | 7 +- packages/internal/package.json | 7 - .../map-openai-compatible-finish-reason.ts | 19 - .../image/openai-compatible-image-settings.ts | 1 - ...onvert-to-openrouter-chat-messages.test.ts | 551 ------ .../convert-to-openrouter-chat-messages.ts | 223 --- .../openrouter-ai-sdk/chat/file-url-utils.ts | 34 - .../openrouter-ai-sdk/chat/get-tool-choice.ts | 39 - .../src/openrouter-ai-sdk/chat/index.test.ts | 1599 ----------------- .../src/openrouter-ai-sdk/chat/index.ts | 852 --------- .../src/openrouter-ai-sdk/chat/is-url.ts | 15 - .../src/openrouter-ai-sdk/chat/schemas.ts | 164 -- ...convert-to-openrouter-completion-prompt.ts | 151 -- .../completion/index.test.ts | 665 ------- .../src/openrouter-ai-sdk/completion/index.ts | 344 ---- .../openrouter-ai-sdk/completion/schemas.ts | 50 - .../internal/src/openrouter-ai-sdk/facade.ts | 83 - .../internal/src/openrouter-ai-sdk/index.ts | 3 - .../src/openrouter-ai-sdk/internal/index.ts | 5 - .../src/openrouter-ai-sdk/provider.ts | 180 -- .../schemas/error-response.test.ts | 51 - .../schemas/error-response.ts | 18 - .../schemas/reasoning-details.ts | 48 - .../tests/provider-options.test.ts | 223 --- .../tests/stream-usage-accounting.test.ts | 219 --- .../tests/usage-accounting.test.ts | 183 -- 
.../src/openrouter-ai-sdk/types/index.ts | 70 - .../openrouter-chat-completions-input.ts | 78 - .../types/openrouter-chat-settings.ts | 133 -- .../types/openrouter-completion-settings.ts | 39 - packages/llm-providers/package.json | 35 + ...to-openai-compatible-chat-messages.test.ts | 0 ...vert-to-openai-compatible-chat-messages.ts | 0 .../chat/get-response-metadata.ts | 8 +- .../map-openai-compatible-finish-reason.ts} | 2 +- .../chat/openai-compatible-api-types.ts | 54 +- .../openai-compatible-chat-language-model.ts | 319 ++-- .../chat/openai-compatible-chat-options.ts | 8 +- .../openai-compatible-metadata-extractor.ts | 14 +- .../chat/openai-compatible-prepare-tools.ts | 61 +- ...-to-openai-compatible-completion-prompt.ts | 55 +- .../completion/get-response-metadata.ts | 8 +- .../map-openai-compatible-finish-reason.ts | 12 +- ...ai-compatible-completion-language-model.ts | 173 +- .../openai-compatible-completion-options.ts | 8 +- .../openai-compatible-embedding-model.ts | 84 +- .../openai-compatible-embedding-options.ts | 8 +- .../image/openai-compatible-image-model.ts | 56 +- .../image/openai-compatible-image-settings.ts | 1 + .../src/openai-compatible/index.ts | 24 +- .../src/openai-compatible/internal/index.ts | 8 +- .../openai-compatible-error.ts | 20 +- .../openai-compatible-provider.ts | 98 +- .../src/openai-compatible/version.ts | 4 +- packages/llm-providers/tsconfig.json | 9 + sdk/scripts/build.ts | 6 +- sdk/src/impl/llm.ts | 55 +- sdk/src/impl/model-provider.ts | 22 +- sdk/tsconfig.json | 3 +- tsconfig.json | 2 + 67 files changed, 633 insertions(+), 6627 deletions(-) delete mode 100644 packages/internal/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts delete mode 100644 packages/internal/src/openai-compatible/image/openai-compatible-image-settings.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.test.ts delete mode 100644 
packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/file-url-utils.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/get-tool-choice.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/index.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/index.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/is-url.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/chat/schemas.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/completion/convert-to-openrouter-completion-prompt.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/completion/index.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/completion/index.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/completion/schemas.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/facade.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/index.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/internal/index.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/provider.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/schemas/error-response.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/schemas/error-response.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/schemas/reasoning-details.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/tests/provider-options.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/tests/stream-usage-accounting.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/tests/usage-accounting.test.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/types/index.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-completions-input.ts delete mode 100644 packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-settings.ts 
delete mode 100644 packages/internal/src/openrouter-ai-sdk/types/openrouter-completion-settings.ts create mode 100644 packages/llm-providers/package.json rename packages/{internal => llm-providers}/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.test.ts (100%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts (100%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/get-response-metadata.ts (65%) rename packages/{internal/src/openrouter-ai-sdk/utils/map-finish-reason.ts => llm-providers/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts} (89%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/openai-compatible-api-types.ts (53%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/openai-compatible-chat-language-model.ts (81%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/openai-compatible-chat-options.ts (86%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts (89%) rename packages/{internal => llm-providers}/src/openai-compatible/chat/openai-compatible-prepare-tools.ts (61%) rename packages/{internal => llm-providers}/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts (67%) rename packages/{internal => llm-providers}/src/openai-compatible/completion/get-response-metadata.ts (65%) rename packages/{internal/src/openai-compatible/chat => llm-providers/src/openai-compatible/completion}/map-openai-compatible-finish-reason.ts (72%) rename packages/{internal => llm-providers}/src/openai-compatible/completion/openai-compatible-completion-language-model.ts (76%) rename packages/{internal => llm-providers}/src/openai-compatible/completion/openai-compatible-completion-options.ts (90%) rename packages/{internal => llm-providers}/src/openai-compatible/embedding/openai-compatible-embedding-model.ts 
(66%) rename packages/{internal => llm-providers}/src/openai-compatible/embedding/openai-compatible-embedding-options.ts (85%) rename packages/{internal => llm-providers}/src/openai-compatible/image/openai-compatible-image-model.ts (73%) create mode 100644 packages/llm-providers/src/openai-compatible/image/openai-compatible-image-settings.ts rename packages/{internal => llm-providers}/src/openai-compatible/index.ts (64%) rename packages/{internal => llm-providers}/src/openai-compatible/internal/index.ts (69%) rename packages/{internal => llm-providers}/src/openai-compatible/openai-compatible-error.ts (72%) rename packages/{internal => llm-providers}/src/openai-compatible/openai-compatible-provider.ts (70%) rename packages/{internal => llm-providers}/src/openai-compatible/version.ts (57%) create mode 100644 packages/llm-providers/tsconfig.json diff --git a/bun.lock b/bun.lock index e575f4f9df..4f6021307f 100644 --- a/bun.lock +++ b/bun.lock @@ -212,7 +212,6 @@ "name": "@codebuff/internal", "version": "1.0.0", "dependencies": { - "@ai-sdk/provider-utils": "^3.0.17", "@codebuff/common": "workspace:*", "drizzle-kit": "0.31.8", "drizzle-orm": "0.45.1", @@ -222,6 +221,16 @@ "server-only": "0.0.1", }, }, + "packages/llm-providers": { + "name": "@codebuff/llm-providers", + "version": "1.0.0", + "dependencies": { + "@ai-sdk/provider": "2.0.1", + "@ai-sdk/provider-utils": "^3.0.17", + "ai": "^5.0.52", + "zod": "^4.2.1", + }, + }, "scripts": { "name": "@codebuff/scripts", "version": "1.0.0", @@ -499,6 +508,8 @@ "@codebuff/internal": ["@codebuff/internal@workspace:packages/internal"], + "@codebuff/llm-providers": ["@codebuff/llm-providers@workspace:packages/llm-providers"], + "@codebuff/scripts": ["@codebuff/scripts@workspace:scripts"], "@codebuff/sdk": ["@codebuff/sdk@workspace:sdk"], diff --git a/common/src/testing/mocks/database.ts b/common/src/testing/mocks/database.ts index c78353b2c8..3ad9c108ea 100644 --- a/common/src/testing/mocks/database.ts +++ 
b/common/src/testing/mocks/database.ts @@ -241,7 +241,7 @@ export interface DbSpies { * * @example * ```typescript - * import db from '@codebuff/internal/db' + * const db = createMockDbOperations() * * describe('my test', () => { * let dbSpies: DbSpies diff --git a/common/src/testing/setup.ts b/common/src/testing/setup.ts index 631178350c..5758fbb601 100644 --- a/common/src/testing/setup.ts +++ b/common/src/testing/setup.ts @@ -114,7 +114,7 @@ export interface TestSetupResult { * @example * ```typescript * import * as analytics from '@codebuff/common/analytics' - * import db from '@codebuff/internal/db' + * const db = createMockDbOperations() * * describe('my test', () => { * const setup = createTestSetup({ diff --git a/docs/architecture.md b/docs/architecture.md index 4c60d4ae22..a47a90657b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -52,7 +52,7 @@ The public SDK used by the CLI and available to external users via `@codebuff/sd - **Executes tool calls locally** on the user's machine (file edits, terminal commands, code search) - Manages model provider selection: Claude OAuth, ChatGPT OAuth, or Codebuff backend - Handles credentials, retry logic, and error transformation -- **Depends on:** `agent-runtime`, `common`, `internal` (for OpenAI-compatible provider) +- **Depends on:** `agent-runtime`, `common`, `llm-providers` ### `packages/agent-runtime/` — Agent Execution Engine @@ -113,17 +113,23 @@ The Codebuff web server, marketing site, and API. ### `packages/internal/` — Internal Utilities -Server-side utilities, database schema, and vendor forks shared between `web` and `sdk`. +Server-side utilities, database schema, and service integrations shared by private server packages. 
- **Key areas:** - `src/db/` — Drizzle ORM schema (`schema.ts`), migrations, Docker Compose for local Postgres - `src/env.ts` — Server environment variable validation (@t3-oss/env-nextjs) - `src/loops/` — Loops email service integration (transactional emails) - - `src/openai-compatible/` — Forked OpenAI-compatible AI SDK provider (used by the SDK to call the Codebuff backend) - - `src/openrouter-ai-sdk/` — Forked OpenRouter AI SDK provider (used by the web server) - `src/templates/` — Agent template fetching and validation - **Depends on:** `common` +### `packages/llm-providers/` — Public LLM Provider Shims + +Provider adapters that are safe for public packages to depend on. + +- **Key areas:** + - `src/openai-compatible/` — Forked OpenAI-compatible AI SDK provider used by the SDK for the Codebuff backend and ChatGPT OAuth flows +- **Depends on:** AI SDK provider packages + ### `packages/billing/` — Billing & Credits Credit management, subscription handling, and usage tracking. diff --git a/package.json b/package.json index 6ae23fa737..dd3d36e6ad 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "release:freebuff": "bun run --cwd=freebuff release", "clean-ts": "find . -name '*.tsbuildinfo' -type f -delete && find . -name '.next' -type d -exec rm -rf {} + 2>/dev/null || true && find . 
-name 'node_modules' -type d -exec rm -rf {} + 2>/dev/null || true && bun install", "typecheck": "bun scripts/check-env-architecture.ts && bun --filter='*' run typecheck && echo '✅ All type checks passed!'", - "test": "bun --filter='{@codebuff/common,@codebuff/agents,@codebuff/agent-runtime,@codebuff/sdk,@codebuff/web,@codebuff/cli,@codebuff/evals,@codebuff/scripts}' run test", + "test": "bun --filter='{@codebuff/common,@codebuff/agents,@codebuff/agent-runtime,@codebuff/llm-providers,@codebuff/sdk,@codebuff/web,@codebuff/cli,@codebuff/evals,@codebuff/scripts}' run test", "init-worktree": "bun scripts/init-worktree.ts", "cleanup-worktree": "bun scripts/cleanup-worktree.ts", "generate-tool-definitions": "bun scripts/generate-tool-definitions.ts" diff --git a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts index 74a637c8ef..1b2768dfd2 100644 --- a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts +++ b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts @@ -2,11 +2,13 @@ import * as analytics from '@codebuff/common/analytics' import { TEST_USER_ID } from '@codebuff/common/old-constants' import { createTestAgentRuntimeParams } from '@codebuff/common/testing/fixtures/agent-runtime' import { clearMockedModules } from '@codebuff/common/testing/mock-modules' -import { setupDbSpies } from '@codebuff/common/testing/mocks/database' +import { + createMockDbOperations, + setupDbSpies, +} from '@codebuff/common/testing/mocks/database' import { getInitialSessionState } from '@codebuff/common/types/session-state' import { AbortError, promptSuccess } from '@codebuff/common/util/error' import { assistantMessage, userMessage } from '@codebuff/common/util/messages' -import db from '@codebuff/internal/db' import { afterAll, afterEach, @@ -61,7 +63,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => llmCallCount = 0 // Setup spies for database operations 
using typed helper - dbSpies = setupDbSpies(db) + dbSpies = setupDbSpies(createMockDbOperations()) agentRuntimeImpl.promptAiSdkStream = mock(async function* ({}) { llmCallCount++ diff --git a/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts b/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts index f3a793c35a..d55ac77d1a 100644 --- a/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts +++ b/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts @@ -1,11 +1,13 @@ import * as analytics from '@codebuff/common/analytics' import { TEST_USER_ID } from '@codebuff/common/old-constants' import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' -import { setupDbSpies } from '@codebuff/common/testing/mocks/database' +import { + createMockDbOperations, + setupDbSpies, +} from '@codebuff/common/testing/mocks/database' import { getInitialSessionState } from '@codebuff/common/types/session-state' import { promptSuccess } from '@codebuff/common/util/error' import { assistantMessage, userMessage } from '@codebuff/common/util/messages' -import db from '@codebuff/internal/db' import { afterAll, afterEach, @@ -66,7 +68,7 @@ describe('runAgentStep - set_output tool', () => { } // Setup spies for database operations using typed helper - dbSpies = setupDbSpies(db) + dbSpies = setupDbSpies(createMockDbOperations()) // Mock analytics spyOn(analytics, 'trackEvent').mockImplementation(() => {}) diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts index c3ce83d15d..7e41e0385c 100644 --- a/packages/agent-runtime/src/prompt-agent-stream.ts +++ b/packages/agent-runtime/src/prompt-agent-stream.ts @@ -3,11 +3,14 @@ import { globalStopSequence } from './constants' import type { AgentTemplate } from './templates/types' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { SendActionFn } from 
'@codebuff/common/types/contracts/client' -import type { CacheDebugUsageData, PromptAiSdkStreamFn } from '@codebuff/common/types/contracts/llm' +import type { + CacheDebugUsageData, + PromptAiSdkStreamFn, +} from '@codebuff/common/types/contracts/llm' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsOf } from '@codebuff/common/types/function-params' import type { Message } from '@codebuff/common/types/messages/codebuff-message' -import type { OpenRouterProviderOptions } from '@codebuff/internal/openrouter-ai-sdk' +import type { OpenRouterProviderOptions } from '@codebuff/common/types/agent-template' import type { ToolSet } from 'ai' export const getAgentStreamFromTemplate = (params: { diff --git a/packages/internal/package.json b/packages/internal/package.json index 7c4f797e7a..8183341630 100644 --- a/packages/internal/package.json +++ b/packages/internal/package.json @@ -22,12 +22,6 @@ "types": "./src/loops/index.ts", "default": "./src/loops/index.ts" }, - "./openrouter-ai-sdk": { - "bun": "./src/openrouter-ai-sdk/index.ts", - "import": "./src/openrouter-ai-sdk/index.ts", - "types": "./src/openrouter-ai-sdk/index.ts", - "default": "./src/openrouter-ai-sdk/index.ts" - }, "./env": { "react-server": "./src/env.react-server.ts", "browser": "./src/env.browser.ts", @@ -58,7 +52,6 @@ "bun": "1.3.11" }, "dependencies": { - "@ai-sdk/provider-utils": "^3.0.17", "@codebuff/common": "workspace:*", "drizzle-kit": "0.31.8", "drizzle-orm": "0.45.1", diff --git a/packages/internal/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts b/packages/internal/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts deleted file mode 100644 index b18feae081..0000000000 --- a/packages/internal/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts +++ /dev/null @@ -1,19 +0,0 @@ -import type { LanguageModelV2FinishReason } from '@ai-sdk/provider'; - -export function mapOpenAICompatibleFinishReason( 
- finishReason: string | null | undefined, -): LanguageModelV2FinishReason { - switch (finishReason) { - case 'stop': - return 'stop'; - case 'length': - return 'length'; - case 'content_filter': - return 'content-filter'; - case 'function_call': - case 'tool_calls': - return 'tool-calls'; - default: - return 'unknown'; - } -} diff --git a/packages/internal/src/openai-compatible/image/openai-compatible-image-settings.ts b/packages/internal/src/openai-compatible/image/openai-compatible-image-settings.ts deleted file mode 100644 index 463fd56530..0000000000 --- a/packages/internal/src/openai-compatible/image/openai-compatible-image-settings.ts +++ /dev/null @@ -1 +0,0 @@ -export type OpenAICompatibleImageModelId = string; diff --git a/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.test.ts b/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.test.ts deleted file mode 100644 index a6897db596..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.test.ts +++ /dev/null @@ -1,551 +0,0 @@ -import { describe, expect, it } from 'bun:test' - -import { convertToOpenRouterChatMessages } from './convert-to-openrouter-chat-messages' - -describe('user messages', () => { - it('should convert image Uint8Array', async () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: new Uint8Array([0, 1, 2, 3]), - mediaType: 'image/png', - }, - ], - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - }, - ], - }, - ]) - }) - - it('should convert image urls', async () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: 'https://example.com/image.png', - 
mediaType: 'image/png', - }, - ], - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'image_url', - image_url: { url: 'https://example.com/image.png' }, - }, - ], - }, - ]) - }) - - it('should convert messages with image base64', async () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: 'data:image/png;base64,AAECAw==', - mediaType: 'image/png', - }, - ], - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - }, - ], - }, - ]) - }) -}) - -describe('cache control', () => { - it('should pass cache control from system message provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'system', - content: 'System prompt', - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'system', - content: 'System prompt', - cache_control: { type: 'ephemeral' }, - }, - ]) - }) - - it('should pass cache control from user message provider metadata (single text part)', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should pass cache control from content part provider metadata (single text part)', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ], - }, - ]) - - 
expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should pass cache control from user message provider metadata (multiple parts)', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: new Uint8Array([0, 1, 2, 3]), - mediaType: 'image/png', - }, - ], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should pass cache control from user message provider metadata without cache control (single text part)', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ]) - }) - - it('should pass cache control to multiple image parts from user message provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: new Uint8Array([0, 1, 2, 3]), - mediaType: 'image/png', - }, - { - type: 'file', - data: new Uint8Array([4, 5, 6, 7]), - mediaType: 'image/jpeg', - }, - ], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - cache_control: { type: 'ephemeral' }, - }, - { - type: 
'image_url', - image_url: { url: 'data:image/jpeg;base64,BAUGBw==' }, - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should pass cache control to file parts from user message provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { type: 'text', text: 'Hello' }, - { - type: 'file', - data: 'ZmlsZSBjb250ZW50', - mediaType: 'text/plain', - providerOptions: { - openrouter: { - filename: 'file.txt', - }, - }, - }, - ], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - { - type: 'file', - file: { - filename: 'file.txt', - file_data: 'data:text/plain;base64,ZmlsZSBjb250ZW50', - }, - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should handle mixed part-specific and message-level cache control for multiple parts', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - // No part-specific provider metadata - }, - { - type: 'file', - data: new Uint8Array([0, 1, 2, 3]), - mediaType: 'image/png', - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - { - type: 'file', - data: 'ZmlsZSBjb250ZW50', - mediaType: 'text/plain', - providerOptions: { - openrouter: { - filename: 'file.txt', - }, - }, - // No part-specific provider metadata - }, - ], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - cache_control: { type: 'ephemeral' }, - }, - { - type: 'file', - file: { - filename: 'file.txt', - file_data: 
'data:text/plain;base64,ZmlsZSBjb250ZW50', - }, - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) - - it('should pass cache control from individual content part provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - { - type: 'file', - data: new Uint8Array([0, 1, 2, 3]), - mediaType: 'image/png', - }, - ], - }, - ]) - - expect(result).toEqual([ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Hello', - cache_control: { type: 'ephemeral' }, - }, - { - type: 'image_url', - image_url: { url: 'data:image/png;base64,AAECAw==' }, - }, - ], - }, - ]) - }) - - it('should pass cache control from assistant message provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'assistant', - content: [{ type: 'text', text: 'Assistant response' }], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'assistant', - content: 'Assistant response', - cache_control: { type: 'ephemeral' }, - }, - ]) - }) - - it('should pass cache control from tool message provider metadata', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'tool', - content: [ - { - type: 'tool-result', - toolCallId: 'call-123', - toolName: 'calculator', - output: { - type: 'json', - value: { answer: 42 }, - }, - }, - ], - providerOptions: { - anthropic: { - cacheControl: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'tool', - tool_call_id: 'call-123', - content: JSON.stringify({ answer: 42 }), - cache_control: { type: 'ephemeral' }, - }, - ]) - }) - - it('should support the alias cache_control field', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'system', - content: 'System prompt', - providerOptions: { - 
anthropic: { - cache_control: { type: 'ephemeral' }, - }, - }, - }, - ]) - - expect(result).toEqual([ - { - role: 'system', - content: 'System prompt', - cache_control: { type: 'ephemeral' }, - }, - ]) - }) - - it('should support cache control on last message in content array', () => { - const result = convertToOpenRouterChatMessages([ - { - role: 'system', - content: 'System prompt', - }, - { - role: 'user', - content: [ - { type: 'text', text: 'User prompt' }, - { - type: 'text', - text: 'User prompt 2', - providerOptions: { - anthropic: { cacheControl: { type: 'ephemeral' } }, - }, - }, - ], - }, - ]) - - expect(result).toEqual([ - { - role: 'system', - content: 'System prompt', - }, - { - role: 'user', - content: [ - { type: 'text', text: 'User prompt' }, - { - type: 'text', - text: 'User prompt 2', - cache_control: { type: 'ephemeral' }, - }, - ], - }, - ]) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.ts b/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.ts deleted file mode 100644 index 41cf10d76a..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/convert-to-openrouter-chat-messages.ts +++ /dev/null @@ -1,223 +0,0 @@ -import { getFileUrl } from './file-url-utils' -import { isUrl } from './is-url' -import { ReasoningDetailType } from '../schemas/reasoning-details' - -import type { ReasoningDetailUnion } from '../schemas/reasoning-details' -import type { - ChatCompletionContentPart, - OpenRouterChatCompletionsInput, -} from '../types/openrouter-chat-completions-input' -import type { - LanguageModelV2FilePart, - LanguageModelV2Prompt, - LanguageModelV2TextPart, - LanguageModelV2ToolResultPart, - SharedV2ProviderMetadata, -} from '@ai-sdk/provider' - -// Type for OpenRouter Cache Control following Anthropic's pattern -export type OpenRouterCacheControl = { type: 'ephemeral' } - -function getCacheControl( - providerMetadata: SharedV2ProviderMetadata | undefined, 
-): OpenRouterCacheControl | undefined { - const anthropic = providerMetadata?.anthropic - const openrouter = providerMetadata?.openrouter - - // Allow both cacheControl and cache_control: - return (openrouter?.cacheControl ?? - openrouter?.cache_control ?? - anthropic?.cacheControl ?? - anthropic?.cache_control) as OpenRouterCacheControl | undefined -} - -export function convertToOpenRouterChatMessages( - prompt: LanguageModelV2Prompt, -): OpenRouterChatCompletionsInput { - const messages: OpenRouterChatCompletionsInput = [] - for (const { role, content, providerOptions } of prompt) { - switch (role) { - case 'system': { - messages.push({ - role: 'system', - content, - cache_control: getCacheControl(providerOptions), - }) - break - } - - case 'user': { - // Get message level cache control - const messageCacheControl = getCacheControl(providerOptions) - const contentParts: ChatCompletionContentPart[] = content.map( - (part: LanguageModelV2TextPart | LanguageModelV2FilePart) => { - const cacheControl = - getCacheControl(part.providerOptions) ?? messageCacheControl - - switch (part.type) { - case 'text': - return { - type: 'text' as const, - text: part.text, - // For text parts, only use part-specific cache control - cache_control: cacheControl, - } - case 'file': { - if (part.mediaType?.startsWith('image/')) { - const url = getFileUrl({ - part, - defaultMediaType: 'image/jpeg', - }) - return { - type: 'image_url' as const, - image_url: { - url, - }, - // For image parts, use part-specific or message-level cache control - cache_control: cacheControl, - } - } - - const fileName = String( - part.providerOptions?.openrouter?.filename ?? - part.filename ?? 
- '', - ) - - const fileData = getFileUrl({ - part, - defaultMediaType: 'application/pdf', - }) - - if ( - isUrl({ - url: fileData, - protocols: new Set(['http:', 'https:']), - }) - ) { - return { - type: 'file' as const, - file: { - filename: fileName, - file_data: fileData, - }, - } satisfies ChatCompletionContentPart - } - - return { - type: 'file' as const, - file: { - filename: fileName, - file_data: fileData, - }, - cache_control: cacheControl, - } satisfies ChatCompletionContentPart - } - default: { - return { - type: 'text' as const, - text: '', - cache_control: cacheControl, - } - } - } - }, - ) - - // For multi-part messages, don't add cache_control at the root level - messages.push({ - role: 'user', - content: contentParts, - }) - - break - } - - case 'assistant': { - let text = '' - let reasoning = '' - const reasoningDetails: ReasoningDetailUnion[] = [] - const toolCalls: Array<{ - id: string - type: 'function' - function: { name: string; arguments: string } - }> = [] - - for (const part of content) { - switch (part.type) { - case 'text': { - text += part.text - break - } - case 'tool-call': { - toolCalls.push({ - id: part.toolCallId, - type: 'function', - function: { - name: part.toolName, - arguments: JSON.stringify(part.input), - }, - }) - break - } - case 'reasoning': { - reasoning += part.text - reasoningDetails.push({ - type: ReasoningDetailType.Text, - text: part.text, - }) - - break - } - - case 'file': - break - default: { - break - } - } - } - - messages.push({ - role: 'assistant', - content: text, - tool_calls: toolCalls.length > 0 ? toolCalls : undefined, - reasoning: reasoning || undefined, - reasoning_details: - reasoningDetails.length > 0 ? 
reasoningDetails : undefined, - cache_control: getCacheControl(providerOptions), - }) - - break - } - - case 'tool': { - for (const toolResponse of content) { - const content = getToolResultContent(toolResponse) - - messages.push({ - role: 'tool', - tool_call_id: toolResponse.toolCallId, - content, - cache_control: - getCacheControl(providerOptions) ?? - getCacheControl(toolResponse.providerOptions), - }) - } - break - } - - default: { - break - } - } - } - - return messages -} - -function getToolResultContent(input: LanguageModelV2ToolResultPart): string { - return input.output.type === 'text' - ? input.output.value - : JSON.stringify(input.output.value) -} diff --git a/packages/internal/src/openrouter-ai-sdk/chat/file-url-utils.ts b/packages/internal/src/openrouter-ai-sdk/chat/file-url-utils.ts deleted file mode 100644 index d094c28cb4..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/file-url-utils.ts +++ /dev/null @@ -1,34 +0,0 @@ - -import { convertUint8ArrayToBase64 } from '@ai-sdk/provider-utils' - -import { isUrl } from './is-url' - -import type { LanguageModelV2FilePart } from '@ai-sdk/provider' - -export function getFileUrl({ - part, - defaultMediaType, -}: { - part: LanguageModelV2FilePart - defaultMediaType: string -}) { - if (part.data instanceof Uint8Array) { - const base64 = convertUint8ArrayToBase64(part.data) - return `data:${part.mediaType ?? defaultMediaType};base64,${base64}` - } - - const stringUrl = part.data.toString() - - if ( - isUrl({ - url: stringUrl, - protocols: new Set(['http:', 'https:']), - }) - ) { - return stringUrl - } - - return stringUrl.startsWith('data:') - ? stringUrl - : `data:${part.mediaType ?? 
defaultMediaType};base64,${stringUrl}` -} diff --git a/packages/internal/src/openrouter-ai-sdk/chat/get-tool-choice.ts b/packages/internal/src/openrouter-ai-sdk/chat/get-tool-choice.ts deleted file mode 100644 index dad83d4d9a..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/get-tool-choice.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { z } from 'zod/v4' - -import type { LanguageModelV2ToolChoice } from '@ai-sdk/provider' - - -const ChatCompletionToolChoiceSchema = z.union([ - z.literal('auto'), - z.literal('none'), - z.literal('required'), - z.object({ - type: z.literal('function'), - function: z.object({ - name: z.string(), - }), - }), -]) - -type ChatCompletionToolChoice = z.infer - -export function getChatCompletionToolChoice( - toolChoice: LanguageModelV2ToolChoice, -): ChatCompletionToolChoice { - switch (toolChoice.type) { - case 'auto': - case 'none': - case 'required': - return toolChoice.type - case 'tool': { - return { - type: 'function', - function: { name: toolChoice.toolName }, - } - } - default: { - toolChoice satisfies never - throw new Error(`Invalid tool choice type: ${toolChoice}`) - } - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/chat/index.test.ts b/packages/internal/src/openrouter-ai-sdk/chat/index.test.ts deleted file mode 100644 index d2143a7533..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/index.test.ts +++ /dev/null @@ -1,1599 +0,0 @@ -import { convertReadableStreamToArray } from '@ai-sdk/provider-utils/test' -import { beforeEach, describe, expect, it } from 'bun:test' - -import { createOpenRouter } from '../provider' -import { ReasoningDetailType } from '../schemas/reasoning-details' - -import type { ReasoningDetailUnion } from '../schemas/reasoning-details' -import type { LanguageModelV2Prompt } from '@ai-sdk/provider' - - - -const TEST_PROMPT: LanguageModelV2Prompt = [ - { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, -] - -const TEST_LOGPROBS = { - content: [ - { - token: 'Hello', 
- logprob: -0.0009994634, - top_logprobs: [ - { - token: 'Hello', - logprob: -0.0009994634, - }, - ], - }, - { - token: '!', - logprob: -0.13410144, - top_logprobs: [ - { - token: '!', - logprob: -0.13410144, - }, - ], - }, - { - token: ' How', - logprob: -0.0009250381, - top_logprobs: [ - { - token: ' How', - logprob: -0.0009250381, - }, - ], - }, - { - token: ' can', - logprob: -0.047709424, - top_logprobs: [ - { - token: ' can', - logprob: -0.047709424, - }, - ], - }, - { - token: ' I', - logprob: -0.000009014684, - top_logprobs: [ - { - token: ' I', - logprob: -0.000009014684, - }, - ], - }, - { - token: ' assist', - logprob: -0.009125131, - top_logprobs: [ - { - token: ' assist', - logprob: -0.009125131, - }, - ], - }, - { - token: ' you', - logprob: -0.0000066306106, - top_logprobs: [ - { - token: ' you', - logprob: -0.0000066306106, - }, - ], - }, - { - token: ' today', - logprob: -0.00011093382, - top_logprobs: [ - { - token: ' today', - logprob: -0.00011093382, - }, - ], - }, - { - token: '?', - logprob: -0.00004596782, - top_logprobs: [ - { - token: '?', - logprob: -0.00004596782, - }, - ], - }, - ], -} - -type MockResponseDefinition = - | { - type: 'json-value' - body: any - headers?: Record - status?: number - } - | { - type: 'stream-chunks' - chunks: string[] - headers?: Record - status?: number - } - -type MockServerRoute = { - response: MockResponseDefinition -} - -type MockServerCall = { - requestHeaders: Record - requestBodyJson: Promise -} - -const createStreamFromChunks = (chunks: string[]) => - new ReadableStream({ - start(controller) { - try { - for (const chunk of chunks) { - controller.enqueue(chunk) - } - } finally { - controller.close() - } - }, - }).pipeThrough(new TextEncoderStream()) - -function toHeadersRecord(headers?: HeadersInit): Record { - const result: Record = {} - - if (!headers) { - return result - } - - if (headers instanceof Headers) { - headers.forEach((value, key) => { - result[key.toLowerCase()] = value - }) - return 
result - } - - if (Array.isArray(headers)) { - for (const [key, value] of headers) { - result[String(key).toLowerCase()] = String(value) - } - return result - } - - for (const [key, value] of Object.entries(headers)) { - if (typeof value !== 'undefined') { - result[key.toLowerCase()] = String(value) - } - } - - return result -} - -function parseRequestBody(body: BodyInit | null | undefined): any { - if (body == null) { - return undefined - } - - if (typeof body === 'string') { - try { - return JSON.parse(body) - } catch { - return undefined - } - } - - return undefined -} - -function createMockServer(routes: Record) { - const urls: Record = Object.fromEntries( - Object.entries(routes).map(([url, config]) => [ - url, - { - response: { ...config.response }, - }, - ]), - ) - - const calls: MockServerCall[] = [] - - const buildResponse = (definition: MockResponseDefinition): Response => { - const status = definition.status ?? 200 - - if (definition.type === 'json-value') { - return new Response(JSON.stringify(definition.body), { - status, - headers: { - 'Content-Type': 'application/json', - ...definition.headers, - }, - }) - } - - return new Response(createStreamFromChunks(definition.chunks), { - status, - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - ...definition.headers, - }, - }) - } - - const fetchImpl = async (input: RequestInfo, init: RequestInit = {}) => { - const url = - typeof input === 'string' - ? input - : input instanceof URL - ? input.toString() - : input.url - - const route = urls[url] - - if (!route) { - return new Response('Not Found', { status: 404 }) - } - - const requestHeaders = toHeadersRecord(init.headers) - const requestBodyJson = Promise.resolve(parseRequestBody(init.body)) - - calls.push({ requestHeaders, requestBodyJson }) - - return buildResponse(route.response) - } - - const fetch = ((input: RequestInfo | URL, init?: RequestInit) => - fetchImpl(input as RequestInfo, init ?? 
{})) as typeof global.fetch - - fetch.preconnect = async () => {} - - return { - urls, - calls, - fetch, - } -} - -describe('doGenerate', () => { - const server = createMockServer({ - 'https://openrouter.ai/api/v1/chat/completions': { - response: { type: 'json-value', body: {} }, - }, - }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - compatibility: 'strict', - fetch: server.fetch, - }) - - const model = provider.chat('anthropic/claude-3.5-sonnet') - - beforeEach(() => { - server.calls.length = 0 - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'json-value', - body: {}, - } - }) - - function prepareJsonResponse({ - content = '', - reasoning, - reasoning_details, - usage = { - prompt_tokens: 4, - total_tokens: 34, - completion_tokens: 30, - }, - logprobs = null, - finish_reason = 'stop', - }: { - content?: string - reasoning?: string - reasoning_details?: Array - usage?: { - prompt_tokens: number - total_tokens: number - completion_tokens: number - } - logprobs?: { - content: - | { - token: string - logprob: number - top_logprobs: { token: string; logprob: number }[] - }[] - | null - } | null - finish_reason?: string - } = {}) { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'json-value', - body: { - id: 'chatcmpl-95ZTZkhr0mHNKqerQfiwkuox3PHAd', - object: 'chat.completion', - created: 1711115037, - model: 'gpt-3.5-turbo-0125', - choices: [ - { - index: 0, - message: { - role: 'assistant', - content, - reasoning, - reasoning_details, - }, - logprobs, - finish_reason, - }, - ], - usage, - system_fingerprint: 'fp_3bc1b5746c', - }, - } - } - - it('should extract text response', async () => { - prepareJsonResponse({ content: 'Hello, World!' 
}) - - const result = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(result.content[0]).toStrictEqual({ - type: 'text', - text: 'Hello, World!', - }) - }) - - it('should extract usage', async () => { - prepareJsonResponse({ - content: '', - usage: { prompt_tokens: 20, total_tokens: 25, completion_tokens: 5 }, - }) - - const { usage } = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(usage).toStrictEqual({ - inputTokens: 20, - outputTokens: 5, - totalTokens: 25, - reasoningTokens: 0, - cachedInputTokens: 0, - }) - }) - - it('should extract logprobs', async () => { - prepareJsonResponse({ - logprobs: TEST_LOGPROBS, - }) - - await provider.chat('openai/gpt-3.5-turbo', { logprobs: 1 }).doGenerate({ - prompt: TEST_PROMPT, - }) - }) - - it('should extract finish reason', async () => { - prepareJsonResponse({ - content: '', - finish_reason: 'stop', - }) - - const response = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(response.finishReason).toStrictEqual('stop') - }) - - it('should support unknown finish reason', async () => { - prepareJsonResponse({ - content: '', - finish_reason: 'eos', - }) - - const response = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(response.finishReason).toStrictEqual('unknown') - }) - - it('should extract reasoning content from reasoning field', async () => { - prepareJsonResponse({ - content: 'Hello!', - reasoning: - 'I need to think about this... The user said hello, so I should respond with a greeting.', - }) - - const result = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(result.content).toStrictEqual([ - { - type: 'reasoning', - text: 'I need to think about this... 
The user said hello, so I should respond with a greeting.', - }, - { - type: 'text', - text: 'Hello!', - }, - ]) - }) - - it('should extract reasoning content from reasoning_details', async () => { - prepareJsonResponse({ - content: 'Hello!', - reasoning_details: [ - { - type: ReasoningDetailType.Text, - text: 'Let me analyze this request...', - }, - { - type: ReasoningDetailType.Summary, - summary: 'The user wants a greeting response.', - }, - ], - }) - - const result = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(result.content).toStrictEqual([ - { - type: 'reasoning', - text: 'Let me analyze this request...', - }, - { - type: 'reasoning', - text: 'The user wants a greeting response.', - }, - { - type: 'text', - text: 'Hello!', - }, - ]) - }) - - it('should handle encrypted reasoning details', async () => { - prepareJsonResponse({ - content: 'Hello!', - reasoning_details: [ - { - type: ReasoningDetailType.Encrypted, - data: 'encrypted_reasoning_data_here', - }, - ], - }) - - const result = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(result.content).toStrictEqual([ - { - type: 'reasoning', - text: '[REDACTED]', - }, - { - type: 'text', - text: 'Hello!', - }, - ]) - }) - - it('should prioritize reasoning_details over reasoning when both are present', async () => { - prepareJsonResponse({ - content: 'Hello!', - reasoning: 'This should be ignored when reasoning_details is present', - reasoning_details: [ - { - type: ReasoningDetailType.Text, - text: 'Processing from reasoning_details...', - }, - { - type: ReasoningDetailType.Summary, - summary: 'Summary from reasoning_details', - }, - ], - }) - - const result = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(result.content).toStrictEqual([ - { - type: 'reasoning', - text: 'Processing from reasoning_details...', - }, - { - type: 'reasoning', - text: 'Summary from reasoning_details', - }, - { - type: 'text', - text: 'Hello!', - }, - ]) - - // Verify that the 
reasoning field content is not included - expect(result.content).not.toContainEqual({ - type: 'reasoning', - text: 'This should be ignored when reasoning_details is present', - }) - }) - - it('should pass the model and the messages', async () => { - prepareJsonResponse({ content: '' }) - - await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - }) - }) - - it('should pass the models array when provided', async () => { - prepareJsonResponse({ content: '' }) - - const customModel = provider.chat('anthropic/claude-3.5-sonnet', { - models: ['anthropic/claude-2', 'gryphe/mythomax-l2-13b'], - }) - - await customModel.doGenerate({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'anthropic/claude-3.5-sonnet', - models: ['anthropic/claude-2', 'gryphe/mythomax-l2-13b'], - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - }) - }) - - it('should pass settings', async () => { - prepareJsonResponse() - - await provider - .chat('openai/gpt-3.5-turbo', { - logitBias: { 50256: -100 }, - logprobs: 2, - parallelToolCalls: false, - user: 'test-user-id', - }) - .doGenerate({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'openai/gpt-3.5-turbo', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - logprobs: true, - top_logprobs: 2, - logit_bias: { 50256: -100 }, - parallel_tool_calls: false, - user: 'test-user-id', - }) - }) - - it('should pass tools and toolChoice', async () => { - prepareJsonResponse({ content: '' }) - - await model.doGenerate({ - prompt: TEST_PROMPT, - tools: [ - { - type: 'function', - name: 'test-tool', - description: 
'Test tool', - inputSchema: { - type: 'object', - properties: { value: { type: 'string' } }, - required: ['value'], - additionalProperties: false, - $schema: 'http://json-schema.org/draft-07/schema#', - }, - }, - ], - toolChoice: { - type: 'tool', - toolName: 'test-tool', - }, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - tools: [ - { - type: 'function', - function: { - name: 'test-tool', - description: 'Test tool', - parameters: { - type: 'object', - properties: { value: { type: 'string' } }, - required: ['value'], - additionalProperties: false, - $schema: 'http://json-schema.org/draft-07/schema#', - }, - }, - }, - ], - tool_choice: { - type: 'function', - function: { name: 'test-tool' }, - }, - }) - }) - - it('should pass headers', async () => { - prepareJsonResponse({ content: '' }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - headers: { - 'Custom-Provider-Header': 'provider-header-value', - }, - fetch: server.fetch, - }) - - await provider.chat('openai/gpt-3.5-turbo').doGenerate({ - prompt: TEST_PROMPT, - headers: { - 'Custom-Request-Header': 'request-header-value', - }, - }) - - const requestHeaders = server.calls[0]!.requestHeaders - - expect(requestHeaders.authorization).toBe('Bearer test-api-key') - expect(requestHeaders['content-type']).toBe('application/json') - expect(requestHeaders['custom-provider-header']).toBe( - 'provider-header-value', - ) - expect(requestHeaders['custom-request-header']).toBe('request-header-value') - expect(requestHeaders['user-agent']).toMatch( - /^ai-sdk\/provider-utils\/\d+\.\d+\.\d+ runtime\/bun\/\d+\.\d+\.\d+$/, - ) - }) - - it('should pass responseFormat for JSON schema structured outputs', async () => { - prepareJsonResponse({ content: '{"name": "John", "age": 30}' }) - - const testSchema = { - type: 'object' as const, - 
properties: { - name: { type: 'string' as const }, - age: { type: 'number' as const }, - }, - required: ['name', 'age'], - additionalProperties: false, - } - - await model.doGenerate({ - prompt: TEST_PROMPT, - responseFormat: { - type: 'json', - schema: testSchema, - name: 'PersonResponse', - description: 'A person object', - }, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - response_format: { - type: 'json_schema', - json_schema: { - schema: testSchema, - strict: true, - name: 'PersonResponse', - description: 'A person object', - }, - }, - }) - }) - - it('should use default name when name is not provided in responseFormat', async () => { - prepareJsonResponse({ content: '{"name": "John", "age": 30}' }) - - const testSchema = { - type: 'object' as const, - properties: { - name: { type: 'string' as const }, - age: { type: 'number' as const }, - }, - required: ['name', 'age'], - additionalProperties: false, - } - - await model.doGenerate({ - prompt: TEST_PROMPT, - responseFormat: { - type: 'json', - schema: testSchema, - }, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - response_format: { - type: 'json_schema', - json_schema: { - schema: testSchema, - strict: true, - name: 'response', - }, - }, - }) - }) -}) - -describe('doStream', () => { - const server = createMockServer({ - 'https://openrouter.ai/api/v1/chat/completions': { - response: { type: 'stream-chunks', chunks: [] }, - }, - }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - compatibility: 'strict', - fetch: server.fetch, - }) - - const model = provider.chat('anthropic/claude-3.5-sonnet') - - beforeEach(() => { - server.calls.length = 0 
- server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [], - } - }) - - function prepareStreamResponse({ - content, - usage = { - prompt_tokens: 17, - total_tokens: 244, - completion_tokens: 227, - }, - logprobs = null, - finish_reason = 'stop', - }: { - content: string[] - usage?: { - prompt_tokens: number - total_tokens: number - completion_tokens: number - } - logprobs?: { - content: - | { - token: string - logprob: number - top_logprobs: { token: string; logprob: number }[] - }[] - | null - } | null - finish_reason?: string - }) { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1702657020,"model":"gpt-3.5-turbo-0613",` + - `"system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}\n\n`, - ...content.flatMap((text) => { - return `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1702657020,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":1,"delta":{"content":"${text}"},"finish_reason":null}]}\n\n` - }), - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1702657020,"model":"gpt-3.5-turbo-0613","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"${finish_reason}","logprobs":${JSON.stringify( - logprobs, - )}}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1702657020,"model":"gpt-3.5-turbo-0613","system_fingerprint":"fp_3bc1b5746c","choices":[],"usage":${JSON.stringify( - usage, - )}}\n\n`, - 'data: [DONE]\n\n', - ], - } - } - - it('should stream text deltas', async () => { - prepareStreamResponse({ - content: ['Hello', ', ', 'World!'], - finish_reason: 'stop', - usage: { - prompt_tokens: 17, 
- total_tokens: 244, - completion_tokens: 227, - }, - logprobs: TEST_LOGPROBS, - }) - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - // note: space moved to last chunk bc of trimming - const elements = await convertReadableStreamToArray(stream) - expect(elements).toStrictEqual([ - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { type: 'text-start', id: expect.any(String) }, - { type: 'text-delta', delta: 'Hello', id: expect.any(String) }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { type: 'text-delta', delta: ', ', id: expect.any(String) }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { type: 'text-delta', delta: 'World!', id: expect.any(String) }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0613', - }, - { - type: 'text-end', - id: expect.any(String), - }, - { - type: 'finish', - finishReason: 'stop', - - providerMetadata: { - openrouter: { - usage: { - completionTokens: 227, - promptTokens: 17, - totalTokens: 244, - }, - }, - }, - usage: { - inputTokens: 17, - outputTokens: 227, - totalTokens: 244, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should prioritize reasoning_details over reasoning when both are present in streaming', async () => { - // 
This test verifies that when the API returns both 'reasoning' and 'reasoning_details' fields, - // we prioritize reasoning_details and ignore the reasoning field to avoid duplicates. - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - // First chunk: both reasoning and reasoning_details with different content - `data: {"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"role":"assistant","content":"",` + - `"reasoning":"This should be ignored...",` + - `"reasoning_details":[{"type":"${ReasoningDetailType.Text}","text":"Let me think about this..."}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Second chunk: reasoning_details with multiple types - `data: {"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{` + - `"reasoning":"Also ignored",` + - `"reasoning_details":[{"type":"${ReasoningDetailType.Summary}","summary":"User wants a greeting"},{"type":"${ReasoningDetailType.Encrypted}","data":"secret"}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Third chunk: only reasoning field (should be processed) - `data: {"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{` + - `"reasoning":"This reasoning is used"},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Content chunk - `data: {"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"content":"Hello!"},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Finish chunk - `data: 
{"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{},` + - `"logprobs":null,"finish_reason":"stop"}]}\n\n`, - `data: {"id":"chatcmpl-reasoning","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[],"usage":{"prompt_tokens":17,"completion_tokens":30,"total_tokens":47}}\n\n`, - 'data: [DONE]\n\n', - ], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - // Filter for reasoning-related elements - const reasoningElements = elements.filter( - (el) => - el.type === 'reasoning-start' || - el.type === 'reasoning-delta' || - el.type === 'reasoning-end', - ) - - // Debug output to see what we're getting - // console.log('Reasoning elements count:', reasoningElements.length); - // console.log('Reasoning element types:', reasoningElements.map(el => el.type)); - - // We should get reasoning content from reasoning_details when present, not reasoning field - // start + 4 deltas (text, summary, encrypted, reasoning-only) + end = 6 - expect(reasoningElements).toHaveLength(6) - - // Verify the content comes from reasoning_details, not reasoning field - const reasoningDeltas = reasoningElements - .filter((el) => el.type === 'reasoning-delta') - .map( - (el) => - (el as { type: 'reasoning-delta'; delta: string; id: string }).delta, - ) - - expect(reasoningDeltas).toEqual([ - 'Let me think about this...', // from reasoning_details text - 'User wants a greeting', // from reasoning_details summary - '[REDACTED]', // from reasoning_details encrypted - 'This reasoning is used', // from reasoning field (no reasoning_details) - ]) - - // Verify that "This should be ignored..." 
and "Also ignored" are NOT in the output - expect(reasoningDeltas).not.toContain('This should be ignored...') - expect(reasoningDeltas).not.toContain('Also ignored') - }) - - it('should maintain correct reasoning order when content comes after reasoning (issue #7824)', async () => { - // This test reproduces the issue where reasoning appears first but then gets "pushed down" - // by content that comes later in the stream - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - // First chunk: Start with reasoning - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"role":"assistant",` + - `"reasoning":"I need to think about this step by step..."},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Second chunk: More reasoning - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{` + - `"reasoning":" First, I should analyze the request."},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Third chunk: Even more reasoning - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{` + - `"reasoning":" Then I should provide a helpful response."},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Fourth chunk: Content starts - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"content":"Hello! 
"},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Fifth chunk: More content - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"content":"How can I help you today?"},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - // Finish chunk - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{},` + - `"logprobs":null,"finish_reason":"stop"}]}\n\n`, - `data: {"id":"chatcmpl-order-test","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[],"usage":{"prompt_tokens":17,"completion_tokens":30,"total_tokens":47}}\n\n`, - 'data: [DONE]\n\n', - ], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - // The expected order should be: - // 1. reasoning-start - // 2. reasoning-delta (3 times) - // 3. reasoning-end (when text starts) - // 4. text-start - // 5. text-delta (2 times) - // 6. 
text-end (when stream finishes) - - const streamOrder = elements.map((el) => el.type) - - // Find the positions of key events - const reasoningStartIndex = streamOrder.indexOf('reasoning-start') - const reasoningEndIndex = streamOrder.indexOf('reasoning-end') - const textStartIndex = streamOrder.indexOf('text-start') - - // Reasoning should come before text and end before text starts - expect(reasoningStartIndex).toBeLessThan(textStartIndex) - expect(reasoningEndIndex).toBeLessThan(textStartIndex) - - // Verify reasoning content - const reasoningDeltas = elements - .filter((el) => el.type === 'reasoning-delta') - .map((el) => (el as { type: 'reasoning-delta'; delta: string }).delta) - - expect(reasoningDeltas).toEqual([ - 'I need to think about this step by step...', - ' First, I should analyze the request.', - ' Then I should provide a helpful response.', - ]) - - // Verify text content - const textDeltas = elements - .filter((el) => el.type === 'text-delta') - .map((el) => (el as { type: 'text-delta'; delta: string }).delta) - - expect(textDeltas).toEqual(['Hello! 
', 'How can I help you today?']) - }) - - it('should stream tool deltas', async () => { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"role":"assistant","content":null,` + - `"tool_calls":[{"index":0,"id":"call_O17Uplv4lJvD6DVdIvFFeRMw","type":"function","function":{"name":"test-tool","arguments":""}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\""}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"value"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\\":\\""}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Spark"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: 
{"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"le"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" Day"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\\"}"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[],"usage":{"prompt_tokens":53,"completion_tokens":17,"total_tokens":70}}\n\n`, - 'data: [DONE]\n\n', - ], - } - - const { stream } = await model.doStream({ - tools: [ - { - type: 'function', - name: 'test-tool', - inputSchema: { - type: 'object', - properties: { value: { type: 'string' } }, - required: ['value'], - additionalProperties: false, - $schema: 'http://json-schema.org/draft-07/schema#', - }, - }, - ], - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - expect(elements).toStrictEqual([ - { - type: 
'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - toolName: 'test-tool', - type: 'tool-input-start', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: '{"', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: 'value', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: '":"', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: 'Spark', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: 'le', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: ' Day', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: '"}', - }, - { - 
type: 'tool-call', - toolCallId: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - toolName: 'test-tool', - input: '{"value":"Sparkle Day"}', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'finish', - finishReason: 'tool-calls', - providerMetadata: { - openrouter: { - usage: { - completionTokens: 17, - promptTokens: 53, - totalTokens: 70, - }, - }, - }, - usage: { - inputTokens: 53, - outputTokens: 17, - totalTokens: 70, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should stream tool call that is sent in one chunk', async () => { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{"role":"assistant","content":null,` + - `"tool_calls":[{"index":0,"id":"call_O17Uplv4lJvD6DVdIvFFeRMw","type":"function","function":{"name":"test-tool","arguments":"{\\"value\\":\\"Sparkle Day\\"}"}}]},` + - `"logprobs":null,"finish_reason":null}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]}\n\n`, - `data: {"id":"chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP","object":"chat.completion.chunk","created":1711357598,"model":"gpt-3.5-turbo-0125",` + - `"system_fingerprint":"fp_3bc1b5746c","choices":[],"usage":{"prompt_tokens":53,"completion_tokens":17,"total_tokens":70}}\n\n`, - 'data: [DONE]\n\n', 
- ], - } - - const { stream } = await model.doStream({ - tools: [ - { - type: 'function', - name: 'test-tool', - inputSchema: { - type: 'object', - properties: { value: { type: 'string' } }, - required: ['value'], - additionalProperties: false, - $schema: 'http://json-schema.org/draft-07/schema#', - }, - }, - ], - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - expect(elements).toStrictEqual([ - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'tool-input-start', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - toolName: 'test-tool', - }, - { - type: 'tool-input-delta', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - delta: '{"value":"Sparkle Day"}', - }, - { - type: 'tool-input-end', - id: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - }, - { - type: 'tool-call', - toolCallId: 'call_O17Uplv4lJvD6DVdIvFFeRMw', - toolName: 'test-tool', - input: '{"value":"Sparkle Day"}', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'response-metadata', - id: 'chatcmpl-96aZqmeDpA9IPD6tACY8djkMsJCMP', - }, - { - type: 'response-metadata', - modelId: 'gpt-3.5-turbo-0125', - }, - { - type: 'finish', - finishReason: 'tool-calls', - providerMetadata: { - openrouter: { - usage: { - completionTokens: 17, - promptTokens: 53, - totalTokens: 70, - }, - }, - }, - usage: { - inputTokens: 53, - outputTokens: 17, - totalTokens: 70, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should handle error stream parts', async () => { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - `data: {"error":{"message": "The server had an error processing your request. Sorry about that! 
You can retry your request, or contact us through our ` + - `help center at help.openrouter.com if you keep seeing this error.","type":"server_error","param":null,"code":null}}\n\n`, - 'data: [DONE]\n\n', - ], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - expect(elements).toStrictEqual([ - { - type: 'error', - error: { - message: - 'The server had an error processing your request. Sorry about that! ' + - 'You can retry your request, or contact us through our help center at ' + - 'help.openrouter.com if you keep seeing this error.', - type: 'server_error', - code: null, - param: null, - }, - }, - { - finishReason: 'error', - providerMetadata: { - openrouter: { - usage: {}, - }, - }, - type: 'finish', - usage: { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should handle unparsable stream parts', async () => { - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: ['data: {unparsable}\n\n', 'data: [DONE]\n\n'], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - expect(elements.length).toBe(2) - expect(elements[0]?.type).toBe('error') - expect(elements[1]).toStrictEqual({ - finishReason: 'error', - - type: 'finish', - providerMetadata: { - openrouter: { - usage: {}, - }, - }, - usage: { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }) - }) - - it('should pass the messages and the model', async () => { - prepareStreamResponse({ content: [] }) - - await model.doStream({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - 
expect(requestBody).toStrictEqual({ - stream: true, - stream_options: { include_usage: true }, - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - }) - }) - - it('should pass headers', async () => { - prepareStreamResponse({ content: [] }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - headers: { - 'Custom-Provider-Header': 'provider-header-value', - }, - fetch: server.fetch, - }) - - await provider.chat('openai/gpt-3.5-turbo').doStream({ - prompt: TEST_PROMPT, - headers: { - 'Custom-Request-Header': 'request-header-value', - }, - }) - - const requestHeaders = server.calls[0]!.requestHeaders - - expect(requestHeaders.authorization).toBe('Bearer test-api-key') - expect(requestHeaders['content-type']).toBe('application/json') - expect(requestHeaders['custom-provider-header']).toBe( - 'provider-header-value', - ) - expect(requestHeaders['custom-request-header']).toBe('request-header-value') - expect(requestHeaders['user-agent']).toMatch( - /^ai-sdk\/provider-utils\/\d+\.\d+\.\d+ runtime\/bun\/\d+\.\d+\.\d+$/, - ) - }) - - it('should pass extra body', async () => { - prepareStreamResponse({ content: [] }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - extraBody: { - custom_field: 'custom_value', - providers: { - anthropic: { - custom_field: 'custom_value', - }, - }, - }, - fetch: server.fetch, - }) - - await provider.chat('anthropic/claude-3.5-sonnet').doStream({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toHaveProperty('custom_field', 'custom_value') - expect(requestBody).toHaveProperty( - 'providers.anthropic.custom_field', - 'custom_value', - ) - }) - - it('should pass responseFormat for JSON schema structured outputs', async () => { - prepareStreamResponse({ content: ['{"name": "John", "age": 30}'] }) - - const testSchema = { - type: 'object' as const, - properties: { - name: { type: 
'string' as const }, - age: { type: 'number' as const }, - }, - required: ['name', 'age'], - additionalProperties: false, - } - - await model.doStream({ - prompt: TEST_PROMPT, - responseFormat: { - type: 'json', - schema: testSchema, - name: 'PersonResponse', - description: 'A person object', - }, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - stream: true, - stream_options: { include_usage: true }, - model: 'anthropic/claude-3.5-sonnet', - messages: [{ role: 'user', content: [{ type: 'text', text: 'Hello' }] }], - response_format: { - type: 'json_schema', - json_schema: { - schema: testSchema, - strict: true, - name: 'PersonResponse', - description: 'A person object', - }, - }, - }) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/chat/index.ts b/packages/internal/src/openrouter-ai-sdk/chat/index.ts deleted file mode 100644 index 593a369c99..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/index.ts +++ /dev/null @@ -1,852 +0,0 @@ -import { InvalidResponseDataError } from '@ai-sdk/provider' -import { - combineHeaders, - createEventSourceResponseHandler, - createJsonResponseHandler, - generateId, - isParsableJson, - postJsonToApi, -} from '@ai-sdk/provider-utils' - -import { convertToOpenRouterChatMessages } from './convert-to-openrouter-chat-messages' -import { getChatCompletionToolChoice } from './get-tool-choice' -import { - OpenRouterNonStreamChatCompletionResponseSchema, - OpenRouterStreamChatCompletionChunkSchema, -} from './schemas' -import { openrouterFailedResponseHandler } from '../schemas/error-response' -import { ReasoningDetailType } from '../schemas/reasoning-details' -import { mapOpenRouterFinishReason } from '../utils/map-finish-reason' - -import type { OpenRouterUsageAccounting } from '../types/index' -import type { - OpenRouterChatModelId, - OpenRouterChatSettings, -} from '../types/openrouter-chat-settings' -import type { - LanguageModelV2, - 
LanguageModelV2CallOptions, - LanguageModelV2CallWarning, - LanguageModelV2Content, - LanguageModelV2FinishReason, - LanguageModelV2ResponseMetadata, - LanguageModelV2StreamPart, - LanguageModelV2Usage, - SharedV2Headers, -} from '@ai-sdk/provider' -import type { ParseResult } from '@ai-sdk/provider-utils' -import type { FinishReason } from 'ai' -import type { z } from 'zod/v4' - -type OpenRouterChatConfig = { - provider: string - compatibility: 'strict' | 'compatible' - headers: () => Record - url: (options: { modelId: string; path: string }) => string - fetch?: typeof fetch - extraBody?: Record -} - -export class OpenRouterChatLanguageModel implements LanguageModelV2 { - readonly specificationVersion = 'v2' as const - readonly provider = 'openrouter' - readonly defaultObjectGenerationMode = 'tool' as const - - readonly modelId: OpenRouterChatModelId - readonly supportedUrls: Record = { - 'image/*': [ - /^data:image\/[a-zA-Z]+;base64,/, - /^https?:\/\/.+\.(jpg|jpeg|png|gif|webp)$/i, - ], - // 'text/*': [/^data:text\//, /^https?:\/\/.+$/], - 'application/*': [/^data:application\//, /^https?:\/\/.+$/], - } - readonly settings: OpenRouterChatSettings - - private readonly config: OpenRouterChatConfig - - constructor( - modelId: OpenRouterChatModelId, - settings: OpenRouterChatSettings, - config: OpenRouterChatConfig, - ) { - this.modelId = modelId - this.settings = settings - this.config = config - } - - private getArgs({ - prompt, - maxOutputTokens, - temperature, - topP, - frequencyPenalty, - presencePenalty, - seed, - stopSequences, - responseFormat, - topK, - tools, - toolChoice, - }: LanguageModelV2CallOptions) { - const baseArgs = { - // model id: - model: this.modelId, - models: this.settings.models, - - // model specific settings: - logit_bias: this.settings.logitBias, - logprobs: - this.settings.logprobs === true || - typeof this.settings.logprobs === 'number' - ? true - : undefined, - top_logprobs: - typeof this.settings.logprobs === 'number' - ? 
this.settings.logprobs - : typeof this.settings.logprobs === 'boolean' - ? this.settings.logprobs - ? 0 - : undefined - : undefined, - user: this.settings.user, - parallel_tool_calls: this.settings.parallelToolCalls, - - // standardized settings: - max_tokens: maxOutputTokens, - temperature, - top_p: topP, - frequency_penalty: frequencyPenalty, - presence_penalty: presencePenalty, - seed, - - ...(this.modelId === 'x-ai/grok-code-fast-1' - ? {} - : { stop: stopSequences }), - response_format: responseFormat, - top_k: topK, - - // messages: - messages: convertToOpenRouterChatMessages(prompt), - - // OpenRouter specific settings: - include_reasoning: this.settings.includeReasoning, - reasoning: this.settings.reasoning, - usage: this.settings.usage, - - // Web search settings: - plugins: this.settings.plugins, - web_search_options: this.settings.web_search_options, - // Provider routing settings: - provider: this.settings.provider, - - // extra body: - ...this.config.extraBody, - ...this.settings.extraBody, - } - - if (responseFormat?.type === 'json' && responseFormat.schema != null) { - return { - ...baseArgs, - response_format: { - type: 'json_schema', - json_schema: { - schema: responseFormat.schema, - strict: true, - name: responseFormat.name ?? 'response', - ...(responseFormat.description && { - description: responseFormat.description, - }), - }, - }, - } - } - - if (tools && tools.length > 0) { - // TODO: support built-in tools - const mappedTools = tools - .filter((tool) => tool.type === 'function') - .map((tool) => ({ - type: 'function' as const, - function: { - name: tool.name, - description: tool.description, - parameters: tool.inputSchema, - }, - })) - - return { - ...baseArgs, - tools: mappedTools, - tool_choice: toolChoice - ? 
getChatCompletionToolChoice(toolChoice) - : undefined, - } - } - - return baseArgs - } - - async doGenerate(options: LanguageModelV2CallOptions): Promise<{ - content: Array - finishReason: LanguageModelV2FinishReason - usage: LanguageModelV2Usage - warnings: Array - providerMetadata?: { - openrouter: { - provider: string - usage: OpenRouterUsageAccounting - } - } - request?: { body?: unknown } - response?: LanguageModelV2ResponseMetadata & { - headers?: SharedV2Headers - body?: unknown - } - }> { - const providerOptions = options.providerOptions || {} - const openrouterOptions = providerOptions.openrouter || {} - - const args = { - ...this.getArgs(options), - ...openrouterOptions, - } - - const { value: response, responseHeaders } = await postJsonToApi({ - url: this.config.url({ - path: '/chat/completions', - modelId: this.modelId, - }), - headers: combineHeaders(this.config.headers(), options.headers), - body: args, - failedResponseHandler: openrouterFailedResponseHandler, - successfulResponseHandler: createJsonResponseHandler( - OpenRouterNonStreamChatCompletionResponseSchema, - ), - abortSignal: options.abortSignal, - fetch: this.config.fetch, - }) - - const choice = response.choices[0] - - if (!choice) { - throw new Error('No choice in response') - } - - // Extract detailed usage information - const usageInfo: LanguageModelV2Usage = response.usage - ? { - inputTokens: response.usage.prompt_tokens ?? 0, - outputTokens: response.usage.completion_tokens ?? 0, - totalTokens: - (response.usage.prompt_tokens ?? 0) + - (response.usage.completion_tokens ?? 0), - reasoningTokens: - response.usage.completion_tokens_details?.reasoning_tokens ?? 0, - cachedInputTokens: - response.usage.prompt_tokens_details?.cached_tokens ?? 0, - } - : { - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - reasoningTokens: 0, - cachedInputTokens: 0, - } - - const reasoningDetails = choice.message.reasoning_details ?? [] - - const reasoning: Array = - reasoningDetails.length > 0 - ? 
reasoningDetails - .map((detail) => { - switch (detail.type) { - case ReasoningDetailType.Text: { - if (detail.text) { - return { - type: 'reasoning' as const, - text: detail.text, - } - } - break - } - case ReasoningDetailType.Summary: { - if (detail.summary) { - return { - type: 'reasoning' as const, - text: detail.summary, - } - } - break - } - case ReasoningDetailType.Encrypted: { - // For encrypted reasoning, we include a redacted placeholder - if (detail.data) { - return { - type: 'reasoning' as const, - text: '[REDACTED]', - } - } - break - } - default: { - detail satisfies never - } - } - return null - }) - .filter((p) => p !== null) - : choice.message.reasoning - ? [ - { - type: 'reasoning' as const, - text: choice.message.reasoning, - }, - ] - : [] - - const content: Array = [] - - // Add reasoning content first - content.push(...reasoning) - - if (choice.message.content) { - content.push({ - type: 'text' as const, - text: choice.message.content, - }) - } - - if (choice.message.tool_calls) { - for (const toolCall of choice.message.tool_calls) { - content.push({ - type: 'tool-call' as const, - toolCallId: toolCall.id ?? generateId(), - toolName: toolCall.function.name, - input: toolCall.function.arguments, - }) - } - } - - if (choice.message.annotations) { - for (const annotation of choice.message.annotations) { - if (annotation.type === 'url_citation') { - content.push({ - type: 'source' as const, - sourceType: 'url' as const, - id: annotation.url_citation.url, - url: annotation.url_citation.url, - title: annotation.url_citation.title, - providerMetadata: { - openrouter: { - content: annotation.url_citation.content || '', - }, - }, - }) - } - } - } - - return { - content, - finishReason: mapOpenRouterFinishReason(choice.finish_reason), - usage: usageInfo, - warnings: [], - providerMetadata: { - openrouter: { - provider: response.provider ?? '', - usage: { - promptTokens: usageInfo.inputTokens ?? 0, - completionTokens: usageInfo.outputTokens ?? 
0, - totalTokens: usageInfo.totalTokens ?? 0, - cost: response.usage?.cost, - promptTokensDetails: { - cachedTokens: - response.usage?.prompt_tokens_details?.cached_tokens ?? 0, - }, - completionTokensDetails: { - reasoningTokens: - response.usage?.completion_tokens_details?.reasoning_tokens ?? - 0, - }, - costDetails: { - upstreamInferenceCost: - response.usage?.cost_details?.upstream_inference_cost ?? 0, - }, - }, - }, - }, - request: { body: args }, - response: { - id: response.id, - modelId: response.model, - headers: responseHeaders, - }, - } - } - - async doStream(options: LanguageModelV2CallOptions): Promise<{ - stream: ReadableStream - warnings: Array - request?: { body?: unknown } - response?: LanguageModelV2ResponseMetadata & { - headers?: SharedV2Headers - body?: unknown - } - }> { - const providerOptions = options.providerOptions || {} - const openrouterOptions = providerOptions.openrouter || {} - - const args = { - ...this.getArgs(options), - ...openrouterOptions, - } - - const { value: response, responseHeaders } = await postJsonToApi({ - url: this.config.url({ - path: '/chat/completions', - modelId: this.modelId, - }), - headers: combineHeaders(this.config.headers(), options.headers), - body: { - ...args, - stream: true, - - // only include stream_options when in strict compatibility mode: - stream_options: - this.config.compatibility === 'strict' - ? { - include_usage: true, - // If user has requested usage accounting, make sure we get it in the stream - ...(this.settings.usage?.include - ? 
{ include_usage: true } - : {}), - } - : undefined, - }, - failedResponseHandler: openrouterFailedResponseHandler, - successfulResponseHandler: createEventSourceResponseHandler( - OpenRouterStreamChatCompletionChunkSchema, - ), - abortSignal: options.abortSignal, - fetch: this.config.fetch, - }) - - const toolCalls: Array<{ - id: string - type: 'function' - function: { - name: string - arguments: string - } - inputStarted: boolean - sent: boolean - }> = [] - - let finishReason: FinishReason = 'other' - const usage: LanguageModelV2Usage = { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - } - - // Track provider-specific usage information - const openrouterUsage: Partial = {} - - let textStarted = false - let reasoningStarted = false - let textId: string | undefined - let reasoningId: string | undefined - let openrouterResponseId: string | undefined - let provider: string | undefined - - return { - stream: response.pipeThrough( - new TransformStream< - ParseResult< - z.infer - >, - LanguageModelV2StreamPart - >({ - transform(chunk, controller) { - // handle failed chunk parsing / validation: - if (!chunk.success) { - finishReason = 'error' - controller.enqueue({ type: 'error', error: chunk.error }) - return - } - - const value = chunk.value - - // handle error chunks: - if ('error' in value) { - finishReason = 'error' - controller.enqueue({ type: 'error', error: value.error }) - return - } - - if (value.provider) { - provider = value.provider - } - - if (value.id) { - openrouterResponseId = value.id - controller.enqueue({ - type: 'response-metadata', - id: value.id, - }) - } - - if (value.model) { - controller.enqueue({ - type: 'response-metadata', - modelId: value.model, - }) - } - - if (value.usage != null) { - usage.inputTokens = value.usage.prompt_tokens - usage.outputTokens = value.usage.completion_tokens - usage.totalTokens = - value.usage.prompt_tokens + 
value.usage.completion_tokens - - // Collect OpenRouter specific usage information - openrouterUsage.promptTokens = value.usage.prompt_tokens - - if (value.usage.prompt_tokens_details) { - const cachedInputTokens = - value.usage.prompt_tokens_details.cached_tokens ?? 0 - - usage.cachedInputTokens = cachedInputTokens - openrouterUsage.promptTokensDetails = { - cachedTokens: cachedInputTokens, - } - } - - openrouterUsage.completionTokens = value.usage.completion_tokens - if (value.usage.completion_tokens_details) { - const reasoningTokens = - value.usage.completion_tokens_details.reasoning_tokens ?? 0 - - usage.reasoningTokens = reasoningTokens - openrouterUsage.completionTokensDetails = { - reasoningTokens, - } - } - - const upstreamInferenceCost = - value.usage.cost_details?.upstream_inference_cost - if ( - upstreamInferenceCost != null && - upstreamInferenceCost !== undefined - ) { - openrouterUsage.costDetails = { - upstreamInferenceCost, - } - } - - if (value.usage.cost !== undefined) { - openrouterUsage.cost = value.usage.cost - } - openrouterUsage.totalTokens = value.usage.total_tokens - } - - const choice = value.choices[0] - - if (choice?.finish_reason != null) { - finishReason = mapOpenRouterFinishReason(choice.finish_reason) - } - - if (choice?.delta == null) { - return - } - - const delta = choice.delta - - const emitReasoningChunk = (chunkText: string) => { - if (!reasoningStarted) { - reasoningId = openrouterResponseId || generateId() - controller.enqueue({ - type: 'reasoning-start', - id: reasoningId, - }) - reasoningStarted = true - } - controller.enqueue({ - type: 'reasoning-delta', - delta: chunkText, - id: reasoningId || generateId(), - }) - } - - if (delta.reasoning_details && delta.reasoning_details.length > 0) { - for (const detail of delta.reasoning_details) { - switch (detail.type) { - case ReasoningDetailType.Text: { - if (detail.text) { - emitReasoningChunk(detail.text) - } - break - } - case ReasoningDetailType.Encrypted: { - if 
(detail.data) { - emitReasoningChunk('[REDACTED]') - } - break - } - case ReasoningDetailType.Summary: { - if (detail.summary) { - emitReasoningChunk(detail.summary) - } - break - } - default: { - detail satisfies never - break - } - } - } - } else if (delta.reasoning) { - emitReasoningChunk(delta.reasoning) - } - - if (delta.content) { - // If reasoning was previously active and now we're starting text content, - // we should end the reasoning first to maintain proper order - if (reasoningStarted && !textStarted) { - controller.enqueue({ - type: 'reasoning-end', - id: reasoningId || generateId(), - }) - reasoningStarted = false // Mark as ended so we don't end it again in flush - } - - if (!textStarted) { - textId = openrouterResponseId || generateId() - controller.enqueue({ - type: 'text-start', - id: textId, - }) - textStarted = true - } - controller.enqueue({ - type: 'text-delta', - delta: delta.content, - id: textId || generateId(), - }) - } - - if (delta.annotations) { - for (const annotation of delta.annotations) { - if (annotation.type === 'url_citation') { - controller.enqueue({ - type: 'source', - sourceType: 'url' as const, - id: annotation.url_citation.url, - url: annotation.url_citation.url, - title: annotation.url_citation.title, - providerMetadata: { - openrouter: { - content: annotation.url_citation.content || '', - }, - }, - }) - } - } - } - - if (delta.tool_calls != null) { - for (const toolCallDelta of delta.tool_calls) { - const index = toolCallDelta.index ?? toolCalls.length - 1 - - // Tool call start. OpenRouter returns all information except the arguments in the first chunk. 
- if (toolCalls[index] == null) { - if (toolCallDelta.type !== 'function') { - throw new InvalidResponseDataError({ - data: toolCallDelta, - message: `Expected 'function' type.`, - }) - } - - if (toolCallDelta.id == null) { - throw new InvalidResponseDataError({ - data: toolCallDelta, - message: `Expected 'id' to be a string.`, - }) - } - - if (toolCallDelta.function?.name == null) { - throw new InvalidResponseDataError({ - data: toolCallDelta, - message: `Expected 'function.name' to be a string.`, - }) - } - - toolCalls[index] = { - id: toolCallDelta.id, - type: 'function', - function: { - name: toolCallDelta.function.name, - arguments: toolCallDelta.function.arguments ?? '', - }, - inputStarted: false, - sent: false, - } - - const toolCall = toolCalls[index] - - if (toolCall == null) { - throw new Error('Tool call is missing') - } - - // check if tool call is complete (some providers send the full tool call in one chunk) - if ( - toolCall.function?.name != null && - toolCall.function?.arguments != null && - isParsableJson(toolCall.function.arguments) - ) { - toolCall.inputStarted = true - - controller.enqueue({ - type: 'tool-input-start', - id: toolCall.id, - toolName: toolCall.function.name, - }) - - // send delta - controller.enqueue({ - type: 'tool-input-delta', - id: toolCall.id, - delta: toolCall.function.arguments, - }) - - controller.enqueue({ - type: 'tool-input-end', - id: toolCall.id, - }) - - // send tool call - controller.enqueue({ - type: 'tool-call', - toolCallId: toolCall.id, - toolName: toolCall.function.name, - input: toolCall.function.arguments, - }) - - toolCall.sent = true - } - - continue - } - - // existing tool call, merge - const toolCall = toolCalls[index] - - if (toolCall == null) { - throw new Error('Tool call is missing') - } - - if (!toolCall.inputStarted) { - toolCall.inputStarted = true - controller.enqueue({ - type: 'tool-input-start', - id: toolCall.id, - toolName: toolCall.function.name, - }) - } - - if 
(toolCallDelta.function?.arguments != null) { - toolCall.function.arguments += - toolCallDelta.function?.arguments ?? '' - } - - // send delta - controller.enqueue({ - type: 'tool-input-delta', - id: toolCall.id, - delta: toolCallDelta.function.arguments ?? '', - }) - - // check if tool call is complete - if ( - toolCall.function?.name != null && - toolCall.function?.arguments != null && - isParsableJson(toolCall.function.arguments) - ) { - controller.enqueue({ - type: 'tool-call', - toolCallId: toolCall.id ?? generateId(), - toolName: toolCall.function.name, - input: toolCall.function.arguments, - }) - - toolCall.sent = true - } - } - } - }, - - flush(controller) { - // Forward any unsent tool calls if finish reason is 'tool-calls' - if (finishReason === 'tool-calls') { - for (const toolCall of toolCalls) { - if (toolCall && !toolCall.sent) { - controller.enqueue({ - type: 'tool-call', - toolCallId: toolCall.id ?? generateId(), - toolName: toolCall.function.name, - // Coerce invalid arguments to an empty JSON object - input: isParsableJson(toolCall.function.arguments) - ? 
toolCall.function.arguments - : '{}', - }) - toolCall.sent = true - } - } - } - - // End reasoning first if it was started, to maintain proper order - if (reasoningStarted) { - controller.enqueue({ - type: 'reasoning-end', - id: reasoningId || generateId(), - }) - } - if (textStarted) { - controller.enqueue({ - type: 'text-end', - id: textId || generateId(), - }) - } - - const openrouterMetadata: { - usage: Partial - provider?: string - } = { - usage: openrouterUsage, - } - - // Only include provider if it's actually set - if (provider !== undefined) { - openrouterMetadata.provider = provider - } - - controller.enqueue({ - type: 'finish', - finishReason, - usage, - providerMetadata: { - openrouter: openrouterMetadata, - }, - }) - }, - }), - ), - warnings: [], - request: { body: args }, - response: { headers: responseHeaders }, - } - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/chat/is-url.ts b/packages/internal/src/openrouter-ai-sdk/chat/is-url.ts deleted file mode 100644 index 137a636d3d..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/is-url.ts +++ /dev/null @@ -1,15 +0,0 @@ -export function isUrl({ - url, - protocols, -}: { - url: string | URL - protocols: Set<`${string}:`> -}): boolean { - try { - const urlObj = new URL(url) - // Cast to the literal string due to Set inferred input type - return protocols.has(urlObj.protocol as `${string}:`) - } catch (_) { - return false - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/chat/schemas.ts b/packages/internal/src/openrouter-ai-sdk/chat/schemas.ts deleted file mode 100644 index 5c71c30282..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/chat/schemas.ts +++ /dev/null @@ -1,164 +0,0 @@ -import { z } from 'zod/v4' - -import { OpenRouterErrorResponseSchema } from '../schemas/error-response' -import { ReasoningDetailArraySchema } from '../schemas/reasoning-details' - -const OpenRouterChatCompletionBaseResponseSchema = z.object({ - id: z.string().optional(), - model: 
z.string().optional(), - provider: z.string().optional(), - usage: z - .object({ - prompt_tokens: z.number(), - prompt_tokens_details: z - .object({ - cached_tokens: z.number(), - }) - .nullish(), - completion_tokens: z.number(), - completion_tokens_details: z - .object({ - reasoning_tokens: z.number(), - }) - .nullish(), - total_tokens: z.number(), - cost: z.number().optional(), - cost_details: z - .object({ - upstream_inference_cost: z.number().nullish(), - }) - .nullish(), - }) - .nullish(), -}) -// limited version of the schema, focussed on what is needed for the implementation -// this approach limits breakages when the API changes and increases efficiency -export const OpenRouterNonStreamChatCompletionResponseSchema = - OpenRouterChatCompletionBaseResponseSchema.extend({ - choices: z.array( - z.object({ - message: z.object({ - role: z.literal('assistant'), - content: z.string().nullable().optional(), - reasoning: z.string().nullable().optional(), - reasoning_details: ReasoningDetailArraySchema.nullish(), - - tool_calls: z - .array( - z.object({ - id: z.string().optional().nullable(), - type: z.literal('function'), - function: z.object({ - name: z.string(), - arguments: z.string(), - }), - }), - ) - .optional(), - - annotations: z - .array( - z.object({ - type: z.enum(['url_citation']), - url_citation: z.object({ - end_index: z.number(), - start_index: z.number(), - title: z.string(), - url: z.string(), - content: z.string().optional(), - }), - }), - ) - .nullish(), - }), - index: z.number().nullish(), - logprobs: z - .object({ - content: z - .array( - z.object({ - token: z.string(), - logprob: z.number(), - top_logprobs: z.array( - z.object({ - token: z.string(), - logprob: z.number(), - }), - ), - }), - ) - .nullable(), - }) - .nullable() - .optional(), - finish_reason: z.string().optional().nullable(), - }), - ), - }) -// limited version of the schema, focussed on what is needed for the implementation -// this approach limits breakages when the API changes 
and increases efficiency -export const OpenRouterStreamChatCompletionChunkSchema = z.union([ - OpenRouterChatCompletionBaseResponseSchema.extend({ - choices: z.array( - z.object({ - delta: z - .object({ - role: z.enum(['assistant']).optional(), - content: z.string().nullish(), - reasoning: z.string().nullish().optional(), - reasoning_details: ReasoningDetailArraySchema.nullish(), - tool_calls: z - .array( - z.object({ - index: z.number().nullish(), - id: z.string().nullish(), - type: z.literal('function').optional(), - function: z.object({ - name: z.string().nullish(), - arguments: z.string().nullish(), - }), - }), - ) - .nullish(), - - annotations: z - .array( - z.object({ - type: z.enum(['url_citation']), - url_citation: z.object({ - end_index: z.number(), - start_index: z.number(), - title: z.string(), - url: z.string(), - content: z.string().optional(), - }), - }), - ) - .nullish(), - }) - .nullish(), - logprobs: z - .object({ - content: z - .array( - z.object({ - token: z.string(), - logprob: z.number(), - top_logprobs: z.array( - z.object({ - token: z.string(), - logprob: z.number(), - }), - ), - }), - ) - .nullable(), - }) - .nullish(), - finish_reason: z.string().nullable().optional(), - index: z.number().nullish(), - }), - ), - }), - OpenRouterErrorResponseSchema, -]) diff --git a/packages/internal/src/openrouter-ai-sdk/completion/convert-to-openrouter-completion-prompt.ts b/packages/internal/src/openrouter-ai-sdk/completion/convert-to-openrouter-completion-prompt.ts deleted file mode 100644 index 4b5b5c90cf..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/completion/convert-to-openrouter-completion-prompt.ts +++ /dev/null @@ -1,151 +0,0 @@ -import { - InvalidPromptError, - UnsupportedFunctionalityError, -} from '@ai-sdk/provider' - -import type { - LanguageModelV2FilePart, - LanguageModelV2Prompt, - LanguageModelV2ReasoningPart, - LanguageModelV2TextPart, - LanguageModelV2ToolCallPart, - LanguageModelV2ToolResultPart, -} from '@ai-sdk/provider' - 
- -export function convertToOpenRouterCompletionPrompt({ - prompt, - inputFormat, - user = 'user', - assistant = 'assistant', -}: { - prompt: LanguageModelV2Prompt - inputFormat: 'prompt' | 'messages' - user?: string - assistant?: string -}): { - prompt: string -} { - // When the user supplied a prompt input, we don't transform it: - if ( - inputFormat === 'prompt' && - prompt.length === 1 && - prompt[0] && - prompt[0].role === 'user' && - prompt[0].content.length === 1 && - prompt[0].content[0] && - prompt[0].content[0].type === 'text' - ) { - return { prompt: prompt[0].content[0].text } - } - - // otherwise transform to a chat message format: - let text = '' - - // if first message is a system message, add it to the text: - if (prompt[0] && prompt[0].role === 'system') { - text += `${prompt[0].content}\n\n` - prompt = prompt.slice(1) - } - - for (const { role, content } of prompt) { - switch (role) { - case 'system': { - throw new InvalidPromptError({ - message: `Unexpected system message in prompt: ${content}`, - prompt, - }) - } - - case 'user': { - const userMessage = content - .map((part: LanguageModelV2TextPart | LanguageModelV2FilePart) => { - switch (part.type) { - case 'text': { - return part.text - } - - case 'file': { - throw new UnsupportedFunctionalityError({ - functionality: 'file attachments', - }) - } - default: { - return '' - } - } - }) - .join('') - - text += `${user}:\n${userMessage}\n\n` - break - } - - case 'assistant': { - const assistantMessage = content - .map( - ( - part: - | LanguageModelV2TextPart - | LanguageModelV2FilePart - | LanguageModelV2ReasoningPart - | LanguageModelV2ToolCallPart - | LanguageModelV2ToolResultPart, - ) => { - switch (part.type) { - case 'text': { - return part.text - } - case 'tool-call': { - throw new UnsupportedFunctionalityError({ - functionality: 'tool-call messages', - }) - } - case 'tool-result': { - throw new UnsupportedFunctionalityError({ - functionality: 'tool-result messages', - }) - } - case 
'reasoning': { - throw new UnsupportedFunctionalityError({ - functionality: 'reasoning messages', - }) - } - - case 'file': { - throw new UnsupportedFunctionalityError({ - functionality: 'file attachments', - }) - } - - default: { - return '' - } - } - }, - ) - .join('') - - text += `${assistant}:\n${assistantMessage}\n\n` - break - } - - case 'tool': { - throw new UnsupportedFunctionalityError({ - functionality: 'tool messages', - }) - } - - default: { - break - } - } - } - - // Assistant message prefix: - text += `${assistant}:\n` - - return { - prompt: text, - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/completion/index.test.ts b/packages/internal/src/openrouter-ai-sdk/completion/index.test.ts deleted file mode 100644 index cca1ac805a..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/completion/index.test.ts +++ /dev/null @@ -1,665 +0,0 @@ -import { convertReadableStreamToArray } from '@ai-sdk/provider-utils/test' -import { beforeEach, describe, expect, it } from 'bun:test' - -import { createOpenRouter } from '../provider' - -import type { LanguageModelV2Prompt } from '@ai-sdk/provider' - -const TEST_PROMPT: LanguageModelV2Prompt = [ - { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, -] - -const TEST_LOGPROBS = { - tokens: [' ever', ' after', '.\n\n', 'The', ' end', '.'], - token_logprobs: [ - -0.0664508, -0.014520033, -1.3820221, -0.7890417, -0.5323165, -0.10247037, - ], - top_logprobs: [ - { - ' ever': -0.0664508, - }, - { - ' after': -0.014520033, - }, - { - '.\n\n': -1.3820221, - }, - { - The: -0.7890417, - }, - { - ' end': -0.5323165, - }, - { - '.': -0.10247037, - }, - ] as Record[], -} - -type MockResponseDefinition = - | { - type: 'json-value' - body: any - headers?: Record - status?: number - } - | { - type: 'stream-chunks' - chunks: string[] - headers?: Record - status?: number - } - -type MockServerRoute = { - response: MockResponseDefinition -} - -type MockServerCall = { - requestHeaders: Record - requestBodyJson: 
Promise -} - -const createStreamFromChunks = (chunks: string[]) => - new ReadableStream({ - start(controller) { - try { - for (const chunk of chunks) { - controller.enqueue(chunk) - } - } finally { - controller.close() - } - }, - }).pipeThrough(new TextEncoderStream()) - -function toHeadersRecord(headers?: HeadersInit): Record { - const result: Record = {} - - if (!headers) { - return result - } - - if (headers instanceof Headers) { - headers.forEach((value, key) => { - result[key.toLowerCase()] = value - }) - return result - } - - if (Array.isArray(headers)) { - for (const [key, value] of headers) { - result[String(key).toLowerCase()] = String(value) - } - return result - } - - for (const [key, value] of Object.entries(headers)) { - if (typeof value !== 'undefined') { - result[key.toLowerCase()] = String(value) - } - } - - return result -} - -function parseRequestBody(body: BodyInit | null | undefined): any { - if (body == null) { - return undefined - } - - if (typeof body === 'string') { - try { - return JSON.parse(body) - } catch { - return undefined - } - } - - return undefined -} - -function createMockServer(routes: Record) { - const urls: Record = Object.fromEntries( - Object.entries(routes).map(([url, config]) => [ - url, - { - response: { ...config.response }, - }, - ]), - ) - - const calls: MockServerCall[] = [] - - const buildResponse = (definition: MockResponseDefinition): Response => { - const status = definition.status ?? 
200 - - if (definition.type === 'json-value') { - return new Response(JSON.stringify(definition.body), { - status, - headers: { - 'Content-Type': 'application/json', - ...definition.headers, - }, - }) - } - - return new Response(createStreamFromChunks(definition.chunks), { - status, - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - ...definition.headers, - }, - }) - } - - const fetchImpl = async (input: RequestInfo, init: RequestInit = {}) => { - const url = - typeof input === 'string' - ? input - : input instanceof URL - ? input.toString() - : input.url - - const route = urls[url] - - if (!route) { - return new Response('Not Found', { status: 404 }) - } - - const requestHeaders = toHeadersRecord(init.headers) - const requestBodyJson = Promise.resolve(parseRequestBody(init.body)) - - calls.push({ requestHeaders, requestBodyJson }) - - return buildResponse(route.response) - } - - const fetch = ((input: RequestInfo | URL, init?: RequestInit) => - fetchImpl(input as RequestInfo, init ?? 
{})) as typeof global.fetch - - fetch.preconnect = async () => {} - - return { - urls, - calls, - fetch, - } -} - -describe('doGenerate', () => { - const server = createMockServer({ - 'https://openrouter.ai/api/v1/completions': { - response: { type: 'json-value', body: {} }, - }, - }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - compatibility: 'strict', - fetch: server.fetch, - }) - - const model = provider.completion('openai/gpt-3.5-turbo-instruct') - - beforeEach(() => { - server.calls.length = 0 - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'json-value', - body: {}, - } - }) - - function prepareJsonResponse({ - content = '', - usage = { - prompt_tokens: 4, - total_tokens: 34, - completion_tokens: 30, - }, - logprobs = null, - finish_reason = 'stop', - }: { - content?: string - usage?: { - prompt_tokens: number - total_tokens: number - completion_tokens: number - } - logprobs?: { - tokens: string[] - token_logprobs: number[] - top_logprobs: Record[] - } | null - finish_reason?: string - }) { - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'json-value', - body: { - id: 'cmpl-96cAM1v77r4jXa4qb2NSmRREV5oWB', - object: 'text_completion', - created: 1711363706, - model: 'openai/gpt-3.5-turbo-instruct', - choices: [ - { - text: content, - index: 0, - logprobs, - finish_reason, - }, - ], - usage, - }, - } - } - - it('should extract text response', async () => { - prepareJsonResponse({ content: 'Hello, World!' }) - - const { content } = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - const text = content[0]?.type === 'text' ? 
content[0].text : '' - - expect(text).toStrictEqual('Hello, World!') - }) - - it('should extract usage', async () => { - prepareJsonResponse({ - content: '', - usage: { prompt_tokens: 20, total_tokens: 25, completion_tokens: 5 }, - }) - - const { usage } = await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(usage).toStrictEqual({ - inputTokens: 20, - outputTokens: 5, - totalTokens: 25, - reasoningTokens: 0, - cachedInputTokens: 0, - }) - }) - - it('should extract logprobs', async () => { - prepareJsonResponse({ logprobs: TEST_LOGPROBS }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - fetch: server.fetch, - }) - - await provider - .completion('openai/gpt-3.5-turbo', { logprobs: 1 }) - .doGenerate({ - prompt: TEST_PROMPT, - }) - }) - - it('should extract finish reason', async () => { - prepareJsonResponse({ - content: '', - finish_reason: 'stop', - }) - - const { finishReason } = await provider - .completion('openai/gpt-3.5-turbo-instruct') - .doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(finishReason).toStrictEqual('stop') - }) - - it('should support unknown finish reason', async () => { - prepareJsonResponse({ - content: '', - finish_reason: 'eos', - }) - - const { finishReason } = await provider - .completion('openai/gpt-3.5-turbo-instruct') - .doGenerate({ - prompt: TEST_PROMPT, - }) - - expect(finishReason).toStrictEqual('unknown') - }) - - it('should pass the model and the prompt', async () => { - prepareJsonResponse({ content: '' }) - - await model.doGenerate({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'openai/gpt-3.5-turbo-instruct', - prompt: 'Hello', - }) - }) - - it('should pass the models array when provided', async () => { - prepareJsonResponse({ content: '' }) - - const customModel = provider.completion('openai/gpt-3.5-turbo-instruct', { - models: ['openai/gpt-4', 'anthropic/claude-2'], - }) - - await 
customModel.doGenerate({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - model: 'openai/gpt-3.5-turbo-instruct', - models: ['openai/gpt-4', 'anthropic/claude-2'], - prompt: 'Hello', - }) - }) - - it('should pass headers', async () => { - prepareJsonResponse({ content: '' }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - headers: { - 'Custom-Provider-Header': 'provider-header-value', - }, - fetch: server.fetch, - }) - - await provider.completion('openai/gpt-3.5-turbo-instruct').doGenerate({ - prompt: TEST_PROMPT, - headers: { - 'Custom-Request-Header': 'request-header-value', - }, - }) - - const requestHeaders = server.calls[0]!.requestHeaders - - expect(requestHeaders.authorization).toBe('Bearer test-api-key') - expect(requestHeaders['content-type']).toBe('application/json') - expect(requestHeaders['custom-provider-header']).toBe( - 'provider-header-value', - ) - expect(requestHeaders['custom-request-header']).toBe('request-header-value') - expect(requestHeaders['user-agent']).toMatch( - /^ai-sdk\/provider-utils\/\d+\.\d+\.\d+ runtime\/bun\/\d+\.\d+\.\d+$/, - ) - }) -}) - -describe('doStream', () => { - const server = createMockServer({ - 'https://openrouter.ai/api/v1/completions': { - response: { type: 'stream-chunks', chunks: [] }, - }, - }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - compatibility: 'strict', - fetch: server.fetch, - }) - - const model = provider.completion('openai/gpt-3.5-turbo-instruct') - - beforeEach(() => { - server.calls.length = 0 - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'stream-chunks', - chunks: [], - } - }) - - function prepareStreamResponse({ - content, - finish_reason = 'stop', - usage = { - prompt_tokens: 10, - total_tokens: 372, - completion_tokens: 362, - }, - logprobs = null, - }: { - content: string[] - usage?: { - prompt_tokens: number - total_tokens: number - 
completion_tokens: number - } - logprobs?: { - tokens: string[] - token_logprobs: number[] - top_logprobs: Record[] - } | null - finish_reason?: string - }) { - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'stream-chunks', - chunks: [ - ...content.map((text) => { - return `data: {"id":"cmpl-96c64EdfhOw8pjFFgVpLuT8k2MtdT","object":"text_completion","created":1711363440,"choices":[{"text":"${text}","index":0,"logprobs":null,"finish_reason":null}],"model":"openai/gpt-3.5-turbo-instruct"}\n\n` - }), - `data: {"id":"cmpl-96c3yLQE1TtZCd6n6OILVmzev8M8H","object":"text_completion","created":1711363310,"choices":[{"text":"","index":0,"logprobs":${JSON.stringify( - logprobs, - )},"finish_reason":"${finish_reason}"}],"model":"openai/gpt-3.5-turbo-instruct"}\n\n`, - `data: {"id":"cmpl-96c3yLQE1TtZCd6n6OILVmzev8M8H","object":"text_completion","created":1711363310,"model":"openai/gpt-3.5-turbo-instruct","usage":${JSON.stringify( - usage, - )},"choices":[]}\n\n`, - 'data: [DONE]\n\n', - ], - } - } - - it('should stream text deltas', async () => { - prepareStreamResponse({ - content: ['Hello', ', ', 'World!'], - finish_reason: 'stop', - usage: { - prompt_tokens: 10, - total_tokens: 372, - completion_tokens: 362, - }, - logprobs: TEST_LOGPROBS, - }) - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - // note: space moved to last chunk bc of trimming - const elements = await convertReadableStreamToArray(stream) - expect(elements).toStrictEqual([ - { type: 'text-delta', delta: 'Hello', id: expect.any(String) }, - { type: 'text-delta', delta: ', ', id: expect.any(String) }, - { type: 'text-delta', delta: 'World!', id: expect.any(String) }, - { type: 'text-delta', delta: '', id: expect.any(String) }, - { - type: 'finish', - finishReason: 'stop', - providerMetadata: { - openrouter: { - usage: { - promptTokens: 10, - completionTokens: 362, - totalTokens: 372, - }, - }, - }, - usage: { - inputTokens: 10, - outputTokens: 362, - 
totalTokens: 372, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should handle error stream parts', async () => { - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'stream-chunks', - chunks: [ - `data: {"error":{"message": "The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our ` + - `help center at help.openrouter.com if you keep seeing this error.","type":"server_error","param":null,"code":null}}\n\n`, - 'data: [DONE]\n\n', - ], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - expect(elements).toStrictEqual([ - { - type: 'error', - error: { - message: - 'The server had an error processing your request. Sorry about that! ' + - 'You can retry your request, or contact us through our help center at ' + - 'help.openrouter.com if you keep seeing this error.', - type: 'server_error', - code: null, - param: null, - }, - }, - { - finishReason: 'error', - providerMetadata: { - openrouter: { - usage: {}, - }, - }, - type: 'finish', - usage: { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }, - ]) - }) - - it('should handle unparsable stream parts', async () => { - server.urls['https://openrouter.ai/api/v1/completions']!.response = { - type: 'stream-chunks', - chunks: ['data: {unparsable}\n\n', 'data: [DONE]\n\n'], - } - - const { stream } = await model.doStream({ - prompt: TEST_PROMPT, - }) - - const elements = await convertReadableStreamToArray(stream) - - expect(elements.length).toBe(2) - expect(elements[0]?.type).toBe('error') - expect(elements[1]).toStrictEqual({ - finishReason: 'error', - providerMetadata: { - openrouter: { - usage: {}, - }, - }, - type: 'finish', - usage: { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - 
totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - }, - }) - }) - - it('should pass the model and the prompt', async () => { - prepareStreamResponse({ content: [] }) - - await model.doStream({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toStrictEqual({ - stream: true, - stream_options: { include_usage: true }, - model: 'openai/gpt-3.5-turbo-instruct', - prompt: 'Hello', - }) - }) - - it('should pass headers', async () => { - prepareStreamResponse({ content: [] }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - headers: { - 'Custom-Provider-Header': 'provider-header-value', - }, - fetch: server.fetch, - }) - - await provider.completion('openai/gpt-3.5-turbo-instruct').doStream({ - prompt: TEST_PROMPT, - headers: { - 'Custom-Request-Header': 'request-header-value', - }, - }) - - const requestHeaders = server.calls[0]!.requestHeaders - - expect(requestHeaders.authorization).toBe('Bearer test-api-key') - expect(requestHeaders['content-type']).toBe('application/json') - expect(requestHeaders['custom-provider-header']).toBe( - 'provider-header-value', - ) - expect(requestHeaders['custom-request-header']).toBe('request-header-value') - expect(requestHeaders['user-agent']).toMatch( - /^ai-sdk\/provider-utils\/\d+\.\d+\.\d+ runtime\/bun\/\d+\.\d+\.\d+$/, - ) - }) - - it('should pass extra body', async () => { - prepareStreamResponse({ content: [] }) - - const provider = createOpenRouter({ - apiKey: 'test-api-key', - extraBody: { - custom_field: 'custom_value', - providers: { - anthropic: { - custom_field: 'custom_value', - }, - }, - }, - fetch: server.fetch, - }) - - await provider.completion('openai/gpt-4o').doStream({ - prompt: TEST_PROMPT, - }) - - const requestBody = await server.calls[0]!.requestBodyJson - - expect(requestBody).toHaveProperty('custom_field', 'custom_value') - expect(requestBody).toHaveProperty( - 
'providers.anthropic.custom_field', - 'custom_value', - ) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/completion/index.ts b/packages/internal/src/openrouter-ai-sdk/completion/index.ts deleted file mode 100644 index 1185f2cf1d..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/completion/index.ts +++ /dev/null @@ -1,344 +0,0 @@ -import { UnsupportedFunctionalityError } from '@ai-sdk/provider' -import { - combineHeaders, - createEventSourceResponseHandler, - createJsonResponseHandler, - generateId, - postJsonToApi, -} from '@ai-sdk/provider-utils' - -import { convertToOpenRouterCompletionPrompt } from './convert-to-openrouter-completion-prompt' -import { OpenRouterCompletionChunkSchema } from './schemas' -import { openrouterFailedResponseHandler } from '../schemas/error-response' -import { mapOpenRouterFinishReason } from '../utils/map-finish-reason' - -import type { OpenRouterUsageAccounting } from '../types' -import type { - OpenRouterCompletionModelId, - OpenRouterCompletionSettings, -} from '../types/openrouter-completion-settings' -import type { - LanguageModelV2, - LanguageModelV2CallOptions, - LanguageModelV2StreamPart, - LanguageModelV2Usage, -} from '@ai-sdk/provider' -import type { ParseResult } from '@ai-sdk/provider-utils' -import type { FinishReason } from 'ai' -import type { z } from 'zod/v4' - - - -type OpenRouterCompletionConfig = { - provider: string - compatibility: 'strict' | 'compatible' - headers: () => Record - url: (options: { modelId: string; path: string }) => string - fetch?: typeof fetch - extraBody?: Record -} - -export class OpenRouterCompletionLanguageModel implements LanguageModelV2 { - readonly specificationVersion = 'v2' as const - readonly provider = 'openrouter' - readonly modelId: OpenRouterCompletionModelId - readonly supportedUrls: Record = { - 'image/*': [ - /^data:image\/[a-zA-Z]+;base64,/, - /^https?:\/\/.+\.(jpg|jpeg|png|gif|webp)$/i, - ], - 'text/*': [/^data:text\//, /^https?:\/\/.+$/], - 
'application/*': [/^data:application\//, /^https?:\/\/.+$/], - } - readonly defaultObjectGenerationMode = undefined - readonly settings: OpenRouterCompletionSettings - - private readonly config: OpenRouterCompletionConfig - - constructor( - modelId: OpenRouterCompletionModelId, - settings: OpenRouterCompletionSettings, - config: OpenRouterCompletionConfig, - ) { - this.modelId = modelId - this.settings = settings - this.config = config - } - - private getArgs({ - prompt, - maxOutputTokens, - temperature, - topP, - frequencyPenalty, - presencePenalty, - seed, - responseFormat, - topK, - stopSequences, - tools, - toolChoice, - }: LanguageModelV2CallOptions) { - const { prompt: completionPrompt } = convertToOpenRouterCompletionPrompt({ - prompt, - inputFormat: 'prompt', - }) - - if (tools?.length) { - throw new UnsupportedFunctionalityError({ - functionality: 'tools', - }) - } - - if (toolChoice) { - throw new UnsupportedFunctionalityError({ - functionality: 'toolChoice', - }) - } - - return { - // model id: - model: this.modelId, - models: this.settings.models, - - // model specific settings: - logit_bias: this.settings.logitBias, - logprobs: - typeof this.settings.logprobs === 'number' - ? this.settings.logprobs - : typeof this.settings.logprobs === 'boolean' - ? this.settings.logprobs - ? 0 - : undefined - : undefined, - suffix: this.settings.suffix, - user: this.settings.user, - - // standardized settings: - max_tokens: maxOutputTokens, - temperature, - top_p: topP, - frequency_penalty: frequencyPenalty, - presence_penalty: presencePenalty, - seed, - - ...(this.modelId === 'x-ai/grok-code-fast-1' - ? 
{} - : { stop: stopSequences }), - response_format: responseFormat, - top_k: topK, - - // prompt: - prompt: completionPrompt, - - // OpenRouter specific settings: - include_reasoning: this.settings.includeReasoning, - reasoning: this.settings.reasoning, - - // extra body: - ...this.config.extraBody, - ...this.settings.extraBody, - } - } - - async doGenerate( - options: LanguageModelV2CallOptions, - ): Promise>> { - const providerOptions = options.providerOptions || {} - const openrouterOptions = providerOptions.openrouter || {} - - const args = { - ...this.getArgs(options), - ...openrouterOptions, - } - - const { value: response, responseHeaders } = await postJsonToApi({ - url: this.config.url({ - path: '/completions', - modelId: this.modelId, - }), - headers: combineHeaders(this.config.headers(), options.headers), - body: args, - failedResponseHandler: openrouterFailedResponseHandler, - successfulResponseHandler: createJsonResponseHandler( - OpenRouterCompletionChunkSchema, - ), - abortSignal: options.abortSignal, - fetch: this.config.fetch, - }) - - if ('error' in response) { - throw new Error(`${response.error.message}`) - } - - const choice = response.choices[0] - - if (!choice) { - throw new Error('No choice in OpenRouter completion response') - } - - return { - content: [ - { - type: 'text', - text: choice.text ?? '', - }, - ], - finishReason: mapOpenRouterFinishReason(choice.finish_reason), - usage: { - inputTokens: response.usage?.prompt_tokens ?? 0, - outputTokens: response.usage?.completion_tokens ?? 0, - totalTokens: - (response.usage?.prompt_tokens ?? 0) + - (response.usage?.completion_tokens ?? 0), - reasoningTokens: - response.usage?.completion_tokens_details?.reasoning_tokens ?? 0, - cachedInputTokens: - response.usage?.prompt_tokens_details?.cached_tokens ?? 
0, - }, - warnings: [], - response: { - headers: responseHeaders, - }, - } - } - - async doStream( - options: LanguageModelV2CallOptions, - ): Promise>> { - const providerOptions = options.providerOptions || {} - const openrouterOptions = providerOptions.openrouter || {} - - const args = { - ...this.getArgs(options), - ...openrouterOptions, - } - - const { value: response, responseHeaders } = await postJsonToApi({ - url: this.config.url({ - path: '/completions', - modelId: this.modelId, - }), - headers: combineHeaders(this.config.headers(), options.headers), - body: { - ...args, - stream: true, - - // only include stream_options when in strict compatibility mode: - stream_options: - this.config.compatibility === 'strict' - ? { include_usage: true } - : undefined, - }, - failedResponseHandler: openrouterFailedResponseHandler, - successfulResponseHandler: createEventSourceResponseHandler( - OpenRouterCompletionChunkSchema, - ), - abortSignal: options.abortSignal, - fetch: this.config.fetch, - }) - - let finishReason: FinishReason = 'other' - const usage: LanguageModelV2Usage = { - inputTokens: Number.NaN, - outputTokens: Number.NaN, - totalTokens: Number.NaN, - reasoningTokens: Number.NaN, - cachedInputTokens: Number.NaN, - } - - const openrouterUsage: Partial = {} - return { - stream: response.pipeThrough( - new TransformStream< - ParseResult>, - LanguageModelV2StreamPart - >({ - transform(chunk, controller) { - // handle failed chunk parsing / validation: - if (!chunk.success) { - finishReason = 'error' - controller.enqueue({ type: 'error', error: chunk.error }) - return - } - - const value = chunk.value - - // handle error chunks: - if ('error' in value) { - finishReason = 'error' - controller.enqueue({ type: 'error', error: value.error }) - return - } - - if (value.usage != null) { - usage.inputTokens = value.usage.prompt_tokens - usage.outputTokens = value.usage.completion_tokens - usage.totalTokens = - value.usage.prompt_tokens + value.usage.completion_tokens - 
- // Collect OpenRouter specific usage information - openrouterUsage.promptTokens = value.usage.prompt_tokens - - if (value.usage.prompt_tokens_details) { - const cachedInputTokens = - value.usage.prompt_tokens_details.cached_tokens ?? 0 - - usage.cachedInputTokens = cachedInputTokens - openrouterUsage.promptTokensDetails = { - cachedTokens: cachedInputTokens, - } - } - - openrouterUsage.completionTokens = value.usage.completion_tokens - if (value.usage.completion_tokens_details) { - const reasoningTokens = - value.usage.completion_tokens_details.reasoning_tokens ?? 0 - - usage.reasoningTokens = reasoningTokens - openrouterUsage.completionTokensDetails = { - reasoningTokens, - } - } - - if (value.usage.cost !== undefined) { - openrouterUsage.cost = value.usage.cost - } - openrouterUsage.totalTokens = value.usage.total_tokens - } - - const choice = value.choices[0] - - if (choice?.finish_reason != null) { - finishReason = mapOpenRouterFinishReason(choice.finish_reason) - } - - if (choice?.text != null) { - controller.enqueue({ - type: 'text-delta', - delta: choice.text, - id: generateId(), - }) - } - }, - - flush(controller) { - controller.enqueue({ - type: 'finish', - finishReason, - usage, - providerMetadata: { - openrouter: { - usage: openrouterUsage, - }, - }, - }) - }, - }), - ), - response: { - headers: responseHeaders, - }, - } - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/completion/schemas.ts b/packages/internal/src/openrouter-ai-sdk/completion/schemas.ts deleted file mode 100644 index 28f82abfcd..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/completion/schemas.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { z } from 'zod/v4' - -import { OpenRouterErrorResponseSchema } from '../schemas/error-response' -import { ReasoningDetailArraySchema } from '../schemas/reasoning-details' - -// limited version of the schema, focussed on what is needed for the implementation -// this approach limits breakages when the API changes and increases 
efficiency -export const OpenRouterCompletionChunkSchema = z.union([ - z.object({ - id: z.string().optional(), - model: z.string().optional(), - choices: z.array( - z.object({ - text: z.string(), - reasoning: z.string().nullish().optional(), - reasoning_details: ReasoningDetailArraySchema.nullish(), - - finish_reason: z.string().nullish(), - index: z.number().nullish(), - logprobs: z - .object({ - tokens: z.array(z.string()), - token_logprobs: z.array(z.number()), - top_logprobs: z.array(z.record(z.string(), z.number())).nullable(), - }) - .nullable() - .optional(), - }), - ), - usage: z - .object({ - prompt_tokens: z.number(), - prompt_tokens_details: z - .object({ - cached_tokens: z.number(), - }) - .nullish(), - completion_tokens: z.number(), - completion_tokens_details: z - .object({ - reasoning_tokens: z.number(), - }) - .nullish(), - total_tokens: z.number(), - cost: z.number().optional(), - }) - .nullish(), - }), - OpenRouterErrorResponseSchema, -]) diff --git a/packages/internal/src/openrouter-ai-sdk/facade.ts b/packages/internal/src/openrouter-ai-sdk/facade.ts deleted file mode 100644 index cd66240457..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/facade.ts +++ /dev/null @@ -1,83 +0,0 @@ -import { loadApiKey, withoutTrailingSlash } from '@ai-sdk/provider-utils' - -import { OpenRouterChatLanguageModel } from './chat' -import { OpenRouterCompletionLanguageModel } from './completion' - -import type { OpenRouterProviderSettings } from './provider' -import type { - OpenRouterChatModelId, - OpenRouterChatSettings, -} from './types/openrouter-chat-settings' -import type { - OpenRouterCompletionModelId, - OpenRouterCompletionSettings, -} from './types/openrouter-completion-settings' - - -/** -@deprecated Use `createOpenRouter` instead. - */ -export class OpenRouter { - /** -Use a different URL prefix for API calls, e.g. to use proxy servers. -The default prefix is `https://openrouter.ai/api/v1`. 
- */ - readonly baseURL: string - - /** -API key that is being sent using the `Authorization` header. -It defaults to the `OPENROUTER_API_KEY` environment variable. - */ - readonly apiKey?: string - - /** -Custom headers to include in the requests. - */ - readonly headers?: Record - - /** - * Creates a new OpenRouter provider instance. - */ - constructor(options: OpenRouterProviderSettings = {}) { - this.baseURL = - withoutTrailingSlash(options.baseURL ?? options.baseUrl) ?? - 'https://openrouter.ai/api/v1' - this.apiKey = options.apiKey - this.headers = options.headers - } - - private get baseConfig() { - return { - baseURL: this.baseURL, - headers: () => ({ - Authorization: `Bearer ${loadApiKey({ - apiKey: this.apiKey, - environmentVariableName: 'OPENROUTER_API_KEY', - description: 'OpenRouter', - })}`, - ...this.headers, - }), - } - } - - chat(modelId: OpenRouterChatModelId, settings: OpenRouterChatSettings = {}) { - return new OpenRouterChatLanguageModel(modelId, settings, { - provider: 'openrouter.chat', - ...this.baseConfig, - compatibility: 'strict', - url: ({ path }) => `${this.baseURL}${path}`, - }) - } - - completion( - modelId: OpenRouterCompletionModelId, - settings: OpenRouterCompletionSettings = {}, - ) { - return new OpenRouterCompletionLanguageModel(modelId, settings, { - provider: 'openrouter.completion', - ...this.baseConfig, - compatibility: 'strict', - url: ({ path }) => `${this.baseURL}${path}`, - }) - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/index.ts b/packages/internal/src/openrouter-ai-sdk/index.ts deleted file mode 100644 index 14e12e4960..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export * from './facade' -export * from './provider' -export * from './types' diff --git a/packages/internal/src/openrouter-ai-sdk/internal/index.ts b/packages/internal/src/openrouter-ai-sdk/internal/index.ts deleted file mode 100644 index 5f7acdc51e..0000000000 --- 
a/packages/internal/src/openrouter-ai-sdk/internal/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export * from '../chat' -export * from '../completion' -export * from '../types' -export * from '../types/openrouter-chat-settings' -export * from '../types/openrouter-completion-settings' diff --git a/packages/internal/src/openrouter-ai-sdk/provider.ts b/packages/internal/src/openrouter-ai-sdk/provider.ts deleted file mode 100644 index 181be2e867..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/provider.ts +++ /dev/null @@ -1,180 +0,0 @@ - -import { loadApiKey, withoutTrailingSlash } from '@ai-sdk/provider-utils' - -import { OpenRouterChatLanguageModel } from './chat' -import { OpenRouterCompletionLanguageModel } from './completion' - -import type { - OpenRouterChatModelId, - OpenRouterChatSettings, -} from './types/openrouter-chat-settings' -import type { - OpenRouterCompletionModelId, - OpenRouterCompletionSettings, -} from './types/openrouter-completion-settings' -import type { LanguageModelV2 } from '@ai-sdk/provider' - -export type { OpenRouterCompletionSettings } - -export interface OpenRouterProvider extends LanguageModelV2 { - ( - modelId: OpenRouterChatModelId, - settings?: OpenRouterCompletionSettings, - ): OpenRouterCompletionLanguageModel - ( - modelId: OpenRouterChatModelId, - settings?: OpenRouterChatSettings, - ): OpenRouterChatLanguageModel - - languageModel( - modelId: OpenRouterChatModelId, - settings?: OpenRouterCompletionSettings, - ): OpenRouterCompletionLanguageModel - languageModel( - modelId: OpenRouterChatModelId, - settings?: OpenRouterChatSettings, - ): OpenRouterChatLanguageModel - - /** -Creates an OpenRouter chat model for text generation. - */ - chat( - modelId: OpenRouterChatModelId, - settings?: OpenRouterChatSettings, - ): OpenRouterChatLanguageModel - - /** -Creates an OpenRouter completion model for text generation. 
- */ - completion( - modelId: OpenRouterCompletionModelId, - settings?: OpenRouterCompletionSettings, - ): OpenRouterCompletionLanguageModel -} - -export interface OpenRouterProviderSettings { - /** -Base URL for the OpenRouter API calls. - */ - baseURL?: string - - /** -@deprecated Use `baseURL` instead. - */ - baseUrl?: string - - /** -API key for authenticating requests. - */ - apiKey?: string - - /** -Custom headers to include in the requests. - */ - headers?: Record - - /** -OpenRouter compatibility mode. Should be set to `strict` when using the OpenRouter API, -and `compatible` when using 3rd party providers. In `compatible` mode, newer -information such as streamOptions are not being sent. Defaults to 'compatible'. - */ - compatibility?: 'strict' | 'compatible' - - /** -Custom fetch implementation. You can use it as a middleware to intercept requests, -or to provide a custom fetch implementation for e.g. testing. - */ - fetch?: typeof fetch - - /** -A JSON object to send as the request body to access OpenRouter features & upstream provider features. - */ - extraBody?: Record -} - -/** -Create an OpenRouter provider instance. - */ -export function createOpenRouter( - options: OpenRouterProviderSettings = {}, -): OpenRouterProvider { - const baseURL = - withoutTrailingSlash(options.baseURL ?? options.baseUrl) ?? - 'https://openrouter.ai/api/v1' - - // we default to compatible, because strict breaks providers like Groq: - const compatibility = options.compatibility ?? 
'compatible' - - const getHeaders = () => ({ - Authorization: `Bearer ${loadApiKey({ - apiKey: options.apiKey, - environmentVariableName: 'OPENROUTER_API_KEY', - description: 'OpenRouter', - })}`, - ...options.headers, - }) - - const createChatModel = ( - modelId: OpenRouterChatModelId, - settings: OpenRouterChatSettings = {}, - ) => - new OpenRouterChatLanguageModel(modelId, settings, { - provider: 'openrouter.chat', - url: ({ path }) => `${baseURL}${path}`, - headers: getHeaders, - compatibility, - fetch: options.fetch, - extraBody: options.extraBody, - }) - - const createCompletionModel = ( - modelId: OpenRouterCompletionModelId, - settings: OpenRouterCompletionSettings = {}, - ) => - new OpenRouterCompletionLanguageModel(modelId, settings, { - provider: 'openrouter.completion', - url: ({ path }) => `${baseURL}${path}`, - headers: getHeaders, - compatibility, - fetch: options.fetch, - extraBody: options.extraBody, - }) - - const createLanguageModel = ( - modelId: OpenRouterChatModelId | OpenRouterCompletionModelId, - settings?: OpenRouterChatSettings | OpenRouterCompletionSettings, - ) => { - if (new.target) { - throw new Error( - 'The OpenRouter model function cannot be called with the new keyword.', - ) - } - - if (modelId === 'openai/gpt-3.5-turbo-instruct') { - return createCompletionModel( - modelId, - settings as OpenRouterCompletionSettings, - ) - } - - return createChatModel(modelId, settings as OpenRouterChatSettings) - } - - const provider = ( - modelId: OpenRouterChatModelId | OpenRouterCompletionModelId, - settings?: OpenRouterChatSettings | OpenRouterCompletionSettings, - ) => createLanguageModel(modelId, settings) - - provider.languageModel = createLanguageModel - provider.chat = createChatModel - provider.completion = createCompletionModel - - return provider as OpenRouterProvider -} - -/** -Default OpenRouter provider instance. It uses 'strict' compatibility mode. 
- */ -export const openrouter = createOpenRouter({ - compatibility: 'strict', // strict for OpenRouter API -}) diff --git a/packages/internal/src/openrouter-ai-sdk/schemas/error-response.test.ts b/packages/internal/src/openrouter-ai-sdk/schemas/error-response.test.ts deleted file mode 100644 index 60de40c0fc..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/schemas/error-response.test.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { describe, expect, it } from 'bun:test' - -import { OpenRouterErrorResponseSchema } from './error-response' - -describe('OpenRouterErrorResponseSchema', () => { - it('should be valid without a type, code, and param', () => { - const errorWithoutTypeCodeAndParam = { - error: { - message: 'Example error message', - metadata: { provider_name: 'Morph' }, - }, - user_id: 'example_1', - } - - const result = OpenRouterErrorResponseSchema.parse( - errorWithoutTypeCodeAndParam, - ) - - expect(result).toEqual({ - error: { - message: 'Example error message', - code: null, - type: null, - param: null, - }, - }) - }) - - it('should be invalid with a type', () => { - const errorWithType = { - error: { - message: 'Example error message with type', - type: 'invalid_request_error', - code: 400, - param: 'canBeAnything', - metadata: { provider_name: 'Morph' }, - }, - } - - const result = OpenRouterErrorResponseSchema.parse(errorWithType) - - expect(result).toEqual({ - error: { - code: 400, - message: 'Example error message with type', - type: 'invalid_request_error', - param: 'canBeAnything', - }, - }) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/schemas/error-response.ts b/packages/internal/src/openrouter-ai-sdk/schemas/error-response.ts deleted file mode 100644 index 311bf39943..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/schemas/error-response.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils' -import { z } from 'zod/v4' - -export const 
OpenRouterErrorResponseSchema = z.object({ - error: z.object({ - code: z.union([z.string(), z.number()]).nullable().optional().default(null), - message: z.string(), - type: z.string().nullable().optional().default(null), - param: z.any().nullable().optional().default(null), - }), -}) - -export type OpenRouterErrorData = z.infer - -export const openrouterFailedResponseHandler = createJsonErrorResponseHandler({ - errorSchema: OpenRouterErrorResponseSchema, - errorToMessage: (data: OpenRouterErrorData) => data.error.message, -}) diff --git a/packages/internal/src/openrouter-ai-sdk/schemas/reasoning-details.ts b/packages/internal/src/openrouter-ai-sdk/schemas/reasoning-details.ts deleted file mode 100644 index 51cc9af276..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/schemas/reasoning-details.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { z } from 'zod/v4' - -export enum ReasoningDetailType { - Summary = 'reasoning.summary', - Encrypted = 'reasoning.encrypted', - Text = 'reasoning.text', -} - -export const ReasoningDetailSummarySchema = z.object({ - type: z.literal(ReasoningDetailType.Summary), - summary: z.string(), -}) -export type ReasoningDetailSummary = z.infer< - typeof ReasoningDetailSummarySchema -> - -export const ReasoningDetailEncryptedSchema = z.object({ - type: z.literal(ReasoningDetailType.Encrypted), - data: z.string(), -}) -export type ReasoningDetailEncrypted = z.infer< - typeof ReasoningDetailEncryptedSchema -> - -export const ReasoningDetailTextSchema = z.object({ - type: z.literal(ReasoningDetailType.Text), - text: z.string().nullish(), - signature: z.string().nullish(), -}) - -export type ReasoningDetailText = z.infer - -export const ReasoningDetailUnionSchema = z.union([ - ReasoningDetailSummarySchema, - ReasoningDetailEncryptedSchema, - ReasoningDetailTextSchema, -]) - -const ReasoningDetailsWithUnknownSchema = z.union([ - ReasoningDetailUnionSchema, - z.unknown().transform(() => null), -]) - -export type ReasoningDetailUnion = z.infer - 
-export const ReasoningDetailArraySchema = z - .array(ReasoningDetailsWithUnknownSchema) - .transform((d) => d.filter((d): d is ReasoningDetailUnion => !!d)) diff --git a/packages/internal/src/openrouter-ai-sdk/tests/provider-options.test.ts b/packages/internal/src/openrouter-ai-sdk/tests/provider-options.test.ts deleted file mode 100644 index 466b0549af..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/tests/provider-options.test.ts +++ /dev/null @@ -1,223 +0,0 @@ -import { streamText } from 'ai' -import { beforeEach, describe, expect, it, mock } from 'bun:test' - -import { createOpenRouter } from '../provider' - -import type { ModelMessage } from 'ai' - -type MockResponseDefinition = - | { - type: 'json-value' - body: any - headers?: Record - status?: number - } - | { - type: 'stream-chunks' - chunks: string[] - headers?: Record - status?: number - } - -type MockServerRoute = { - response: MockResponseDefinition -} - -type MockServerCall = { - requestHeaders: Record - requestBodyJson: Promise -} - -const createStreamFromChunks = (chunks: string[]) => - new ReadableStream({ - start(controller) { - try { - for (const chunk of chunks) { - controller.enqueue(chunk) - } - } finally { - controller.close() - } - }, - }).pipeThrough(new TextEncoderStream()) - -function toHeadersRecord(headers?: HeadersInit): Record { - const result: Record = {} - - if (!headers) { - return result - } - - if (headers instanceof Headers) { - headers.forEach((value, key) => { - result[key.toLowerCase()] = value - }) - return result - } - - if (Array.isArray(headers)) { - for (const [key, value] of headers) { - result[String(key).toLowerCase()] = String(value) - } - return result - } - - for (const [key, value] of Object.entries(headers)) { - if (typeof value !== 'undefined') { - result[key.toLowerCase()] = String(value) - } - } - - return result -} - -function parseRequestBody(body: BodyInit | null | undefined): any { - if (body == null) { - return undefined - } - - if (typeof body 
=== 'string') { - try { - return JSON.parse(body) - } catch { - return undefined - } - } - - return undefined -} - -function createMockServer(routes: Record) { - const urls: Record = Object.fromEntries( - Object.entries(routes).map(([url, config]) => [ - url, - { - response: { ...config.response }, - }, - ]), - ) - - const calls: MockServerCall[] = [] - - const buildResponse = (definition: MockResponseDefinition): Response => { - const status = definition.status ?? 200 - - if (definition.type === 'json-value') { - return new Response(JSON.stringify(definition.body), { - status, - headers: { - 'Content-Type': 'application/json', - ...definition.headers, - }, - }) - } - - return new Response(createStreamFromChunks(definition.chunks), { - status, - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - ...definition.headers, - }, - }) - } - - const fetchImpl = async (input: RequestInfo, init: RequestInit = {}) => { - const url = - typeof input === 'string' - ? input - : input instanceof URL - ? input.toString() - : input.url - - const route = urls[url] - - if (!route) { - return new Response('Not Found', { status: 404 }) - } - - const requestHeaders = toHeadersRecord(init.headers) - const requestBodyJson = Promise.resolve(parseRequestBody(init.body)) - - calls.push({ requestHeaders, requestBodyJson }) - - return buildResponse(route.response) - } - - const fetch = ((input: RequestInfo | URL, init?: RequestInit) => - fetchImpl(input as RequestInfo, init ?? 
{})) as typeof global.fetch - - fetch.preconnect = async () => {} - - return { - urls, - calls, - fetch, - } -} - -// Add type assertions for the mocked classes -const TEST_MESSAGES: ModelMessage[] = [ - { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, -] - -describe('providerOptions', () => { - const server = createMockServer({ - 'https://openrouter.ai/api/v1/chat/completions': { - response: { - type: 'stream-chunks', - chunks: [], - }, - }, - }) - - const openrouter = createOpenRouter({ - apiKey: 'test', - fetch: server.fetch, - }) - - beforeEach(() => { - mock.clearAllMocks() - server.calls.length = 0 - server.urls['https://openrouter.ai/api/v1/chat/completions']!.response = { - type: 'stream-chunks', - chunks: [ - 'data: {"choices":[{"delta":{"content":"ok"}}]}' + '\n\n', - 'data: [DONE]' + '\n\n', - ], - } - }) - - it('should set providerOptions openrouter to extra body', async () => { - const model = openrouter('anthropic/claude-3.7-sonnet') - - await streamText({ - model: model, - messages: TEST_MESSAGES, - providerOptions: { - openrouter: { - reasoning: { - max_tokens: 1000, - }, - }, - }, - }).consumeStream() - - const requestBody = await server.calls[0]?.requestBodyJson - - expect(requestBody).toStrictEqual({ - messages: [ - { - content: [{ type: 'text', text: 'Hello' }], - role: 'user', - }, - ], - reasoning: { - max_tokens: 1000, - }, - model: 'anthropic/claude-3.7-sonnet', - stream: true, - }) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/tests/stream-usage-accounting.test.ts b/packages/internal/src/openrouter-ai-sdk/tests/stream-usage-accounting.test.ts deleted file mode 100644 index 8091a61a18..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/tests/stream-usage-accounting.test.ts +++ /dev/null @@ -1,219 +0,0 @@ -import { convertReadableStreamToArray } from '@ai-sdk/provider-utils/test' -import { afterEach, beforeEach, describe, expect, it } from 'bun:test' - -import { OpenRouterChatLanguageModel } from '../chat' - 
-import type { OpenRouterChatSettings } from '../types/openrouter-chat-settings' - -describe('OpenRouter Streaming Usage Accounting', () => { - const originalFetch = global.fetch - let capturedRequests: Array<{ - url: string - body?: any - }> = [] - let nextResponseChunks: string[] = [] - - const createStreamFromChunks = (chunks: string[]) => - new ReadableStream({ - start(controller) { - for (const chunk of chunks) { - controller.enqueue(chunk) - } - controller.close() - }, - }).pipeThrough(new TextEncoderStream()) - - beforeEach(() => { - capturedRequests = [] - global.fetch = (async (input: RequestInfo, init?: RequestInit) => { - const url = - typeof input === 'string' - ? input - : input instanceof URL - ? input.toString() - : input.url - - let parsedBody: any - if (init?.body && typeof init.body === 'string') { - try { - parsedBody = JSON.parse(init.body) - } catch { - parsedBody = undefined - } - } - - capturedRequests.push({ url, body: parsedBody }) - - return new Response(createStreamFromChunks(nextResponseChunks), { - status: 200, - headers: { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - }, - }) - }) as typeof global.fetch - }) - - afterEach(() => { - global.fetch = originalFetch - nextResponseChunks = [] - }) - - function prepareStreamResponse(includeUsage = true) { - nextResponseChunks = [ - `data: {"id":"test-id","model":"test-model","choices":[{"delta":{"content":"Hello"},"index":0}]}\n\n`, - `data: {"choices":[{"finish_reason":"stop","index":0}]}\n\n`, - ] - - if (includeUsage) { - nextResponseChunks.push( - `data: ${JSON.stringify({ - usage: { - prompt_tokens: 10, - prompt_tokens_details: { cached_tokens: 5 }, - completion_tokens: 20, - completion_tokens_details: { reasoning_tokens: 8 }, - total_tokens: 30, - cost: 0.0015, - cost_details: { - upstream_inference_cost: 19, - }, - }, - choices: [], - })}\n\n`, - ) - } - - nextResponseChunks.push('data: [DONE]\n\n') - } - - it('should include 
stream_options.include_usage in request when enabled', async () => { - prepareStreamResponse() - - // Create model with usage accounting enabled - const settings: OpenRouterChatSettings = { - usage: { include: true }, - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model with streaming - await model.doStream({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Verify stream options - const requestBody = capturedRequests[0]?.body - expect(requestBody).toBeDefined() - expect(requestBody.stream).toBe(true) - expect(requestBody.stream_options).toEqual({ - include_usage: true, - }) - }) - - it('should include provider-specific metadata in finish event when usage accounting is enabled', async () => { - prepareStreamResponse(true) - - // Create model with usage accounting enabled - const settings: OpenRouterChatSettings = { - usage: { include: true }, - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model with streaming - const result = await model.doStream({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Read all chunks from the stream - const chunks = await convertReadableStreamToArray(result.stream) - - // Find the finish chunk - const finishChunk = chunks.find((chunk) => chunk.type === 'finish') - expect(finishChunk).toBeDefined() - - // Verify metadata is included - expect(finishChunk?.providerMetadata).toBeDefined() - const openrouterData = finishChunk?.providerMetadata?.openrouter - 
expect(openrouterData).toBeDefined() - - const usage = openrouterData?.usage - expect(usage).toMatchObject({ - promptTokens: 10, - completionTokens: 20, - totalTokens: 30, - cost: 0.0015, - costDetails: { - upstreamInferenceCost: 19, - }, - promptTokensDetails: { cachedTokens: 5 }, - completionTokensDetails: { reasoningTokens: 8 }, - }) - }) - - it('should not include provider-specific metadata when usage accounting is disabled', async () => { - prepareStreamResponse(false) - - // Create model with usage accounting disabled - const settings: OpenRouterChatSettings = { - // No usage property - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model with streaming - const result = await model.doStream({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Read all chunks from the stream - const chunks = await convertReadableStreamToArray(result.stream) - - // Find the finish chunk - const finishChunk = chunks.find((chunk) => chunk.type === 'finish') - expect(finishChunk).toBeDefined() - - // Verify that provider metadata is not included - expect(finishChunk?.providerMetadata?.openrouter).toStrictEqual({ - usage: {}, - }) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/tests/usage-accounting.test.ts b/packages/internal/src/openrouter-ai-sdk/tests/usage-accounting.test.ts deleted file mode 100644 index 4189a3d8b9..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/tests/usage-accounting.test.ts +++ /dev/null @@ -1,183 +0,0 @@ -import { createTestServer } from '@ai-sdk/provider-utils/test' -import { describe, expect, it } from 'bun:test' - -import { OpenRouterChatLanguageModel } from '../chat' - -import type { OpenRouterChatSettings } from 
'../types/openrouter-chat-settings' - -describe('OpenRouter Usage Accounting', () => { - const server = createTestServer({ - 'https://api.openrouter.ai/chat/completions': { - response: { type: 'json-value', body: {} }, - }, - }) - - function prepareJsonResponse(includeUsage = true) { - const response = { - id: 'test-id', - model: 'test-model', - choices: [ - { - message: { - role: 'assistant', - content: 'Hello, I am an AI assistant.', - }, - index: 0, - finish_reason: 'stop', - }, - ], - usage: includeUsage - ? { - prompt_tokens: 10, - prompt_tokens_details: { - cached_tokens: 5, - }, - completion_tokens: 20, - completion_tokens_details: { - reasoning_tokens: 8, - }, - total_tokens: 30, - cost: 0.0015, - cost_details: { - upstream_inference_cost: 19, - }, - } - : undefined, - } - - server.urls['https://api.openrouter.ai/chat/completions']!.response = { - type: 'json-value', - body: response, - } - } - - it('should include usage parameter in the request when enabled', async () => { - prepareJsonResponse() - - // Create model with usage accounting enabled - const settings: OpenRouterChatSettings = { - usage: { include: true }, - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model - await model.doGenerate({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Check request contains usage parameter - const requestBody = await server.calls[0]!.requestBodyJson - expect(requestBody).toBeDefined() - expect(requestBody).toHaveProperty('usage') - expect(requestBody.usage).toEqual({ include: true }) - }) - - it('should include provider-specific metadata in response when usage accounting is enabled', async () => { - prepareJsonResponse() - - // Create model with usage accounting enabled - const 
settings: OpenRouterChatSettings = { - usage: { include: true }, - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model - const result = await model.doGenerate({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Check result contains provider metadata - expect(result.providerMetadata).toBeDefined() - const providerData = result.providerMetadata - - // Check for OpenRouter usage data - expect(providerData?.openrouter).toBeDefined() - const openrouterData = providerData?.openrouter as Record - expect(openrouterData.usage).toBeDefined() - - const usage = openrouterData.usage - expect(usage).toMatchObject({ - promptTokens: 10, - completionTokens: 20, - totalTokens: 30, - cost: 0.0015, - promptTokensDetails: { - cachedTokens: 5, - }, - completionTokensDetails: { - reasoningTokens: 8, - }, - }) - }) - - it('should not include provider-specific metadata when usage accounting is disabled', async () => { - prepareJsonResponse() - - // Create model with usage accounting disabled - const settings: OpenRouterChatSettings = { - // No usage property - } - - const model = new OpenRouterChatLanguageModel('test-model', settings, { - provider: 'openrouter.chat', - url: () => 'https://api.openrouter.ai/chat/completions', - headers: () => ({}), - compatibility: 'strict', - fetch: global.fetch, - }) - - // Call the model - const result = await model.doGenerate({ - prompt: [ - { - role: 'user', - content: [{ type: 'text', text: 'Hello' }], - }, - ], - maxOutputTokens: 100, - }) - - // Verify that OpenRouter metadata is not included - expect(result.providerMetadata?.openrouter?.usage).toStrictEqual({ - promptTokens: 10, - completionTokens: 20, - totalTokens: 30, - cost: 0.0015, - costDetails: { - 
upstreamInferenceCost: 19, - }, - promptTokensDetails: { - cachedTokens: 5, - }, - completionTokensDetails: { - reasoningTokens: 8, - }, - }) - }) -}) diff --git a/packages/internal/src/openrouter-ai-sdk/types/index.ts b/packages/internal/src/openrouter-ai-sdk/types/index.ts deleted file mode 100644 index 64f779c21e..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/types/index.ts +++ /dev/null @@ -1,70 +0,0 @@ -import type { LanguageModelV2, LanguageModelV2Prompt } from '@ai-sdk/provider' - -export type { LanguageModelV2, LanguageModelV2Prompt } - -export type OpenRouterProviderOptions = { - models?: string[] - - /** - * https://openrouter.ai/docs/use-cases/reasoning-tokens - * One of `max_tokens` or `effort` is required. - * If `exclude` is true, reasoning will be removed from the response. Default is false. - */ - reasoning?: { - enabled?: boolean - exclude?: boolean - } & ( - | { - max_tokens: number - } - | { - effort: 'high' | 'medium' | 'low' | 'minimal' | 'none' - } - ) - - /** - * A unique identifier representing your end-user, which can - * help OpenRouter to monitor and detect abuse. - */ - user?: string -} - -export type OpenRouterSharedSettings = OpenRouterProviderOptions & { - /** - * @deprecated use `reasoning` instead - */ - includeReasoning?: boolean - - extraBody?: Record - - /** - * Enable usage accounting to get detailed token usage information. - * https://openrouter.ai/docs/use-cases/usage-accounting - */ - usage?: { - /** - * When true, includes token usage information in the response. 
- */ - include: boolean - } -} - -/** - * Usage accounting response - * @see https://openrouter.ai/docs/use-cases/usage-accounting - */ -export type OpenRouterUsageAccounting = { - promptTokens: number - promptTokensDetails?: { - cachedTokens: number - } - completionTokens: number - completionTokensDetails?: { - reasoningTokens: number - } - totalTokens: number - cost?: number - costDetails: { - upstreamInferenceCost: number - } -} diff --git a/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-completions-input.ts b/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-completions-input.ts deleted file mode 100644 index 4187661d3a..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-completions-input.ts +++ /dev/null @@ -1,78 +0,0 @@ -import type { ReasoningDetailUnion } from '../schemas/reasoning-details' - -// Type for OpenRouter Cache Control following Anthropic's pattern -export type OpenRouterCacheControl = { type: 'ephemeral' } - -export type OpenRouterChatCompletionsInput = Array - -export type ChatCompletionMessageParam = - | ChatCompletionSystemMessageParam - | ChatCompletionUserMessageParam - | ChatCompletionAssistantMessageParam - | ChatCompletionToolMessageParam - -export interface ChatCompletionSystemMessageParam { - role: 'system' - content: string - cache_control?: OpenRouterCacheControl -} - -export interface ChatCompletionUserMessageParam { - role: 'user' - content: string | Array - cache_control?: OpenRouterCacheControl -} - -export type ChatCompletionContentPart = - | ChatCompletionContentPartText - | ChatCompletionContentPartImage - | ChatCompletionContentPartFile - -export interface ChatCompletionContentPartFile { - type: 'file' - file: { - filename: string - file_data: string - } - cache_control?: OpenRouterCacheControl -} - -export interface ChatCompletionContentPartImage { - type: 'image_url' - image_url: { - url: string - } - cache_control?: OpenRouterCacheControl -} - -export interface 
ChatCompletionContentPartText { - type: 'text' - text: string - reasoning?: string | null - cache_control?: OpenRouterCacheControl -} - -export interface ChatCompletionAssistantMessageParam { - role: 'assistant' - content?: string | null - reasoning?: string | null - reasoning_details?: ReasoningDetailUnion[] - tool_calls?: Array - cache_control?: OpenRouterCacheControl -} - -export interface ChatCompletionMessageToolCall { - type: 'function' - id: string - function: { - arguments: string - name: string - } -} - -export interface ChatCompletionToolMessageParam { - role: 'tool' - content: string - tool_call_id: string - cache_control?: OpenRouterCacheControl -} diff --git a/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-settings.ts b/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-settings.ts deleted file mode 100644 index 90a6690743..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/types/openrouter-chat-settings.ts +++ /dev/null @@ -1,133 +0,0 @@ -import type { OpenRouterSharedSettings } from '..' - -// https://openrouter.ai/api/v1/models -export type OpenRouterChatModelId = string - -export type OpenRouterChatSettings = { - /** -Modify the likelihood of specified tokens appearing in the completion. - -Accepts a JSON object that maps tokens (specified by their token ID in -the GPT tokenizer) to an associated bias value from -100 to 100. You -can use this tokenizer tool to convert text to token IDs. Mathematically, -the bias is added to the logits generated by the model prior to sampling. -The exact effect will vary per model, but values between -1 and 1 should -decrease or increase likelihood of selection; values like -100 or 100 -should result in a ban or exclusive selection of the relevant token. - -As an example, you can pass {"50256": -100} to prevent the <|end-of-text|> -token from being generated. -*/ - logitBias?: Record - - /** -Return the log probabilities of the tokens. 
Including logprobs will increase -the response size and can slow down response times. However, it can -be useful to understand better how the model is behaving. - -Setting to true will return the log probabilities of the tokens that -were generated. - -Setting to a number will return the log probabilities of the top n -tokens that were generated. -*/ - logprobs?: boolean | number - - /** -Whether to enable parallel function calling during tool use. Default to true. - */ - parallelToolCalls?: boolean - - /** -A unique identifier representing your end-user, which can help OpenRouter to -monitor and detect abuse. Learn more. -*/ - user?: string - - /** - * Web search plugin configuration for enabling web search capabilities - */ - plugins?: Array<{ - id: 'web' - /** - * Maximum number of search results to include (default: 5) - */ - max_results?: number - /** - * Custom search prompt to guide the search query - */ - search_prompt?: string - }> - - /** - * Built-in web search options for models that support native web search - */ - web_search_options?: { - /** - * Maximum number of search results to include - */ - max_results?: number - /** - * Custom search prompt to guide the search query - */ - search_prompt?: string - } - - /** - * Provider routing preferences to control request routing behavior - */ - provider?: { - /** - * List of provider slugs to try in order (e.g. 
["anthropic", "openai"]) - */ - order?: string[] - /** - * Whether to allow backup providers when primary is unavailable (default: true) - */ - allow_fallbacks?: boolean - /** - * Only use providers that support all parameters in your request (default: false) - */ - require_parameters?: boolean - /** - * Control whether to use providers that may store data - */ - data_collection?: 'allow' | 'deny' - /** - * List of provider slugs to allow for this request - */ - only?: string[] - /** - * List of provider slugs to skip for this request - */ - ignore?: string[] - /** - * List of quantization levels to filter by (e.g. ["int4", "int8"]) - */ - quantizations?: Array< - | 'int4' - | 'int8' - | 'fp4' - | 'fp6' - | 'fp8' - | 'fp16' - | 'bf16' - | 'fp32' - | 'unknown' - > - /** - * Sort providers by price, throughput, or latency - */ - sort?: 'price' | 'throughput' | 'latency' - /** - * Maximum pricing you want to pay for this request - */ - max_price?: { - prompt?: number | string - completion?: number | string - image?: number | string - audio?: number | string - request?: number | string - } - } -} & OpenRouterSharedSettings diff --git a/packages/internal/src/openrouter-ai-sdk/types/openrouter-completion-settings.ts b/packages/internal/src/openrouter-ai-sdk/types/openrouter-completion-settings.ts deleted file mode 100644 index 661aa3f7e4..0000000000 --- a/packages/internal/src/openrouter-ai-sdk/types/openrouter-completion-settings.ts +++ /dev/null @@ -1,39 +0,0 @@ -import type { OpenRouterSharedSettings } from '.' - -export type OpenRouterCompletionModelId = string - -export type OpenRouterCompletionSettings = { - /** -Modify the likelihood of specified tokens appearing in the completion. - -Accepts a JSON object that maps tokens (specified by their token ID in -the GPT tokenizer) to an associated bias value from -100 to 100. You -can use this tokenizer tool to convert text to token IDs. 
Mathematically, -the bias is added to the logits generated by the model prior to sampling. -The exact effect will vary per model, but values between -1 and 1 should -decrease or increase likelihood of selection; values like -100 or 100 -should result in a ban or exclusive selection of the relevant token. - -As an example, you can pass {"50256": -100} to prevent the <|end-of-text|> -token from being generated. - */ - logitBias?: Record - - /** -Return the log probabilities of the tokens. Including logprobs will increase -the response size and can slow down response times. However, it can -be useful to better understand how the model is behaving. - -Setting to true will return the log probabilities of the tokens that -were generated. - -Setting to a number will return the log probabilities of the top n -tokens that were generated. - */ - logprobs?: boolean | number - - /** -The suffix that comes after a completion of inserted text. - */ - suffix?: string -} & OpenRouterSharedSettings diff --git a/packages/llm-providers/package.json b/packages/llm-providers/package.json new file mode 100644 index 0000000000..6093d03d57 --- /dev/null +++ b/packages/llm-providers/package.json @@ -0,0 +1,35 @@ +{ + "name": "@codebuff/llm-providers", + "version": "1.0.0", + "private": true, + "type": "module", + "exports": { + "./openai-compatible": { + "bun": "./src/openai-compatible/index.ts", + "import": "./src/openai-compatible/index.ts", + "types": "./src/openai-compatible/index.ts", + "default": "./src/openai-compatible/index.ts" + }, + "./*": { + "bun": "./src/*.ts", + "import": "./src/*.ts", + "types": "./src/*.ts", + "default": "./src/*.ts" + } + }, + "scripts": { + "typecheck": "tsc --noEmit -p .", + "test": "bun test" + }, + "sideEffects": false, + "engines": { + "bun": "1.3.11" + }, + "dependencies": { + "@ai-sdk/provider": "2.0.1", + "@ai-sdk/provider-utils": "^3.0.17", + "ai": "^5.0.52", + "zod": "^4.2.1" + }, + "devDependencies": {} +} diff --git 
a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.test.ts b/packages/llm-providers/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.test.ts similarity index 100% rename from packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.test.ts rename to packages/llm-providers/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.test.ts diff --git a/packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts b/packages/llm-providers/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts similarity index 100% rename from packages/internal/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts rename to packages/llm-providers/src/openai-compatible/chat/convert-to-openai-compatible-chat-messages.ts diff --git a/packages/internal/src/openai-compatible/chat/get-response-metadata.ts b/packages/llm-providers/src/openai-compatible/chat/get-response-metadata.ts similarity index 65% rename from packages/internal/src/openai-compatible/chat/get-response-metadata.ts rename to packages/llm-providers/src/openai-compatible/chat/get-response-metadata.ts index bd358b23f7..708fd968e3 100644 --- a/packages/internal/src/openai-compatible/chat/get-response-metadata.ts +++ b/packages/llm-providers/src/openai-compatible/chat/get-response-metadata.ts @@ -3,13 +3,13 @@ export function getResponseMetadata({ model, created, }: { - id?: string | undefined | null; - created?: number | undefined | null; - model?: string | undefined | null; + id?: string | undefined | null + created?: number | undefined | null + model?: string | undefined | null }) { return { id: id ?? undefined, modelId: model ?? undefined, timestamp: created != null ? 
new Date(created * 1000) : undefined, - }; + } } diff --git a/packages/internal/src/openrouter-ai-sdk/utils/map-finish-reason.ts b/packages/llm-providers/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts similarity index 89% rename from packages/internal/src/openrouter-ai-sdk/utils/map-finish-reason.ts rename to packages/llm-providers/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts index b6f9aea783..2a4e9eccc2 100644 --- a/packages/internal/src/openrouter-ai-sdk/utils/map-finish-reason.ts +++ b/packages/llm-providers/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts @@ -1,6 +1,6 @@ import type { LanguageModelV2FinishReason } from '@ai-sdk/provider' -export function mapOpenRouterFinishReason( +export function mapOpenAICompatibleFinishReason( finishReason: string | null | undefined, ): LanguageModelV2FinishReason { switch (finishReason) { diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-api-types.ts b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-api-types.ts similarity index 53% rename from packages/internal/src/openai-compatible/chat/openai-compatible-api-types.ts rename to packages/llm-providers/src/openai-compatible/chat/openai-compatible-api-types.ts index 87afbd575a..f8db776604 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-api-types.ts +++ b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-api-types.ts @@ -1,63 +1,61 @@ -import type { JSONValue } from '@ai-sdk/provider'; +import type { JSONValue } from '@ai-sdk/provider' -export type OpenAICompatibleChatPrompt = Array; +export type OpenAICompatibleChatPrompt = Array export type OpenAICompatibleMessage = | OpenAICompatibleSystemMessage | OpenAICompatibleUserMessage | OpenAICompatibleAssistantMessage - | OpenAICompatibleToolMessage; + | OpenAICompatibleToolMessage // Allow for arbitrary additional properties for general purpose // provider-metadata-specific 
extensibility. type JsonRecord = Record< string, JSONValue | JSONValue[] | T | T[] | undefined ->; +> export interface OpenAICompatibleSystemMessage extends JsonRecord { - role: 'system'; - content: string; + role: 'system' + content: string } -export interface OpenAICompatibleUserMessage - extends JsonRecord { - role: 'user'; - content: string | Array; +export interface OpenAICompatibleUserMessage extends JsonRecord { + role: 'user' + content: string | Array } export type OpenAICompatibleContentPart = | OpenAICompatibleContentPartText - | OpenAICompatibleContentPartImage; + | OpenAICompatibleContentPartImage export interface OpenAICompatibleContentPartImage extends JsonRecord { - type: 'image_url'; - image_url: { url: string }; + type: 'image_url' + image_url: { url: string } } export interface OpenAICompatibleContentPartText extends JsonRecord { - type: 'text'; - text: string; + type: 'text' + text: string } -export interface OpenAICompatibleAssistantMessage - extends JsonRecord { - role: 'assistant'; - content?: string | null; - tool_calls?: Array; +export interface OpenAICompatibleAssistantMessage extends JsonRecord { + role: 'assistant' + content?: string | null + tool_calls?: Array } export interface OpenAICompatibleMessageToolCall extends JsonRecord { - type: 'function'; - id: string; + type: 'function' + id: string function: { - arguments: string; - name: string; - }; + arguments: string + name: string + } } export interface OpenAICompatibleToolMessage extends JsonRecord { - role: 'tool'; - content: string; - tool_call_id: string; + role: 'tool' + content: string + tool_call_id: string } diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-language-model.ts similarity index 81% rename from packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts rename to 
packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-language-model.ts index 7b2619ae8f..7e49bfcadc 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts +++ b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-language-model.ts @@ -1,6 +1,4 @@ -import { - InvalidResponseDataError -} from '@ai-sdk/provider'; +import { InvalidResponseDataError } from '@ai-sdk/provider' import { combineHeaders, createEventSourceResponseHandler, @@ -9,26 +7,20 @@ import { generateId, isParsableJson, parseProviderOptions, - postJsonToApi -} from '@ai-sdk/provider-utils'; -import { z } from 'zod/v4'; - -import { convertToOpenAICompatibleChatMessages } from './convert-to-openai-compatible-chat-messages'; -import { getResponseMetadata } from './get-response-metadata'; -import { mapOpenAICompatibleFinishReason } from './map-openai-compatible-finish-reason'; -import { - openaiCompatibleProviderOptions, -} from './openai-compatible-chat-options'; -import { - defaultOpenAICompatibleErrorStructure -} from '../openai-compatible-error'; -import { prepareTools } from './openai-compatible-prepare-tools'; - -import type { - OpenAICompatibleChatModelId} from './openai-compatible-chat-options'; -import type { - ProviderErrorStructure} from '../openai-compatible-error'; -import type { MetadataExtractor } from './openai-compatible-metadata-extractor'; + postJsonToApi, +} from '@ai-sdk/provider-utils' +import { z } from 'zod/v4' + +import { convertToOpenAICompatibleChatMessages } from './convert-to-openai-compatible-chat-messages' +import { getResponseMetadata } from './get-response-metadata' +import { mapOpenAICompatibleFinishReason } from './map-openai-compatible-finish-reason' +import { openaiCompatibleProviderOptions } from './openai-compatible-chat-options' +import { defaultOpenAICompatibleErrorStructure } from '../openai-compatible-error' +import { prepareTools } from './openai-compatible-prepare-tools' + 
+import type { OpenAICompatibleChatModelId } from './openai-compatible-chat-options' +import type { ProviderErrorStructure } from '../openai-compatible-error' +import type { MetadataExtractor } from './openai-compatible-metadata-extractor' import type { APICallError, LanguageModelV2, @@ -36,70 +28,72 @@ import type { LanguageModelV2Content, LanguageModelV2FinishReason, LanguageModelV2StreamPart, - SharedV2ProviderMetadata} from '@ai-sdk/provider'; + SharedV2ProviderMetadata, +} from '@ai-sdk/provider' import type { FetchFunction, ParseResult, - ResponseHandler} from '@ai-sdk/provider-utils'; + ResponseHandler, +} from '@ai-sdk/provider-utils' export type OpenAICompatibleChatConfig = { - provider: string; - headers: () => Record; - url: (options: { modelId: string; path: string }) => string; - fetch?: FetchFunction; - includeUsage?: boolean; - errorStructure?: ProviderErrorStructure; - metadataExtractor?: MetadataExtractor; + provider: string + headers: () => Record + url: (options: { modelId: string; path: string }) => string + fetch?: FetchFunction + includeUsage?: boolean + errorStructure?: ProviderErrorStructure + metadataExtractor?: MetadataExtractor /** * Whether the model supports structured outputs. */ - supportsStructuredOutputs?: boolean; + supportsStructuredOutputs?: boolean /** * The supported URLs for the model. 
*/ - supportedUrls?: () => LanguageModelV2['supportedUrls']; -}; + supportedUrls?: () => LanguageModelV2['supportedUrls'] +} export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { - readonly specificationVersion = 'v2'; + readonly specificationVersion = 'v2' - readonly supportsStructuredOutputs: boolean; + readonly supportsStructuredOutputs: boolean - readonly modelId: OpenAICompatibleChatModelId; - private readonly config: OpenAICompatibleChatConfig; - private readonly failedResponseHandler: ResponseHandler; - private readonly chunkSchema; // type inferred via constructor + readonly modelId: OpenAICompatibleChatModelId + private readonly config: OpenAICompatibleChatConfig + private readonly failedResponseHandler: ResponseHandler + private readonly chunkSchema // type inferred via constructor constructor( modelId: OpenAICompatibleChatModelId, config: OpenAICompatibleChatConfig, ) { - this.modelId = modelId; - this.config = config; + this.modelId = modelId + this.config = config // initialize error handling: const errorStructure = - config.errorStructure ?? defaultOpenAICompatibleErrorStructure; + config.errorStructure ?? defaultOpenAICompatibleErrorStructure this.chunkSchema = createOpenAICompatibleChatChunkSchema( errorStructure.errorSchema, - ); - this.failedResponseHandler = createJsonErrorResponseHandler(errorStructure); + ) + this.failedResponseHandler = createJsonErrorResponseHandler(errorStructure) - this.supportsStructuredOutputs = config.supportsStructuredOutputs ?? false; + this.supportsStructuredOutputs = config.supportsStructuredOutputs ?? false } get provider(): string { - return this.config.provider; + return this.config.provider } private get providerOptionsName(): string { - return this.config.provider.split('.')[0].trim(); + return this.config.provider.split('.')[0].trim() } get supportedUrls() { - return this.config.supportedUrls?.() ?? {}; + return this.config.supportedUrls?.() ?? 
{} } private async getArgs({ @@ -117,26 +111,26 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { toolChoice, tools, }: Parameters[0]) { - const warnings: LanguageModelV2CallWarning[] = []; + const warnings: LanguageModelV2CallWarning[] = [] // Parse provider options const baseOptionsResult = await parseProviderOptions({ provider: 'openai-compatible', providerOptions, schema: openaiCompatibleProviderOptions, - }); + }) const providerOptionsResult = await parseProviderOptions({ provider: this.providerOptionsName, providerOptions, schema: openaiCompatibleProviderOptions, - }); + }) const compatibleOptions = Object.assign( baseOptionsResult ?? {}, providerOptionsResult ?? {}, - ); + ) if (topK != null) { - warnings.push({ type: 'unsupported-setting', setting: 'topK' }); + warnings.push({ type: 'unsupported-setting', setting: 'topK' }) } if ( @@ -149,7 +143,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { setting: 'responseFormat', details: 'JSON response format schema is only supported with structuredOutputs', - }); + }) } const { @@ -159,7 +153,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { } = prepareTools({ tools, toolChoice, - }); + }) return { args: { @@ -212,15 +206,15 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { tool_choice: openaiToolChoice, }, warnings: [...warnings, ...toolWarnings], - }; + } } async doGenerate( options: Parameters[0], ): Promise>> { - const { args, warnings } = await this.getArgs({ ...options }); + const { args, warnings } = await this.getArgs({ ...options }) - const body = JSON.stringify(args); + const body = JSON.stringify(args) const { responseHeaders, @@ -239,25 +233,25 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { ), abortSignal: options.abortSignal, fetch: this.config.fetch, - }); + }) - const choice = responseBody.choices[0]; - const content: Array = []; + const choice = 
responseBody.choices[0] + const content: Array = [] // text content: - const text = choice.message.content; + const text = choice.message.content if (text != null && text.length > 0) { - content.push({ type: 'text', text }); + content.push({ type: 'text', text }) } // reasoning content: const reasoning = - choice.message.reasoning_content ?? choice.message.reasoning; + choice.message.reasoning_content ?? choice.message.reasoning if (reasoning != null && reasoning.length > 0) { content.push({ type: 'reasoning', text: reasoning, - }); + }) } // tool calls: @@ -268,7 +262,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { toolCallId: toolCall.id ?? generateId(), toolName: toolCall.function.name, input: toolCall.function.arguments!, - }); + }) } } @@ -276,20 +270,19 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { const extractedMetadata = await this.config.metadataExtractor?.extractMetadata?.({ parsedBody: rawResponse, - }); + }) const providerMetadata: SharedV2ProviderMetadata = { [this.providerOptionsName]: {}, ...extractedMetadata, - }; - const completionTokenDetails = - responseBody.usage?.completion_tokens_details; + } + const completionTokenDetails = responseBody.usage?.completion_tokens_details if (completionTokenDetails?.accepted_prediction_tokens != null) { providerMetadata[this.providerOptionsName].acceptedPredictionTokens = - completionTokenDetails?.accepted_prediction_tokens; + completionTokenDetails?.accepted_prediction_tokens } if (completionTokenDetails?.rejected_prediction_tokens != null) { providerMetadata[this.providerOptionsName].rejectedPredictionTokens = - completionTokenDetails?.rejected_prediction_tokens; + completionTokenDetails?.rejected_prediction_tokens } return { @@ -313,13 +306,13 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { body: rawResponse, }, warnings, - }; + } } async doStream( options: Parameters[0], ): Promise>> { - const { args, warnings } 
= await this.getArgs({ ...options }); + const { args, warnings } = await this.getArgs({ ...options }) const body = { ...args, @@ -329,10 +322,10 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { stream_options: this.config.includeUsage ? { include_usage: true } : undefined, - }; + } const metadataExtractor = - this.config.metadataExtractor?.createStreamExtractor(); + this.config.metadataExtractor?.createStreamExtractor() const { responseHeaders, value: response } = await postJsonToApi({ url: this.config.url({ @@ -347,31 +340,31 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { ), abortSignal: options.abortSignal, fetch: this.config.fetch, - }); + }) const toolCalls: Array<{ - id: string; - type: 'function'; + id: string + type: 'function' function: { - name: string; - arguments: string; - }; - hasFinished: boolean; - }> = []; + name: string + arguments: string + } + hasFinished: boolean + }> = [] - let finishReason: LanguageModelV2FinishReason = 'unknown'; + let finishReason: LanguageModelV2FinishReason = 'unknown' const usage: { - completionTokens: number | undefined; + completionTokens: number | undefined completionTokensDetails: { - reasoningTokens: number | undefined; - acceptedPredictionTokens: number | undefined; - rejectedPredictionTokens: number | undefined; - }; - promptTokens: number | undefined; + reasoningTokens: number | undefined + acceptedPredictionTokens: number | undefined + rejectedPredictionTokens: number | undefined + } + promptTokens: number | undefined promptTokensDetails: { - cachedTokens: number | undefined; - }; - totalTokens: number | undefined; + cachedTokens: number | undefined + } + totalTokens: number | undefined } = { completionTokens: undefined, completionTokensDetails: { @@ -384,11 +377,11 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { cachedTokens: undefined, }, totalTokens: undefined, - }; - let isFirstChunk = true; - const 
providerOptionsName = this.providerOptionsName; - let isActiveReasoning = false; - let isActiveText = false; + } + let isFirstChunk = true + const providerOptionsName = this.providerOptionsName + let isActiveReasoning = false + let isActiveText = false return { stream: response.pipeThrough( @@ -397,40 +390,40 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { LanguageModelV2StreamPart >({ start(controller) { - controller.enqueue({ type: 'stream-start', warnings }); + controller.enqueue({ type: 'stream-start', warnings }) }, // TODO we lost type safety on Chunk, most likely due to the error schema. MUST FIX transform(chunk, controller) { // Emit raw chunk if requested (before anything else) if (options.includeRawChunks) { - controller.enqueue({ type: 'raw', rawValue: chunk.rawValue }); + controller.enqueue({ type: 'raw', rawValue: chunk.rawValue }) } // handle failed chunk parsing / validation: if (!chunk.success) { - finishReason = 'error'; - controller.enqueue({ type: 'error', error: chunk.error }); - return; + finishReason = 'error' + controller.enqueue({ type: 'error', error: chunk.error }) + return } - const value = chunk.value; + const value = chunk.value - metadataExtractor?.processChunk(chunk.rawValue); + metadataExtractor?.processChunk(chunk.rawValue) // handle error chunks: if ('error' in value) { - finishReason = 'error'; - controller.enqueue({ type: 'error', error: value.error.message }); - return; + finishReason = 'error' + controller.enqueue({ type: 'error', error: value.error.message }) + return } if (isFirstChunk) { - isFirstChunk = false; + isFirstChunk = false controller.enqueue({ type: 'response-metadata', ...getResponseMetadata(value), - }); + }) } if (value.usage != null) { @@ -440,98 +433,98 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { total_tokens, prompt_tokens_details, completion_tokens_details, - } = value.usage; + } = value.usage - usage.promptTokens = prompt_tokens ?? 
undefined; - usage.completionTokens = completion_tokens ?? undefined; - usage.totalTokens = total_tokens ?? undefined; + usage.promptTokens = prompt_tokens ?? undefined + usage.completionTokens = completion_tokens ?? undefined + usage.totalTokens = total_tokens ?? undefined if (completion_tokens_details?.reasoning_tokens != null) { usage.completionTokensDetails.reasoningTokens = - completion_tokens_details?.reasoning_tokens; + completion_tokens_details?.reasoning_tokens } if ( completion_tokens_details?.accepted_prediction_tokens != null ) { usage.completionTokensDetails.acceptedPredictionTokens = - completion_tokens_details?.accepted_prediction_tokens; + completion_tokens_details?.accepted_prediction_tokens } if ( completion_tokens_details?.rejected_prediction_tokens != null ) { usage.completionTokensDetails.rejectedPredictionTokens = - completion_tokens_details?.rejected_prediction_tokens; + completion_tokens_details?.rejected_prediction_tokens } if (prompt_tokens_details?.cached_tokens != null) { usage.promptTokensDetails.cachedTokens = - prompt_tokens_details?.cached_tokens; + prompt_tokens_details?.cached_tokens } } - const choice = value.choices[0]; + const choice = value.choices[0] if (choice?.finish_reason != null) { finishReason = mapOpenAICompatibleFinishReason( choice.finish_reason, - ); + ) } if (choice?.delta == null) { - return; + return } - const delta = choice.delta; + const delta = choice.delta // enqueue reasoning before text deltas: - const reasoningContent = delta.reasoning_content ?? delta.reasoning; + const reasoningContent = delta.reasoning_content ?? 
delta.reasoning if (reasoningContent) { if (!isActiveReasoning) { controller.enqueue({ type: 'reasoning-start', id: 'reasoning-0', - }); - isActiveReasoning = true; + }) + isActiveReasoning = true } controller.enqueue({ type: 'reasoning-delta', id: 'reasoning-0', delta: reasoningContent, - }); + }) } if (delta.content) { if (!isActiveText) { - controller.enqueue({ type: 'text-start', id: 'txt-0' }); - isActiveText = true; + controller.enqueue({ type: 'text-start', id: 'txt-0' }) + isActiveText = true } controller.enqueue({ type: 'text-delta', id: 'txt-0', delta: delta.content, - }); + }) } if (delta.tool_calls != null) { for (const toolCallDelta of delta.tool_calls) { - const index = toolCallDelta.index; + const index = toolCallDelta.index if (toolCalls[index] == null) { if (toolCallDelta.function?.name == null) { throw new InvalidResponseDataError({ data: toolCallDelta, message: `Expected 'function.name' to be a string.`, - }); + }) } // UPDATED (James): Generate an ID if the provider doesn't include one (e.g., GLM models) - const toolCallId = toolCallDelta.id ?? generateId(); + const toolCallId = toolCallDelta.id ?? generateId() controller.enqueue({ type: 'tool-input-start', id: toolCallId, toolName: toolCallDelta.function.name, - }); + }) toolCalls[index] = { id: toolCallId, @@ -541,9 +534,9 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { arguments: toolCallDelta.function.arguments ?? 
'', }, hasFinished: false, - }; + } - const toolCall = toolCalls[index]; + const toolCall = toolCalls[index] if ( toolCall.function?.name != null && @@ -555,7 +548,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { type: 'tool-input-delta', id: toolCall.id, delta: toolCall.function.arguments, - }); + }) } // check if tool call is complete @@ -564,31 +557,31 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { controller.enqueue({ type: 'tool-input-end', id: toolCall.id, - }); + }) controller.enqueue({ type: 'tool-call', toolCallId: toolCall.id ?? generateId(), toolName: toolCall.function.name, input: toolCall.function.arguments, - }); - toolCall.hasFinished = true; + }) + toolCall.hasFinished = true } } - continue; + continue } // existing tool call, merge if not finished - const toolCall = toolCalls[index]; + const toolCall = toolCalls[index] if (toolCall.hasFinished) { - continue; + continue } if (toolCallDelta.function?.arguments != null) { toolCall.function!.arguments += - toolCallDelta.function?.arguments ?? ''; + toolCallDelta.function?.arguments ?? '' } // send delta @@ -596,7 +589,7 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { type: 'tool-input-delta', id: toolCall.id, delta: toolCallDelta.function.arguments ?? '', - }); + }) // check if tool call is complete if ( @@ -607,15 +600,15 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { controller.enqueue({ type: 'tool-input-end', id: toolCall.id, - }); + }) controller.enqueue({ type: 'tool-call', toolCallId: toolCall.id ?? 
generateId(), toolName: toolCall.function.name, input: toolCall.function.arguments, - }); - toolCall.hasFinished = true; + }) + toolCall.hasFinished = true } } } @@ -623,45 +616,45 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { flush(controller) { if (isActiveReasoning) { - controller.enqueue({ type: 'reasoning-end', id: 'reasoning-0' }); + controller.enqueue({ type: 'reasoning-end', id: 'reasoning-0' }) } if (isActiveText) { - controller.enqueue({ type: 'text-end', id: 'txt-0' }); + controller.enqueue({ type: 'text-end', id: 'txt-0' }) } // go through all tool calls and send the ones that are not finished for (const toolCall of toolCalls.filter( - toolCall => !toolCall.hasFinished, + (toolCall) => !toolCall.hasFinished, )) { controller.enqueue({ type: 'tool-input-end', id: toolCall.id, - }); + }) controller.enqueue({ type: 'tool-call', toolCallId: toolCall.id ?? generateId(), toolName: toolCall.function.name, input: toolCall.function.arguments, - }); + }) } const providerMetadata: SharedV2ProviderMetadata = { [providerOptionsName]: {}, ...metadataExtractor?.buildMetadata(), - }; + } if ( usage.completionTokensDetails.acceptedPredictionTokens != null ) { providerMetadata[providerOptionsName].acceptedPredictionTokens = - usage.completionTokensDetails.acceptedPredictionTokens; + usage.completionTokensDetails.acceptedPredictionTokens } if ( usage.completionTokensDetails.rejectedPredictionTokens != null ) { providerMetadata[providerOptionsName].rejectedPredictionTokens = - usage.completionTokensDetails.rejectedPredictionTokens; + usage.completionTokensDetails.rejectedPredictionTokens } controller.enqueue({ @@ -677,13 +670,13 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { usage.promptTokensDetails.cachedTokens ?? 
undefined, }, providerMetadata, - }); + }) }, }), ), request: { body }, response: { headers: responseHeaders }, - }; + } } } @@ -705,7 +698,7 @@ const openaiCompatibleTokenUsageSchema = z }) .nullish(), }) - .nullish(); + .nullish() // limited version of the schema, focussed on what is needed for the implementation // this approach limits breakages when the API changes and increases efficiency @@ -736,7 +729,7 @@ const OpenAICompatibleChatResponseSchema = z.object({ }), ), usage: openaiCompatibleTokenUsageSchema, -}); +}) // limited version of the schema, focussed on what is needed for the implementation // this approach limits breakages when the API changes and increases efficiency @@ -780,4 +773,4 @@ const createOpenAICompatibleChatChunkSchema = < usage: openaiCompatibleTokenUsageSchema, }), errorSchema, - ]); + ]) diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-options.ts b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-options.ts similarity index 86% rename from packages/internal/src/openai-compatible/chat/openai-compatible-chat-options.ts rename to packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-options.ts index 4fd1877db0..13c5d76407 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-options.ts +++ b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-chat-options.ts @@ -1,6 +1,6 @@ -import { z } from 'zod/v4'; +import { z } from 'zod/v4' -export type OpenAICompatibleChatModelId = string; +export type OpenAICompatibleChatModelId = string export const openaiCompatibleProviderOptions = z.object({ /** @@ -18,8 +18,8 @@ export const openaiCompatibleProviderOptions = z.object({ * Controls the verbosity of the generated text. Defaults to `medium`. 
*/ textVerbosity: z.string().optional(), -}); +}) export type OpenAICompatibleProviderOptions = z.infer< typeof openaiCompatibleProviderOptions ->; +> diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts similarity index 89% rename from packages/internal/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts rename to packages/llm-providers/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts index 17c56c7ac0..9abd60bcda 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts +++ b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-metadata-extractor.ts @@ -1,4 +1,4 @@ -import type { SharedV2ProviderMetadata } from '@ai-sdk/provider'; +import type { SharedV2ProviderMetadata } from '@ai-sdk/provider' /** Extracts provider-specific metadata from API responses. @@ -17,8 +17,8 @@ export type MetadataExtractor = { extractMetadata: ({ parsedBody, }: { - parsedBody: unknown; - }) => Promise; + parsedBody: unknown + }) => Promise /** * Creates an extractor for handling streaming responses. The returned object provides @@ -34,7 +34,7 @@ export type MetadataExtractor = { * * @param parsedChunk - The parsed JSON response chunk from the provider's API */ - processChunk(parsedChunk: unknown): void; + processChunk(parsedChunk: unknown): void /** * Builds the metadata object after all chunks have been processed. @@ -43,6 +43,6 @@ export type MetadataExtractor = { * @returns Provider-specific metadata or undefined if no metadata is available. * The metadata should be under a key indicating the provider id. 
*/ - buildMetadata(): SharedV2ProviderMetadata | undefined; - }; -}; + buildMetadata(): SharedV2ProviderMetadata | undefined + } +} diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-prepare-tools.ts b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-prepare-tools.ts similarity index 61% rename from packages/internal/src/openai-compatible/chat/openai-compatible-prepare-tools.ts rename to packages/llm-providers/src/openai-compatible/chat/openai-compatible-prepare-tools.ts index e48c8ec06c..7a53d2bd15 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-prepare-tools.ts +++ b/packages/llm-providers/src/openai-compatible/chat/openai-compatible-prepare-tools.ts @@ -1,57 +1,56 @@ -import { - UnsupportedFunctionalityError, -} from '@ai-sdk/provider'; +import { UnsupportedFunctionalityError } from '@ai-sdk/provider' import type { LanguageModelV2CallOptions, - LanguageModelV2CallWarning} from '@ai-sdk/provider'; + LanguageModelV2CallWarning, +} from '@ai-sdk/provider' export function prepareTools({ tools, toolChoice, }: { - tools: LanguageModelV2CallOptions['tools']; - toolChoice?: LanguageModelV2CallOptions['toolChoice']; + tools: LanguageModelV2CallOptions['tools'] + toolChoice?: LanguageModelV2CallOptions['toolChoice'] }): { tools: | undefined | Array<{ - type: 'function'; + type: 'function' function: { - name: string; - description: string | undefined; - parameters: unknown; - }; - }>; + name: string + description: string | undefined + parameters: unknown + } + }> toolChoice: | { type: 'function'; function: { name: string } } | 'auto' | 'none' | 'required' - | undefined; - toolWarnings: LanguageModelV2CallWarning[]; + | undefined + toolWarnings: LanguageModelV2CallWarning[] } { // when the tools array is empty, change it to undefined to prevent errors: - tools = tools?.length ? tools : undefined; + tools = tools?.length ? 
tools : undefined - const toolWarnings: LanguageModelV2CallWarning[] = []; + const toolWarnings: LanguageModelV2CallWarning[] = [] if (tools == null) { - return { tools: undefined, toolChoice: undefined, toolWarnings }; + return { tools: undefined, toolChoice: undefined, toolWarnings } } const openaiCompatTools: Array<{ - type: 'function'; + type: 'function' function: { - name: string; - description: string | undefined; - parameters: unknown; - }; - }> = []; + name: string + description: string | undefined + parameters: unknown + } + }> = [] for (const tool of tools) { if (tool.type === 'provider-defined') { - toolWarnings.push({ type: 'unsupported-tool', tool }); + toolWarnings.push({ type: 'unsupported-tool', tool }) } else { openaiCompatTools.push({ type: 'function', @@ -60,21 +59,21 @@ export function prepareTools({ description: tool.description, parameters: tool.inputSchema, }, - }); + }) } } if (toolChoice == null) { - return { tools: openaiCompatTools, toolChoice: undefined, toolWarnings }; + return { tools: openaiCompatTools, toolChoice: undefined, toolWarnings } } - const type = toolChoice.type; + const type = toolChoice.type switch (type) { case 'auto': case 'none': case 'required': - return { tools: openaiCompatTools, toolChoice: type, toolWarnings }; + return { tools: openaiCompatTools, toolChoice: type, toolWarnings } case 'tool': return { tools: openaiCompatTools, @@ -83,12 +82,12 @@ export function prepareTools({ function: { name: toolChoice.toolName }, }, toolWarnings, - }; + } default: { - const _exhaustiveCheck: never = type; + const _exhaustiveCheck: never = type throw new UnsupportedFunctionalityError({ functionality: `tool choice type: ${_exhaustiveCheck}`, - }); + }) } } } diff --git a/packages/internal/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts b/packages/llm-providers/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts similarity index 67% rename from 
packages/internal/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts rename to packages/llm-providers/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts index fec938c059..c22a8ec277 100644 --- a/packages/internal/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts +++ b/packages/llm-providers/src/openai-compatible/completion/convert-to-openai-compatible-completion-prompt.ts @@ -1,30 +1,29 @@ import { InvalidPromptError, UnsupportedFunctionalityError, -} from '@ai-sdk/provider'; +} from '@ai-sdk/provider' -import type { - LanguageModelV2Prompt} from '@ai-sdk/provider'; +import type { LanguageModelV2Prompt } from '@ai-sdk/provider' export function convertToOpenAICompatibleCompletionPrompt({ prompt, user = 'user', assistant = 'assistant', }: { - prompt: LanguageModelV2Prompt; - user?: string; - assistant?: string; + prompt: LanguageModelV2Prompt + user?: string + assistant?: string }): { - prompt: string; - stopSequences?: string[]; + prompt: string + stopSequences?: string[] } { // transform to a chat message format: - let text = ''; + let text = '' // if first message is a system message, add it to the text: if (prompt[0].role === 'system') { - text += `${prompt[0].content}\n\n`; - prompt = prompt.slice(1); + text += `${prompt[0].content}\n\n` + prompt = prompt.slice(1) } for (const { role, content } of prompt) { @@ -33,65 +32,65 @@ export function convertToOpenAICompatibleCompletionPrompt({ throw new InvalidPromptError({ message: 'Unexpected system message in prompt: ${content}', prompt, - }); + }) } case 'user': { const userMessage = content - .map(part => { + .map((part) => { switch (part.type) { case 'text': { - return part.text; + return part.text } } return }) .filter(Boolean) - .join(''); + .join('') - text += `${user}:\n${userMessage}\n\n`; - break; + text += `${user}:\n${userMessage}\n\n` + break } case 'assistant': { const assistantMessage = content - .map(part 
=> { + .map((part) => { switch (part.type) { case 'text': { - return part.text; + return part.text } case 'tool-call': { throw new UnsupportedFunctionalityError({ functionality: 'tool-call messages', - }); + }) } } return undefined }) - .join(''); + .join('') - text += `${assistant}:\n${assistantMessage}\n\n`; - break; + text += `${assistant}:\n${assistantMessage}\n\n` + break } case 'tool': { throw new UnsupportedFunctionalityError({ functionality: 'tool messages', - }); + }) } default: { - const _exhaustiveCheck: never = role; - throw new Error(`Unsupported role: ${_exhaustiveCheck}`); + const _exhaustiveCheck: never = role + throw new Error(`Unsupported role: ${_exhaustiveCheck}`) } } } // Assistant message prefix: - text += `${assistant}:\n`; + text += `${assistant}:\n` return { prompt: text, stopSequences: [`\n${user}:`], - }; + } } diff --git a/packages/internal/src/openai-compatible/completion/get-response-metadata.ts b/packages/llm-providers/src/openai-compatible/completion/get-response-metadata.ts similarity index 65% rename from packages/internal/src/openai-compatible/completion/get-response-metadata.ts rename to packages/llm-providers/src/openai-compatible/completion/get-response-metadata.ts index bd358b23f7..708fd968e3 100644 --- a/packages/internal/src/openai-compatible/completion/get-response-metadata.ts +++ b/packages/llm-providers/src/openai-compatible/completion/get-response-metadata.ts @@ -3,13 +3,13 @@ export function getResponseMetadata({ model, created, }: { - id?: string | undefined | null; - created?: number | undefined | null; - model?: string | undefined | null; + id?: string | undefined | null + created?: number | undefined | null + model?: string | undefined | null }) { return { id: id ?? undefined, modelId: model ?? undefined, timestamp: created != null ? 
new Date(created * 1000) : undefined, - }; + } } diff --git a/packages/internal/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts b/packages/llm-providers/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts similarity index 72% rename from packages/internal/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts rename to packages/llm-providers/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts index b18feae081..2a4e9eccc2 100644 --- a/packages/internal/src/openai-compatible/chat/map-openai-compatible-finish-reason.ts +++ b/packages/llm-providers/src/openai-compatible/completion/map-openai-compatible-finish-reason.ts @@ -1,19 +1,19 @@ -import type { LanguageModelV2FinishReason } from '@ai-sdk/provider'; +import type { LanguageModelV2FinishReason } from '@ai-sdk/provider' export function mapOpenAICompatibleFinishReason( finishReason: string | null | undefined, ): LanguageModelV2FinishReason { switch (finishReason) { case 'stop': - return 'stop'; + return 'stop' case 'length': - return 'length'; + return 'length' case 'content_filter': - return 'content-filter'; + return 'content-filter' case 'function_call': case 'tool_calls': - return 'tool-calls'; + return 'tool-calls' default: - return 'unknown'; + return 'unknown' } } diff --git a/packages/internal/src/openai-compatible/completion/openai-compatible-completion-language-model.ts b/packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-language-model.ts similarity index 76% rename from packages/internal/src/openai-compatible/completion/openai-compatible-completion-language-model.ts rename to packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-language-model.ts index fb32ad3aeb..ddc84bab9a 100644 --- a/packages/internal/src/openai-compatible/completion/openai-compatible-completion-language-model.ts +++ 
b/packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-language-model.ts @@ -4,24 +4,18 @@ import { createJsonErrorResponseHandler, createJsonResponseHandler, parseProviderOptions, - postJsonToApi -} from '@ai-sdk/provider-utils'; -import { z } from 'zod/v4'; - -import { - defaultOpenAICompatibleErrorStructure -} from '../openai-compatible-error'; -import { convertToOpenAICompatibleCompletionPrompt } from './convert-to-openai-compatible-completion-prompt'; -import { getResponseMetadata } from './get-response-metadata'; -import { mapOpenAICompatibleFinishReason } from './map-openai-compatible-finish-reason'; -import { - openaiCompatibleCompletionProviderOptions, -} from './openai-compatible-completion-options'; - -import type { - OpenAICompatibleCompletionModelId} from './openai-compatible-completion-options'; -import type { - ProviderErrorStructure} from '../openai-compatible-error'; + postJsonToApi, +} from '@ai-sdk/provider-utils' +import { z } from 'zod/v4' + +import { defaultOpenAICompatibleErrorStructure } from '../openai-compatible-error' +import { convertToOpenAICompatibleCompletionPrompt } from './convert-to-openai-compatible-completion-prompt' +import { getResponseMetadata } from './get-response-metadata' +import { mapOpenAICompatibleFinishReason } from './map-openai-compatible-finish-reason' +import { openaiCompatibleCompletionProviderOptions } from './openai-compatible-completion-options' + +import type { OpenAICompatibleCompletionModelId } from './openai-compatible-completion-options' +import type { ProviderErrorStructure } from '../openai-compatible-error' import type { APICallError, LanguageModelV2, @@ -30,62 +24,61 @@ import type { LanguageModelV2FinishReason, LanguageModelV2StreamPart, LanguageModelV2Usage, -} from '@ai-sdk/provider'; +} from '@ai-sdk/provider' import type { FetchFunction, ParseResult, - ResponseHandler} from '@ai-sdk/provider-utils'; + ResponseHandler, +} from '@ai-sdk/provider-utils' type 
OpenAICompatibleCompletionConfig = { - provider: string; - includeUsage?: boolean; - headers: () => Record; - url: (options: { modelId: string; path: string }) => string; - fetch?: FetchFunction; - errorStructure?: ProviderErrorStructure; + provider: string + includeUsage?: boolean + headers: () => Record + url: (options: { modelId: string; path: string }) => string + fetch?: FetchFunction + errorStructure?: ProviderErrorStructure /** * The supported URLs for the model. */ - supportedUrls?: () => LanguageModelV2['supportedUrls']; -}; + supportedUrls?: () => LanguageModelV2['supportedUrls'] +} -export class OpenAICompatibleCompletionLanguageModel - implements LanguageModelV2 -{ - readonly specificationVersion = 'v2'; +export class OpenAICompatibleCompletionLanguageModel implements LanguageModelV2 { + readonly specificationVersion = 'v2' - readonly modelId: OpenAICompatibleCompletionModelId; - private readonly config: OpenAICompatibleCompletionConfig; - private readonly failedResponseHandler: ResponseHandler; - private readonly chunkSchema; // type inferred via constructor + readonly modelId: OpenAICompatibleCompletionModelId + private readonly config: OpenAICompatibleCompletionConfig + private readonly failedResponseHandler: ResponseHandler + private readonly chunkSchema // type inferred via constructor constructor( modelId: OpenAICompatibleCompletionModelId, config: OpenAICompatibleCompletionConfig, ) { - this.modelId = modelId; - this.config = config; + this.modelId = modelId + this.config = config // initialize error handling: const errorStructure = - config.errorStructure ?? defaultOpenAICompatibleErrorStructure; + config.errorStructure ?? 
defaultOpenAICompatibleErrorStructure this.chunkSchema = createOpenAICompatibleCompletionChunkSchema( errorStructure.errorSchema, - ); - this.failedResponseHandler = createJsonErrorResponseHandler(errorStructure); + ) + this.failedResponseHandler = createJsonErrorResponseHandler(errorStructure) } get provider(): string { - return this.config.provider; + return this.config.provider } private get providerOptionsName(): string { - return this.config.provider.split('.')[0].trim(); + return this.config.provider.split('.')[0].trim() } get supportedUrls() { - return this.config.supportedUrls?.() ?? {}; + return this.config.supportedUrls?.() ?? {} } private async getArgs({ @@ -103,26 +96,26 @@ export class OpenAICompatibleCompletionLanguageModel tools, toolChoice, }: Parameters[0]) { - const warnings: LanguageModelV2CallWarning[] = []; + const warnings: LanguageModelV2CallWarning[] = [] // Parse provider options const completionOptionsResult = await parseProviderOptions({ provider: this.providerOptionsName, providerOptions, schema: openaiCompatibleCompletionProviderOptions, - }); - const completionOptions = completionOptionsResult ?? {}; + }) + const completionOptions = completionOptionsResult ?? 
{} if (topK != null) { - warnings.push({ type: 'unsupported-setting', setting: 'topK' }); + warnings.push({ type: 'unsupported-setting', setting: 'topK' }) } if (tools?.length) { - warnings.push({ type: 'unsupported-setting', setting: 'tools' }); + warnings.push({ type: 'unsupported-setting', setting: 'tools' }) } if (toolChoice != null) { - warnings.push({ type: 'unsupported-setting', setting: 'toolChoice' }); + warnings.push({ type: 'unsupported-setting', setting: 'toolChoice' }) } if (responseFormat != null && responseFormat.type !== 'text') { @@ -130,13 +123,13 @@ export class OpenAICompatibleCompletionLanguageModel type: 'unsupported-setting', setting: 'responseFormat', details: 'JSON response format is not supported.', - }); + }) } const { prompt: completionPrompt, stopSequences } = - convertToOpenAICompatibleCompletionPrompt({ prompt }); + convertToOpenAICompatibleCompletionPrompt({ prompt }) - const stop = [...(stopSequences ?? []), ...(userStopSequences ?? [])]; + const stop = [...(stopSequences ?? []), ...(userStopSequences ?? [])] return { args: { @@ -165,13 +158,13 @@ export class OpenAICompatibleCompletionLanguageModel stop: stop.length > 0 ? 
stop : undefined, }, warnings, - }; + } } async doGenerate( options: Parameters[0], ): Promise>> { - const { args, warnings } = await this.getArgs(options); + const { args, warnings } = await this.getArgs(options) const { responseHeaders, @@ -190,14 +183,14 @@ export class OpenAICompatibleCompletionLanguageModel ), abortSignal: options.abortSignal, fetch: this.config.fetch, - }); + }) - const choice = response.choices[0]; - const content: Array = []; + const choice = response.choices[0] + const content: Array = [] // text content: if (choice.text != null && choice.text.length > 0) { - content.push({ type: 'text', text: choice.text }); + content.push({ type: 'text', text: choice.text }) } return { @@ -215,13 +208,13 @@ export class OpenAICompatibleCompletionLanguageModel body: rawResponse, }, warnings, - }; + } } async doStream( options: Parameters[0], ): Promise>> { - const { args, warnings } = await this.getArgs(options); + const { args, warnings } = await this.getArgs(options) const body = { ...args, @@ -231,7 +224,7 @@ export class OpenAICompatibleCompletionLanguageModel stream_options: this.config.includeUsage ? 
{ include_usage: true } : undefined, - }; + } const { responseHeaders, value: response } = await postJsonToApi({ url: this.config.url({ @@ -246,15 +239,15 @@ export class OpenAICompatibleCompletionLanguageModel ), abortSignal: options.abortSignal, fetch: this.config.fetch, - }); + }) - let finishReason: LanguageModelV2FinishReason = 'unknown'; + let finishReason: LanguageModelV2FinishReason = 'unknown' const usage: LanguageModelV2Usage = { inputTokens: undefined, outputTokens: undefined, totalTokens: undefined, - }; - let isFirstChunk = true; + } + let isFirstChunk = true return { stream: response.pipeThrough( @@ -263,56 +256,56 @@ export class OpenAICompatibleCompletionLanguageModel LanguageModelV2StreamPart >({ start(controller) { - controller.enqueue({ type: 'stream-start', warnings }); + controller.enqueue({ type: 'stream-start', warnings }) }, transform(chunk, controller) { if (options.includeRawChunks) { - controller.enqueue({ type: 'raw', rawValue: chunk.rawValue }); + controller.enqueue({ type: 'raw', rawValue: chunk.rawValue }) } // handle failed chunk parsing / validation: if (!chunk.success) { - finishReason = 'error'; - controller.enqueue({ type: 'error', error: chunk.error }); - return; + finishReason = 'error' + controller.enqueue({ type: 'error', error: chunk.error }) + return } - const value = chunk.value; + const value = chunk.value // handle error chunks: if ('error' in value) { - finishReason = 'error'; - controller.enqueue({ type: 'error', error: value.error }); - return; + finishReason = 'error' + controller.enqueue({ type: 'error', error: value.error }) + return } if (isFirstChunk) { - isFirstChunk = false; + isFirstChunk = false controller.enqueue({ type: 'response-metadata', ...getResponseMetadata(value), - }); + }) controller.enqueue({ type: 'text-start', id: '0', - }); + }) } if (value.usage != null) { - usage.inputTokens = value.usage.prompt_tokens ?? undefined; - usage.outputTokens = value.usage.completion_tokens ?? 
undefined; - usage.totalTokens = value.usage.total_tokens ?? undefined; + usage.inputTokens = value.usage.prompt_tokens ?? undefined + usage.outputTokens = value.usage.completion_tokens ?? undefined + usage.totalTokens = value.usage.total_tokens ?? undefined } - const choice = value.choices[0]; + const choice = value.choices[0] if (choice?.finish_reason != null) { finishReason = mapOpenAICompatibleFinishReason( choice.finish_reason, - ); + ) } if (choice?.text != null) { @@ -320,26 +313,26 @@ export class OpenAICompatibleCompletionLanguageModel type: 'text-delta', id: '0', delta: choice.text, - }); + }) } }, flush(controller) { if (!isFirstChunk) { - controller.enqueue({ type: 'text-end', id: '0' }); + controller.enqueue({ type: 'text-end', id: '0' }) } controller.enqueue({ type: 'finish', finishReason, usage, - }); + }) }, }), ), request: { body }, response: { headers: responseHeaders }, - }; + } } } @@ -347,7 +340,7 @@ const usageSchema = z.object({ prompt_tokens: z.number(), completion_tokens: z.number(), total_tokens: z.number(), -}); +}) // limited version of the schema, focussed on what is needed for the implementation // this approach limits breakages when the API changes and increases efficiency @@ -362,7 +355,7 @@ const openaiCompatibleCompletionResponseSchema = z.object({ }), ), usage: usageSchema.nullish(), -}); +}) // limited version of the schema, focussed on what is needed for the implementation // this approach limits breakages when the API changes and increases efficiency @@ -386,4 +379,4 @@ const createOpenAICompatibleCompletionChunkSchema = < usage: usageSchema.nullish(), }), errorSchema, - ]); + ]) diff --git a/packages/internal/src/openai-compatible/completion/openai-compatible-completion-options.ts b/packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-options.ts similarity index 90% rename from packages/internal/src/openai-compatible/completion/openai-compatible-completion-options.ts rename to 
packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-options.ts index 9f6a5ca114..a34e5de348 100644 --- a/packages/internal/src/openai-compatible/completion/openai-compatible-completion-options.ts +++ b/packages/llm-providers/src/openai-compatible/completion/openai-compatible-completion-options.ts @@ -1,6 +1,6 @@ -import { z } from 'zod/v4'; +import { z } from 'zod/v4' -export type OpenAICompatibleCompletionModelId = string; +export type OpenAICompatibleCompletionModelId = string export const openaiCompatibleCompletionProviderOptions = z.object({ /** @@ -26,8 +26,8 @@ export const openaiCompatibleCompletionProviderOptions = z.object({ * monitor and detect abuse. */ user: z.string().optional(), -}); +}) export type OpenAICompatibleCompletionProviderOptions = z.infer< typeof openaiCompatibleCompletionProviderOptions ->; +> diff --git a/packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-model.ts b/packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-model.ts similarity index 66% rename from packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-model.ts rename to packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-model.ts index 1ef99d2062..096a934f4f 100644 --- a/packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-model.ts +++ b/packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-model.ts @@ -1,79 +1,67 @@ -import { - TooManyEmbeddingValuesForCallError, -} from '@ai-sdk/provider'; +import { TooManyEmbeddingValuesForCallError } from '@ai-sdk/provider' import { combineHeaders, createJsonErrorResponseHandler, createJsonResponseHandler, parseProviderOptions, postJsonToApi, -} from '@ai-sdk/provider-utils'; -import { z } from 'zod/v4'; +} from '@ai-sdk/provider-utils' +import { z } from 'zod/v4' -import { - openaiCompatibleEmbeddingProviderOptions, -} from 
'./openai-compatible-embedding-options'; -import { - defaultOpenAICompatibleErrorStructure -} from '../openai-compatible-error'; - -import type { - OpenAICompatibleEmbeddingModelId} from './openai-compatible-embedding-options'; -import type { - ProviderErrorStructure} from '../openai-compatible-error'; -import type { - EmbeddingModelV2} from '@ai-sdk/provider'; -import type { - FetchFunction} from '@ai-sdk/provider-utils'; +import { openaiCompatibleEmbeddingProviderOptions } from './openai-compatible-embedding-options' +import { defaultOpenAICompatibleErrorStructure } from '../openai-compatible-error' + +import type { OpenAICompatibleEmbeddingModelId } from './openai-compatible-embedding-options' +import type { ProviderErrorStructure } from '../openai-compatible-error' +import type { EmbeddingModelV2 } from '@ai-sdk/provider' +import type { FetchFunction } from '@ai-sdk/provider-utils' type OpenAICompatibleEmbeddingConfig = { /** Override the maximum number of embeddings per call. */ - maxEmbeddingsPerCall?: number; + maxEmbeddingsPerCall?: number /** Override the parallelism of embedding calls. 
*/ - supportsParallelCalls?: boolean; + supportsParallelCalls?: boolean - provider: string; - url: (options: { modelId: string; path: string }) => string; - headers: () => Record; - fetch?: FetchFunction; - errorStructure?: ProviderErrorStructure; -}; + provider: string + url: (options: { modelId: string; path: string }) => string + headers: () => Record + fetch?: FetchFunction + errorStructure?: ProviderErrorStructure +} -export class OpenAICompatibleEmbeddingModel - implements EmbeddingModelV2 -{ - readonly specificationVersion = 'v2'; - readonly modelId: OpenAICompatibleEmbeddingModelId; +export class OpenAICompatibleEmbeddingModel implements EmbeddingModelV2 { + readonly specificationVersion = 'v2' + readonly modelId: OpenAICompatibleEmbeddingModelId - private readonly config: OpenAICompatibleEmbeddingConfig; + private readonly config: OpenAICompatibleEmbeddingConfig get provider(): string { - return this.config.provider; + return this.config.provider } get maxEmbeddingsPerCall(): number { - return this.config.maxEmbeddingsPerCall ?? 2048; + return this.config.maxEmbeddingsPerCall ?? 2048 } get supportsParallelCalls(): boolean { - return this.config.supportsParallelCalls ?? true; + return this.config.supportsParallelCalls ?? 
true } constructor( modelId: OpenAICompatibleEmbeddingModelId, config: OpenAICompatibleEmbeddingConfig, ) { - this.modelId = modelId; - this.config = config; + this.modelId = modelId + this.config = config } private get providerOptionsName(): string { - return this.config.provider.split('.')[0].trim(); + return this.config.provider.split('.')[0].trim() } async doEmbed({ @@ -88,16 +76,16 @@ export class OpenAICompatibleEmbeddingModel provider: 'openai-compatible', providerOptions, schema: openaiCompatibleEmbeddingProviderOptions, - }); + }) const providerOptionsResult = await parseProviderOptions({ provider: this.providerOptionsName, providerOptions, schema: openaiCompatibleEmbeddingProviderOptions, - }); + }) const compatibleOptions = Object.assign( baseOptionsResult ?? {}, providerOptionsResult ?? {}, - ); + ) if (values.length > this.maxEmbeddingsPerCall) { throw new TooManyEmbeddingValuesForCallError({ @@ -105,7 +93,7 @@ export class OpenAICompatibleEmbeddingModel modelId: this.modelId, maxEmbeddingsPerCall: this.maxEmbeddingsPerCall, values, - }); + }) } const { @@ -133,16 +121,16 @@ export class OpenAICompatibleEmbeddingModel ), abortSignal, fetch: this.config.fetch, - }); + }) return { - embeddings: response.data.map(item => item.embedding), + embeddings: response.data.map((item) => item.embedding), usage: response.usage ? 
{ tokens: response.usage.prompt_tokens } : undefined, providerMetadata: response.providerMetadata, response: { headers: responseHeaders, body: rawValue }, - }; + } } } @@ -154,4 +142,4 @@ const openaiTextEmbeddingResponseSchema = z.object({ providerMetadata: z .record(z.string(), z.record(z.string(), z.any())) .optional(), -}); +}) diff --git a/packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-options.ts b/packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-options.ts similarity index 85% rename from packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-options.ts rename to packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-options.ts index 1bfef6d69c..fec65b664e 100644 --- a/packages/internal/src/openai-compatible/embedding/openai-compatible-embedding-options.ts +++ b/packages/llm-providers/src/openai-compatible/embedding/openai-compatible-embedding-options.ts @@ -1,6 +1,6 @@ -import { z } from 'zod/v4'; +import { z } from 'zod/v4' -export type OpenAICompatibleEmbeddingModelId = string; +export type OpenAICompatibleEmbeddingModelId = string export const openaiCompatibleEmbeddingProviderOptions = z.object({ /** @@ -14,8 +14,8 @@ export const openaiCompatibleEmbeddingProviderOptions = z.object({ * monitor and detect abuse. 
*/ user: z.string().optional(), -}); +}) export type OpenAICompatibleEmbeddingProviderOptions = z.infer< typeof openaiCompatibleEmbeddingProviderOptions ->; +> diff --git a/packages/internal/src/openai-compatible/image/openai-compatible-image-model.ts b/packages/llm-providers/src/openai-compatible/image/openai-compatible-image-model.ts similarity index 73% rename from packages/internal/src/openai-compatible/image/openai-compatible-image-model.ts rename to packages/llm-providers/src/openai-compatible/image/openai-compatible-image-model.ts index 1a0dcc040b..e6ec4e7db4 100644 --- a/packages/internal/src/openai-compatible/image/openai-compatible-image-model.ts +++ b/packages/llm-providers/src/openai-compatible/image/openai-compatible-image-model.ts @@ -3,37 +3,33 @@ import { createJsonErrorResponseHandler, createJsonResponseHandler, postJsonToApi, -} from '@ai-sdk/provider-utils'; -import { z } from 'zod/v4'; +} from '@ai-sdk/provider-utils' +import { z } from 'zod/v4' -import { - defaultOpenAICompatibleErrorStructure -} from '../openai-compatible-error'; +import { defaultOpenAICompatibleErrorStructure } from '../openai-compatible-error' -import type { - ProviderErrorStructure} from '../openai-compatible-error'; -import type { OpenAICompatibleImageModelId } from './openai-compatible-image-settings'; -import type { ImageModelV2, ImageModelV2CallWarning } from '@ai-sdk/provider'; -import type { - FetchFunction} from '@ai-sdk/provider-utils'; +import type { ProviderErrorStructure } from '../openai-compatible-error' +import type { OpenAICompatibleImageModelId } from './openai-compatible-image-settings' +import type { ImageModelV2, ImageModelV2CallWarning } from '@ai-sdk/provider' +import type { FetchFunction } from '@ai-sdk/provider-utils' export type OpenAICompatibleImageModelConfig = { - provider: string; - headers: () => Record; - url: (options: { modelId: string; path: string }) => string; - fetch?: FetchFunction; - errorStructure?: ProviderErrorStructure; + provider: 
string + headers: () => Record + url: (options: { modelId: string; path: string }) => string + fetch?: FetchFunction + errorStructure?: ProviderErrorStructure _internal?: { - currentDate?: () => Date; - }; -}; + currentDate?: () => Date + } +} export class OpenAICompatibleImageModel implements ImageModelV2 { - readonly specificationVersion = 'v2'; - readonly maxImagesPerCall = 10; + readonly specificationVersion = 'v2' + readonly maxImagesPerCall = 10 get provider(): string { - return this.config.provider; + return this.config.provider } constructor( @@ -53,7 +49,7 @@ export class OpenAICompatibleImageModel implements ImageModelV2 { }: Parameters[0]): Promise< Awaited> > { - const warnings: Array = []; + const warnings: Array = [] if (aspectRatio != null) { warnings.push({ @@ -61,14 +57,14 @@ export class OpenAICompatibleImageModel implements ImageModelV2 { setting: 'aspectRatio', details: 'This model does not support aspect ratio. Use `size` instead.', - }); + }) } if (seed != null) { - warnings.push({ type: 'unsupported-setting', setting: 'seed' }); + warnings.push({ type: 'unsupported-setting', setting: 'seed' }) } - const currentDate = this.config._internal?.currentDate?.() ?? new Date(); + const currentDate = this.config._internal?.currentDate?.() ?? 
new Date() const { value: response, responseHeaders } = await postJsonToApi({ url: this.config.url({ path: '/images/generations', @@ -91,17 +87,17 @@ export class OpenAICompatibleImageModel implements ImageModelV2 { ), abortSignal, fetch: this.config.fetch, - }); + }) return { - images: response.data.map(item => item.b64_json), + images: response.data.map((item) => item.b64_json), warnings, response: { timestamp: currentDate, modelId: this.modelId, headers: responseHeaders, }, - }; + } } } @@ -109,4 +105,4 @@ export class OpenAICompatibleImageModel implements ImageModelV2 { // this approach limits breakages when the API changes and increases efficiency const openaiCompatibleImageResponseSchema = z.object({ data: z.array(z.object({ b64_json: z.string() })), -}); +}) diff --git a/packages/llm-providers/src/openai-compatible/image/openai-compatible-image-settings.ts b/packages/llm-providers/src/openai-compatible/image/openai-compatible-image-settings.ts new file mode 100644 index 0000000000..6dacdd9d53 --- /dev/null +++ b/packages/llm-providers/src/openai-compatible/image/openai-compatible-image-settings.ts @@ -0,0 +1 @@ +export type OpenAICompatibleImageModelId = string diff --git a/packages/internal/src/openai-compatible/index.ts b/packages/llm-providers/src/openai-compatible/index.ts similarity index 64% rename from packages/internal/src/openai-compatible/index.ts rename to packages/llm-providers/src/openai-compatible/index.ts index 75da5c767b..6d7686255c 100644 --- a/packages/internal/src/openai-compatible/index.ts +++ b/packages/llm-providers/src/openai-compatible/index.ts @@ -1,27 +1,27 @@ -export { OpenAICompatibleChatLanguageModel } from './chat/openai-compatible-chat-language-model'; +export { OpenAICompatibleChatLanguageModel } from './chat/openai-compatible-chat-language-model' export type { OpenAICompatibleChatModelId, OpenAICompatibleProviderOptions, -} from './chat/openai-compatible-chat-options'; -export { OpenAICompatibleCompletionLanguageModel } from 
'./completion/openai-compatible-completion-language-model'; +} from './chat/openai-compatible-chat-options' +export { OpenAICompatibleCompletionLanguageModel } from './completion/openai-compatible-completion-language-model' export type { OpenAICompatibleCompletionModelId, OpenAICompatibleCompletionProviderOptions, -} from './completion/openai-compatible-completion-options'; -export { OpenAICompatibleEmbeddingModel } from './embedding/openai-compatible-embedding-model'; +} from './completion/openai-compatible-completion-options' +export { OpenAICompatibleEmbeddingModel } from './embedding/openai-compatible-embedding-model' export type { OpenAICompatibleEmbeddingModelId, OpenAICompatibleEmbeddingProviderOptions, -} from './embedding/openai-compatible-embedding-options'; -export { OpenAICompatibleImageModel } from './image/openai-compatible-image-model'; +} from './embedding/openai-compatible-embedding-options' +export { OpenAICompatibleImageModel } from './image/openai-compatible-image-model' export type { OpenAICompatibleErrorData, ProviderErrorStructure, -} from './openai-compatible-error'; -export type { MetadataExtractor } from './chat/openai-compatible-metadata-extractor'; -export { createOpenAICompatible } from './openai-compatible-provider'; +} from './openai-compatible-error' +export type { MetadataExtractor } from './chat/openai-compatible-metadata-extractor' +export { createOpenAICompatible } from './openai-compatible-provider' export type { OpenAICompatibleProvider, OpenAICompatibleProviderSettings, -} from './openai-compatible-provider'; -export { VERSION } from './version'; +} from './openai-compatible-provider' +export { VERSION } from './version' diff --git a/packages/internal/src/openai-compatible/internal/index.ts b/packages/llm-providers/src/openai-compatible/internal/index.ts similarity index 69% rename from packages/internal/src/openai-compatible/internal/index.ts rename to packages/llm-providers/src/openai-compatible/internal/index.ts index 
2b30d3fa18..632f1fed1a 100644 --- a/packages/internal/src/openai-compatible/internal/index.ts +++ b/packages/llm-providers/src/openai-compatible/internal/index.ts @@ -1,4 +1,4 @@ -export { convertToOpenAICompatibleChatMessages } from '../chat/convert-to-openai-compatible-chat-messages'; -export { mapOpenAICompatibleFinishReason } from '../chat/map-openai-compatible-finish-reason'; -export { getResponseMetadata } from '../chat/get-response-metadata'; -export type { OpenAICompatibleChatConfig } from '../chat/openai-compatible-chat-language-model'; +export { convertToOpenAICompatibleChatMessages } from '../chat/convert-to-openai-compatible-chat-messages' +export { mapOpenAICompatibleFinishReason } from '../chat/map-openai-compatible-finish-reason' +export { getResponseMetadata } from '../chat/get-response-metadata' +export type { OpenAICompatibleChatConfig } from '../chat/openai-compatible-chat-language-model' diff --git a/packages/internal/src/openai-compatible/openai-compatible-error.ts b/packages/llm-providers/src/openai-compatible/openai-compatible-error.ts similarity index 72% rename from packages/internal/src/openai-compatible/openai-compatible-error.ts rename to packages/llm-providers/src/openai-compatible/openai-compatible-error.ts index 5d19ebdcb3..b14cdb3f4c 100644 --- a/packages/internal/src/openai-compatible/openai-compatible-error.ts +++ b/packages/llm-providers/src/openai-compatible/openai-compatible-error.ts @@ -1,6 +1,6 @@ -import { z } from 'zod/v4'; +import { z } from 'zod/v4' -import type { ZodType } from 'zod/v4'; +import type { ZodType } from 'zod/v4' export const openaiCompatibleErrorDataSchema = z.object({ error: z.object({ @@ -13,20 +13,20 @@ export const openaiCompatibleErrorDataSchema = z.object({ param: z.any().nullish(), code: z.union([z.string(), z.number()]).nullish(), }), -}); +}) export type OpenAICompatibleErrorData = z.infer< typeof openaiCompatibleErrorDataSchema ->; +> export type ProviderErrorStructure = { - errorSchema: ZodType; - 
errorToMessage: (error: T) => string; - isRetryable?: (response: Response, error?: T) => boolean; -}; + errorSchema: ZodType + errorToMessage: (error: T) => string + isRetryable?: (response: Response, error?: T) => boolean +} export const defaultOpenAICompatibleErrorStructure: ProviderErrorStructure = { errorSchema: openaiCompatibleErrorDataSchema, - errorToMessage: data => data.error.message, - }; + errorToMessage: (data) => data.error.message, + } diff --git a/packages/internal/src/openai-compatible/openai-compatible-provider.ts b/packages/llm-providers/src/openai-compatible/openai-compatible-provider.ts similarity index 70% rename from packages/internal/src/openai-compatible/openai-compatible-provider.ts rename to packages/llm-providers/src/openai-compatible/openai-compatible-provider.ts index dcd2a546a2..f6b4a36b7f 100644 --- a/packages/internal/src/openai-compatible/openai-compatible-provider.ts +++ b/packages/llm-providers/src/openai-compatible/openai-compatible-provider.ts @@ -1,26 +1,22 @@ import { withoutTrailingSlash, withUserAgentSuffix, -} from '@ai-sdk/provider-utils'; +} from '@ai-sdk/provider-utils' -import { - OpenAICompatibleChatLanguageModel, -} from './chat/openai-compatible-chat-language-model'; -import { OpenAICompatibleCompletionLanguageModel } from './completion/openai-compatible-completion-language-model'; -import { OpenAICompatibleEmbeddingModel } from './embedding/openai-compatible-embedding-model'; -import { OpenAICompatibleImageModel } from './image/openai-compatible-image-model'; -import { VERSION } from './version'; +import { OpenAICompatibleChatLanguageModel } from './chat/openai-compatible-chat-language-model' +import { OpenAICompatibleCompletionLanguageModel } from './completion/openai-compatible-completion-language-model' +import { OpenAICompatibleEmbeddingModel } from './embedding/openai-compatible-embedding-model' +import { OpenAICompatibleImageModel } from './image/openai-compatible-image-model' +import { VERSION } from 
'./version' -import type { - OpenAICompatibleChatConfig} from './chat/openai-compatible-chat-language-model'; +import type { OpenAICompatibleChatConfig } from './chat/openai-compatible-chat-language-model' import type { EmbeddingModelV2, ImageModelV2, LanguageModelV2, ProviderV2, -} from '@ai-sdk/provider'; -import type { - FetchFunction} from '@ai-sdk/provider-utils'; +} from '@ai-sdk/provider' +import type { FetchFunction } from '@ai-sdk/provider-utils' export interface OpenAICompatibleProvider< CHAT_MODEL_IDS extends string = string, @@ -28,66 +24,66 @@ export interface OpenAICompatibleProvider< EMBEDDING_MODEL_IDS extends string = string, IMAGE_MODEL_IDS extends string = string, > extends Omit { - (modelId: CHAT_MODEL_IDS): LanguageModelV2; + (modelId: CHAT_MODEL_IDS): LanguageModelV2 languageModel( modelId: CHAT_MODEL_IDS, config?: Partial, - ): LanguageModelV2; + ): LanguageModelV2 - chatModel(modelId: CHAT_MODEL_IDS): LanguageModelV2; + chatModel(modelId: CHAT_MODEL_IDS): LanguageModelV2 - completionModel(modelId: COMPLETION_MODEL_IDS): LanguageModelV2; + completionModel(modelId: COMPLETION_MODEL_IDS): LanguageModelV2 - textEmbeddingModel(modelId: EMBEDDING_MODEL_IDS): EmbeddingModelV2; + textEmbeddingModel(modelId: EMBEDDING_MODEL_IDS): EmbeddingModelV2 - imageModel(modelId: IMAGE_MODEL_IDS): ImageModelV2; + imageModel(modelId: IMAGE_MODEL_IDS): ImageModelV2 } export interface OpenAICompatibleProviderSettings { /** Base URL for the API calls. */ - baseURL: string; + baseURL: string /** Provider name. */ - name: string; + name: string /** API key for authenticating requests. If specified, adds an `Authorization` header to request headers with the value `Bearer `. This will be added before any headers potentially specified in the `headers` option. */ - apiKey?: string; + apiKey?: string /** Optional custom headers to include in requests. These will be added to request headers after any headers potentially added by use of the `apiKey` option. 
*/ - headers?: Record; + headers?: Record /** Optional custom url query parameters to include in request urls. */ - queryParams?: Record; + queryParams?: Record /** Custom fetch implementation. You can use it as a middleware to intercept requests, or to provide a custom fetch implementation for e.g. testing. */ - fetch?: FetchFunction; + fetch?: FetchFunction /** Include usage information in streaming responses. */ - includeUsage?: boolean; + includeUsage?: boolean /** * Whether the provider supports structured outputs in chat models. */ - supportsStructuredOutputs?: boolean; + supportsStructuredOutputs?: boolean } /** @@ -106,73 +102,73 @@ export function createOpenAICompatible< EMBEDDING_MODEL_IDS, IMAGE_MODEL_IDS > { - const baseURL = withoutTrailingSlash(options.baseURL); - const providerName = options.name; + const baseURL = withoutTrailingSlash(options.baseURL) + const providerName = options.name interface CommonModelConfig { - provider: string; - url: ({ path }: { path: string }) => string; - headers: () => Record; - fetch?: FetchFunction; + provider: string + url: ({ path }: { path: string }) => string + headers: () => Record + fetch?: FetchFunction } const headers = { ...(options.apiKey && { Authorization: `Bearer ${options.apiKey}` }), ...options.headers, - }; + } const getHeaders = () => - withUserAgentSuffix(headers, `ai-sdk/openai-compatible/${VERSION}`); + withUserAgentSuffix(headers, `ai-sdk/openai-compatible/${VERSION}`) const getCommonModelConfig = (modelType: string): CommonModelConfig => ({ provider: `${providerName}.${modelType}`, url: ({ path }) => { - const url = new URL(`${baseURL}${path}`); + const url = new URL(`${baseURL}${path}`) if (options.queryParams) { - url.search = new URLSearchParams(options.queryParams).toString(); + url.search = new URLSearchParams(options.queryParams).toString() } - return url.toString(); + return url.toString() }, headers: getHeaders, fetch: options.fetch, - }); + }) const createLanguageModel = (modelId: 
CHAT_MODEL_IDS) => - createChatModel(modelId); + createChatModel(modelId) const createChatModel = (modelId: CHAT_MODEL_IDS) => new OpenAICompatibleChatLanguageModel(modelId, { ...getCommonModelConfig('chat'), includeUsage: options.includeUsage, supportsStructuredOutputs: options.supportsStructuredOutputs, - }); + }) const createCompletionModel = (modelId: COMPLETION_MODEL_IDS) => new OpenAICompatibleCompletionLanguageModel(modelId, { ...getCommonModelConfig('completion'), includeUsage: options.includeUsage, - }); + }) const createEmbeddingModel = (modelId: EMBEDDING_MODEL_IDS) => new OpenAICompatibleEmbeddingModel(modelId, { ...getCommonModelConfig('embedding'), - }); + }) const createImageModel = (modelId: IMAGE_MODEL_IDS) => - new OpenAICompatibleImageModel(modelId, getCommonModelConfig('image')); + new OpenAICompatibleImageModel(modelId, getCommonModelConfig('image')) - const provider = (modelId: CHAT_MODEL_IDS) => createLanguageModel(modelId); + const provider = (modelId: CHAT_MODEL_IDS) => createLanguageModel(modelId) - provider.languageModel = createLanguageModel; - provider.chatModel = createChatModel; - provider.completionModel = createCompletionModel; - provider.textEmbeddingModel = createEmbeddingModel; - provider.imageModel = createImageModel; + provider.languageModel = createLanguageModel + provider.chatModel = createChatModel + provider.completionModel = createCompletionModel + provider.textEmbeddingModel = createEmbeddingModel + provider.imageModel = createImageModel return provider as OpenAICompatibleProvider< CHAT_MODEL_IDS, COMPLETION_MODEL_IDS, EMBEDDING_MODEL_IDS, IMAGE_MODEL_IDS - >; + > } diff --git a/packages/internal/src/openai-compatible/version.ts b/packages/llm-providers/src/openai-compatible/version.ts similarity index 57% rename from packages/internal/src/openai-compatible/version.ts rename to packages/llm-providers/src/openai-compatible/version.ts index 8fda877d6d..e8c98e309f 100644 --- 
a/packages/internal/src/openai-compatible/version.ts +++ b/packages/llm-providers/src/openai-compatible/version.ts @@ -1,5 +1,5 @@ -declare const __PACKAGE_VERSION__: string | undefined; +declare const __PACKAGE_VERSION__: string | undefined export const VERSION: string = typeof __PACKAGE_VERSION__ !== 'undefined' ? __PACKAGE_VERSION__ - : '0.0.0-test'; + : '0.0.0-test' diff --git a/packages/llm-providers/tsconfig.json b/packages/llm-providers/tsconfig.json new file mode 100644 index 0000000000..51864d1a50 --- /dev/null +++ b/packages/llm-providers/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "target": "ES2022", + "types": ["bun", "node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/sdk/scripts/build.ts b/sdk/scripts/build.ts index 854e1ac5fc..a93b246b98 100644 --- a/sdk/scripts/build.ts +++ b/sdk/scripts/build.ts @@ -107,6 +107,7 @@ async function build() { '@codebuff/common', '@codebuff/agent-runtime', '@codebuff/code-map', + '@codebuff/llm-providers', ], }, }, @@ -159,10 +160,7 @@ async function fixDuplicateImports() { await writeFile('dist/index.d.ts', content) console.log(' ✓ Fixed duplicate imports in bundled types') } catch (error) { - console.warn( - ' ⚠ Warning: Could not fix duplicate imports:', - error.message, - ) + console.warn(' ⚠ Warning: Could not fix duplicate imports:', error.message) } } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 60bb678bb1..06988fc565 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -3,7 +3,11 @@ import { isFreeMode } from '@codebuff/common/constants/free-agents' import { models, PROFIT_MARGIN } from '@codebuff/common/old-constants' import { buildArray } from '@codebuff/common/util/array' import { normalizeProviderRequestBodyForCacheDebug } from '@codebuff/common/util/cache-debug' -import { getErrorObject, promptAborted, promptSuccess } from '@codebuff/common/util/error' +import { + getErrorObject, + 
promptAborted, + promptSuccess, +} from '@codebuff/common/util/error' import { convertCbToModelMessages } from '@codebuff/common/util/messages' import { isExplicitlyDefinedModel } from '@codebuff/common/util/model-utils' import { StopSequenceHandler } from '@codebuff/common/util/stop-sequence' @@ -26,7 +30,10 @@ import { refreshChatGptOAuthToken } from '../credentials' import { getErrorStatusCode } from '../error-utils' import type { ModelRequestParams } from './model-provider' -import type { OpenRouterProviderRoutingOptions } from '@codebuff/common/types/agent-template' +import type { + OpenRouterProviderOptions, + OpenRouterProviderRoutingOptions, +} from '@codebuff/common/types/agent-template' import type { PromptAiSdkFn, PromptAiSdkStreamFn, @@ -35,7 +42,6 @@ import type { } from '@codebuff/common/types/contracts/llm' import type { ParamsOf } from '@codebuff/common/types/function-params' import type { JSONObject } from '@codebuff/common/types/json' -import type { OpenRouterProviderOptions } from '@codebuff/internal/openrouter-ai-sdk' import type { LanguageModel } from 'ai' import type z from 'zod/v4' @@ -283,12 +289,15 @@ export async function* promptAiSdkStream( chatGptOAuthRetried?: boolean }, ): ReturnType { + const { providerOptions: originalProviderOptions, ...streamParams } = params + const { - providerOptions: originalProviderOptions, - ...streamParams + logger, + trackEvent, + userId, + userInputId, + model: requestedModel, } = params - - const { logger, trackEvent, userId, userInputId, model: requestedModel } = params const agentChunkMetadata = params.agentId != null ? { agentId: params.agentId } : undefined @@ -334,12 +343,12 @@ export async function* promptAiSdkStream( ...(isChatGptOAuth ? 
{} : { - providerOptions: getProviderOptions({ - ...params, - providerOptions: originalProviderOptions, - agentProviderOptions: params.agentProviderOptions, + providerOptions: getProviderOptions({ + ...params, + providerOptions: originalProviderOptions, + agentProviderOptions: params.agentProviderOptions, + }), }), - }), // Handle tool call errors gracefully by passing them through to our validation layer // instead of throwing (which would halt the agent). The only special case is when // the tool name matches a spawnable agent - transform those to spawn_agents calls. @@ -516,7 +525,10 @@ export async function* promptAiSdkStream( }) if (chatGptErrorPolicy === 'fallback-rate-limit') { - const rateLimitErrorDetails = chunkValue.error instanceof Error ? chunkValue.error.message : String(chunkValue.error) + const rateLimitErrorDetails = + chunkValue.error instanceof Error + ? chunkValue.error.message + : String(chunkValue.error) logger.warn( { error: getErrorObject(chunkValue.error) }, 'ChatGPT OAuth rate limited during stream', @@ -568,14 +580,20 @@ export async function* promptAiSdkStream( if (!params.chatGptOAuthRetried) { const refreshed = await refreshChatGptOAuthToken() if (refreshed) { - logger.info({ model: requestedModel }, 'ChatGPT OAuth token refreshed, retrying request') + logger.info( + { model: requestedModel }, + 'ChatGPT OAuth token refreshed, retrying request', + ) const retryResult = yield* promptAiSdkStream({ ...params, chatGptOAuthRetried: true, }) return retryResult } - logger.warn({ model: requestedModel }, 'ChatGPT OAuth token refresh failed, unable to recover') + logger.warn( + { model: requestedModel }, + 'ChatGPT OAuth token refresh failed, unable to recover', + ) } // Refresh failed or already retried @@ -609,11 +627,8 @@ export async function* promptAiSdkStream( if (chunkValue.type === 'reasoning-delta') { const reasoningExcluded = (['openrouter', 'codebuff'] as const).some( (p) => - ( - params.providerOptions?.[p] as - | 
OpenRouterProviderOptions - | undefined - )?.reasoning?.exclude, + (params.providerOptions?.[p] as OpenRouterProviderOptions | undefined) + ?.reasoning?.exclude, ) if (!reasoningExcluded) { yield { diff --git a/sdk/src/impl/model-provider.ts b/sdk/src/impl/model-provider.ts index 83e016c611..268c7394d0 100644 --- a/sdk/src/impl/model-provider.ts +++ b/sdk/src/impl/model-provider.ts @@ -20,12 +20,10 @@ import { import { OpenAICompatibleChatLanguageModel, VERSION, -} from '@codebuff/internal/openai-compatible/index' +} from '@codebuff/llm-providers/openai-compatible' import { WEBSITE_URL } from '../constants' -import { - getValidChatGptOAuthCredentials, -} from '../credentials' +import { getValidChatGptOAuthCredentials } from '../credentials' import { getByokOpenrouterApiKeyFromEnv } from '../env' import { createChatGptBackendFetch, @@ -111,10 +109,12 @@ type OpenRouterUsageAccounting = { * * If ChatGPT OAuth credentials are available and the model is an OpenAI model, * returns an OpenAI direct model. Otherwise, returns the Codebuff backend model. - * + * * This function is async because it may need to refresh the OAuth token. 
*/ -export async function getModelForRequest(params: ModelRequestParams): Promise { +export async function getModelForRequest( + params: ModelRequestParams, +): Promise { const { apiKey, model, skipChatGptOAuth, costMode } = params // Check if we should use ChatGPT OAuth direct @@ -138,7 +138,10 @@ export async function getModelForRequest(params: ModelRequestParams): Promise Date: Sat, 23 May 2026 14:30:08 -0700 Subject: [PATCH 745/749] Add chat completion concurrency logging (#733) --- .../completions/__tests__/completions.test.ts | 1 + .../__tests__/request-metrics.test.ts | 113 ++++++++++++++++ web/src/app/api/v1/chat/completions/_post.ts | 15 ++- .../v1/chat/completions/request-metrics.ts | 125 ++++++++++++++++++ 4 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 web/src/app/api/v1/chat/completions/__tests__/request-metrics.test.ts create mode 100644 web/src/app/api/v1/chat/completions/request-metrics.ts diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index c8fdaa232a..5704535f89 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -1779,6 +1779,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.headers.get('Content-Type')).toBe('text/event-stream') expect(response.headers.get('Cache-Control')).toBe('no-cache') expect(response.headers.get('Connection')).toBe('keep-alive') + expect(await response.text()).toContain(' stream') }, FETCH_PATH_TEST_TIMEOUT_MS, ) diff --git a/web/src/app/api/v1/chat/completions/__tests__/request-metrics.test.ts b/web/src/app/api/v1/chat/completions/__tests__/request-metrics.test.ts new file mode 100644 index 0000000000..ce6f6544f4 --- /dev/null +++ b/web/src/app/api/v1/chat/completions/__tests__/request-metrics.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, it, mock } from 'bun:test' + 
+import { + beginChatCompletionRequestMetrics, + getActiveChatCompletionRequestCount, +} from '../request-metrics' + +import type { Logger } from '@codebuff/common/types/contracts/logger' + +const createLogger = (): Logger => ({ + debug: mock(() => {}), + error: mock(() => {}), + info: mock(() => {}), + warn: mock(() => {}), +}) + +const baseParams = (logger: Logger) => ({ + logger, + userId: 'user-1', + agentId: 'agent-1', + runId: 'run-1', + model: 'provider/model', + streaming: true, + costMode: 'normal', + logSampleRate: 1, +}) + +const drainStream = async (stream: ReadableStream) => { + const reader = stream.getReader() + while (true) { + const { done } = await reader.read() + if (done) return + } +} + +describe('chat completion request metrics', () => { + it('increments and decrements when manually ended', () => { + const logger = createLogger() + const metrics = beginChatCompletionRequestMetrics(baseParams(logger)) + + expect(getActiveChatCompletionRequestCount()).toBe(1) + + metrics.end('completed') + metrics.end('completed') + + expect(getActiveChatCompletionRequestCount()).toBe(0) + expect(logger.info).toHaveBeenCalledTimes(2) + }) + + it('tracks requests without logging when sampling skips the request', () => { + const logger = createLogger() + const metrics = beginChatCompletionRequestMetrics({ + ...baseParams(logger), + logSampleRate: 0, + }) + + expect(getActiveChatCompletionRequestCount()).toBe(1) + + metrics.end('completed') + + expect(getActiveChatCompletionRequestCount()).toBe(0) + expect(logger.info).toHaveBeenCalledTimes(0) + }) + + it('decrements when a wrapped stream completes', async () => { + const logger = createLogger() + const metrics = beginChatCompletionRequestMetrics(baseParams(logger)) + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('data: test\n\n')) + controller.close() + }, + }) + + await drainStream(metrics.wrapStream(stream)) + + 
expect(getActiveChatCompletionRequestCount()).toBe(0) + expect(logger.info).toHaveBeenCalledTimes(2) + }) + + it('decrements when a wrapped stream is cancelled', async () => { + const logger = createLogger() + const metrics = beginChatCompletionRequestMetrics(baseParams(logger)) + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('data: test\n\n')) + }, + }) + + const reader = metrics.wrapStream(stream).getReader() + await reader.cancel('client disconnected') + + expect(getActiveChatCompletionRequestCount()).toBe(0) + expect(logger.info).toHaveBeenCalledTimes(2) + }) + + it('decrements when a wrapped stream errors', async () => { + const logger = createLogger() + const metrics = beginChatCompletionRequestMetrics(baseParams(logger)) + const stream = new ReadableStream({ + pull() { + throw new Error('provider stream failed') + }, + }) + + await expect(drainStream(metrics.wrapStream(stream))).rejects.toThrow( + 'provider stream failed', + ) + + expect(getActiveChatCompletionRequestCount()).toBe(0) + expect(logger.info).toHaveBeenCalledTimes(2) + }) +}) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index b23e5fe1b7..76aa892485 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -116,6 +116,7 @@ import type { import { extractApiKeyFromHeader } from '@/util/auth' import { withDefaultProperties } from '@codebuff/common/analytics' import { checkFreeModeRateLimit as defaultCheckFreeModeRateLimit } from './free-mode-rate-limiter' +import { beginChatCompletionRequestMetrics } from './request-metrics' export const formatQuotaResetCountdown = ( nextQuotaReset: string | null | undefined, @@ -794,6 +795,16 @@ export async function postChatCompletions(params: { insertChatCompletionTraceBigquery, }) + const requestMetrics = beginChatCompletionRequestMetrics({ + logger, + userId, + agentId, + runId: 
runIdFromBody, + model: typedBody.model, + streaming: bodyStream, + costMode, + }) + // Handle streaming vs non-streaming try { if (bodyStream) { @@ -859,7 +870,7 @@ export async function postChatCompletions(params: { logger, }) - return new NextResponse(stream, { + return new NextResponse(requestMetrics.wrapStream(stream), { headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', @@ -934,9 +945,11 @@ export async function postChatCompletions(params: { logger, }) + requestMetrics.end('completed') return NextResponse.json(result) } } catch (error) { + requestMetrics.end('error', { error: getErrorObject(error) }) let openrouterError: OpenRouterError | undefined if (error instanceof OpenRouterError) { openrouterError = error diff --git a/web/src/app/api/v1/chat/completions/request-metrics.ts b/web/src/app/api/v1/chat/completions/request-metrics.ts new file mode 100644 index 0000000000..54af0063c8 --- /dev/null +++ b/web/src/app/api/v1/chat/completions/request-metrics.ts @@ -0,0 +1,125 @@ +import os from 'os' + +import { getErrorObject } from '@codebuff/common/util/error' + +import type { Logger } from '@codebuff/common/types/contracts/logger' + +const HOSTNAME = os.hostname() +const DEFAULT_LOG_SAMPLE_RATE = 0.05 + +let activeChatCompletionRequests = 0 +let nextRequestSequence = 0 + +type RequestMetricsParams = { + logger: Logger + userId: string + agentId: string + runId: string + model: string + streaming: boolean + costMode: string | undefined + logSampleRate?: number +} + +type EndReason = 'completed' | 'cancelled' | 'error' + +export function beginChatCompletionRequestMetrics({ + logger, + userId, + agentId, + runId, + model, + streaming, + costMode, + logSampleRate = DEFAULT_LOG_SAMPLE_RATE, +}: RequestMetricsParams) { + const requestSequence = ++nextRequestSequence + const startedAt = Date.now() + activeChatCompletionRequests += 1 + const activeRequestsAtStart = activeChatCompletionRequests + const normalizedLogSampleRate = Math.max(0, 
Math.min(1, logSampleRate)) + const shouldLog = Math.random() < normalizedLogSampleRate + + const baseFields = { + metric: 'chat_completion_concurrency', + host: HOSTNAME, + pid: process.pid, + requestSequence, + userId, + agentId, + runId, + model, + streaming, + costMode, + logSampleRate: normalizedLogSampleRate, + } + + if (shouldLog) { + logger.info( + { + ...baseFields, + event: 'start', + activeChatCompletionRequests: activeRequestsAtStart, + }, + 'Chat completion request started', + ) + } + + let ended = false + + const end = (reason: EndReason, extra?: Record) => { + if (ended) return + ended = true + activeChatCompletionRequests = Math.max(0, activeChatCompletionRequests - 1) + + if (!shouldLog) return + + logger.info( + { + ...baseFields, + ...extra, + event: 'finish', + endReason: reason, + durationMs: Date.now() - startedAt, + activeRequestsAtStart, + activeChatCompletionRequests, + }, + 'Chat completion request finished', + ) + } + + return { + end, + wrapStream(stream: ReadableStream) { + const reader = stream.getReader() + + return new ReadableStream({ + async pull(controller) { + try { + const { done, value } = await reader.read() + if (done) { + end('completed') + controller.close() + return + } + controller.enqueue(value) + } catch (error) { + end('error', { error: getErrorObject(error) }) + controller.error(error) + } + }, + async cancel(reason) { + end('cancelled', { + cancelReason: + typeof reason === 'string' ? 
reason : getErrorObject(reason), + }) + await reader.cancel(reason) + }, + }) + }, + } +} + +export function getActiveChatCompletionRequestCount() { + return activeChatCompletionRequests +} From 250d4aaf669a3567fedcd5072fa3d3bb58d91a03 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 23 May 2026 16:38:25 -0700 Subject: [PATCH 746/749] Remove GLM from freebuff sessions (#737) --- cli/src/hooks/use-freebuff-session.ts | 11 ++- common/src/__tests__/freebuff-models.test.ts | 16 ++-- common/src/constants/free-agents.ts | 7 +- common/src/constants/freebuff-models.ts | 19 +---- docs/freebuff-waiting-room.md | 26 +++---- .../completions/__tests__/completions.test.ts | 22 ++---- .../session/__tests__/session.test.ts | 10 +-- .../free-session/__tests__/admission.test.ts | 13 ---- .../free-session/__tests__/public-api.test.ts | 73 +++++++++---------- web/src/server/free-session/config.ts | 2 - 10 files changed, 74 insertions(+), 125 deletions(-) diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index d66fba5aaf..fd6bfd57c8 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -514,9 +514,8 @@ export function useFreebuffSession(): UseFreebuffSessionResult { return } if (next.status === 'model_unavailable') { - // Server says the requested model isn't available right now (e.g. - // legacy GLM 5.1 outside deployment hours). Flip to the - // always-available fallback for this run. In-memory only — + // Server says the requested model isn't available right now. Flip + // to the always-available fallback for this run. In-memory only — // `setSelectedModel` doesn't persist, so the user's saved preference // is preserved for their next launch. useFreebuffModelStore @@ -637,15 +636,15 @@ export function useFreebuffSession(): UseFreebuffSessionResult { if (response.status === 'none' || response.status === 'queued') { apply({ status: 'none', - accessTier: - response.accessTier ?? 
landingSession.accessTier, + accessTier: response.accessTier ?? landingSession.accessTier, queueDepthByModel: response.queueDepthByModel ?? landingSession.queueDepthByModel, rateLimitsByModel: response.rateLimitsByModel ?? landingSession.rateLimitsByModel, - countryCode: response.countryCode ?? landingSession.countryCode, + countryCode: + response.countryCode ?? landingSession.countryCode, countryBlockReason: response.countryBlockReason ?? landingSession.countryBlockReason, diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index ee39ed975b..ca0a020419 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -5,7 +5,6 @@ import { DEFAULT_FREEBUFF_MODEL_ID, FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, LIMITED_FREEBUFF_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, @@ -84,15 +83,14 @@ describe('freebuff model availability', () => { ).toBe(false) }) - test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => { - expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain( - FREEBUFF_GLM_MODEL_ID, + test('does not support GLM 5.1 for freebuff sessions', () => { + const glm = 'z-ai/glm-5.1' + expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain(glm) + expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).not.toContain( + glm, ) - expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).toContain( - FREEBUFF_GLM_MODEL_ID, - ) - expect(isFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(false) - expect(isSupportedFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(true) + expect(isFreebuffModelId(glm)).toBe(false) + expect(isSupportedFreebuffModelId(glm)).toBe(false) }) test('formats the close time in the user local timezone while deployment is open', () => { diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 
2d1a55c7ff..9b8c8bb055 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -5,7 +5,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, SUPPORTED_FREEBUFF_MODELS, @@ -68,7 +67,6 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ FREEBUFF_MINIMAX_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, @@ -94,10 +92,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'tmux-cli': new Set([FREEBUFF_MINIMAX_MODEL_ID]), // Code reviewer for free mode - 'code-reviewer-minimax': new Set([ - FREEBUFF_MINIMAX_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, - ]), + 'code-reviewer-minimax': new Set([FREEBUFF_MINIMAX_MODEL_ID]), 'code-reviewer-kimi': new Set([FREEBUFF_KIMI_MODEL_ID]), 'code-reviewer-deepseek': new Set([FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]), 'code-reviewer-deepseek-flash': new Set([ diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 715b258b50..95f79644a9 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -35,7 +35,6 @@ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview' export const FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID = 'deepseek/deepseek-v4-pro' export const FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID = 'deepseek/deepseek-v4-flash' -export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' export const FREEBUFF_PREMIUM_SESSION_LIMIT = 5 @@ -102,29 +101,15 @@ export const FREEBUFF_MODELS = [ }, ] as const satisfies readonly FreebuffModelOption[] -export const 
LEGACY_FREEBUFF_MODELS = [ - { - id: FREEBUFF_GLM_MODEL_ID, - displayName: 'GLM 5.1', - tagline: 'Legacy', - availability: 'deployment_hours', - }, -] as const satisfies readonly FreebuffModelOption[] - export const FREEBUFF_PREMIUM_MODEL_IDS = [ FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, ] as const -export const SUPPORTED_FREEBUFF_MODELS = [ - ...FREEBUFF_MODELS, - ...LEGACY_FREEBUFF_MODELS, -] as const satisfies readonly FreebuffModelOption[] +export const SUPPORTED_FREEBUFF_MODELS = FREEBUFF_MODELS export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] -export type SupportedFreebuffModelId = - (typeof SUPPORTED_FREEBUFF_MODELS)[number]['id'] +export type SupportedFreebuffModelId = FreebuffModelId export type FreebuffPremiumModelId = (typeof FREEBUFF_PREMIUM_MODEL_IDS)[number] /** What new freebuff users see selected in the picker. MiniMax is the diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 76af547f3d..bc9cfc9881 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. +2. 
**Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; models without a dedicated deployment are treated as serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. @@ -153,18 +153,18 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r ### Tunables -| Constant | Location | Default | Purpose | -| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | -| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. 
Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | -| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | -| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | +| Constant | Location | Default | Purpose | +| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | none for current freebuff models | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback). | +| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | +| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | +| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | ### Premium Session Quota -DeepSeek V4 Pro, Kimi, and legacy GLM share a per-user premium quota. 
The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited. +DeepSeek V4 Pro and Kimi share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited. ## HTTP API @@ -198,7 +198,7 @@ Response shapes: "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing - "z-ai/glm-5.1": 4 // entries should be treated as 0. + "deepseek/deepseek-v4-pro": 4 // entries should be treated as 0. }, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -298,7 +298,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `deepseek/deepseek-v4-pro` queue. 
The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses, so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) @@ -337,7 +337,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded dedicated deployment doesn't block serverless model admissions. 
| | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 5704535f89..566516441a 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -7,8 +7,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, - isFreebuffDeploymentHours, } from '@codebuff/common/constants/freebuff-models' import { openCodeZenModels } from '@codebuff/common/constants/model-config' import { postChatCompletions } from '../_post' @@ -963,7 +961,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) it( - 'lets old freebuff clients keep using GLM 5.1 through Fireworks availability rules', + 'rejects removed GLM 5.1 for free mode before provider calls', async () => { const fetchedBodies: Record[] = [] const fetchViaFireworks = mock( @@ -994,7 +992,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: allowedFreeModeHeaders('test-api-key-new-free'), body: JSON.stringify({ - model: FREEBUFF_GLM_MODEL_ID, + model: 'z-ai/glm-5.1', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -1019,19 +1017,9 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) const body = await response.json() - if (isFreebuffDeploymentHours()) { - expect(response.status).toBe(200) - expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/fireworks/models/glm-5p1', - ) - expect(body.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(body.provider).toBe('Fireworks') - } else { - expect(response.status).toBe(503) - expect(fetchedBodies).toHaveLength(0) - expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') - } + 
expect(response.status).toBe(403) + expect(fetchedBodies).toHaveLength(0) + expect(body.error).toBe('free_mode_invalid_agent_model') }, FETCH_PATH_TEST_TIMEOUT_MS, ) diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 54dc6c90de..46ad2763c1 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -380,17 +380,17 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.ipPrivacySignals).toBeUndefined() }) - test('returns model_unavailable for legacy GLM 5.1 outside deployment hours', async () => { + test('falls back for removed GLM 5.1 requests', async () => { const sessionDeps = makeSessionDeps() const resp = await postFreebuffSession( makeReq('ok', { model: 'z-ai/glm-5.1' }), makeDeps(sessionDeps, 'u1'), ) - expect(resp.status).toBe(409) + expect(resp.status).toBe(200) const body = await resp.json() - expect(body.status).toBe('model_unavailable') - expect(body.availableHours).toBe('9am ET-5pm PT every day') - expect(sessionDeps.rows.size).toBe(0) + expect(body.status).toBe('queued') + expect(body.model).toBe('minimax/minimax-m2.7') + expect(sessionDeps.rows.get('u1')?.model).toBe('minimax/minimax-m2.7') }) // Banned bots with valid API keys were POSTing every few seconds and diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index f55ab3b796..2ad5c0d0c3 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -1,7 +1,5 @@ import { describe, expect, test } from 'bun:test' -import { FREEBUFF_GLM_MODEL_ID } from '@codebuff/common/constants/freebuff-models' - import { runAdmissionTick } from '../admission' import type { AdmissionDeps } from '../admission' @@ -113,17 +111,6 @@ describe('runAdmissionTick', () => { 
expect(result.skipped).toBeNull() }) - test('legacy GLM 5.1 is admitted during deployment hours', async () => { - const deps = makeAdmissionDeps({ - models: [FREEBUFF_GLM_MODEL_ID], - now: () => new Date('2026-04-17T16:00:00Z'), - getFleetHealth: async () => ({ [FREEBUFF_GLM_MODEL_ID]: 'healthy' }), - }) - const result = await runAdmissionTick(deps) - expect(result.admitted).toBe(1) - expect(result.skipped).toBeNull() - }) - test('propagates expiry count and admit count together', async () => { const deps = makeAdmissionDeps({ sweepExpired: async () => 2, diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index b85c682cb3..9503241269 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -4,7 +4,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_LIMITED_SESSION_LIMIT, FREEBUFF_PREMIUM_SESSION_LIMIT, @@ -25,6 +24,7 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 const DEFAULT_MODEL = 'minimax/minimax-m2.7' +const REMOVED_GLM_MODEL = 'z-ai/glm-5.1' const DEFAULT_PREMIUM_RESET_AT = '2026-04-18T07:00:00.000Z' function expectedRateLimit(model: string, recentCount: number) { @@ -264,42 +264,25 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) - test('deployment-hours-only model is unavailable outside deployment hours', async () => { - // Legacy GLM 5.1 is the only freebuff model still gated to deployment - // hours — Kimi and DeepSeek both run 24/7 from the picker. 
+ test('removed GLM 5.1 request falls back to the default model', async () => { const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, - deps, - }) - expect(state).toEqual({ - status: 'model_unavailable', - requestedModel: FREEBUFF_GLM_MODEL_ID, - availableHours: '9am ET-5pm PT every day', - }) - expect(deps.rows.size).toBe(0) - }) - - test('legacy GLM 5.1 model is still accepted for old clients during deployment hours', async () => { - deps._tick(new Date('2026-04-17T16:00:00Z')) - const state = await requestSession({ - userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') - expect(deps.rows.get('u1')?.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) + expect(state.model).toBe(DEFAULT_MODEL) + expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL) }) - test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => { + test('removed GLM 5.1 active session cannot be reclaimed', async () => { const admittedAt = new Date(deps._now().getTime() - 10 * 60 * 1000) deps.rows.set('u1', { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(deps._now().getTime() + SESSION_LEN), @@ -309,13 +292,13 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) - expect(state.status).toBe('active') - if (state.status !== 'active') throw new Error('unreachable') - expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + 
expect(state.model).toBe(DEFAULT_MODEL) + expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL) }) test('queued response includes a per-model depth snapshot for the selector', async () => { @@ -548,27 +531,25 @@ describe('requestSession', () => { expect(deps.rows.has('u1')).toBe(false) }) - test('rate_limited: legacy GLM 5.1 uses the shared premium quota', async () => { + test('rate_limited: removed GLM 5.1 request does not use the shared premium quota', async () => { deps._tick(PREMIUM_OPEN_TIME) const now = deps._now() for (let i = 0; i < PREMIUM_LIMIT; i++) { deps.admits.push({ user_id: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: PREMIUM_MODEL, admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), }) } const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) - expect(state.status).toBe('rate_limited') - if (state.status !== 'rate_limited') throw new Error('unreachable') - expect(state.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(state.limit).toBe(PREMIUM_LIMIT) - expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.model).toBe(DEFAULT_MODEL) }) test("rate_limited: admits before today's Pacific reset do not count", async () => { @@ -1311,6 +1292,24 @@ describe('checkSessionAdmissible', () => { expect(result.remainingMs).toBe(SESSION_LEN) }) + test('active removed GLM 5.1 session is not admissible', async () => { + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const row = deps.rows.get('u1')! 
+ row.model = REMOVED_GLM_MODEL + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + requestedModel: REMOVED_GLM_MODEL, + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_model_mismatch') + }) + test('active Kimi session admits Gemini thinker requests', async () => { await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index da51cee0e7..97a6caf287 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -1,7 +1,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, } from '@codebuff/common/constants/freebuff-models' @@ -58,7 +57,6 @@ export function getSessionGraceMs(): number { const INSTANT_ADMIT_CAPACITY: Record = { [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 1000, [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 1000, - [FREEBUFF_GLM_MODEL_ID]: 50, [FREEBUFF_KIMI_MODEL_ID]: 1000, [FREEBUFF_MINIMAX_MODEL_ID]: 1000, } From e87f7a7b267bf66051165eb21b268ae33925e8dd Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 23 May 2026 20:07:37 -0700 Subject: [PATCH 747/749] Fix wrapper terminal reset for help output (#739) --- cli/release-staging/index.js | 51 ++++++++++++++++++++++++++++------- cli/release/index.js | 51 ++++++++++++++++++++++++++++------- freebuff/cli/release/index.js | 51 ++++++++++++++++++++++++++++------- 3 files changed, 123 insertions(+), 30 deletions(-) diff --git a/cli/release-staging/index.js b/cli/release-staging/index.js index 083e8879a9..9f40380085 100644 --- a/cli/release-staging/index.js +++ b/cli/release-staging/index.js @@ -17,11 +17,9 @@ const 
packageName = 'codecane' * Terminal escape sequences to reset terminal state after the child process exits. * When the binary is SIGKILL'd, it can't clean up its own terminal state. * The wrapper (this process) survives and must reset these modes. - * - * Keep in sync with TERMINAL_RESET_SEQUENCES in cli/src/utils/renderer-cleanup.ts */ -const TERMINAL_RESET_SEQUENCES = - '\x1b[?1049l' + // Exit alternate screen buffer +const EXIT_ALTERNATE_SCREEN_SEQUENCE = '\x1b[?1049l' +const SAFE_TERMINAL_RESET_SEQUENCES = '\x1b[?1000l' + // Disable X10 mouse mode '\x1b[?1002l' + // Disable button event mouse mode '\x1b[?1003l' + // Disable any-event mouse mode (all motion) @@ -30,7 +28,12 @@ const TERMINAL_RESET_SEQUENCES = '\x1b[?2004l' + // Disable bracketed paste mode '\x1b[?25h' // Show cursor -function resetTerminal() { +const FULL_TERMINAL_RESET_SEQUENCES = + EXIT_ALTERNATE_SCREEN_SEQUENCE + SAFE_TERMINAL_RESET_SEQUENCES + +function resetTerminal(options = {}) { + const { exitAlternateScreen = false } = options + try { if (process.stdin.isTTY && process.stdin.setRawMode) { process.stdin.setRawMode(false) @@ -40,13 +43,37 @@ function resetTerminal() { } try { if (process.stdout.isTTY) { - process.stdout.write(TERMINAL_RESET_SEQUENCES) + // Exiting the alternate screen is only safe after an interactive child. + // Plain CLI paths like --help never enter it, and ?1049l can erase output. + process.stdout.write( + exitAlternateScreen + ? FULL_TERMINAL_RESET_SEQUENCES + : SAFE_TERMINAL_RESET_SEQUENCES, + ) } } catch { // stdout may be closed } } +function getUnsignedExitCode(code) { + return code != null && code < 0 ? 
(code >>> 0) : code +} + +function isWindowsNativeCrashCode(code) { + const unsignedCode = getUnsignedExitCode(code) + return ( + process.platform === 'win32' && + (unsignedCode === 0xC000001D || + unsignedCode === 0xC0000005 || + unsignedCode === 0xC0000409) + ) +} + +function shouldExitAlternateScreen(code, signal) { + return Boolean(signal) || isWindowsNativeCrashCode(code) +} + function createConfig(packageName) { const homeDir = os.homedir() const configDir = path.join(homeDir, '.config', 'manicode') @@ -465,7 +492,7 @@ async function checkForUpdates(runningProcess, exitListener) { }, 5000) }) - resetTerminal() + resetTerminal({ exitAlternateScreen: true }) console.log(`Update available: ${currentVersion} → ${latestVersion}`) await downloadBinary(latestVersion) @@ -476,7 +503,9 @@ async function checkForUpdates(runningProcess, exitListener) { }) newChild.on('exit', (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 1 : (code || 0)) }) @@ -495,7 +524,7 @@ async function checkForUpdates(runningProcess, exitListener) { function printCrashDiagnostics(code, signal) { // Windows NTSTATUS codes (unsigned DWORD) - const unsignedCode = code != null && code < 0 ? (code >>> 0) : code + const unsignedCode = getUnsignedExitCode(code) const isIllegalInstruction = signal === 'SIGILL' || (process.platform === 'win32' && unsignedCode === 0xC000001D) @@ -557,7 +586,9 @@ async function main() { }) const exitListener = (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 
1 : (code || 0)) } diff --git a/cli/release/index.js b/cli/release/index.js index bf1eead545..f5e24e3640 100644 --- a/cli/release/index.js +++ b/cli/release/index.js @@ -17,11 +17,9 @@ const packageName = 'codebuff' * Terminal escape sequences to reset terminal state after the child process exits. * When the binary is SIGKILL'd, it can't clean up its own terminal state. * The wrapper (this process) survives and must reset these modes. - * - * Keep in sync with TERMINAL_RESET_SEQUENCES in cli/src/utils/renderer-cleanup.ts */ -const TERMINAL_RESET_SEQUENCES = - '\x1b[?1049l' + // Exit alternate screen buffer +const EXIT_ALTERNATE_SCREEN_SEQUENCE = '\x1b[?1049l' +const SAFE_TERMINAL_RESET_SEQUENCES = '\x1b[?1000l' + // Disable X10 mouse mode '\x1b[?1002l' + // Disable button event mouse mode '\x1b[?1003l' + // Disable any-event mouse mode (all motion) @@ -30,7 +28,12 @@ const TERMINAL_RESET_SEQUENCES = '\x1b[?2004l' + // Disable bracketed paste mode '\x1b[?25h' // Show cursor -function resetTerminal() { +const FULL_TERMINAL_RESET_SEQUENCES = + EXIT_ALTERNATE_SCREEN_SEQUENCE + SAFE_TERMINAL_RESET_SEQUENCES + +function resetTerminal(options = {}) { + const { exitAlternateScreen = false } = options + try { if (process.stdin.isTTY && process.stdin.setRawMode) { process.stdin.setRawMode(false) @@ -40,13 +43,37 @@ function resetTerminal() { } try { if (process.stdout.isTTY) { - process.stdout.write(TERMINAL_RESET_SEQUENCES) + // Exiting the alternate screen is only safe after an interactive child. + // Plain CLI paths like --help never enter it, and ?1049l can erase output. + process.stdout.write( + exitAlternateScreen + ? FULL_TERMINAL_RESET_SEQUENCES + : SAFE_TERMINAL_RESET_SEQUENCES, + ) } } catch { // stdout may be closed } } +function getUnsignedExitCode(code) { + return code != null && code < 0 ? 
(code >>> 0) : code +} + +function isWindowsNativeCrashCode(code) { + const unsignedCode = getUnsignedExitCode(code) + return ( + process.platform === 'win32' && + (unsignedCode === 0xC000001D || + unsignedCode === 0xC0000005 || + unsignedCode === 0xC0000409) + ) +} + +function shouldExitAlternateScreen(code, signal) { + return Boolean(signal) || isWindowsNativeCrashCode(code) +} + function createConfig(packageName) { const homeDir = os.homedir() const configDir = path.join(homeDir, '.config', 'manicode') @@ -485,7 +512,7 @@ async function checkForUpdates(runningProcess, exitListener) { }, 5000) }) - resetTerminal() + resetTerminal({ exitAlternateScreen: true }) console.log(`Update available: ${currentVersion} → ${latestVersion}`) await downloadBinary(latestVersion) @@ -493,7 +520,9 @@ async function checkForUpdates(runningProcess, exitListener) { const newChild = spawnInstalledBinary({ detached: false }) newChild.on('exit', (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 1 : (code || 0)) }) @@ -507,7 +536,7 @@ async function checkForUpdates(runningProcess, exitListener) { function printCrashDiagnostics(code, signal) { // Windows NTSTATUS codes (unsigned DWORD) - const unsignedCode = code != null && code < 0 ? (code >>> 0) : code + const unsignedCode = getUnsignedExitCode(code) const isIllegalInstruction = signal === 'SIGILL' || (process.platform === 'win32' && unsignedCode === 0xC000001D) @@ -625,7 +654,9 @@ async function main() { const child = spawnInstalledBinary() const exitListener = (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 
1 : (code || 0)) } diff --git a/freebuff/cli/release/index.js b/freebuff/cli/release/index.js index ca853b83fb..312e96697c 100644 --- a/freebuff/cli/release/index.js +++ b/freebuff/cli/release/index.js @@ -17,11 +17,9 @@ const packageName = 'freebuff' * Terminal escape sequences to reset terminal state after the child process exits. * When the binary is SIGKILL'd, it can't clean up its own terminal state. * The wrapper (this process) survives and must reset these modes. - * - * Keep in sync with TERMINAL_RESET_SEQUENCES in cli/src/utils/renderer-cleanup.ts */ -const TERMINAL_RESET_SEQUENCES = - '\x1b[?1049l' + // Exit alternate screen buffer +const EXIT_ALTERNATE_SCREEN_SEQUENCE = '\x1b[?1049l' +const SAFE_TERMINAL_RESET_SEQUENCES = '\x1b[?1000l' + // Disable X10 mouse mode '\x1b[?1002l' + // Disable button event mouse mode '\x1b[?1003l' + // Disable any-event mouse mode (all motion) @@ -30,7 +28,12 @@ const TERMINAL_RESET_SEQUENCES = '\x1b[?2004l' + // Disable bracketed paste mode '\x1b[?25h' // Show cursor -function resetTerminal() { +const FULL_TERMINAL_RESET_SEQUENCES = + EXIT_ALTERNATE_SCREEN_SEQUENCE + SAFE_TERMINAL_RESET_SEQUENCES + +function resetTerminal(options = {}) { + const { exitAlternateScreen = false } = options + try { if (process.stdin.isTTY && process.stdin.setRawMode) { process.stdin.setRawMode(false) @@ -40,13 +43,37 @@ function resetTerminal() { } try { if (process.stdout.isTTY) { - process.stdout.write(TERMINAL_RESET_SEQUENCES) + // Exiting the alternate screen is only safe after an interactive child. + // Plain CLI paths like --help never enter it, and ?1049l can erase output. + process.stdout.write( + exitAlternateScreen + ? FULL_TERMINAL_RESET_SEQUENCES + : SAFE_TERMINAL_RESET_SEQUENCES, + ) } } catch { // stdout may be closed } } +function getUnsignedExitCode(code) { + return code != null && code < 0 ? 
(code >>> 0) : code +} + +function isWindowsNativeCrashCode(code) { + const unsignedCode = getUnsignedExitCode(code) + return ( + process.platform === 'win32' && + (unsignedCode === 0xC000001D || + unsignedCode === 0xC0000005 || + unsignedCode === 0xC0000409) + ) +} + +function shouldExitAlternateScreen(code, signal) { + return Boolean(signal) || isWindowsNativeCrashCode(code) +} + function createConfig(packageName) { const homeDir = os.homedir() const configDir = path.join(homeDir, '.config', 'manicode') @@ -472,7 +499,7 @@ async function checkForUpdates(runningProcess, exitListener) { }, 5000) }) - resetTerminal() + resetTerminal({ exitAlternateScreen: true }) console.log(`Update available: ${currentVersion} → ${latestVersion}`) await downloadBinary(latestVersion) @@ -480,7 +507,9 @@ async function checkForUpdates(runningProcess, exitListener) { const newChild = spawnInstalledBinary({ detached: false }) newChild.on('exit', (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 1 : (code || 0)) }) @@ -494,7 +523,7 @@ async function checkForUpdates(runningProcess, exitListener) { function printCrashDiagnostics(code, signal) { // Windows NTSTATUS codes (unsigned DWORD) - const unsignedCode = code != null && code < 0 ? (code >>> 0) : code + const unsignedCode = getUnsignedExitCode(code) const isIllegalInstruction = signal === 'SIGILL' || (process.platform === 'win32' && unsignedCode === 0xC000001D) @@ -612,7 +641,9 @@ async function main() { const child = spawnInstalledBinary() const exitListener = (code, signal) => { - resetTerminal() + resetTerminal({ + exitAlternateScreen: shouldExitAlternateScreen(code, signal), + }) printCrashDiagnostics(code, signal) process.exit(signal ? 
1 : (code || 0)) } From 1c601b391fac78eaef459c2a9d9cfbe9f98f3d0f Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 23 May 2026 23:13:55 -0700 Subject: [PATCH 748/749] Create simplify skill. Delete other unnecessary skills --- .agents/skills/cleanup/SKILL.md | 8 -------- .agents/skills/meta/SKILL.md | 18 ------------------ .agents/skills/review/SKILL.md | 8 -------- .agents/skills/simplify/SKILL.md | 8 ++++++++ 4 files changed, 8 insertions(+), 34 deletions(-) delete mode 100644 .agents/skills/cleanup/SKILL.md delete mode 100644 .agents/skills/meta/SKILL.md delete mode 100644 .agents/skills/review/SKILL.md create mode 100644 .agents/skills/simplify/SKILL.md diff --git a/.agents/skills/cleanup/SKILL.md b/.agents/skills/cleanup/SKILL.md deleted file mode 100644 index dd41e2a10f..0000000000 --- a/.agents/skills/cleanup/SKILL.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: cleanup -description: Simplify and clean code ---- - -# Cleanup - -Please review the uncommitted changes (staged and unstaged) and find ways to simplify the code. Clean up logic. Find a simpler design. Reuse existing functions. Move utilities to utility files. Lower the cyclomatic complexity. Remove try/catch statements when not completely necessary. \ No newline at end of file diff --git a/.agents/skills/meta/SKILL.md b/.agents/skills/meta/SKILL.md deleted file mode 100644 index 8b05efdddf..0000000000 --- a/.agents/skills/meta/SKILL.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -name: meta -description: Broad project-level implementation and validation heuristics ---- - -# Meta - -- When validating CLI changes, run a non-effectful command path first (for example `--help`) before any command that could trigger external side effects. (from .agents/sessions/03-03-0909-add-console-log) -- For tightly scoped edits, pair runtime smoke-checks with `git diff -- ` to verify no unintended spillover. (from .agents/sessions/03-03-0909-add-console-log) -- From monorepo root, run workspace scripts as `bun run --cwd