From da31b5cbb262d15d99988f871fe00eaca733b82e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 12 Mar 2026 11:04:29 -0700 Subject: [PATCH 001/611] freebuff: Don't show the ads are required in free mode --- cli/src/components/ad-banner.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/components/ad-banner.tsx b/cli/src/components/ad-banner.tsx index 9ec6ac56dc..59c38d120c 100644 --- a/cli/src/components/ad-banner.tsx +++ b/cli/src/components/ad-banner.tsx @@ -194,7 +194,7 @@ export const AdBanner: React.FC = ({ ad, onDisableAds, isFreeMode gap: 2, }} > - {isFreeMode ? ( + {isFreeMode && !IS_FREEBUFF ? ( Ads are required in Free mode. From fae9205ce81a8a80b452f9b71cc26f00a7291871 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 12 Mar 2026 12:14:07 -0700 Subject: [PATCH 002/611] Fireworks: use custom deployment during business hours --- .../__tests__/fireworks-deployment.test.ts | 388 ++++++++++++++++++ web/src/llm-api/fireworks.ts | 120 +++++- 2 files changed, 495 insertions(+), 13 deletions(-) create mode 100644 web/src/llm-api/__tests__/fireworks-deployment.test.ts diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts new file mode 100644 index 0000000000..f85fd7d34d --- /dev/null +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -0,0 +1,388 @@ +import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test' + +import { + createFireworksRequestWithFallback, + DEPLOYMENT_COOLDOWN_MS, + FireworksError, + isDeploymentCoolingDown, + isDeploymentHours, + markDeploymentScalingUp, + resetDeploymentCooldown, +} from '../fireworks' + +import type { Logger } from '@codebuff/common/types/contracts/logger' + +const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5' +const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/qne3jo8v' + +function createMockLogger(): Logger { + return { + info: mock(() => {}), + warn: mock(() 
=> {}), + error: mock(() => {}), + debug: mock(() => {}), + } +} + +// Helper: create a Date at a specific ET hour using a known EDT date (June 2025, UTC-4) +function dateAtEtHour(hour: number): Date { + // June 15, 2025 is EDT (UTC-4), so ET hour H = UTC hour H+4 + const utcHour = hour + 4 + if (utcHour < 24) { + return new Date(`2025-06-15T${String(utcHour).padStart(2, '0')}:30:00Z`) + } + // Wraps to next day + return new Date(`2025-06-16T${String(utcHour - 24).padStart(2, '0')}:30:00Z`) +} + +describe('Fireworks deployment routing', () => { + describe('isDeploymentHours', () => { + it('returns true at 10am ET (start of window)', () => { + expect(isDeploymentHours(dateAtEtHour(10))).toBe(true) + }) + + it('returns true at 2pm ET (mid-day)', () => { + expect(isDeploymentHours(dateAtEtHour(14))).toBe(true) + }) + + it('returns true at 7pm ET (19:00, near end of window)', () => { + expect(isDeploymentHours(dateAtEtHour(19))).toBe(true) + }) + + it('returns false at 9am ET (before window)', () => { + expect(isDeploymentHours(dateAtEtHour(9))).toBe(false) + }) + + it('returns false at 8pm ET (20:00, window closed)', () => { + expect(isDeploymentHours(dateAtEtHour(20))).toBe(false) + }) + + it('returns false at midnight ET', () => { + expect(isDeploymentHours(dateAtEtHour(0))).toBe(false) + }) + + it('returns false at 3am ET', () => { + expect(isDeploymentHours(dateAtEtHour(3))).toBe(false) + }) + + it('returns false at 11pm ET', () => { + expect(isDeploymentHours(dateAtEtHour(23))).toBe(false) + }) + }) + + describe('deployment cooldown', () => { + beforeEach(() => { + resetDeploymentCooldown() + }) + + afterEach(() => { + resetDeploymentCooldown() + }) + + it('isDeploymentCoolingDown returns false initially', () => { + expect(isDeploymentCoolingDown()).toBe(false) + }) + + it('isDeploymentCoolingDown returns true after markDeploymentScalingUp', () => { + markDeploymentScalingUp() + expect(isDeploymentCoolingDown()).toBe(true) + }) + + it('isDeploymentCoolingDown 
returns false after resetDeploymentCooldown', () => { + markDeploymentScalingUp() + expect(isDeploymentCoolingDown()).toBe(true) + resetDeploymentCooldown() + expect(isDeploymentCoolingDown()).toBe(false) + }) + + it('DEPLOYMENT_COOLDOWN_MS is 2 minutes', () => { + expect(DEPLOYMENT_COOLDOWN_MS).toBe(2 * 60 * 1000) + }) + }) + + describe('createFireworksRequestWithFallback', () => { + let logger: Logger + + beforeEach(() => { + resetDeploymentCooldown() + logger = createMockLogger() + }) + + afterEach(() => { + resetDeploymentCooldown() + }) + + const minimalBody = { + model: 'minimax/minimax-m2.5', + messages: [{ role: 'user' as const, content: 'test' }], + } + + function spyDeploymentHours(inHours: boolean) { + // Control isDeploymentHours by mocking Date.prototype.toLocaleString + // When called with the ET timezone options, return an hour inside or outside the window + const original = Date.prototype.toLocaleString + const spy = { + restore: () => { + Date.prototype.toLocaleString = original + }, + } + Date.prototype.toLocaleString = function ( + this: Date, + ...args: Parameters + ) { + const options = args[1] as Intl.DateTimeFormatOptions | undefined + if (options?.timeZone === 'America/New_York' && options?.hour === 'numeric') { + return inHours ? 
'14' : '3' + } + return original.apply(this, args) + } + return spy + } + + it('uses standard API outside deployment hours', async () => { + const spy = spyDeploymentHours(false) + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID) + } finally { + spy.restore() + } + }) + + it('tries custom deployment during deployment hours', async () => { + const spy = spyDeploymentHours(true) + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) + } finally { + spy.restore() + } + }) + + it('falls back to standard API on 503 DEPLOYMENT_SCALING_UP', async () => { + const spy = spyDeploymentHours(true) + const fetchCalls: string[] = [] + let callCount = 0 + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + callCount++ + + if (callCount === 1) { + return new 
Response( + JSON.stringify({ + error: { + message: 'Deployment is currently scaled to zero and is scaling up. Please retry your request in a few minutes.', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } + + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(2) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) + expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID) + // Verify cooldown was activated + expect(isDeploymentCoolingDown()).toBe(true) + } finally { + spy.restore() + } + }) + + it('throws FireworksError on non-scaling 503 from deployment', async () => { + const spy = spyDeploymentHours(true) + + const mockFetch = mock(async () => { + return new Response( + JSON.stringify({ + error: { + message: 'Service temporarily unavailable', + code: 'SERVICE_UNAVAILABLE', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + }) as unknown as typeof globalThis.fetch + + try { + await expect( + createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }), + ).rejects.toBeInstanceOf(FireworksError) + } finally { + spy.restore() + } + }) + + it('skips deployment during cooldown and goes straight to standard API', async () => { + const spy = spyDeploymentHours(true) + markDeploymentScalingUp() + + const fetchCalls: string[] = [] + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof 
globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(STANDARD_MODEL_ID) + } finally { + spy.restore() + } + }) + + it('uses standard API for models without a custom deployment', async () => { + const spy = spyDeploymentHours(true) + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: { ...minimalBody, model: 'some-other/model' } as never, + originalModel: 'some-other/model', + fetch: mockFetch, + logger, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toHaveLength(1) + // Model without mapping falls through to the original model + expect(fetchCalls[0]).toBe('some-other/model') + } finally { + spy.restore() + } + }) + + it('returns non-200 responses from deployment without fallback (non-503)', async () => { + const spy = spyDeploymentHours(true) + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response( + JSON.stringify({ error: { message: 'Rate limited' } }), + { status: 429, statusText: 'Too Many Requests' }, + ) + }) as unknown as typeof globalThis.fetch + + try { + const response = await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + // Non-503 errors from deployment are returned as-is (caller handles them) + 
expect(response.status).toBe(429) + expect(fetchCalls).toHaveLength(1) + expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID) + } finally { + spy.restore() + } + }) + + it('logs when trying deployment and when falling back', async () => { + const spy = spyDeploymentHours(true) + let callCount = 0 + + const mockFetch = mock(async () => { + callCount++ + if (callCount === 1) { + return new Response( + JSON.stringify({ + error: { + message: 'Scaling up', + code: 'DEPLOYMENT_SCALING_UP', + type: 'error', + }, + }), + { status: 503, statusText: 'Service Unavailable' }, + ) + } + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + try { + await createFireworksRequestWithFallback({ + body: minimalBody as never, + originalModel: 'minimax/minimax-m2.5', + fetch: mockFetch, + logger, + }) + + expect(logger.info).toHaveBeenCalledTimes(2) + } finally { + spy.restore() + } + }) + }) +}) diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index c35d5aa579..87b840faf8 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -26,12 +26,48 @@ const fireworksAgent = new Agent({ bodyTimeout: 0, }) -/** Map from OpenRouter model IDs to Fireworks model IDs */ +/** Map from OpenRouter model IDs to Fireworks standard API model IDs */ const FIREWORKS_MODEL_MAP: Record = { - // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/qne3jo8v' //'accounts/fireworks/models/minimax-m2p5', 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', } +/** Custom deployment IDs for models with dedicated Fireworks deployments */ +const FIREWORKS_DEPLOYMENT_MAP: Record = { + 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/qne3jo8v', +} + +/** Check if current time is within deployment hours (10am–8pm ET) */ +export function isDeploymentHours(now: Date = new Date()): boolean { + const etHour = parseInt( + now.toLocaleString('en-US', { + timeZone: 'America/New_York', + hour: 
'numeric', + hour12: false, + }), + 10, + ) + return etHour >= 10 && etHour < 20 +} + +/** + * In-memory cooldown to avoid repeatedly hitting a deployment that is scaling up. + * After a DEPLOYMENT_SCALING_UP 503, we skip the deployment for this many ms. + */ +export const DEPLOYMENT_COOLDOWN_MS = 2 * 60 * 1000 +let deploymentScalingUpUntil = 0 + +export function isDeploymentCoolingDown(): boolean { + return Date.now() < deploymentScalingUpUntil +} + +export function markDeploymentScalingUp(): void { + deploymentScalingUpUntil = Date.now() + DEPLOYMENT_COOLDOWN_MS +} + +export function resetDeploymentCooldown(): void { + deploymentScalingUpUntil = 0 +} + export function isFireworksModel(model: string): boolean { return model in FIREWORKS_MODEL_MAP } @@ -52,11 +88,12 @@ function createFireworksRequest(params: { body: ChatCompletionRequestBody originalModel: string fetch: typeof globalThis.fetch + modelIdOverride?: string }) { - const { body, originalModel, fetch } = params + const { body, originalModel, fetch, modelIdOverride } = params const fireworksBody: Record = { ...body, - model: getFireworksModelId(originalModel), + model: modelIdOverride ?? 
getFireworksModelId(originalModel), } // Strip OpenRouter-specific / internal fields @@ -128,7 +165,7 @@ export async function handleFireworksNonStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createFireworksRequest({ body, originalModel, fetch }) + const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger }) if (!response.ok) { throw await parseFireworksError(response) @@ -204,7 +241,7 @@ export async function handleFireworksStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createFireworksRequest({ body, originalModel, fetch }) + const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger }) if (!response.ok) { throw await parseFireworksError(response) @@ -566,8 +603,11 @@ export class FireworksError extends Error { } } -async function parseFireworksError(response: Response): Promise { - const errorText = await response.text() +function parseFireworksErrorFromText( + statusCode: number, + statusText: string, + errorText: string, +): FireworksError { let errorBody: FireworksError['errorBody'] try { const parsed = JSON.parse(errorText) @@ -582,20 +622,74 @@ async function parseFireworksError(response: Response): Promise } else { errorBody = { error: { - message: errorText || response.statusText, - code: response.status, + message: errorText || statusText, + code: statusCode, }, } } } catch { errorBody = { error: { - message: errorText || response.statusText, - code: response.status, + message: errorText || statusText, + code: statusCode, }, } } - return new FireworksError(response.status, response.statusText, errorBody) + return new FireworksError(statusCode, statusText, errorBody) +} + +async function parseFireworksError(response: Response): Promise { + const errorText = await response.text() 
+ return parseFireworksErrorFromText(response.status, response.statusText, errorText) +} + +/** + * Tries the custom Fireworks deployment during business hours (10am–8pm ET), + * falling back to the standard API if the deployment returns 503 DEPLOYMENT_SCALING_UP. + * Outside deployment hours or during cooldown, goes straight to the standard API. + */ +export async function createFireworksRequestWithFallback(params: { + body: ChatCompletionRequestBody + originalModel: string + fetch: typeof globalThis.fetch + logger: Logger +}): Promise { + const { body, originalModel, fetch, logger } = params + const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel] + const shouldTryDeployment = + deploymentModelId && isDeploymentHours() && !isDeploymentCoolingDown() + + if (shouldTryDeployment) { + logger.info( + { model: originalModel, deploymentModel: deploymentModelId }, + 'Trying Fireworks custom deployment (business hours)', + ) + const response = await createFireworksRequest({ + body, + originalModel, + fetch, + modelIdOverride: deploymentModelId, + }) + + if (response.status === 503) { + const errorText = await response.text() + if (errorText.includes('DEPLOYMENT_SCALING_UP')) { + logger.info( + { model: originalModel }, + 'Fireworks deployment scaling up, falling back to standard API', + ) + markDeploymentScalingUp() + // Fall through to standard API request below + } else { + // Non-scaling 503 — treat as a real error + throw parseFireworksErrorFromText(response.status, response.statusText, errorText) + } + } else { + return response + } + } + + return createFireworksRequest({ body, originalModel, fetch }) } function creditsToFakeCost(credits: number): number { From 9295e163602d4f432a6d0728dd5a9576dcf4c3d8 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Thu, 12 Mar 2026 15:14:59 -0700 Subject: [PATCH 003/611] Add /connect:chatgpt --- .../LESSONS.md | 42 ++ .../03-02-14:07-chatgpt-oauth-direct/PLAN.md | 104 ++++ .../03-02-14:07-chatgpt-oauth-direct/SPEC.md | 
155 ++++++ .agents/skills/meta/SKILL.md | 6 + cli/src/chat.tsx | 21 - .../__tests__/router-connect-chatgpt.test.ts | 87 ++++ .../commands/__tests__/router-input.test.ts | 18 + cli/src/commands/command-registry.ts | 14 + cli/src/commands/router.ts | 24 + cli/src/components/bottom-status-line.tsx | 140 ------ cli/src/components/chatgpt-connect-banner.tsx | 138 +++++ cli/src/components/input-mode-banner.tsx | 5 + cli/src/components/usage-banner.tsx | 13 +- cli/src/data/slash-commands.ts | 11 + cli/src/init/init-app.ts | 12 + cli/src/utils/__tests__/chatgpt-oauth.test.ts | 35 ++ cli/src/utils/chatgpt-oauth.ts | 203 ++++++++ cli/src/utils/input-modes.ts | 10 + common/src/constants/analytics-events.ts | 5 + common/src/constants/chatgpt-oauth.ts | 80 +++ common/src/constants/index.ts | 1 + scripts/chatgpt-oauth-validate.ts | 112 +++++ scripts/test-openai-token-count.ts | 471 ++++++++++++++++++ sdk/src/__tests__/credentials.test.ts | 194 ++++++++ sdk/src/__tests__/env.test.ts | 25 +- sdk/src/__tests__/model-provider.test.ts | 38 ++ sdk/src/credentials.ts | 205 +++++++- sdk/src/env.ts | 8 + .../llm-chatgpt-oauth-policy.test.ts | 67 +++ .../model-provider-free-mode.test.ts | 107 ++++ sdk/src/impl/llm.ts | 211 +++++++- sdk/src/impl/model-provider.ts | 124 ++++- sdk/src/index.ts | 5 +- 33 files changed, 2496 insertions(+), 195 deletions(-) create mode 100644 .agents/sessions/03-02-14:07-chatgpt-oauth-direct/LESSONS.md create mode 100644 .agents/sessions/03-02-14:07-chatgpt-oauth-direct/PLAN.md create mode 100644 .agents/sessions/03-02-14:07-chatgpt-oauth-direct/SPEC.md create mode 100644 cli/src/commands/__tests__/router-connect-chatgpt.test.ts delete mode 100644 cli/src/components/bottom-status-line.tsx create mode 100644 cli/src/components/chatgpt-connect-banner.tsx create mode 100644 cli/src/utils/__tests__/chatgpt-oauth.test.ts create mode 100644 cli/src/utils/chatgpt-oauth.ts create mode 100644 common/src/constants/chatgpt-oauth.ts create mode 100644 
scripts/chatgpt-oauth-validate.ts create mode 100644 scripts/test-openai-token-count.ts create mode 100644 sdk/src/impl/__tests__/llm-chatgpt-oauth-policy.test.ts create mode 100644 sdk/src/impl/__tests__/model-provider-free-mode.test.ts diff --git a/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/LESSONS.md b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/LESSONS.md new file mode 100644 index 0000000000..0dbb6fd5b9 --- /dev/null +++ b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/LESSONS.md @@ -0,0 +1,42 @@ +# LESSONS — ChatGPT OAuth Direct Routing + +Session: `.agents/sessions/03-02-14:07-chatgpt-oauth-direct/` + +## What went well +- Building this feature behind a strict feature flag (`CHATGPT_OAUTH_ENABLED=false`) reduced rollout risk while allowing full end-to-end wiring. +- Reusing the Claude OAuth architectural pattern (credentials helpers, refresh mutex, routing split) accelerated implementation without coupling the two providers. +- Splitting policy logic into `classifyChatGptOAuthStreamError` made fallback/auth/fail-fast behavior easier to test and reason about. +- Adding focused CLI tests for `/connect:chatgpt` gating and utility sanitization caught regression risk early. + +## Current confidence / known gaps +- Runtime ChatGPT stream policy is **partially tested**: `classifyChatGptOAuthStreamError` is covered, but we do not yet have full behavioral tests for `promptAiSdkStream` recursion branches (actual fallback recursion and post-partial-output behavior). +- CLI routing coverage is strongest for **feature-flag OFF** paths; flag-ON auth-code routing should get explicit dedicated tests in a future pass. + +## What was tricky +- The repo had unrelated local drift during implementation; explicit scope cleanup (`git checkout -- `) was necessary to avoid accidental cross-feature commits. +- CLI module mocking is path-sensitive. Test modules under `cli/src/commands/__tests__` must mock sibling modules with correct relative paths (e.g. 
`../../state/chat-store`), or mocks silently fail. +- Over-mocking analytics can break transitive imports (`setAnalyticsErrorLogger` export expectations). A safe pattern is spreading real analytics exports and overriding only `trackEvent`. + +## Unexpected behaviors / gotchas +- A staged unrelated file can survive despite working-tree revert; both staged and worktree states must be checked before final handoff. +- “Looks correct” tests can still miss runtime branches if they only validate helper classification, not route wiring; reviewer loops were useful to force coverage on practical paths. +- For OAuth tooling/scripts, sanitize error text aggressively. Returning status-only errors avoids accidental token payload leakage. + +## Useful patterns discovered +- Keep direct-provider routing stream-only initially; explicitly forcing non-streaming/structured calls to backend avoided broad compatibility risk. +- Use deterministic model allowlist + normalization mapping in constants to avoid relying on provider-side parsing/errors for unsupported models. +- Treat temporary protocol validation scripts as first-class validation artifacts: they are valuable for real-account smoke checks without coupling to full CLI runtime. + +## Temporary script disposition +- `scripts/chatgpt-oauth-validate.ts` is currently kept as a **dev utility** for manual protocol revalidation while the feature remains experimental/off by default. +- Removal criteria: if protocol endpoints are either officially documented or the CLI flow gets stable automated integration coverage, this script can be retired. + +## Repeatable security verification +- For redaction checks, run targeted searches against changed code/log handling paths for sensitive markers before handoff, e.g. `access_token`, `refresh_token`, and `Authorization: Bearer`. +- Keep surfaced token exchange errors status-only and avoid echoing raw provider response bodies. 
+ +## Follow-up improvements worth considering +- Add deeper runtime-behavior tests for `promptAiSdkStream` recursive fallback branches (not just policy classifier). +- Add explicit CLI test for flag-ON connect flow path once flag toggling is test-harness friendly. +- If feature graduates from experimental, add richer direct-path observability while preserving strict token redaction. +- Add periodic protocol drift checks (authorize/token/callback PKCE assumptions) before enabling the feature flag in production defaults. diff --git a/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/PLAN.md b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/PLAN.md new file mode 100644 index 0000000000..9684c95329 --- /dev/null +++ b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/PLAN.md @@ -0,0 +1,104 @@ +# PLAN — ChatGPT Subscription OAuth Direct Routing + +## Implementation Steps +1. **Add shared ChatGPT OAuth constants** + - Create `common/src/constants/chatgpt-oauth.ts` with: + - feature flag (`CHATGPT_OAUTH_ENABLED=false`) + - endpoints/client id/redirect URI/env var + - model allowlist + normalization helpers + - Export through `common/src/constants/index.ts`. + +2. **Build core OAuth utility + temporary protocol validation script (early gate)** + - Create `cli/src/utils/chatgpt-oauth.ts` with PKCE URL generation, browser-open helper, pasted code/URL parsing, token exchange helper. + - Create `scripts/chatgpt-oauth-validate.ts` to test OAuth URL generation + paste parsing + token exchange interaction. + - **Run this script before full integration** as go/no-go checkpoint for endpoint assumptions. + +3. **Add SDK env + credential support** + - Extend `sdk/src/env.ts` with `getChatGptOAuthTokenFromEnv()`. + - Extend `sdk/src/credentials.ts` with `chatgptOAuth` schema and helpers: + - get/save/clear + - valid-check + refresh mutex + - get-valid-with-refresh + - Preserve all non-target credentials in read/write operations. + +4. 
**Add CLI connect flow UI and command routing** + - Create `cli/src/components/chatgpt-connect-banner.tsx` with state machine + `handleChatGptAuthCode`. + - Update input modes (`connect:chatgpt`) and banner registry. + - Add `/connect:chatgpt` command + alias handling and slash command entry (feature-gated). + - Extend router to process pasted auth code in `connect:chatgpt` mode. + - Verify command visibility: hidden when flag OFF, present when flag ON. + +5. **Implement direct routing primitives in model-provider (decomposed)** + - 5.1 Add ChatGPT direct eligibility checks (feature flag + creds + model scope + skip flag + rate-limit cache state). + - 5.2 Add model normalization + prevalidation helpers (OpenRouter-style -> provider-native). + - 5.3 Add strict payload sanitization helper for direct requests. + - 5.4 Add ChatGPT OAuth direct model construction using OpenAI-compatible transport. + - 5.5 Add ChatGPT rate-limit cache helpers (parallel to Claude cache pattern). + - Keep Claude OAuth path unchanged. + +6. **Update stream execution + fallback/error policy** + - Extend `sdk/src/impl/llm.ts` to: + - recognize ChatGPT direct route usage + - emit ChatGPT OAuth analytics + - fallback only on rate-limit errors + - fail with reconnect guidance on auth errors + - fail fast for all other direct errors + - skip cost accounting for successful ChatGPT direct requests + - avoid fallback once output has already streamed + +7. **Wire startup refresh + CLI status surfacing** + - Update `cli/src/init/init-app.ts` for background ChatGPT OAuth credential refresh when enabled. + - Update `cli/src/chat.tsx`, `cli/src/components/bottom-status-line.tsx`, and `cli/src/components/usage-banner.tsx` to surface ChatGPT connection/active status. + +8. **Add analytics constants + SDK exports** + - Extend `common/src/constants/analytics-events.ts` with ChatGPT OAuth request/rate-limit/auth-error events. + - Ensure SDK exports newly needed helper(s) in `sdk/src/index.ts`. + +9. 
**Add/adjust tests (explicit matrix)** + - SDK credentials tests: + - env precedence + - persisted read/write/clear + - refresh success/failure + mutex + - Model-provider tests: + - rate-limit cache lifecycle + - allowlist prevalidation + unsupported-model error + - normalization behavior for mapped/unknown variants + - LLM routing/fallback tests (targeted): + - 429 fallback + - 401/403 no-fallback + reconnect path + - timeout/5xx fail-fast + - no fallback after content emitted + - CLI tests/wiring checks: + - command/mode visibility by feature flag + - connect mode routing and handler call. + - Non-streaming/structured guard check: + - confirm backend-only behavior unchanged. + +10. **Validation and cleanup decision for temporary script** + - Run targeted tests/typechecks for touched packages. + - Run OAuth validation script in manual mode (with your account interaction if needed). + - Decide and apply final disposition of temporary script: + - keep as dev utility, or + - remove before finalization. + +11. **Security/redaction verification** + - Validate no token values are logged in direct feature code paths. + - Grep/check for accidental logging of authorization headers, token payload fields, or raw callback query params. + +## Dependencies / Ordering +- Step 1 must be first. +- Step 2 must run before deep integration (early protocol validation gate). +- Step 3 precedes Steps 5–7. +- Step 4 can run in parallel with Step 3 after constants/util setup. +- Step 5 must precede Step 6. +- Step 8 can be implemented alongside Steps 5–6 but must complete before final validation. +- Step 9 follows core implementation completion. +- Steps 10–11 are final validation/cleanup/security passes. + +## Risk Areas +1. **Unofficial OAuth contract drift** — endpoint/field incompatibility can break token exchange. +2. **Direct payload compatibility** — strict sanitization must retain required OpenAI fields. +3. 
**Error classification correctness** — misclassification can violate requested fallback policy. +4. **Model normalization accuracy** — wrong mapping yields avoidable provider failures. +5. **Token redaction** — avoid leakage in logs, errors, or analytics payloads. +6. **Streaming boundary behavior** — fallback must not happen after partial output is emitted. diff --git a/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/SPEC.md b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/SPEC.md new file mode 100644 index 0000000000..d56a415caf --- /dev/null +++ b/.agents/sessions/03-02-14:07-chatgpt-oauth-direct/SPEC.md @@ -0,0 +1,155 @@ +# SPEC — ChatGPT Subscription OAuth Direct Routing + +## Overview +Implement an **experimental, default-disabled** ChatGPT subscription OAuth feature that allows the local CLI to route eligible OpenAI-model **streaming** requests directly to OpenAI instead of Codebuff backend routing, mirroring the prior Claude OAuth architecture pattern. + +## Protocol Assumptions (Explicit) +Because this is unofficial/experimental, this implementation proceeds under the following explicit assumptions: + +1. OAuth authorize endpoint: `https://auth.openai.com/oauth/authorize` +2. OAuth token endpoint: `https://auth.openai.com/oauth/token` +3. Public client id is configurable constant, defaulting to Codex-compatible value from ecosystem references. +4. PKCE (`S256`) is required. +5. Redirect URI is pinned to: `http://localhost:1455/auth/callback` +6. User can paste either: + - raw authorization code, or + - full callback URL containing code/state query params. +7. Token response includes at least `access_token`, optional `refresh_token`, and expiry info (`expires_in` or equivalent). +8. Refresh uses standard `grant_type=refresh_token`. + +If any assumption fails at runtime, the feature fails with explicit guidance and remains safely fallbackable only where policy allows. + +## Requirements +1. 
Add ChatGPT OAuth feature set, default disabled behind `CHATGPT_OAUTH_ENABLED = false`. +2. Add a new CLI command and mode: `/connect:chatgpt` with dedicated banner flow. +3. Implement browser-based PKCE code-paste flow (no device-code flow in this iteration). +4. Keep user-facing warning minimal (per user preference), while leaving code comments clearly marking experimental nature. +5. Store ChatGPT OAuth credentials in local credentials JSON alongside existing credentials. +6. Support env-var token override (power-user/automation use), but env var **must not bypass feature flag**. +7. Add refresh-token support with concurrency guard (mutex) for persisted credentials. +8. Direct routing scope is **streaming only** (`promptAiSdkStream` path); non-streaming and structured stay backend-routed. +9. Add model allowlist for direct routing; include optimistic aliases: + - `openai/gpt-5.3` + - `openai/gpt-5.3-codex` + - `openai/gpt-5.2` + - `openai/gpt-5.2-codex` + - plus selected nearby GPT/Codex IDs already present in repo config. +10. Provide deterministic model normalization for direct requests (OpenRouter-style -> provider-native): + - Example: `openai/gpt-5.3-codex` -> `gpt-5.3-codex` + - Mapping table lives in constants and is used for prevalidation. +11. Unsupported model handling must be deterministic and prevalidated: + - if model is not in allowlist/mapping for direct route, fail with explicit unsupported-model error (no fallback). +12. Fallback policy: + - Rate-limit/overload classification: auto-fallback to Codebuff backend. + - Auth errors (401/403): fail explicitly with reconnect guidance (no fallback). + - All other direct errors: fail fast (no fallback), per user decision. +13. Successful direct ChatGPT OAuth requests do **not** consume Codebuff credits. +14. Add lightweight ChatGPT connection status surfacing in CLI (usage banner and/or bottom status line), without quota API dependency. +15. Preserve existing Claude OAuth behavior unchanged. +16. 
Add temporary OAuth validation script that tests auth URL generation + token exchange manually before/alongside full wiring. +17. Add/update tests for credential parsing/storage/refresh, model gating, routing/fallback classification, and CLI command/mode wiring. +18. Never log OAuth tokens in analytics or error logs. + +## Direct Request Transformation Rules +Before sending direct streaming requests to OpenAI, enforce strict sanitization: + +1. Rewrite `model` from `openai/*` format to provider-native mapped id. +2. Remove provider-specific/non-OpenAI fields (e.g., codebuff metadata/provider routing payloads). +3. Preserve fields known to be valid for OpenAI-compatible chat completions. +4. Do not inject Codex-specific required prefix by default in v1 (user preference), but structure code so optional future injection is easy. + +## Error Classification Table +| Class | Detection | Behavior | +|---|---|---| +| Rate limit | HTTP 429 or message/body contains rate-limit indicators | Fallback to backend (if no output emitted yet) | +| Auth | HTTP 401/403 or auth-token-invalid indicators | Fail with reconnect guidance; no fallback | +| Unsupported model | Local allowlist/mapping precheck failure | Fail explicit unsupported-model error; no fallback | +| Other | Network timeout, 5xx, malformed payload, unknown 4xx | Fail fast; no fallback | + +## Routing Scope +1. Direct routing applies only to `promptAiSdkStream` eligible requests. +2. `promptAiSdk` and `promptAiSdkStructured` remain backend-only for this iteration. +3. Backend routing remains unchanged for all non-eligible models and when feature disabled/disconnected. + +## Credentials & Precedence Rules +1. Credentials file schema extends with `chatgptOAuth` object. +2. Precedence: env token override > persisted OAuth credentials > none. +3. Env token produces synthetic non-refreshing credentials object. +4. Persisted credentials refresh when expired/near-expiry (5-minute buffer). +5. 
On refresh failure for persisted credentials, clear only `chatgptOAuth` entry (preserve other credentials). + +## Feature Gating Matrix +1. `CHATGPT_OAUTH_ENABLED = false` + - hide `/connect:chatgpt` command and banner UX + - disable direct routing even if env token exists +2. `CHATGPT_OAUTH_ENABLED = true` and credentials available + - enable command/UI + - enable direct routing for eligible models + +## Logging/Redaction Requirements +1. Never log raw access tokens, refresh tokens, authorization headers, or token response payloads. +2. If callback URL is logged for debugging, redact query values for `code`, `access_token`, `refresh_token`, and similar sensitive keys. +3. Analytics properties must not include token-bearing strings. + +## Technical Approach +1. Create `common/src/constants/chatgpt-oauth.ts`: + - feature flag, endpoints, client id, redirect URI, env var name, model allowlist/mapping helpers. +2. Export new constants via `common/src/constants/index.ts` so legacy `old-constants` re-export path includes them. +3. Extend `sdk/src/env.ts` with ChatGPT OAuth env-token helper. +4. Extend `sdk/src/credentials.ts` with ChatGPT OAuth schema+helpers mirroring Claude pattern. +5. Create `cli/src/utils/chatgpt-oauth.ts` for PKCE start/open/exchange/disconnect/status. +6. Create `cli/src/components/chatgpt-connect-banner.tsx` and auth-code handler. +7. Wire CLI command/input mode/slash menu/router/banner registry for `connect:chatgpt`. +8. Extend model provider (`sdk/src/impl/model-provider.ts`): + - add ChatGPT direct route decision path for `openai/*` allowlisted models + - add rate-limit cache helpers for ChatGPT path + - build direct OpenAI-compatible language model with OAuth bearer auth + - enforce strict body sanitization + model normalization in the direct path. +9. Extend stream error handling (`sdk/src/impl/llm.ts`) for ChatGPT direct path with required fallback/fail rules and analytics. +10. 
Extend app init (`cli/src/init/init-app.ts`) for background ChatGPT credential refresh when enabled. +11. Add analytics events for ChatGPT OAuth request/rate-limit/auth-error. +12. Update usage/status UI text to include ChatGPT connection state. +13. Add temporary validation script (e.g., `scripts/chatgpt-oauth-validate.ts`) to exercise OAuth setup interactively. + +## Acceptance Criteria +1. With feature disabled, `/connect:chatgpt` is unavailable and no direct routing occurs. +2. With feature enabled, user can run `/connect:chatgpt`, complete browser flow, paste code/URL, and connect. +3. Eligible streaming requests on allowlisted `openai/*` models use direct OAuth path. +4. Direct request payloads are sanitized and model ids normalized before transmission. +5. Rate-limited direct requests fall back to the backend automatically, provided no output has been emitted yet. +6. Auth failures produce reconnect guidance and do not fall back. +7. Unsupported models fail immediately with explicit unsupported-model message. +8. Successful direct requests skip Codebuff credit accounting path. +9. Existing Claude OAuth flow remains behaviorally unchanged. +10. New/updated tests pass for touched behavior. +11. Temporary validation script can run and guide manual OAuth exchange checks. 
+ +## Files to Create/Modify +- Create: `common/src/constants/chatgpt-oauth.ts` +- Create: `cli/src/utils/chatgpt-oauth.ts` +- Create: `cli/src/components/chatgpt-connect-banner.tsx` +- Create: `scripts/chatgpt-oauth-validate.ts` (temporary validation utility) +- Modify: `common/src/constants/index.ts` +- Modify: `common/src/constants/analytics-events.ts` +- Modify: `sdk/src/env.ts` +- Modify: `sdk/src/credentials.ts` +- Modify: `sdk/src/impl/model-provider.ts` +- Modify: `sdk/src/impl/llm.ts` +- Modify: `sdk/src/index.ts` +- Modify: `cli/src/utils/input-modes.ts` +- Modify: `cli/src/components/input-mode-banner.tsx` +- Modify: `cli/src/data/slash-commands.ts` +- Modify: `cli/src/commands/command-registry.ts` +- Modify: `cli/src/commands/router.ts` +- Modify: `cli/src/chat.tsx` +- Modify: `cli/src/components/usage-banner.tsx` +- Modify: `cli/src/components/bottom-status-line.tsx` +- Modify: `cli/src/init/init-app.ts` +- Modify tests in SDK/CLI for new behavior. + +## Out of Scope +1. Device-code auth flow. +2. Legal/policy guarantees around undocumented endpoints. +3. Full quota/usage API integration for ChatGPT subscription plans. +4. Local callback server daemon beyond paste-based flow. +5. Enabling feature by default. diff --git a/.agents/skills/meta/SKILL.md b/.agents/skills/meta/SKILL.md index a66b88dafb..8b05efdddf 100644 --- a/.agents/skills/meta/SKILL.md +++ b/.agents/skills/meta/SKILL.md @@ -10,3 +10,9 @@ description: Broad project-level implementation and validation heuristics - From monorepo root, run workspace scripts as `bun run --cwd