From 74fd76df6be6fed6ffc63160c715441f8606b7c3 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 26 Mar 2026 16:22:42 +0000 Subject: [PATCH 01/34] Implement organization-scoped ClickHouse instances The only way to get a ClickHouse client now is through the factory. Refactored all existing code to use that and pass in an org. The runReplication and otlpExporter are the hot paths here which need special attention in reviews. --- .cursor/mcp.json | 6 +- .../organization-scoped-clickhouse.md | 6 + .../v3/ApiRunListPresenter.server.ts | 5 +- .../v3/CreateBulkActionPresenter.server.ts | 5 +- .../v3/RunTagListPresenter.server.ts | 5 +- .../presenters/v3/TaskListPresenter.server.ts | 25 +- .../presenters/v3/UsagePresenter.server.ts | 5 +- .../v3/ViewSchedulePresenter.server.ts | 5 +- .../v3/WaitpointPresenter.server.ts | 5 +- .../route.tsx | 8 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 7 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../route.tsx | 5 +- .../webapp/app/routes/api.v1.prompts.$slug.ts | 15 +- .../routes/api.v1.prompts.$slug.versions.ts | 12 +- .../app/routes/api.v1.prompts._index.ts | 5 +- apps/webapp/app/routes/otel.v1.logs.ts | 5 +- apps/webapp/app/routes/otel.v1.metrics.ts | 5 +- apps/webapp/app/routes/otel.v1.traces.ts | 5 +- ...projectParam.env.$envParam.logs.$logId.tsx | 5 +- ...ojects.$projectParam.env.$envParam.logs.ts | 5 +- ...nvParam.prompts.$promptSlug.generations.ts | 5 +- .../services/admin/missingLlmModels.server.ts | 10 +- .../clickhouseCredentialsService.server.ts | 109 +++++ .../clickhouse/clickhouseFactory.server.ts | 422 ++++++++++++++++++ .../clickhouse/clickhouseFactory.test.ts | 155 +++++++ .../clickhouseSecretSchemas.server.ts | 11 + .../app/services/clickhouseInstance.server.ts | 130 ------ .../app/services/queryService.server.ts | 5 +- .../services/runsReplicationService.server.ts | 107 ++++- 
apps/webapp/app/v3/otlpExporter.server.ts | 47 +- .../v3/services/bulk/BulkActionV2.server.ts | 8 +- 39 files changed, 959 insertions(+), 234 deletions(-) create mode 100644 .server-changes/organization-scoped-clickhouse.md create mode 100644 apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts create mode 100644 apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts create mode 100644 apps/webapp/app/services/clickhouse/clickhouseFactory.test.ts create mode 100644 apps/webapp/app/services/clickhouse/clickhouseSecretSchemas.server.ts delete mode 100644 apps/webapp/app/services/clickhouseInstance.server.ts diff --git a/.cursor/mcp.json b/.cursor/mcp.json index da39e4ffafe..c4b06a67630 100644 --- a/.cursor/mcp.json +++ b/.cursor/mcp.json @@ -1,3 +1,7 @@ { - "mcpServers": {} + "mcpServers": { + "linear": { + "url": "https://mcp.linear.app/mcp" + } + } } diff --git a/.server-changes/organization-scoped-clickhouse.md b/.server-changes/organization-scoped-clickhouse.md new file mode 100644 index 00000000000..874b9dc6026 --- /dev/null +++ b/.server-changes/organization-scoped-clickhouse.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Organization-scoped ClickHouse routing enables customers with HIPAA and other data security requirements to use dedicated database instances diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index 70b2c78b641..4f9c61b001a 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -9,7 +9,7 @@ import { type Project, type RuntimeEnvironment, type TaskRunStatus } from "@trig import assertNever from "assert-never"; import { z } from "zod"; import { API_VERSIONS, RunStatusUnspecifiedApiVersion } from "~/api/versions"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from 
"~/services/clickhouse/clickhouseFactory.server"; import { logger } from "~/services/logger.server"; import { CoercedDate } from "~/utils/zod"; import { ServiceValidationError } from "~/v3/services/baseService.server"; @@ -269,7 +269,8 @@ export class ApiRunListPresenter extends BasePresenter { options.machines = searchParams["filter[machine]"]; } - const presenter = new NextRunListPresenter(this._replica, clickhouseClient); + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); + const presenter = new NextRunListPresenter(this._replica, clickhouse); logger.debug("Calling RunListPresenter", { options }); diff --git a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts index acf511f0f5e..5e8bfc405b8 100644 --- a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts @@ -1,6 +1,6 @@ import { type PrismaClient } from "@trigger.dev/database"; import { CreateBulkActionSearchParams } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { getRunFiltersFromRequest } from "../RunFilters.server"; import { BasePresenter } from "./basePresenter.server"; @@ -24,8 +24,9 @@ export class CreateBulkActionPresenter extends BasePresenter { Object.fromEntries(new URL(request.url).searchParams) ); + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, }); diff --git a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts 
b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts index e9de368eceb..89b9c8b41fa 100644 --- a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts @@ -1,6 +1,6 @@ import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { BasePresenter } from "./basePresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { type PrismaClient } from "@trigger.dev/database"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; @@ -37,8 +37,9 @@ export class RunTagListPresenter extends BasePresenter { }: TagListOptions) { const hasFilters = Boolean(name?.trim()); + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, }); diff --git a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts index fc29f5510e8..ebbe1ca9f90 100644 --- a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts @@ -4,7 +4,7 @@ import { type TaskTriggerSource, } from "@trigger.dev/database"; import { $replica } from "~/db.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { type AverageDurations, ClickHouseEnvironmentMetricsRepository, @@ -25,10 +25,7 @@ export type TaskListItem = { export type TaskActivity = DailyTaskActivity[string]; export class TaskListPresenter { - constructor( - private readonly environmentMetricsRepository: EnvironmentMetricsRepository, - private readonly _replica: PrismaClientOrTransaction - ) {} + 
constructor(private readonly _replica: PrismaClientOrTransaction) {} public async call({ organizationId, @@ -77,9 +74,15 @@ export class TaskListPresenter { const slugs = tasks.map((t) => t.slug); + // Create org-specific environment metrics repository + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); + const environmentMetricsRepository = new ClickHouseEnvironmentMetricsRepository({ + clickhouse, + }); + // IMPORTANT: Don't await these, we want to return the promises // so we can defer the loading of the data - const activity = this.environmentMetricsRepository.getDailyTaskActivity({ + const activity = environmentMetricsRepository.getDailyTaskActivity({ organizationId, projectId, environmentId, @@ -87,7 +90,7 @@ export class TaskListPresenter { tasks: slugs, }); - const runningStats = this.environmentMetricsRepository.getCurrentRunningStats({ + const runningStats = environmentMetricsRepository.getCurrentRunningStats({ organizationId, projectId, environmentId, @@ -95,7 +98,7 @@ export class TaskListPresenter { tasks: slugs, }); - const durations = this.environmentMetricsRepository.getAverageDurations({ + const durations = environmentMetricsRepository.getAverageDurations({ organizationId, projectId, environmentId, @@ -110,9 +113,5 @@ export class TaskListPresenter { export const taskListPresenter = singleton("taskListPresenter", setupTaskListPresenter); function setupTaskListPresenter() { - const environmentMetricsRepository = new ClickHouseEnvironmentMetricsRepository({ - clickhouse: clickhouseClient, - }); - - return new TaskListPresenter(environmentMetricsRepository, $replica); + return new TaskListPresenter($replica); } diff --git a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts index 2fac95617a6..c4654e870ed 100644 --- a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts @@ -4,7 +4,7 @@ import { 
getUsage, getUsageSeries } from "~/services/platform.v3.server"; import { createTimeSeriesData } from "~/utils/graphs"; import { BasePresenter } from "./basePresenter.server"; import { DataPoint, linear } from "regression"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; type Options = { organizationId: string; @@ -124,7 +124,8 @@ async function getTaskUsageByOrganization( endOfMonth: Date, replica: PrismaClientOrTransaction ) { - const [queryError, tasks] = await clickhouseClient.taskRuns.getTaskUsageByOrganization({ + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); + const [queryError, tasks] = await clickhouse.taskRuns.getTaskUsageByOrganization({ startTime: startOfMonth.getTime(), endTime: endOfMonth.getTime(), organizationId, diff --git a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts index f0e955fd04d..52ebad96b4e 100644 --- a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts @@ -1,7 +1,7 @@ import { ScheduleObject } from "@trigger.dev/core/v3"; import { PrismaClient, prisma } from "~/db.server"; import { displayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { nextScheduledTimestamps } from "~/v3/utils/calculateNextSchedule.server"; import { NextRunListPresenter } from "./NextRunListPresenter.server"; import { scheduleWhereClause } from "~/models/schedules.server"; @@ -75,7 +75,8 @@ export class ViewSchedulePresenter { ? 
nextScheduledTimestamps(schedule.generatorExpression, schedule.timezone, new Date(), 5) : []; - const runPresenter = new NextRunListPresenter(this.#prismaClient, clickhouseClient); + const clickhouse = await getClickhouseForOrganization(schedule.project.organizationId, "standard"); + const runPresenter = new NextRunListPresenter(this.#prismaClient, clickhouse); const { runs } = await runPresenter.call(schedule.project.organizationId, environmentId, { projectId: schedule.project.id, scheduleId: schedule.id, diff --git a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts index 9abcdf32215..15eaef0d13d 100644 --- a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts @@ -1,5 +1,5 @@ import { isWaitpointOutputTimeout, prettyPrintPacket } from "@trigger.dev/core/v3"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { generateHttpCallbackUrl } from "~/services/httpCallback.server"; import { logger } from "~/services/logger.server"; import { BasePresenter } from "./basePresenter.server"; @@ -79,7 +79,8 @@ export class WaitpointPresenter extends BasePresenter { const connectedRuns: NextRunListItem[] = []; if (connectedRunIds.length > 0) { - const runPresenter = new NextRunListPresenter(this._prisma, clickhouseClient); + const clickhouse = await getClickhouseForOrganization(waitpoint.environment.organizationId, "standard"); + const runPresenter = new NextRunListPresenter(this._prisma, clickhouse); const { runs } = await runPresenter.call( waitpoint.environment.organizationId, environmentId, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx 
b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx index 283be35e50b..533b0630467 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx @@ -34,7 +34,7 @@ import { MetricDashboardPresenter, } from "~/presenters/v3/MetricDashboardPresenter.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUser } from "~/services/session.server"; import { cn } from "~/utils/cn"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; @@ -77,10 +77,12 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const filters = dashboard.filters ?? 
["tasks", "queues"]; + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + // Load distinct models from ClickHouse if the dashboard has a models filter let possibleModels: { model: string; system: string }[] = []; if (filters.includes("models")) { - const queryFn = clickhouseClient.reader.query({ + const queryFn = clickhouse.reader.query({ name: "getDistinctModels", query: `SELECT response_model, any(gen_ai_system) AS gen_ai_system FROM trigger_dev.llm_metrics_v1 WHERE organization_id = {organizationId: String} AND project_id = {projectId: String} AND environment_id = {environmentId: String} AND response_model != '' GROUP BY response_model ORDER BY response_model`, params: z.object({ @@ -100,7 +102,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { } } - const promptPresenter = new PromptPresenter(clickhouseClient); + const promptPresenter = new PromptPresenter(clickhouse); const [possiblePrompts, possibleOperations, possibleProviders] = await Promise.all([ filters.includes("prompts") ? 
promptPresenter.getDistinctPromptSlugs(project.organizationId, project.id, environment.id) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx index 80392208886..7a54637f703 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx @@ -71,7 +71,7 @@ import { type ErrorOccurrences, type ErrorsList as ErrorsListData, } from "~/presenters/v3/ErrorsListPresenter.server"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { getCurrentPlan } from "~/services/platform.v3.server"; import { requireUser } from "~/services/session.server"; import { formatNumberCompact } from "~/utils/numberFormatter"; @@ -124,7 +124,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const plan = await getCurrentPlan(project.organizationId); const retentionLimitDays = plan?.v3Subscription?.plan?.limits.logRetentionDays.number ?? 
30; - const presenter = new ErrorsListPresenter($replica, logsClickhouseClient); + const logsClickhouse = await getClickhouseForOrganization(project.organizationId, "logs"); + const presenter = new ErrorsListPresenter($replica, logsClickhouse); const listPromise = presenter .call(project.organizationId, environment.id, { diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx index 28e49a014ff..d0138ca0dbd 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx @@ -16,7 +16,7 @@ import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { LogsListPresenter, LogEntry } from "~/presenters/v3/LogsListPresenter.server"; import type { LogLevel } from "~/utils/logUtils"; import { $replica, prisma } from "~/db.server"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { Suspense, useCallback, useEffect, useMemo, useRef, useState, useTransition } from "react"; @@ -137,7 +137,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const plan = await getCurrentPlan(project.organizationId); const retentionLimitDays = plan?.v3Subscription?.plan?.limits.logRetentionDays.number ?? 
30; - const presenter = new LogsListPresenter($replica, logsClickhouseClient); + const logsClickhouse = await getClickhouseForOrganization(project.organizationId, "logs"); + const presenter = new LogsListPresenter($replica, logsClickhouse); const listPromise = presenter .call(project.organizationId, environment.id, { diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx index 7a25f996d4d..4256c64d455 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx @@ -28,7 +28,7 @@ import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { ModelRegistryPresenter } from "~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUserId } from "~/services/session.server"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; @@ -68,7 +68,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Environment not found", { status: 404 }); } - const presenter = new ModelRegistryPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new ModelRegistryPresenter(clickhouse); const model = await presenter.getModelDetail(modelId); if (!model) { diff --git 
a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index 344d9eefb11..5e3e3e4b600 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -73,7 +73,7 @@ import { type PopularModel, ModelRegistryPresenter, } from "~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUserId } from "~/services/session.server"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; @@ -112,7 +112,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Environment not found", { status: 404 }); } - const presenter = new ModelRegistryPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new ModelRegistryPresenter(clickhouse); const catalog = await presenter.getModelCatalog(); const now = new Date(); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx index 661fb294268..879dcf47e6b 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx @@ -20,7 +20,7 @@ import { type ModelComparisonItem, 
ModelRegistryPresenter, } from "~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUserId } from "~/services/session.server"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; @@ -55,7 +55,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { return typedjson({ comparison: [] as ModelComparisonItem[], models: responseModels }); } - const presenter = new ModelRegistryPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new ModelRegistryPresenter(clickhouse); const now = new Date(); const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000); diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx index 5a953c0199b..f37e8d3fed9 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx @@ -70,7 +70,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { type GenerationRow, PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; import { SpanView } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from 
"~/services/clickhouse/clickhouseFactory.server"; import { getResizableSnapshot } from "~/services/resizablePanel.server"; import { requireUserId } from "~/services/session.server"; import { PromptService } from "~/v3/services/promptService.server"; @@ -242,7 +242,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const startTime = fromTime ? new Date(fromTime) : new Date(Date.now() - periodMs); const endTime = toTime ? new Date(toTime) : new Date(); - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); let generations: Awaited>["generations"] = []; let generationsPagination: { next?: string } = {}; try { @@ -273,7 +274,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { // Load distinct filter values and resizable snapshots in parallel const distinctQuery = (col: string, name: string) => - clickhouseClient.reader.query({ + clickhouse.reader.query({ name, query: `SELECT DISTINCT ${col} AS val FROM trigger_dev.llm_metrics_v1 WHERE environment_id = {environmentId: String} AND prompt_slug = {promptSlug: String} AND ${col} != '' ORDER BY val`, params: z.object({ environmentId: z.string(), promptSlug: z.string() }), diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx index 02c7cc444b7..4e229a48f74 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx @@ -22,7 +22,7 @@ import { useProject } from "~/hooks/useProject"; import { findProjectBySlug } from "~/models/project.server"; import { 
findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUserId } from "~/services/session.server"; import { docsPath, EnvironmentParamSchema, v3PromptsPath } from "~/utils/pathBuilder"; import { LinkButton } from "~/components/primitives/Buttons"; @@ -46,7 +46,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { throw new Response("Environment not found", { status: 404 }); } - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); const prompts = await presenter.listPrompts(project.id, environment.id); const sparklines = await presenter.getUsageSparklines( diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx index 601ffb2d766..ab92bceb182 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx @@ -92,7 +92,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { NextRunListPresenter } from "~/presenters/v3/NextRunListPresenter.server"; import { RunEnvironmentMismatchError, RunPresenter } from "~/presenters/v3/RunPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from 
"~/services/clickhouse/clickhouseFactory.server"; import { getImpersonationId } from "~/services/impersonation.server"; import { logger } from "~/services/logger.server"; import { getResizableSnapshot } from "~/services/resizablePanel.server"; @@ -182,7 +182,8 @@ async function getRunsListFromTableState({ return null; } - const runsListPresenter = new NextRunListPresenter($replica, clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const runsListPresenter = new NextRunListPresenter($replica, clickhouse); const currentPageResult = await runsListPresenter.call(project.organizationId, environment.id, { userId, projectId: project.id, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx index d271e6f2b22..691b45c678b 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx @@ -45,7 +45,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { getRunFiltersFromRequest } from "~/presenters/RunFilters.server"; import { NextRunListPresenter } from "~/presenters/v3/NextRunListPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { setRootOnlyFilterPreference, uiPreferencesStorage, @@ -89,7 +89,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const filters = await getRunFiltersFromRequest(request); - const presenter = new NextRunListPresenter($replica, clickhouseClient); + const clickhouse = 
await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new NextRunListPresenter($replica, clickhouse); const list = presenter.call(project.organizationId, environment.id, { userId, projectId: project.id, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx index ee69419e1b7..38356c6a247 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx @@ -74,7 +74,7 @@ import { Dialog, DialogContent, DialogHeader, DialogTrigger } from "~/components import { DialogClose, DialogDescription } from "@radix-ui/react-dialog"; import { FormButtons } from "~/components/primitives/FormButtons"; import { $replica } from "~/db.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { RegionsPresenter, type Region } from "~/presenters/v3/RegionsPresenter.server"; import { TestSidebarTabs } from "./TestSidebarTabs"; import { AIPayloadTabContent } from "./AIPayloadTabContent"; @@ -102,7 +102,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }); } - const presenter = new TestTaskPresenter($replica, clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new TestTaskPresenter($replica, clickhouse); try { const [result, regionsResult] = await Promise.all([ presenter.call({ diff --git a/apps/webapp/app/routes/api.v1.prompts.$slug.ts b/apps/webapp/app/routes/api.v1.prompts.$slug.ts index 0d101ae6122..0f12686972e 100644 --- 
a/apps/webapp/app/routes/api.v1.prompts.$slug.ts +++ b/apps/webapp/app/routes/api.v1.prompts.$slug.ts @@ -2,7 +2,7 @@ import { json } from "@remix-run/server-runtime"; import { z } from "zod"; import { prisma } from "~/db.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { createActionApiRoute, createLoaderApiRoute, @@ -33,6 +33,13 @@ export const loader = createLoaderApiRoute( slug: params.slug, }, }, + include: { + project: { + select: { + organizationId: true, + }, + }, + }, }); }, authorization: { @@ -45,7 +52,8 @@ export const loader = createLoaderApiRoute( return json({ error: "Prompt not found" }, { status: 404 }); } - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(prompt.project.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); const version = await presenter.resolveVersion(prompt.id, { version: searchParams.version, label: searchParams.label, @@ -115,7 +123,8 @@ const { action } = createActionApiRoute( return json({ error: "Prompt not found" }, { status: 404 }); } - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(authentication.environment.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); const version = await presenter.resolveVersion(prompt.id, { version: body.version, label: body.label, diff --git a/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts b/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts index 49f90a98c84..5141ed78b97 100644 --- a/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts +++ b/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts @@ -2,7 +2,7 @@ import { json } from "@remix-run/server-runtime"; import { z } from 
"zod"; import { prisma } from "~/db.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; const ParamsSchema = z.object({ @@ -23,6 +23,13 @@ export const loader = createLoaderApiRoute( slug: params.slug, }, }, + include: { + project: { + select: { + organizationId: true, + }, + }, + }, }); }, authorization: { @@ -35,7 +42,8 @@ export const loader = createLoaderApiRoute( return json({ error: "Prompt not found" }, { status: 404 }); } - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(prompt.project.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); const versions = await presenter.listVersions(prompt.id); return json({ diff --git a/apps/webapp/app/routes/api.v1.prompts._index.ts b/apps/webapp/app/routes/api.v1.prompts._index.ts index e4ef5f9702e..adce4dba80e 100644 --- a/apps/webapp/app/routes/api.v1.prompts._index.ts +++ b/apps/webapp/app/routes/api.v1.prompts._index.ts @@ -1,6 +1,6 @@ import { json } from "@remix-run/server-runtime"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; export const loader = createLoaderApiRoute( @@ -14,7 +14,8 @@ export const loader = createLoaderApiRoute( }, }, async ({ authentication }) => { - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(authentication.environment.organizationId, "standard"); + const presenter = new 
PromptPresenter(clickhouse); const prompts = await presenter.listPrompts( authentication.environment.projectId, authentication.environment.id diff --git a/apps/webapp/app/routes/otel.v1.logs.ts b/apps/webapp/app/routes/otel.v1.logs.ts index a05ddd24cf2..1dc7c07c16c 100644 --- a/apps/webapp/app/routes/otel.v1.logs.ts +++ b/apps/webapp/app/routes/otel.v1.logs.ts @@ -4,12 +4,13 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { + const exporter = await otlpExporter; const contentType = request.headers.get("content-type")?.toLowerCase() ?? ""; if (contentType.startsWith("application/json")) { const body = await request.json(); - const exportResponse = await otlpExporter.exportLogs(body as ExportLogsServiceRequest); + const exportResponse = await exporter.exportLogs(body as ExportLogsServiceRequest); return json(exportResponse, { status: 200 }); } else if (contentType.startsWith("application/x-protobuf")) { @@ -17,7 +18,7 @@ export async function action({ request }: ActionFunctionArgs) { const exportRequest = ExportLogsServiceRequest.decode(new Uint8Array(buffer)); - const exportResponse = await otlpExporter.exportLogs(exportRequest); + const exportResponse = await exporter.exportLogs(exportRequest); return new Response(ExportLogsServiceResponse.encode(exportResponse).finish(), { status: 200, diff --git a/apps/webapp/app/routes/otel.v1.metrics.ts b/apps/webapp/app/routes/otel.v1.metrics.ts index 5529f9310ec..9a09cb18233 100644 --- a/apps/webapp/app/routes/otel.v1.metrics.ts +++ b/apps/webapp/app/routes/otel.v1.metrics.ts @@ -7,12 +7,13 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { + const exporter = await otlpExporter; const contentType = request.headers.get("content-type")?.toLowerCase() ?? 
""; if (contentType.startsWith("application/json")) { const body = await request.json(); - const exportResponse = await otlpExporter.exportMetrics( + const exportResponse = await exporter.exportMetrics( body as ExportMetricsServiceRequest ); @@ -22,7 +23,7 @@ export async function action({ request }: ActionFunctionArgs) { const exportRequest = ExportMetricsServiceRequest.decode(new Uint8Array(buffer)); - const exportResponse = await otlpExporter.exportMetrics(exportRequest); + const exportResponse = await exporter.exportMetrics(exportRequest); return new Response(ExportMetricsServiceResponse.encode(exportResponse).finish(), { status: 200, diff --git a/apps/webapp/app/routes/otel.v1.traces.ts b/apps/webapp/app/routes/otel.v1.traces.ts index 609b72c0465..8e974c7b1dd 100644 --- a/apps/webapp/app/routes/otel.v1.traces.ts +++ b/apps/webapp/app/routes/otel.v1.traces.ts @@ -4,12 +4,13 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { + const exporter = await otlpExporter; const contentType = request.headers.get("content-type")?.toLowerCase() ?? 
""; if (contentType.startsWith("application/json")) { const body = await request.json(); - const exportResponse = await otlpExporter.exportTraces(body as ExportTraceServiceRequest); + const exportResponse = await exporter.exportTraces(body as ExportTraceServiceRequest); return json(exportResponse, { status: 200 }); } else if (contentType.startsWith("application/x-protobuf")) { @@ -17,7 +18,7 @@ export async function action({ request }: ActionFunctionArgs) { const exportRequest = ExportTraceServiceRequest.decode(new Uint8Array(buffer)); - const exportResponse = await otlpExporter.exportTraces(exportRequest); + const exportResponse = await exporter.exportTraces(exportRequest); return new Response(ExportTraceServiceResponse.encode(exportResponse).finish(), { status: 200, diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx index f862ced6b05..0e0469bcd1b 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx @@ -1,7 +1,7 @@ import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { typedjson } from "remix-typedjson"; import { z } from "zod"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { requireUserId } from "~/services/session.server"; import { LogDetailPresenter } from "~/presenters/v3/LogDetailPresenter.server"; import { findProjectBySlug } from "~/models/project.server"; @@ -43,7 +43,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const [traceId, spanId, , startTime] = parts; - const presenter = new 
LogDetailPresenter($replica, logsClickhouseClient); + const logsClickhouse = await getClickhouseForOrganization(project.organizationId, "logs"); + const presenter = new LogDetailPresenter($replica, logsClickhouse); let result; try { diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts index 66ddebe4e2a..d55c7496258 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts @@ -6,7 +6,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { LogsListPresenter, type LogLevel, LogsListOptionsSchema } from "~/presenters/v3/LogsListPresenter.server"; import { $replica } from "~/db.server"; -import { logsClickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { getCurrentPlan } from "~/services/platform.v3.server"; // Valid log levels for filtering @@ -69,7 +69,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { retentionLimitDays, }) as any; // Validated by LogsListOptionsSchema at runtime - const presenter = new LogsListPresenter($replica, logsClickhouseClient); + const logsClickhouse = await getClickhouseForOrganization(project.organizationId, "logs"); + const presenter = new LogsListPresenter($replica, logsClickhouse); const result = await presenter.call(project.organizationId, environment.id, options); return json({ diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts 
b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts index 77a55ec3f0b..17a11e05837 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts @@ -6,7 +6,7 @@ import { EnvironmentParamSchema } from "~/utils/pathBuilder"; import { parsePeriodToMs } from "~/utils/periods"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { PromptPresenter, type GenerationRow, @@ -59,7 +59,8 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const operations = url.searchParams.getAll("operations").filter(Boolean); const providers = url.searchParams.getAll("providers").filter(Boolean); - const presenter = new PromptPresenter(clickhouseClient); + const clickhouse = await getClickhouseForOrganization(project.organizationId, "standard"); + const presenter = new PromptPresenter(clickhouse); const result = await presenter.listGenerations({ environmentId: environment.id, promptSlug, diff --git a/apps/webapp/app/services/admin/missingLlmModels.server.ts b/apps/webapp/app/services/admin/missingLlmModels.server.ts index 7ce6bc2ab7e..07e6160ee03 100644 --- a/apps/webapp/app/services/admin/missingLlmModels.server.ts +++ b/apps/webapp/app/services/admin/missingLlmModels.server.ts @@ -1,4 +1,4 @@ -import { adminClickhouseClient } from "~/services/clickhouseInstance.server"; +import { getAdminClickhouse } from "~/services/clickhouse/clickhouseFactory.server"; import { llmPricingRegistry } from 
"~/v3/llmPricingRegistry.server"; export type MissingLlmModel = { @@ -13,8 +13,10 @@ export async function getMissingLlmModels(opts: { const lookbackHours = opts.lookbackHours ?? 24; const since = new Date(Date.now() - lookbackHours * 60 * 60 * 1000); + const adminClickhouse = getAdminClickhouse(); + // queryBuilderFast returns a factory function β€” call it to get the builder - const createBuilder = adminClickhouseClient.reader.queryBuilderFast<{ + const createBuilder = adminClickhouse.reader.queryBuilderFast<{ model: string; system: string; cnt: string; @@ -93,7 +95,9 @@ export async function getMissingModelSamples(opts: { const limit = opts.limit ?? 10; const since = new Date(Date.now() - lookbackHours * 60 * 60 * 1000); - const createBuilder = adminClickhouseClient.reader.queryBuilderFast({ + const adminClickhouse = getAdminClickhouse(); + + const createBuilder = adminClickhouse.reader.queryBuilderFast({ name: "missingModelSamples", table: "trigger_dev.task_events_v2", columns: [ diff --git a/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts b/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts new file mode 100644 index 00000000000..c2c8c77f7c4 --- /dev/null +++ b/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts @@ -0,0 +1,109 @@ +import { getSecretStore } from "~/services/secrets/secretStore.server"; +import { prisma } from "~/db.server"; +import { + ClickhouseConnectionSchema, + getClickhouseSecretKey, +} from "./clickhouseSecretSchemas.server"; +import { clearClickhouseCacheForOrganization } from "./clickhouseFactory.server"; + +export async function setOrganizationClickhouseUrl( + organizationId: string, + clientType: "standard" | "events" | "replication", + url: string +): Promise { + // Validate URL format + const connection = ClickhouseConnectionSchema.parse({ url }); + + // Store in SecretStore + const secretStore = getSecretStore("DATABASE"); + const secretKey = 
getClickhouseSecretKey(organizationId, clientType); + await secretStore.setSecret(secretKey, connection); + + // Update featureFlags to reference the secret + const org = await prisma.organization.findUnique({ + where: { id: organizationId }, + select: { featureFlags: true }, + }); + + const featureFlags = (org?.featureFlags || {}) as any; + const clickhouseConfig = featureFlags.clickhouse || {}; + clickhouseConfig[clientType] = secretKey; + featureFlags.clickhouse = clickhouseConfig; + + await prisma.organization.update({ + where: { id: organizationId }, + data: { featureFlags }, + }); + + // Clear cache + clearClickhouseCacheForOrganization(organizationId); +} + +export async function removeOrganizationClickhouseUrl( + organizationId: string, + clientType: "standard" | "events" | "replication" +): Promise { + // Remove from SecretStore + const secretStore = getSecretStore("DATABASE"); + const secretKey = getClickhouseSecretKey(organizationId, clientType); + await secretStore.deleteSecret(secretKey); + + // Update featureFlags + const org = await prisma.organization.findUnique({ + where: { id: organizationId }, + select: { featureFlags: true }, + }); + + if (org?.featureFlags) { + const featureFlags = org.featureFlags as any; + if (featureFlags.clickhouse && featureFlags.clickhouse[clientType]) { + delete featureFlags.clickhouse[clientType]; + + // If no more clickhouse configs, remove the clickhouse key entirely + if (Object.keys(featureFlags.clickhouse).length === 0) { + delete featureFlags.clickhouse; + } + + await prisma.organization.update({ + where: { id: organizationId }, + data: { featureFlags }, + }); + } + } + + // Clear cache + clearClickhouseCacheForOrganization(organizationId); +} + +export async function getOrganizationClickhouseUrl( + organizationId: string, + clientType: "standard" | "events" | "replication" +): Promise { + const org = await prisma.organization.findUnique({ + where: { id: organizationId }, + select: { featureFlags: true }, + }); + 
+ if (!org?.featureFlags) { + return null; + } + + const clickhouseConfig = (org.featureFlags as any).clickhouse; + if (!clickhouseConfig || typeof clickhouseConfig !== "object") { + return null; + } + + const secretKey = clickhouseConfig[clientType]; + if (!secretKey || typeof secretKey !== "string") { + return null; + } + + const secretStore = getSecretStore("DATABASE"); + const connection = await secretStore.getSecret(ClickhouseConnectionSchema, secretKey); + + if (!connection) { + return null; + } + + return connection.url; +} diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts new file mode 100644 index 00000000000..94498868759 --- /dev/null +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -0,0 +1,422 @@ +/** + * ClickHouse Factory - Organization-Scoped ClickHouse Routing + * + * This module provides organization-scoped ClickHouse instance routing to support: + * - HIPAA compliance (dedicated ClickHouse clusters) + * - High-volume customer isolation + * - Geographic data residency requirements + * - Performance tier differentiation + * + * ## Architecture + * + * ### Credential Storage + * - ClickHouse URLs stored encrypted in SecretStore (AES-256-GCM) + * - Organization references secret via `featureFlags.clickhouse` JSON + * - No plaintext credentials in database + * + * ### Caching Strategy + * - **Org configs**: Unkey cache with LRU memory (5min fresh, 10min stale, SWR) + * - **ClickHouse clients**: Cached by hostname hash (multiple orgs share same instance) + * - **Event repositories**: Cached by hostname hash (stateful, must be reused) + * - **Security**: Memory-only cache for org configs (no credentials in Redis) + * + * ## Usage in Presenters + * + * Presenters should fetch org-specific ClickHouse clients in their `call()` method: + * + * ```typescript + * import { getClickhouseForOrganization } from 
"~/services/clickhouse/clickhouseFactory.server"; + * + * export class MyPresenter extends BasePresenter { + * constructor(private options: PresenterOptions = {}) { + * super(); + * } + * + * async call({ organizationId, ... }) { + * const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); + * // Use clickhouse for queries... + * } + * } + * ``` + * + * ## Usage in Services + * + * The replication service and OTLP exporter automatically route data by organization. + * Other services should follow the same pattern when working with ClickHouse. + * + * @module clickhouseFactory + */ + +import { ClickHouse } from "@internal/clickhouse"; +import { createHash } from "crypto"; +import { createCache, DefaultStatefulContext, Namespace } from "@unkey/cache"; +import { createLRUMemoryStore } from "@internal/cache"; +import { getSecretStore } from "~/services/secrets/secretStore.server"; +import { prisma } from "~/db.server"; +import { + ClickhouseConnectionSchema, + getClickhouseSecretKey, +} from "./clickhouseSecretSchemas.server"; +import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; +import { env } from "~/env.server"; +import { singleton } from "~/utils/singleton"; + +// Module-level caches for ClickHouse clients and event repositories +const clickhouseClientCache = new Map(); +const eventRepositoryCache = new Map(); + +// Default ClickHouse clients (not exported - internal use only) +const defaultClickhouseClient = singleton("clickhouseClient", initializeClickhouseClient); + +function initializeClickhouseClient() { + const url = new URL(env.CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + console.log(`πŸ—ƒοΈ Clickhouse service enabled to host ${url.host}`); + + return new ClickHouse({ + url: url.toString(), + name: "clickhouse-instance", + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: 
env.CLICKHOUSE_LOG_LEVEL, + compression: { + request: true, + }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + +const defaultLogsClickhouseClient = singleton( + "logsClickhouseClient", + initializeLogsClickhouseClient +); + +function initializeLogsClickhouseClient() { + if (!env.LOGS_CLICKHOUSE_URL) { + throw new Error("LOGS_CLICKHOUSE_URL is not set"); + } + + const url = new URL(env.LOGS_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "logs-clickhouse", + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { + request: true, + }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + clickhouseSettings: { + max_memory_usage: env.CLICKHOUSE_LOGS_LIST_MAX_MEMORY_USAGE.toString(), + max_bytes_before_external_sort: + env.CLICKHOUSE_LOGS_LIST_MAX_BYTES_BEFORE_EXTERNAL_SORT.toString(), + max_threads: env.CLICKHOUSE_LOGS_LIST_MAX_THREADS, + ...(env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ && { + max_rows_to_read: env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ.toString(), + }), + ...(env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME && { + max_execution_time: env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME, + }), + }, + }); +} + +const defaultAdminClickhouseClient = singleton( + "adminClickhouseClient", + initializeAdminClickhouseClient +); + +function initializeAdminClickhouseClient() { + if (!env.ADMIN_CLICKHOUSE_URL) { + throw new Error("ADMIN_CLICKHOUSE_URL is not set"); + } + + const url = new URL(env.ADMIN_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "admin-clickhouse", + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { + request: true, + }, + 
maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + +const defaultQueryClickhouseClient = singleton( + "queryClickhouseClient", + initializeQueryClickhouseClient +); + +function initializeQueryClickhouseClient() { + if (!env.QUERY_CLICKHOUSE_URL) { + throw new Error("QUERY_CLICKHOUSE_URL is not set"); + } + + const url = new URL(env.QUERY_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "query-clickhouse", + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { + request: true, + }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + +// Org config cache with Unkey (memory-only, no Redis for security) +type OrgClickhouseConfig = { + organizationId: string; + hostnameHash: string; + url: string; + clientType: string; +}; + +const ctx = new DefaultStatefulContext(); +const memory = createLRUMemoryStore(1000); + +const orgConfigCache = createCache({ + orgClickhouse: new Namespace(ctx, { + stores: [memory], // Memory-only, no Redis store for security + fresh: 5 * 60 * 1000, // 5 minutes + stale: 10 * 60 * 1000, // 10 minutes (SWR pattern) + }), +}); + +function hashHostname(url: string): string { + const parsed = new URL(url); + return createHash("sha256").update(parsed.hostname).digest("hex"); +} + +async function getOrgClickhouseConfig( + ctx: DefaultStatefulContext, + orgId: string, + clientType: string +): Promise { + const org = await prisma.organization.findUnique({ + where: { id: orgId }, + select: { featureFlags: true }, + }); + + if (!org?.featureFlags) { + return null; + } + + const clickhouseConfig = (org.featureFlags as any).clickhouse; + if (!clickhouseConfig || typeof clickhouseConfig !== "object") { + return null; + } + + const secretKey = clickhouseConfig[clientType]; + if (!secretKey || typeof secretKey !== "string") { + 
return null; + } + + const secretStore = getSecretStore("DATABASE"); + const connection = await secretStore.getSecret(ClickhouseConnectionSchema, secretKey); + + if (!connection) { + return null; + } + + const hostnameHash = hashHostname(connection.url); + + return { + organizationId: orgId, + hostnameHash, + url: connection.url, + clientType, + }; +} + +export async function getClickhouseForOrganization( + organizationId: string, + clientType: "standard" | "events" | "replication" | "logs" | "query" | "admin" +): Promise { + // Try to get org-specific config + const configResult = await orgConfigCache.orgClickhouse.swr( + `org:${organizationId}:ch:${clientType}`, + async () => getOrgClickhouseConfig(ctx, organizationId, clientType) + ); + + // Handle Result type - check for error or null value + const config = configResult.err ? null : configResult.val; + + // If no custom config, return appropriate default client + if (!config) { + switch (clientType) { + case "standard": + case "events": + case "replication": + return defaultClickhouseClient; + case "logs": + return defaultLogsClickhouseClient; + case "query": + return defaultQueryClickhouseClient; + case "admin": + return defaultAdminClickhouseClient; + } + } + + // Check if client already exists for this hostname + const cacheKey = `${config.hostnameHash}:${clientType}`; + let client = clickhouseClientCache.get(cacheKey); + + if (!client) { + const url = new URL(config.url); + url.searchParams.delete("secure"); + + client = new ClickHouse({ + url: url.toString(), + name: `org-clickhouse-${clientType}`, + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { + request: true, + }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); + clickhouseClientCache.set(cacheKey, client); + } + + return client; +} + +export async function getEventRepositoryForOrganization( + 
organizationId: string +): Promise { + // Try to get org-specific config + const configResult = await orgConfigCache.orgClickhouse.swr( + `org:${organizationId}:ch:events`, + async () => getOrgClickhouseConfig(ctx, organizationId, "events") + ); + + // Handle Result type - check for error or null value + const config = configResult.err ? null : configResult.val; + + // If no custom config, return default repository (created on demand) + if (!config) { + const defaultKey = "default:events"; + let defaultRepo = eventRepositoryCache.get(defaultKey); + if (!defaultRepo) { + // Create default event repository using standard clickhouse client + // This matches the existing pattern in clickhouseEventRepositoryInstance.server.ts + const eventsClickhouse = await getEventsClickhouseClient(); + defaultRepo = new ClickhouseEventRepository({ + clickhouse: eventsClickhouse, + batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, + flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, + maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, + maximumTraceDetailedSummaryViewCount: + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, + insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, + waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", + asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, + asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, + startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, + llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, + llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, + llmMetricsMaxBatchSize: env.LLM_METRICS_MAX_BATCH_SIZE, + llmMetricsMaxConcurrency: env.LLM_METRICS_MAX_CONCURRENCY, + version: "v2", + }); + eventRepositoryCache.set(defaultKey, defaultRepo); + } + return defaultRepo; + } + + // Check if repository already exists for this hostname + 
const cacheKey = `${config.hostnameHash}:events`; + let repository = eventRepositoryCache.get(cacheKey); + + if (!repository) { + const client = await getClickhouseForOrganization(organizationId, "events"); + repository = new ClickhouseEventRepository({ + clickhouse: client, + batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, + flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, + maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, + maximumTraceDetailedSummaryViewCount: + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, + insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, + waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", + asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, + asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, + startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, + llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, + llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, + llmMetricsMaxBatchSize: env.LLM_METRICS_MAX_BATCH_SIZE, + llmMetricsMaxConcurrency: env.LLM_METRICS_MAX_CONCURRENCY, + version: "v2", + }); + eventRepositoryCache.set(cacheKey, repository); + } + + return repository; +} + +// Helper to create the default events ClickHouse client +async function getEventsClickhouseClient(): Promise { + if (!env.EVENTS_CLICKHOUSE_URL) { + throw new Error("EVENTS_CLICKHOUSE_URL is not set"); + } + + const url = new URL(env.EVENTS_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "task-events", + keepAlive: { + enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL, + compression: { + request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", + }, + 
maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + +/** + * Get admin ClickHouse client for cross-organization queries + * This should only be used for admin tools and analytics that need to query across all orgs + */ +export function getAdminClickhouse(): ClickHouse { + return defaultAdminClickhouseClient; +} + +// Clear caches when needed (e.g., when org config changes) +export function clearClickhouseCacheForOrganization(organizationId: string): void { + // The Unkey cache will naturally expire based on TTL (5min fresh, 10min stale) + // No explicit removal needed - cache entries will be refreshed on next access + // Note: We don't clear client/repository caches as they're keyed by hostname + // and may be shared by other orgs +} diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.test.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.test.ts new file mode 100644 index 00000000000..f0b24b941b1 --- /dev/null +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { prisma } from "~/db.server"; +import { + getClickhouseForOrganization, + getEventRepositoryForOrganization, + clearClickhouseCacheForOrganization, +} from "./clickhouseFactory.server"; +import { + setOrganizationClickhouseUrl, + removeOrganizationClickhouseUrl, + getOrganizationClickhouseUrl, +} from "./clickhouseCredentialsService.server"; + +describe("ClickHouse Factory", () => { + const testOrgId = "test-org-" + Date.now(); + const testClickhouseUrl = "https://test-ch.example.com:8443?user=test&password=secret"; + + beforeEach(async () => { + // Clean up any existing test data + await prisma.organization.deleteMany({ + where: { id: testOrgId }, + }); + }); + + it("should return default ClickHouse client when org has no custom config", async () => { + const client = await getClickhouseForOrganization(testOrgId, "standard"); + 
expect(client).toBeDefined(); + // Default client should be returned (not null) + expect(client).toBeTruthy(); + }); + + it("should set and retrieve organization ClickHouse URL", async () => { + // First create the test organization + await prisma.organization.create({ + data: { + id: testOrgId, + title: "Test Org", + slug: "test-org-" + Date.now(), + }, + }); + + // Set the URL + await setOrganizationClickhouseUrl(testOrgId, "standard", testClickhouseUrl); + + // Retrieve it + const retrievedUrl = await getOrganizationClickhouseUrl(testOrgId, "standard"); + expect(retrievedUrl).toBe(testClickhouseUrl); + + // Verify it's stored in featureFlags + const org = await prisma.organization.findUnique({ + where: { id: testOrgId }, + select: { featureFlags: true }, + }); + + expect(org?.featureFlags).toBeDefined(); + const featureFlags = org?.featureFlags as any; + expect(featureFlags.clickhouse).toBeDefined(); + expect(featureFlags.clickhouse.standard).toBeDefined(); + + // Clean up + await removeOrganizationClickhouseUrl(testOrgId, "standard"); + await prisma.organization.delete({ where: { id: testOrgId } }); + }); + + it("should remove organization ClickHouse URL", async () => { + // First create the test organization + await prisma.organization.create({ + data: { + id: testOrgId, + title: "Test Org", + slug: "test-org-" + Date.now(), + }, + }); + + // Set and then remove + await setOrganizationClickhouseUrl(testOrgId, "standard", testClickhouseUrl); + await removeOrganizationClickhouseUrl(testOrgId, "standard"); + + // Verify it's gone + const retrievedUrl = await getOrganizationClickhouseUrl(testOrgId, "standard"); + expect(retrievedUrl).toBeNull(); + + // Clean up + await prisma.organization.delete({ where: { id: testOrgId } }); + }); + + it("should cache ClickHouse clients by hostname", async () => { + // This test verifies that multiple orgs pointing to the same ClickHouse hostname + // share the same client instance (deduplication) + + const org1Id = testOrgId + 
"-1"; + const org2Id = testOrgId + "-2"; + + // Create test organizations + await prisma.organization.createMany({ + data: [ + { id: org1Id, title: "Test Org 1", slug: "test-org-1-" + Date.now() }, + { id: org2Id, title: "Test Org 2", slug: "test-org-2-" + Date.now() }, + ], + }); + + // Set both orgs to use the same ClickHouse URL + await setOrganizationClickhouseUrl(org1Id, "standard", testClickhouseUrl); + await setOrganizationClickhouseUrl(org2Id, "standard", testClickhouseUrl); + + // Get clients for both orgs + const client1 = await getClickhouseForOrganization(org1Id, "standard"); + const client2 = await getClickhouseForOrganization(org2Id, "standard"); + + // Both should be defined + expect(client1).toBeDefined(); + expect(client2).toBeDefined(); + + // They should be the same instance (cached by hostname) + expect(client1).toBe(client2); + + // Clean up + await removeOrganizationClickhouseUrl(org1Id, "standard"); + await removeOrganizationClickhouseUrl(org2Id, "standard"); + await prisma.organization.deleteMany({ + where: { id: { in: [org1Id, org2Id] } }, + }); + }); + + it("should clear cache when organization config changes", async () => { + // Create test organization + await prisma.organization.create({ + data: { + id: testOrgId, + title: "Test Org", + slug: "test-org-" + Date.now(), + }, + }); + + // Set URL + await setOrganizationClickhouseUrl(testOrgId, "standard", testClickhouseUrl); + + // Get client to populate cache + const client1 = await getClickhouseForOrganization(testOrgId, "standard"); + + // Clear cache + clearClickhouseCacheForOrganization(testOrgId); + + // Get client again (should hit the database again, not cache) + const client2 = await getClickhouseForOrganization(testOrgId, "standard"); + + // Both should be defined + expect(client1).toBeDefined(); + expect(client2).toBeDefined(); + + // Clean up + await removeOrganizationClickhouseUrl(testOrgId, "standard"); + await prisma.organization.delete({ where: { id: testOrgId } }); + }); 
+}); diff --git a/apps/webapp/app/services/clickhouse/clickhouseSecretSchemas.server.ts b/apps/webapp/app/services/clickhouse/clickhouseSecretSchemas.server.ts new file mode 100644 index 00000000000..016eb717c18 --- /dev/null +++ b/apps/webapp/app/services/clickhouse/clickhouseSecretSchemas.server.ts @@ -0,0 +1,11 @@ +import { z } from "zod"; + +export const ClickhouseConnectionSchema = z.object({ + url: z.string().url(), +}); + +export type ClickhouseConnection = z.infer; + +export function getClickhouseSecretKey(orgId: string, clientType: string): string { + return `org:${orgId}:clickhouse:${clientType}`; +} diff --git a/apps/webapp/app/services/clickhouseInstance.server.ts b/apps/webapp/app/services/clickhouseInstance.server.ts deleted file mode 100644 index 9c4941671f3..00000000000 --- a/apps/webapp/app/services/clickhouseInstance.server.ts +++ /dev/null @@ -1,130 +0,0 @@ -import { ClickHouse } from "@internal/clickhouse"; -import { env } from "~/env.server"; -import { singleton } from "~/utils/singleton"; - -export const clickhouseClient = singleton("clickhouseClient", initializeClickhouseClient); - -function initializeClickhouseClient() { - const url = new URL(env.CLICKHOUSE_URL); - - // Remove secure param - url.searchParams.delete("secure"); - - console.log(`πŸ—ƒοΈ Clickhouse service enabled to host ${url.host}`); - - const clickhouse = new ClickHouse({ - url: url.toString(), - name: "clickhouse-instance", - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); - - return clickhouse; -} - -export const logsClickhouseClient = singleton( - "logsClickhouseClient", - initializeLogsClickhouseClient -); - -function initializeLogsClickhouseClient() { - if (!env.LOGS_CLICKHOUSE_URL) { - throw new Error("LOGS_CLICKHOUSE_URL is not set"); - } - - 
const url = new URL(env.LOGS_CLICKHOUSE_URL); - - // Remove secure param - url.searchParams.delete("secure"); - - return new ClickHouse({ - url: url.toString(), - name: "logs-clickhouse", - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - clickhouseSettings: { - max_memory_usage: env.CLICKHOUSE_LOGS_LIST_MAX_MEMORY_USAGE.toString(), - max_bytes_before_external_sort: - env.CLICKHOUSE_LOGS_LIST_MAX_BYTES_BEFORE_EXTERNAL_SORT.toString(), - max_threads: env.CLICKHOUSE_LOGS_LIST_MAX_THREADS, - ...(env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ && { - max_rows_to_read: env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ.toString(), - }), - ...(env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME && { - max_execution_time: env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME, - }), - }, - }); -} - -export const adminClickhouseClient = singleton( - "adminClickhouseClient", - initializeAdminClickhouseClient -); - -function initializeAdminClickhouseClient() { - if (!env.ADMIN_CLICKHOUSE_URL) { - throw new Error("ADMIN_CLICKHOUSE_URL is not set"); - } - - const url = new URL(env.ADMIN_CLICKHOUSE_URL); - url.searchParams.delete("secure"); - - return new ClickHouse({ - url: url.toString(), - name: "admin-clickhouse", - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); -} - -export const queryClickhouseClient = singleton( - "queryClickhouseClient", - initializeQueryClickhouseClient -); - -function initializeQueryClickhouseClient() { - if (!env.QUERY_CLICKHOUSE_URL) { - throw new Error("QUERY_CLICKHOUSE_URL is not set"); - } - - const url = new 
URL(env.QUERY_CLICKHOUSE_URL); - - // Remove secure param - url.searchParams.delete("secure"); - - return new ClickHouse({ - url: url.toString(), - name: "query-clickhouse", - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); -} diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index 1f3bdbba18a..f24df9eb023 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -11,7 +11,7 @@ import type { TableSchema, WhereClauseCondition } from "@internal/tsql"; import { z } from "zod"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; -import { queryClickhouseClient } from "./clickhouseInstance.server"; +import { getClickhouseForOrganization } from "./clickhouse/clickhouseFactory.server"; import { queryConcurrencyLimiter, DEFAULT_ORG_CONCURRENCY_LIMIT, @@ -275,7 +275,8 @@ export async function executeQuery( environment: Object.fromEntries(environments.map((e) => [e.id, e.slug])), }; - const result = await executeTSQL(queryClickhouseClient.reader, { + const queryClickhouse = await getClickhouseForOrganization(organizationId, "query"); + const result = await executeTSQL(queryClickhouse.reader, { ...baseOptions, schema: z.record(z.any()), tableSchema: querySchemas, diff --git a/apps/webapp/app/services/runsReplicationService.server.ts b/apps/webapp/app/services/runsReplicationService.server.ts index 167564572eb..15cec9c7664 100644 --- a/apps/webapp/app/services/runsReplicationService.server.ts +++ b/apps/webapp/app/services/runsReplicationService.server.ts @@ -617,18 +617,65 @@ export class RunsReplicationService { payloadInserts: payloadInserts.length, }); + // Group task runs by organization for routing to 
correct ClickHouse instance + const taskRunsByOrg = new Map(); + for (const taskRun of taskRunInserts) { + const orgId = getTaskRunField(taskRun, "organization_id"); + const orgRuns = taskRunsByOrg.get(orgId) || []; + orgRuns.push(taskRun); + taskRunsByOrg.set(orgId, orgRuns); + } + + // Group payloads by organization (extract from run_id -> task runs mapping) + const payloadsByOrg = new Map(); + for (const payload of payloadInserts) { + const runId = getPayloadField(payload, "run_id"); + // Find the corresponding task run to get its organization + const taskRun = taskRunInserts.find((tr) => getTaskRunField(tr, "run_id") === runId); + if (taskRun) { + const orgId = getTaskRunField(taskRun, "organization_id"); + const orgPayloads = payloadsByOrg.get(orgId) || []; + orgPayloads.push(payload); + payloadsByOrg.set(orgId, orgPayloads); + } + } + // Insert task runs and payloads with retry logic for connection errors - const [taskRunError, taskRunResult] = await this.#insertWithRetry( - (attempt) => this.#insertTaskRunInserts(taskRunInserts, attempt), - "task run inserts", - flushId + // Process each organization's data in parallel + const insertPromises = Array.from(taskRunsByOrg.entries()).map( + async ([orgId, orgTaskRuns]) => { + const orgPayloads = payloadsByOrg.get(orgId) || []; + + const [taskRunError, taskRunResult] = await this.#insertWithRetry( + (attempt) => this.#insertTaskRunInserts(orgId, orgTaskRuns, attempt), + "task run inserts", + flushId + ); + + const [payloadError, payloadResult] = await this.#insertWithRetry( + (attempt) => this.#insertPayloadInserts(orgId, orgPayloads, attempt), + "payload inserts", + flushId + ); + + return { taskRunError, payloadError, orgId }; + } ); - const [payloadError, payloadResult] = await this.#insertWithRetry( - (attempt) => this.#insertPayloadInserts(payloadInserts, attempt), - "payload inserts", - flushId - ); + const results = await Promise.all(insertPromises); + + // Aggregate errors from all organizations + let 
taskRunError: Error | null = null; + let payloadError: Error | null = null; + + for (const result of results) { + if (result.taskRunError) { + taskRunError = result.taskRunError; + } + if (result.payloadError) { + payloadError = result.payloadError; + } + } // Log any errors that occurred if (taskRunError) { @@ -770,19 +817,32 @@ export class RunsReplicationService { }; } - async #insertTaskRunInserts(taskRunInserts: TaskRunInsertArray[], attempt: number) { + async #insertTaskRunInserts( + organizationId: string, + taskRunInserts: TaskRunInsertArray[], + attempt: number + ) { return await startSpan(this._tracer, "insertTaskRunsInserts", async (span) => { - const [insertError, insertResult] = - await this.options.clickhouse.taskRuns.insertCompactArrays(taskRunInserts, { + // Get the appropriate ClickHouse client for this organization + const { getClickhouseForOrganization } = await import( + "~/services/clickhouse/clickhouseFactory.server" + ); + const clickhouse = await getClickhouseForOrganization(organizationId, "replication"); + + const [insertError, insertResult] = await clickhouse.taskRuns.insertCompactArrays( + taskRunInserts, + { params: { clickhouse_settings: this.#getClickhouseInsertSettings(), }, - }); + } + ); if (insertError) { this.logger.error("Error inserting task run inserts attempt", { error: insertError, attempt, + organizationId, }); recordSpanError(span, insertError); @@ -793,19 +853,32 @@ export class RunsReplicationService { }); } - async #insertPayloadInserts(payloadInserts: PayloadInsertArray[], attempt: number) { + async #insertPayloadInserts( + organizationId: string, + payloadInserts: PayloadInsertArray[], + attempt: number + ) { return await startSpan(this._tracer, "insertPayloadInserts", async (span) => { - const [insertError, insertResult] = - await this.options.clickhouse.taskRuns.insertPayloadsCompactArrays(payloadInserts, { + // Get the appropriate ClickHouse client for this organization + const { getClickhouseForOrganization } = 
await import( + "~/services/clickhouse/clickhouseFactory.server" + ); + const clickhouse = await getClickhouseForOrganization(organizationId, "replication"); + + const [insertError, insertResult] = await clickhouse.taskRuns.insertPayloadsCompactArrays( + payloadInserts, + { params: { clickhouse_settings: this.#getClickhouseInsertSettings(), }, - }); + } + ); if (insertError) { this.logger.error("Error inserting payload inserts attempt", { error: insertError, attempt, + organizationId, }); recordSpanError(span, insertError); diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index 22dba93f22e..5572a6130e5 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -20,7 +20,6 @@ import { } from "@trigger.dev/otlp-importer"; import type { MetricsV1Input } from "@internal/clickhouse"; import { logger } from "~/services/logger.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; import { DynamicFlushScheduler } from "./dynamicFlushScheduler.server"; import { ClickhouseEventRepository } from "./eventRepository/clickhouseEventRepository.server"; import { @@ -117,21 +116,26 @@ class OTLPExporter { async #exportEvents( eventsWithStores: { events: Array; taskEventStore: string }[] ) { - const eventsGroupedByStore = eventsWithStores.reduce((acc, { events, taskEventStore }) => { - acc[taskEventStore] = acc[taskEventStore] || []; - acc[taskEventStore].push(...events); + // Group events by both store and organization for proper routing + const eventsGroupedByStoreAndOrg = eventsWithStores.reduce((acc, { events, taskEventStore }) => { + for (const event of events) { + const orgId = event.organizationId || "default"; + const key = `${taskEventStore}:${orgId}`; + acc[key] = acc[key] || { store: taskEventStore, orgId, events: [] }; + acc[key].events.push(event); + } return acc; - }, {} as Record>); + }, {} as Record }>); let eventCount = 0; - for (const 
[store, events] of Object.entries(eventsGroupedByStore)) { - const eventRepository = this.#getEventRepositoryForStore(store); + for (const { store, orgId, events } of Object.values(eventsGroupedByStoreAndOrg)) { + const eventRepository = await this.#getEventRepositoryForStoreAndOrg(store, orgId); await waitForLlmPricingReady(); const enrichedEvents = enrichCreatableEvents(events); - this.#logEventsVerbose(enrichedEvents, `exportEvents ${store}`); + this.#logEventsVerbose(enrichedEvents, `exportEvents ${store}:${orgId}`); eventCount += enrichedEvents.length; @@ -141,6 +145,19 @@ class OTLPExporter { return eventCount; } + async #getEventRepositoryForStoreAndOrg(store: string, orgId: string): Promise { + // For ClickHouse stores with a specific org (not "default"), use org-specific repository + if ((store === "clickhouse" || store === "clickhouse_v2") && orgId !== "default") { + const { getEventRepositoryForOrganization } = await import( + "~/services/clickhouse/clickhouseFactory.server" + ); + return await getEventRepositoryForOrganization(orgId); + } + + // Fall back to default repositories for non-ClickHouse stores or default org + return this.#getEventRepositoryForStore(store); + } + #getEventRepositoryForStore(store: string): IEventRepository { if (store === "clickhouse") { return this._clickhouseEventRepository; @@ -1171,12 +1188,22 @@ function hasUnpairedSurrogateAtEnd(str: string): boolean { export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter); -function initializeOTLPExporter() { +async function initializeOTLPExporter() { + // Metrics are written globally (not per-org), use standard clickhouse + // We use a dummy org ID since metrics table is global + const { getClickhouseForOrganization } = await import( + "~/services/clickhouse/clickhouseFactory.server" + ); + + // Use a sentinel org ID for global metrics writes + // In practice, all orgs currently share the same metrics table/instance + const metricsClickhouse = await 
getClickhouseForOrganization("METRICS_GLOBAL", "standard"); + const metricsFlushScheduler = new DynamicFlushScheduler({ batchSize: env.METRICS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.METRICS_CLICKHOUSE_FLUSH_INTERVAL_MS, callback: async (_flushId, batch) => { - await clickhouseClient.metrics.insert(batch); + await metricsClickhouse.metrics.insert(batch); }, minConcurrency: 1, maxConcurrency: env.METRICS_CLICKHOUSE_MAX_CONCURRENCY, diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index 156b68bff59..07a4286297f 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -7,7 +7,7 @@ import { } from "@trigger.dev/database"; import { getRunFiltersFromRequest } from "~/presenters/RunFilters.server"; import { type CreateBulkActionPayload } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; import { parseRunListInputOptions, type RunListInputFilters, @@ -38,8 +38,9 @@ export class BulkActionService extends BaseService { const filters = await getFilters(payload, request); // Count the runs that will be affected by the bulk action + const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, }); const count = await runsRepository.countRuns({ @@ -147,8 +148,9 @@ export class BulkActionService extends BaseService { ...rawParams, }); + const clickhouse = await getClickhouseForOrganization(group.project.organizationId, "standard"); const runsRepository = new RunsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: this._replica as PrismaClient, 
}); From d0d1f0e4973853d52377033b55c37a24f285a1e5 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 26 Mar 2026 17:32:05 +0000 Subject: [PATCH 02/34] Better replication performance --- .../services/runsReplicationService.server.ts | 102 +++++++++++------- 1 file changed, 63 insertions(+), 39 deletions(-) diff --git a/apps/webapp/app/services/runsReplicationService.server.ts b/apps/webapp/app/services/runsReplicationService.server.ts index 15cec9c7664..cc7c1139d15 100644 --- a/apps/webapp/app/services/runsReplicationService.server.ts +++ b/apps/webapp/app/services/runsReplicationService.server.ts @@ -29,6 +29,7 @@ import EventEmitter from "node:events"; import pLimit from "p-limit"; import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; import { calculateErrorFingerprint } from "~/utils/errorFingerprinting"; +import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; interface TransactionEvent { tag: "insert" | "update" | "delete"; @@ -617,51 +618,59 @@ export class RunsReplicationService { payloadInserts: payloadInserts.length, }); - // Group task runs by organization for routing to correct ClickHouse instance - const taskRunsByOrg = new Map(); - for (const taskRun of taskRunInserts) { - const orgId = getTaskRunField(taskRun, "organization_id"); - const orgRuns = taskRunsByOrg.get(orgId) || []; - orgRuns.push(taskRun); - taskRunsByOrg.set(orgId, orgRuns); - } + // Task runs are already sorted by org (lines 571-576), so we can stream through + // and flush when org changes - no grouping overhead, no O(n²) lookups - // Group payloads by organization (extract from run_id -> task runs mapping) + // Build run_id -> org_id index for O(1) payload->org lookups + const runIdToOrgId = new Map( + taskRunInserts.map(tr => [getTaskRunField(tr, "run_id"), getTaskRunField(tr, "organization_id")]) + ); + + // Group payloads by org using the index (O(n) instead of O(n²)) const payloadsByOrg = new Map(); for (const payload of
payloadInserts) { const runId = getPayloadField(payload, "run_id"); - // Find the corresponding task run to get its organization - const taskRun = taskRunInserts.find((tr) => getTaskRunField(tr, "run_id") === runId); - if (taskRun) { - const orgId = getTaskRunField(taskRun, "organization_id"); - const orgPayloads = payloadsByOrg.get(orgId) || []; - orgPayloads.push(payload); - payloadsByOrg.set(orgId, orgPayloads); + const orgId = runIdToOrgId.get(runId); + if (orgId) { + const orgPayloads = payloadsByOrg.get(orgId); + if (orgPayloads) { + orgPayloads.push(payload); + } else { + payloadsByOrg.set(orgId, [payload]); + } } } - // Insert task runs and payloads with retry logic for connection errors - // Process each organization's data in parallel - const insertPromises = Array.from(taskRunsByOrg.entries()).map( - async ([orgId, orgTaskRuns]) => { - const orgPayloads = payloadsByOrg.get(orgId) || []; + // Stream through task runs, flushing when org changes + const insertPromises: Promise<{ taskRunError: Error | null; payloadError: Error | null; orgId: string }>[] = []; + let currentOrgId: string | null = null; + let currentOrgTaskRuns: TaskRunInsertArray[] = []; - const [taskRunError, taskRunResult] = await this.#insertWithRetry( - (attempt) => this.#insertTaskRunInserts(orgId, orgTaskRuns, attempt), - "task run inserts", - flushId - ); + for (const taskRun of taskRunInserts) { + const orgId = getTaskRunField(taskRun, "organization_id"); - const [payloadError, payloadResult] = await this.#insertWithRetry( - (attempt) => this.#insertPayloadInserts(orgId, orgPayloads, attempt), - "payload inserts", - flushId + // Org changed? 
Flush previous org's batch + if (currentOrgId !== null && currentOrgId !== orgId) { + const orgPayloads = payloadsByOrg.get(currentOrgId) || []; + insertPromises.push( + this.#insertOrgBatch(currentOrgId, currentOrgTaskRuns, orgPayloads, flushId) ); - - return { taskRunError, payloadError, orgId }; + currentOrgTaskRuns = []; } - ); + currentOrgId = orgId; + currentOrgTaskRuns.push(taskRun); + } + + // Flush final org's batch + if (currentOrgId !== null && currentOrgTaskRuns.length > 0) { + const orgPayloads = payloadsByOrg.get(currentOrgId) || []; + insertPromises.push( + this.#insertOrgBatch(currentOrgId, currentOrgTaskRuns, orgPayloads, flushId) + ); + } + + // Wait for all org batches to complete (parallel execution) const results = await Promise.all(insertPromises); // Aggregate errors from all organizations @@ -817,6 +826,27 @@ export class RunsReplicationService { }; } + async #insertOrgBatch( + organizationId: string, + taskRunInserts: TaskRunInsertArray[], + payloadInserts: PayloadInsertArray[], + flushId: string + ): Promise<{ taskRunError: Error | null; payloadError: Error | null; orgId: string }> { + const [taskRunError] = await this.#insertWithRetry( + (attempt) => this.#insertTaskRunInserts(organizationId, taskRunInserts, attempt), + "task run inserts", + flushId + ); + + const [payloadError] = await this.#insertWithRetry( + (attempt) => this.#insertPayloadInserts(organizationId, payloadInserts, attempt), + "payload inserts", + flushId + ); + + return { taskRunError, payloadError, orgId: organizationId }; + } + async #insertTaskRunInserts( organizationId: string, taskRunInserts: TaskRunInsertArray[], @@ -824,9 +854,6 @@ export class RunsReplicationService { ) { return await startSpan(this._tracer, "insertTaskRunsInserts", async (span) => { // Get the appropriate ClickHouse client for this organization - const { getClickhouseForOrganization } = await import( - "~/services/clickhouse/clickhouseFactory.server" - ); const clickhouse = await 
getClickhouseForOrganization(organizationId, "replication"); const [insertError, insertResult] = await clickhouse.taskRuns.insertCompactArrays( @@ -860,9 +887,6 @@ export class RunsReplicationService { ) { return await startSpan(this._tracer, "insertPayloadInserts", async (span) => { // Get the appropriate ClickHouse client for this organization - const { getClickhouseForOrganization } = await import( - "~/services/clickhouse/clickhouseFactory.server" - ); const clickhouse = await getClickhouseForOrganization(organizationId, "replication"); const [insertError, insertResult] = await clickhouse.taskRuns.insertPayloadsCompactArrays( From 94c9a83be1513c93885b81b676bbdf445ac4d742 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 26 Mar 2026 17:32:19 +0000 Subject: [PATCH 03/34] Removed dynamic imports --- CLAUDE.md | 11 +++++++++++ apps/webapp/app/v3/otlpExporter.server.ts | 9 +-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 99b5c4c4033..d6aa9d5e3a4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -69,6 +69,17 @@ containerTest("should use both", async ({ prisma, redisOptions }) => { }); ``` +## Code Style + +### Imports + +**Prefer static imports over dynamic imports.** Only use dynamic `import()` when: +- Circular dependencies cannot be resolved otherwise +- Code splitting is genuinely needed for performance +- The module must be loaded conditionally at runtime + +Dynamic imports add unnecessary overhead in hot paths and make code harder to analyze. If you find yourself using `await import()`, ask if a regular `import` statement would work instead. 
+ ## Changesets and Server Changes When modifying any public package (`packages/*` or `integrations/*`), add a changeset: diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index 5572a6130e5..4f73312d638 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -39,6 +39,7 @@ import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server"; import { waitForLlmPricingReady } from "./llmPricingRegistry.server"; import { env } from "~/env.server"; import { singleton } from "~/utils/singleton"; +import { getClickhouseForOrganization, getEventRepositoryForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; class OTLPExporter { private _tracer: Tracer; @@ -148,9 +149,6 @@ class OTLPExporter { async #getEventRepositoryForStoreAndOrg(store: string, orgId: string): Promise { // For ClickHouse stores with a specific org (not "default"), use org-specific repository if ((store === "clickhouse" || store === "clickhouse_v2") && orgId !== "default") { - const { getEventRepositoryForOrganization } = await import( - "~/services/clickhouse/clickhouseFactory.server" - ); return await getEventRepositoryForOrganization(orgId); } @@ -1190,11 +1188,6 @@ export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter); async function initializeOTLPExporter() { // Metrics are written globally (not per-org), use standard clickhouse - // We use a dummy org ID since metrics table is global - const { getClickhouseForOrganization } = await import( - "~/services/clickhouse/clickhouseFactory.server" - ); - // Use a sentinel org ID for global metrics writes // In practice, all orgs currently share the same metrics table/instance const metricsClickhouse = await getClickhouseForOrganization("METRICS_GLOBAL", "standard"); From 49bf0f72203e259dfc0c6845eeead707660e0749 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 27 Mar 2026 16:48:56 +0000 Subject: [PATCH 
04/34] otlpExporter.server reverted to main --- apps/webapp/app/v3/otlpExporter.server.ts | 40 ++++++----------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index 4f73312d638..22dba93f22e 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -20,6 +20,7 @@ import { } from "@trigger.dev/otlp-importer"; import type { MetricsV1Input } from "@internal/clickhouse"; import { logger } from "~/services/logger.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; import { DynamicFlushScheduler } from "./dynamicFlushScheduler.server"; import { ClickhouseEventRepository } from "./eventRepository/clickhouseEventRepository.server"; import { @@ -39,7 +40,6 @@ import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server"; import { waitForLlmPricingReady } from "./llmPricingRegistry.server"; import { env } from "~/env.server"; import { singleton } from "~/utils/singleton"; -import { getClickhouseForOrganization, getEventRepositoryForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; class OTLPExporter { private _tracer: Tracer; @@ -117,26 +117,21 @@ class OTLPExporter { async #exportEvents( eventsWithStores: { events: Array; taskEventStore: string }[] ) { - // Group events by both store and organization for proper routing - const eventsGroupedByStoreAndOrg = eventsWithStores.reduce((acc, { events, taskEventStore }) => { - for (const event of events) { - const orgId = event.organizationId || "default"; - const key = `${taskEventStore}:${orgId}`; - acc[key] = acc[key] || { store: taskEventStore, orgId, events: [] }; - acc[key].events.push(event); - } + const eventsGroupedByStore = eventsWithStores.reduce((acc, { events, taskEventStore }) => { + acc[taskEventStore] = acc[taskEventStore] || []; + acc[taskEventStore].push(...events); return acc; - }, {} as Record 
}>); + }, {} as Record>); let eventCount = 0; - for (const { store, orgId, events } of Object.values(eventsGroupedByStoreAndOrg)) { - const eventRepository = await this.#getEventRepositoryForStoreAndOrg(store, orgId); + for (const [store, events] of Object.entries(eventsGroupedByStore)) { + const eventRepository = this.#getEventRepositoryForStore(store); await waitForLlmPricingReady(); const enrichedEvents = enrichCreatableEvents(events); - this.#logEventsVerbose(enrichedEvents, `exportEvents ${store}:${orgId}`); + this.#logEventsVerbose(enrichedEvents, `exportEvents ${store}`); eventCount += enrichedEvents.length; @@ -146,16 +141,6 @@ class OTLPExporter { return eventCount; } - async #getEventRepositoryForStoreAndOrg(store: string, orgId: string): Promise { - // For ClickHouse stores with a specific org (not "default"), use org-specific repository - if ((store === "clickhouse" || store === "clickhouse_v2") && orgId !== "default") { - return await getEventRepositoryForOrganization(orgId); - } - - // Fall back to default repositories for non-ClickHouse stores or default org - return this.#getEventRepositoryForStore(store); - } - #getEventRepositoryForStore(store: string): IEventRepository { if (store === "clickhouse") { return this._clickhouseEventRepository; @@ -1186,17 +1171,12 @@ function hasUnpairedSurrogateAtEnd(str: string): boolean { export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter); -async function initializeOTLPExporter() { - // Metrics are written globally (not per-org), use standard clickhouse - // Use a sentinel org ID for global metrics writes - // In practice, all orgs currently share the same metrics table/instance - const metricsClickhouse = await getClickhouseForOrganization("METRICS_GLOBAL", "standard"); - +function initializeOTLPExporter() { const metricsFlushScheduler = new DynamicFlushScheduler({ batchSize: env.METRICS_CLICKHOUSE_BATCH_SIZE, flushInterval: env.METRICS_CLICKHOUSE_FLUSH_INTERVAL_MS, callback: async 
(_flushId, batch) => { - await metricsClickhouse.metrics.insert(batch); + await clickhouseClient.metrics.insert(batch); }, minConcurrency: 1, maxConcurrency: env.METRICS_CLICKHOUSE_MAX_CONCURRENCY, From 795d6036e01b52651f25b39591b6e9cbbdb7a623 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Tue, 31 Mar 2026 22:44:36 +0100 Subject: [PATCH 05/34] Switch to a DataStore registry --- apps/webapp/app/env.server.ts | 3 + .../clickhouseCredentialsService.server.ts | 109 ------ .../clickhouse/clickhouseFactory.server.ts | 340 ++++++++---------- ...ganizationDataStoreConfigSchemas.server.ts | 35 ++ .../organizationDataStoresRegistry.server.ts | 82 +++++ ...zationDataStoresRegistryInstance.server.ts | 46 +++ .../migration.sql | 21 ++ .../database/prisma/schema.prisma | 23 +- 8 files changed, 362 insertions(+), 297 deletions(-) delete mode 100644 apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts create mode 100644 apps/webapp/app/services/dataStores/organizationDataStoreConfigSchemas.server.ts create mode 100644 apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts create mode 100644 apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts create mode 100644 internal-packages/database/prisma/migrations/20260331212308_add_organization_data_stores/migration.sql diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 6fb6c4ac283..f57b3105a7f 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -1489,6 +1489,9 @@ const EnvironmentSchema = z EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(5_000), EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING: z.coerce.number().int().default(2000), + // Organization data stores registry + ORGANIZATION_DATA_STORES_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes + // LLM cost tracking LLM_COST_TRACKING_ENABLED: BoolEnv.default(true), 
LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes diff --git a/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts b/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts deleted file mode 100644 index c2c8c77f7c4..00000000000 --- a/apps/webapp/app/services/clickhouse/clickhouseCredentialsService.server.ts +++ /dev/null @@ -1,109 +0,0 @@ -import { getSecretStore } from "~/services/secrets/secretStore.server"; -import { prisma } from "~/db.server"; -import { - ClickhouseConnectionSchema, - getClickhouseSecretKey, -} from "./clickhouseSecretSchemas.server"; -import { clearClickhouseCacheForOrganization } from "./clickhouseFactory.server"; - -export async function setOrganizationClickhouseUrl( - organizationId: string, - clientType: "standard" | "events" | "replication", - url: string -): Promise { - // Validate URL format - const connection = ClickhouseConnectionSchema.parse({ url }); - - // Store in SecretStore - const secretStore = getSecretStore("DATABASE"); - const secretKey = getClickhouseSecretKey(organizationId, clientType); - await secretStore.setSecret(secretKey, connection); - - // Update featureFlags to reference the secret - const org = await prisma.organization.findUnique({ - where: { id: organizationId }, - select: { featureFlags: true }, - }); - - const featureFlags = (org?.featureFlags || {}) as any; - const clickhouseConfig = featureFlags.clickhouse || {}; - clickhouseConfig[clientType] = secretKey; - featureFlags.clickhouse = clickhouseConfig; - - await prisma.organization.update({ - where: { id: organizationId }, - data: { featureFlags }, - }); - - // Clear cache - clearClickhouseCacheForOrganization(organizationId); -} - -export async function removeOrganizationClickhouseUrl( - organizationId: string, - clientType: "standard" | "events" | "replication" -): Promise { - // Remove from SecretStore - const secretStore = getSecretStore("DATABASE"); - const secretKey 
= getClickhouseSecretKey(organizationId, clientType); - await secretStore.deleteSecret(secretKey); - - // Update featureFlags - const org = await prisma.organization.findUnique({ - where: { id: organizationId }, - select: { featureFlags: true }, - }); - - if (org?.featureFlags) { - const featureFlags = org.featureFlags as any; - if (featureFlags.clickhouse && featureFlags.clickhouse[clientType]) { - delete featureFlags.clickhouse[clientType]; - - // If no more clickhouse configs, remove the clickhouse key entirely - if (Object.keys(featureFlags.clickhouse).length === 0) { - delete featureFlags.clickhouse; - } - - await prisma.organization.update({ - where: { id: organizationId }, - data: { featureFlags }, - }); - } - } - - // Clear cache - clearClickhouseCacheForOrganization(organizationId); -} - -export async function getOrganizationClickhouseUrl( - organizationId: string, - clientType: "standard" | "events" | "replication" -): Promise { - const org = await prisma.organization.findUnique({ - where: { id: organizationId }, - select: { featureFlags: true }, - }); - - if (!org?.featureFlags) { - return null; - } - - const clickhouseConfig = (org.featureFlags as any).clickhouse; - if (!clickhouseConfig || typeof clickhouseConfig !== "object") { - return null; - } - - const secretKey = clickhouseConfig[clientType]; - if (!secretKey || typeof secretKey !== "string") { - return null; - } - - const secretStore = getSecretStore("DATABASE"); - const connection = await secretStore.getSecret(ClickhouseConnectionSchema, secretKey); - - if (!connection) { - return null; - } - - return connection.url; -} diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 94498868759..69a7555384a 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -11,61 +11,45 @@ * * ### Credential Storage * - 
ClickHouse URLs stored encrypted in SecretStore (AES-256-GCM) - * - Organization references secret via `featureFlags.clickhouse` JSON + * - Organization data store overrides live in the `OrganizationDataStore` table + * - The config JSON stores a `secretKey` that references the SecretStore entry * - No plaintext credentials in database * * ### Caching Strategy - * - **Org configs**: Unkey cache with LRU memory (5min fresh, 10min stale, SWR) - * - **ClickHouse clients**: Cached by hostname hash (multiple orgs share same instance) - * - **Event repositories**: Cached by hostname hash (stateful, must be reused) - * - **Security**: Memory-only cache for org configs (no credentials in Redis) + * - **Org → data store mapping**: `OrganizationDataStoresRegistry` (in-memory Map, reloaded + * periodically via setInterval) + * - **SecretKey → resolved URL**: module-level Map (persists for process lifetime) + * - **ClickHouse clients**: cached by hostname hash (multiple orgs share same instance) + * - **Event repositories**: cached by hostname hash (stateful, must be reused) * * ## Usage in Presenters * - * Presenters should fetch org-specific ClickHouse clients in their `call()` method: - * * ```typescript * import { getClickhouseForOrganization } from "~/services/clickhouse/clickhouseFactory.server"; * * export class MyPresenter extends BasePresenter { - * constructor(private options: PresenterOptions = {}) { - * super(); - * } - * * async call({ organizationId, ... }) { * const clickhouse = await getClickhouseForOrganization(organizationId, "standard"); - * // Use clickhouse for queries... * } * } * ``` * - * ## Usage in Services - * - * The replication service and OTLP exporter automatically route data by organization. - * Other services should follow the same pattern when working with ClickHouse.
- * * @module clickhouseFactory */ import { ClickHouse } from "@internal/clickhouse"; import { createHash } from "crypto"; -import { createCache, DefaultStatefulContext, Namespace } from "@unkey/cache"; -import { createLRUMemoryStore } from "@internal/cache"; import { getSecretStore } from "~/services/secrets/secretStore.server"; -import { prisma } from "~/db.server"; -import { - ClickhouseConnectionSchema, - getClickhouseSecretKey, -} from "./clickhouseSecretSchemas.server"; +import { ClickhouseConnectionSchema } from "./clickhouseSecretSchemas.server"; import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; import { env } from "~/env.server"; import { singleton } from "~/utils/singleton"; +import { organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; -// Module-level caches for ClickHouse clients and event repositories -const clickhouseClientCache = new Map(); -const eventRepositoryCache = new Map(); +// --------------------------------------------------------------------------- +// Default clients (singleton per process) +// --------------------------------------------------------------------------- -// Default ClickHouse clients (not exported - internal use only) const defaultClickhouseClient = singleton("clickhouseClient", initializeClickhouseClient); function initializeClickhouseClient() { @@ -82,9 +66,7 @@ function initializeClickhouseClient() { idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, }, logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, + compression: { request: true }, maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, }); } @@ -110,9 +92,7 @@ function initializeLogsClickhouseClient() { idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, }, logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, + compression: { request: true }, maxOpenConnections: 
env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, clickhouseSettings: { max_memory_usage: env.CLICKHOUSE_LOGS_LIST_MAX_MEMORY_USAGE.toString(), @@ -150,9 +130,7 @@ function initializeAdminClickhouseClient() { idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, }, logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, + compression: { request: true }, maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, }); } @@ -178,93 +156,108 @@ function initializeQueryClickhouseClient() { idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, }, logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, + compression: { request: true }, maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, }); } -// Org config cache with Unkey (memory-only, no Redis for security) -type OrgClickhouseConfig = { - organizationId: string; - hostnameHash: string; - url: string; - clientType: string; -}; - -const ctx = new DefaultStatefulContext(); -const memory = createLRUMemoryStore(1000); - -const orgConfigCache = createCache({ - orgClickhouse: new Namespace(ctx, { - stores: [memory], // Memory-only, no Redis store for security - fresh: 5 * 60 * 1000, // 5 minutes - stale: 10 * 60 * 1000, // 10 minutes (SWR pattern) - }), -}); +// --------------------------------------------------------------------------- +// Org-scoped client caches +// --------------------------------------------------------------------------- + +/** ClickHouse clients keyed by hostname hash (shared across orgs pointing at the same host). */ +const clickhouseClientCache = new Map(); + +/** Event repositories keyed by hostname hash (stateful, must be reused). */ +const eventRepositoryCache = new Map(); + +/** Resolved connection URLs keyed by secret-store key (avoids repeated secret fetches). 
*/ +const resolvedConnectionCache = new Map(); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- function hashHostname(url: string): string { const parsed = new URL(url); return createHash("sha256").update(parsed.hostname).digest("hex"); } -async function getOrgClickhouseConfig( - ctx: DefaultStatefulContext, - orgId: string, - clientType: string -): Promise { - const org = await prisma.organization.findUnique({ - where: { id: orgId }, - select: { featureFlags: true }, - }); +type ClientType = "standard" | "events" | "replication" | "logs" | "query" | "admin"; - if (!org?.featureFlags) { - return null; - } - - const clickhouseConfig = (org.featureFlags as any).clickhouse; - if (!clickhouseConfig || typeof clickhouseConfig !== "object") { - return null; - } - - const secretKey = clickhouseConfig[clientType]; - if (!secretKey || typeof secretKey !== "string") { - return null; - } +/** + * Resolve a secret-store key to a connection URL + hostname hash. + * Results are cached for the process lifetime (the registry reloads keep org→key mapping fresh).
+ */ +async function resolveSecretKey( + secretKey: string +): Promise<{ url: string; hostnameHash: string } | null> { + const cached = resolvedConnectionCache.get(secretKey); + if (cached) return cached; const secretStore = getSecretStore("DATABASE"); const connection = await secretStore.getSecret(ClickhouseConnectionSchema, secretKey); + if (!connection) return null; - if (!connection) { - return null; - } + const resolved = { url: connection.url, hostnameHash: hashHostname(connection.url) }; + resolvedConnectionCache.set(secretKey, resolved); + return resolved; +} - const hostnameHash = hashHostname(connection.url); +function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHouse { + const parsed = new URL(url); + parsed.searchParams.delete("secure"); - return { - organizationId: orgId, - hostnameHash, - url: connection.url, - clientType, - }; + return new ClickHouse({ + url: parsed.toString(), + name: `org-clickhouse-${clientType}`, + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); } +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + export async function getClickhouseForOrganization( organizationId: string, - clientType: "standard" | "events" | "replication" | "logs" | "query" | "admin" + clientType: ClientType ): Promise { - // Try to get org-specific config - const configResult = await orgConfigCache.orgClickhouse.swr( - `org:${organizationId}:ch:${clientType}`, - async () => getOrgClickhouseConfig(ctx, organizationId, clientType) - ); + if (!organizationDataStoresRegistry.isLoaded) { + await organizationDataStoresRegistry.isReady; + } + + const dataStore = 
organizationDataStoresRegistry.get(organizationId, "CLICKHOUSE"); + + if (!dataStore) { + // No override — use the appropriate default client. + switch (clientType) { + case "standard": + case "events": + case "replication": + return defaultClickhouseClient; + case "logs": + return defaultLogsClickhouseClient; + case "query": + return defaultQueryClickhouseClient; + case "admin": + return defaultAdminClickhouseClient; + } + } - // Handle Result type - check for error or null value - const config = configResult.err ? null : configResult.val; + const { secretKey } = dataStore.config.data; + const connection = await resolveSecretKey(secretKey); - // If no custom config, return appropriate default client - if (!config) { + if (!connection) { + console.warn( + `[clickhouseFactory] Secret key "${secretKey}" not found for org ${organizationId}; falling back to default` + ); switch (clientType) { case "standard": case "events": @@ -279,27 +272,11 @@ export async function getClickhouseForOrganization( } } - // Check if client already exists for this hostname - const cacheKey = `${config.hostnameHash}:${clientType}`; + const cacheKey = `${connection.hostnameHash}:${clientType}`; let client = clickhouseClientCache.get(cacheKey); if (!client) { - const url = new URL(config.url); - url.searchParams.delete("secure"); - - client = new ClickHouse({ - url: url.toString(), - name: `org-clickhouse-${clientType}`, - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); + client = buildOrgClickhouseClient(connection.url, clientType); clickhouseClientCache.set(cacheKey, client); } @@ -309,79 +286,64 @@ export async function getClickhouseForOrganization( export async function getEventRepositoryForOrganization( organizationId: string ): Promise { - // Try to get
org-specific config - const configResult = await orgConfigCache.orgClickhouse.swr( - `org:${organizationId}:ch:events`, - async () => getOrgClickhouseConfig(ctx, organizationId, "events") - ); + if (!organizationDataStoresRegistry.isLoaded) { + await organizationDataStoresRegistry.isReady; + } - // Handle Result type - check for error or null value - const config = configResult.err ? null : configResult.val; + const dataStore = organizationDataStoresRegistry.get(organizationId, "CLICKHOUSE"); - // If no custom config, return default repository (created on demand) - if (!config) { + if (!dataStore) { const defaultKey = "default:events"; let defaultRepo = eventRepositoryCache.get(defaultKey); if (!defaultRepo) { - // Create default event repository using standard clickhouse client - // This matches the existing pattern in clickhouseEventRepositoryInstance.server.ts const eventsClickhouse = await getEventsClickhouseClient(); - defaultRepo = new ClickhouseEventRepository({ - clickhouse: eventsClickhouse, - batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, - flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, - maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, - insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, - waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", - asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, - asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, - startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, - llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, - llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, - llmMetricsMaxBatchSize: env.LLM_METRICS_MAX_BATCH_SIZE, - llmMetricsMaxConcurrency: env.LLM_METRICS_MAX_CONCURRENCY, - version: "v2", - }); + 
defaultRepo = buildEventRepository(eventsClickhouse); eventRepositoryCache.set(defaultKey, defaultRepo); } return defaultRepo; } - // Check if repository already exists for this hostname - const cacheKey = `${config.hostnameHash}:events`; + const { secretKey } = dataStore.config.data; + const connection = await resolveSecretKey(secretKey); + + if (!connection) { + console.warn( + `[clickhouseFactory] Secret key "${secretKey}" not found for org ${organizationId}; falling back to default event repository` + ); + const defaultKey = "default:events"; + let defaultRepo = eventRepositoryCache.get(defaultKey); + if (!defaultRepo) { + const eventsClickhouse = await getEventsClickhouseClient(); + defaultRepo = buildEventRepository(eventsClickhouse); + eventRepositoryCache.set(defaultKey, defaultRepo); + } + return defaultRepo; + } + + const cacheKey = `${connection.hostnameHash}:events`; let repository = eventRepositoryCache.get(cacheKey); if (!repository) { const client = await getClickhouseForOrganization(organizationId, "events"); - repository = new ClickhouseEventRepository({ - clickhouse: client, - batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, - flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, - maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, - maximumTraceDetailedSummaryViewCount: - env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, - maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, - insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, - waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", - asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, - asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, - startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, - llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, - llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, - llmMetricsMaxBatchSize: 
env.LLM_METRICS_MAX_BATCH_SIZE, - llmMetricsMaxConcurrency: env.LLM_METRICS_MAX_CONCURRENCY, - version: "v2", - }); + repository = buildEventRepository(client); eventRepositoryCache.set(cacheKey, repository); } return repository; } -// Helper to create the default events ClickHouse client +/** + * Get admin ClickHouse client for cross-organization queries. + * Only use for admin tools and analytics that need to query across all orgs. + */ +export function getAdminClickhouse(): ClickHouse { + return defaultAdminClickhouseClient; +} + +// --------------------------------------------------------------------------- +// Private helpers +// --------------------------------------------------------------------------- + async function getEventsClickhouseClient(): Promise { if (!env.EVENTS_CLICKHOUSE_URL) { throw new Error("EVENTS_CLICKHOUSE_URL is not set"); @@ -398,25 +360,29 @@ async function getEventsClickhouseClient(): Promise { idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, }, logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL, - compression: { - request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", - }, + compression: { request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1" }, maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, }); } -/** - * Get admin ClickHouse client for cross-organization queries - * This should only be used for admin tools and analytics that need to query across all orgs - */ -export function getAdminClickhouse(): ClickHouse { - return defaultAdminClickhouseClient; -} - -// Clear caches when needed (e.g., when org config changes) -export function clearClickhouseCacheForOrganization(organizationId: string): void { - // The Unkey cache will naturally expire based on TTL (5min fresh, 10min stale) - // No explicit removal needed - cache entries will be refreshed on next access - // Note: We don't clear client/repository caches as they're keyed by hostname - // and may be shared by other orgs +function 
buildEventRepository(clickhouse: ClickHouse): ClickhouseEventRepository { + return new ClickhouseEventRepository({ + clickhouse, + batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE, + flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS, + maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT, + maximumTraceDetailedSummaryViewCount: + env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT, + maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING, + insertStrategy: env.EVENTS_CLICKHOUSE_INSERT_STRATEGY, + waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", + asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, + asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, + startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, + llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, + llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, + llmMetricsMaxBatchSize: env.LLM_METRICS_MAX_BATCH_SIZE, + llmMetricsMaxConcurrency: env.LLM_METRICS_MAX_CONCURRENCY, + version: "v2", + }); } diff --git a/apps/webapp/app/services/dataStores/organizationDataStoreConfigSchemas.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoreConfigSchemas.server.ts new file mode 100644 index 00000000000..89465ff5b4a --- /dev/null +++ b/apps/webapp/app/services/dataStores/organizationDataStoreConfigSchemas.server.ts @@ -0,0 +1,35 @@ +import { z } from "zod"; + +// --------------------------------------------------------------------------- +// ClickHouse config (kind = CLICKHOUSE) +// --------------------------------------------------------------------------- + +/** V1: single secret-store key that supplies the ClickHouse connection URL. */ +export const ClickhouseDataStoreConfigV1 = z.object({ + version: z.literal(1), + data: z.object({ + /** Key into the SecretStore that resolves to a ClickhouseConnection ({url}). 
*/ + secretKey: z.string(), + }), +}); + +export type ClickhouseDataStoreConfigV1 = z.infer; + +/** Discriminated union over version — extend by adding new literals here. */ +export const ClickhouseDataStoreConfig = z.discriminatedUnion("version", [ + ClickhouseDataStoreConfigV1, +]); + +export type ClickhouseDataStoreConfig = z.infer; + +// --------------------------------------------------------------------------- +// Top-level per-kind union +// --------------------------------------------------------------------------- + +export type ParsedClickhouseDataStore = { + kind: "CLICKHOUSE"; + config: ClickhouseDataStoreConfig; +}; + +/** Union of all parsed data store types. Extend as new DataStoreKind values are added. */ +export type ParsedDataStore = ParsedClickhouseDataStore; diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts new file mode 100644 index 00000000000..8f398f680ea --- /dev/null +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts @@ -0,0 +1,82 @@ +import type { PrismaClient, PrismaReplicaClient } from "@trigger.dev/database"; +import { + ClickhouseDataStoreConfig, + type ParsedDataStore, +} from "./organizationDataStoreConfigSchemas.server"; + +export class OrganizationDataStoresRegistry { + private _prisma: PrismaClient | PrismaReplicaClient; + /** Keyed by `${organizationId}:${kind}` */ + private _lookup: Map = new Map(); + private _loaded = false; + private _readyResolve!: () => void; + + /** Resolves once the initial `loadFromDatabase()` completes successfully.
*/ + readonly isReady: Promise; + + constructor(prisma: PrismaClient | PrismaReplicaClient) { + this._prisma = prisma; + this.isReady = new Promise((resolve) => { + this._readyResolve = resolve; + }); + } + + get isLoaded(): boolean { + return this._loaded; + } + + async loadFromDatabase(): Promise { + const rows = await this._prisma.organizationDataStore.findMany(); + + const lookup = new Map(); + + for (const row of rows) { + let parsed: ParsedDataStore | null = null; + + switch (row.kind) { + case "CLICKHOUSE": { + const result = ClickhouseDataStoreConfig.safeParse(row.config); + if (!result.success) { + console.warn( + `[OrganizationDataStoresRegistry] Invalid config for OrganizationDataStore "${row.key}" (kind=CLICKHOUSE): ${result.error.message}` + ); + continue; + } + parsed = { kind: "CLICKHOUSE", config: result.data }; + break; + } + default: { + console.warn( + `[OrganizationDataStoresRegistry] Unknown kind "${row.kind}" for OrganizationDataStore "${row.key}" — skipping` + ); + continue; + } + } + + for (const orgId of row.organizationIds) { + const key = `${orgId}:${row.kind}`; + lookup.set(key, parsed); + } + } + + this._lookup = lookup; + + if (!this._loaded) { + this._loaded = true; + this._readyResolve(); + } + } + + async reload(): Promise { + await this.loadFromDatabase(); + } + + /** + * Returns the parsed data store config for the given organization and kind, + * or `null` if no override is configured (caller should use the default). + */ + get(organizationId: string, kind: "CLICKHOUSE"): ParsedDataStore | null { + if (!this._loaded) return null; + return this._lookup.get(`${organizationId}:${kind}`) ??
null; + } +} diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts new file mode 100644 index 00000000000..ac5c25d9aaa --- /dev/null +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts @@ -0,0 +1,46 @@ +import { $replica } from "~/db.server"; +import { env } from "~/env.server"; +import { signalsEmitter } from "~/services/signals.server"; +import { singleton } from "~/utils/singleton"; +import { OrganizationDataStoresRegistry } from "./organizationDataStoresRegistry.server"; + +export const organizationDataStoresRegistry = singleton( + "organizationDataStoresRegistry", + () => { + const registry = new OrganizationDataStoresRegistry($replica); + + registry.loadFromDatabase().catch((err) => { + console.error("[OrganizationDataStoresRegistry] Failed to initialize", err); + }); + + const interval = setInterval( + () => { + registry.reload().catch((err) => { + console.error("[OrganizationDataStoresRegistry] Failed to reload", err); + }); + }, + env.ORGANIZATION_DATA_STORES_RELOAD_INTERVAL_MS + ); + + signalsEmitter.on("SIGTERM", () => clearInterval(interval)); + signalsEmitter.on("SIGINT", () => clearInterval(interval)); + + return registry; + } +); + +/** + * Wait for the registry to finish its initial load, with a timeout. + * After the first call resolves (or times out), subsequent calls are no-ops. 
+ */ +export async function waitForOrganizationDataStoresReady( + timeoutMs = 5000 +): Promise { + if (organizationDataStoresRegistry.isLoaded) return; + if (timeoutMs <= 0) return; + + await Promise.race([ + organizationDataStoresRegistry.isReady, + new Promise((resolve) => setTimeout(resolve, timeoutMs)), + ]); +} diff --git a/internal-packages/database/prisma/migrations/20260331212308_add_organization_data_stores/migration.sql b/internal-packages/database/prisma/migrations/20260331212308_add_organization_data_stores/migration.sql new file mode 100644 index 00000000000..52b8385539a --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260331212308_add_organization_data_stores/migration.sql @@ -0,0 +1,21 @@ +-- CreateEnum +CREATE TYPE "public"."DataStoreKind" AS ENUM ('CLICKHOUSE'); + +-- CreateTable +CREATE TABLE "public"."OrganizationDataStore" ( + "id" TEXT NOT NULL, + "key" TEXT NOT NULL, + "organizationIds" TEXT[], + "kind" "public"."DataStoreKind" NOT NULL, + "config" JSONB NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "OrganizationDataStore_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "OrganizationDataStore_key_key" ON "public"."OrganizationDataStore"("key"); + +-- CreateIndex +CREATE INDEX "OrganizationDataStore_kind_idx" ON "public"."OrganizationDataStore"("kind"); diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 2dc3e9db56e..2167387c32e 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -3153,4 +3153,25 @@ model TaskIdentifier { @@unique([runtimeEnvironmentId, slug]) @@index([runtimeEnvironmentId, isInLatestDeployment]) -} \ No newline at end of file +} + +enum DataStoreKind { + CLICKHOUSE +} + +/// Defines org-scoped data store overrides (e.g. dedicated ClickHouse for HIPAA orgs). 
+/// Multiple organizations can share a single data store row via organizationIds. +model OrganizationDataStore { + id String @id @default(cuid()) + /// Human-readable unique key (e.g. "hipaa-clickhouse-us-east") + key String @unique + /// Organization IDs that use this data store + organizationIds String[] + kind DataStoreKind + /// Versioned config JSON. Structure is discriminated by the top-level `version` field. + config Json + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@index([kind]) +} From adc95eaac8df64c1ab717f77a91d2c392ab1feac Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Wed, 1 Apr 2026 12:10:15 +0100 Subject: [PATCH 06/34] Admin page for adding data stores --- apps/webapp/app/routes/admin.data-stores.tsx | 368 +++++++++++++++++++ apps/webapp/app/routes/admin.tsx | 4 + 2 files changed, 372 insertions(+) create mode 100644 apps/webapp/app/routes/admin.data-stores.tsx diff --git a/apps/webapp/app/routes/admin.data-stores.tsx b/apps/webapp/app/routes/admin.data-stores.tsx new file mode 100644 index 00000000000..4397c6d33d1 --- /dev/null +++ b/apps/webapp/app/routes/admin.data-stores.tsx @@ -0,0 +1,368 @@ +import { useState } from "react"; +import { useFetcher } from "@remix-run/react"; +import type { ActionFunctionArgs, LoaderFunctionArgs } from "@remix-run/server-runtime"; +import { redirect } from "@remix-run/server-runtime"; +import { typedjson, useTypedLoaderData } from "remix-typedjson"; +import { z } from "zod"; +import { Button } from "~/components/primitives/Buttons"; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, +} from "~/components/primitives/Dialog"; +import { Input } from "~/components/primitives/Input"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Popover, PopoverContent, PopoverTrigger } from "~/components/primitives/Popover"; +import { + Table, + TableBlankRow, + TableBody, + TableCell, + TableHeader, + TableHeaderCell, + TableRow, +} from 
"~/components/primitives/Table"; +import { prisma } from "~/db.server"; +import { requireUser } from "~/services/session.server"; +import { getSecretStore } from "~/services/secrets/secretStore.server"; +import { ClickhouseConnectionSchema } from "~/services/clickhouse/clickhouseSecretSchemas.server"; + +// --------------------------------------------------------------------------- +// Loader +// --------------------------------------------------------------------------- + +export const loader = async ({ request }: LoaderFunctionArgs) => { + const user = await requireUser(request); + if (!user.admin) throw redirect("/"); + + const dataStores = await prisma.organizationDataStore.findMany({ + orderBy: { createdAt: "desc" }, + }); + + return typedjson({ dataStores }); +}; + +// --------------------------------------------------------------------------- +// Action +// --------------------------------------------------------------------------- + +const AddSchema = z.object({ + _action: z.literal("add"), + key: z.string().min(1), + organizationIds: z.string().min(1), + connectionUrl: z.string().url(), +}); + +const DeleteSchema = z.object({ + _action: z.literal("delete"), + id: z.string().min(1), +}); + +export async function action({ request }: ActionFunctionArgs) { + const user = await requireUser(request); + if (!user.admin) throw redirect("/"); + + const formData = await request.formData(); + const _action = formData.get("_action"); + + if (_action === "add") { + const result = AddSchema.safeParse(Object.fromEntries(formData)); + if (!result.success) { + return typedjson( + { error: result.error.issues.map((i) => i.message).join(", ") }, + { status: 400 } + ); + } + + const { key, organizationIds: rawOrgIds, connectionUrl } = result.data; + const organizationIds = rawOrgIds + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + + const secretKey = `data-store:${key}:clickhouse`; + + const secretStore = getSecretStore("DATABASE"); + await 
secretStore.setSecret(secretKey, ClickhouseConnectionSchema.parse({ url: connectionUrl })); + + await prisma.organizationDataStore.create({ + data: { + key, + organizationIds, + kind: "CLICKHOUSE", + config: { version: 1, data: { secretKey } }, + }, + }); + + + return typedjson({ success: true }); + } + + if (_action === "delete") { + const result = DeleteSchema.safeParse(Object.fromEntries(formData)); + if (!result.success) { + return typedjson({ error: "Invalid request" }, { status: 400 }); + } + + const { id } = result.data; + + const dataStore = await prisma.organizationDataStore.findFirst({ where: { id } }); + if (!dataStore) { + return typedjson({ error: "Data store not found" }, { status: 404 }); + } + + // Delete secret if config references one + const config = dataStore.config as any; + if (config?.data?.secretKey) { + const secretStore = getSecretStore("DATABASE"); + await secretStore.deleteSecret(config.data.secretKey).catch(() => { + // Secret may not exist β€” proceed with deletion + }); + } + + await prisma.organizationDataStore.delete({ where: { id } }); + + return typedjson({ success: true }); + } + + return typedjson({ error: "Unknown action" }, { status: 400 }); +} + +// --------------------------------------------------------------------------- +// Component +// --------------------------------------------------------------------------- + +export default function AdminDataStoresRoute() { + const { dataStores } = useTypedLoaderData(); + const [addOpen, setAddOpen] = useState(false); + + return ( +
+
+
+ + {dataStores.length} data store{dataStores.length !== 1 ? "s" : ""} + + +
+ + + + + Key + Kind + Organizations + Created + Updated + + Actions + + + + + {dataStores.length === 0 ? ( + + No data stores configured + + ) : ( + dataStores.map((ds) => ( + + + {ds.key} + + + + {ds.kind} + + + + + {ds.organizationIds.length} org{ds.organizationIds.length !== 1 ? "s" : ""} + + {ds.organizationIds.length > 0 && ( + + ({ds.organizationIds.slice(0, 2).join(", ")} + {ds.organizationIds.length > 2 + ? ` +${ds.organizationIds.length - 2} more` + : ""} + ) + + )} + + + + {new Date(ds.createdAt).toLocaleString()} + + + + + {new Date(ds.updatedAt).toLocaleString()} + + + + + + + )) + )} + +
+
+ + +
+ ); +} + +// --------------------------------------------------------------------------- +// Delete button with popover confirmation +// --------------------------------------------------------------------------- + +function DeleteButton({ id, name }: { id: string; name: string }) { + const [open, setOpen] = useState(false); + const fetcher = useFetcher<{ success?: boolean; error?: string }>(); + const isDeleting = fetcher.state !== "idle"; + + return ( + + + + + + + Delete {name}? + + + This will remove the data store and its secret. Organizations using it will fall back to + the default ClickHouse instance. + +
+ + setOpen(false)}> + + + + +
+
+
+ ); +} + +// --------------------------------------------------------------------------- +// Add data store dialog +// --------------------------------------------------------------------------- + +function AddDataStoreDialog({ + open, + onOpenChange, +}: { + open: boolean; + onOpenChange: (open: boolean) => void; +}) { + const fetcher = useFetcher<{ success?: boolean; error?: string }>(); + const isSubmitting = fetcher.state !== "idle"; + + // Close dialog on success + if (fetcher.data?.success && open) { + onOpenChange(false); + } + + return ( + + + + Add data store + + + + + +
+ + +

+ Unique identifier for this data store. Used as the secret key prefix. +

+
+ +
+ + +
+ +
+ + +

Comma-separated organization IDs.

+
+ +
+ + +

+ Stored encrypted in SecretStore. Never logged or displayed again. +

+
+ + {fetcher.data?.error && ( +

{fetcher.data.error}

+ )} + + + + + +
+
+
+ ); +} diff --git a/apps/webapp/app/routes/admin.tsx b/apps/webapp/app/routes/admin.tsx index 1ac09efc76c..bf01e5f187a 100644 --- a/apps/webapp/app/routes/admin.tsx +++ b/apps/webapp/app/routes/admin.tsx @@ -48,6 +48,10 @@ export default function Page() { to: "/admin/back-office", end: false, }, + { + label: "Data Stores", + to: "/admin/data-stores", + }, ]} layoutId={"admin"} /> From 99cf100a421c5f4dbd02156090c708d9e5156908 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 2 Apr 2026 11:42:48 +0100 Subject: [PATCH 07/34] New admin page, lots of improvements to make it more robust and testable --- apps/webapp/app/routes/admin.data-stores.tsx | 150 +++++++----- .../clickhouse/clickhouseFactory.server.ts | 214 ++++++++---------- .../clickhouse/clickhouseFactory.test.ts | 155 ------------- ...ganizationDataStoreConfigSchemas.server.ts | 6 +- .../organizationDataStoresRegistry.server.ts | 91 +++++++- ...zationDataStoresRegistryInstance.server.ts | 50 ++-- apps/webapp/test/clickhouseFactory.test.ts | 130 +++++++++++ .../organizationDataStoresRegistry.test.ts | 197 ++++++++++++++++ 8 files changed, 614 insertions(+), 379 deletions(-) delete mode 100644 apps/webapp/app/services/clickhouse/clickhouseFactory.test.ts create mode 100644 apps/webapp/test/clickhouseFactory.test.ts create mode 100644 apps/webapp/test/organizationDataStoresRegistry.test.ts diff --git a/apps/webapp/app/routes/admin.data-stores.tsx b/apps/webapp/app/routes/admin.data-stores.tsx index 4397c6d33d1..033b143151f 100644 --- a/apps/webapp/app/routes/admin.data-stores.tsx +++ b/apps/webapp/app/routes/admin.data-stores.tsx @@ -26,8 +26,9 @@ import { } from "~/components/primitives/Table"; import { prisma } from "~/db.server"; import { requireUser } from "~/services/session.server"; -import { getSecretStore } from "~/services/secrets/secretStore.server"; import { ClickhouseConnectionSchema } from "~/services/clickhouse/clickhouseSecretSchemas.server"; +import { organizationDataStoresRegistry } from 
"~/services/dataStores/organizationDataStoresRegistryInstance.server"; +import { tryCatch } from "@trigger.dev/core"; // --------------------------------------------------------------------------- // Loader @@ -55,79 +56,106 @@ const AddSchema = z.object({ connectionUrl: z.string().url(), }); +const UpdateSchema = z.object({ + _action: z.literal("update"), + key: z.string().min(1), + organizationIds: z.string().min(1), + connectionUrl: z.string().url().optional(), +}); + const DeleteSchema = z.object({ _action: z.literal("delete"), - id: z.string().min(1), + key: z.string().min(1), }); +const FormSchema = z.discriminatedUnion("_action", [AddSchema, UpdateSchema, DeleteSchema]); + export async function action({ request }: ActionFunctionArgs) { const user = await requireUser(request); if (!user.admin) throw redirect("/"); const formData = await request.formData(); - const _action = formData.get("_action"); - - if (_action === "add") { - const result = AddSchema.safeParse(Object.fromEntries(formData)); - if (!result.success) { - return typedjson( - { error: result.error.issues.map((i) => i.message).join(", ") }, - { status: 400 } - ); - } - const { key, organizationIds: rawOrgIds, connectionUrl } = result.data; - const organizationIds = rawOrgIds - .split(",") - .map((s) => s.trim()) - .filter(Boolean); + const result = FormSchema.safeParse(Object.fromEntries(formData)); - const secretKey = `data-store:${key}:clickhouse`; + if (!result.success) { + return typedjson( + { error: result.error.issues.map((i) => i.message).join(", ") }, + { status: 400 } + ); + } - const secretStore = getSecretStore("DATABASE"); - await secretStore.setSecret(secretKey, ClickhouseConnectionSchema.parse({ url: connectionUrl })); + switch (result.data._action) { + case "add": { + const { key, organizationIds: rawOrgIds, connectionUrl } = result.data; + const organizationIds = rawOrgIds + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + + const config = 
ClickhouseConnectionSchema.parse({ url: connectionUrl }); + + const [error, _] = await tryCatch( + organizationDataStoresRegistry.addDataStore({ + key, + kind: "CLICKHOUSE", + organizationIds, + config, + }) + ); - await prisma.organizationDataStore.create({ - data: { - key, - organizationIds, - kind: "CLICKHOUSE", - config: { version: 1, data: { secretKey } }, - }, - }); + if (error) { + return typedjson({ error: error.message }, { status: 400 }); + } + return typedjson({ success: true }); + } + case "update": { + const { key, organizationIds: rawOrgIds, connectionUrl } = result.data; + const organizationIds = rawOrgIds + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + + const config = connectionUrl + ? ClickhouseConnectionSchema.parse({ url: connectionUrl }) + : undefined; + + const [error, _] = await tryCatch( + organizationDataStoresRegistry.updateDataStore({ + key, + kind: "CLICKHOUSE", + organizationIds, + config, + }) + ); - return typedjson({ success: true }); - } + if (error) { + return typedjson({ error: error.message }, { status: 400 }); + } - if (_action === "delete") { - const result = DeleteSchema.safeParse(Object.fromEntries(formData)); - if (!result.success) { - return typedjson({ error: "Invalid request" }, { status: 400 }); + return typedjson({ success: true }); } + case "delete": { + const { key } = result.data; + + const [error, _] = await tryCatch( + organizationDataStoresRegistry.deleteDataStore({ + key, + kind: "CLICKHOUSE", + }) + ); - const { id } = result.data; + if (error) { + return typedjson({ error: error.message }, { status: 400 }); + } - const dataStore = await prisma.organizationDataStore.findFirst({ where: { id } }); - if (!dataStore) { - return typedjson({ error: "Data store not found" }, { status: 404 }); + return typedjson({ success: true }); } - - // Delete secret if config references one - const config = dataStore.config as any; - if (config?.data?.secretKey) { - const secretStore = getSecretStore("DATABASE"); - 
await secretStore.deleteSecret(config.data.secretKey).catch(() => { - // Secret may not exist β€” proceed with deletion - }); + default: { + return typedjson({ error: "Unknown action" }, { status: 400 }); } - - await prisma.organizationDataStore.delete({ where: { id } }); - - return typedjson({ success: true }); } - - return typedjson({ error: "Unknown action" }, { status: 400 }); } // --------------------------------------------------------------------------- @@ -207,7 +235,7 @@ export default function AdminDataStoresRoute() { - + )) @@ -225,7 +253,7 @@ export default function AdminDataStoresRoute() { // Delete button with popover confirmation // --------------------------------------------------------------------------- -function DeleteButton({ id, name }: { id: string; name: string }) { +function DeleteButton({ name }: { name: string }) { const [open, setOpen] = useState(false); const fetcher = useFetcher<{ success?: boolean; error?: string }>(); const isDeleting = fetcher.state !== "idle"; @@ -251,7 +279,7 @@ function DeleteButton({ id, name }: { id: string; name: string }) { setOpen(false)}> - + @@ -311,7 +339,13 @@ function AddDataStoreDialog({ - +
@@ -344,9 +378,7 @@ function AddDataStoreDialog({

- {fetcher.data?.error && ( -

{fetcher.data.error}

- )} + {fetcher.data?.error &&

{fetcher.data.error}

} + + + + Edit data store + + + + + + +
+ + +
+ +
+ + +

Comma-separated organization IDs.

+
+ + {fetcher.data?.error &&

{fetcher.data.error}

} + + + + + +
+
+
+ + ); +} + // --------------------------------------------------------------------------- // Add data store dialog // --------------------------------------------------------------------------- From ef2114826b7ec6b724e1291ef15649d3b5a01407 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Tue, 14 Apr 2026 13:52:27 +0100 Subject: [PATCH 19/34] Reload the data store every minute --- apps/webapp/app/env.server.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index f57b3105a7f..d2162de025c 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -459,7 +459,10 @@ const EnvironmentSchema = z // If specified, you must configure the corresponding provider using OBJECT_STORE_{PROTOCOL}_* env vars. // Example: OBJECT_STORE_DEFAULT_PROTOCOL=s3 requires OBJECT_STORE_S3_BASE_URL, OBJECT_STORE_S3_ACCESS_KEY_ID, etc. // Enables zero-downtime migration between providers (old data keeps working, new data uses new provider). 
- OBJECT_STORE_DEFAULT_PROTOCOL: z.string().regex(/^[a-z0-9]+$/).optional(), + OBJECT_STORE_DEFAULT_PROTOCOL: z + .string() + .regex(/^[a-z0-9]+$/) + .optional(), ARTIFACTS_OBJECT_STORE_BUCKET: z.string().optional(), ARTIFACTS_OBJECT_STORE_BASE_URL: z.string().optional(), @@ -1490,11 +1493,17 @@ const EnvironmentSchema = z EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING: z.coerce.number().int().default(2000), // Organization data stores registry - ORGANIZATION_DATA_STORES_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes + ORGANIZATION_DATA_STORES_RELOAD_INTERVAL_MS: z.coerce + .number() + .int() + .default(60 * 1000), // 1 minute // LLM cost tracking LLM_COST_TRACKING_ENABLED: BoolEnv.default(true), - LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes + LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce + .number() + .int() + .default(5 * 60 * 1000), // 5 minutes LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"), LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000), // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL. 
From 3a1d6f14ef9bcd9c2aae7e807df7805a196561ff Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 16 Apr 2026 13:29:34 +0100 Subject: [PATCH 20/34] Work with Postgres EventRepository for self-hosters --- .../app/v3/eventRepository/index.server.ts | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index 98d6858ed9f..04d6678f92b 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -26,7 +26,8 @@ export function resolveEventRepositoryForStore( organizationId: string ): IEventRepository { if (store === EVENT_STORE_TYPES.CLICKHOUSE || store === EVENT_STORE_TYPES.CLICKHOUSE_V2) { - return clickhouseFactory.getEventRepositoryForOrganizationSync(store, organizationId).repository; + return clickhouseFactory.getEventRepositoryForOrganizationSync(store, organizationId) + .repository; } return eventRepository; } @@ -56,10 +57,8 @@ export async function getConfiguredEventRepository( (organization.featureFlags as Record | null) ?? undefined ); - const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - taskEventStore, - organizationId - ); + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization(taskEventStore, organizationId); if (taskEventStore === EVENT_STORE_TYPES.CLICKHOUSE_V2) { return { repository: resolvedRepository, store: EVENT_STORE_TYPES.CLICKHOUSE_V2 }; @@ -78,20 +77,26 @@ export async function getEventRepository( parentStore: string | undefined ): Promise<{ repository: IEventRepository; store: string }> { const taskEventStore = parentStore ?? 
(await resolveTaskEventRepositoryFlag(featureFlags)); - const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - taskEventStore, - organizationId - ); - if (taskEventStore === "clickhouse_v2") { - return { repository: resolvedRepository, store: "clickhouse_v2" }; + // Support legacy Postgres store for self-hosters + if (taskEventStore === EVENT_STORE_TYPES.POSTGRES) { + return { repository: eventRepository, store: getTaskEventStore() }; } - if (taskEventStore === "clickhouse") { - return { repository: resolvedRepository, store: "clickhouse" }; - } + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization(taskEventStore, organizationId); - return { repository: eventRepository, store: getTaskEventStore() }; + switch (taskEventStore) { + case EVENT_STORE_TYPES.CLICKHOUSE_V2: { + return { repository: resolvedRepository, store: EVENT_STORE_TYPES.CLICKHOUSE_V2 }; + } + case EVENT_STORE_TYPES.CLICKHOUSE: { + return { repository: resolvedRepository, store: EVENT_STORE_TYPES.CLICKHOUSE }; + } + default: { + return { repository: eventRepository, store: getTaskEventStore() }; + } + } } export async function getV3EventRepository( @@ -99,24 +104,18 @@ export async function getV3EventRepository( parentStore: string | undefined ): Promise<{ repository: IEventRepository; store: string }> { if (typeof parentStore === "string") { - const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - parentStore, - organizationId - ); + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization(parentStore, organizationId); return { repository: resolvedRepository, store: parentStore }; } if (env.EVENT_REPOSITORY_DEFAULT_STORE === "clickhouse_v2") { - const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - "clickhouse_v2", - organizationId - ); + const { repository: 
resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization("clickhouse_v2", organizationId); return { repository: resolvedRepository, store: "clickhouse_v2" }; } else if (env.EVENT_REPOSITORY_DEFAULT_STORE === "clickhouse") { - const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - "clickhouse", - organizationId - ); + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization("clickhouse", organizationId); return { repository: resolvedRepository, store: "clickhouse" }; } else { return { repository: eventRepository, store: getTaskEventStore() }; @@ -206,10 +205,11 @@ async function recordRunEvent( }; } - const { repository: $eventRepository } = await clickhouseFactory.getEventRepositoryForOrganization( - foundRun.taskEventStore, - foundRun.runtimeEnvironment.organizationId - ); + const { repository: $eventRepository } = + await clickhouseFactory.getEventRepositoryForOrganization( + foundRun.taskEventStore, + foundRun.runtimeEnvironment.organizationId + ); const { attributes, startTime, ...optionsRest } = options; From 8b2c5db0faa72047de9122654e77ea8d34980ef4 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 16 Apr 2026 13:31:29 +0100 Subject: [PATCH 21/34] Error fingerprint should use the logs client --- .../route.tsx | 127 +++++++++--------- 1 file changed, 60 insertions(+), 67 deletions(-) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx index 7908efe07da..ba8dbefe592 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx @@ -1,60 +1,16 @@ -import { 
type LoaderFunctionArgs, type ActionFunctionArgs, json } from "@remix-run/server-runtime"; -import { type MetaFunction, useFetcher, useRevalidator } from "@remix-run/react"; +import { parse } from "@conform-to/zod"; import { BellAlertIcon } from "@heroicons/react/20/solid"; +import { type MetaFunction, useFetcher, useRevalidator } from "@remix-run/react"; +import { type ActionFunctionArgs, json, type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { IconAlarmSnooze as IconAlarmSnoozeBase, IconBugFilled, IconCircleDotted, } from "@tabler/icons-react"; -import { parse } from "@conform-to/zod"; -import { z } from "zod"; -import { ErrorStatusBadge } from "~/components/errors/ErrorStatusBadge"; -import { ServiceValidationError } from "~/v3/services/baseService.server"; -import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; -import { requireUser, requireUserId } from "~/services/session.server"; -import { - EnvironmentParamSchema, - v3CreateBulkActionPath, - v3ErrorsPath, - v3RunsPath, -} from "~/utils/pathBuilder"; -import { findProjectBySlug } from "~/models/project.server"; -import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; -import { - ErrorGroupPresenter, - type ErrorGroupActivity, - type ErrorGroupActivityVersions, - type ErrorGroupOccurrences, - type ErrorGroupSummary, - type ErrorGroupState, -} from "~/presenters/v3/ErrorGroupPresenter.server"; -import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; -import { $replica } from "~/db.server"; -import { - clickhouseFactory, - getDefaultClickhouseClient, - getDefaultLogsClickhouseClient, -} from "~/services/clickhouse/clickhouseFactory.server"; -import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; -import { PageBody } from "~/components/layout/AppLayout"; -import { - ResizableHandle, - ResizablePanel, - ResizablePanelGroup, -} from "~/components/primitives/Resizable"; +import { ErrorId } from 
"@trigger.dev/core/v3/isomorphic"; +import { isPast } from "date-fns"; import { AnimatePresence, motion } from "framer-motion"; import { Suspense, useEffect, useMemo, useRef, useState } from "react"; -import { Spinner } from "~/components/primitives/Spinner"; -import { Paragraph } from "~/components/primitives/Paragraph"; -import { Callout } from "~/components/primitives/Callout"; -import { Header2, Header3 } from "~/components/primitives/Headers"; - -import { formatDistanceToNow, isPast } from "date-fns"; - -import * as Property from "~/components/primitives/PropertyTable"; -import { TaskRunsTable } from "~/components/runs/v3/TaskRunsTable"; -import { DateTime, RelativeDateTime } from "~/components/primitives/DateTime"; -import { ErrorId } from "@trigger.dev/core/v3/isomorphic"; import { Bar, BarChart, @@ -65,31 +21,68 @@ import { XAxis, YAxis, } from "recharts"; -import TooltipPortal from "~/components/primitives/TooltipPortal"; -import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; -import { useOptimisticLocation } from "~/hooks/useOptimisticLocation"; -import { DirectionSchema, ListPagination } from "~/components/ListPagination"; -import { Button, LinkButton } from "~/components/primitives/Buttons"; +import { TypedAwait, typeddefer, useTypedLoaderData } from "remix-typedjson"; +import { z } from "zod"; import { ListCheckedIcon } from "~/assets/icons/ListCheckedIcon"; -import { useOrganization } from "~/hooks/useOrganizations"; -import { useProject } from "~/hooks/useProject"; -import { useEnvironment } from "~/hooks/useEnvironment"; import { RunsIcon } from "~/assets/icons/RunsIcon"; -import type { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; -import { useSearchParams } from "~/hooks/useSearchParam"; -import { CopyableText } from "~/components/primitives/CopyableText"; -import { cn } from "~/utils/cn"; -import { LogsVersionFilter } from "~/components/logs/LogsVersionFilter"; import { CodeBlock } from 
"~/components/code/CodeBlock"; - -import { Popover, PopoverArrowTrigger, PopoverContent } from "~/components/primitives/Popover"; -import { ErrorGroupActions } from "~/v3/services/errorGroupActions.server"; +import { ErrorStatusBadge } from "~/components/errors/ErrorStatusBadge"; import { - ErrorStatusMenuItems, CustomIgnoreDialog, + ErrorStatusMenuItems, statusActionToastMessage, } from "~/components/errors/ErrorStatusMenu"; +import { PageBody } from "~/components/layout/AppLayout"; +import { DirectionSchema, ListPagination } from "~/components/ListPagination"; +import { LogsVersionFilter } from "~/components/logs/LogsVersionFilter"; +import { LinkButton } from "~/components/primitives/Buttons"; +import { Callout } from "~/components/primitives/Callout"; +import { CopyableText } from "~/components/primitives/CopyableText"; +import { DateTime, RelativeDateTime } from "~/components/primitives/DateTime"; +import { Header2, Header3 } from "~/components/primitives/Headers"; +import { NavBar, PageAccessories, PageTitle } from "~/components/primitives/PageHeader"; +import { Paragraph } from "~/components/primitives/Paragraph"; +import { Popover, PopoverArrowTrigger, PopoverContent } from "~/components/primitives/Popover"; +import * as Property from "~/components/primitives/PropertyTable"; +import { + ResizableHandle, + ResizablePanel, + ResizablePanelGroup, +} from "~/components/primitives/Resizable"; +import { Spinner } from "~/components/primitives/Spinner"; import { useToast } from "~/components/primitives/Toast"; +import TooltipPortal from "~/components/primitives/TooltipPortal"; +import type { TaskRunListSearchFilters } from "~/components/runs/v3/RunFilters"; +import { TimeFilter, timeFilterFromTo } from "~/components/runs/v3/SharedFilters"; +import { TaskRunsTable } from "~/components/runs/v3/TaskRunsTable"; +import { $replica } from "~/db.server"; +import { useEnvironment } from "~/hooks/useEnvironment"; +import { useOptimisticLocation } from 
"~/hooks/useOptimisticLocation"; +import { useOrganization } from "~/hooks/useOrganizations"; +import { useProject } from "~/hooks/useProject"; +import { useSearchParams } from "~/hooks/useSearchParam"; +import { findProjectBySlug } from "~/models/project.server"; +import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; +import { + type ErrorGroupActivity, + type ErrorGroupActivityVersions, + type ErrorGroupOccurrences, + ErrorGroupPresenter, + type ErrorGroupState, + type ErrorGroupSummary, +} from "~/presenters/v3/ErrorGroupPresenter.server"; +import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { requireUser, requireUserId } from "~/services/session.server"; +import { cn } from "~/utils/cn"; +import { + EnvironmentParamSchema, + v3CreateBulkActionPath, + v3ErrorsPath, + v3RunsPath, +} from "~/utils/pathBuilder"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { ErrorGroupActions } from "~/v3/services/errorGroupActions.server"; export const meta: MetaFunction = ({ data }) => { return [ @@ -251,7 +244,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const clickhouseClient = await clickhouseFactory.getClickhouseForOrganization( environment.organizationId, - "query" + "logs" ); const presenter = new ErrorGroupPresenter($replica, clickhouseClient, clickhouseClient); From 8fa3153fb3ebc52e4eaa5f2701ed6f4d2092d880 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Thu, 16 Apr 2026 14:02:43 +0100 Subject: [PATCH 22/34] Retry the initial boot using p-retry --- .../organizationDataStoresRegistry.server.ts | 7 +++++- ...zationDataStoresRegistryInstance.server.ts | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts 
b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts index 379084a0d74..f2ef02cc3bd 100644 --- a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts @@ -13,7 +13,12 @@ export class OrganizationDataStoresRegistry { private _loaded = false; private _readyResolve!: () => void; - /** Resolves once the initial `loadFromDatabase()` completes successfully. */ + /** + * Resolves once the initial `loadFromDatabase()` completes successfully. + * At process startup the singleton loads the registry with unbounded retries + * (exponential backoff, capped delay) until Postgres is reachable; until then + * this promise stays pending and callers that await readiness will block. + */ readonly isReady: Promise; constructor(prisma: PrismaClient | PrismaReplicaClient) { diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts index f7d82da5682..24ec572c5d5 100644 --- a/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistryInstance.server.ts @@ -1,5 +1,7 @@ +import pRetry from "p-retry"; import { $replica } from "~/db.server"; import { env } from "~/env.server"; +import { logger } from "~/services/logger.server"; import { signalsEmitter } from "~/services/signals.server"; import { singleton } from "~/utils/singleton"; import { OrganizationDataStoresRegistry } from "./organizationDataStoresRegistry.server"; @@ -7,8 +9,24 @@ import { OrganizationDataStoresRegistry } from "./organizationDataStoresRegistry export const organizationDataStoresRegistry = singleton("organizationDataStoresRegistry", () => { const registry = new OrganizationDataStoresRegistry($replica); - registry.loadFromDatabase().catch((err) => { - 
console.error("[OrganizationDataStoresRegistry] Failed to initialize", err); + // Runs as soon as this singleton is created (first import of this module). The + // registry’s `isReady` promise resolves when this eventually succeeds. + const startupLoadPromise = pRetry(() => registry.loadFromDatabase(), { + forever: true, + retries: 10, + minTimeout: 1_000, + maxTimeout: 60_000, + factor: 2, + onFailedAttempt: (error) => { + logger.warn("[OrganizationDataStoresRegistry] Startup load failed, retrying", { + attemptNumber: error.attemptNumber, + retriesLeft: error.retriesLeft, + error: error.message, + }); + }, + }); + startupLoadPromise.catch((err) => { + console.error("[OrganizationDataStoresRegistry] Unexpected startup load failure", err); }); const interval = setInterval(() => { From e837a1b4438627096753ef9dee8de7f936d98180 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 16 Apr 2026 16:43:07 +0000 Subject: [PATCH 23/34] Fix Devin Review bugs: postgres event repository fallback and error presenter clients Co-Authored-By: Matt Aitken --- .../route.tsx | 10 +++++----- .../route.tsx | 6 +++--- .../app/v3/eventRepository/index.server.ts | 16 +++++++++++++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx index ba8dbefe592..c88d4e301e2 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx @@ -242,12 +242,12 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const directionRaw = url.searchParams.get("direction") ??
undefined; const direction = directionRaw ? DirectionSchema.parse(directionRaw) : undefined; - const clickhouseClient = await clickhouseFactory.getClickhouseForOrganization( - environment.organizationId, - "logs" - ); + const [logsClickhouseClient, clickhouseClient] = await Promise.all([ + clickhouseFactory.getClickhouseForOrganization(environment.organizationId, "logs"), + clickhouseFactory.getClickhouseForOrganization(environment.organizationId, "standard"), + ]); - const presenter = new ErrorGroupPresenter($replica, clickhouseClient, clickhouseClient); + const presenter = new ErrorGroupPresenter($replica, logsClickhouseClient, clickhouseClient); const detailPromise = presenter .call(project.organizationId, environment.id, { diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx index e2b1d6634ca..0e971f10b91 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx @@ -124,11 +124,11 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const plan = await getCurrentPlan(project.organizationId); const retentionLimitDays = plan?.v3Subscription?.plan?.limits.logRetentionDays.number ?? 
30; - const queryClickhouse = await clickhouseFactory.getClickhouseForOrganization( + const logsClickhouseClient = await clickhouseFactory.getClickhouseForOrganization( project.organizationId, - "query" + "logs" ); - const presenter = new ErrorsListPresenter($replica, queryClickhouse); + const presenter = new ErrorsListPresenter($replica, logsClickhouseClient); const listPromise = presenter .call(project.organizationId, environment.id, { diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index 04d6678f92b..69c6e80ef25 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -57,14 +57,15 @@ export async function getConfiguredEventRepository( (organization.featureFlags as Record | null) ?? undefined ); - const { repository: resolvedRepository } = - await clickhouseFactory.getEventRepositoryForOrganization(taskEventStore, organizationId); - if (taskEventStore === EVENT_STORE_TYPES.CLICKHOUSE_V2) { + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization(taskEventStore, organizationId); return { repository: resolvedRepository, store: EVENT_STORE_TYPES.CLICKHOUSE_V2 }; } if (taskEventStore === EVENT_STORE_TYPES.CLICKHOUSE) { + const { repository: resolvedRepository } = + await clickhouseFactory.getEventRepositoryForOrganization(taskEventStore, organizationId); return { repository: resolvedRepository, store: EVENT_STORE_TYPES.CLICKHOUSE }; } @@ -104,6 +105,15 @@ export async function getV3EventRepository( parentStore: string | undefined ): Promise<{ repository: IEventRepository; store: string }> { if (typeof parentStore === "string") { + // Support legacy Postgres store for self-hosters and runs persisted with a + // non-ClickHouse store — fall back to the Prisma-based event repository.
+ if ( + parentStore !== EVENT_STORE_TYPES.CLICKHOUSE && + parentStore !== EVENT_STORE_TYPES.CLICKHOUSE_V2 + ) { + return { repository: eventRepository, store: parentStore }; + } + const { repository: resolvedRepository } = await clickhouseFactory.getEventRepositoryForOrganization(parentStore, organizationId); return { repository: resolvedRepository, store: parentStore }; From 7f473c6ce76315877105ae95cab3330b4c84ebb4 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 10:46:37 +0000 Subject: [PATCH 24/34] Fall back to Postgres eventRepository for non-ClickHouse stores Co-Authored-By: Matt Aitken --- .../app/services/clickhouse/clickhouseFactory.server.ts | 9 +++++++++ apps/webapp/app/v3/eventRepository/index.server.ts | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 92df7de2b54..55c0feef7f5 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -6,6 +6,7 @@ import { singleton } from "~/utils/singleton"; import { organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server"; import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types"; +import { eventRepository as postgresEventRepository } from "~/v3/eventRepository/eventRepository.server"; // --------------------------------------------------------------------------- // Default clients (singleton per process) @@ -254,6 +255,14 @@ export class ClickhouseFactory { store: string, organizationId: string ): { key: string; repository: IEventRepository } { + // Non-ClickHouse stores (e.g. 
the "taskEvent" DB default for Postgres-backed + // runs, or "postgres") fall back to the Prisma event repository. This lets + // callers pass `run.taskEventStore` directly without needing to guard + // against legacy/Postgres values. + if (store !== "clickhouse" && store !== "clickhouse_v2") { + return { key: `postgres:${store}`, repository: postgresEventRepository }; + } + const dataStore = this._registry.get(organizationId, "CLICKHOUSE"); if (!dataStore) { diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index 69c6e80ef25..f3449564fce 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -79,8 +79,12 @@ export async function getEventRepository( ): Promise<{ repository: IEventRepository; store: string }> { const taskEventStore = parentStore ?? (await resolveTaskEventRepositoryFlag(featureFlags)); - // Support legacy Postgres store for self-hosters - if (taskEventStore === EVENT_STORE_TYPES.POSTGRES) { + // Non-ClickHouse stores (e.g. the "taskEvent" DB default for Postgres-backed + // runs, or the legacy "postgres" value) resolve to the Prisma event repo. + if ( + taskEventStore !== EVENT_STORE_TYPES.CLICKHOUSE && + taskEventStore !== EVENT_STORE_TYPES.CLICKHOUSE_V2 + ) { return { repository: eventRepository, store: getTaskEventStore() }; } From 8f3c622653288fa01c7cfc2af3ff66620a90e2d9 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 10:58:46 +0000 Subject: [PATCH 25/34] Move Postgres event repository fallback out of ClickHouse factory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The factory should not import eventRepository.server.ts — doing so pulls the tracePubSub singleton into any module graph that imports the factory, which eagerly connects to Redis at module load time (see singleton.ts).
The fallback now lives in index.server.ts via getEventRepositoryForStore, which is called from RunPresenter, SpanPresenter, and recordRunEvent. This restores runsReplicationBenchmark.test.ts's module isolation while still handling non-ClickHouse ("taskEvent"/"postgres") stores. Co-Authored-By: Matt Aitken --- .../app/presenters/v3/RunPresenter.server.ts | 4 +-- .../app/presenters/v3/SpanPresenter.server.ts | 4 +-- .../clickhouse/clickhouseFactory.server.ts | 9 ------ .../app/v3/eventRepository/index.server.ts | 30 +++++++++++++++---- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index cd437b07b91..b4733144907 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -7,7 +7,7 @@ import { SpanSummary } from "~/v3/eventRepository/eventRepository.types"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { isFinalRunStatus } from "~/v3/taskStatus"; import { env } from "~/env.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; type Result = Awaited>; export type Run = Result["run"]; @@ -145,7 +145,7 @@ export class RunPresenter { }; } - const { repository } = await clickhouseFactory.getEventRepositoryForOrganization( + const repository = await getEventRepositoryForStore( run.taskEventStore, run.runtimeEnvironment.organizationId ); diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index 31457ee0cb3..61334ba96e3 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -31,7 +31,7 @@ import { extractAIToolCallData, extractAIEmbedData, } from "~/components/runs/v3/ai"; -import { 
clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; export type PromptSpanData = { slug: string; @@ -132,7 +132,7 @@ export class SpanPresenter extends BasePresenter { const { traceId } = parentRun; - const { repository } = await clickhouseFactory.getEventRepositoryForOrganization( + const repository = await getEventRepositoryForStore( parentRun.taskEventStore, project.organizationId ); diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 55c0feef7f5..92df7de2b54 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -6,7 +6,6 @@ import { singleton } from "~/utils/singleton"; import { organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server"; import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types"; -import { eventRepository as postgresEventRepository } from "~/v3/eventRepository/eventRepository.server"; // --------------------------------------------------------------------------- // Default clients (singleton per process) @@ -255,14 +254,6 @@ export class ClickhouseFactory { store: string, organizationId: string ): { key: string; repository: IEventRepository } { - // Non-ClickHouse stores (e.g. the "taskEvent" DB default for Postgres-backed - // runs, or "postgres") fall back to the Prisma event repository. This lets - // callers pass `run.taskEventStore` directly without needing to guard - // against legacy/Postgres values. 
- if (store !== "clickhouse" && store !== "clickhouse_v2") { - return { key: `postgres:${store}`, repository: postgresEventRepository }; - } - const dataStore = this._registry.get(organizationId, "CLICKHOUSE"); if (!dataStore) { diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index f3449564fce..c2e772cc127 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -32,6 +32,27 @@ export function resolveEventRepositoryForStore( return eventRepository; } +/** + * Async variant of {@link resolveEventRepositoryForStore}. Awaits the factory's + * registry readiness before returning the ClickHouse event repository; for + * non-ClickHouse stores (e.g. the "taskEvent" DB default for Postgres-backed + * runs) it returns the Prisma event repository without ever touching the + * factory — so the factory never needs to know about Postgres. + */ +export async function getEventRepositoryForStore( + store: string, + organizationId: string +): Promise { + if (store !== EVENT_STORE_TYPES.CLICKHOUSE && store !== EVENT_STORE_TYPES.CLICKHOUSE_V2) { + return eventRepository; + } + const { repository } = await clickhouseFactory.getEventRepositoryForOrganization( + store, + organizationId + ); + return repository; +} + export async function getConfiguredEventRepository( organizationId: string ): Promise<{ repository: IEventRepository; store: EventStoreType }> { @@ -219,11 +240,10 @@ async function recordRunEvent( }; } - const { repository: $eventRepository } = - await clickhouseFactory.getEventRepositoryForOrganization( - foundRun.taskEventStore, - foundRun.runtimeEnvironment.organizationId - ); + const $eventRepository = await getEventRepositoryForStore( + foundRun.taskEventStore, + foundRun.runtimeEnvironment.organizationId + ); const { attributes, startTime, ...optionsRest } = options; From 00e4cfba0d260a7b4442f3dd82b664ffd875d3c7 Mon Sep 17 00:00:00
2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 11:27:52 +0000 Subject: [PATCH 26/34] Make OrganizationDataStoresRegistry deterministic on overlap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort findMany() by `key` for a stable winner when multiple rows assign the same `${orgId}:${kind}`, and log an error identifying the winning and ignored rows instead of overwriting silently. Does not fail the load — failing the registry would break every customer, not just the misconfigured orgs. Co-Authored-By: Matt Aitken --- .../organizationDataStoresRegistry.server.ts | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts index f2ef02cc3bd..838dd5ae5b8 100644 --- a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts @@ -33,10 +33,18 @@ export class OrganizationDataStoresRegistry { } async loadFromDatabase(): Promise { - const rows = await this._prisma.organizationDataStore.findMany(); + // Sort by `key` (unique, immutable) to ensure a deterministic winner when the + // same `${orgId}:${kind}` appears in multiple rows. The registry must never + // throw on overlap — failing the load would break every customer, not just the + // misconfigured orgs — so we keep the first entry and log an error instead. + const rows = await this._prisma.organizationDataStore.findMany({ + orderBy: { key: "asc" }, + }); const secretStore = getSecretStore("DATABASE", { prismaClient: this._prisma }); const lookup = new Map(); + /** Tracks which row's `key` already owns each `${orgId}:${kind}` so we can log conflicts.
*/ + const winnerByLookupKey = new Map(); for (const row of rows) { let parsed: ParsedDataStore | null = null; @@ -75,8 +83,16 @@ export class OrganizationDataStoresRegistry { } for (const orgId of row.organizationIds) { - const key = `${orgId}:${row.kind}`; - lookup.set(key, parsed); + const lookupKey = `${orgId}:${row.kind}`; + const existingWinner = winnerByLookupKey.get(lookupKey); + if (existingWinner) { + console.error( + `[OrganizationDataStoresRegistry] Overlapping OrganizationDataStore assignment for orgId="${orgId}" kind=${row.kind}: already routed to "${existingWinner}", ignoring "${row.key}". Pick one store per (org, kind) to resolve.` + ); + continue; + } + winnerByLookupKey.set(lookupKey, row.key); + lookup.set(lookupKey, parsed); } } From 506cf5a595171703c30e806168b2afbdcc655e0a Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 12:17:56 +0100 Subject: [PATCH 27/34] try/catch getting the clickhouse client --- .../route.tsx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx index 69ce432f4e0..d6e866184a5 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx @@ -102,9 +102,13 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { }); } - const clickhouse = await clickhouseFactory.getClickhouseForOrganization(project.organizationId, "standard"); - const presenter = new TestTaskPresenter($replica, clickhouse); try { + const clickhouse = await clickhouseFactory.getClickhouseForOrganization( + project.organizationId, + "standard" + ); + const presenter = new 
TestTaskPresenter($replica, clickhouse); + const [result, regionsResult] = await Promise.all([ presenter.call({ userId: user.id, From 8c789ab0c3a969246336da2ad0f779cdc411263c Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 12:40:48 +0100 Subject: [PATCH 28/34] Better handling for no org id on task run --- .../resources.runs.$runParam.logs.download.ts | 7 +- .../webapp/app/v3/runEngineHandlers.server.ts | 485 +++++++++--------- 2 files changed, 253 insertions(+), 239 deletions(-) diff --git a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts index 5da10a8a8af..5f19f129729 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts @@ -29,14 +29,11 @@ export async function loader({ params, request }: LoaderFunctionArgs) { }, }); - if (!run) { + if (!run || !run.organizationId) { return new Response("Not found", { status: 404 }); } - const eventRepository = resolveEventRepositoryForStore( - run.taskEventStore, - run.organizationId ?? 
"" - ); + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore, run.organizationId); const runEvents = await eventRepository.getRunEvents( getTaskEventStoreTableForRun(run), diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts index eced6c758ee..82ed633e468 100644 --- a/apps/webapp/app/v3/runEngineHandlers.server.ts +++ b/apps/webapp/app/v3/runEngineHandlers.server.ts @@ -17,10 +17,7 @@ import { QueueSizeLimitExceededError } from "~/v3/services/common.server"; import { TriggerTaskService } from "~/v3/services/triggerTask.server"; import { tracer } from "~/v3/tracer.server"; import { createExceptionPropertiesFromError } from "./eventRepository/common.server"; -import { - recordRunDebugLog, - resolveEventRepositoryForStore, -} from "./eventRepository/index.server"; +import { recordRunDebugLog, resolveEventRepositoryForStore } from "./eventRepository/index.server"; import { roomFromFriendlyRunId, socketIo } from "./handleSocketIo.server"; import { engine } from "./runEngine.server"; import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server"; @@ -187,9 +184,16 @@ export function registerRunEngineEventBusHandlers() { return; } + if (!taskRun.organizationId) { + logger.error("[runAttemptFailed] Task run has no organization id", { + runId: run.id, + }); + return; + } + const eventRepository = resolveEventRepositoryForStore( run.taskEventStore, - taskRun.organizationId ?? "" + taskRun.organizationId ); const [createAttemptFailedRunEventError] = await tryCatch( @@ -291,9 +295,16 @@ export function registerRunEngineEventBusHandlers() { return; } + if (!blockedRun.organizationId) { + logger.error("[cachedRunCompleted] Blocked run has no organization id", { + blockedRunId, + }); + return; + } + const eventRepository = resolveEventRepositoryForStore( blockedRun.taskEventStore, - blockedRun.organizationId ?? 
"" + blockedRun.organizationId ); const [completeCachedRunEventError] = await tryCatch( @@ -431,49 +442,53 @@ export function registerRunEngineEventBusHandlers() { } }); - engine.eventBus.on("runRetryScheduled", async ({ time, run, environment, retryAt, organization }) => { - try { - if (retryAt && time && time >= retryAt) { - return; - } + engine.eventBus.on( + "runRetryScheduled", + async ({ time, run, environment, retryAt, organization }) => { + try { + if (retryAt && time && time >= retryAt) { + return; + } - let retryMessage = `Retry ${typeof run.attemptNumber === "number" ? `#${run.attemptNumber - 1}` : "" + let retryMessage = `Retry ${ + typeof run.attemptNumber === "number" ? `#${run.attemptNumber - 1}` : "" } delay`; - if (run.nextMachineAfterOOM) { - retryMessage += ` after OOM`; - } + if (run.nextMachineAfterOOM) { + retryMessage += ` after OOM`; + } - const eventRepository = resolveEventRepositoryForStore( - run.taskEventStore ?? "taskEvent", - organization.id - ); + const eventRepository = resolveEventRepositoryForStore( + run.taskEventStore ?? "taskEvent", + organization.id + ); - await eventRepository.recordEvent(retryMessage, { - startTime: BigInt(time.getTime() * 1000000), - taskSlug: run.taskIdentifier, - environment, - attributes: { - properties: { - retryAt: retryAt.toISOString(), - nextMachine: run.nextMachineAfterOOM, - }, - runId: run.friendlyId, - style: { - icon: "schedule-attempt", + await eventRepository.recordEvent(retryMessage, { + startTime: BigInt(time.getTime() * 1000000), + taskSlug: run.taskIdentifier, + environment, + attributes: { + properties: { + retryAt: retryAt.toISOString(), + nextMachine: run.nextMachineAfterOOM, + }, + runId: run.friendlyId, + style: { + icon: "schedule-attempt", + }, }, - }, - context: run.traceContext as Record, - endTime: retryAt, - }); - } catch (error) { - logger.error("[runRetryScheduled] Failed to record retry event", { - error: error instanceof Error ? 
error.message : error, - runId: run.id, - spanId: run.spanId, - }); + context: run.traceContext as Record, + endTime: retryAt, + }); + } catch (error) { + logger.error("[runRetryScheduled] Failed to record retry event", { + error: error instanceof Error ? error.message : error, + runId: run.id, + spanId: run.spanId, + }); + } } - }); + ); engine.eventBus.on("runAttemptStarted", async ({ time, run, organization }) => { try { @@ -506,10 +521,10 @@ export function registerRunEngineEventBusHandlers() { error: e instanceof Error ? { - name: e.name, - message: e.message, - stack: e.stack, - } + name: e.name, + message: e.message, + stack: e.stack, + } : e, }); } else { @@ -518,10 +533,10 @@ export function registerRunEngineEventBusHandlers() { error: e instanceof Error ? { - name: e.name, - message: e.message, - stack: e.stack, - } + name: e.name, + message: e.message, + stack: e.stack, + } : e, }); } @@ -679,121 +694,200 @@ const QUEUE_SIZE_LIMIT_EXCEEDED_ERROR_CODE = "QUEUE_SIZE_LIMIT_EXCEEDED"; */ export function setupBatchQueueCallbacks() { // Item processing callback - creates a run for each batch item - engine.setBatchProcessItemCallback(async ({ batchId, friendlyId, itemIndex, item, meta, attempt, isFinalAttempt }) => { - return tracer.startActiveSpan( - "batch.processItem", - { - kind: SpanKind.INTERNAL, - attributes: { - "batch.id": friendlyId, - "batch.item_index": itemIndex, - "batch.task": item.task, - "batch.environment_id": meta.environmentId, - "batch.parent_run_id": meta.parentRunId ?? 
"", - "batch.attempt": attempt, - "batch.is_final_attempt": isFinalAttempt, + engine.setBatchProcessItemCallback( + async ({ batchId, friendlyId, itemIndex, item, meta, attempt, isFinalAttempt }) => { + return tracer.startActiveSpan( + "batch.processItem", + { + kind: SpanKind.INTERNAL, + attributes: { + "batch.id": friendlyId, + "batch.item_index": itemIndex, + "batch.task": item.task, + "batch.environment_id": meta.environmentId, + "batch.parent_run_id": meta.parentRunId ?? "", + "batch.attempt": attempt, + "batch.is_final_attempt": isFinalAttempt, + }, }, - }, - async (span) => { - const triggerFailedTaskService = new TriggerFailedTaskService({ - prisma, - engine, - replicaPrisma: $replica, - }); + async (span) => { + const triggerFailedTaskService = new TriggerFailedTaskService({ + prisma, + engine, + replicaPrisma: $replica, + }); - // Check for pre-marked error items (e.g. oversized payloads) - const itemError = item.options?.__error as string | undefined; - if (itemError) { - const errorCode = (item.options?.__errorCode as string) ?? "ITEM_ERROR"; + // Check for pre-marked error items (e.g. oversized payloads) + const itemError = item.options?.__error as string | undefined; + if (itemError) { + const errorCode = (item.options?.__errorCode as string) ?? "ITEM_ERROR"; + + let environment: AuthenticatedEnvironment | undefined; + try { + environment = (await findEnvironmentById(meta.environmentId)) ?? undefined; + } catch { + // Best-effort environment lookup + } + + if (environment) { + const failedRunId = await triggerFailedTaskService.call({ + taskId: item.task, + environment, + payload: item.payload ?? 
"{}", + payloadType: item.payloadType as string, + errorMessage: itemError, + errorCode: errorCode as TaskRunErrorCodes, + parentRunId: meta.parentRunId, + resumeParentOnCompletion: meta.resumeParentOnCompletion, + batch: { id: batchId, index: itemIndex }, + traceContext: meta.traceContext as Record | undefined, + spanParentAsLink: meta.spanParentAsLink, + }); + + if (failedRunId) { + span.setAttribute("batch.result.pre_failed", true); + span.setAttribute("batch.result.run_id", failedRunId); + span.end(); + return { success: true as const, runId: failedRunId }; + } + } + + // Fallback if TriggerFailedTaskService or environment lookup fails + span.end(); + return { success: false as const, error: itemError, errorCode }; + } let environment: AuthenticatedEnvironment | undefined; try { environment = (await findEnvironmentById(meta.environmentId)) ?? undefined; - } catch { - // Best-effort environment lookup - } - if (environment) { - const failedRunId = await triggerFailedTaskService.call({ - taskId: item.task, - environment, - payload: item.payload ?? 
"{}", - payloadType: item.payloadType as string, - errorMessage: itemError, - errorCode: errorCode as TaskRunErrorCodes, - parentRunId: meta.parentRunId, - resumeParentOnCompletion: meta.resumeParentOnCompletion, - batch: { id: batchId, index: itemIndex }, - traceContext: meta.traceContext as Record | undefined, - spanParentAsLink: meta.spanParentAsLink, - }); - - if (failedRunId) { - span.setAttribute("batch.result.pre_failed", true); - span.setAttribute("batch.result.run_id", failedRunId); + if (!environment) { + span.setAttribute("batch.result.error", "Environment not found"); span.end(); - return { success: true as const, runId: failedRunId }; + + return { + success: false as const, + error: "Environment not found", + errorCode: "ENVIRONMENT_NOT_FOUND", + }; } - } - // Fallback if TriggerFailedTaskService or environment lookup fails - span.end(); - return { success: false as const, error: itemError, errorCode }; - } + const triggerTaskService = new TriggerTaskService(); - let environment: AuthenticatedEnvironment | undefined; - try { - environment = (await findEnvironmentById(meta.environmentId)) ?? undefined; + // Normalize payload - for application/store (R2 paths), this passes through as-is + const payload = normalizePayload(item.payload, item.payloadType); - if (!environment) { - span.setAttribute("batch.result.error", "Environment not found"); - span.end(); + const result = await triggerTaskService.call( + item.task, + environment, + { + payload, + options: { + ...(item.options as Record), + payloadType: item.payloadType, + parentRunId: meta.parentRunId, + resumeParentOnCompletion: meta.resumeParentOnCompletion, + parentBatch: batchId, + }, + }, + { + triggerVersion: meta.triggerVersion, + traceContext: meta.traceContext as Record | undefined, + spanParentAsLink: meta.spanParentAsLink, + batchId, + batchIndex: itemIndex, + realtimeStreamsVersion: meta.realtimeStreamsVersion, + planType: meta.planType, + triggerSource: meta.parentRunId ? 
"sdk" : meta.triggerSource ?? "api", + triggerAction: "trigger", + }, + "V2" + ); - return { - success: false as const, - error: "Environment not found", - errorCode: "ENVIRONMENT_NOT_FOUND", - }; - } + if (result) { + span.setAttribute("batch.result.run_id", result.run.friendlyId); + span.end(); + return { success: true as const, runId: result.run.friendlyId }; + } else { + logger.error("[BatchQueue] TriggerTaskService returned undefined", { + batchId, + friendlyId, + itemIndex, + task: item.task, + environmentId: meta.environmentId, + attempt, + isFinalAttempt, + }); - const triggerTaskService = new TriggerTaskService(); + span.setAttribute("batch.result.error", "TriggerTaskService returned undefined"); + + // Only create a pre-failed run on the final attempt; otherwise let the retry mechanism handle it + if (isFinalAttempt) { + const failedRunId = await triggerFailedTaskService.call({ + taskId: item.task, + environment, + payload: item.payload, + payloadType: item.payloadType as string, + errorMessage: "TriggerTaskService returned undefined", + parentRunId: meta.parentRunId, + resumeParentOnCompletion: meta.resumeParentOnCompletion, + batch: { id: batchId, index: itemIndex }, + options: item.options as Record, + traceContext: meta.traceContext as Record | undefined, + spanParentAsLink: meta.spanParentAsLink, + errorCode: TaskRunErrorCodes.BATCH_ITEM_COULD_NOT_TRIGGER, + }); + + span.end(); + + if (failedRunId) { + return { success: true as const, runId: failedRunId }; + } + } else { + span.end(); + } - // Normalize payload - for application/store (R2 paths), this passes through as-is - const payload = normalizePayload(item.payload, item.payloadType); + return { + success: false as const, + error: "TriggerTaskService returned undefined", + errorCode: "TRIGGER_FAILED", + }; + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + + // Queue-size-limit rejections are a customer-overload scenario (the + // env's queue is at its configured max). Retrying is pointless — the + // same item will fail again — and creating pre-failed TaskRuns for + // every item of every retried batch is exactly what chews through + // DB capacity when a noisy tenant fills their queue. Signal the + // BatchQueue to skip retries and skip pre-failed run creation, and + // let the completion callback collapse the per-item errors into a + // single summary row. + if (error instanceof QueueSizeLimitExceededError) { + logger.warn("[BatchQueue] Batch item rejected: queue size limit reached", { + batchId, + friendlyId, + itemIndex, + task: item.task, + environmentId: meta.environmentId, + maximumSize: error.maximumSize, + }); - const result = await triggerTaskService.call( - item.task, - environment, - { - payload, - options: { - ...(item.options as Record), - payloadType: item.payloadType, - parentRunId: meta.parentRunId, - resumeParentOnCompletion: meta.resumeParentOnCompletion, - parentBatch: batchId, - }, - }, - { - triggerVersion: meta.triggerVersion, - traceContext: meta.traceContext as Record | undefined, - spanParentAsLink: meta.spanParentAsLink, - batchId, - batchIndex: itemIndex, - realtimeStreamsVersion: meta.realtimeStreamsVersion, - planType: meta.planType, - triggerSource: meta.parentRunId ? "sdk" : meta.triggerSource ?? 
"api", - triggerAction: "trigger", - }, - "V2" - ); + span.setAttribute("batch.result.error", errorMessage); + span.setAttribute("batch.result.errorCode", QUEUE_SIZE_LIMIT_EXCEEDED_ERROR_CODE); + span.setAttribute("batch.result.skipRetries", true); + span.end(); - if (result) { - span.setAttribute("batch.result.run_id", result.run.friendlyId); - span.end(); - return { success: true as const, runId: result.run.friendlyId }; - } else { - logger.error("[BatchQueue] TriggerTaskService returned undefined", { + return { + success: false as const, + error: errorMessage, + errorCode: QUEUE_SIZE_LIMIT_EXCEEDED_ERROR_CODE, + skipRetries: true, + }; + } + + logger.error("[BatchQueue] Failed to trigger batch item", { batchId, friendlyId, itemIndex, @@ -801,18 +895,20 @@ export function setupBatchQueueCallbacks() { environmentId: meta.environmentId, attempt, isFinalAttempt, + error, }); - span.setAttribute("batch.result.error", "TriggerTaskService returned undefined"); + span.setAttribute("batch.result.error", errorMessage); + span.recordException(error instanceof Error ? error : new Error(String(error))); // Only create a pre-failed run on the final attempt; otherwise let the retry mechanism handle it - if (isFinalAttempt) { + if (isFinalAttempt && environment) { const failedRunId = await triggerFailedTaskService.call({ taskId: item.task, environment, payload: item.payload, payloadType: item.payloadType as string, - errorMessage: "TriggerTaskService returned undefined", + errorMessage, parentRunId: meta.parentRunId, resumeParentOnCompletion: meta.resumeParentOnCompletion, batch: { id: batchId, index: itemIndex }, @@ -831,95 +927,16 @@ export function setupBatchQueueCallbacks() { span.end(); } - return { - success: false as const, - error: "TriggerTaskService returned undefined", - errorCode: "TRIGGER_FAILED", - }; - } - } catch (error) { - const errorMessage = error instanceof Error ? 
error.message : String(error); - - // Queue-size-limit rejections are a customer-overload scenario (the - // env's queue is at its configured max). Retrying is pointless — the - // same item will fail again — and creating pre-failed TaskRuns for - // every item of every retried batch is exactly what chews through - // DB capacity when a noisy tenant fills their queue. Signal the - // BatchQueue to skip retries and skip pre-failed run creation, and - // let the completion callback collapse the per-item errors into a - // single summary row. - if (error instanceof QueueSizeLimitExceededError) { - logger.warn("[BatchQueue] Batch item rejected: queue size limit reached", { - batchId, - friendlyId, - itemIndex, - task: item.task, - environmentId: meta.environmentId, - maximumSize: error.maximumSize, - }); - - span.setAttribute("batch.result.error", errorMessage); - span.setAttribute("batch.result.errorCode", QUEUE_SIZE_LIMIT_EXCEEDED_ERROR_CODE); - span.setAttribute("batch.result.skipRetries", true); - span.end(); - return { success: false as const, error: errorMessage, - errorCode: QUEUE_SIZE_LIMIT_EXCEEDED_ERROR_CODE, - skipRetries: true, + errorCode: "TRIGGER_ERROR", }; } - - logger.error("[BatchQueue] Failed to trigger batch item", { - batchId, - friendlyId, - itemIndex, - task: item.task, - environmentId: meta.environmentId, - attempt, - isFinalAttempt, - error, - }); - - span.setAttribute("batch.result.error", errorMessage); - span.recordException(error instanceof Error ? 
error : new Error(String(error))); - - // Only create a pre-failed run on the final attempt; otherwise let the retry mechanism handle it - if (isFinalAttempt && environment) { - const failedRunId = await triggerFailedTaskService.call({ - taskId: item.task, - environment, - payload: item.payload, - payloadType: item.payloadType as string, - errorMessage, - parentRunId: meta.parentRunId, - resumeParentOnCompletion: meta.resumeParentOnCompletion, - batch: { id: batchId, index: itemIndex }, - options: item.options as Record, - traceContext: meta.traceContext as Record | undefined, - spanParentAsLink: meta.spanParentAsLink, - errorCode: TaskRunErrorCodes.BATCH_ITEM_COULD_NOT_TRIGGER, - }); - - span.end(); - - if (failedRunId) { - return { success: true as const, runId: failedRunId }; - } - } else { - span.end(); - } - - return { - success: false as const, - error: errorMessage, - errorCode: "TRIGGER_ERROR", - }; } - } - ); - }); + ); + } + ); // Batch completion callback - updates Postgres with results engine.setBatchCompletionCallback(async (result: CompleteBatchResult) => { From ff6f53f7715f53fef7f424bec4ce40e2c24ac3ac Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 12:41:02 +0100 Subject: [PATCH 29/34] Test for deterministic data store resolution if there are multiple entries --- .../organizationDataStoresRegistry.test.ts | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apps/webapp/test/organizationDataStoresRegistry.test.ts b/apps/webapp/test/organizationDataStoresRegistry.test.ts index 44fe851a404..f8ff9dde41f 100644 --- a/apps/webapp/test/organizationDataStoresRegistry.test.ts +++ b/apps/webapp/test/organizationDataStoresRegistry.test.ts @@ -117,6 +117,39 @@ describe("OrganizationDataStoresRegistry", () => { expect(z?.url).toBe(TEST_URL); }); + postgresTest( + "when an org appears in multiple data stores, first row by id asc wins", + async ({ prisma }) => { + const registry = new OrganizationDataStoresRegistry(prisma); + 
const sharedOrg = "org-dup-overlap"; + + await registry.addDataStore({ + key: "dup-overlap-first", + kind: "CLICKHOUSE", + organizationIds: [sharedOrg], + config: ClickhouseConnectionSchema.parse({ url: TEST_URL }), + }); + await registry.addDataStore({ + key: "dup-overlap-second", + kind: "CLICKHOUSE", + organizationIds: [sharedOrg], + config: ClickhouseConnectionSchema.parse({ url: TEST_URL_2 }), + }); + + const [winner] = await prisma.organizationDataStore.findMany({ + where: { key: { in: ["dup-overlap-first", "dup-overlap-second"] } }, + orderBy: { id: "asc" }, + }); + expect(winner).toBeDefined(); + + await registry.loadFromDatabase(); + + const expectedUrl = + winner!.key === "dup-overlap-first" ? TEST_URL : TEST_URL_2; + expect(registry.get(sharedOrg, "CLICKHOUSE")?.url).toBe(expectedUrl); + } + ); + postgresTest("updateDataStore updates organizationIds and rotates the secret", async ({ prisma }) => { const registry = new OrganizationDataStoresRegistry(prisma); From 67521974ae0338ade5fa8f159012de77f07474e9 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 13:13:00 +0100 Subject: [PATCH 30/34] Separate the clickhouseFactoryInstance so the db client isn't pulled into tests --- .../app/presenters/v3/ApiRunListPresenter.server.ts | 2 +- .../v3/CreateBulkActionPresenter.server.ts | 2 +- .../app/presenters/v3/RunTagListPresenter.server.ts | 2 +- .../app/presenters/v3/TaskListPresenter.server.ts | 2 +- .../app/presenters/v3/UsagePresenter.server.ts | 2 +- .../presenters/v3/ViewSchedulePresenter.server.ts | 2 +- .../app/presenters/v3/WaitpointPresenter.server.ts | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../route.tsx | 2 +- .../routes/admin.api.v1.runs-replication.create.ts | 2 +- .../routes/admin.api.v1.runs-replication.start.ts | 2 +- 
apps/webapp/app/routes/api.v1.prompts.$slug.ts | 2 +- .../app/routes/api.v1.prompts.$slug.versions.ts | 2 +- apps/webapp/app/routes/api.v1.prompts._index.ts | 2 +- ...ects.$projectParam.env.$envParam.logs.$logId.tsx | 2 +- ...lug.projects.$projectParam.env.$envParam.logs.ts | 2 +- ...env.$envParam.prompts.$promptSlug.generations.ts | 2 +- .../services/clickhouse/clickhouseFactory.server.ts | 10 ---------- .../clickhouse/clickhouseFactoryInstance.server.ts | 13 +++++++++++++ apps/webapp/app/services/queryService.server.ts | 2 +- .../app/services/runsReplicationInstance.server.ts | 2 +- apps/webapp/app/v3/eventRepository/index.server.ts | 2 +- apps/webapp/app/v3/otlpExporter.server.ts | 6 ++---- .../services/alerts/errorAlertEvaluator.server.ts | 2 +- .../app/v3/services/bulk/BulkActionV2.server.ts | 2 +- 35 files changed, 47 insertions(+), 46 deletions(-) create mode 100644 apps/webapp/app/services/clickhouse/clickhouseFactoryInstance.server.ts diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index b47baa0e239..0e7077b3dfc 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -9,7 +9,7 @@ import { type Project, type RuntimeEnvironment, type TaskRunStatus } from "@trig import assertNever from "assert-never"; import { z } from "zod"; import { API_VERSIONS, RunStatusUnspecifiedApiVersion } from "~/api/versions"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { logger } from "~/services/logger.server"; import { CoercedDate } from "~/utils/zod"; import { ServiceValidationError } from "~/v3/services/baseService.server"; diff --git a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts 
index eeb5b3d871e..c3c62cd5d95 100644 --- a/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/CreateBulkActionPresenter.server.ts @@ -1,6 +1,6 @@ import { type PrismaClient } from "@trigger.dev/database"; import { CreateBulkActionSearchParams } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { getRunFiltersFromRequest } from "../RunFilters.server"; import { BasePresenter } from "./basePresenter.server"; diff --git a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts index 44d6f2a0747..c4b524ec329 100644 --- a/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunTagListPresenter.server.ts @@ -1,6 +1,6 @@ import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { BasePresenter } from "./basePresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { type PrismaClient } from "@trigger.dev/database"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; diff --git a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts index 085615511e2..5d1d4c45d45 100644 --- a/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/TaskListPresenter.server.ts @@ -4,7 +4,7 @@ import { type TaskTriggerSource, } from "@trigger.dev/database"; import { $replica } from "~/db.server"; -import { clickhouseFactory } from 
"~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { type AverageDurations, ClickHouseEnvironmentMetricsRepository, diff --git a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts index d312088b6d9..f04e53496a2 100644 --- a/apps/webapp/app/presenters/v3/UsagePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/UsagePresenter.server.ts @@ -4,7 +4,7 @@ import { getUsage, getUsageSeries } from "~/services/platform.v3.server"; import { createTimeSeriesData } from "~/utils/graphs"; import { BasePresenter } from "./basePresenter.server"; import { DataPoint, linear } from "regression"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; type Options = { organizationId: string; diff --git a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts index 5341568e6f7..dbb1123c488 100644 --- a/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ViewSchedulePresenter.server.ts @@ -1,7 +1,7 @@ import { ScheduleObject } from "@trigger.dev/core/v3"; import { PrismaClient, prisma } from "~/db.server"; import { displayableEnvironment } from "~/models/runtimeEnvironment.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { nextScheduledTimestamps } from "~/v3/utils/calculateNextSchedule.server"; import { NextRunListPresenter } from "./NextRunListPresenter.server"; import { scheduleWhereClause } from "~/models/schedules.server"; diff --git a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts 
b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts index dc9bf3d1ef0..7877c2cc0c8 100644 --- a/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/WaitpointPresenter.server.ts @@ -1,5 +1,5 @@ import { isWaitpointOutputTimeout, prettyPrintPacket } from "@trigger.dev/core/v3"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { generateHttpCallbackUrl } from "~/services/httpCallback.server"; import { logger } from "~/services/logger.server"; import { BasePresenter } from "./basePresenter.server"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx index bc4c2297e23..3f922351bfb 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.dashboards.$dashboardKey/route.tsx @@ -34,7 +34,7 @@ import { MetricDashboardPresenter, } from "~/presenters/v3/MetricDashboardPresenter.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUser } from "~/services/session.server"; import { cn } from "~/utils/cn"; import { EnvironmentParamSchema } from "~/utils/pathBuilder"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx 
b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx index c88d4e301e2..f65de7bc8c1 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors.$fingerprint/route.tsx @@ -72,7 +72,7 @@ import { type ErrorGroupSummary, } from "~/presenters/v3/ErrorGroupPresenter.server"; import { type NextRunList } from "~/presenters/v3/NextRunListPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUser, requireUserId } from "~/services/session.server"; import { cn } from "~/utils/cn"; import { diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx index 0e971f10b91..385f0cd8b19 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.errors._index/route.tsx @@ -71,7 +71,7 @@ import { type ErrorOccurrences, type ErrorsList as ErrorsListData, } from "~/presenters/v3/ErrorsListPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getCurrentPlan } from "~/services/platform.v3.server"; import { requireUser } from "~/services/session.server"; import { formatNumberCompact } from "~/utils/numberFormatter"; diff --git 
a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx index 7c0204d5e64..c913623ebab 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs/route.tsx @@ -16,7 +16,7 @@ import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { LogsListPresenter, LogEntry } from "~/presenters/v3/LogsListPresenter.server"; import type { LogLevel } from "~/utils/logUtils"; import { $replica, prisma } from "~/db.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { NavBar, PageTitle } from "~/components/primitives/PageHeader"; import { PageBody, PageContainer } from "~/components/layout/AppLayout"; import { Suspense, useCallback, useEffect, useMemo, useRef, useState, useTransition } from "react"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx index 813879cf24f..f4248aa64b6 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.$modelId/route.tsx @@ -28,7 +28,7 @@ import type { QueryWidgetConfig } from "~/components/metrics/QueryWidget"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { ModelRegistryPresenter } from 
"~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx index 4cf63c6e868..9140b37dc95 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models._index/route.tsx @@ -73,7 +73,7 @@ import { type PopularModel, ModelRegistryPresenter, } from "~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { useEnvironment } from "~/hooks/useEnvironment"; import { useOrganization } from "~/hooks/useOrganizations"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx index 1306eb91943..1f8748f08cf 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.models.compare/route.tsx @@ -20,7 +20,7 @@ import { type ModelComparisonItem, ModelRegistryPresenter, } from 
"~/presenters/v3/ModelRegistryPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { useOrganization } from "~/hooks/useOrganizations"; import { useProject } from "~/hooks/useProject"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx index de43ea46b23..7201b94a16e 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug/route.tsx @@ -70,7 +70,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { type GenerationRow, PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; import { SpanView } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getResizableSnapshot } from "~/services/resizablePanel.server"; import { requireUserId } from "~/services/session.server"; import { PromptService } from "~/v3/services/promptService.server"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx index b44c5954ffe..5d3f36a06f1 100644 --- 
a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts._index/route.tsx @@ -22,7 +22,7 @@ import { useProject } from "~/hooks/useProject"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { docsPath, EnvironmentParamSchema, v3PromptsPath } from "~/utils/pathBuilder"; import { LinkButton } from "~/components/primitives/Buttons"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx index 1ff56df26c0..d55511e7ff5 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx @@ -92,7 +92,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { NextRunListPresenter } from "~/presenters/v3/NextRunListPresenter.server"; import { RunEnvironmentMismatchError, RunPresenter } from "~/presenters/v3/RunPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getImpersonationId } from "~/services/impersonation.server"; 
import { logger } from "~/services/logger.server"; import { getResizableSnapshot } from "~/services/resizablePanel.server"; diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx index 2f9e191b4ec..cc41f738a29 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs._index/route.tsx @@ -45,7 +45,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { getRunFiltersFromRequest } from "~/presenters/RunFilters.server"; import { NextRunListPresenter } from "~/presenters/v3/NextRunListPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { setRootOnlyFilterPreference, uiPreferencesStorage, diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx index d6e866184a5..a611ffbcf89 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.test.tasks.$taskParam/route.tsx @@ -74,7 +74,7 @@ import { Dialog, DialogContent, DialogHeader, DialogTrigger } from "~/components import { DialogClose, DialogDescription } from "@radix-ui/react-dialog"; import { FormButtons } from "~/components/primitives/FormButtons"; import { $replica } from 
"~/db.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { RegionsPresenter, type Region } from "~/presenters/v3/RegionsPresenter.server"; import { TestSidebarTabs } from "./TestSidebarTabs"; import { AIPayloadTabContent } from "./AIPayloadTabContent"; diff --git a/apps/webapp/app/routes/admin.api.v1.runs-replication.create.ts b/apps/webapp/app/routes/admin.api.v1.runs-replication.create.ts index 870c5891a13..0026c66bfda 100644 --- a/apps/webapp/app/routes/admin.api.v1.runs-replication.create.ts +++ b/apps/webapp/app/routes/admin.api.v1.runs-replication.create.ts @@ -2,7 +2,7 @@ import { ActionFunctionArgs, json } from "@remix-run/server-runtime"; import { requireAdminApiRequest } from "~/services/personalAccessToken.server"; import { z } from "zod"; import { env } from "~/env.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { RunsReplicationService } from "~/services/runsReplicationService.server"; import { getRunsReplicationGlobal, diff --git a/apps/webapp/app/routes/admin.api.v1.runs-replication.start.ts b/apps/webapp/app/routes/admin.api.v1.runs-replication.start.ts index 7291a275b19..d67ca3a1ae3 100644 --- a/apps/webapp/app/routes/admin.api.v1.runs-replication.start.ts +++ b/apps/webapp/app/routes/admin.api.v1.runs-replication.start.ts @@ -1,6 +1,6 @@ import { ActionFunctionArgs, json } from "@remix-run/server-runtime"; import { requireAdminApiRequest } from "~/services/personalAccessToken.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getRunsReplicationGlobal } from "~/services/runsReplicationGlobal.server"; import { 
runsReplicationInstance } from "~/services/runsReplicationInstance.server"; diff --git a/apps/webapp/app/routes/api.v1.prompts.$slug.ts b/apps/webapp/app/routes/api.v1.prompts.$slug.ts index f8ee8518ac8..3d8de30c25c 100644 --- a/apps/webapp/app/routes/api.v1.prompts.$slug.ts +++ b/apps/webapp/app/routes/api.v1.prompts.$slug.ts @@ -2,7 +2,7 @@ import { json } from "@remix-run/server-runtime"; import { z } from "zod"; import { prisma } from "~/db.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { createActionApiRoute, createLoaderApiRoute, diff --git a/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts b/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts index 6a732c29a67..6ef8a014f9c 100644 --- a/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts +++ b/apps/webapp/app/routes/api.v1.prompts.$slug.versions.ts @@ -2,7 +2,7 @@ import { json } from "@remix-run/server-runtime"; import { z } from "zod"; import { prisma } from "~/db.server"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; const ParamsSchema = z.object({ diff --git a/apps/webapp/app/routes/api.v1.prompts._index.ts b/apps/webapp/app/routes/api.v1.prompts._index.ts index dc1c4c6b30e..8ba10c660de 100644 --- a/apps/webapp/app/routes/api.v1.prompts._index.ts +++ b/apps/webapp/app/routes/api.v1.prompts._index.ts @@ -1,6 +1,6 @@ import { json } from "@remix-run/server-runtime"; import { PromptPresenter } from "~/presenters/v3/PromptPresenter.server"; -import { clickhouseFactory } from 
"~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; export const loader = createLoaderApiRoute( diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx index ae552e96eb9..f4d34907042 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.$logId.tsx @@ -1,7 +1,7 @@ import { type LoaderFunctionArgs } from "@remix-run/server-runtime"; import { typedjson } from "remix-typedjson"; import { z } from "zod"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { LogDetailPresenter } from "~/presenters/v3/LogDetailPresenter.server"; import { findProjectBySlug } from "~/models/project.server"; diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts index 7bb0db0d54b..38b7dd390f8 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.logs.ts @@ -6,7 +6,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; import { LogsListPresenter, type LogLevel, LogsListOptionsSchema } from 
"~/presenters/v3/LogsListPresenter.server"; import { $replica } from "~/db.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { getCurrentPlan } from "~/services/platform.v3.server"; // Valid log levels for filtering diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts index 5188d8ccdfe..e468438cdc1 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.prompts.$promptSlug.generations.ts @@ -6,7 +6,7 @@ import { EnvironmentParamSchema } from "~/utils/pathBuilder"; import { parsePeriodToMs } from "~/utils/periods"; import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from "~/models/runtimeEnvironment.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { PromptPresenter, type GenerationRow, diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 92df7de2b54..deeddd5e167 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -3,7 +3,6 @@ import { createHash } from "crypto"; import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; import { env } from "~/env.server"; import { singleton } from "~/utils/singleton"; -import { 
organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; import type { OrganizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistry.server"; import { type IEventRepository } from "~/v3/eventRepository/eventRepository.types"; @@ -281,15 +280,6 @@ export class ClickhouseFactory { } } -// --------------------------------------------------------------------------- -// Singleton factory instance -// --------------------------------------------------------------------------- - -export const clickhouseFactory = singleton( - "clickhouseFactory", - () => new ClickhouseFactory(organizationDataStoresRegistry) -); - /** * Get admin ClickHouse client for cross-organization queries. * Only use for admin tools and analytics that need to query across all orgs. diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactoryInstance.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactoryInstance.server.ts new file mode 100644 index 00000000000..6795df5f644 --- /dev/null +++ b/apps/webapp/app/services/clickhouse/clickhouseFactoryInstance.server.ts @@ -0,0 +1,13 @@ +import { organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; +import { singleton } from "~/utils/singleton"; +import { ClickhouseFactory } from "./clickhouseFactory.server"; + +/** + * Production singleton wired to the global organization data-stores registry. + * Import this only from app/runtime code — not from tests that construct a + * {@link ClickhouseFactory} with a stub registry (see `clickhouseFactory.server.ts`).
+ */ +export const clickhouseFactory = singleton( + "clickhouseFactory", + () => new ClickhouseFactory(organizationDataStoresRegistry) +); diff --git a/apps/webapp/app/services/queryService.server.ts b/apps/webapp/app/services/queryService.server.ts index 214232ebdf5..4a576c2bf34 100644 --- a/apps/webapp/app/services/queryService.server.ts +++ b/apps/webapp/app/services/queryService.server.ts @@ -11,7 +11,7 @@ import type { TableSchema, WhereClauseCondition } from "@internal/tsql"; import { z } from "zod"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; -import { clickhouseFactory } from "./clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "./clickhouse/clickhouseFactoryInstance.server"; import { queryConcurrencyLimiter, DEFAULT_ORG_CONCURRENCY_LIMIT, diff --git a/apps/webapp/app/services/runsReplicationInstance.server.ts b/apps/webapp/app/services/runsReplicationInstance.server.ts index 1a507aafd74..d5071e2d2b8 100644 --- a/apps/webapp/app/services/runsReplicationInstance.server.ts +++ b/apps/webapp/app/services/runsReplicationInstance.server.ts @@ -1,6 +1,6 @@ import invariant from "tiny-invariant"; import { env } from "~/env.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { singleton } from "~/utils/singleton"; import { meter, provider } from "~/v3/tracer.server"; import { RunsReplicationService } from "./runsReplicationService.server"; diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index c2e772cc127..9c30f011a73 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -6,7 +6,7 @@ import { logger } from "~/services/logger.server"; import { FEATURE_FLAG } from "../featureFlags"; import { flag } from "../featureFlags.server"; import { 
getTaskEventStore } from "../taskEventStore.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; export const EVENT_STORE_TYPES = { POSTGRES: "postgres", diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index 37c5c2840bc..788e7339834 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -20,10 +20,8 @@ import { } from "@trigger.dev/otlp-importer"; import type { MetricsV1Input } from "@internal/clickhouse"; import { logger } from "~/services/logger.server"; -import { - clickhouseFactory, - type ClickhouseFactory, -} from "~/services/clickhouse/clickhouseFactory.server"; +import type { ClickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { generateSpanId } from "./eventRepository/common.server"; import type { diff --git a/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts b/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts index 40fe2d5f10d..cd68258c015 100644 --- a/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts +++ b/apps/webapp/app/v3/services/alerts/errorAlertEvaluator.server.ts @@ -7,7 +7,7 @@ import { } from "@trigger.dev/database"; import { $replica, prisma } from "~/db.server"; import { ErrorAlertConfig } from "~/models/projectAlert.server"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { logger } from "~/services/logger.server"; import { alertsWorker } from "~/v3/alertsWorker.server"; diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index 
cf1f80165dc..21f5d39db91 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -7,7 +7,7 @@ import { } from "@trigger.dev/database"; import { getRunFiltersFromRequest } from "~/presenters/RunFilters.server"; import { type CreateBulkActionPayload } from "~/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.bulkaction"; -import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { parseRunListInputOptions, type RunListInputFilters, From 635ab84b6979540109cd65a75951ff742d7b26f4 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 14:35:12 +0100 Subject: [PATCH 31/34] Use the same ClickHouse settings for org specific clients --- .../clickhouse/clickhouseFactory.server.ts | 97 ++++++++++++++----- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index deeddd5e167..0159da4fc12 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -36,6 +36,21 @@ const defaultLogsClickhouseClient = singleton( initializeLogsClickhouseClient ); +function getLogsListClickhouseSettings() { + return { + max_memory_usage: env.CLICKHOUSE_LOGS_LIST_MAX_MEMORY_USAGE.toString(), + max_bytes_before_external_sort: + env.CLICKHOUSE_LOGS_LIST_MAX_BYTES_BEFORE_EXTERNAL_SORT.toString(), + max_threads: env.CLICKHOUSE_LOGS_LIST_MAX_THREADS, + ...(env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ && { + max_rows_to_read: env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ.toString(), + }), + ...(env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME && { + max_execution_time: env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME, + }), + }; +} + function 
initializeLogsClickhouseClient() { if (!env.LOGS_CLICKHOUSE_URL) { throw new Error("LOGS_CLICKHOUSE_URL is not set"); @@ -54,18 +69,7 @@ function initializeLogsClickhouseClient() { logLevel: env.CLICKHOUSE_LOG_LEVEL, compression: { request: true }, maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - clickhouseSettings: { - max_memory_usage: env.CLICKHOUSE_LOGS_LIST_MAX_MEMORY_USAGE.toString(), - max_bytes_before_external_sort: - env.CLICKHOUSE_LOGS_LIST_MAX_BYTES_BEFORE_EXTERNAL_SORT.toString(), - max_threads: env.CLICKHOUSE_LOGS_LIST_MAX_THREADS, - ...(env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ && { - max_rows_to_read: env.CLICKHOUSE_LOGS_LIST_MAX_ROWS_TO_READ.toString(), - }), - ...(env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME && { - max_execution_time: env.CLICKHOUSE_LOGS_LIST_MAX_EXECUTION_TIME, - }), - }, + clickhouseSettings: getLogsListClickhouseSettings(), }); } @@ -163,18 +167,63 @@ export type ClientType = "standard" | "events" | "replication" | "logs" | "query function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHouse { const parsed = new URL(url); parsed.searchParams.delete("secure"); - - return new ClickHouse({ - url: parsed.toString(), - name: `org-clickhouse-${clientType}`, - keepAlive: { - enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.CLICKHOUSE_LOG_LEVEL, - compression: { request: true }, - maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); + const name = `org-clickhouse-${clientType}`; + + switch (clientType) { + case "events": + return new ClickHouse({ + url: parsed.toString(), + name, + keepAlive: { + enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL, + compression: { + request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", + }, + maxOpenConnections: 
env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); + case "replication": + return new ClickHouse({ + url: parsed.toString(), + name, + keepAlive: { + enabled: env.RUN_REPLICATION_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.RUN_REPLICATION_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.RUN_REPLICATION_CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + maxOpenConnections: env.RUN_REPLICATION_MAX_OPEN_CONNECTIONS, + }); + case "logs": + return new ClickHouse({ + url: parsed.toString(), + name, + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + clickhouseSettings: getLogsListClickhouseSettings(), + }); + case "standard": + case "query": + case "admin": + return new ClickHouse({ + url: parsed.toString(), + name, + keepAlive: { + enabled: env.CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + maxOpenConnections: env.CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); + } } // --------------------------------------------------------------------------- From e8282a00b281faf9535704d12947d902b8a6309f Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 17 Apr 2026 14:57:27 +0100 Subject: [PATCH 32/34] Update apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .../dataStores/organizationDataStoresRegistry.server.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts index 838dd5ae5b8..dff741cab0f 100644 --- 
a/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts +++ b/apps/webapp/app/services/dataStores/organizationDataStoresRegistry.server.ts @@ -132,10 +132,11 @@ export class OrganizationDataStoresRegistry { data: { key, organizationIds, - kind: "CLICKHOUSE", + kind, config: { version: 1, data: { secretKey } }, }, }); + } async updateDataStore({ From d9506f7a801e96a479a106fffd15e575ee29c2f1 Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 22 May 2026 12:27:44 +0100 Subject: [PATCH 33/34] Route sessions replication via ClickhouseFactory, gate replication services on factory readiness, and tighten review-comment fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SessionsReplicationService now takes ClickhouseFactory; #flushBatch buckets per org and routes each bucket to the org's ClickHouse (mirrors runs replication). - sessionsReplicationInstance gates service.start() on clickhouseFactory.isReady() and owns SIGTERM/SIGINT shutdown — collapsed entry.server.tsx's 35-line block to a single bootstrapping reference. Prevents misrouting org-scoped writes to the default cluster while the registry is still loading. - Factory: added sessions_replication ClientType + default singleton (parallels runs replication), converted the events client to a singleton (was building two pools to the same URL for v1/v2 repos), added missing startTimeMaxAgeMs to the clickhouse_v2 case. - Converted non-hot-path callers (runEngineHandlers, services, route handlers) from sync resolveEventRepositoryForStore to async getEventRepositoryForStore so they await registry readiness too. Sync now reserved for OTEL exporter and the replication services, which gate startup on isReady. - resolveEventRepositoryForStore is now non-exported with a JSDoc note explaining when sync-bypass-with-isReady-gating is justified.
- Minor review fixes: tryCatch from @trigger.dev/core/utils subpath, safeParse + 400 response in admin.data-stores add/update branches, otlpExporter await moved past the content-type check so the unsupported branch short-circuits. - TestReplicationClickhouseFactory routes both replication and sessions_replication to the test client; sessions replication tests updated. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/webapp/app/entry.server.tsx | 42 +------- apps/webapp/app/routes/admin.data-stores.tsx | 26 +++-- .../app/routes/api.v1.runs.$runId.events.ts | 4 +- .../api.v1.runs.$runId.spans.$spanId.ts | 4 +- .../app/routes/api.v1.runs.$runId.trace.ts | 4 +- apps/webapp/app/routes/otel.v1.metrics.ts | 3 +- .../resources.runs.$runParam.logs.download.ts | 7 +- .../clickhouse/clickhouseFactory.server.ts | 102 ++++++++++++++---- .../sessionsReplicationInstance.server.ts | 48 ++++++--- .../sessionsReplicationService.server.ts | 100 ++++++++++++----- .../app/v3/eventRepository/index.server.ts | 9 +- .../webapp/app/v3/runEngineHandlers.server.ts | 16 +-- .../app/v3/services/cancelTaskRunV1.server.ts | 4 +- .../app/v3/services/completeAttempt.server.ts | 8 +- .../app/v3/services/crashTaskRun.server.ts | 4 +- .../v3/services/expireEnqueuedRun.server.ts | 4 +- .../test/sessionsReplicationService.test.ts | 5 +- .../utils/testReplicationClickhouseFactory.ts | 5 +- 18 files changed, 258 insertions(+), 137 deletions(-) diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx index 11c3274e865..0a9232ee82f 100644 --- a/apps/webapp/app/entry.server.tsx +++ b/apps/webapp/app/entry.server.tsx @@ -24,44 +24,12 @@ import { registerRunEngineEventBusHandlers, setupBatchQueueCallbacks, } from "./v3/runEngineHandlers.server"; +// Touch the sessions replication singleton at entry so it boots deterministically +// on webapp startup. 
The singleton's initializer wires start (gated on +// `clickhouseFactory.isReady()`) and SIGTERM/SIGINT shutdown — mirrors +// runsReplicationInstance. import { sessionsReplicationInstance } from "./services/sessionsReplicationInstance.server"; -import { signalsEmitter } from "./services/signals.server"; - -// Start the sessions replication service (subscribes to the logical replication -// slot, runs leader election, flushes to ClickHouse). Done at entry level so it -// runs deterministically on webapp boot rather than lazily via a singleton -// reference elsewhere in the module graph. -if (sessionsReplicationInstance && env.SESSION_REPLICATION_ENABLED === "1") { - // Capture a non-nullable reference so the shutdown closure below - // doesn't need to re-null-check (TS narrowing doesn't follow through - // an inner function scope). - const replicator = sessionsReplicationInstance; - replicator - .start() - .then(() => { - console.log("🗃️ Sessions replication service started"); - }) - .catch((error) => { - console.error("🗃️ Sessions replication service failed to start", { - error, - }); - }); - - // Wrap the async shutdown in a sync handler that catches rejections — - // SIGTERM/SIGINT fire during process teardown, and an unhandled - // promise rejection from `_replicationClient.stop()` there would - // bubble up past the process exit. Matches the pattern in - // dynamicFlushScheduler.server.ts.
- const shutdownSessionsReplication = () => { - replicator.shutdown().catch((error) => { - console.error("🗃️ Sessions replication service shutdown error", { - error, - }); - }); - }; - signalsEmitter.on("SIGTERM", shutdownSessionsReplication); - signalsEmitter.on("SIGINT", shutdownSessionsReplication); -} +void sessionsReplicationInstance; const ABORT_DELAY = 30000; diff --git a/apps/webapp/app/routes/admin.data-stores.tsx b/apps/webapp/app/routes/admin.data-stores.tsx index e6da4998fe0..8dafeb9b2fe 100644 --- a/apps/webapp/app/routes/admin.data-stores.tsx +++ b/apps/webapp/app/routes/admin.data-stores.tsx @@ -28,7 +28,7 @@ import { prisma } from "~/db.server"; import { requireUser } from "~/services/session.server"; import { ClickhouseConnectionSchema } from "~/services/clickhouse/clickhouseSecretSchemas.server"; import { organizationDataStoresRegistry } from "~/services/dataStores/organizationDataStoresRegistryInstance.server"; -import { tryCatch } from "@trigger.dev/core"; +import { tryCatch } from "@trigger.dev/core/utils"; // --------------------------------------------------------------------------- // Loader @@ -93,14 +93,20 @@ export async function action({ request }: ActionFunctionArgs) { .map((s) => s.trim()) .filter(Boolean); - const config = ClickhouseConnectionSchema.parse({ url: connectionUrl }); + const parsedConfig = ClickhouseConnectionSchema.safeParse({ url: connectionUrl }); + if (!parsedConfig.success) { + return typedjson( + { error: parsedConfig.error.issues.map((i) => i.message).join(", ") }, + { status: 400 } + ); + } const [error, _] = await tryCatch( organizationDataStoresRegistry.addDataStore({ key, kind: "CLICKHOUSE", organizationIds, - config, + config: parsedConfig.data, }) ); @@ -117,9 +123,17 @@ export async function action({ request }: ActionFunctionArgs) { .map((s) => s.trim()) .filter(Boolean); - const config = connectionUrl - ?
ClickhouseConnectionSchema.parse({ url: connectionUrl }) - : undefined; + let config: ReturnType | undefined; + if (connectionUrl) { + const parsedConfig = ClickhouseConnectionSchema.safeParse({ url: connectionUrl }); + if (!parsedConfig.success) { + return typedjson( + { error: parsedConfig.error.issues.map((i) => i.message).join(", ") }, + { status: 400 } + ); + } + config = parsedConfig.data; + } const [error, _] = await tryCatch( organizationDataStoresRegistry.updateDataStore({ diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.events.ts b/apps/webapp/app/routes/api.v1.runs.$runId.events.ts index 5a4220ca540..bfa3cab971b 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.events.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.events.ts @@ -6,7 +6,7 @@ import { createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; import { ApiRetrieveRunPresenter } from "~/presenters/v3/ApiRetrieveRunPresenter.server"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; const ParamsSchema = z.object({ runId: z.string(), // This is the run friendly ID @@ -38,7 +38,7 @@ export const loader = createLoaderApiRoute( }, }, async ({ resource: run, authentication }) => { - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, authentication.environment.organization.id ); diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts index f0d90703445..be0d12087b6 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts @@ -7,7 +7,7 @@ import { anyResource, createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; 
+import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; const ParamsSchema = z.object({ @@ -45,7 +45,7 @@ export const loader = createLoaderApiRoute( }, }, async ({ params, resource: run, authentication }) => { - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, authentication.environment.organization.id ); diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts index 751b5618617..77e6a4df043 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts @@ -6,7 +6,7 @@ import { anyResource, createLoaderApiRoute, } from "~/services/routeBuilders/apiBuilder.server"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; const ParamsSchema = z.object({ @@ -43,7 +43,7 @@ export const loader = createLoaderApiRoute( }, }, async ({ resource: run, authentication }) => { - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, authentication.environment.organization.id ); diff --git a/apps/webapp/app/routes/otel.v1.metrics.ts b/apps/webapp/app/routes/otel.v1.metrics.ts index 9a09cb18233..803dc3da260 100644 --- a/apps/webapp/app/routes/otel.v1.metrics.ts +++ b/apps/webapp/app/routes/otel.v1.metrics.ts @@ -7,10 +7,10 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { - const exporter = await otlpExporter; const contentType = request.headers.get("content-type")?.toLowerCase() ?? 
""; if (contentType.startsWith("application/json")) { + const exporter = await otlpExporter; const body = await request.json(); const exportResponse = await exporter.exportMetrics( @@ -19,6 +19,7 @@ export async function action({ request }: ActionFunctionArgs) { return json(exportResponse, { status: 200 }); } else if (contentType.startsWith("application/x-protobuf")) { + const exporter = await otlpExporter; const buffer = await request.arrayBuffer(); const exportRequest = ExportMetricsServiceRequest.decode(new Uint8Array(buffer)); diff --git a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts index 5f19f129729..5c7725c510b 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts @@ -8,7 +8,7 @@ import { Readable } from "stream"; import { formatDurationMilliseconds } from "@trigger.dev/core/v3/utils/durations"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { TaskEventKind } from "@trigger.dev/database"; -import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { getEventRepositoryForStore } from "~/v3/eventRepository/index.server"; export async function loader({ params, request }: LoaderFunctionArgs) { const user = await requireUser(request); @@ -33,7 +33,10 @@ export async function loader({ params, request }: LoaderFunctionArgs) { return new Response("Not found", { status: 404 }); } - const eventRepository = resolveEventRepositoryForStore(run.taskEventStore, run.organizationId); + const eventRepository = await getEventRepositoryForStore( + run.taskEventStore, + run.organizationId + ); const runEvents = await eventRepository.getRunEvents( getTaskEventStoreTableForRun(run), diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index 
0159da4fc12..a62e55ae844 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -153,6 +153,63 @@ function initializeRunsReplicationClickhouseClient(): ClickHouse { }); } +/** Session replication to ClickHouse (`SESSION_REPLICATION_CLICKHOUSE_URL`); not exported. */ +const defaultSessionsReplicationClickhouseClient = singleton( + "sessionsReplicationClickhouseClient", + initializeSessionsReplicationClickhouseClient +); + +function initializeSessionsReplicationClickhouseClient(): ClickHouse { + if (!env.SESSION_REPLICATION_CLICKHOUSE_URL) { + // Sessions replication worker gates on this URL; factory may still resolve "sessions_replication" for tests. + return defaultClickhouseClient; + } + + const url = new URL(env.SESSION_REPLICATION_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "sessions-replication", + keepAlive: { + enabled: env.SESSION_REPLICATION_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.SESSION_REPLICATION_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.SESSION_REPLICATION_CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + maxOpenConnections: env.SESSION_REPLICATION_MAX_OPEN_CONNECTIONS, + }); +} + +/** Task events (`EVENTS_CLICKHOUSE_URL`); not exported β€” accessed via factory. 
*/ +const defaultEventsClickhouseClient = singleton( + "eventsClickhouseClient", + initializeEventsClickhouseClient +); + +function initializeEventsClickhouseClient(): ClickHouse { + if (!env.EVENTS_CLICKHOUSE_URL) { + throw new Error("EVENTS_CLICKHOUSE_URL is not set"); + } + + const url = new URL(env.EVENTS_CLICKHOUSE_URL); + url.searchParams.delete("secure"); + + return new ClickHouse({ + url: url.toString(), + name: "task-events", + keepAlive: { + enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL, + compression: { + request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", + }, + maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, + }); +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -162,7 +219,14 @@ function hashHostname(url: string): string { return createHash("sha256").update(parsed.hostname).digest("hex"); } -export type ClientType = "standard" | "events" | "replication" | "logs" | "query" | "admin"; +export type ClientType = + | "standard" + | "events" + | "replication" + | "sessions_replication" + | "logs" + | "query" + | "admin"; function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHouse { const parsed = new URL(url); @@ -196,6 +260,18 @@ function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHou compression: { request: true }, maxOpenConnections: env.RUN_REPLICATION_MAX_OPEN_CONNECTIONS, }); + case "sessions_replication": + return new ClickHouse({ + url: parsed.toString(), + name, + keepAlive: { + enabled: env.SESSION_REPLICATION_KEEP_ALIVE_ENABLED === "1", + idleSocketTtl: env.SESSION_REPLICATION_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, + }, + logLevel: env.SESSION_REPLICATION_CLICKHOUSE_LOG_LEVEL, + compression: { request: true }, + 
maxOpenConnections: env.SESSION_REPLICATION_MAX_OPEN_CONNECTIONS, + }); case "logs": return new ClickHouse({ url: parsed.toString(), @@ -266,6 +342,8 @@ export class ClickhouseFactory { return defaultClickhouseClient; case "replication": return defaultRunsReplicationClickhouseClient; + case "sessions_replication": + return defaultSessionsReplicationClickhouseClient; case "logs": return defaultLogsClickhouseClient; case "query": @@ -350,26 +428,7 @@ export function getDefaultLogsClickhouseClient(): ClickHouse { // --------------------------------------------------------------------------- function getEventsClickhouseClient(): ClickHouse { - if (!env.EVENTS_CLICKHOUSE_URL) { - throw new Error("EVENTS_CLICKHOUSE_URL is not set"); - } - - const url = new URL(env.EVENTS_CLICKHOUSE_URL); - url.searchParams.delete("secure"); - - return new ClickHouse({ - url: url.toString(), - name: "task-events", - keepAlive: { - enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL, - compression: { - request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1", - }, - maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS, - }); + return defaultEventsClickhouseClient; } function buildEventRepository(store: string, clickhouse: ClickHouse): ClickhouseEventRepository { @@ -411,6 +470,7 @@ function buildEventRepository(store: string, clickhouse: ClickHouse): Clickhouse waitForAsyncInsert: env.EVENTS_CLICKHOUSE_WAIT_FOR_ASYNC_INSERT === "1", asyncInsertMaxDataSize: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_MAX_DATA_SIZE, asyncInsertBusyTimeoutMs: env.EVENTS_CLICKHOUSE_ASYNC_INSERT_BUSY_TIMEOUT_MS, + startTimeMaxAgeMs: env.EVENTS_CLICKHOUSE_START_TIME_MAX_AGE_MS, llmMetricsBatchSize: env.LLM_METRICS_BATCH_SIZE, llmMetricsFlushInterval: env.LLM_METRICS_FLUSH_INTERVAL_MS, llmMetricsMaxBatchSize: env.LLM_METRICS_MAX_BATCH_SIZE, diff --git 
a/apps/webapp/app/services/sessionsReplicationInstance.server.ts b/apps/webapp/app/services/sessionsReplicationInstance.server.ts index c6ed1b6b088..8cbc8303a6f 100644 --- a/apps/webapp/app/services/sessionsReplicationInstance.server.ts +++ b/apps/webapp/app/services/sessionsReplicationInstance.server.ts @@ -1,9 +1,10 @@ -import { ClickHouse } from "@internal/clickhouse"; import invariant from "tiny-invariant"; import { env } from "~/env.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { singleton } from "~/utils/singleton"; import { meter, provider } from "~/v3/tracer.server"; import { SessionsReplicationService } from "./sessionsReplicationService.server"; +import { signalsEmitter } from "./signals.server"; export const sessionsReplicationInstance = singleton( "sessionsReplicationInstance", @@ -21,22 +22,8 @@ function initializeSessionsReplicationInstance() { console.log("πŸ—ƒοΈ Sessions replication service enabled"); - const clickhouse = new ClickHouse({ - url: env.SESSION_REPLICATION_CLICKHOUSE_URL, - name: "sessions-replication", - keepAlive: { - enabled: env.SESSION_REPLICATION_KEEP_ALIVE_ENABLED === "1", - idleSocketTtl: env.SESSION_REPLICATION_KEEP_ALIVE_IDLE_SOCKET_TTL_MS, - }, - logLevel: env.SESSION_REPLICATION_CLICKHOUSE_LOG_LEVEL, - compression: { - request: true, - }, - maxOpenConnections: env.SESSION_REPLICATION_MAX_OPEN_CONNECTIONS, - }); - const service = new SessionsReplicationService({ - clickhouse: clickhouse, + clickhouseFactory, pgConnectionUrl: DATABASE_URL, serviceName: "sessions-replication", slotName: env.SESSION_REPLICATION_SLOT_NAME, @@ -68,5 +55,34 @@ function initializeSessionsReplicationInstance() { insertStrategy: env.SESSION_REPLICATION_INSERT_STRATEGY, }); + if (env.SESSION_REPLICATION_ENABLED === "1") { + // Gate start() on the org data-stores registry being loaded. 
Starting earlier would + // race the registry load β€” sync factory lookups would return `null` and route org-scoped + // sessions to the default ClickHouse, writing them to the wrong cluster. + clickhouseFactory + .isReady() + .then(() => service.start()) + .then(() => { + console.log("πŸ—ƒοΈ Sessions replication service started"); + }) + .catch((error) => { + console.error("πŸ—ƒοΈ Sessions replication service failed to start", { + error, + }); + }); + + // SIGTERM/SIGINT fire during process teardown; wrap the async shutdown so an + // unhandled rejection doesn't bubble past process exit. + const shutdownSessionsReplication = () => { + service.shutdown().catch((error) => { + console.error("πŸ—ƒοΈ Sessions replication service shutdown error", { + error, + }); + }); + }; + signalsEmitter.on("SIGTERM", shutdownSessionsReplication); + signalsEmitter.on("SIGINT", shutdownSessionsReplication); + } + return service; } diff --git a/apps/webapp/app/services/sessionsReplicationService.server.ts b/apps/webapp/app/services/sessionsReplicationService.server.ts index f7f384faffc..12b66e29dfb 100644 --- a/apps/webapp/app/services/sessionsReplicationService.server.ts +++ b/apps/webapp/app/services/sessionsReplicationService.server.ts @@ -22,6 +22,7 @@ import { Logger, type LogLevel } from "@trigger.dev/core/logger"; import { tryCatch } from "@trigger.dev/core/utils"; import { type Session } from "@trigger.dev/database"; import EventEmitter from "node:events"; +import type { ClickhouseFactory } from "~/services/clickhouse/clickhouseFactory.server"; import { ConcurrentFlushScheduler } from "./runsReplicationService.server"; interface TransactionEvent { @@ -40,7 +41,7 @@ interface Transaction { } export type SessionsReplicationServiceOptions = { - clickhouse: ClickHouse; + clickhouseFactory: ClickhouseFactory; pgConnectionUrl: string; serviceName: string; slotName: string; @@ -537,11 +538,38 @@ export class SessionsReplicationService { const flushStartTime = performance.now(); 
await startSpan(this._tracer, "flushBatch", async (span) => { - const sessionInserts = batch - .map((item) => toSessionInsertArray(item.session, item._version, item.event === "delete")) - // batch inserts in clickhouse are more performant if the items - // are pre-sorted by the primary key - .sort((a, b) => { + const routeCache = new Map(); + const groups = new Map(); + + for (const item of batch) { + if (!item.session.organizationId) { + continue; + } + + let client = routeCache.get(item.session.organizationId); + if (!client) { + client = this.options.clickhouseFactory.getClickhouseForOrganizationSync( + item.session.organizationId, + "sessions_replication" + ); + routeCache.set(item.session.organizationId, client); + } + + let group = groups.get(client); + if (!group) { + group = { sessionInserts: [] }; + groups.set(client, group); + } + + group.sessionInserts.push( + toSessionInsertArray(item.session, item._version, item.event === "delete") + ); + } + + // batch inserts in clickhouse are more performant if the items + // are pre-sorted by the primary key + const sortSessionInserts = (rows: SessionInsertArray[]) => + rows.sort((a, b) => { const aOrgId = getSessionField(a, "organization_id"); const bOrgId = getSessionField(b, "organization_id"); if (aOrgId !== bOrgId) { @@ -568,19 +596,37 @@ export class SessionsReplicationService { return aSessionId < bSessionId ? -1 : 1; }); - span.setAttribute("session_inserts", sessionInserts.length); + const combinedSessionInserts: SessionInsertArray[] = []; + let sessionError: Error | null = null; + + // Sequential per-group flush β€” matches runsReplicationService for the same reason + // (parallel writes have hit Linux net.ipv4.tcp_wmem buffer pressure at high throughput). 
+ for (const [clickhouse, group] of groups) { + sortSessionInserts(group.sessionInserts); + combinedSessionInserts.push(...group.sessionInserts); + + const [insErr] = await this.#insertWithRetry( + (attempt) => this.#insertSessionInserts(clickhouse, group.sessionInserts, attempt), + "session inserts", + flushId + ); + if (insErr && !sessionError) { + sessionError = insErr; + } + + if (!insErr) { + this._sessionsInsertedCounter.add(group.sessionInserts.length); + } + } + + span.setAttribute("session_inserts", combinedSessionInserts.length); this.logger.debug("Flushing inserts", { flushId, - sessionInserts: sessionInserts.length, + sessionInserts: combinedSessionInserts.length, + clickhouseGroups: groups.size, }); - const [sessionError, sessionResult] = await this.#insertWithRetry( - (attempt) => this.#insertSessionInserts(sessionInserts, attempt), - "session inserts", - flushId - ); - if (sessionError) { this.logger.error("Error inserting session inserts", { error: sessionError, @@ -591,22 +637,17 @@ export class SessionsReplicationService { this.logger.debug("Flushed inserts", { flushId, - sessionInserts: sessionInserts.length, + sessionInserts: combinedSessionInserts.length, }); - this.events.emit("batchFlushed", { flushId, sessionInserts }); + this.events.emit("batchFlushed", { flushId, sessionInserts: combinedSessionInserts }); - // Record metrics const flushDurationMs = performance.now() - flushStartTime; const hasErrors = sessionError !== null; this._batchSizeHistogram.record(batch.length); this._flushDurationHistogram.record(flushDurationMs); this._batchesFlushedCounter.add(1, { success: !hasErrors }); - - if (!sessionError) { - this._sessionsInsertedCounter.add(sessionInserts.length); - } }); } @@ -706,14 +747,23 @@ export class SessionsReplicationService { }; } - async #insertSessionInserts(sessionInserts: SessionInsertArray[], attempt: number) { + async #insertSessionInserts( + clickhouse: ClickHouse, + sessionInserts: SessionInsertArray[], + attempt: 
number + ) { + if (sessionInserts.length === 0) { + return; + } return await startSpan(this._tracer, "insertSessionInserts", async (span) => { - const [insertError, insertResult] = - await this.options.clickhouse.sessions.insertCompactArrays(sessionInserts, { + const [insertError, insertResult] = await clickhouse.sessions.insertCompactArrays( + sessionInserts, + { params: { clickhouse_settings: this.#getClickhouseInsertSettings(), }, - }); + } + ); if (insertError) { this.logger.error("Error inserting session inserts attempt", { diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts index 9c30f011a73..4be392535c3 100644 --- a/apps/webapp/app/v3/eventRepository/index.server.ts +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -20,8 +20,15 @@ export type EventStoreType = (typeof EVENT_STORE_TYPES)[keyof typeof EVENT_STORE * Resolve the event repository for a run's persisted `taskEventStore` value and org. * Postgres-backed runs use the Prisma `eventRepository`; ClickHouse-backed runs use * `clickhouseFactory.getEventRepositoryForOrganizationSync`. + * + * Intentionally NOT exported. Sync resolution can race the org data-stores + * registry load and silently route writes to the default ClickHouse instead of + * the org's configured override. Hot paths that genuinely cannot afford to await + * (OTEL exporter, replication services) call `clickhouseFactory.getEvent…Sync` + * directly and gate startup on `clickhouseFactory.isReady()`. Everything else + * should use {@link getEventRepositoryForStore}, the async variant below. 
*/ -export function resolveEventRepositoryForStore( +function resolveEventRepositoryForStore( store: string, organizationId: string ): IEventRepository { diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts index 82ed633e468..3277d74ba6e 100644 --- a/apps/webapp/app/v3/runEngineHandlers.server.ts +++ b/apps/webapp/app/v3/runEngineHandlers.server.ts @@ -17,7 +17,7 @@ import { QueueSizeLimitExceededError } from "~/v3/services/common.server"; import { TriggerTaskService } from "~/v3/services/triggerTask.server"; import { tracer } from "~/v3/tracer.server"; import { createExceptionPropertiesFromError } from "./eventRepository/common.server"; -import { recordRunDebugLog, resolveEventRepositoryForStore } from "./eventRepository/index.server"; +import { getEventRepositoryForStore, recordRunDebugLog } from "./eventRepository/index.server"; import { roomFromFriendlyRunId, socketIo } from "./handleSocketIo.server"; import { engine } from "./runEngine.server"; import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server"; @@ -57,7 +57,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, taskRun.organizationId ?? organization.id ); @@ -127,7 +127,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, taskRun.organizationId ?? 
organization.id ); @@ -191,7 +191,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, taskRun.organizationId ); @@ -302,7 +302,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( blockedRun.taskEventStore, blockedRun.organizationId ); @@ -365,7 +365,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( taskRun.taskEventStore, taskRun.organizationId ?? organization.id ); @@ -419,7 +419,7 @@ export function registerRunEngineEventBusHandlers() { return; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( taskRun.taskEventStore, taskRun.organizationId ?? organization.id ); @@ -458,7 +458,7 @@ export function registerRunEngineEventBusHandlers() { retryMessage += ` after OOM`; } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore ?? 
"taskEvent", organization.id ); diff --git a/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts b/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts index 6a5b5ef8c24..eb366834c01 100644 --- a/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts +++ b/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts @@ -10,7 +10,7 @@ import { CancelTaskAttemptDependenciesService } from "./cancelTaskAttemptDepende import { CancelableTaskRun } from "./cancelTaskRun.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { tryCatch } from "@trigger.dev/core/utils"; -import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; +import { getEventRepositoryForStore } from "../eventRepository/index.server"; type ExtendedTaskRun = Prisma.TaskRunGetPayload<{ include: { @@ -101,7 +101,7 @@ export class CancelTaskRunServiceV1 extends BaseService { }, }); - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( cancelledTaskRun.taskEventStore, cancelledTaskRun.runtimeEnvironment.organizationId ); diff --git a/apps/webapp/app/v3/services/completeAttempt.server.ts b/apps/webapp/app/v3/services/completeAttempt.server.ts index 79647b2f1c4..c4076648819 100644 --- a/apps/webapp/app/v3/services/completeAttempt.server.ts +++ b/apps/webapp/app/v3/services/completeAttempt.server.ts @@ -31,7 +31,7 @@ import { CancelAttemptService } from "./cancelAttempt.server"; import { CreateCheckpointService } from "./createCheckpoint.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { RetryAttemptService } from "./retryAttempt.server"; -import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; +import { getEventRepositoryForStore } from "../eventRepository/index.server"; type FoundAttempt = Awaited>; @@ -163,7 +163,7 @@ export class CompleteAttemptService extends BaseService { env, }); - const eventRepository = resolveEventRepositoryForStore( 
+ const eventRepository = await getEventRepositoryForStore( taskRunAttempt.taskRun.taskEventStore, taskRunAttempt.taskRun.organizationId ?? "" ); @@ -319,7 +319,7 @@ export class CompleteAttemptService extends BaseService { exitRun(taskRunAttempt.taskRunId); } - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( taskRunAttempt.taskRun.taskEventStore, taskRunAttempt.taskRun.organizationId ?? "" ); @@ -544,7 +544,7 @@ export class CompleteAttemptService extends BaseService { }) { const retryAt = new Date(executionRetry.timestamp); - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( taskRunAttempt.taskRun.taskEventStore, taskRunAttempt.taskRun.organizationId ?? "" ); diff --git a/apps/webapp/app/v3/services/crashTaskRun.server.ts b/apps/webapp/app/v3/services/crashTaskRun.server.ts index 333a488f753..cd55b9ec0f9 100644 --- a/apps/webapp/app/v3/services/crashTaskRun.server.ts +++ b/apps/webapp/app/v3/services/crashTaskRun.server.ts @@ -7,7 +7,7 @@ import { FailedTaskRunRetryHelper } from "../failedTaskRun.server"; import { CRASHABLE_ATTEMPT_STATUSES, isCrashableRunStatus } from "../taskStatus"; import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; -import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; +import { getEventRepositoryForStore } from "../eventRepository/index.server"; export type CrashTaskRunServiceOptions = { reason?: string; @@ -120,7 +120,7 @@ export class CrashTaskRunService extends BaseService { }, }); - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( crashedTaskRun.taskEventStore, crashedTaskRun.runtimeEnvironment.organizationId ); diff --git a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts index 
3fa1c356c3b..0409b6ed956 100644 --- a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts +++ b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts @@ -4,7 +4,7 @@ import { commonWorker } from "../commonWorker.server"; import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { tryCatch } from "@trigger.dev/core/utils"; -import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; +import { getEventRepositoryForStore } from "../eventRepository/index.server"; export class ExpireEnqueuedRunService extends BaseService { public static async ack(runId: string, tx?: PrismaClientOrTransaction) { @@ -78,7 +78,7 @@ export class ExpireEnqueuedRunService extends BaseService { }, }); - const eventRepository = resolveEventRepositoryForStore( + const eventRepository = await getEventRepositoryForStore( run.taskEventStore, run.runtimeEnvironment.organization.id ); diff --git a/apps/webapp/test/sessionsReplicationService.test.ts b/apps/webapp/test/sessionsReplicationService.test.ts index 3a16ce4471a..8b5dfe22fe1 100644 --- a/apps/webapp/test/sessionsReplicationService.test.ts +++ b/apps/webapp/test/sessionsReplicationService.test.ts @@ -3,6 +3,7 @@ import { containerTest } from "@internal/testcontainers"; import { setTimeout } from "node:timers/promises"; import { z } from "zod"; import { SessionsReplicationService } from "~/services/sessionsReplicationService.server"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; vi.setConfig({ testTimeout: 60_000 }); @@ -21,7 +22,7 @@ describe("SessionsReplicationService", () => { }); const service = new SessionsReplicationService({ - clickhouse, + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), pgConnectionUrl: postgresContainer.getConnectionUri(), serviceName: "sessions-replication", slotName: "sessions_to_clickhouse_v1", @@ -128,7 +129,7 @@ describe("SessionsReplicationService", 
() => { }); const service = new SessionsReplicationService({ - clickhouse, + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), pgConnectionUrl: postgresContainer.getConnectionUri(), serviceName: "sessions-replication", slotName: "sessions_to_clickhouse_v1", diff --git a/apps/webapp/test/utils/testReplicationClickhouseFactory.ts b/apps/webapp/test/utils/testReplicationClickhouseFactory.ts index 4d34fac376c..2422a461d61 100644 --- a/apps/webapp/test/utils/testReplicationClickhouseFactory.ts +++ b/apps/webapp/test/utils/testReplicationClickhouseFactory.ts @@ -12,7 +12,8 @@ const testReplicationRegistryStub = { } as unknown as OrganizationDataStoresRegistry; /** - * Routes all `replication` clients to a single test ClickHouse; other client types use the real factory defaults. + * Routes all `replication` and `sessions_replication` clients to a single test ClickHouse; + * other client types use the real factory defaults. */ export class TestReplicationClickhouseFactory extends ClickhouseFactory { constructor(private readonly replicationClient: ClickHouse) { @@ -23,7 +24,7 @@ export class TestReplicationClickhouseFactory extends ClickhouseFactory { organizationId: string, clientType: ClientType ): ClickHouse { - if (clientType === "replication") { + if (clientType === "replication" || clientType === "sessions_replication") { return this.replicationClient; } return super.getClickhouseForOrganizationSync(organizationId, clientType); From d03b1fac948030c9130e7c90646c088d33f4f65c Mon Sep 17 00:00:00 2001 From: Matt Aitken Date: Fri, 22 May 2026 12:39:30 +0100 Subject: [PATCH 34/34] Fix CI: stale clickhouseInstance imports + Devin events-default routing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failures on the previous push had two causes: 1. 
Three files (AgentListPresenter, sessions list route, api.v1.sessions) still imported clickhouseClient from ~/services/clickhouseInstance.server, a path that was deleted by the initial commit of this branch. They came in via the recent merge from main and broke typecheck + e2e on production webapp boot. All three now resolve their ClickHouse client per-call via clickhouseFactory (async, awaits factory.isReady()), matching the pattern used elsewhere in the PR for non-hot-path callers. 2. getClickhouseForOrganizationSync's default branch grouped 'standard' and 'events' together to return defaultClickhouseClient — but 'events' should resolve to defaultEventsClickhouseClient (EVENTS_CLICKHOUSE_URL singleton). The bug was unreachable today because the only caller of the events default path resolves via getEventsClickhouseClient() directly, but the API contract is now correct for any future caller. Flagged by Devin Review. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../v3/AgentListPresenter.server.ts | 36 +++++++++++-------- .../route.tsx | 8 +++-- apps/webapp/app/routes/api.v1.sessions.ts | 8 +++-- .../clickhouse/clickhouseFactory.server.ts | 3 +- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts b/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts index d34f7393884..fe813ca08b4 100644 --- a/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/AgentListPresenter.server.ts @@ -3,10 +3,10 @@ import { type RuntimeEnvironmentType, type TaskTriggerSource, } from "@trigger.dev/database"; -import { ClickHouse } from "@internal/clickhouse"; +import { type ClickHouse } from "@internal/clickhouse"; import { z } from "zod"; import { $replica } from "~/db.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { singleton } from 
"~/utils/singleton"; import { findCurrentWorkerFromEnvironment } from "~/v3/models/workerDeployment.server"; @@ -24,10 +24,7 @@ export type AgentActiveState = { }; export class AgentListPresenter { - constructor( - private readonly clickhouse: ClickHouse, - private readonly _replica: PrismaClientOrTransaction - ) {} + constructor(private readonly _replica: PrismaClientOrTransaction) {} public async call({ organizationId, @@ -40,6 +37,11 @@ export class AgentListPresenter { environmentId: string; environmentType: RuntimeEnvironmentType; }) { + const clickhouse = await clickhouseFactory.getClickhouseForOrganization( + organizationId, + "standard" + ); + const currentWorker = await findCurrentWorkerFromEnvironment( { id: environmentId, @@ -89,20 +91,21 @@ export class AgentListPresenter { } // All queries are deferred for streaming - const activeStates = this.#getActiveStates(environmentId, slugs); - const conversationSparklines = this.#getConversationSparklines(environmentId, slugs); - const costSparklines = this.#getCostSparklines(environmentId, slugs); - const tokenSparklines = this.#getTokenSparklines(environmentId, slugs); + const activeStates = this.#getActiveStates(clickhouse, environmentId, slugs); + const conversationSparklines = this.#getConversationSparklines(clickhouse, environmentId, slugs); + const costSparklines = this.#getCostSparklines(clickhouse, environmentId, slugs); + const tokenSparklines = this.#getTokenSparklines(clickhouse, environmentId, slugs); return { agents, activeStates, conversationSparklines, costSparklines, tokenSparklines }; } /** Count runs currently executing vs suspended per agent */ async #getActiveStates( + clickhouse: ClickHouse, environmentId: string, slugs: string[] ): Promise> { - const queryFn = this.clickhouse.reader.query({ + const queryFn = clickhouse.reader.query({ name: "agentActiveStates", query: `SELECT task_identifier, @@ -140,10 +143,11 @@ export class AgentListPresenter { /** 24h hourly sparkline of conversation 
(run) count per agent */ async #getConversationSparklines( + clickhouse: ClickHouse, environmentId: string, slugs: string[] ): Promise> { - const queryFn = this.clickhouse.reader.query({ + const queryFn = clickhouse.reader.query({ name: "agentConversationSparklines", query: `SELECT task_identifier, @@ -172,10 +176,11 @@ export class AgentListPresenter { /** 24h hourly sparkline of LLM cost per agent */ async #getCostSparklines( + clickhouse: ClickHouse, environmentId: string, slugs: string[] ): Promise> { - const queryFn = this.clickhouse.reader.query({ + const queryFn = clickhouse.reader.query({ name: "agentCostSparklines", query: `SELECT task_identifier, @@ -203,10 +208,11 @@ export class AgentListPresenter { /** 24h hourly sparkline of total tokens per agent */ async #getTokenSparklines( + clickhouse: ClickHouse, environmentId: string, slugs: string[] ): Promise> { - const queryFn = this.clickhouse.reader.query({ + const queryFn = clickhouse.reader.query({ name: "agentTokenSparklines", query: `SELECT task_identifier, @@ -284,5 +290,5 @@ export class AgentListPresenter { export const agentListPresenter = singleton("agentListPresenter", setupAgentListPresenter); function setupAgentListPresenter() { - return new AgentListPresenter(clickhouseClient, $replica); + return new AgentListPresenter($replica); } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx index 8d2fa6f7961..510cb880468 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.sessions._index/route.tsx @@ -16,7 +16,7 @@ import { findProjectBySlug } from "~/models/project.server"; import { findEnvironmentBySlug } from 
"~/models/runtimeEnvironment.server"; import { getSessionFiltersFromRequest } from "~/presenters/SessionFilters.server"; import { SessionListPresenter } from "~/presenters/v3/SessionListPresenter.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { requireUserId } from "~/services/session.server"; import { docsPath, EnvironmentParamSchema } from "~/utils/pathBuilder"; import { throwNotFound } from "~/utils/httpErrors"; @@ -45,7 +45,11 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { const filters = getSessionFiltersFromRequest(request); - const presenter = new SessionListPresenter($replica, clickhouseClient); + const clickhouse = await clickhouseFactory.getClickhouseForOrganization( + project.organizationId, + "standard" + ); + const presenter = new SessionListPresenter($replica, clickhouse); const list = await presenter.call(project.organizationId, environment.id, { userId, projectId: project.id, diff --git a/apps/webapp/app/routes/api.v1.sessions.ts b/apps/webapp/app/routes/api.v1.sessions.ts index 9b67c714127..308901b0874 100644 --- a/apps/webapp/app/routes/api.v1.sessions.ts +++ b/apps/webapp/app/routes/api.v1.sessions.ts @@ -10,7 +10,7 @@ import { import { SessionId } from "@trigger.dev/core/v3/isomorphic"; import type { Prisma, Session } from "@trigger.dev/database"; import { $replica, prisma, type PrismaClient } from "~/db.server"; -import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server"; import { logger } from "~/services/logger.server"; import { mintSessionToken } from "~/services/realtime/mintSessionToken.server"; import { @@ -58,8 +58,12 @@ export const loader = createLoaderApiRoute( findResource: async () => 1, }, async ({ searchParams, authentication }) => { + const clickhouse = await 
clickhouseFactory.getClickhouseForOrganization( + authentication.environment.organizationId, + "standard" + ); const repository = new SessionsRepository({ - clickhouse: clickhouseClient, + clickhouse, prisma: $replica as PrismaClient, }); diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts index a62e55ae844..652fc26ae35 100644 --- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts +++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts @@ -338,8 +338,9 @@ export class ClickhouseFactory { if (!dataStore) { switch (clientType) { case "standard": - case "events": return defaultClickhouseClient; + case "events": + return defaultEventsClickhouseClient; case "replication": return defaultRunsReplicationClickhouseClient; case "sessions_replication":