diff --git a/.vscode/settings.json b/.vscode/settings.json
index f8a7bd0697..12aefeb358 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -6,5 +6,6 @@
     "**/node_modules/**": true,
     "packages/cli-v3/e2e": true
   },
-  "vitest.disableWorkspaceWarning": true
+  "vitest.disableWorkspaceWarning": true,
+  "typescript.experimental.useTsgo": false
 }
diff --git a/TaskEvent_Property_Analysis.md b/TaskEvent_Property_Analysis.md
new file mode 100644
index 0000000000..64c121d286
--- /dev/null
+++ b/TaskEvent_Property_Analysis.md
@@ -0,0 +1,188 @@
+# TaskEvent/CreatableEvent Property Usage Analysis
+
+This document analyzes every property in TaskEvent/CreatableEvent to determine which ones are actually used in the UI and which can be removed for optimization.
+
+## Properties to KEEP (Used in UI)
+
+| Property | Type | Used In | Usage Description |
+| --- | --- | --- | --- |
+| **Core Identity & Structure** |
+| `id` | String | Database operations | Primary key, generated automatically |
+| `traceId` | String | Query operations, trace identification | Used for trace queries and grouping |
+| `spanId` | String | TraceSummary, SpanDetails | Tree structure, span identification |
+| `parentId` | String? | TraceSummary | Tree hierarchy in `createTreeFromFlatItems` |
+| `message` | String | TraceSummary, SpanDetails | Displayed as span title in `SpanTitle.tsx:20`, tree view |
+| **Status & State** |
+| `isError` | Boolean | TraceSummary, SpanDetails | Error status display, filtering, status icons |
+| `isPartial` | Boolean | TraceSummary, SpanDetails | In-progress status display, timeline calculations |
+| `isCancelled` | Boolean | TraceSummary, SpanDetails | Cancelled status display, status determination |
+| `level` | TaskEventLevel | TraceSummary, SpanDetails | Text styling (`SpanTitle.tsx:91-109`), timeline rendering decisions |
+| `kind` | TaskEventKind | TraceSummary | Filter "UNSPECIFIED" events, determine debug status |
+| `status` | TaskEventStatus | Event creation | Status tracking in event creation |
+| **Timing** |
+| `startTime` | BigInt | TraceSummary, SpanDetails | Timeline calculations, display (`RunPresenter.server.ts:166,171`) |
+| `duration` | BigInt | TraceSummary, SpanDetails | Timeline width, duration display, calculations |
+| `createdAt` | DateTime | Database queries | Time-based queries, automatic generation |
+| **Content & Display** |
+| `events` | Json | TraceSummary, SpanDetails | Timeline events (`RunPresenter.server.ts:181-185`), SpanEvents component |
+| `style` | Json | TraceSummary, SpanDetails | Icons, variants, accessories (`RunIcon`, `SpanTitle`) |
+| `properties` | Json | SpanDetails | Displayed as JSON in span properties (`CodeBlock`) |
+| `metadata` | Json? | SpanDetails | Event transformation, span details processing |
+| **Context (Query/Processing)** |
+| `runId` | String | Query operations | Used in queries, not displayed in TraceSummary UI |
+| `attemptNumber` | Int? | Processing logic | Used for attempt failed logic, not displayed |
+| `environmentType` | RuntimeEnvironmentType | Processing | Selected in queries, used in processing |
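+
+As a rough illustration, the keep-list above maps to a Prisma-style `select` like the sketch below. This is a sketch under stated assumptions, not the app's actual query shape; the real selections live in `QueriedEvent` and `DetailedTraceEvent`.
+
+```ts
+// Sketch only: selects just the UI-relevant TaskEvent columns listed above.
+// Assumes the TaskEvent model exposed by @trigger.dev/database; the codebase's
+// real selections are defined by `QueriedEvent`/`DetailedTraceEvent`, not here.
+import type { Prisma } from "@trigger.dev/database";
+
+export const uiTaskEventSelect = {
+  id: true,
+  traceId: true,
+  spanId: true,
+  parentId: true,
+  message: true,
+  isError: true,
+  isPartial: true,
+  isCancelled: true,
+  level: true,
+  kind: true,
+  status: true,
+  startTime: true,
+  duration: true,
+  createdAt: true,
+  events: true,
+  style: true,
+  properties: true,
+  metadata: true,
+  runId: true,
+  attemptNumber: true,
+  environmentType: true,
+} satisfies Prisma.TaskEventSelect;
+```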
+
+## Properties to REMOVE (Not Used in UI)
+
+| Property | Type | Reason for Removal | Notes |
+| --- | --- | --- | --- |
+| **Service Information** |
+| `serviceName` | String | Set to "api server", never displayed | Hardcoded value, no UI usage |
+| `serviceNamespace` | String | Set to "trigger.dev", never displayed | Hardcoded value, no UI usage |
+| `tracestate` | String? | OpenTelemetry tracestate, not used | OpenTelemetry field, no UI display |
+| **Organization & Project Context** |
+| `environmentId` | String | Used for queries, not displayed | Backend context only |
+| `organizationId` | String | Used for queries, not displayed | Backend context only |
+| `projectId` | String | Used for queries, not displayed | Backend context only |
+| `projectRef` | String | Used for queries, not displayed | Backend context only |
+| `runIsTest` | Boolean | Not displayed in UI | Backend flag, no UI display |
+| **Worker & Queue Information** |
+| `workerId` | String? | Not used in UI rendering | Backend context only |
+| `queueId` | String? | Not used in UI rendering | Backend context only |
+| `queueName` | String? | Selected but not rendered | Selected in DetailedTraceEvent but not displayed |
+| `batchId` | String? | Not used in UI rendering | Backend context only |
+| **Task Information** |
+| `taskPath` | String? | Selected but not rendered | Selected in DetailedTraceEvent but not used |
+| `taskExportName` | String? | Not selected or used | Not selected in any queries |
+| `taskSlug` | String | Not displayed in current UI | Previously used for filtering, no longer needed |
+| **Worker Information** |
+| `workerVersion` | String? | Not displayed in current UI | Previously used for version display, removed |
+| **Key Information** |
+| `idempotencyKey` | String? | Not displayed in current UI | Previously used for span details, removed |
+| **Link Information** |
+| `links` | Json? | Not displayed in current UI | Span linking functionality, not currently used |
+| **Attempt Information** |
+| `attemptId` | String? | Not selected or used | Legacy field, not used |
+| `isDebug` | Boolean | Deprecated field | Replaced by `kind === TaskEventKind.LOG` |
+| **Content (Unused)** |
+| `output` | Json? | **NOT DISPLAYED** in span UI | Returned by getSpan but never rendered |
+| `payload` | Json? | **NOT DISPLAYED** in span UI | Returned by getSpan but never rendered |
+| `outputType` | String? | Not used in UI rendering | Type information not displayed |
+| `payloadType` | String? | Not used in UI rendering | Type information not displayed |
+| **Usage & Cost Tracking** |
+| `usageDurationMs` | Int | Not used in UI rendering | Analytics data, no UI display |
+| `usageCostInCents` | Float | Not used in UI rendering | Analytics data, no UI display |
+| **Machine Information** |
+| `machinePreset` | String? | Selected but not rendered | Selected in DetailedTraceEvent but not displayed |
+| `machinePresetCpu` | Float? | Not selected or used | Not selected in queries |
+| `machinePresetMemory` | Float? | Not selected or used | Not selected in queries |
+| `machinePresetCentsPerMs` | Float? | Not selected or used | Not selected in queries |
+
+## Summary Statistics
+
+- **Total Properties**: ~51 properties in TaskEvent
+- **Properties to Keep**: 18 properties (35%)
+- **Properties to Remove**: 33 properties (65%)
+
+## Optimization Opportunities
+
+### TraceSummary (getTraceSummary)
+
+- **Current Selection**: 15 properties via `QueriedEvent`
+- **Optimization**: Already optimized, only selects necessary fields
+- **Potential Removal**: `runId`, `environmentType` (selected but not used in UI)
+
+### Span Details (getSpan)
+
+- **Current Selection**: ALL TaskEvent properties (full object)
+- **Used in UI**: 15 properties (after removing idempotencyKey, taskSlug, workerVersion, links)
+- **Optimization**: Could remove ~65% of properties
+- **Major Removals**: `payload`, `output`, `idempotencyKey`, `taskSlug`, `workerVersion`, `links`, all context/metadata fields
+
+### CreatableEvent (Event Creation)
+
+- **Current**: Includes many unused fields
+- **Optimization**: Remove ~33 properties that are never displayed
+- **Keep**: Core fields needed for queries and UI display
+
+## Implementation Notes
+
+1. **TraceSummary** is already well-optimized with selective field queries
+2. **getSpan** has the biggest optimization opportunity: it fetches the full TaskEvent but uses only ~30% of it
+3. **CreatableEvent** could be split into (see the sketch after this list):
+   - `MinimalCreatableEvent` for TraceSummary use cases
+   - `DetailedCreatableEvent` for full span details
+4. Properties marked as "Selected but not rendered" could be removed unless they are needed for future features
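+
+A minimal sketch of the split suggested in note 3, assuming the field shapes roughly follow the keep/remove tables above (the `level` union is illustrative; the real `CreatableEvent` type is defined in the event repository and may differ):
+
+```ts
+// Sketch only: a possible two-tier shape for event creation.
+type MinimalCreatableEvent = {
+  traceId: string;
+  spanId: string;
+  parentId?: string;
+  runId: string;
+  message: string;
+  isError: boolean;
+  isPartial: boolean;
+  isCancelled: boolean;
+  level: "TRACE" | "DEBUG" | "LOG" | "INFO" | "WARN" | "ERROR"; // illustrative values
+  kind: string;
+  startTime: bigint;
+  duration: bigint;
+  events?: unknown[];
+  style?: Record<string, unknown>;
+};
+
+// Full span details additionally carry the JSON columns and processing context.
+type DetailedCreatableEvent = MinimalCreatableEvent & {
+  properties?: Record<string, unknown>;
+  metadata?: Record<string, unknown>;
+  attemptNumber?: number;
+  environmentType?: string;
+};
+```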
+
+## Verification Status
+
+✅ **Verified**: All property usage has been systematically verified by examining:
+
+- TraceSummary UI components and data flow
+- Span detail UI components (`SpanBody`, `SpanEntity`, `SpanTitle`)
+- All query selectors (`QueriedEvent`, `DetailedTraceEvent`)
+- Actual UI rendering code
+
+This analysis is based on comprehensive examination of the actual UI components and their property access patterns.
+
+## Properties and Metadata Column Extraction Analysis
+
+This table shows the specific keys that are extracted from the `properties` and `metadata` JSON columns and how they are used.
+
+### Properties Column Extractions
+
+| Key | SemanticInternalAttribute | Used In | Usage Description | Status |
+| --- | --- | --- | --- | --- |
+| **Entity Information** |
+| `$entity.type` | `ENTITY_TYPE` | SpanPresenter | Entity type switching (waitpoint, attempt, etc.) | ✅ USED |
+| `$entity.id` | `ENTITY_ID` | SpanPresenter | Entity ID for waitpoint/attempt lookup | ✅ USED |
+| **Run Relationships** |
+| `$original_run_id` | `ORIGINAL_RUN_ID` | SpanPresenter | Points to original run for cached spans | ✅ USED |
+| **Display Control** |
+| `$show.actions` | `SHOW_ACTIONS` | EventRepository | Controls action bar display (computed to showActionBar) | ✅ USED |
+| **Styling (from enrichCreatableEvents)** |
+| `gen_ai.system` | N/A | enrichCreatableEvents | Icon determination for AI spans | ✅ USED |
+| `name` | N/A | enrichCreatableEvents | Icon determination for agent workflows | ✅ USED |
+| **Exception Handling** |
+| `project.dir` | `PROJECT_DIR` | transformException | Stack trace correction in development | ✅ USED |
+| **All Other Properties** |
+| Various | N/A | SpanDetails UI | Displayed as JSON in properties CodeBlock | ✅ USED |
+
+### Metadata Column Extractions
+
+| Key | SemanticInternalAttribute | Used In | Usage Description | Status |
+| --- | --- | --- | --- | --- |
+| **Warm Start Detection** |
+| `$warm_start` | `WARM_START` | SpanPresenter | Determines if attempt was warm start | ✅ USED |
+| **Event Transformation** |
+| Various | N/A | transformEvents | Used in event transformation logic | ✅ USED |
+
+### Properties/Metadata Keys NOT Extracted (Unused)
+
+Based on SemanticInternalAttributes that are NOT used in UI code:
+
+| Key | SemanticInternalAttribute | Reason Not Used |
+| --- | --- | --- |
+| `ctx.environment.id` | `ENVIRONMENT_ID` | Backend context only |
+| `ctx.environment.type` | `ENVIRONMENT_TYPE` | Backend context only |
+| `ctx.organization.id` | `ORGANIZATION_ID` | Backend context only |
+| `ctx.project.id` | `PROJECT_ID` | Backend context only |
+| `ctx.run.id` | `RUN_ID` | Backend context only |
+| `ctx.task.id` | `TASK_SLUG` | No longer displayed |
+| `worker.version` | `WORKER_VERSION` | No longer displayed |
+| `ctx.run.idempotencyKey` | `IDEMPOTENCY_KEY` | No longer displayed |
+| `ctx.queue.name` | `QUEUE_NAME` | Backend context only |
+| `ctx.machine.*` | `MACHINE_PRESET_*` | Not displayed in UI |
+| `$output` | `OUTPUT` | Not displayed in span UI |
+| `$payload` | `PAYLOAD` | Not displayed in span UI |
+| And many others... | | Backend/processing only |
+
+### Summary
+
+- **Properties Column**: ~9 specific keys extracted and used in UI
+- **Metadata Column**: ~2 specific keys extracted and used in UI
+- **Unused Keys**: ~30+ SemanticInternalAttributes not used in UI
+- **Generic Usage**: Properties are also displayed as JSON in span details
+
+The majority of data in both columns is either backend context or unused in the current UI implementation.
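+
+For completeness, a hedged sketch of reading the handful of used keys out of the two JSON columns. The literal key strings are copied from the tables above; real code should go through the `SemanticInternalAttributes` constants rather than string literals:
+
+```ts
+// Sketch only: extracts the UI-relevant keys named in the tables above.
+// Assumes both columns deserialize to plain JSON objects.
+type JsonRecord = Record<string, unknown>;
+
+export function extractUiAttributes(properties: JsonRecord, metadata: JsonRecord) {
+  return {
+    entityType: properties["$entity.type"] as string | undefined,
+    entityId: properties["$entity.id"] as string | undefined,
+    originalRunId: properties["$original_run_id"] as string | undefined,
+    showActionBar: properties["$show.actions"] === true,
+    isWarmStart: metadata["$warm_start"] === true,
+  };
+}
+```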
diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx index 0e37555cb0..0efc7f7eaa 100644 --- a/apps/webapp/app/entry.server.tsx +++ b/apps/webapp/app/entry.server.tsx @@ -240,6 +240,7 @@ import { logger } from "./services/logger.server"; import { Prisma } from "./db.server"; import { registerRunEngineEventBusHandlers } from "./v3/runEngineHandlers.server"; import { remoteBuildsEnabled } from "./v3/remoteImageBuilder.server"; +import { resourceMonitor } from "./services/resourceMonitor.server"; if (env.EVENT_LOOP_MONITOR_ENABLED === "1") { eventLoopMonitor.enable(); @@ -250,3 +251,7 @@ if (remoteBuildsEnabled()) { } else { console.log("🏗️ Local builds enabled"); } + +if (env.RESOURCE_MONITOR_ENABLED === "1") { + resourceMonitor.startMonitoring(1000); +} diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 683a512c40..78cf9f0449 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -493,6 +493,7 @@ const EnvironmentSchema = z CENTS_PER_RUN: z.coerce.number().default(0), EVENT_LOOP_MONITOR_ENABLED: z.string().default("1"), + RESOURCE_MONITOR_ENABLED: z.string().default("0"), MAXIMUM_LIVE_RELOADING_EVENTS: z.coerce.number().int().default(1000), MAXIMUM_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000), MAXIMUM_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(10_000), @@ -1110,6 +1111,23 @@ const EnvironmentSchema = z CLICKHOUSE_LOG_LEVEL: z.enum(["log", "error", "warn", "info", "debug"]).default("info"), CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + EVENTS_CLICKHOUSE_URL: z + .string() + .optional() + .transform((v) => v ?? process.env.CLICKHOUSE_URL), + EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"), + EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(), + EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10), + EVENTS_CLICKHOUSE_LOG_LEVEL: z.enum(["log", "error", "warn", "info", "debug"]).default("info"), + EVENTS_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"), + EVENTS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(1000), + EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000), + EVENT_REPOSITORY_CLICKHOUSE_ROLLOUT_PERCENT: z.coerce.number().optional(), + EVENT_REPOSITORY_DEFAULT_STORE: z.enum(["postgres", "clickhouse"]).default("postgres"), + EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(25_000), + EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT: z.coerce.number().int().default(5_000), + EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING: z.coerce.number().int().default(2000), + // Bootstrap TRIGGER_BOOTSTRAP_ENABLED: z.string().default("0"), TRIGGER_BOOTSTRAP_WORKER_GROUP_NAME: z.string().optional(), diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index 5fcae91120..671496586a 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -75,6 +75,7 @@ export class ApiRetrieveRunPresenter { }, select: { ...commonRunSelect, + traceId: true, payload: true, payloadType: true, output: true, diff --git a/apps/webapp/app/presenters/v3/RunPresenter.server.ts b/apps/webapp/app/presenters/v3/RunPresenter.server.ts index 4ec0260e71..40b5871190 100644 --- a/apps/webapp/app/presenters/v3/RunPresenter.server.ts +++ 
b/apps/webapp/app/presenters/v3/RunPresenter.server.ts @@ -3,9 +3,11 @@ import { createTreeFromFlatItems, flattenTree } from "~/components/primitives/Tr import { prisma, type PrismaClient } from "~/db.server"; import { createTimelineSpanEventsFromSpanEvents } from "~/utils/timelineSpanEvents"; import { getUsername } from "~/utils/username"; -import { eventRepository } from "~/v3/eventRepository.server"; +import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { SpanSummary } from "~/v3/eventRepository/eventRepository.types"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { isFinalRunStatus } from "~/v3/taskStatus"; +import { env } from "~/env.server"; type Result = Awaited>; export type Run = Result["run"]; @@ -45,9 +47,11 @@ export class RunPresenter { id: true, createdAt: true, taskEventStore: true, + taskIdentifier: true, number: true, traceId: true, spanId: true, + parentSpanId: true, friendlyId: true, status: true, startedAt: true, @@ -137,21 +141,55 @@ export class RunPresenter { return { run: runData, trace: undefined, + maximumLiveReloadingSetting: env.MAXIMUM_LIVE_RELOADING_EVENTS, }; } + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + // get the events - const traceSummary = await eventRepository.getTraceSummary( + let traceSummary = await eventRepository.getTraceSummary( getTaskEventStoreTableForRun(run), + run.runtimeEnvironment.id, run.traceId, run.rootTaskRun?.createdAt ?? run.createdAt, run.completedAt ?? undefined, { includeDebugLogs: showDebug } ); + if (!traceSummary) { - return { - run: runData, - trace: undefined, + const spanSummary: SpanSummary = { + id: run.spanId, + parentId: run.parentSpanId ?? undefined, + runId: run.friendlyId, + data: { + message: run.taskIdentifier, + style: { icon: "task", variant: "primary" }, + events: [], + startTime: run.createdAt, + duration: 0, + isError: + run.status === "COMPLETED_WITH_ERRORS" || + run.status === "CRASHED" || + run.status === "EXPIRED" || + run.status === "SYSTEM_FAILURE" || + run.status === "TIMED_OUT", + isPartial: + run.status === "DELAYED" || + run.status === "PENDING" || + run.status === "PAUSED" || + run.status === "RETRYING_AFTER_FAILURE" || + run.status === "DEQUEUED" || + run.status === "EXECUTING", + isCancelled: run.status === "CANCELED", + isDebug: false, + level: "TRACE", + }, + }; + + traceSummary = { + rootSpan: spanSummary, + spans: [spanSummary], }; } @@ -220,7 +258,9 @@ export class RunPresenter { queuedDuration: run.startedAt ? 
millisecondsToNanoseconds(run.startedAt.getTime() - run.createdAt.getTime()) : undefined, + overridesBySpanId: traceSummary.overridesBySpanId, }, + maximumLiveReloadingSetting: eventRepository.maximumLiveReloadingSetting, }; } } diff --git a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts index 6624048e8c..9197b2c9d1 100644 --- a/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/RunStreamPresenter.server.ts @@ -3,7 +3,7 @@ import { eventStream } from "remix-utils/sse/server"; import { PrismaClient, prisma } from "~/db.server"; import { logger } from "~/services/logger.server"; import { throttle } from "~/utils/throttle"; -import { eventRepository } from "~/v3/eventRepository.server"; +import { tracePubSub } from "~/v3/services/tracePubSub.server"; const pingInterval = 1000; @@ -41,7 +41,7 @@ export class RunStreamPresenter { let pinger: NodeJS.Timeout | undefined = undefined; - const { unsubscribe, eventEmitter } = await eventRepository.subscribeToTrace(run.traceId); + const { unsubscribe, eventEmitter } = await tracePubSub.subscribeToTrace(run.traceId); return eventStream(request.signal, (send, close) => { const safeSend = (args: { event?: string; data: string }) => { diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index 94d10cb1c2..45b5263db0 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -10,13 +10,15 @@ import { import { AttemptId, getMaxDuration, parseTraceparent } from "@trigger.dev/core/v3/isomorphic"; import { RUNNING_STATUSES } from "~/components/runs/v3/TaskRunStatus"; import { logger } from "~/services/logger.server"; -import { eventRepository, rehydrateAttribute } from "~/v3/eventRepository.server"; +import { rehydrateAttribute } from "~/v3/eventRepository/eventRepository.server"; import { machinePresetFromRun } from "~/v3/machinePresets.server"; import { getTaskEventStoreTableForRun, type TaskEventStoreTable } from "~/v3/taskEventStore.server"; import { isFailedRunStatus, isFinalRunStatus } from "~/v3/taskStatus"; import { BasePresenter } from "./basePresenter.server"; import { WaitpointPresenter } from "./WaitpointPresenter.server"; import { engine } from "~/v3/runEngine.server"; +import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; +import { IEventRepository, SpanDetail } from "~/v3/eventRepository/eventRepository.types"; type Result = Awaited>; export type Span = NonNullable["span"]>; @@ -24,7 +26,7 @@ export type SpanRun = NonNullable["run"]>; type FindRunResult = NonNullable< Awaited["findRun"]>> >; -type GetSpanResult = NonNullable>>; +type GetSpanResult = SpanDetail; export class SpanPresenter extends BasePresenter { public async call({ @@ -74,14 +76,20 @@ export class SpanPresenter extends BasePresenter { return; } + const { traceId } = parentRun; + + const eventRepository = resolveEventRepositoryForStore(parentRun.taskEventStore); + const eventStore = getTaskEventStoreTableForRun(parentRun); const run = await this.getRun({ eventStore, - environmentId: parentRun.runtimeEnvironmentId, + traceId, + eventRepository, spanId, createdAt: parentRun.createdAt, completedAt: parentRun.completedAt, + environmentId: parentRun.runtimeEnvironmentId, }); if (run) { return { @@ -93,10 +101,12 @@ export class SpanPresenter extends BasePresenter { const span = await this.#getSpan({ 
eventStore, spanId, + traceId, environmentId: parentRun.runtimeEnvironmentId, projectId: parentRun.projectId, createdAt: parentRun.createdAt, completedAt: parentRun.completedAt, + eventRepository, }); if (!span) { @@ -112,29 +122,30 @@ export class SpanPresenter extends BasePresenter { async getRun({ eventStore, environmentId, + traceId, + eventRepository, spanId, createdAt, completedAt, }: { eventStore: TaskEventStoreTable; environmentId: string; + traceId: string; + eventRepository: IEventRepository; spanId: string; createdAt: Date; completedAt: Date | null; }) { - const span = await eventRepository.getSpan({ - storeTable: eventStore, - spanId, + const originalRunId = await eventRepository.getSpanOriginalRunId( + eventStore, environmentId, - startCreatedAt: createdAt, - endCreatedAt: completedAt ?? undefined, - }); - - if (!span) { - return; - } + spanId, + traceId, + createdAt, + completedAt ?? undefined + ); - const run = await this.findRun({ span, spanId }); + const run = await this.findRun({ originalRunId, spanId, environmentId }); if (!run) { return; @@ -259,7 +270,7 @@ export class SpanPresenter extends BasePresenter { workerQueue: run.workerQueue, traceId: run.traceId, spanId: run.spanId, - isCached: !!span.originalRun, + isCached: !!originalRunId, machinePreset: machine?.name, externalTraceId, }; @@ -294,7 +305,15 @@ export class SpanPresenter extends BasePresenter { }; } - async findRun({ span, spanId }: { span: GetSpanResult; spanId: string }) { + async findRun({ + originalRunId, + spanId, + environmentId, + }: { + originalRunId?: string; + spanId: string; + environmentId: string; + }) { const run = await this._replica.taskRun.findFirst({ select: { id: true, @@ -404,12 +423,14 @@ export class SpanPresenter extends BasePresenter { }, }, }, - where: span.originalRun + where: originalRunId ? { - friendlyId: span.originalRun, + friendlyId: originalRunId, + runtimeEnvironmentId: environmentId, } : { spanId, + runtimeEnvironmentId: environmentId, }, }); @@ -418,12 +439,16 @@ export class SpanPresenter extends BasePresenter { async #getSpan({ eventStore, + eventRepository, + traceId, spanId, environmentId, projectId, createdAt, completedAt, }: { + eventRepository: IEventRepository; + traceId: string; spanId: string; environmentId: string; projectId: string; @@ -431,14 +456,15 @@ export class SpanPresenter extends BasePresenter { createdAt: Date; completedAt: Date | null; }) { - const span = await eventRepository.getSpan({ - storeTable: eventStore, - spanId, + const span = await eventRepository.getSpan( + eventStore, environmentId, - startCreatedAt: createdAt, - endCreatedAt: completedAt ?? undefined, - options: { includeDebugLogs: true }, - }); + spanId, + traceId, + createdAt, + completedAt ?? undefined, + { includeDebugLogs: true } + ); if (!span) { return; @@ -451,11 +477,7 @@ export class SpanPresenter extends BasePresenter { spanId: true, createdAt: true, number: true, - lockedToVersion: { - select: { - version: true, - }, - }, + taskVersion: true, }, where: { parentSpanId: spanId, @@ -463,11 +485,21 @@ export class SpanPresenter extends BasePresenter { }); const data = { - ...span, + spanId: span.spanId, + parentId: span.parentId, + message: span.message, + isError: span.isError, + isPartial: span.isPartial, + isCancelled: span.isCancelled, + level: span.level, + startTime: span.startTime, + duration: span.duration, events: span.events, + style: span.style, properties: span.properties ? 
JSON.stringify(span.properties, null, 2) : undefined, + entity: span.entity, + metadata: span.metadata, triggeredRuns, - showActionBar: span.show?.actions === true, }; switch (span.entity.type) { diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx index 5948f87dc8..79ab0b8e5b 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam/route.tsx @@ -97,6 +97,7 @@ import { useCurrentPlan } from "../_app.orgs.$organizationSlug/route"; import { SpanView } from "../resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route"; import { useSearchParams } from "~/hooks/useSearchParam"; import { CopyableText } from "~/components/primitives/CopyableText"; +import type { SpanOverride } from "~/v3/eventRepository/eventRepository.types"; const resizableSettings = { parent: { @@ -171,7 +172,7 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { return json({ run: result.run, trace: result.trace, - maximumLiveReloadingSetting: env.MAXIMUM_LIVE_RELOADING_EVENTS, + maximumLiveReloadingSetting: result.maximumLiveReloadingSetting, resizable: { parent, tree, @@ -301,7 +302,8 @@ function TraceView({ run, trace, maximumLiveReloadingSetting, resizable }: Loade return <>; } - const { events, duration, rootSpanStatus, rootStartedAt, queuedDuration } = trace; + const { events, duration, rootSpanStatus, rootStartedAt, queuedDuration, overridesBySpanId } = + trace; const shouldLiveReload = events.length <= maximumLiveReloadingSetting; const changeToSpan = useDebounce((selectedSpan: string) => { @@ -323,6 +325,8 @@ function TraceView({ run, trace, maximumLiveReloadingSetting, resizable }: Loade // WARNING Don't put the revalidator in the useEffect deps array or bad things will happen }, [streamedEvents]); // eslint-disable-line react-hooks/exhaustive-deps + const spanOverrides = selectedSpanId ? overridesBySpanId?.[selectedSpanId] : undefined; + return (
replaceSearchParam("span")} /> diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.events.ts b/apps/webapp/app/routes/api.v1.runs.$runId.events.ts index bba571d7c5..ac96c9ddb8 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.events.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.events.ts @@ -2,8 +2,8 @@ import { json } from "@remix-run/server-runtime"; import { z } from "zod"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; -import { eventRepository } from "~/v3/eventRepository.server"; import { ApiRetrieveRunPresenter } from "~/presenters/v3/ApiRetrieveRunPresenter.server"; +import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; const ParamsSchema = z.object({ runId: z.string(), // This is the run friendly ID @@ -30,9 +30,13 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ resource: run }) => { + async ({ resource: run, authentication }) => { + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + const runEvents = await eventRepository.getRunEvents( getTaskEventStoreTableForRun(run), + authentication.environment.id, + run.traceId, run.friendlyId, run.createdAt, run.completedAt ?? undefined diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts index 8ab42d8c3c..cc35836bfe 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runId.trace.ts @@ -3,7 +3,7 @@ import { BatchId } from "@trigger.dev/core/v3/isomorphic"; import { z } from "zod"; import { $replica } from "~/db.server"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; -import { eventRepository } from "~/v3/eventRepository.server"; +import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; const ParamsSchema = z.object({ @@ -35,9 +35,12 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ resource: run }) => { + async ({ resource: run, authentication }) => { + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + const traceSummary = await eventRepository.getTraceDetailedSummary( getTaskEventStoreTableForRun(run), + authentication.environment.id, run.traceId, run.createdAt, run.completedAt ?? 
undefined diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.logs.debug.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.logs.debug.ts index 4e24006025..6d39553df1 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.logs.debug.ts +++ b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.logs.debug.ts @@ -1,79 +1,3 @@ -import { TypedResponse } from "@remix-run/server-runtime"; -import { assertExhaustive } from "@trigger.dev/core/utils"; -import { RunId } from "@trigger.dev/core/v3/isomorphic"; -import { - WorkerApiDebugLogBody, - WorkerApiRunAttemptStartResponseBody, -} from "@trigger.dev/core/v3/workers"; -import { z } from "zod"; -import { prisma } from "~/db.server"; -import { logger } from "~/services/logger.server"; -import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; -import { recordRunDebugLog } from "~/v3/eventRepository.server"; - -// const { action } = createActionApiRoute( -// { -// params: z.object({ -// runFriendlyId: z.string(), -// }), -// body: WorkerApiDebugLogBody, -// method: "POST", -// }, -// async ({ -// authentication, -// body, -// params, -// }): Promise> => { -// const { runFriendlyId } = params; - -// try { -// const run = await prisma.taskRun.findFirst({ -// where: { -// friendlyId: params.runFriendlyId, -// runtimeEnvironmentId: authentication.environment.id, -// }, -// }); - -// if (!run) { -// throw new Response("You don't have permissions for this run", { status: 401 }); -// } - -// const eventResult = await recordRunDebugLog( -// RunId.fromFriendlyId(runFriendlyId), -// body.message, -// { -// attributes: { -// properties: body.properties, -// }, -// startTime: body.time, -// } -// ); - -// if (eventResult.success) { -// return new Response(null, { status: 204 }); -// } - -// switch (eventResult.code) { -// case "FAILED_TO_RECORD_EVENT": -// return new Response(null, { status: 400 }); // send a 400 to prevent retries -// case "RUN_NOT_FOUND": -// return new Response(null, { status: 404 }); -// default: -// return assertExhaustive(eventResult.code); -// } -// } catch (error) { -// logger.error("Failed to record dev log", { -// environmentId: authentication.environment.id, -// error, -// }); -// throw error; -// } -// } -// ); - -// export { action }; - -// Create a generic JSON action in remix export function action() { return new Response(null, { status: 204 }); } diff --git a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts index 65d729d31d..0c88cc45f6 100644 --- a/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts +++ b/apps/webapp/app/routes/engine.v1.dev.runs.$runFriendlyId.snapshots.$snapshotFriendlyId.attempts.start.ts @@ -1,5 +1,5 @@ import { json, TypedResponse } from "@remix-run/server-runtime"; -import { MachinePreset } from "@trigger.dev/core/v3"; +import { MachinePreset, SemanticInternalAttributes } from "@trigger.dev/core/v3"; import { RunId, SnapshotId } from "@trigger.dev/core/v3/isomorphic"; import { WorkerApiRunAttemptStartRequestBody, @@ -57,7 +57,8 @@ const { action } = createActionApiRoute( const envVars = await getEnvVars( authentication.environment, engineResult.run.id, - engineResult.execution.machine ?? defaultMachinePreset + engineResult.execution.machine ?? 
defaultMachinePreset, + engineResult.run.taskEventStore ); return json({ @@ -77,7 +78,8 @@ const { action } = createActionApiRoute( async function getEnvVars( environment: RuntimeEnvironment, runId: string, - machinePreset: MachinePreset + machinePreset: MachinePreset, + taskEventStore?: string ): Promise> { const variables = await resolveVariablesForEnvironment(environment); @@ -94,6 +96,19 @@ async function getEnvVars( ] ); + if (taskEventStore) { + const resourceAttributes = JSON.stringify({ + [SemanticInternalAttributes.TASK_EVENT_STORE]: taskEventStore, + }); + + variables.push( + ...[ + { key: "OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes }, + { key: "TRIGGER_OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes }, + ] + ); + } + return variables.reduce((acc: Record, curr) => { acc[curr.key] = curr.value; return acc; diff --git a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.logs.debug.ts b/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.logs.debug.ts index a814ae257f..457c2b4d2c 100644 --- a/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.logs.debug.ts +++ b/apps/webapp/app/routes/engine.v1.worker-actions.runs.$runFriendlyId.logs.debug.ts @@ -3,7 +3,7 @@ import { RunId } from "@trigger.dev/core/v3/isomorphic"; import { WorkerApiDebugLogBody } from "@trigger.dev/core/v3/runEngineWorker"; import { z } from "zod"; import { createActionWorkerApiRoute } from "~/services/routeBuilders/apiBuilder.server"; -import { recordRunDebugLog } from "~/v3/eventRepository.server"; +import { recordRunDebugLog } from "~/v3/eventRepository/index.server"; export const action = createActionWorkerApiRoute( { diff --git a/apps/webapp/app/routes/otel.v1.logs.ts b/apps/webapp/app/routes/otel.v1.logs.ts index c04be32b4e..a05ddd24cf 100644 --- a/apps/webapp/app/routes/otel.v1.logs.ts +++ b/apps/webapp/app/routes/otel.v1.logs.ts @@ -4,15 +4,15 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { - const contentType = request.headers.get("content-type"); + const contentType = request.headers.get("content-type")?.toLowerCase() ?? ""; - if (contentType === "application/json") { + if (contentType.startsWith("application/json")) { const body = await request.json(); - const exportResponse = await otlpExporter.exportLogs(body as ExportLogsServiceRequest, false); + const exportResponse = await otlpExporter.exportLogs(body as ExportLogsServiceRequest); return json(exportResponse, { status: 200 }); - } else if (contentType === "application/x-protobuf") { + } else if (contentType.startsWith("application/x-protobuf")) { const buffer = await request.arrayBuffer(); const exportRequest = ExportLogsServiceRequest.decode(new Uint8Array(buffer)); diff --git a/apps/webapp/app/routes/otel.v1.traces.ts b/apps/webapp/app/routes/otel.v1.traces.ts index 5d77314a1e..609b72c046 100644 --- a/apps/webapp/app/routes/otel.v1.traces.ts +++ b/apps/webapp/app/routes/otel.v1.traces.ts @@ -4,18 +4,15 @@ import { otlpExporter } from "~/v3/otlpExporter.server"; export async function action({ request }: ActionFunctionArgs) { try { - const contentType = request.headers.get("content-type"); + const contentType = request.headers.get("content-type")?.toLowerCase() ?? 
""; - if (contentType === "application/json") { + if (contentType.startsWith("application/json")) { const body = await request.json(); - const exportResponse = await otlpExporter.exportTraces( - body as ExportTraceServiceRequest, - false - ); + const exportResponse = await otlpExporter.exportTraces(body as ExportTraceServiceRequest); return json(exportResponse, { status: 200 }); - } else if (contentType === "application/x-protobuf") { + } else if (contentType.startsWith("application/x-protobuf")) { const buffer = await request.arrayBuffer(); const exportRequest = ExportTraceServiceRequest.decode(new Uint8Array(buffer)); diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx index 29a5cb873a..98338c1fce 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx @@ -79,6 +79,7 @@ import { import { createTimelineSpanEventsFromSpanEvents } from "~/utils/timelineSpanEvents"; import { CompleteWaitpointForm } from "../resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.waitpoints.$waitpointFriendlyId.complete/route"; import { requireUserId } from "~/services/session.server"; +import type { SpanOverride } from "~/v3/eventRepository/eventRepository.types"; export const loader = async ({ request, params }: LoaderFunctionArgs) => { const userId = await requireUserId(request); @@ -120,10 +121,12 @@ export const loader = async ({ request, params }: LoaderFunctionArgs) => { export function SpanView({ runParam, spanId, + spanOverrides, closePanel, }: { runParam: string; spanId: string | undefined; + spanOverrides?: SpanOverride; closePanel?: () => void; }) { const organization = useOrganization(); @@ -174,17 +177,26 @@ export function SpanView({ ); } case "span": { - return ; + return ( + + ); } } } function SpanBody({ span, + spanOverrides, runParam, closePanel, }: { span: Span; + spanOverrides?: SpanOverride; runParam?: string; closePanel?: () => void; }) { @@ -198,6 +210,8 @@ function SpanBody({ tab = "overview"; } + span = applySpanOverrides(span, spanOverrides); + return (
@@ -232,88 +246,47 @@ function SpanBody({ > Overview - { - replace({ tab: "detail" }); - }} - shortcut={{ key: "d" }} - > - Detail -
- {tab === "detail" ? ( -
- - - Status - - - - - - Task - - - {span.taskSlug} - - } - content={`Filter runs by ${span.taskSlug}`} - /> - - - {span.idempotencyKey && ( - - Idempotency key - {span.idempotencyKey} - - )} - - Version - - {span.workerVersion ? ( - span.workerVersion - ) : ( - - Never started - - - )} - - - -
- ) : ( - - )} +
); } +function applySpanOverrides(span: Span, spanOverrides?: SpanOverride): Span { + if (!spanOverrides) { + return span; + } + + const newSpan = { ...span }; + + if (spanOverrides.isCancelled) { + newSpan.isCancelled = true; + newSpan.isPartial = false; + newSpan.isError = false; + } else if (spanOverrides.isError) { + newSpan.isError = true; + newSpan.isPartial = false; + newSpan.isCancelled = false; + } + + if (typeof spanOverrides.duration !== "undefined") { + newSpan.duration = spanOverrides.duration; + } + + if (spanOverrides.events) { + if (newSpan.events) { + newSpan.events = [...newSpan.events, ...spanOverrides.events]; + } else { + newSpan.events = spanOverrides.events; + } + } + + return newSpan; +} + function RunBody({ run, runParam, @@ -1081,7 +1054,7 @@ function SpanEntity({ span }: { span: Span }) { {run.taskIdentifier} - {run.lockedToVersion?.version ?? "–"} + {run.taskVersion ?? "–"} diff --git a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts index 9bfe5dc9e2..f3f21fc15b 100644 --- a/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts +++ b/apps/webapp/app/routes/resources.runs.$runParam.logs.download.ts @@ -2,13 +2,13 @@ import { LoaderFunctionArgs } from "@remix-run/server-runtime"; import { prisma } from "~/db.server"; import { requireUser } from "~/services/session.server"; import { v3RunParamsSchema } from "~/utils/pathBuilder"; -import { RunPreparedEvent, eventRepository } from "~/v3/eventRepository.server"; +import type { RunPreparedEvent } from "~/v3/eventRepository/eventRepository.types"; import { createGzip } from "zlib"; import { Readable } from "stream"; import { formatDurationMilliseconds } from "@trigger.dev/core/v3/utils/durations"; -import { getDateFromNanoseconds } from "~/utils/taskEvent"; import { getTaskEventStoreTableForRun } from "~/v3/taskEventStore.server"; import { TaskEventKind } from "@trigger.dev/database"; +import { resolveEventRepositoryForStore } from "~/v3/eventRepository/index.server"; export async function loader({ params, request }: LoaderFunctionArgs) { const user = await requireUser(request); @@ -33,8 +33,12 @@ export async function loader({ params, request }: LoaderFunctionArgs) { return new Response("Not found", { status: 404 }); } + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + const runEvents = await eventRepository.getRunEvents( getTaskEventStoreTableForRun(run), + run.runtimeEnvironmentId, + run.traceId, run.friendlyId, run.createdAt, run.completedAt ?? 
undefined @@ -117,3 +121,7 @@ function formatRunEvent(event: RunPreparedEvent): string { return entries.join("\n"); } + +function getDateFromNanoseconds(nanoseconds: bigint) { + return new Date(Number(nanoseconds) / 1_000_000); +} diff --git a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts index 2aafb068c0..7d880a5e57 100644 --- a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts +++ b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts @@ -1,41 +1,53 @@ -import { EventRepository } from "~/v3/eventRepository.server"; +import { EventRepository } from "~/v3/eventRepository/eventRepository.server"; import { TracedEventSpan, TraceEventConcern, TriggerTaskRequest } from "../types"; import { SemanticInternalAttributes } from "@trigger.dev/core/v3/semanticInternalAttributes"; -import { BatchId } from "@trigger.dev/core/v3/isomorphic"; import { TaskRun } from "@trigger.dev/database"; +import { getTaskEventStore } from "~/v3/taskEventStore.server"; +import { ClickhouseEventRepository } from "~/v3/eventRepository/clickhouseEventRepository.server"; +import { IEventRepository } from "~/v3/eventRepository/eventRepository.types"; +import { FEATURE_FLAG, flags } from "~/v3/featureFlags.server"; +import { env } from "~/env.server"; +import { getEventRepository } from "~/v3/eventRepository/index.server"; export class DefaultTraceEventsConcern implements TraceEventConcern { private readonly eventRepository: EventRepository; + private readonly clickhouseEventRepository: ClickhouseEventRepository; - constructor(eventRepository: EventRepository) { + constructor( + eventRepository: EventRepository, + clickhouseEventRepository: ClickhouseEventRepository + ) { this.eventRepository = eventRepository; + this.clickhouseEventRepository = clickhouseEventRepository; + } + + async #getEventRepository( + request: TriggerTaskRequest + ): Promise<{ repository: IEventRepository; store: string }> { + return await getEventRepository( + request.environment.organization.featureFlags as Record + ); } async traceRun( request: TriggerTaskRequest, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise { - return await this.eventRepository.traceEvent( + const { repository, store } = await this.#getEventRepository(request); + + return await repository.traceEvent( request.taskId, { context: request.options?.traceContext, spanParentAsLink: request.options?.spanParentAsLink, - parentAsLinkType: request.options?.parentAsLinkType, kind: "SERVER", environment: request.environment, taskSlug: request.taskId, attributes: { - properties: { - [SemanticInternalAttributes.SHOW_ACTIONS]: true, - }, + properties: {}, style: { icon: request.options?.customIcon ?? "task", }, - runIsTest: request.body.options?.test ?? false, - batchId: request.options?.batchId - ? 
BatchId.toFriendlyId(request.options.batchId) - : undefined, - idempotencyKey: request.options?.idempotencyKey, }, incomplete: true, immediate: true, @@ -44,14 +56,17 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { : undefined, }, async (event, traceContext, traceparent) => { - return await callback({ - traceId: event.traceId, - spanId: event.spanId, - traceContext, - traceparent, - setAttribute: (key, value) => event.setAttribute(key as any, value), - failWithError: event.failWithError.bind(event), - }); + return await callback( + { + traceId: event.traceId, + spanId: event.spanId, + traceContext, + traceparent, + setAttribute: (key, value) => event.setAttribute(key as any, value), + failWithError: event.failWithError.bind(event), + }, + store + ); } ); } @@ -64,32 +79,26 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { incomplete: boolean; isError: boolean; }, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise { const { existingRun, idempotencyKey, incomplete, isError } = options; + const { repository, store } = await this.#getEventRepository(request); - return await this.eventRepository.traceEvent( + return await repository.traceEvent( `${request.taskId} (cached)`, { context: request.options?.traceContext, spanParentAsLink: request.options?.spanParentAsLink, - parentAsLinkType: request.options?.parentAsLinkType, kind: "SERVER", environment: request.environment, taskSlug: request.taskId, attributes: { properties: { - [SemanticInternalAttributes.SHOW_ACTIONS]: true, [SemanticInternalAttributes.ORIGINAL_RUN_ID]: existingRun.friendlyId, }, style: { icon: "task-cached", }, - runIsTest: request.body.options?.test ?? false, - batchId: request.options?.batchId - ? 
BatchId.toFriendlyId(request.options.batchId) - : undefined, - idempotencyKey, runId: existingRun.friendlyId, }, incomplete, @@ -111,14 +120,17 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { } ); - return await callback({ - traceId: event.traceId, - spanId: event.spanId, - traceContext, - traceparent, - setAttribute: (key, value) => event.setAttribute(key as any, value), - failWithError: event.failWithError.bind(event), - }); + return await callback( + { + traceId: event.traceId, + spanId: event.spanId, + traceContext, + traceparent, + setAttribute: (key, value) => event.setAttribute(key as any, value), + failWithError: event.failWithError.bind(event), + }, + store + ); } ); } diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 1b598e592b..4916e237bb 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -29,7 +29,6 @@ import type { TriggerTaskServiceOptions, TriggerTaskServiceResult, } from "../../v3/services/triggerTask.server"; -import { getTaskEventStore } from "../../v3/taskEventStore.server"; import { clampMaxDuration } from "../../v3/utils/maxDuration"; import { IdempotencyKeyConcern } from "../concerns/idempotencyKeys.server"; import type { @@ -267,7 +266,7 @@ export class RunEngineTriggerTaskService { const workerQueue = await this.queueConcern.getWorkerQueue(environment, body.options?.region); try { - return await this.traceEventConcern.traceRun(triggerRequest, async (event) => { + return await this.traceEventConcern.traceRun(triggerRequest, async (event, store) => { const result = await this.runNumberIncrementer.incrementRunNumber( triggerRequest, async (num) => { @@ -311,7 +310,7 @@ export class RunEngineTriggerTaskService { delayUntil, queuedAt: delayUntil ? 
undefined : new Date(), maxAttempts: body.options?.maxAttempts, - taskEventStore: getTaskEventStore(), + taskEventStore: store, ttl, tags, oneTimeUseToken: options.oneTimeUseToken, diff --git a/apps/webapp/app/runEngine/types.ts b/apps/webapp/app/runEngine/types.ts index 10dcbd7a3d..2324edc6b8 100644 --- a/apps/webapp/app/runEngine/types.ts +++ b/apps/webapp/app/runEngine/types.ts @@ -143,7 +143,7 @@ export type TracedEventSpan = { export interface TraceEventConcern { traceRun( request: TriggerTaskRequest, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise; traceIdempotentRun( request: TriggerTaskRequest, @@ -153,7 +153,7 @@ export interface TraceEventConcern { incomplete: boolean; isError: boolean; }, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise; } diff --git a/apps/webapp/app/services/apiRateLimit.server.ts b/apps/webapp/app/services/apiRateLimit.server.ts index 611a19fb3e..416d7834ec 100644 --- a/apps/webapp/app/services/apiRateLimit.server.ts +++ b/apps/webapp/app/services/apiRateLimit.server.ts @@ -22,6 +22,7 @@ export const apiRateLimiter = authorizationRateLimitMiddleware({ limiterCache: { fresh: 60_000 * 10, // Data is fresh for 10 minutes stale: 60_000 * 20, // Date is stale after 20 minutes + maxItems: 1000, }, limiterConfigOverride: async (authorizationValue) => { const authenticatedEnv = await authenticateAuthorizationHeader(authorizationValue, { diff --git a/apps/webapp/app/services/authorizationRateLimitMiddleware.server.ts b/apps/webapp/app/services/authorizationRateLimitMiddleware.server.ts index 0ca4b80afd..b94a664a36 100644 --- a/apps/webapp/app/services/authorizationRateLimitMiddleware.server.ts +++ b/apps/webapp/app/services/authorizationRateLimitMiddleware.server.ts @@ -2,14 +2,13 @@ import { createCache, DefaultStatefulContext, Namespace, Cache as UnkeyCache } f import { MemoryStore } from "@unkey/cache/stores"; import { Ratelimit } from "@upstash/ratelimit"; import { Request as ExpressRequest, Response as ExpressResponse, NextFunction } from "express"; -import { RedisOptions } from "ioredis"; import { createHash } from "node:crypto"; import { z } from "zod"; import { env } from "~/env.server"; +import { RedisWithClusterOptions } from "~/redis.server"; import { logger } from "./logger.server"; import { createRedisRateLimitClient, Duration, RateLimiter } from "./rateLimiter.server"; import { RedisCacheStore } from "./unkey/redisCacheStore.server"; -import { RedisWithClusterOptions } from "~/redis.server"; const DurationSchema = z.custom((value) => { if (typeof value !== "string") { @@ -64,6 +63,7 @@ type Options = { limiterCache?: { fresh: number; stale: number; + maxItems: number; }; log?: { requests?: boolean; @@ -145,7 +145,10 @@ export function authorizationRateLimitMiddleware({ limiterConfigOverride, }: Options) { const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = new MemoryStore({ + persistentMap: new Map(), + unstableEvictOnSet: { frequency: 0.001, maxItems: limiterCache?.maxItems ?? 
1000 }, + }); const redisCacheStore = new RedisCacheStore({ connection: { keyPrefix: `cache:${keyPrefix}:rate-limit-cache:`, diff --git a/apps/webapp/app/services/engineRateLimit.server.ts b/apps/webapp/app/services/engineRateLimit.server.ts index 9eb20342f3..ba043c7fa3 100644 --- a/apps/webapp/app/services/engineRateLimit.server.ts +++ b/apps/webapp/app/services/engineRateLimit.server.ts @@ -1,5 +1,4 @@ import { env } from "~/env.server"; -import { authenticateAuthorizationHeader } from "./apiAuth.server"; import { authorizationRateLimitMiddleware } from "./authorizationRateLimitMiddleware.server"; import { Duration } from "./rateLimiter.server"; @@ -22,6 +21,7 @@ export const engineRateLimiter = authorizationRateLimitMiddleware({ limiterCache: { fresh: 60_000 * 10, // Data is fresh for 10 minutes stale: 60_000 * 20, // Date is stale after 20 minutes + maxItems: 1000, }, pathMatchers: [/^\/engine/], // Regex allow any path starting with /engine/v1/worker-actions/ diff --git a/apps/webapp/app/services/platform.v3.server.ts b/apps/webapp/app/services/platform.v3.server.ts index 1263b864e1..f964470625 100644 --- a/apps/webapp/app/services/platform.v3.server.ts +++ b/apps/webapp/app/services/platform.v3.server.ts @@ -44,7 +44,13 @@ const client = singleton("billingClient", initializeClient); function initializePlatformCache() { const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = new MemoryStore({ + persistentMap: new Map(), + unstableEvictOnSet: { + frequency: 0.01, + maxItems: 1000, + }, + }); const redisCacheStore = new RedisCacheStore({ connection: { keyPrefix: "tr:cache:platform:v3", diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index 68bccc9f6a..05fdfff54e 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -83,7 +83,10 @@ export class RealtimeClient { this.#registerCommands(); const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = new MemoryStore({ + persistentMap: new Map(), + unstableEvictOnSet: { frequency: 0.01, maxItems: 1000 }, + }); const redisCacheStore = new RedisCacheStore({ connection: { keyPrefix: "tr:cache:realtime", diff --git a/apps/webapp/app/services/requestIdempotency.server.ts b/apps/webapp/app/services/requestIdempotency.server.ts index 64e839fbca..cda697b968 100644 --- a/apps/webapp/app/services/requestIdempotency.server.ts +++ b/apps/webapp/app/services/requestIdempotency.server.ts @@ -33,7 +33,13 @@ export class RequestIdempotencyService { : "request-idempotency:"; const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = new MemoryStore({ + persistentMap: new Map(), + unstableEvictOnSet: { + frequency: 0.001, + maxItems: 1000, + }, + }); const redisCacheStore = new RedisCacheStore({ name: "request-idempotency", connection: { diff --git a/apps/webapp/app/services/resourceMonitor.server.ts b/apps/webapp/app/services/resourceMonitor.server.ts new file mode 100644 index 0000000000..9a8fee52eb --- /dev/null +++ b/apps/webapp/app/services/resourceMonitor.server.ts @@ -0,0 +1,11 @@ +import { ResourceMonitor } from "@trigger.dev/core/v3/serverOnly"; +import { singleton } from "~/utils/singleton"; + +export const resourceMonitor = singleton("resourceMonitor", initializeResourceMonitor); + +function initializeResourceMonitor() { + 
return new ResourceMonitor({ + ctx: {}, + verbose: false, + }); +} diff --git a/apps/webapp/app/utils/taskEvent.ts b/apps/webapp/app/utils/taskEvent.ts deleted file mode 100644 index 5fa2713e16..0000000000 --- a/apps/webapp/app/utils/taskEvent.ts +++ /dev/null @@ -1,572 +0,0 @@ -import { Attributes, Link } from "@opentelemetry/api"; -import { - correctErrorStackTrace, - ExceptionEventProperties, - isExceptionSpanEvent, - millisecondsToNanoseconds, - NULL_SENTINEL, - SemanticInternalAttributes, - SpanEvent, - SpanEvents, - SpanMessagingEvent, - TaskEventStyle, - unflattenAttributes, -} from "@trigger.dev/core/v3"; -import { Prisma, TaskEvent, TaskEventKind } from "@trigger.dev/database"; -import { createTreeFromFlatItems, flattenTree } from "~/components/primitives/TreeView/TreeView"; -import type { - PreparedEvent, - SpanLink, - SpanSummary, - TraceSummary, -} from "~/v3/eventRepository.server"; - -export type TraceSpan = NonNullable>; - -export function prepareTrace(events: TaskEvent[]): TraceSummary | undefined { - let preparedEvents: Array = []; - let rootSpanId: string | undefined; - const eventsBySpanId = new Map(); - - for (const event of events) { - preparedEvents.push(prepareEvent(event)); - - if (!rootSpanId && !event.parentId) { - rootSpanId = event.spanId; - } - } - - for (const event of preparedEvents) { - const existingEvent = eventsBySpanId.get(event.spanId); - - if (!existingEvent) { - eventsBySpanId.set(event.spanId, event); - continue; - } - - if (event.isCancelled || !event.isPartial) { - eventsBySpanId.set(event.spanId, event); - } - } - - preparedEvents = Array.from(eventsBySpanId.values()); - - const spansBySpanId = new Map(); - - const spans = preparedEvents.map((event) => { - const ancestorCancelled = isAncestorCancelled(eventsBySpanId, event.spanId); - const duration = calculateDurationIfAncestorIsCancelled( - eventsBySpanId, - event.spanId, - event.duration - ); - - const span = { - id: event.spanId, - parentId: event.parentId ?? undefined, - runId: event.runId, - data: { - message: event.message, - style: event.style, - duration, - isError: event.isError, - isPartial: ancestorCancelled ? false : event.isPartial, - isCancelled: event.isCancelled === true ? true : event.isPartial && ancestorCancelled, - startTime: getDateFromNanoseconds(event.startTime), - level: event.level, - events: event.events, - environmentType: event.environmentType, - isDebug: event.kind === TaskEventKind.LOG, - }, - } satisfies SpanSummary; - - spansBySpanId.set(event.spanId, span); - - return span; - }); - - if (!rootSpanId) { - return; - } - - const rootSpan = spansBySpanId.get(rootSpanId); - - if (!rootSpan) { - return; - } - - return { - rootSpan, - spans, - }; -} - -export function createTraceTreeFromEvents(traceSummary: TraceSummary, spanId: string) { - //this tree starts at the passed in span (hides parent elements if there are any) - const tree = createTreeFromFlatItems(traceSummary.spans, spanId); - - //we need the start offset for each item, and the total duration of the entire tree - const treeRootStartTimeMs = tree ? tree?.data.startTime.getTime() : 0; - let totalDuration = tree?.data.duration ?? 0; - const events = tree - ? flattenTree(tree).map((n) => { - const offset = millisecondsToNanoseconds(n.data.startTime.getTime() - treeRootStartTimeMs); - totalDuration = Math.max(totalDuration, offset + n.data.duration); - return { - ...n, - data: { - ...n.data, - //set partial nodes to null duration - duration: n.data.isPartial ? 
null : n.data.duration, - offset, - isRoot: n.id === traceSummary.rootSpan.id, - }, - }; - }) - : []; - - //total duration should be a minimum of 1ms - totalDuration = Math.max(totalDuration, millisecondsToNanoseconds(1)); - - let rootSpanStatus: "executing" | "completed" | "failed" = "executing"; - if (events[0]) { - if (events[0].data.isError) { - rootSpanStatus = "failed"; - } else if (!events[0].data.isPartial) { - rootSpanStatus = "completed"; - } - } - - return { - rootSpanStatus, - events: events, - parentRunFriendlyId: - tree?.id === traceSummary.rootSpan.id ? undefined : traceSummary.rootSpan.runId, - duration: totalDuration, - rootStartedAt: tree?.data.startTime, - }; -} - -export function createSpanFromEvents(events: TaskEvent[], spanId: string) { - const spanEvent = getSpanEvent(events, spanId); - - if (!spanEvent) { - return; - } - - const preparedEvent = prepareEvent(spanEvent); - const span = createSpanFromEvent(events, preparedEvent); - - const output = rehydrateJson(spanEvent.output); - const payload = rehydrateJson(spanEvent.payload); - - const show = rehydrateShow(spanEvent.properties); - - const properties = sanitizedAttributes(spanEvent.properties); - - const messagingEvent = SpanMessagingEvent.optional().safeParse((properties as any)?.messaging); - - const links: SpanLink[] = []; - - if (messagingEvent.success && messagingEvent.data) { - if (messagingEvent.data.message && "id" in messagingEvent.data.message) { - if (messagingEvent.data.message.id.startsWith("run_")) { - links.push({ - type: "run", - icon: "runs", - title: `Run ${messagingEvent.data.message.id}`, - runId: messagingEvent.data.message.id, - }); - } - } - } - - const backLinks = spanEvent.links as any as Link[] | undefined; - - if (backLinks && backLinks.length > 0) { - backLinks.forEach((l) => { - const title = String(l.attributes?.[SemanticInternalAttributes.LINK_TITLE] ?? "Triggered by"); - - links.push({ - type: "span", - icon: "trigger", - title, - traceId: l.context.traceId, - spanId: l.context.spanId, - }); - }); - } - - const spanEvents = transformEvents( - preparedEvent.events, - spanEvent.metadata as Attributes, - spanEvent.environmentType === "DEVELOPMENT" - ); - - return { - ...spanEvent, - ...span.data, - payload, - output, - events: spanEvents, - show, - links, - properties: properties ? JSON.stringify(properties, null, 2) : undefined, - showActionBar: show?.actions === true, - }; -} - -export function createSpanFromEvent(events: TaskEvent[], event: PreparedEvent) { - let ancestorCancelled = false; - let duration = event.duration; - - if (!event.isCancelled && event.isPartial) { - walkSpanAncestors(events, event, (ancestorEvent, level) => { - if (level >= 8) { - return { stop: true }; - } - - if (ancestorEvent.isCancelled) { - ancestorCancelled = true; - - // We need to get the cancellation time from the cancellation span event - const cancellationEvent = ancestorEvent.events.find( - (event) => event.name === "cancellation" - ); - - if (cancellationEvent) { - duration = calculateDurationFromStart(event.startTime, cancellationEvent.time); - } - - return { stop: true }; - } - - return { stop: false }; - }); - } - - const span = { - id: event.spanId, - parentId: event.parentId ?? undefined, - runId: event.runId, - idempotencyKey: event.idempotencyKey, - data: { - message: event.message, - style: event.style, - duration, - isError: event.isError, - isPartial: ancestorCancelled ? false : event.isPartial, - isCancelled: event.isCancelled === true ? 
true : event.isPartial && ancestorCancelled, - startTime: getDateFromNanoseconds(event.startTime), - level: event.level, - events: event.events, - environmentType: event.environmentType, - }, - }; - - return span; -} - -function walkSpanAncestors( - events: TaskEvent[], - event: PreparedEvent, - callback: (event: PreparedEvent, level: number) => { stop: boolean } -) { - const parentId = event.parentId; - if (!parentId) { - return; - } - - let parentEvent = getSpanEvent(events, parentId); - let level = 1; - - while (parentEvent) { - const preparedParentEvent = prepareEvent(parentEvent); - - const result = callback(preparedParentEvent, level); - - if (result.stop) { - return; - } - - if (!preparedParentEvent.parentId) { - return; - } - - parentEvent = getSpanEvent(events, preparedParentEvent.parentId); - - level++; - } -} - -function getSpanEvent(events: TaskEvent[], spanId: string) { - const spans = events.filter((e) => e.spanId === spanId); - const completedSpan = spans.find((s) => !s.isPartial); - - if (completedSpan) { - return completedSpan; - } - - return spans.at(0); -} - -export function prepareEvent(event: TaskEvent): PreparedEvent { - return { - ...event, - duration: Number(event.duration), - events: parseEventsField(event.events), - style: parseStyleField(event.style), - }; -} - -function parseEventsField(events: Prisma.JsonValue): SpanEvents { - const unsafe = events - ? (events as any[]).map((e) => ({ - ...e, - properties: unflattenAttributes(e.properties as Attributes), - })) - : undefined; - - return unsafe as SpanEvents; -} - -function parseStyleField(style: Prisma.JsonValue): TaskEventStyle { - const unsafe = unflattenAttributes(style as Attributes); - - if (!unsafe) { - return {}; - } - - if (typeof unsafe === "object") { - return Object.assign( - { - icon: undefined, - variant: undefined, - }, - unsafe - ) as TaskEventStyle; - } - - return {}; -} - -export function isAncestorCancelled(events: Map, spanId: string) { - const event = events.get(spanId); - - if (!event) { - return false; - } - - if (event.isCancelled) { - return true; - } - - if (event.parentId) { - return isAncestorCancelled(events, event.parentId); - } - - return false; -} - -function calculateDurationIfAncestorIsCancelled( - events: Map, - spanId: string, - defaultDuration: number -) { - const event = events.get(spanId); - - if (!event) { - return defaultDuration; - } - - if (event.isCancelled) { - return defaultDuration; - } - - if (!event.isPartial) { - return defaultDuration; - } - - if (event.parentId) { - const cancelledAncestor = findFirstCancelledAncestor(events, event.parentId); - - if (cancelledAncestor) { - // We need to get the cancellation time from the cancellation span event - const cancellationEvent = cancelledAncestor.events.find( - (event) => event.name === "cancellation" - ); - - if (cancellationEvent) { - return calculateDurationFromStart(event.startTime, cancellationEvent.time); - } - } - } - - return defaultDuration; -} - -export function calculateDurationFromStart(startTime: bigint, endTime: Date = new Date()) { - const $endtime = typeof endTime === "string" ? 
new Date(endTime) : endTime; - - return Number(BigInt($endtime.getTime() * 1_000_000) - startTime); -} - -function findFirstCancelledAncestor(events: Map, spanId: string) { - const event = events.get(spanId); - - if (!event) { - return; - } - - if (event.isCancelled) { - return event; - } - - if (event.parentId) { - return findFirstCancelledAncestor(events, event.parentId); - } - - return; -} - -export function getDateFromNanoseconds(nanoseconds: bigint) { - return new Date(Number(nanoseconds) / 1_000_000); -} - -export function getNowInNanoseconds(): bigint { - return BigInt(new Date().getTime() * 1_000_000); -} - -export function rehydrateJson(json: Prisma.JsonValue): any { - if (json === null) { - return undefined; - } - - if (json === NULL_SENTINEL) { - return null; - } - - if (typeof json === "string") { - return json; - } - - if (typeof json === "number") { - return json; - } - - if (typeof json === "boolean") { - return json; - } - - if (Array.isArray(json)) { - return json.map((item) => rehydrateJson(item)); - } - - if (typeof json === "object") { - return unflattenAttributes(json as Attributes); - } - - return null; -} - -export function rehydrateShow(properties: Prisma.JsonValue): { actions?: boolean } | undefined { - if (properties === null || properties === undefined) { - return; - } - - if (typeof properties !== "object") { - return; - } - - if (Array.isArray(properties)) { - return; - } - - const actions = properties[SemanticInternalAttributes.SHOW_ACTIONS]; - - if (typeof actions === "boolean") { - return { actions }; - } - - return; -} - -export function sanitizedAttributes(json: Prisma.JsonValue) { - if (json === null || json === undefined) { - return; - } - - const withoutPrivateProperties = removePrivateProperties(json as Attributes); - if (!withoutPrivateProperties) { - return; - } - - return unflattenAttributes(withoutPrivateProperties); -} -// removes keys that start with a $ sign. If there are no keys left, return undefined -function removePrivateProperties( - attributes: Attributes | undefined | null -): Attributes | undefined { - if (!attributes) { - return undefined; - } - - const result: Attributes = {}; - - for (const [key, value] of Object.entries(attributes)) { - if (key.startsWith("$")) { - continue; - } - - result[key] = value; - } - - if (Object.keys(result).length === 0) { - return undefined; - } - - return result; -} - -export function transformEvents( - events: SpanEvents, - properties: Attributes, - isDev: boolean -): SpanEvents { - return (events ?? []).map((event) => transformEvent(event, properties, isDev)); -} - -function transformEvent(event: SpanEvent, properties: Attributes, isDev: boolean): SpanEvent { - if (isExceptionSpanEvent(event)) { - return { - ...event, - properties: { - exception: transformException(event.properties.exception, properties, isDev), - }, - }; - } - - return event; -} - -function transformException( - exception: ExceptionEventProperties, - properties: Attributes, - isDev: boolean -): ExceptionEventProperties { - const projectDirAttributeValue = properties[SemanticInternalAttributes.PROJECT_DIR]; - - if (projectDirAttributeValue !== undefined && typeof projectDirAttributeValue !== "string") { - return exception; - } - - return { - ...exception, - stacktrace: exception.stacktrace - ? 
correctErrorStackTrace(exception.stacktrace, projectDirAttributeValue, { - removeFirstLine: true, - isDev, - }) - : undefined, - }; -} diff --git a/apps/webapp/app/v3/dynamicFlushScheduler.server.ts b/apps/webapp/app/v3/dynamicFlushScheduler.server.ts index 30c508d037..946bc19366 100644 --- a/apps/webapp/app/v3/dynamicFlushScheduler.server.ts +++ b/apps/webapp/app/v3/dynamicFlushScheduler.server.ts @@ -1,4 +1,5 @@ import { Logger } from "@trigger.dev/core/logger"; +import { tryCatch } from "@trigger.dev/core/utils"; import { nanoid } from "nanoid"; import pLimit from "p-limit"; import { signalsEmitter } from "~/services/signals.server"; @@ -195,55 +196,72 @@ export class DynamicFlushScheduler { // Schedule all batches for concurrent processing const flushPromises = batchesToFlush.map((batch) => this.limiter(async () => { - const flushId = nanoid(); const itemCount = batch.length; - try { - const startTime = Date.now(); - await this.callback(flushId, batch); - - const duration = Date.now() - startTime; - this.totalQueuedItems -= itemCount; - this.consecutiveFlushFailures = 0; - this.lastFlushTime = Date.now(); - this.metrics.flushedBatches++; - this.metrics.totalItemsFlushed += itemCount; - - this.logger.debug("Batch flushed successfully", { - flushId, - itemCount, - duration, - remainingQueueDepth: this.totalQueuedItems, - activeConcurrency: this.limiter.activeCount, - pendingConcurrency: this.limiter.pendingCount, - }); - } catch (error) { - this.consecutiveFlushFailures++; - this.metrics.failedBatches++; + const self = this; + + async function tryFlush(flushId: string, batchToFlush: T[], attempt: number = 1) { + try { + const startTime = Date.now(); + await self.callback(flushId, batchToFlush); + + const duration = Date.now() - startTime; + self.totalQueuedItems -= itemCount; + self.consecutiveFlushFailures = 0; + self.lastFlushTime = Date.now(); + self.metrics.flushedBatches++; + self.metrics.totalItemsFlushed += itemCount; + + self.logger.debug("Batch flushed successfully", { + flushId, + itemCount, + duration, + remainingQueueDepth: self.totalQueuedItems, + activeConcurrency: self.limiter.activeCount, + pendingConcurrency: self.limiter.pendingCount, + }); + } catch (error) { + self.consecutiveFlushFailures++; + self.metrics.failedBatches++; + + self.logger.error("Error attempting to flush batch", { + flushId, + itemCount, + error, + consecutiveFailures: self.consecutiveFlushFailures, + attempt, + }); + + // Back off on failures + if (self.consecutiveFlushFailures > 5) { + self.adjustConcurrency(true); + } + + if (attempt <= 3) { + await new Promise((resolve) => setTimeout(resolve, 500)); + return await tryFlush(flushId, batchToFlush, attempt + 1); + } else { + throw error; + } + } + } + + const [flushError] = await tryCatch(tryFlush(nanoid(), batch)); + if (flushError) { this.logger.error("Error flushing batch", { - flushId, - itemCount, - error, - consecutiveFailures: this.consecutiveFlushFailures, + error: flushError, }); - - // Re-queue the batch at the front if it fails - this.batchQueue.unshift(batch); - this.totalQueuedItems += itemCount; - - // Back off on failures - if (this.consecutiveFlushFailures > 3) { - this.adjustConcurrency(true); - } } }) ); // Don't await here - let them run concurrently Promise.allSettled(flushPromises).then(() => { + const shouldContinueFlushing = + this.batchQueue.length > 0 && (this.consecutiveFlushFailures < 3 || this.isShuttingDown); // After flush completes, check if we need to flush more - if (this.batchQueue.length > 0) { + if 
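+          // A rough sketch of the retry schedule tryFlush implements above, given
+          // the constants in this change (3 retries, fixed 500 ms sleep):
+          //   attempt 1 fails -> sleep 500 ms -> attempt 2 -> ... -> attempt 4 fails -> throw
+          // so a persistently failing batch makes up to 4 callback invocations and
+          // ~1.5 s of sleeps before tryCatch logs the error; unlike the old code,
+          // the batch is then dropped rather than re-queued at the front.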
(shouldContinueFlushing) { this.flushBatches(); } }); diff --git a/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts new file mode 100644 index 0000000000..93d801c897 --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/clickhouseEventRepository.server.ts @@ -0,0 +1,1805 @@ +import type { + ClickHouse, + TaskEventDetailedSummaryV1Result, + TaskEventDetailsV1Result, + TaskEventSummaryV1Result, + TaskEventV1Input, +} from "@internal/clickhouse"; +import { Attributes, startSpan, trace, Tracer } from "@internal/tracing"; +import { createJsonErrorObject } from "@trigger.dev/core/v3/errors"; +import { serializeTraceparent } from "@trigger.dev/core/v3/isomorphic"; +import { + AttemptFailedSpanEvent, + CancellationSpanEvent, + ExceptionSpanEvent, + isAttemptFailedSpanEvent, + isCancellationSpanEvent, + isExceptionSpanEvent, + OtherSpanEvent, + PRIMARY_VARIANT, + SpanEvents, + TaskEventStyle, + TaskRunError, +} from "@trigger.dev/core/v3/schemas"; +import { SemanticInternalAttributes } from "@trigger.dev/core/v3/semanticInternalAttributes"; +import { unflattenAttributes } from "@trigger.dev/core/v3/utils/flattenAttributes"; +import { TaskEventLevel } from "@trigger.dev/database"; +import { logger } from "~/services/logger.server"; +import { DynamicFlushScheduler } from "../dynamicFlushScheduler.server"; +import { tracePubSub } from "../services/tracePubSub.server"; +import type { TaskEventStoreTable } from "../taskEventStore.server"; +import { + calculateDurationFromStart, + calculateDurationFromStartJsDate, + convertDateToNanoseconds, + createExceptionPropertiesFromError, + extractContextFromCarrier, + generateDeterministicSpanId, + generateSpanId, + generateTraceId, + getNowInNanoseconds, + parseEventsField, + removePrivateProperties, + isEmptyObject, +} from "./common.server"; +import type { + CompleteableTaskRun, + CreateEventInput, + EventBuilder, + IEventRepository, + RunPreparedEvent, + SpanDetail, + SpanDetailedSummary, + SpanOverride, + SpanSummary, + SpanSummaryCommon, + TraceAttributes, + TraceDetailedSummary, + TraceEventOptions, + TraceSummary, +} from "./eventRepository.types"; +import { originalRunIdCache } from "./originalRunIdCache.server"; + +export type ClickhouseEventRepositoryConfig = { + clickhouse: ClickHouse; + batchSize?: number; + flushInterval?: number; + tracer?: Tracer; + maximumTraceSummaryViewCount?: number; + maximumTraceDetailedSummaryViewCount?: number; + maximumLiveReloadingSetting?: number; +}; + +/** + * ClickHouse-based implementation of the EventRepository. + * This implementation stores events in ClickHouse for better analytics and performance. + */ +export class ClickhouseEventRepository implements IEventRepository { + private _clickhouse: ClickHouse; + private _config: ClickhouseEventRepositoryConfig; + private readonly _flushScheduler: DynamicFlushScheduler; + private _tracer: Tracer; + + constructor(config: ClickhouseEventRepositoryConfig) { + this._clickhouse = config.clickhouse; + this._config = config; + this._tracer = config.tracer ?? trace.getTracer("clickhouseEventRepo", "0.0.1"); + + this._flushScheduler = new DynamicFlushScheduler({ + batchSize: config.batchSize ?? 1000, + flushInterval: config.flushInterval ?? 
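+      // Assuming DynamicFlushScheduler keeps its usual contract, a batch is
+      // flushed when it reaches batchSize events or when flushInterval elapses,
+      // whichever comes first. Worked example with the 1000-event / 1000 ms
+      // defaults here: a burst of 2500 events becomes two full batches
+      // immediately plus a 500-event batch on the next interval tick.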
1000, + callback: this.#flushBatch.bind(this), + minConcurrency: 1, + maxConcurrency: 10, + maxBatchSize: 10000, + memoryPressureThreshold: 10000, + loadSheddingThreshold: 10000, + loadSheddingEnabled: false, + isDroppableEvent: (event: TaskEventV1Input) => { + // Only drop debug events (kind "DEBUG_EVENT") during load shedding + return event.kind === "DEBUG_EVENT"; + }, + }); + } + + get maximumLiveReloadingSetting() { + return this._config.maximumLiveReloadingSetting ?? 1000; + } + + async #flushBatch(flushId: string, events: TaskEventV1Input[]) { + await startSpan(this._tracer, "flushBatch", async (span) => { + span.setAttribute("flush_id", flushId); + span.setAttribute("event_count", events.length); + + const [insertError, insertResult] = await this._clickhouse.taskEvents.insert(events); + + if (insertError) { + throw insertError; + } + + logger.info("ClickhouseEventRepository.flushBatch Inserted batch into clickhouse", { + events: events.length, + insertResult, + }); + + this.#publishToRedis(events); + }); + } + + async #publishToRedis(events: TaskEventV1Input[]) { + if (events.length === 0) return; + await tracePubSub.publish(events.map((e) => e.trace_id)); + } + + async insertMany(events: CreateEventInput[]): Promise<void> { + this.addToBatch(events.flatMap((event) => this.createEventToTaskEventV1Input(event))); + } + + async insertManyImmediate(events: CreateEventInput[]): Promise<void> { + await this.insertMany(events); + } + + private createEventToTaskEventV1Input(event: CreateEventInput): TaskEventV1Input[] { + return [ + { + environment_id: event.environmentId, + organization_id: event.organizationId, + project_id: event.projectId, + task_identifier: event.taskSlug, + run_id: event.runId, + start_time: event.startTime.toString(), + duration: (event.duration ?? 0).toString(), + trace_id: event.traceId, + span_id: event.spanId, + parent_span_id: event.parentId ?? 
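+      // Presumably the ClickHouse schema stores parent_span_id as a non-nullable
+      // String, so "no parent" is encoded as "" and the read path maps "" back
+      // to a root span. Also note the fan-out: one CreateEventInput becomes one
+      // SPAN row plus one row per span event, e.g. a cancelled span inserts
+      //   [SPAN row, "cancellation" SPAN_EVENT row].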
"", + message: event.message, + kind: this.createEventToTaskEventV1InputKind(event), + status: this.createEventToTaskEventV1InputStatus(event), + attributes: this.createEventToTaskEventV1InputAttributes(event.properties), + metadata: this.createEventToTaskEventV1InputMetadata(event), + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) // 1 year + ), + }, + ...this.spanEventsToTaskEventV1Input(event), + ]; + } + + private spanEventsToTaskEventV1Input(event: CreateEventInput): TaskEventV1Input[] { + if (!event.events) return []; + + const spanEvents = parseEventsField(event.events); + + const records = spanEvents.map((e) => this.createTaskEventV1InputFromSpanEvent(e, event)); + + if (event.isPartial) { + return records; + } + + // Only return events where the event start_time is greater than the span start_time + return records.filter((r) => BigInt(r.start_time) > BigInt(event.startTime)); + } + + private createTaskEventV1InputFromSpanEvent( + spanEvent: SpanEvents[number], + event: CreateEventInput + ): TaskEventV1Input { + if (isExceptionSpanEvent(spanEvent)) { + return this.createTaskEventV1InputFromExceptionEvent(spanEvent, event); + } + + if (isCancellationSpanEvent(spanEvent)) { + return this.createTaskEventV1InputFromCancellationEvent(spanEvent, event); + } + + if (isAttemptFailedSpanEvent(spanEvent)) { + return this.createTaskEventV1InputFromAttemptFailedEvent(spanEvent, event); + } + + return this.createTaskEventV1InputFromOtherEvent(spanEvent, event); + } + + private createTaskEventV1InputFromExceptionEvent( + spanEvent: ExceptionSpanEvent, + event: CreateEventInput + ): TaskEventV1Input { + return { + environment_id: event.environmentId, + organization_id: event.organizationId, + project_id: event.projectId, + task_identifier: event.taskSlug, + run_id: event.runId, + start_time: convertDateToNanoseconds(spanEvent.time).toString(), + duration: "0", // Events have no duration + trace_id: event.traceId, + span_id: event.spanId, + parent_span_id: event.parentId ?? "", + message: spanEvent.name, + kind: "SPAN_EVENT", + status: "ERROR", + attributes: { + error: { + message: spanEvent.properties.exception.message, + name: spanEvent.properties.exception.type, + stackTrace: spanEvent.properties.exception.stacktrace, + }, + }, + metadata: JSON.stringify({ + exception: spanEvent.properties.exception, + }), // Events have no metadata + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) // 1 year + ), + }; + } + + private createTaskEventV1InputFromCancellationEvent( + spanEvent: CancellationSpanEvent, + event: CreateEventInput + ): TaskEventV1Input { + return { + environment_id: event.environmentId, + organization_id: event.organizationId, + project_id: event.projectId, + task_identifier: event.taskSlug, + run_id: event.runId, + start_time: convertDateToNanoseconds(spanEvent.time).toString(), + duration: "0", // Events have no duration + trace_id: event.traceId, + span_id: event.spanId, + parent_span_id: event.parentId ?? 
"", + message: spanEvent.name, + kind: "SPAN_EVENT", + status: "CANCELLED", + attributes: {}, + metadata: JSON.stringify({ + reason: spanEvent.properties.reason, + }), // Events have no metadata + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) // 1 year + ), + }; + } + + private createTaskEventV1InputFromAttemptFailedEvent( + spanEvent: AttemptFailedSpanEvent, + event: CreateEventInput + ): TaskEventV1Input { + return { + environment_id: event.environmentId, + organization_id: event.organizationId, + project_id: event.projectId, + task_identifier: event.taskSlug, + run_id: event.runId, + start_time: convertDateToNanoseconds(spanEvent.time).toString(), + duration: "0", // Events have no duration + trace_id: event.traceId, + span_id: event.spanId, + parent_span_id: event.parentId ?? "", + message: spanEvent.name, + kind: "ANCESTOR_OVERRIDE", + status: "OK", + attributes: { + error: { + message: spanEvent.properties.exception.message, + name: spanEvent.properties.exception.type, + stackTrace: spanEvent.properties.exception.stacktrace, + }, + }, + metadata: JSON.stringify(spanEvent.properties), + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) // 1 year + ), + }; + } + + private createTaskEventV1InputFromOtherEvent( + spanEvent: OtherSpanEvent, + event: CreateEventInput + ): TaskEventV1Input { + return { + environment_id: event.environmentId, + organization_id: event.organizationId, + project_id: event.projectId, + task_identifier: event.taskSlug, + run_id: event.runId, + start_time: convertDateToNanoseconds(spanEvent.time).toString(), + duration: "0", // Events have no duration + trace_id: event.traceId, + span_id: event.spanId, + parent_span_id: event.parentId ?? "", + message: spanEvent.name, + kind: "SPAN_EVENT", + status: "OK", + attributes: {}, + metadata: JSON.stringify(unflattenAttributes(spanEvent.properties as Attributes)), + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) // 1 year + ), + }; + } + + private createEventToTaskEventV1InputKind(event: CreateEventInput): string { + if (event.kind === "UNSPECIFIED") { + return "ANCESTOR_OVERRIDE"; + } + + if (event.level === "TRACE") { + return "SPAN"; + } + + if (event.isDebug) { + return "DEBUG_EVENT"; + } + + return `LOG_${(event.level ?? "LOG").toUpperCase()}`; + } + + private createEventToTaskEventV1InputStatus(event: CreateEventInput): string { + if (event.isPartial) { + return "PARTIAL"; + } + + if (event.isError) { + return "ERROR"; + } + + if (event.isCancelled) { + return "CANCELLED"; + } + + return "OK"; + } + + private createEventToTaskEventV1InputAttributes(attributes: Attributes): Record { + const publicAttributes = removePrivateProperties(attributes); + + if (!publicAttributes) { + return {}; + } + + const unflattenedAttributes = unflattenAttributes(publicAttributes); + + if (unflattenedAttributes && typeof unflattenedAttributes === "object") { + return { + ...unflattenedAttributes, + }; + } + + return {}; + } + + private createEventToTaskEventV1InputMetadata(event: CreateEventInput): string { + return JSON.stringify({ + style: event.style ? 
unflattenAttributes(event.style) : undefined, + attemptNumber: event.attemptNumber, + entity: this.extractEntityFromAttributes(event.properties), + }); + } + + private extractEntityFromAttributes( + attributes: Attributes + ): { entityType: string; entityId?: string } | undefined { + if (!attributes || typeof attributes !== "object") { + return undefined; + } + + const entityType = attributes[SemanticInternalAttributes.ENTITY_TYPE]; + const entityId = attributes[SemanticInternalAttributes.ENTITY_ID]; + + if (typeof entityType !== "string") { + return undefined; + } + + return { entityType, entityId: entityId as string | undefined }; + } + + private addToBatch(events: TaskEventV1Input[] | TaskEventV1Input) { + this._flushScheduler.addToBatch(Array.isArray(events) ? events : [events]); + } + + // Event recording methods + async recordEvent( + message: string, + options: TraceEventOptions & { duration?: number; parentId?: string } + ): Promise { + const propagatedContext = extractContextFromCarrier(options.context ?? {}); + + const startTime = options.startTime ?? getNowInNanoseconds(); + const duration = + options.duration ?? + (options.endTime ? calculateDurationFromStart(startTime, options.endTime) : 100); + + const traceId = propagatedContext?.traceparent?.traceId ?? generateTraceId(); + const parentId = options.parentId ?? propagatedContext?.traceparent?.spanId; + const spanId = options.spanIdSeed + ? generateDeterministicSpanId(traceId, options.spanIdSeed) + : generateSpanId(); + + if (!options.attributes.runId) { + throw new Error("runId is required"); + } + + const kind = options.attributes.isDebug ? "DEBUG_EVENT" : "SPAN"; + + const metadata = { + style: { + icon: options.attributes.isDebug ? "warn" : "play", + }, + ...options.attributes.metadata, + }; + + const event: TaskEventV1Input = { + environment_id: options.environment.id, + organization_id: options.environment.organizationId, + project_id: options.environment.projectId, + task_identifier: options.taskSlug, + run_id: options.attributes.runId, + start_time: startTime.toString(), + duration: duration.toString(), + trace_id: traceId, + span_id: spanId, + parent_span_id: parentId ?? "", + message, + kind, + status: "OK", + attributes: options.attributes.properties + ? this.createEventToTaskEventV1InputAttributes(options.attributes.properties) + : undefined, + metadata: JSON.stringify(metadata), + // TODO: make sure configurable and by org + expires_at: convertDateToClickhouseDateTime(new Date(Date.now() + 365 * 24 * 60 * 60 * 1000)), + }; + + this._flushScheduler.addToBatch([event]); + } + + async traceEvent( + message: string, + options: TraceEventOptions & { incomplete?: boolean; isError?: boolean }, + callback: ( + e: EventBuilder, + traceContext: Record, + traceparent?: { traceId: string; spanId: string } + ) => Promise + ): Promise { + const propagatedContext = extractContextFromCarrier(options.context ?? {}); + + const start = process.hrtime.bigint(); + const startTime = options.startTime ?? getNowInNanoseconds(); + + const traceId = options.spanParentAsLink + ? generateTraceId() + : propagatedContext?.traceparent?.traceId ?? generateTraceId(); + const parentId = options.spanParentAsLink ? undefined : propagatedContext?.traceparent?.spanId; + const spanId = options.spanIdSeed + ? 
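+    // A seeded span id is derived deterministically from (traceId, spanIdSeed),
+    // so the same logical operation replayed in the same trace reproduces the
+    // same id; sketch of the assumed property:
+    //   generateDeterministicSpanId("trace-a", "step-1") === generateDeterministicSpanId("trace-a", "step-1")
+    // Without a seed, a random span id is generated.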
generateDeterministicSpanId(traceId, options.spanIdSeed) + : generateSpanId(); + + const traceContext = { + ...options.context, + traceparent: serializeTraceparent(traceId, spanId), + }; + + let isStopped = false; + let failedWithError: TaskRunError | undefined; + + const eventBuilder = { + traceId, + spanId, + setAttribute: (key: keyof TraceAttributes, value: TraceAttributes[keyof TraceAttributes]) => { + if (value) { + // We need to merge the attributes with the existing attributes + const existingValue = options.attributes[key]; + + if (existingValue && typeof existingValue === "object" && typeof value === "object") { + // @ts-ignore + options.attributes[key] = { ...existingValue, ...value }; + } else { + // @ts-ignore + options.attributes[key] = value; + } + } + }, + stop: () => { + isStopped = true; + }, + failWithError: (error: TaskRunError) => { + failedWithError = error; + }, + }; + + const result = await callback(eventBuilder, traceContext, propagatedContext?.traceparent); + + if (isStopped) { + return result; + } + + const duration = process.hrtime.bigint() - start; + + if (!options.attributes.runId) { + throw new Error("runId is required"); + } + + const metadata = { + style: { + icon: "task", + variant: PRIMARY_VARIANT, + ...options.attributes.style, + }, + ...options.attributes.metadata, + }; + + const event: TaskEventV1Input = { + environment_id: options.environment.id, + organization_id: options.environment.organizationId, + project_id: options.environment.projectId, + task_identifier: options.taskSlug, + run_id: options.attributes.runId, + start_time: startTime.toString(), + duration: String(options.incomplete ? 0 : duration), + trace_id: traceId, + span_id: spanId, + parent_span_id: parentId ?? "", + message, + kind: "SPAN", + status: failedWithError ? "ERROR" : options.incomplete ? "PARTIAL" : "OK", + attributes: options.attributes.properties + ? this.createEventToTaskEventV1InputAttributes(options.attributes.properties) + : {}, + metadata: JSON.stringify(metadata), + // TODO: make sure configurable and by org + expires_at: convertDateToClickhouseDateTime(new Date(Date.now() + 365 * 24 * 60 * 60 * 1000)), + }; + + const originalRunId = + options.attributes.properties?.[SemanticInternalAttributes.ORIGINAL_RUN_ID]; + + if (typeof originalRunId === "string") { + await originalRunIdCache.set(traceId, spanId, originalRunId); + } + + const events = [event]; + + if (failedWithError) { + const error = createJsonErrorObject(failedWithError); + + events.push({ + environment_id: options.environment.id, + organization_id: options.environment.organizationId, + project_id: options.environment.projectId, + task_identifier: options.taskSlug, + run_id: options.attributes.runId, + start_time: startTime.toString(), + duration: String(options.incomplete ? 0 : duration), + trace_id: traceId, + span_id: spanId, + parent_span_id: parentId ?? 
"", + message: "exception", + kind: "SPAN_EVENT", + status: "ERROR", + attributes: { + error, + }, + metadata: JSON.stringify({ + exception: createExceptionPropertiesFromError(failedWithError), + }), + // TODO: make sure configurable and by org + expires_at: convertDateToClickhouseDateTime( + new Date(Date.now() + 365 * 24 * 60 * 60 * 1000) + ), + }); + } + + this._flushScheduler.addToBatch(events); + + return result; + } + + // Run event completion methods + async completeSuccessfulRunEvent({ + run, + endTime, + }: { + run: CompleteableTaskRun; + endTime?: Date; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(run.createdAt); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: run.friendlyId, + start_time: startTime.toString(), + duration: calculateDurationFromStart(startTime, endTime ?? new Date()).toString(), + trace_id: run.traceId, + span_id: run.spanId, + parent_span_id: run.parentSpanId ?? "", + message: run.taskIdentifier, + kind: "SPAN", + status: "OK", + attributes: {}, + metadata: "{}", + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + async completeCachedRunEvent({ + run, + blockedRun, + spanId, + parentSpanId, + spanCreatedAt, + isError, + endTime, + }: { + run: CompleteableTaskRun; + blockedRun: CompleteableTaskRun; + spanId: string; + parentSpanId: string; + spanCreatedAt: Date; + isError: boolean; + endTime?: Date; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(spanCreatedAt); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: blockedRun.friendlyId, + start_time: startTime.toString(), + duration: calculateDurationFromStart(startTime, endTime ?? new Date()).toString(), + trace_id: blockedRun.traceId, + span_id: spanId, + parent_span_id: parentSpanId, + message: run.taskIdentifier, + kind: "SPAN", + status: isError ? "ERROR" : "OK", + attributes: {}, + metadata: "{}", + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + async completeFailedRunEvent({ + run, + endTime, + exception, + }: { + run: CompleteableTaskRun; + endTime?: Date; + exception: { message?: string; type?: string; stacktrace?: string }; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(run.createdAt); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: run.friendlyId, + start_time: startTime.toString(), + duration: calculateDurationFromStart(startTime, endTime ?? new Date()).toString(), + trace_id: run.traceId, + span_id: run.spanId, + parent_span_id: run.parentSpanId ?? 
"", + message: run.taskIdentifier, + kind: "SPAN", + status: "ERROR", + attributes: { + error: { + name: exception.type, + message: exception.message, + stackTrace: exception.stacktrace, + }, + }, + metadata: "{}", + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + async completeExpiredRunEvent({ + run, + endTime, + ttl, + }: { + run: CompleteableTaskRun; + endTime?: Date; + ttl: string; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(run.createdAt); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: run.friendlyId, + start_time: startTime.toString(), + duration: calculateDurationFromStart(startTime, endTime ?? new Date()).toString(), + trace_id: run.traceId, + span_id: run.spanId, + parent_span_id: run.parentSpanId ?? "", + message: run.taskIdentifier, + kind: "SPAN", + status: "ERROR", + attributes: { + error: { + message: `Run expired because the TTL (${ttl}) was reached`, + }, + }, + metadata: "{}", + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + async createAttemptFailedRunEvent({ + run, + endTime, + attemptNumber, + exception, + }: { + run: CompleteableTaskRun; + endTime?: Date; + attemptNumber: number; + exception: { message?: string; type?: string; stacktrace?: string }; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(endTime ?? new Date()); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: run.friendlyId, + start_time: startTime.toString(), + duration: "0", + trace_id: run.traceId, + span_id: run.spanId, + parent_span_id: run.parentSpanId ?? "", + message: "attempt_failed", + kind: "ANCESTOR_OVERRIDE", + status: "OK", + attributes: {}, + metadata: JSON.stringify({ + exception, + attemptNumber, + runId: run.friendlyId, + }), + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + async cancelRunEvent({ + reason, + run, + cancelledAt, + }: { + reason: string; + run: CompleteableTaskRun; + cancelledAt: Date; + }): Promise { + if (!run.organizationId) { + return; + } + + const startTime = convertDateToNanoseconds(run.createdAt); + const expiresAt = convertDateToClickhouseDateTime( + new Date(run.createdAt.getTime() + 30 * 24 * 60 * 60 * 1000) + ); + + const event: TaskEventV1Input = { + environment_id: run.runtimeEnvironmentId, + organization_id: run.organizationId, + project_id: run.projectId, + task_identifier: run.taskIdentifier, + run_id: run.friendlyId, + start_time: startTime.toString(), + duration: calculateDurationFromStart(startTime, cancelledAt).toString(), + trace_id: run.traceId, + span_id: run.spanId, + parent_span_id: run.parentSpanId ?? 
"", + message: run.taskIdentifier, + kind: "SPAN", + status: "CANCELLED", + attributes: {}, + metadata: JSON.stringify({ + reason, + }), + expires_at: expiresAt, + }; + + this.addToBatch(event); + } + + // Query methods + async getTraceSummary( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise { + const startCreatedAtWithBuffer = new Date(startCreatedAt.getTime() - 1000); + + const queryBuilder = this._clickhouse.taskEvents.traceSummaryQueryBuilder(); + + queryBuilder.where("environment_id = {environmentId: String}", { environmentId }); + queryBuilder.where("trace_id = {traceId: String}", { traceId }); + queryBuilder.where("start_time >= {startCreatedAt: String}", { + startCreatedAt: convertDateToNanoseconds(startCreatedAtWithBuffer).toString(), + }); + + if (endCreatedAt) { + queryBuilder.where("start_time <= {endCreatedAt: String}", { + endCreatedAt: convertDateToNanoseconds(endCreatedAt).toString(), + }); + } + + if (options?.includeDebugLogs === false) { + queryBuilder.where("kind != {kind: String}", { kind: "DEBUG_EVENT" }); + } + + queryBuilder.orderBy("start_time ASC"); + + if (this._config.maximumTraceSummaryViewCount) { + queryBuilder.limit(this._config.maximumTraceSummaryViewCount); + } + + const [queryError, records] = await queryBuilder.execute(); + + if (queryError) { + throw queryError; + } + + if (!records) { + return; + } + + // O(n) grouping instead of O(n²) array spreading + const recordsGroupedBySpanId: Record = {}; + for (const record of records) { + if (!recordsGroupedBySpanId[record.span_id]) { + recordsGroupedBySpanId[record.span_id] = []; + } + recordsGroupedBySpanId[record.span_id].push(record); + } + + const spanSummaries = new Map(); + let rootSpanId: string | undefined; + + // Create temporary metadata cache for this query + const metadataCache = new Map>(); + + for (const [spanId, spanRecords] of Object.entries(recordsGroupedBySpanId)) { + const spanSummary = this.#mergeRecordsIntoSpanSummary(spanId, spanRecords, metadataCache); + + if (!spanSummary) { + continue; + } + + spanSummaries.set(spanId, spanSummary); + + if (!rootSpanId && !spanSummary.parentId) { + rootSpanId = spanId; + } + } + + if (!rootSpanId) { + return; + } + + const spans = Array.from(spanSummaries.values()); + const rootSpan = spanSummaries.get(rootSpanId); + + if (!rootSpan) { + return; + } + + const overridesBySpanId: Record = {}; + + const finalSpans = spans.map((span) => { + return this.#applyAncestorOverrides(span, spanSummaries, overridesBySpanId); + }); + + return { + rootSpan, + spans: finalSpans, + overridesBySpanId, + }; + } + + async getSpan( + storeTable: TaskEventStoreTable, + environmentId: string, + spanId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise { + const startCreatedAtWithBuffer = new Date(startCreatedAt.getTime() - 1000); + + const queryBuilder = this._clickhouse.taskEvents.spanDetailsQueryBuilder(); + + queryBuilder.where("environment_id = {environmentId: String}", { environmentId }); + queryBuilder.where("trace_id = {traceId: String}", { traceId }); + queryBuilder.where("span_id = {spanId: String}", { spanId }); + queryBuilder.where("start_time >= {startCreatedAt: String}", { + startCreatedAt: convertDateToNanoseconds(startCreatedAtWithBuffer).toString(), + }); + + if (endCreatedAt) { + queryBuilder.where("start_time <= {endCreatedAt: String}", { + 
endCreatedAt: convertDateToNanoseconds(endCreatedAt).toString(), + }); + } + + queryBuilder.orderBy("start_time ASC"); + + const [queryError, records] = await queryBuilder.execute(); + + if (queryError) { + throw queryError; + } + + if (!records) { + return; + } + + // Create temporary metadata cache for this query + const metadataCache = new Map>(); + const span = this.#mergeRecordsIntoSpanDetail(spanId, records, metadataCache); + + return span; + } + + async getSpanOriginalRunId( + storeTable: TaskEventStoreTable, + environmentId: string, + spanId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date + ): Promise { + return await originalRunIdCache.lookup(traceId, spanId); + } + + #mergeRecordsIntoSpanDetail( + spanId: string, + records: TaskEventDetailsV1Result[], + metadataCache: Map> + ): SpanDetail | undefined { + if (records.length === 0) { + return undefined; + } + + let span: SpanDetail | undefined; + + for (const record of records) { + if (!span) { + span = { + spanId: spanId, + parentId: record.parent_span_id ? record.parent_span_id : null, + message: record.message, + isError: false, + isPartial: true, // Partial by default, can only be set to false + isCancelled: false, + level: kindToLevel(record.kind), + startTime: convertClickhouseDateTime64ToJsDate(record.start_time), + duration: typeof record.duration === "number" ? record.duration : Number(record.duration), + events: [], + style: {}, + properties: undefined, + entity: { + type: undefined, + id: undefined, + }, + metadata: {}, + }; + } + + if (isLogEvent(record.kind)) { + span.isPartial = false; + span.isCancelled = false; + span.isError = record.status === "ERROR"; + } + + const parsedMetadata = this.#parseMetadata(record.metadata, metadataCache); + + if (record.kind === "SPAN_EVENT") { + // We need to add an event to the span + span.events.push({ + name: record.message, + time: convertClickhouseDateTime64ToJsDate(record.start_time), + properties: parsedMetadata ?? {}, + }); + } + + if (parsedMetadata && "style" in parsedMetadata && parsedMetadata.style) { + span.style = parsedMetadata.style as TaskEventStyle; + } + + if ( + parsedMetadata && + "entity" in parsedMetadata && + typeof parsedMetadata.entity === "object" && + parsedMetadata.entity + ) { + span.entity = parsedMetadata.entity as { type: string | undefined; id: string | undefined }; + } + + if (record.kind === "SPAN") { + if (record.status === "ERROR") { + span.isError = true; + span.isPartial = false; + span.isCancelled = false; + } else if (record.status === "CANCELLED") { + span.isCancelled = true; + span.isPartial = false; + span.isError = false; + } else if (record.status === "OK") { + span.isPartial = false; + } + + if (record.status !== "PARTIAL") { + span.duration = + typeof record.duration === "number" ? 
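+          // Durations are inserted as stringified nanosecond counts, and the
+          // ClickHouse client may hand 64-bit numerics back as either number or
+          // string depending on driver settings, so reads coerce defensively;
+          // a minimal sketch of the same guard:
+          //   const durationNs = typeof raw === "number" ? raw : Number(raw);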
record.duration : Number(record.duration); + } else { + span.startTime = convertClickhouseDateTime64ToJsDate(record.start_time); + span.message = record.message; + } + } + + if (!span.properties && typeof record.attributes_text === "string") { + span.properties = this.#parseAttributes(record.attributes_text); + } + } + + return span; + } + + #parseAttributes(attributes_text: string): Record { + if (!attributes_text) { + return {}; + } + + return JSON.parse(attributes_text) as Record; + } + + #applyAncestorOverrides( + span: TSpanSummary, + spansById: Map, + overridesBySpanId: Record + ): TSpanSummary { + if (span.data.level !== "TRACE") { + return span; + } + + if (!span.data.isPartial) { + return span; + } + + if (!span.parentId) { + return span; + } + + // Now we need to walk the ancestors of the span by span.parentId + // The first ancestor that is a TRACE span that is "closed" we will use to override the span + let parentSpanId: string | undefined = span.parentId; + let overrideSpan: TSpanSummary | undefined; + + while (parentSpanId) { + const parentSpan = spansById.get(parentSpanId); + + if (!parentSpan) { + break; + } + + if (parentSpan.data.level === "TRACE" && !parentSpan.data.isPartial) { + overrideSpan = parentSpan; + break; + } + + parentSpanId = parentSpan.parentId; + } + + if (overrideSpan) { + return this.#applyAncestorToSpan(span, overrideSpan, overridesBySpanId); + } + + return span; + } + + #applyAncestorToSpan( + span: TSpanSummary, + overrideSpan: TSpanSummary, + overridesBySpanId: Record + ): TSpanSummary { + if (overridesBySpanId[span.id]) { + return span; + } + + let override: SpanOverride | undefined = undefined; + + const overrideEndTime = calculateEndTimeFromStartTime( + overrideSpan.data.startTime, + overrideSpan.data.duration + ); + + if (overrideSpan.data.isCancelled) { + override = { + isCancelled: true, + duration: calculateDurationFromStartJsDate(span.data.startTime, overrideEndTime), + }; + + span.data.isCancelled = true; + span.data.isPartial = false; + span.data.isError = false; + span.data.duration = calculateDurationFromStartJsDate(span.data.startTime, overrideEndTime); + + const cancellationEvent = overrideSpan.data.events.find( + (event) => event.name === "cancellation" + ); + + if (cancellationEvent) { + span.data.events.push(cancellationEvent); + override.events = [cancellationEvent]; + } + } + + if (overrideSpan.data.isError && span.data.attemptNumber) { + const attemptFailedEvent = overrideSpan.data.events.find( + (event) => + event.name === "attempt_failed" && + event.properties.attemptNumber === span.data.attemptNumber && + event.properties.runId === span.runId + ) as AttemptFailedSpanEvent | undefined; + + if (attemptFailedEvent) { + const exceptionEvent = { + name: "exception", + time: attemptFailedEvent.time, + properties: { + exception: attemptFailedEvent.properties.exception, + }, + } satisfies ExceptionSpanEvent; + + span.data.isError = true; + span.data.isPartial = false; + span.data.isCancelled = false; + span.data.duration = calculateDurationFromStartJsDate(span.data.startTime, overrideEndTime); + span.data.events.push(exceptionEvent); + span.data.events.push(attemptFailedEvent); + + override = { + isError: true, + events: [exceptionEvent], + duration: calculateDurationFromStartJsDate(span.data.startTime, overrideEndTime), + }; + } + } + + if (override) { + overridesBySpanId[span.id] = override; + } + + return span; + } + + #mergeRecordsIntoSpanSummary( + spanId: string, + records: TaskEventSummaryV1Result[], + metadataCache: Map> + ): 
SpanSummary | undefined { + if (records.length === 0) { + return undefined; + } + + let span: SpanSummary | undefined; + + for (const record of records) { + if (!span) { + span = { + id: spanId, + parentId: record.parent_span_id ? record.parent_span_id : undefined, + runId: record.run_id, + data: { + message: record.message, + style: {}, + duration: + typeof record.duration === "number" ? record.duration : Number(record.duration), + isError: false, + isPartial: true, // Partial by default, can only be set to false + isCancelled: false, + isDebug: record.kind === "DEBUG_EVENT", + startTime: convertClickhouseDateTime64ToJsDate(record.start_time), + level: kindToLevel(record.kind), + events: [], + }, + }; + } + + if (isLogEvent(record.kind)) { + span.data.isPartial = false; + span.data.isCancelled = false; + span.data.isError = record.status === "ERROR"; + } + + const parsedMetadata = this.#parseMetadata(record.metadata, metadataCache); + + if ( + parsedMetadata && + "attemptNumber" in parsedMetadata && + typeof parsedMetadata.attemptNumber === "number" + ) { + span.data.attemptNumber = parsedMetadata.attemptNumber; + } + + if (record.kind === "ANCESTOR_OVERRIDE" || record.kind === "SPAN_EVENT") { + // We need to add an event to the span + span.data.events.push({ + name: record.message, + time: convertClickhouseDateTime64ToJsDate(record.start_time), + properties: parsedMetadata ?? {}, + }); + } + + if (parsedMetadata && "style" in parsedMetadata && parsedMetadata.style) { + span.data.style = parsedMetadata.style as TaskEventStyle; + } + + if (record.kind === "SPAN") { + if (record.status === "ERROR") { + span.data.isError = true; + span.data.isPartial = false; + span.data.isCancelled = false; + } else if (record.status === "CANCELLED") { + span.data.isCancelled = true; + span.data.isPartial = false; + span.data.isError = false; + } else if (record.status === "OK") { + span.data.isPartial = false; + } + + if (record.status !== "PARTIAL") { + span.data.duration = + typeof record.duration === "number" ? 
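+          // Merging is a small state machine over all rows sharing a span id: a
+          // span starts isPartial=true; a LOG_*/DEBUG_EVENT row completes it
+          // immediately; a SPAN row with OK/ERROR/CANCELLED completes it and
+          // fixes the final duration below; a PARTIAL row only refreshes
+          // startTime and message. E.g. the rows [PARTIAL, SPAN_EVENT, OK]
+          // collapse to one finished span with one event.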
record.duration : Number(record.duration); + } else { + span.data.startTime = convertClickhouseDateTime64ToJsDate(record.start_time); + span.data.message = record.message; + } + } + } + + return span; + } + + #parseMetadata( + metadata: string, + cache: Map> + ): Record | undefined { + if (!metadata) { + return undefined; + } + + // Check cache first + const cached = cache.get(metadata); + if (cached) { + return cached; + } + + const parsed = JSON.parse(metadata); + + if (typeof parsed !== "object" || parsed === null) { + return undefined; + } + + const result = parsed as Record; + + // Cache the result - no size limit needed since cache is per-query + cache.set(metadata, result); + + return result; + } + + async getTraceDetailedSummary( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise { + const startCreatedAtWithBuffer = new Date(startCreatedAt.getTime() - 1000); + + const queryBuilder = this._clickhouse.taskEvents.traceDetailedSummaryQueryBuilder(); + + queryBuilder.where("environment_id = {environmentId: String}", { environmentId }); + queryBuilder.where("trace_id = {traceId: String}", { traceId }); + queryBuilder.where("start_time >= {startCreatedAt: String}", { + startCreatedAt: convertDateToNanoseconds(startCreatedAtWithBuffer).toString(), + }); + + if (endCreatedAt) { + queryBuilder.where("start_time <= {endCreatedAt: String}", { + endCreatedAt: convertDateToNanoseconds(endCreatedAt).toString(), + }); + } + + if (options?.includeDebugLogs === false) { + queryBuilder.where("kind != {kind: String}", { kind: "DEBUG_EVENT" }); + } + + queryBuilder.orderBy("start_time ASC"); + + if (this._config.maximumTraceDetailedSummaryViewCount) { + queryBuilder.limit(this._config.maximumTraceDetailedSummaryViewCount); + } + + const [queryError, records] = await queryBuilder.execute(); + + if (queryError) { + throw queryError; + } + + if (!records) { + return; + } + + // O(n) grouping instead of O(n²) array spreading + const recordsGroupedBySpanId: Record = {}; + for (const record of records) { + if (!recordsGroupedBySpanId[record.span_id]) { + recordsGroupedBySpanId[record.span_id] = []; + } + recordsGroupedBySpanId[record.span_id].push(record); + } + + const spanSummaries = new Map(); + let rootSpanId: string | undefined; + + // Create temporary metadata cache for this query + const metadataCache = new Map>(); + + for (const [spanId, spanRecords] of Object.entries(recordsGroupedBySpanId)) { + const spanSummary = this.#mergeRecordsIntoSpanDetailedSummary( + spanId, + spanRecords, + metadataCache + ); + + if (!spanSummary) { + continue; + } + + spanSummaries.set(spanId, spanSummary); + + if (!rootSpanId && !spanSummary.parentId) { + rootSpanId = spanId; + } + } + + if (!rootSpanId) { + return; + } + + const spans = Array.from(spanSummaries.values()); + + const overridesBySpanId: Record = {}; + const spanDetailedSummaryMap = new Map(); + + const finalSpans = spans.map((span) => { + const finalSpan = this.#applyAncestorOverrides(span, spanSummaries, overridesBySpanId); + spanDetailedSummaryMap.set(span.id, finalSpan); + return finalSpan; + }); + + // Second pass: build parent-child relationships + for (const finalSpan of finalSpans) { + if (finalSpan.parentId) { + const parent = spanDetailedSummaryMap.get(finalSpan.parentId); + if (parent) { + parent.children.push(finalSpan); + } + } + } + + const rootSpan = spanDetailedSummaryMap.get(rootSpanId); + + if (!rootSpan) { + 
return; + } + + return { + traceId, + rootSpan, + }; + } + + #mergeRecordsIntoSpanDetailedSummary( + spanId: string, + records: TaskEventDetailedSummaryV1Result[], + metadataCache: Map> + ): SpanDetailedSummary | undefined { + if (records.length === 0) { + return undefined; + } + + let span: SpanDetailedSummary | undefined; + + for (const record of records) { + if (!span) { + span = { + id: spanId, + parentId: record.parent_span_id ? record.parent_span_id : undefined, + runId: record.run_id, + data: { + message: record.message, + taskSlug: undefined, + duration: + typeof record.duration === "number" ? record.duration : Number(record.duration), + isError: false, + isPartial: true, // Partial by default, can only be set to false + isCancelled: false, + startTime: convertClickhouseDateTime64ToJsDate(record.start_time), + level: kindToLevel(record.kind), + events: [], + }, + children: [], + }; + } + + if (isLogEvent(record.kind)) { + span.data.isPartial = false; + span.data.isCancelled = false; + span.data.isError = record.status === "ERROR"; + } + + const parsedMetadata = this.#parseMetadata(record.metadata, metadataCache); + + if ( + parsedMetadata && + "attemptNumber" in parsedMetadata && + typeof parsedMetadata.attemptNumber === "number" + ) { + span.data.attemptNumber = parsedMetadata.attemptNumber; + } + + if (record.kind === "ANCESTOR_OVERRIDE" || record.kind === "SPAN_EVENT") { + // We need to add an event to the span + span.data.events.push({ + name: record.message, + time: convertClickhouseDateTime64ToJsDate(record.start_time), + properties: parsedMetadata ?? {}, + }); + } + + if (record.kind === "SPAN") { + if (record.status === "ERROR") { + span.data.isError = true; + span.data.isPartial = false; + span.data.isCancelled = false; + } else if (record.status === "CANCELLED") { + span.data.isCancelled = true; + span.data.isPartial = false; + span.data.isError = false; + } else if (record.status === "OK") { + span.data.isPartial = false; + } + + if (record.status !== "PARTIAL") { + span.data.duration = + typeof record.duration === "number" ? 
record.duration : Number(record.duration); + } else { + span.data.startTime = convertClickhouseDateTime64ToJsDate(record.start_time); + span.data.message = record.message; + } + } + } + + return span; + } + + async getRunEvents( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + runId: string, + startCreatedAt: Date, + endCreatedAt?: Date + ): Promise { + const startCreatedAtWithBuffer = new Date(startCreatedAt.getTime() - 1000); + + const queryBuilder = this._clickhouse.taskEvents.traceSummaryQueryBuilder(); + + queryBuilder.where("environment_id = {environmentId: String}", { environmentId }); + queryBuilder.where("trace_id = {traceId: String}", { traceId }); + queryBuilder.where("run_id = {runId: String}", { runId }); + queryBuilder.where("start_time >= {startCreatedAt: String}", { + startCreatedAt: convertDateToNanoseconds(startCreatedAtWithBuffer).toString(), + }); + + if (endCreatedAt) { + queryBuilder.where("start_time <= {endCreatedAt: String}", { + endCreatedAt: convertDateToNanoseconds(endCreatedAt).toString(), + }); + } + + queryBuilder.where("kind != {kind: String}", { kind: "DEBUG_EVENT" }); + queryBuilder.orderBy("start_time ASC"); + + if (this._config.maximumTraceSummaryViewCount) { + queryBuilder.limit(this._config.maximumTraceSummaryViewCount); + } + + const [queryError, records] = await queryBuilder.execute(); + + if (queryError) { + throw queryError; + } + + if (!records) { + return []; + } + + // O(n) grouping instead of O(n²) array spreading + const recordsGroupedBySpanId: Record = {}; + for (const record of records) { + if (!recordsGroupedBySpanId[record.span_id]) { + recordsGroupedBySpanId[record.span_id] = []; + } + recordsGroupedBySpanId[record.span_id].push(record); + } + + const spanSummaries = new Map(); + let rootSpanId: string | undefined; + + // Create temporary metadata cache for this query + const metadataCache = new Map>(); + + for (const [spanId, spanRecords] of Object.entries(recordsGroupedBySpanId)) { + const spanSummary = this.#mergeRecordsIntoSpanSummary(spanId, spanRecords, metadataCache); + + if (!spanSummary) { + continue; + } + + spanSummaries.set(spanId, spanSummary); + + // Find root span for optimized override algorithm + if (!rootSpanId && !spanSummary.parentId) { + rootSpanId = spanId; + } + } + + const spans = Array.from(spanSummaries.values()); + + const overridesBySpanId: Record = {}; + + const finalSpans = spans.map((span) => { + return this.#applyAncestorOverrides(span, spanSummaries, overridesBySpanId); + }); + + const runPreparedEvents = finalSpans.map((span) => this.#spanSummaryToRunPreparedEvent(span)); + + return runPreparedEvents; + } + + #spanSummaryToRunPreparedEvent(span: SpanSummary): RunPreparedEvent { + return { + spanId: span.id, + parentId: span.parentId ?? null, + runId: span.runId, + message: span.data.message, + style: span.data.style, + events: span.data.events, + startTime: convertDateToNanoseconds(span.data.startTime), + duration: span.data.duration, + isError: span.data.isError, + isPartial: span.data.isPartial, + isCancelled: span.data.isCancelled, + kind: "UNSPECIFIED", + attemptNumber: span.data.attemptNumber ?? 
+  #spanSummaryToRunPreparedEvent(span: SpanSummary): RunPreparedEvent {
+    return {
+      spanId: span.id,
+      parentId: span.parentId ?? null,
+      runId: span.runId,
+      message: span.data.message,
+      style: span.data.style,
+      events: span.data.events,
+      startTime: convertDateToNanoseconds(span.data.startTime),
+      duration: span.data.duration,
+      isError: span.data.isError,
+      isPartial: span.data.isPartial,
+      isCancelled: span.data.isCancelled,
+      kind: "UNSPECIFIED",
+      attemptNumber: span.data.attemptNumber ?? null,
+      level: span.data.level,
+    };
+  }
+}
+
+// Precompile regex for performance (used ~30k times per trace)
+const CLICKHOUSE_DATETIME_REGEX =
+  /^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})(?:\.(\d{1,9}))?(?:Z|([+-])(\d{2}):?(\d{2}))?$/;
+
+export const convertDateToClickhouseDateTime = (date: Date): string => {
+  // 2024-11-06T20:37:00.123Z -> 2024-11-06 20:37:00.123
+  return date.toISOString().replace("T", " ").replace("Z", "");
+};
+
+/**
+ * Convert a ClickHouse DateTime64 to nanoseconds since epoch (UTC).
+ * Accepts:
+ * - "2025-09-23 12:32:46.130262875"
+ * - "2025-09-23T12:32:46.13"
+ * - "2025-09-23 12:32:46Z"
+ * - "2025-09-23 12:32:46.130262875+02:00"
+ */
+export function convertClickhouseDateTime64ToNanosecondsEpoch(date: string): bigint {
+  const s = date.trim();
+  const m = CLICKHOUSE_DATETIME_REGEX.exec(s);
+  if (!m) {
+    throw new Error(`Invalid ClickHouse DateTime64 string: "${date}"`);
+  }
+
+  const year = Number(m[1]);
+  const month = Number(m[2]); // 1-12
+  const day = Number(m[3]); // 1-31
+  const hour = Number(m[4]);
+  const minute = Number(m[5]);
+  const second = Number(m[6]);
+  const fraction = m[7] ?? ""; // up to 9 digits
+  const sign = m[8] as "+" | "-" | undefined;
+  const offH = m[9] ? Number(m[9]) : 0;
+  const offM = m[10] ? Number(m[10]) : 0;
+
+  // Convert fractional seconds to exactly 9 digits (nanoseconds within the second).
+  const nsWithinSecond = Number(fraction.padEnd(9, "0")); // 0..999_999_999
+
+  // Split into millisecond part (for Date) and leftover nanoseconds.
+  const msPart = Math.trunc(nsWithinSecond / 1_000_000); // 0..999
+  const leftoverNs = nsWithinSecond - msPart * 1_000_000; // 0..999_999
+
+  // Build milliseconds since epoch in UTC using Date.UTC (avoids local TZ/DST issues).
+  let msEpoch = Date.UTC(year, month - 1, day, hour, minute, second, msPart);
+
+  // If an explicit offset was provided, adjust to true UTC.
+  if (sign) {
+    const offsetMinutesSigned = (sign === "+" ? 1 : -1) * (offH * 60 + offM);
+    msEpoch -= offsetMinutesSigned * 60_000;
+  }
+
+  // Combine ms to ns with leftover.
+  return BigInt(msEpoch) * 1_000_000n + BigInt(leftoverNs);
+}
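+
+// Quick sanity check (illustrative only, not part of the module):
+//   convertClickhouseDateTime64ToNanosecondsEpoch("2024-01-01 00:00:00.000000001")
+//     === 1_704_067_200_000_000_001n  (2024-01-01T00:00:00Z is 1_704_067_200s since epoch)
+//   convertClickhouseDateTime64ToNanosecondsEpoch("2024-01-01 02:00:00+02:00")
+//     === 1_704_067_200_000_000_000n  (the +02:00 offset is normalized back to UTC)
+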
+/**
+ * Convert a ClickHouse DateTime64 to a JS Date.
+ * Accepts:
+ * - "2025-09-23 12:32:46.130262875"
+ * - "2025-09-23T12:32:46.13"
+ * - "2025-09-23 12:32:46Z"
+ * - "2025-09-23 12:32:46.130262875+02:00"
+ *
+ * Optimized with fast path for common format (avoids regex for 99% of cases).
+ */
+export function convertClickhouseDateTime64ToJsDate(date: string): Date {
+  // Fast path for common format: "2025-09-23 12:32:46.130262875" or "2025-09-23 12:32:46"
+  // This avoids the expensive regex for the common case. Strings carrying a trailing
+  // "Z" or an explicit offset after the seconds must take the regex path below,
+  // otherwise the offset would be silently ignored.
+  if (
+    date.length >= 19 &&
+    date[4] === "-" &&
+    date[7] === "-" &&
+    date[10] === " " &&
+    !date.includes("Z", 19) &&
+    !date.includes("+", 19) &&
+    !date.includes("-", 19)
+  ) {
+    const year = Number(date.substring(0, 4));
+    const month = Number(date.substring(5, 7));
+    const day = Number(date.substring(8, 10));
+    const hour = Number(date.substring(11, 13));
+    const minute = Number(date.substring(14, 16));
+    const second = Number(date.substring(17, 19));
+
+    // Parse fractional seconds if present
+    let ms = 0;
+    if (date.length > 20 && date[19] === ".") {
+      // Take first 3 digits after decimal (milliseconds), pad if shorter
+      const fracStr = date.substring(20, Math.min(23, date.length));
+      ms = Number(fracStr.padEnd(3, "0"));
+    }
+
+    return new Date(Date.UTC(year, month - 1, day, hour, minute, second, ms));
+  }
+
+  // Fallback to regex for other formats (T separator, timezone offsets, etc.)
+  const s = date.trim();
+  const m = CLICKHOUSE_DATETIME_REGEX.exec(s);
+  if (!m) {
+    throw new Error(`Invalid ClickHouse DateTime64 string: "${date}"`);
+  }
+
+  const year = Number(m[1]);
+  const month = Number(m[2]); // 1-12
+  const day = Number(m[3]); // 1-31
+  const hour = Number(m[4]);
+  const minute = Number(m[5]);
+  const second = Number(m[6]);
+  const fraction = m[7] ?? ""; // up to 9 digits
+  const sign = m[8] as "+" | "-" | undefined;
+  const offH = m[9] ? Number(m[9]) : 0;
+  const offM = m[10] ? Number(m[10]) : 0;
+
+  // Convert fractional seconds to exactly 9 digits (nanoseconds within the second).
+  const nsWithinSecond = Number(fraction.padEnd(9, "0")); // 0..999_999_999
+
+  // Split into millisecond part (for Date)
+  const msPart = Math.trunc(nsWithinSecond / 1_000_000); // 0..999
+
+  let msEpoch = Date.UTC(year, month - 1, day, hour, minute, second, msPart);
+
+  // If an explicit offset was provided, adjust to true UTC (mirrors the nanoseconds variant).
+  if (sign) {
+    msEpoch -= (sign === "+" ? 1 : -1) * (offH * 60 + offM) * 60_000;
+  }
+
+  return new Date(msEpoch);
+}
+
+function kindToLevel(kind: string): TaskEventLevel {
+  switch (kind) {
+    case "DEBUG_EVENT":
+    case "LOG_DEBUG": {
+      return "DEBUG";
+    }
+    case "LOG_LOG": {
+      return "LOG";
+    }
+    case "LOG_INFO": {
+      return "INFO";
+    }
+    case "LOG_WARN": {
+      return "WARN";
+    }
+    case "LOG_ERROR": {
+      return "ERROR";
+    }
+    case "SPAN":
+    case "ANCESTOR_OVERRIDE":
+    case "SPAN_EVENT": {
+      return "TRACE";
+    }
+    default: {
+      return "TRACE";
+    }
+  }
+}
+
+function isLogEvent(kind: string): boolean {
+  return kind.startsWith("LOG_") || kind === "DEBUG_EVENT";
+}
+
+function calculateEndTimeFromStartTime(startTime: Date, duration: number): Date {
+  return new Date(startTime.getTime() + duration / 1_000_000);
+}
diff --git a/apps/webapp/app/v3/eventRepository/clickhouseEventRepositoryInstance.server.ts b/apps/webapp/app/v3/eventRepository/clickhouseEventRepositoryInstance.server.ts
new file mode 100644
index 0000000000..d80ab93e38
--- /dev/null
+++ b/apps/webapp/app/v3/eventRepository/clickhouseEventRepositoryInstance.server.ts
@@ -0,0 +1,49 @@
+import { ClickHouse } from "@internal/clickhouse";
+import { env } from "~/env.server";
+import { singleton } from "~/utils/singleton";
+import { ClickhouseEventRepository } from "./clickhouseEventRepository.server";
+
+export const clickhouseEventRepository = singleton(
+  "clickhouseEventRepository",
+  initializeClickhouseRepository
+);
+
+function initializeClickhouseRepository() {
+  if (!env.EVENTS_CLICKHOUSE_URL) {
+    throw new Error("EVENTS_CLICKHOUSE_URL is not set");
+  }
+
+  const url = new URL(env.EVENTS_CLICKHOUSE_URL);
+  url.searchParams.delete("secure");
+
+  const safeUrl = new URL(url.toString());
+  safeUrl.password = "redacted";
+
+  console.log("🗃️ Initializing Clickhouse event repository", { url: safeUrl.toString() });
+
+  const clickhouse = new ClickHouse({
+    url: url.toString(),
+    name: "task-events",
+    keepAlive: {
+      enabled: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1",
+      idleSocketTtl: env.EVENTS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS,
+    },
+    logLevel: env.EVENTS_CLICKHOUSE_LOG_LEVEL,
+    compression: {
+      request: env.EVENTS_CLICKHOUSE_COMPRESSION_REQUEST === "1",
+    },
+    maxOpenConnections: env.EVENTS_CLICKHOUSE_MAX_OPEN_CONNECTIONS,
+  });
+
+  const repository = new ClickhouseEventRepository({
+    clickhouse: clickhouse,
+    batchSize: env.EVENTS_CLICKHOUSE_BATCH_SIZE,
+    flushInterval: env.EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS,
+    maximumTraceSummaryViewCount: env.EVENTS_CLICKHOUSE_MAX_TRACE_SUMMARY_VIEW_COUNT,
+    maximumTraceDetailedSummaryViewCount:
+      env.EVENTS_CLICKHOUSE_MAX_TRACE_DETAILED_SUMMARY_VIEW_COUNT,
+    maximumLiveReloadingSetting: env.EVENTS_CLICKHOUSE_MAX_LIVE_RELOADING_SETTING,
+  });
+
+  return repository;
+}
diff --git a/apps/webapp/app/v3/eventRepository/common.server.ts b/apps/webapp/app/v3/eventRepository/common.server.ts
new file mode 100644
index 0000000000..ecfc9cfdc7
--- /dev/null
+++ b/apps/webapp/app/v3/eventRepository/common.server.ts
@@ -0,0 +1,166 @@
+import { Attributes } from "@opentelemetry/api";
+import { RandomIdGenerator } from "@opentelemetry/sdk-trace-base";
+import { parseTraceparent } from "@trigger.dev/core/v3/isomorphic";
+import { ExceptionEventProperties, SpanEvents, TaskRunError } from "@trigger.dev/core/v3/schemas";
+import { unflattenAttributes } from "@trigger.dev/core/v3/utils/flattenAttributes";
+import { createHash } from "node:crypto";
+
+export function extractContextFromCarrier(carrier: Record<string, string | undefined>) {
+  const traceparent = carrier["traceparent"];
+  const tracestate = carrier["tracestate"];
+
+  if (typeof traceparent !== "string") {
+    return undefined;
+  }
+
+  return {
+    ...carrier,
+    traceparent: parseTraceparent(traceparent),
+    tracestate,
+  };
+}
+
+export function getNowInNanoseconds(): bigint {
+  return BigInt(new Date().getTime() * 1_000_000);
+}
+
+export function getDateFromNanoseconds(nanoseconds: bigint): Date {
+  return new Date(Number(nanoseconds) / 1_000_000);
+}
+
+export function calculateDurationFromStart(startTime: bigint, endTime: Date = new Date()) {
+  const $endtime = typeof endTime === "string" ? new Date(endTime) : endTime;
+
+  return Number(BigInt($endtime.getTime() * 1_000_000) - startTime);
+}
+
+export function calculateDurationFromStartJsDate(startTime: Date, endTime: Date = new Date()) {
+  const $endtime = typeof endTime === "string" ? new Date(endTime) : endTime;
+
+  return ($endtime.getTime() - startTime.getTime()) * 1_000_000;
+}
+
+export function convertDateToNanoseconds(date: Date): bigint {
+  return BigInt(date.getTime()) * BigInt(1_000_000);
+}
+
+/**
+ * Returns a deterministically random 8-byte span ID formatted/encoded as a 16 lowercase hex
+ * characters corresponding to 64 bits, based on the trace ID and seed.
+ */
+export function generateDeterministicSpanId(traceId: string, seed: string) {
+  const hash = createHash("sha1");
+  hash.update(traceId);
+  hash.update(seed);
+  const buffer = hash.digest();
+  let hexString = "";
+  for (let i = 0; i < 8; i++) {
+    const val = buffer.readUInt8(i);
+    const str = val.toString(16).padStart(2, "0");
+    hexString += str;
+  }
+  return hexString;
+}
+
+const randomIdGenerator = new RandomIdGenerator();
+
+export function generateTraceId() {
+  return randomIdGenerator.generateTraceId();
+}
+
+export function generateSpanId() {
+  return randomIdGenerator.generateSpanId();
+}
+
+export function stripAttributePrefix(attributes: Attributes, prefix: string) {
+  const result: Attributes = {};
+
+  for (const [key, value] of Object.entries(attributes)) {
+    if (key.startsWith(prefix)) {
+      result[key.slice(prefix.length + 1)] = value;
+    } else {
+      result[key] = value;
+    }
+  }
+  return result;
+}
+
+export function parseEventsField(events: unknown): SpanEvents {
+  if (!events) return [];
+  if (!Array.isArray(events)) return [];
+
+  const unsafe = events
+    ? 
(events as any[]).map((e) => ({ + ...e, + properties: unflattenAttributes(e.properties as Attributes), + })) + : undefined; + + return unsafe as SpanEvents; +} + +export function createExceptionPropertiesFromError(error: TaskRunError): ExceptionEventProperties { + switch (error.type) { + case "BUILT_IN_ERROR": { + return { + type: error.name, + message: error.message, + stacktrace: error.stackTrace, + }; + } + case "CUSTOM_ERROR": { + return { + type: "Error", + message: error.raw, + }; + } + case "INTERNAL_ERROR": { + return { + type: "Internal error", + message: [error.code, error.message].filter(Boolean).join(": "), + stacktrace: error.stackTrace, + }; + } + case "STRING_ERROR": { + return { + type: "Error", + message: error.raw, + }; + } + } +} + +// removes keys that start with a $ sign. If there are no keys left, return undefined +export function removePrivateProperties( + attributes: Attributes | undefined | null +): Attributes | undefined { + if (!attributes) { + return undefined; + } + + const result: Attributes = {}; + + for (const [key, value] of Object.entries(attributes)) { + if (key.startsWith("$")) { + continue; + } + + result[key] = value; + } + + if (Object.keys(result).length === 0) { + return undefined; + } + + return result; +} + +export function isEmptyObject(obj: object) { + for (var prop in obj) { + if (Object.prototype.hasOwnProperty.call(obj, prop)) { + return false; + } + } + + return true; +} diff --git a/apps/webapp/app/v3/eventRepository.server.ts b/apps/webapp/app/v3/eventRepository/eventRepository.server.ts similarity index 68% rename from apps/webapp/app/v3/eventRepository.server.ts rename to apps/webapp/app/v3/eventRepository/eventRepository.server.ts index d19a2963ee..535c034fe6 100644 --- a/apps/webapp/app/v3/eventRepository.server.ts +++ b/apps/webapp/app/v3/eventRepository/eventRepository.server.ts @@ -1,4 +1,4 @@ -import { Attributes, AttributeValue, Link, trace, TraceFlags, Tracer } from "@opentelemetry/api"; +import { Attributes, AttributeValue, trace, Tracer } from "@opentelemetry/api"; import { RandomIdGenerator } from "@opentelemetry/sdk-trace-base"; import { SemanticResourceAttributes } from "@opentelemetry/semantic-conventions"; import { @@ -8,248 +8,69 @@ import { ExceptionSpanEvent, flattenAttributes, isExceptionSpanEvent, - NULL_SENTINEL, - omit, + nanosecondsToMilliseconds, PRIMARY_VARIANT, SemanticInternalAttributes, SpanEvent, SpanEvents, - SpanMessagingEvent, - TaskEventEnvironment, TaskEventStyle, TaskRunError, unflattenAttributes, } from "@trigger.dev/core/v3"; -import { parseTraceparent, serializeTraceparent } from "@trigger.dev/core/v3/isomorphic"; -import { Prisma, TaskEvent, TaskEventKind, TaskEventStatus, TaskRun } from "@trigger.dev/database"; +import { serializeTraceparent } from "@trigger.dev/core/v3/isomorphic"; +import { Prisma, TaskEvent, TaskEventKind } from "@trigger.dev/database"; import { nanoid } from "nanoid"; -import { createHash } from "node:crypto"; -import { EventEmitter } from "node:stream"; import { Gauge } from "prom-client"; import { $replica, prisma, PrismaClient, PrismaReplicaClient } from "~/db.server"; import { env } from "~/env.server"; import { metricsRegister } from "~/metrics.server"; -import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; import { logger } from "~/services/logger.server"; import { singleton } from "~/utils/singleton"; -import { DynamicFlushScheduler } from "./dynamicFlushScheduler.server"; -import { DetailedTraceEvent, TaskEventStore, 
TaskEventStoreTable } from "./taskEventStore.server"; -import { startActiveSpan } from "./tracer.server"; -import { startSpan } from "./tracing.server"; +import { DynamicFlushScheduler } from "../dynamicFlushScheduler.server"; +import { tracePubSub } from "../services/tracePubSub.server"; +import { DetailedTraceEvent, TaskEventStore, TaskEventStoreTable } from "../taskEventStore.server"; +import { startActiveSpan } from "../tracer.server"; +import { startSpan } from "../tracing.server"; +import { + calculateDurationFromStart, + convertDateToNanoseconds, + createExceptionPropertiesFromError, + extractContextFromCarrier, + generateDeterministicSpanId, + generateSpanId, + generateTraceId, + getDateFromNanoseconds, + getNowInNanoseconds, + isEmptyObject, + parseEventsField, + removePrivateProperties, + stripAttributePrefix, +} from "./common.server"; +import type { + CompleteableTaskRun, + CreateEventInput, + EventBuilder, + EventRepoConfig, + IEventRepository, + PreparedDetailedEvent, + PreparedEvent, + QueriedEvent, + RunPreparedEvent, + SpanDetail, + SpanDetailedSummary, + SpanSummary, + TraceAttributes, + TraceDetailedSummary, + TraceEventOptions, + TraceSummary, +} from "./eventRepository.types"; +import { originalRunIdCache } from "./originalRunIdCache.server"; const MAX_FLUSH_DEPTH = 5; -export type CreatableEvent = Omit< - Prisma.TaskEventCreateInput, - "id" | "createdAt" | "properties" | "metadata" | "style" | "output" | "payload" -> & { - properties: Attributes; - metadata: Attributes | undefined; - style: Attributes | undefined; - output: Attributes | string | boolean | number | undefined; - payload: Attributes | string | boolean | number | undefined; -}; - -export type CreatableEventKind = TaskEventKind; -export type CreatableEventStatus = TaskEventStatus; -export type CreatableEventEnvironmentType = CreatableEvent["environmentType"]; - -export type CompleteableTaskRun = Pick< - TaskRun, - | "friendlyId" - | "traceId" - | "spanId" - | "parentSpanId" - | "createdAt" - | "completedAt" - | "taskIdentifier" - | "projectId" - | "runtimeEnvironmentId" - | "organizationId" - | "environmentType" - | "isTest" ->; - -export type TraceAttributes = Partial< - Pick< - CreatableEvent, - | "attemptId" - | "isError" - | "isCancelled" - | "isDebug" - | "runId" - | "runIsTest" - | "output" - | "outputType" - | "metadata" - | "properties" - | "style" - | "queueId" - | "queueName" - | "batchId" - | "payload" - | "payloadType" - | "idempotencyKey" - > ->; - -export type SetAttribute = (key: keyof T, value: T[keyof T]) => void; - -export type TraceEventOptions = { - kind?: CreatableEventKind; - context?: Record; - spanParentAsLink?: boolean; - parentAsLinkType?: "trigger" | "replay"; - spanIdSeed?: string; - attributes: TraceAttributes; - environment: TaskEventEnvironment; - taskSlug: string; - startTime?: bigint; - endTime?: Date; - immediate?: boolean; -}; - -export type EventBuilder = { - traceId: string; - spanId: string; - setAttribute: SetAttribute; - stop: () => void; - failWithError: (error: TaskRunError) => void; -}; - -export type EventRepoConfig = { - batchSize: number; - batchInterval: number; - redis: RedisWithClusterOptions; - retentionInDays: number; - partitioningEnabled: boolean; - tracer?: Tracer; - minConcurrency?: number; - maxConcurrency?: number; - maxBatchSize?: number; - memoryPressureThreshold?: number; - loadSheddingThreshold?: number; - loadSheddingEnabled?: boolean; -}; - -export type QueryOptions = Prisma.TaskEventWhereInput; - -export type TaskEventRecord = TaskEvent; - 
-export type QueriedEvent = Prisma.TaskEventGetPayload<{ - select: { - spanId: true; - parentId: true; - runId: true; - idempotencyKey: true; - message: true; - style: true; - startTime: true; - duration: true; - isError: true; - isPartial: true; - isCancelled: true; - level: true; - events: true; - environmentType: true; - kind: true; - attemptNumber: true; - }; -}>; - -export type PreparedEvent = Omit & { - duration: number; - events: SpanEvents; - style: TaskEventStyle; -}; - -export type PreparedDetailedEvent = Omit & { - duration: number; - events: SpanEvents; - style: TaskEventStyle; -}; - -export type RunPreparedEvent = PreparedEvent & { - taskSlug?: string; -}; - -export type SpanLink = - | { - type: "run"; - icon?: string; - title: string; - runId: string; - } - | { - type: "span"; - icon?: string; - title: string; - traceId: string; - spanId: string; - }; - -export type SpanSummary = { - id: string; - parentId: string | undefined; - runId: string; - data: { - message: string; - style: TaskEventStyle; - events: SpanEvents; - startTime: Date; - duration: number; - isError: boolean; - isPartial: boolean; - isCancelled: boolean; - isDebug: boolean; - level: NonNullable; - environmentType: CreatableEventEnvironmentType; - }; -}; - -export type TraceSummary = { rootSpan: SpanSummary; spans: Array }; - -export type SpanDetailedSummary = { - id: string; - parentId: string | undefined; - message: string; - data: { - runId: string; - taskSlug?: string; - taskPath?: string; - events: SpanEvents; - startTime: Date; - duration: number; - isError: boolean; - isPartial: boolean; - isCancelled: boolean; - level: NonNullable; - environmentType: CreatableEventEnvironmentType; - workerVersion?: string; - queueName?: string; - machinePreset?: string; - properties?: Attributes; - output?: Attributes; - }; - children: Array; -}; - -export type TraceDetailedSummary = { - traceId: string; - rootSpan: SpanDetailedSummary; -}; - -export type UpdateEventOptions = { - attributes: TraceAttributes; - endTime?: Date; - immediate?: boolean; - events?: SpanEvents; -}; - -export class EventRepository { - private readonly _flushScheduler: DynamicFlushScheduler; +export class EventRepository implements IEventRepository { + private readonly _flushScheduler: DynamicFlushScheduler; private _randomIdGenerator = new RandomIdGenerator(); - private _redisPublishClient: RedisClient; private _subscriberCount = 0; private _tracer: Tracer; private _lastFlushedAt: Date | undefined; @@ -263,6 +84,10 @@ export class EventRepository { return this._flushScheduler.getStatus(); } + get maximumLiveReloadingSetting() { + return env.MAXIMUM_LIVE_RELOADING_EVENTS; + } + constructor( db: PrismaClient = prisma, readReplica: PrismaReplicaClient = $replica, @@ -278,33 +103,62 @@ export class EventRepository { memoryPressureThreshold: _config.memoryPressureThreshold, loadSheddingThreshold: _config.loadSheddingThreshold, loadSheddingEnabled: _config.loadSheddingEnabled, - isDroppableEvent: (event: CreatableEvent) => { + isDroppableEvent: (event: Prisma.TaskEventCreateManyInput) => { // Only drop LOG events during load shedding return event.kind === TaskEventKind.LOG; }, }); - this._redisPublishClient = createRedisClient("trigger:eventRepoPublisher", this._config.redis); this._tracer = _config.tracer ?? trace.getTracer("eventRepo", "0.0.1"); // Instantiate the store using the partitioning flag. 
this.taskEventStore = new TaskEventStore(db, readReplica); } - async insert(event: CreatableEvent) { - this._flushScheduler.addToBatch([event]); + #createableEventToPrismaEvent(event: CreateEventInput): Prisma.TaskEventCreateManyInput { + return { + message: event.message, + traceId: event.traceId, + spanId: event.spanId, + parentId: event.parentId, + isError: event.isError, + isPartial: event.isPartial, + isCancelled: event.isCancelled, + isDebug: false, + serviceName: "", + serviceNamespace: "", + level: event.level, + kind: event.kind, + status: event.status, + links: [], + events: event.events, + startTime: event.startTime, + duration: event.duration, + attemptNumber: event.attemptNumber, + environmentId: event.environmentId, + environmentType: event.environmentType, + organizationId: event.organizationId, + projectId: event.projectId, + projectRef: "", + runId: event.runId, + runIsTest: false, + taskSlug: event.taskSlug, + properties: event.properties as Prisma.InputJsonValue, + metadata: event.metadata as Prisma.InputJsonValue, + style: event.style as Prisma.InputJsonValue, + }; } - async insertImmediate(event: CreatableEvent) { - await this.#flushBatch(nanoid(), [event]); + private async insertImmediate(event: CreateEventInput) { + await this.#flushBatch(nanoid(), [this.#createableEventToPrismaEvent(event)]); } - async insertMany(events: CreatableEvent[]) { - this._flushScheduler.addToBatch(events); + async insertMany(events: CreateEventInput[]) { + this._flushScheduler.addToBatch(events.map(this.#createableEventToPrismaEvent)); } - async insertManyImmediate(events: CreatableEvent[]) { - return await this.#flushBatch(nanoid(), events); + async insertManyImmediate(events: CreateEventInput[]) { + await this.#flushBatchWithReturn(nanoid(), events.map(this.#createableEventToPrismaEvent)); } async completeSuccessfulRunEvent({ run, endTime }: { run: CompleteableTaskRun; endTime?: Date }) { @@ -312,33 +166,26 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "SERVER", traceId: run.traceId, spanId: run.spanId, parentId: run.parentSpanId, runId: run.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? "", isPartial: false, isError: false, isCancelled: false, status: "OK", - runIsTest: run.isTest, startTime, properties: {}, metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, endTime ?? new Date()), - output: undefined, - payload: undefined, - payloadType: undefined, }); } @@ -363,33 +210,26 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "SERVER", traceId: blockedRun.traceId, spanId: spanId, parentId: parentSpanId, runId: blockedRun.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? 
"", isPartial: false, isError, isCancelled: false, status: "OK", - runIsTest: run.isTest, startTime, properties: {}, metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, endTime ?? new Date()), - output: undefined, - payload: undefined, - payloadType: undefined, }); } @@ -406,25 +246,21 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "SERVER", traceId: run.traceId, spanId: run.spanId, parentId: run.parentSpanId, runId: run.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? "", isPartial: false, isError: true, isCancelled: false, status: "ERROR", - runIsTest: run.isTest, startTime, events: [ { @@ -439,9 +275,6 @@ export class EventRepository { metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, endTime ?? new Date()), - output: undefined, - payload: undefined, - payloadType: undefined, }); } @@ -458,25 +291,21 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "SERVER", traceId: run.traceId, spanId: run.spanId, parentId: run.parentSpanId, runId: run.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? "", isPartial: false, isError: true, isCancelled: false, status: "ERROR", - runIsTest: run.isTest, startTime, events: [ { @@ -493,9 +322,6 @@ export class EventRepository { metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, endTime ?? new Date()), - output: undefined, - payload: undefined, - payloadType: undefined, }); } @@ -514,25 +340,21 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "UNSPECIFIED", // This will be treated as an "invisible" event traceId: run.traceId, spanId: run.spanId, parentId: run.parentSpanId, runId: run.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? "", isPartial: true, isError: false, isCancelled: false, status: "OK", - runIsTest: run.isTest, startTime, events: [ { @@ -549,9 +371,6 @@ export class EventRepository { metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, endTime ?? 
new Date()), - output: undefined, - payload: undefined, - payloadType: undefined, }); } @@ -568,25 +387,21 @@ export class EventRepository { await this.insertImmediate({ message: run.taskIdentifier, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: "SERVER", traceId: run.traceId, spanId: run.spanId, parentId: run.parentSpanId, runId: run.friendlyId, - taskSlug: run.taskIdentifier, - projectRef: "", projectId: run.projectId, + taskSlug: run.taskIdentifier, environmentId: run.runtimeEnvironmentId, - environmentType: run.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT", organizationId: run.organizationId ?? "", isPartial: false, isError: true, isCancelled: true, status: "ERROR", - runIsTest: run.isTest, events: [ { name: "cancellation", @@ -601,55 +416,12 @@ export class EventRepository { metadata: undefined, style: undefined, duration: calculateDurationFromStart(startTime, cancelledAt), - output: undefined, - payload: undefined, - payloadType: undefined, - }); - } - - async crashEvent({ - event, - crashedAt, - exception, - }: { - event: TaskEventRecord; - crashedAt: Date; - exception: ExceptionEventProperties; - }) { - if (!event.isPartial) { - return; - } - - await this.insertImmediate({ - ...omit(event, "id"), - isPartial: false, - isError: true, - isCancelled: false, - status: "ERROR", - links: event.links ?? [], - events: [ - { - name: "exception", - time: crashedAt, - properties: { - exception, - }, - } satisfies ExceptionSpanEvent, - ...((event.events as any[]) ?? []), - ], - duration: calculateDurationFromStart(event.startTime, crashedAt), - properties: event.properties as Attributes, - metadata: event.metadata as Attributes, - style: event.style as Attributes, - output: event.output as Attributes, - outputType: event.outputType, - payload: event.payload as Attributes, - payloadType: event.payloadType, }); } public async getTraceSummary( storeTable: TaskEventStoreTable, + environmentId: string, traceId: string, startCreatedAt: Date, endCreatedAt?: Date, @@ -733,7 +505,6 @@ export class EventRepository { id: event.spanId, parentId: event.parentId ?? undefined, runId: event.runId, - idempotencyKey: event.idempotencyKey, data: { message: event.message, style: event.style, @@ -745,7 +516,6 @@ export class EventRepository { startTime: getDateFromNanoseconds(event.startTime), level: event.level, events, - environmentType: event.environmentType, }, }; @@ -773,6 +543,7 @@ export class EventRepository { public async getTraceDetailedSummary( storeTable: TaskEventStoreTable, + environmentId: string, traceId: string, startCreatedAt: Date, endCreatedAt?: Date, @@ -862,7 +633,6 @@ export class EventRepository { ? overrides.isError : event.isError; - const output = event.output ? (event.output as Attributes) : undefined; const properties = event.properties ? removePrivateProperties(event.properties as Attributes) : {}; @@ -870,11 +640,10 @@ export class EventRepository { const spanDetailedSummary: SpanDetailedSummary = { id: event.spanId, parentId: event.parentId ?? undefined, - message: event.message, + runId: event.runId, data: { - runId: event.runId, + message: event.message, taskSlug: event.taskSlug ?? undefined, - taskPath: event.taskPath ?? 
undefined,
       events: events?.filter((e) => !e.name.startsWith("trigger.dev")),
       startTime: getDateFromNanoseconds(event.startTime),
       duration: nanosecondsToMilliseconds(duration),
@@ -882,12 +651,7 @@
         isPartial,
         isCancelled,
         level: event.level,
-        environmentType: event.environmentType,
-        workerVersion: event.workerVersion ?? undefined,
-        queueName: event.queueName ?? undefined,
-        machinePreset: event.machinePreset ?? undefined,
         properties,
-        output,
       },
       children: [],
     };
@@ -920,6 +684,8 @@
   public async getRunEvents(
     storeTable: TaskEventStoreTable,
+    environmentId: string,
+    traceId: string,
     runId: string,
     startCreatedAt: Date,
     endCreatedAt?: Date
@@ -970,22 +736,16 @@
   // A Span can be cancelled if it is partial and has a parent that is cancelled
   // And a span's duration, if it is partial and has a cancelled parent, is the time between the start of the span and the time of the cancellation event of the parent
-  public async getSpan({
-    storeTable,
-    spanId,
-    environmentId,
-    startCreatedAt,
-    endCreatedAt,
-    options,
-  }: {
-    storeTable: TaskEventStoreTable;
-    spanId: string;
-    environmentId: string;
-    startCreatedAt: Date;
-    endCreatedAt?: Date;
-    options?: { includeDebugLogs?: boolean };
-  }) {
-    return await startActiveSpan("getSpan", async () => {
+  public async getSpan(
+    storeTable: TaskEventStoreTable,
+    environmentId: string,
+    spanId: string,
+    traceId: string,
+    startCreatedAt: Date,
+    endCreatedAt?: Date,
+    options?: { includeDebugLogs?: boolean }
+  ): Promise<SpanDetail | undefined> {
+    return await startActiveSpan("getSpan", async (s) => {
      const spanEvent = await this.#getSpanEvent({
        storeTable,
        spanId,
@@ -1009,61 +769,15 @@
        endCreatedAt
      );
 
-      const output = rehydrateJson(spanEvent.output);
-      const payload = rehydrateJson(spanEvent.payload);
-
-      const show = rehydrateShow(spanEvent.properties);
      const properties = sanitizedAttributes(spanEvent.properties);
-      const messagingEvent = SpanMessagingEvent.optional().safeParse(
-        (properties as any)?.messaging
-      );
-
-      const links: SpanLink[] = [];
-
-      if (messagingEvent.success && messagingEvent.data) {
-        if (messagingEvent.data.message && "id" in messagingEvent.data.message) {
-          if (messagingEvent.data.message.id.startsWith("run_")) {
-            links.push({
-              type: "run",
-              icon: "runs",
-              title: `Run ${messagingEvent.data.message.id}`,
-              runId: messagingEvent.data.message.id,
-            });
-          }
-        }
-      }
-
-      const backLinks = spanEvent.links as any as Link[] | undefined;
-
-      if (backLinks && backLinks.length > 0) {
-        backLinks.forEach((l) => {
-          const title = String(
-            l.attributes?.[SemanticInternalAttributes.LINK_TITLE] ?? "Triggered by"
-          );
-
-          links.push({
-            type: "span",
-            icon: "trigger",
-            title,
-            traceId: l.context.traceId,
-            spanId: l.context.spanId,
-          });
-        });
-      }
-
      const spanEvents = transformEvents(
        span.data.events,
        spanEvent.metadata as Attributes,
        spanEvent.environmentType === "DEVELOPMENT"
      );
 
-      const originalRun = rehydrateAttribute(
-        spanEvent.properties,
-        SemanticInternalAttributes.ORIGINAL_RUN_ID
-      );
-
+      // Used for waitpoint token spans
      const entity = {
        type: rehydrateAttribute(
          spanEvent.properties,
@@ -1073,20 +787,70 @@
      };
 
      return {
-        ...spanEvent,
-        ...span.data,
-        payload,
-        output,
-        properties,
+        // Core Identity & Structure
+        spanId: spanEvent.spanId,
+        parentId: spanEvent.parentId,
+        message: spanEvent.message,
+
+        // Status & State
+        isError: span.data.isError,
+        isPartial: span.data.isPartial,
+        isCancelled: span.data.isCancelled,
+        level: spanEvent.level,
+        kind: spanEvent.kind,
+
+        // Timing
+        startTime: span.data.startTime,
+        duration: nanosecondsToMilliseconds(span.data.duration),
+
+        // Content & Display
        events: spanEvents,
-        show,
-        links,
-        originalRun,
+        style: span.data.style,
+        properties: properties,
+
+        // Entity & Relationships
        entity,
+
+        // Additional properties
+        metadata: spanEvent.metadata,
+      };
+    });
+  }
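+
+  // Illustrative call shape for the new positional signature (identifiers
+  // hypothetical, not part of this module):
+  //
+  //   const span = await repository.getSpan(
+  //     storeTable,        // a TaskEventStoreTable, resolved elsewhere
+  //     environment.id,
+  //     spanId,
+  //     run.traceId,
+  //     run.createdAt
+  //   );
+  //   if (span?.isError) { /* render error state from span.level / span.events */ }
+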
"Triggered by" - ); - - links.push({ - type: "span", - icon: "trigger", - title, - traceId: l.context.traceId, - spanId: l.context.spanId, - }); - }); - } - const spanEvents = transformEvents( span.data.events, spanEvent.metadata as Attributes, spanEvent.environmentType === "DEVELOPMENT" ); - const originalRun = rehydrateAttribute( - spanEvent.properties, - SemanticInternalAttributes.ORIGINAL_RUN_ID - ); - + // Used for waitpoint token spans const entity = { type: rehydrateAttribute( spanEvent.properties, @@ -1073,20 +787,70 @@ export class EventRepository { }; return { - ...spanEvent, - ...span.data, - payload, - output, - properties, + // Core Identity & Structure + spanId: spanEvent.spanId, + parentId: spanEvent.parentId, + message: spanEvent.message, + + // Status & State + isError: span.data.isError, + isPartial: span.data.isPartial, + isCancelled: span.data.isCancelled, + level: spanEvent.level, + kind: spanEvent.kind, + + // Timing + startTime: span.data.startTime, + duration: nanosecondsToMilliseconds(span.data.duration), + + // Content & Display events: spanEvents, - show, - links, - originalRun, + style: span.data.style, + properties: properties, + + // Entity & Relationships entity, + + // Additional properties + metadata: spanEvent.metadata, }; }); } + async getSpanOriginalRunId( + storeTable: TaskEventStoreTable, + environmentId: string, + spanId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date + ): Promise { + return await startActiveSpan("getSpanOriginalRunId", async (s) => { + return await originalRunIdCache.swr(traceId, spanId, async () => { + const spanEvent = await this.#getSpanEvent({ + storeTable, + spanId, + environmentId, + startCreatedAt, + endCreatedAt, + options: { includeDebugLogs: false }, + }); + + if (!spanEvent) { + return; + } + // This is used when the span is a cached run (because of idempotency key) + // so this span isn't the actual run span, but points to the original run + const originalRun = rehydrateAttribute( + spanEvent.properties, + SemanticInternalAttributes.ORIGINAL_RUN_ID + ); + + return originalRun; + }); + }); + } + async #createSpanFromEvent( storeTable: TaskEventStoreTable, event: PreparedEvent, @@ -1174,7 +938,6 @@ export class EventRepository { id: event.spanId, parentId: event.parentId ?? undefined, runId: event.runId, - idempotencyKey: event.idempotencyKey, data: { message: event.message, style: event.style, @@ -1185,7 +948,6 @@ export class EventRepository { startTime: getDateFromNanoseconds(event.startTime), level: event.level, events, - environmentType: event.environmentType, }, }; @@ -1356,12 +1118,12 @@ export class EventRepository { options.duration ?? (options.endTime ? calculateDurationFromStart(startTime, options.endTime) : 100); - const traceId = propagatedContext?.traceparent?.traceId ?? this.generateTraceId(); + const traceId = propagatedContext?.traceparent?.traceId ?? generateTraceId(); const parentId = options.parentId ?? propagatedContext?.traceparent?.spanId; const tracestate = propagatedContext?.tracestate; const spanId = options.spanIdSeed - ? this.#generateDeterministicSpanId(traceId, options.spanIdSeed) - : this.generateSpanId(); + ? 
generateDeterministicSpanId(traceId, options.spanIdSeed) + : generateSpanId(); const metadata = { [SemanticInternalAttributes.ENVIRONMENT_ID]: options.environment.id, @@ -1370,8 +1132,6 @@ export class EventRepository { [SemanticInternalAttributes.PROJECT_ID]: options.environment.projectId, [SemanticInternalAttributes.PROJECT_REF]: options.environment.project.externalRef, [SemanticInternalAttributes.RUN_ID]: options.attributes.runId, - [SemanticInternalAttributes.RUN_IS_TEST]: options.attributes.runIsTest ?? false, - [SemanticInternalAttributes.BATCH_ID]: options.attributes.batchId ?? undefined, [SemanticInternalAttributes.TASK_SLUG]: options.taskSlug, [SemanticResourceAttributes.SERVICE_NAME]: "api server", [SemanticResourceAttributes.SERVICE_NAMESPACE]: "trigger.dev", @@ -1386,14 +1146,11 @@ export class EventRepository { throw new Error("runId is required"); } - const event: CreatableEvent = { + const event: CreateEventInput = { traceId, spanId, parentId, - tracestate: typeof tracestate === "string" ? tracestate : undefined, message: message, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: options.attributes.isDebug ? "WARN" : "TRACE", kind: options.attributes.isDebug ? TaskEventKind.LOG : options.kind, status: "OK", @@ -1403,14 +1160,9 @@ export class EventRepository { environmentId: options.environment.id, environmentType: options.environment.type, organizationId: options.environment.organizationId, - projectId: options.environment.projectId, - projectRef: options.environment.project.externalRef, runId: options.attributes.runId, - runIsTest: options.attributes.runIsTest ?? false, + projectId: options.environment.projectId, taskSlug: options.taskSlug, - queueId: options.attributes.queueId, - queueName: options.attributes.queueName, - batchId: options.attributes.batchId ?? undefined, properties: { ...style, ...(flattenAttributes(metadata, SemanticInternalAttributes.METADATA) as Record< @@ -1421,19 +1173,13 @@ export class EventRepository { }, metadata: metadata, style: stripAttributePrefix(style, SemanticInternalAttributes.STYLE), - output: undefined, - outputType: undefined, - payload: undefined, - payloadType: undefined, }; if (options.immediate) { await this.insertImmediate(event); } else { - this._flushScheduler.addToBatch([event]); + this._flushScheduler.addToBatch([this.#createableEventToPrismaEvent(event)]); } - - return event; } public async traceEvent( @@ -1451,36 +1197,19 @@ export class EventRepository { const startTime = options.startTime ?? getNowInNanoseconds(); const traceId = options.spanParentAsLink - ? this.generateTraceId() - : propagatedContext?.traceparent?.traceId ?? this.generateTraceId(); + ? generateTraceId() + : propagatedContext?.traceparent?.traceId ?? generateTraceId(); const parentId = options.spanParentAsLink ? undefined : propagatedContext?.traceparent?.spanId; const tracestate = options.spanParentAsLink ? undefined : propagatedContext?.tracestate; const spanId = options.spanIdSeed - ? this.#generateDeterministicSpanId(traceId, options.spanIdSeed) - : this.generateSpanId(); + ? generateDeterministicSpanId(traceId, options.spanIdSeed) + : generateSpanId(); const traceContext = { ...options.context, traceparent: serializeTraceparent(traceId, spanId), }; - const links: Link[] = - options.spanParentAsLink && propagatedContext?.traceparent - ? 
[ - { - context: { - traceId: propagatedContext.traceparent.traceId, - spanId: propagatedContext.traceparent.spanId, - traceFlags: TraceFlags.SAMPLED, - }, - attributes: { - [SemanticInternalAttributes.LINK_TITLE]: - options.parentAsLinkType === "replay" ? "Replay of" : "Triggered by", - }, - }, - ] - : []; - let isStopped = false; let failedWithError: TaskRunError | undefined; @@ -1524,8 +1253,6 @@ export class EventRepository { [SemanticInternalAttributes.PROJECT_ID]: options.environment.projectId, [SemanticInternalAttributes.PROJECT_REF]: options.environment.project.externalRef, [SemanticInternalAttributes.RUN_ID]: options.attributes.runId, - [SemanticInternalAttributes.RUN_IS_TEST]: options.attributes.runIsTest ?? false, - [SemanticInternalAttributes.BATCH_ID]: options.attributes.batchId ?? undefined, [SemanticInternalAttributes.TASK_SLUG]: options.taskSlug, [SemanticResourceAttributes.SERVICE_NAME]: "api server", [SemanticResourceAttributes.SERVICE_NAMESPACE]: "trigger.dev", @@ -1542,17 +1269,14 @@ export class EventRepository { throw new Error("runId is required"); } - const event: CreatableEvent = { + const event: CreateEventInput = { traceId, spanId, parentId, - tracestate: typeof tracestate === "string" ? tracestate : undefined, duration: options.incomplete ? 0 : duration, isPartial: failedWithError ? false : options.incomplete, isError: options.isError === true || !!failedWithError, message: message, - serviceName: "api server", - serviceNamespace: "trigger.dev", level: "TRACE", kind: options.kind, status: failedWithError ? "ERROR" : "OK", @@ -1561,13 +1285,8 @@ export class EventRepository { environmentType: options.environment.type, organizationId: options.environment.organizationId, projectId: options.environment.projectId, - projectRef: options.environment.project.externalRef, runId: options.attributes.runId, - runIsTest: options.attributes.runIsTest ?? false, taskSlug: options.taskSlug, - queueId: options.attributes.queueId, - queueName: options.attributes.queueName, - batchId: options.attributes.batchId ?? undefined, properties: { ...(flattenAttributes(metadata, SemanticInternalAttributes.METADATA) as Record< string, @@ -1577,12 +1296,6 @@ export class EventRepository { }, metadata: metadata, style: stripAttributePrefix(style, SemanticInternalAttributes.STYLE), - output: undefined, - outputType: undefined, - links: links as unknown as Prisma.InputJsonValue, - payload: options.attributes.payload, - payloadType: options.attributes.payloadType, - idempotencyKey: options.attributes.idempotencyKey, events: failedWithError ? [ { @@ -1599,44 +1312,45 @@ export class EventRepository { if (options.immediate) { await this.insertImmediate(event); } else { - this._flushScheduler.addToBatch([event]); + this._flushScheduler.addToBatch([this.#createableEventToPrismaEvent(event)]); } return result; } - async subscribeToTrace(traceId: string) { - const redis = createRedisClient("trigger:eventRepoSubscriber", this._config.redis); - - const channel = `events:${traceId}`; + async #flushBatch(flushId: string, batch: Prisma.TaskEventCreateManyInput[]) { + await startSpan(this._tracer, "flushBatch", async (span) => { + const events = excludePartialEventsWithCorrespondingFullEvent(batch); - // Subscribe to the channel. - await redis.subscribe(channel); + span.setAttribute("flush_id", flushId); + span.setAttribute("event_count", events.length); + span.setAttribute("partial_event_count", batch.length - events.length); + span.setAttribute( + "last_flush_in_ms", + this._lastFlushedAt ? 
new Date().getTime() - this._lastFlushedAt.getTime() : 0
+      );
 
-    // Subscribe to the channel.
-    await redis.subscribe(channel);
+      const flushedEvents = await this.#doFlushBatch(flushId, events);
 
-    // Increment the subscriber count.
-    this._subscriberCount++;
+      this._lastFlushedAt = new Date();
 
-    const eventEmitter = new EventEmitter();
+      if (flushedEvents.length !== events.length) {
+        logger.debug("[EventRepository][flushBatch] Failed to insert all events", {
+          attemptCount: events.length,
+          successCount: flushedEvents.length,
+        });
 
-    // Define the message handler.
-    redis.on("message", (_, message) => {
-      eventEmitter.emit("message", message);
-    });
+        span.setAttribute("failed_event_count", events.length - flushedEvents.length);
+      }
 
-    // Return a function that can be used to unsubscribe.
-    const unsubscribe = async () => {
-      await redis.unsubscribe(channel);
-      redis.quit();
-      this._subscriberCount--;
-    };
+      this.#publishToRedis(flushedEvents);
+    });
  }
 
-    return {
-      unsubscribe,
-      eventEmitter,
-    };
+  async #flushBatchWithReturn(
+    flushId: string,
+    batch: Prisma.TaskEventCreateManyInput[]
+  ): Promise<Prisma.TaskEventCreateManyInput[]> {
+    return await startSpan(this._tracer, "flushBatch", async (span) => {
      const events = excludePartialEventsWithCorrespondingFullEvent(batch);
@@ -1662,6 +1376,8 @@
      }
 
      this.#publishToRedis(flushedEvents);
+
+      return flushedEvents;
    });
  }
@@ -1671,9 +1387,9 @@
   async #doFlushBatch(
     flushId: string,
-    events: CreatableEvent[],
+    events: Prisma.TaskEventCreateManyInput[],
     depth: number = 1
-  ): Promise<CreatableEvent[]> {
+  ): Promise<Prisma.TaskEventCreateManyInput[]> {
     return await startSpan(this._tracer, "doFlushBatch", async (span) => {
       try {
         span.setAttribute("event_count", events.length);
@@ -1750,41 +1466,10 @@
    });
  }
 
-  async #publishToRedis(events: CreatableEvent[]) {
+  async #publishToRedis(events: Prisma.TaskEventCreateManyInput[]) {
    if (events.length === 0) return;
-    const uniqueTraces = new Set(events.map((e) => `events:${e.traceId}`));
-
-    await Promise.allSettled(
-      Array.from(uniqueTraces).map((traceId) =>
-        this._redisPublishClient.publish(traceId, new Date().toISOString())
-      )
-    );
-  }
-
-  public generateTraceId() {
-    return this._randomIdGenerator.generateTraceId();
-  }
-
-  public generateSpanId() {
-    return this._randomIdGenerator.generateSpanId();
-  }
-
-  /**
-   * Returns a deterministically random 8-byte span ID formatted/encoded as a 16 lowercase hex
-   * characters corresponding to 64 bits, based on the trace ID and seed.
- */ - #generateDeterministicSpanId(traceId: string, seed: string) { - const hash = createHash("sha1"); - hash.update(traceId); - hash.update(seed); - const buffer = hash.digest(); - let hexString = ""; - for (let i = 0; i < 8; i++) { - const val = buffer.readUInt8(i); - const str = val.toString(16).padStart(2, "0"); - hexString += str; - } - return hexString; + await tracePubSub.publish(events.map((e) => e.traceId)); } } @@ -1802,14 +1487,6 @@ function initializeEventRepo() { memoryPressureThreshold: env.EVENTS_MEMORY_PRESSURE_THRESHOLD, loadSheddingThreshold: env.EVENTS_LOAD_SHEDDING_THRESHOLD, loadSheddingEnabled: env.EVENTS_LOAD_SHEDDING_ENABLED === "1", - redis: { - port: env.PUBSUB_REDIS_PORT, - host: env.PUBSUB_REDIS_HOST, - username: env.PUBSUB_REDIS_USERNAME, - password: env.PUBSUB_REDIS_PASSWORD, - tlsDisabled: env.PUBSUB_REDIS_TLS_DISABLED === "true", - clusterMode: env.PUBSUB_REDIS_CLUSTER_MODE_ENABLED === "1", - }, }); new Gauge({ @@ -1885,56 +1562,14 @@ function initializeEventRepo() { return repo; } -export function stripAttributePrefix(attributes: Attributes, prefix: string) { - const result: Attributes = {}; - - for (const [key, value] of Object.entries(attributes)) { - if (key.startsWith(prefix)) { - result[key.slice(prefix.length + 1)] = value; - } else { - result[key] = value; - } - } - return result; -} - -export function createExceptionPropertiesFromError(error: TaskRunError): ExceptionEventProperties { - switch (error.type) { - case "BUILT_IN_ERROR": { - return { - type: error.name, - message: error.message, - stacktrace: error.stackTrace, - }; - } - case "CUSTOM_ERROR": { - return { - type: "Error", - message: error.raw, - }; - } - case "INTERNAL_ERROR": { - return { - type: "Internal error", - message: [error.code, error.message].filter(Boolean).join(": "), - stacktrace: error.stackTrace, - }; - } - case "STRING_ERROR": { - return { - type: "Error", - message: error.raw, - }; - } - } -} - /** * Filters out partial events from a batch of creatable events, excluding those that have a corresponding full event. * @param batch - The batch of creatable events to filter. * @returns The filtered array of creatable events, excluding partial events with corresponding full events. */ -function excludePartialEventsWithCorrespondingFullEvent(batch: CreatableEvent[]): CreatableEvent[] { +function excludePartialEventsWithCorrespondingFullEvent( + batch: Prisma.TaskEventCreateManyInput[] +): Prisma.TaskEventCreateManyInput[] { const partialEvents = batch.filter((event) => event.isPartial); const fullEvents = batch.filter((event) => !event.isPartial); @@ -1945,21 +1580,6 @@ function excludePartialEventsWithCorrespondingFullEvent(batch: CreatableEvent[]) ); } -export function extractContextFromCarrier(carrier: Record) { - const traceparent = carrier["traceparent"]; - const tracestate = carrier["tracestate"]; - - if (typeof traceparent !== "string") { - return undefined; - } - - return { - ...carrier, - traceparent: parseTraceparent(traceparent), - tracestate, - }; -} - function prepareEvent(event: QueriedEvent): PreparedEvent { return { ...event, @@ -1978,17 +1598,6 @@ function prepareDetailedEvent(event: DetailedTraceEvent): PreparedDetailedEvent }; } -function parseEventsField(events: Prisma.JsonValue): SpanEvents { - const unsafe = events - ? 
(events as any[]).map((e) => ({ - ...e, - properties: unflattenAttributes(e.properties as Attributes), - })) - : undefined; - - return unsafe as SpanEvents; -} - function parseStyleField(style: Prisma.JsonValue): TaskEventStyle { const unsafe = unflattenAttributes(style as Attributes); @@ -2152,30 +1761,6 @@ function sanitizedAttributes(json: Prisma.JsonValue) { return unflattenAttributes(withoutPrivateProperties); } -// removes keys that start with a $ sign. If there are no keys left, return undefined -function removePrivateProperties( - attributes: Attributes | undefined | null -): Attributes | undefined { - if (!attributes) { - return undefined; - } - - const result: Attributes = {}; - - for (const [key, value] of Object.entries(attributes)) { - if (key.startsWith("$")) { - continue; - } - - result[key] = value; - } - - if (Object.keys(result).length === 0) { - return undefined; - } - - return result; -} function transformEvents(events: SpanEvents, properties: Attributes, isDev: boolean): SpanEvents { return (events ?? []).map((event) => transformEvent(event, properties, isDev)); @@ -2216,82 +1801,6 @@ function transformException( }; } -function calculateDurationFromStart(startTime: bigint, endTime: Date = new Date()) { - const $endtime = typeof endTime === "string" ? new Date(endTime) : endTime; - - return Number(BigInt($endtime.getTime() * 1_000_000) - startTime); -} - -function getNowInNanoseconds(): bigint { - return BigInt(new Date().getTime() * 1_000_000); -} - -export function getDateFromNanoseconds(nanoseconds: bigint) { - return new Date(Number(nanoseconds) / 1_000_000); -} - -function convertDateToNanoseconds(date: Date) { - return BigInt(date.getTime()) * BigInt(1_000_000); -} - -function nanosecondsToMilliseconds(nanoseconds: bigint | number): number { - return Number(nanoseconds) / 1_000_000; -} - -function rehydrateJson(json: Prisma.JsonValue): any { - if (json === null) { - return undefined; - } - - if (json === NULL_SENTINEL) { - return null; - } - - if (typeof json === "string") { - return json; - } - - if (typeof json === "number") { - return json; - } - - if (typeof json === "boolean") { - return json; - } - - if (Array.isArray(json)) { - return json.map((item) => rehydrateJson(item)); - } - - if (typeof json === "object") { - return unflattenAttributes(json as Attributes); - } - - return null; -} - -function rehydrateShow(properties: Prisma.JsonValue): { actions?: boolean } | undefined { - if (properties === null || properties === undefined) { - return; - } - - if (typeof properties !== "object") { - return; - } - - if (Array.isArray(properties)) { - return; - } - - const actions = properties[SemanticInternalAttributes.SHOW_ACTIONS]; - - if (typeof actions === "boolean") { - return { actions }; - } - - return; -} - export function rehydrateAttribute( properties: Prisma.JsonValue, key: string @@ -2317,119 +1826,6 @@ export function rehydrateAttribute( return value as T; } -export async function findRunForEventCreation(runId: string) { - return prisma.taskRun.findFirst({ - where: { - id: runId, - }, - select: { - friendlyId: true, - taskIdentifier: true, - traceContext: true, - runtimeEnvironment: { - select: { - id: true, - type: true, - organizationId: true, - projectId: true, - project: { - select: { - externalRef: true, - }, - }, - }, - }, - }, - }); -} - -export async function recordRunEvent( - runId: string, - message: string, - options: Omit & { - duration?: number; - parentId?: string; - startTime?: Date; - } -): Promise< - | { - success: true; - } - | { - 
success: false; - code: "RUN_NOT_FOUND" | "FAILED_TO_RECORD_EVENT"; - error?: unknown; - } -> { - try { - const foundRun = await findRunForEventCreation(runId); - - if (!foundRun) { - logger.error("Failed to find run for event creation", { runId }); - return { - success: false, - code: "RUN_NOT_FOUND", - }; - } - - const { attributes, startTime, ...optionsRest } = options; - - await eventRepository.recordEvent(message, { - environment: foundRun.runtimeEnvironment, - taskSlug: foundRun.taskIdentifier, - context: foundRun.traceContext as Record, - attributes: { - runId: foundRun.friendlyId, - ...attributes, - }, - startTime: BigInt((startTime?.getTime() ?? Date.now()) * 1_000_000), - ...optionsRest, - }); - - return { - success: true, - }; - } catch (error) { - logger.error("Failed to record event for run", { - error: error instanceof Error ? error.message : error, - runId, - }); - - return { - success: false, - code: "FAILED_TO_RECORD_EVENT", - error, - }; - } -} - -export async function recordRunDebugLog( - runId: string, - message: string, - options: Omit & { - duration?: number; - parentId?: string; - startTime?: Date; - } -): Promise< - | { - success: true; - } - | { - success: false; - code: "RUN_NOT_FOUND" | "FAILED_TO_RECORD_EVENT"; - error?: unknown; - } -> { - return recordRunEvent(runId, message, { - ...options, - attributes: { - ...options?.attributes, - isDebug: true, - }, - }); -} - /** * Extracts error details from Prisma errors in a type-safe way. * Only includes 'code' property for PrismaClientKnownRequestError. @@ -2488,16 +1884,6 @@ function isRetriablePrismaError( return false; } - -function isEmptyObject(obj: object) { - for (var prop in obj) { - if (Object.prototype.hasOwnProperty.call(obj, prop)) { - return false; - } - } - - return true; -} // Helper function to check if a field is empty/missing function isEmpty(value: any): boolean { if (value === null || value === undefined) return true; diff --git a/apps/webapp/app/v3/eventRepository/eventRepository.types.ts b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts new file mode 100644 index 0000000000..cdacd15e38 --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/eventRepository.types.ts @@ -0,0 +1,417 @@ +import { Attributes, Tracer } from "@opentelemetry/api"; +import type { + ExceptionEventProperties, + SpanEvents, + TaskEventEnvironment, + TaskEventStyle, + TaskRunError, +} from "@trigger.dev/core/v3"; +import type { + Prisma, + TaskEvent, + TaskEventKind, + TaskEventLevel, + TaskEventStatus, + TaskRun, +} from "@trigger.dev/database"; +import type { DetailedTraceEvent, TaskEventStoreTable } from "../taskEventStore.server"; +export type { ExceptionEventProperties }; + +// ============================================================================ +// Event Creation Types +// ============================================================================ + +export type CreateEventInput = Omit< + Prisma.TaskEventCreateInput, + | "id" + | "createdAt" + | "properties" + | "metadata" + | "style" + | "output" + | "payload" + | "serviceName" + | "serviceNamespace" + | "tracestate" + | "projectRef" + | "runIsTest" + | "workerId" + | "queueId" + | "queueName" + | "batchId" + | "taskPath" + | "taskExportName" + | "workerVersion" + | "idempotencyKey" + | "attemptId" + | "usageDurationMs" + | "usageCostInCents" + | "machinePreset" + | "machinePresetCpu" + | "machinePresetMemory" + | "machinePresetCentsPerMs" + | "links" +> & { + properties: Attributes; + metadata: Attributes | undefined; + style: Attributes | 
undefined; +}; + +export type CreatableEventKind = TaskEventKind; +export type CreatableEventStatus = TaskEventStatus; + +// ============================================================================ +// Task Run Types +// ============================================================================ + +export type CompleteableTaskRun = Pick< + TaskRun, + | "friendlyId" + | "traceId" + | "spanId" + | "parentSpanId" + | "createdAt" + | "completedAt" + | "taskIdentifier" + | "projectId" + | "runtimeEnvironmentId" + | "organizationId" + | "isTest" +>; + +// ============================================================================ +// Trace and Event Types +// ============================================================================ + +export type TraceAttributes = Partial< + Pick< + CreateEventInput, + "isError" | "isCancelled" | "isDebug" | "runId" | "metadata" | "properties" | "style" + > +>; + +export type SetAttribute = (key: keyof T, value: T[keyof T]) => void; + +export type TraceEventOptions = { + kind?: CreatableEventKind; + context?: Record; + spanParentAsLink?: boolean; + spanIdSeed?: string; + attributes: TraceAttributes; + environment: TaskEventEnvironment; + taskSlug: string; + startTime?: bigint; + endTime?: Date; + immediate?: boolean; +}; + +export type EventBuilder = { + traceId: string; + spanId: string; + setAttribute: SetAttribute; + stop: () => void; + failWithError: (error: TaskRunError) => void; +}; + +export type UpdateEventOptions = { + attributes: TraceAttributes; + endTime?: Date; + immediate?: boolean; + events?: SpanEvents; +}; + +// ============================================================================ +// Configuration Types +// ============================================================================ + +export type EventRepoConfig = { + batchSize: number; + batchInterval: number; + retentionInDays: number; + partitioningEnabled: boolean; + tracer?: Tracer; + minConcurrency?: number; + maxConcurrency?: number; + maxBatchSize?: number; + memoryPressureThreshold?: number; + loadSheddingThreshold?: number; + loadSheddingEnabled?: boolean; +}; + +// ============================================================================ +// Query Types +// ============================================================================ + +export type QueryOptions = Prisma.TaskEventWhereInput; + +export type TaskEventRecord = TaskEvent; + +export type QueriedEvent = Prisma.TaskEventGetPayload<{ + select: { + spanId: true; + parentId: true; + runId: true; + message: true; + style: true; + startTime: true; + duration: true; + isError: true; + isPartial: true; + isCancelled: true; + level: true; + events: true; + kind: true; + attemptNumber: true; + }; +}>; + +export type PreparedEvent = Omit & { + duration: number; + events: SpanEvents; + style: TaskEventStyle; +}; + +export type PreparedDetailedEvent = Omit & { + duration: number; + events: SpanEvents; + style: TaskEventStyle; +}; + +export type RunPreparedEvent = PreparedEvent & { + taskSlug?: string; +}; + +export type SpanDetail = { + // ============================================================================ + // Core Identity & Structure + // ============================================================================ + spanId: string; // Tree structure, span identification + parentId: string | null; // Tree hierarchy + message: string; // Displayed as span title + + // ============================================================================ + // Status & State + // 
============================================================================ + isError: boolean; // Error status display, filtering, status icons + isPartial: boolean; // In-progress status display, timeline calculations + isCancelled: boolean; // Cancelled status display, status determination + level: TaskEventLevel; // Text styling, timeline rendering decisions + + // ============================================================================ + // Timing + // ============================================================================ + startTime: Date; // Timeline calculations, display + duration: number; // Timeline width, duration display, calculations + + // ============================================================================ + // Content & Display + // ============================================================================ + events: SpanEvents; // Timeline events, SpanEvents component + style: TaskEventStyle; // Icons, variants, accessories (RunIcon, SpanTitle) + properties: Record | string | number | boolean | null | undefined; // Displayed as JSON in span properties (CodeBlock) + + // ============================================================================ + // Entity & Relationships + // ============================================================================ + entity: { + // Used for entity type switching in SpanEntity + type: string | undefined; + id: string | undefined; + }; + + metadata: any; // Used by SpanPresenter for entity processing +}; + +// ============================================================================ +// Span and Link Types +// ============================================================================ + +export type SpanSummaryCommon = { + id: string; + parentId: string | undefined; + runId: string; + data: { + message: string; + events: SpanEvents; + startTime: Date; + duration: number; + isError: boolean; + isPartial: boolean; + isCancelled: boolean; + level: NonNullable; + attemptNumber?: number; + }; +}; + +export type SpanSummary = { + id: string; + parentId: string | undefined; + runId: string; + data: { + message: string; + style: TaskEventStyle; + events: SpanEvents; + startTime: Date; + duration: number; + isError: boolean; + isPartial: boolean; + isCancelled: boolean; + isDebug: boolean; + level: NonNullable; + attemptNumber?: number; + }; +}; + +export type SpanOverride = { + isCancelled?: boolean; + isError?: boolean; + duration?: number; + events?: SpanEvents; +}; + +export type TraceSummary = { + rootSpan: SpanSummary; + spans: Array; + overridesBySpanId?: Record; +}; + +export type SpanDetailedSummary = { + id: string; + parentId: string | undefined; + runId: string; + data: { + message: string; + taskSlug?: string; + events: SpanEvents; + startTime: Date; + duration: number; + isError: boolean; + isPartial: boolean; + isCancelled: boolean; + level: NonNullable; + attemptNumber?: number; + properties?: Attributes; + }; + children: Array; +}; + +export type TraceDetailedSummary = { + traceId: string; + rootSpan: SpanDetailedSummary; +}; + +// ============================================================================ +// Event Repository Interface +// ============================================================================ + +/** + * Interface for the EventRepository class. + * Defines the public API for managing task events, traces, and spans. 
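
`TraceSummary` keeps the spans flat, with `parentId` carrying the tree hierarchy noted in the comments above, and leaves assembly to the consumer. An illustrative, standalone sketch of how such a tree can be built; this is not the webapp's actual tree helper:

```ts
type FlatSpan = { id: string; parentId: string | undefined };
type SpanNode = FlatSpan & { children: SpanNode[] };

function buildTree(spans: FlatSpan[]): SpanNode | undefined {
  const nodes = new Map<string, SpanNode>(
    spans.map((s): [string, SpanNode] => [s.id, { ...s, children: [] }])
  );
  let root: SpanNode | undefined;
  for (const node of nodes.values()) {
    const parent = node.parentId ? nodes.get(node.parentId) : undefined;
    if (parent) parent.children.push(node);
    else root ??= node; // the span without a resolvable parent is the root
  }
  return root;
}
```
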
+ */ +export interface IEventRepository { + maximumLiveReloadingSetting: number; + // Event insertion methods + insertMany(events: CreateEventInput[]): Promise; + insertManyImmediate(events: CreateEventInput[]): Promise; + + // Run event completion methods + completeSuccessfulRunEvent(params: { run: CompleteableTaskRun; endTime?: Date }): Promise; + + completeCachedRunEvent(params: { + run: CompleteableTaskRun; + blockedRun: CompleteableTaskRun; + spanId: string; + parentSpanId: string; + spanCreatedAt: Date; + isError: boolean; + endTime?: Date; + }): Promise; + + completeFailedRunEvent(params: { + run: CompleteableTaskRun; + endTime?: Date; + exception: { message?: string; type?: string; stacktrace?: string }; + }): Promise; + + completeExpiredRunEvent(params: { + run: CompleteableTaskRun; + endTime?: Date; + ttl: string; + }): Promise; + + createAttemptFailedRunEvent(params: { + run: CompleteableTaskRun; + endTime?: Date; + attemptNumber: number; + exception: { message?: string; type?: string; stacktrace?: string }; + }): Promise; + + cancelRunEvent(params: { + reason: string; + run: CompleteableTaskRun; + cancelledAt: Date; + }): Promise; + + // Query methods + getTraceSummary( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise; + + getTraceDetailedSummary( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise; + + getRunEvents( + storeTable: TaskEventStoreTable, + environmentId: string, + traceId: string, + runId: string, + startCreatedAt: Date, + endCreatedAt?: Date + ): Promise; + + getSpan( + storeTable: TaskEventStoreTable, + environmentId: string, + spanId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date, + options?: { includeDebugLogs?: boolean } + ): Promise; + + getSpanOriginalRunId( + storeTable: TaskEventStoreTable, + environmentId: string, + spanId: string, + traceId: string, + startCreatedAt: Date, + endCreatedAt?: Date + ): Promise; + + // Event recording methods + recordEvent( + message: string, + options: TraceEventOptions & { duration?: number; parentId?: string } + ): Promise; + + traceEvent( + message: string, + options: TraceEventOptions & { incomplete?: boolean; isError?: boolean }, + callback: ( + e: EventBuilder, + traceContext: Record, + traceparent?: { traceId: string; spanId: string } + ) => Promise + ): Promise; +} diff --git a/apps/webapp/app/v3/eventRepository/index.server.ts b/apps/webapp/app/v3/eventRepository/index.server.ts new file mode 100644 index 0000000000..cda9e58940 --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/index.server.ts @@ -0,0 +1,172 @@ +import { env } from "~/env.server"; +import { eventRepository } from "./eventRepository.server"; +import { clickhouseEventRepository } from "./clickhouseEventRepositoryInstance.server"; +import { IEventRepository, TraceEventOptions } from "./eventRepository.types"; +import { $replica } from "~/db.server"; +import { logger } from "~/services/logger.server"; +import { FEATURE_FLAG, flags } from "../featureFlags.server"; +import { getTaskEventStore } from "../taskEventStore.server"; + +export function resolveEventRepositoryForStore(store: string | undefined): IEventRepository { + const taskEventStore = store ?? 
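
`resolveEventRepositoryForStore` is the synchronous path: a run that already carries a `taskEventStore` value keeps using it, and only runs without one fall back to the deployment default. A condensed standalone restatement:

```ts
// Standalone restatement; "postgres" stands in for the non-clickhouse default.
function resolveStore(
  store: string | undefined,
  defaultStore: string
): "clickhouse" | "postgres" {
  return (store ?? defaultStore) === "clickhouse" ? "clickhouse" : "postgres";
}

console.log(resolveStore(undefined, "postgres")); // "postgres"
console.log(resolveStore("clickhouse", "postgres")); // "clickhouse"
```

Resolving from the per-run value first means a run's events stay in one backend even if the flag or default changes while it is in flight.
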
env.EVENT_REPOSITORY_DEFAULT_STORE; + + if (taskEventStore === "clickhouse") { + return clickhouseEventRepository; + } + + return eventRepository; +} + +export async function getEventRepository( + featureFlags: Record | undefined +): Promise<{ repository: IEventRepository; store: string }> { + const taskEventRepository = await resolveTaskEventRepositoryFlag(featureFlags); + + if (taskEventRepository === "clickhouse") { + return { repository: clickhouseEventRepository, store: "clickhouse" }; + } + + return { repository: eventRepository, store: getTaskEventStore() }; +} + +async function resolveTaskEventRepositoryFlag( + featureFlags: Record | undefined +): Promise<"clickhouse" | "postgres"> { + const flag = await flags({ + key: FEATURE_FLAG.taskEventRepository, + defaultValue: env.EVENT_REPOSITORY_DEFAULT_STORE, + overrides: featureFlags, + }); + + if (flag === "clickhouse") { + return "clickhouse"; + } + + if (env.EVENT_REPOSITORY_CLICKHOUSE_ROLLOUT_PERCENT) { + const rolloutPercent = env.EVENT_REPOSITORY_CLICKHOUSE_ROLLOUT_PERCENT; + + const randomNumber = Math.random(); + + if (randomNumber < rolloutPercent) { + return "clickhouse"; + } + } + + return flag; +} + +export async function recordRunDebugLog( + runId: string, + message: string, + options: Omit & { + duration?: number; + parentId?: string; + startTime?: Date; + } +): Promise< + | { + success: true; + } + | { + success: false; + code: "RUN_NOT_FOUND" | "FAILED_TO_RECORD_EVENT"; + error?: unknown; + } +> { + return recordRunEvent(runId, message, { + ...options, + attributes: { + ...options?.attributes, + isDebug: true, + }, + }); +} + +async function recordRunEvent( + runId: string, + message: string, + options: Omit & { + duration?: number; + parentId?: string; + startTime?: Date; + } +): Promise< + | { + success: true; + } + | { + success: false; + code: "RUN_NOT_FOUND" | "FAILED_TO_RECORD_EVENT"; + error?: unknown; + } +> { + try { + const foundRun = await findRunForEventCreation(runId); + + if (!foundRun) { + logger.error("Failed to find run for event creation", { runId }); + return { + success: false, + code: "RUN_NOT_FOUND", + }; + } + + const $eventRepository = resolveEventRepositoryForStore(foundRun.taskEventStore); + + const { attributes, startTime, ...optionsRest } = options; + + await $eventRepository.recordEvent(message, { + environment: foundRun.runtimeEnvironment, + taskSlug: foundRun.taskIdentifier, + context: foundRun.traceContext as Record, + attributes: { + runId: foundRun.friendlyId, + ...attributes, + }, + startTime: BigInt((startTime?.getTime() ?? Date.now()) * 1_000_000), + ...optionsRest, + }); + + return { + success: true, + }; + } catch (error) { + logger.error("Failed to record event for run", { + error: error instanceof Error ? 
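
`resolveTaskEventRepositoryFlag` layers a random percentage rollout on top of the explicit flag: only when the flag is not already `"clickhouse"` does `EVENT_REPOSITORY_CLICKHOUSE_ROLLOUT_PERCENT` get a chance to sample the decision. A standalone sketch with an injectable random source for testability:

```ts
// `rolloutPercent` is a fraction in [0, 1], matching the env var's use above.
function pickStore(
  flag: "clickhouse" | "postgres",
  rolloutPercent: number | undefined,
  random: () => number = Math.random
): "clickhouse" | "postgres" {
  if (flag === "clickhouse") return "clickhouse";
  if (rolloutPercent && random() < rolloutPercent) return "clickhouse";
  return flag;
}

// With a 10% rollout, roughly 1 in 10 postgres-flagged decisions flips.
console.log(pickStore("postgres", 0.1, () => 0.05)); // "clickhouse"
console.log(pickStore("postgres", 0.1, () => 0.5)); // "postgres"
```

Because the sample is taken per trigger and then persisted on the run, later lifecycle events do not re-roll the dice.
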
error.message : error, + runId, + }); + + return { + success: false, + code: "FAILED_TO_RECORD_EVENT", + error, + }; + } +} + +async function findRunForEventCreation(runId: string) { + return $replica.taskRun.findFirst({ + where: { + id: runId, + }, + select: { + friendlyId: true, + taskIdentifier: true, + traceContext: true, + taskEventStore: true, + runtimeEnvironment: { + select: { + id: true, + type: true, + organizationId: true, + projectId: true, + project: { + select: { + externalRef: true, + }, + }, + }, + }, + }, + }); +} diff --git a/apps/webapp/app/v3/eventRepository/originalRunIdCache.server.ts b/apps/webapp/app/v3/eventRepository/originalRunIdCache.server.ts new file mode 100644 index 0000000000..caad8885ef --- /dev/null +++ b/apps/webapp/app/v3/eventRepository/originalRunIdCache.server.ts @@ -0,0 +1,75 @@ +import { + createCache, + DefaultStatefulContext, + Namespace, + RedisCacheStore, + type UnkeyCache, +} from "@internal/cache"; +import type { RedisOptions } from "@internal/redis"; +import { env } from "~/env.server"; +import { singleton } from "~/utils/singleton"; + +export type OriginalRunIdCacheOptions = { + redisOptions: RedisOptions; +}; + +const ORIGINAL_RUN_ID_FRESH_TTL = 60000 * 60 * 24 * 30; // 30 days +const ORIGINAL_RUN_ID_STALE_TTL = 60000 * 60 * 24 * 31; // 31 days + +export class OriginalRunIdCache { + private readonly cache: UnkeyCache<{ + originalRunId: string; + }>; + + constructor(options: OriginalRunIdCacheOptions) { + // Initialize cache + const ctx = new DefaultStatefulContext(); + const redisCacheStore = new RedisCacheStore({ + name: "original-run-id-cache", + connection: { + ...options.redisOptions, + keyPrefix: "original-run-id-cache:", + }, + useModernCacheKeyBuilder: true, + }); + + this.cache = createCache({ + originalRunId: new Namespace(ctx, { + stores: [redisCacheStore], + fresh: ORIGINAL_RUN_ID_FRESH_TTL, + stale: ORIGINAL_RUN_ID_STALE_TTL, + }), + }); + } + + public async lookup(traceId: string, spanId: string) { + const result = await this.cache.originalRunId.get(`${traceId}:${spanId}`); + + return result.val; + } + + public async set(traceId: string, spanId: string, originalRunId: string) { + await this.cache.originalRunId.set(`${traceId}:${spanId}`, originalRunId); + } + + public async swr(traceId: string, spanId: string, callback: () => Promise) { + const result = await this.cache.originalRunId.swr(`${traceId}:${spanId}`, callback); + + return result.val; + } +} + +export const originalRunIdCache = singleton( + "originalRunIdCache", + () => + new OriginalRunIdCache({ + redisOptions: { + port: env.REDIS_PORT ?? undefined, + host: env.REDIS_HOST ?? undefined, + username: env.REDIS_USERNAME ?? undefined, + password: env.REDIS_PASSWORD ?? undefined, + enableAutoPipelining: true, + ...(env.REDIS_TLS_DISABLED === "true" ? 
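
`OriginalRunIdCache.swr` leans on `@internal/cache`'s stale-while-revalidate namespaces, with a 30-day fresh TTL and a 31-day stale TTL. A minimal standalone sketch of the fresh/stale behaviour that pattern implies; this is not the library's implementation:

```ts
// A value younger than freshTtl is served as-is; between freshTtl and staleTtl
// it is served immediately while being refreshed in the background; past
// staleTtl it is recomputed before returning.
type Entry<T> = { value: T; storedAt: number };

class SwrCache<T> {
  private map = new Map<string, Entry<T>>();
  constructor(private freshTtl: number, private staleTtl: number) {}

  async swr(key: string, load: () => Promise<T>): Promise<T> {
    const entry = this.map.get(key);
    const age = entry ? Date.now() - entry.storedAt : Infinity;
    if (entry && age < this.freshTtl) return entry.value;
    if (entry && age < this.staleTtl) {
      void load()
        .then((value) => this.map.set(key, { value, storedAt: Date.now() }))
        .catch(() => {}); // background refresh failures keep the stale value
      return entry.value;
    }
    const value = await load();
    this.map.set(key, { value, storedAt: Date.now() });
    return value;
  }
}
```
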
{} : { tls: {} }), + }, + }) +); diff --git a/apps/webapp/app/v3/featureFlags.server.ts b/apps/webapp/app/v3/featureFlags.server.ts index f1bc913c42..72c4f67593 100644 --- a/apps/webapp/app/v3/featureFlags.server.ts +++ b/apps/webapp/app/v3/featureFlags.server.ts @@ -4,11 +4,13 @@ import { prisma, type PrismaClientOrTransaction } from "~/db.server"; export const FEATURE_FLAG = { defaultWorkerInstanceGroupId: "defaultWorkerInstanceGroupId", runsListRepository: "runsListRepository", + taskEventRepository: "taskEventRepository", } as const; const FeatureFlagCatalog = { [FEATURE_FLAG.defaultWorkerInstanceGroupId]: z.string(), [FEATURE_FLAG.runsListRepository]: z.enum(["clickhouse", "postgres"]), + [FEATURE_FLAG.taskEventRepository]: z.enum(["clickhouse", "postgres"]), }; type FeatureFlagKey = keyof typeof FeatureFlagCatalog; @@ -16,6 +18,7 @@ type FeatureFlagKey = keyof typeof FeatureFlagCatalog; export type FlagsOptions = { key: T; defaultValue?: z.infer<(typeof FeatureFlagCatalog)[T]>; + overrides?: Record; }; export function makeFlags(_prisma: PrismaClientOrTransaction = prisma) { @@ -34,7 +37,17 @@ export function makeFlags(_prisma: PrismaClientOrTransaction = prisma) { }, }); - const parsed = FeatureFlagCatalog[opts.key].safeParse(value?.value); + const flagSchema = FeatureFlagCatalog[opts.key]; + + if (opts.overrides?.[opts.key]) { + const parsed = flagSchema.safeParse(opts.overrides[opts.key]); + + if (parsed.success) { + return parsed.data; + } + } + + const parsed = flagSchema.safeParse(value?.value); if (!parsed.success) { return opts.defaultValue; diff --git a/apps/webapp/app/v3/handleSocketIo.server.ts b/apps/webapp/app/v3/handleSocketIo.server.ts index 3ed00f3e07..fbc5042840 100644 --- a/apps/webapp/app/v3/handleSocketIo.server.ts +++ b/apps/webapp/app/v3/handleSocketIo.server.ts @@ -22,7 +22,7 @@ import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; import { authenticateApiRequestWithFailure } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { singleton } from "~/utils/singleton"; -import { recordRunDebugLog } from "./eventRepository.server"; +import { recordRunDebugLog } from "./eventRepository/index.server"; import { sharedQueueTasks } from "./marqs/sharedQueueConsumer.server"; import { engine } from "./runEngine.server"; import { CompleteAttemptService } from "./services/completeAttempt.server"; diff --git a/apps/webapp/app/v3/marqs/fairDequeuingStrategy.server.ts b/apps/webapp/app/v3/marqs/fairDequeuingStrategy.server.ts index b0b3fe89db..cbae7e8468 100644 --- a/apps/webapp/app/v3/marqs/fairDequeuingStrategy.server.ts +++ b/apps/webapp/app/v3/marqs/fairDequeuingStrategy.server.ts @@ -99,7 +99,13 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy { constructor(private options: FairDequeuingStrategyOptions) { const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = new MemoryStore({ + persistentMap: new Map(), + unstableEvictOnSet: { + frequency: 0.01, + maxItems: 500, + }, + }); this._cache = createCache({ concurrencyLimit: new Namespace(ctx, { diff --git a/apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts b/apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts index 20abf87b32..8cc10fd5c0 100644 --- a/apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts +++ b/apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts @@ -22,6 +22,7 @@ import { TaskRunSuccessfulExecutionResult, parsePacket, serverWebsocketMessages, + 
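
The `overrides` option added to `flags` lets a caller pass per-organization values that win over the stored flag while still being validated against the same zod schema; invalid overrides fall through to the stored value. A usage sketch, assuming `organization` is a loaded row with a JSON `featureFlags` column as in the trigger service later in this diff:

```ts
import { FEATURE_FLAG, flags } from "~/v3/featureFlags.server";

async function resolveStoreForOrg(organization: { featureFlags: unknown }) {
  return flags({
    key: FEATURE_FLAG.taskEventRepository,
    defaultValue: "postgres",
    overrides: organization.featureFlags as Record<string, unknown>,
  });
}
```
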
SemanticInternalAttributes, } from "@trigger.dev/core/v3"; import { ZodMessageSender } from "@trigger.dev/core/v3/zodMessageHandler"; import { @@ -1648,6 +1649,7 @@ export const AttemptForExecutionGetPayload = { baseCostInCents: true, maxDurationInSeconds: true, tags: true, + taskEventStore: true, }, }, queue: { @@ -1880,7 +1882,8 @@ class SharedQueueTasks { const variables = await this.#buildEnvironmentVariables( attempt.runtimeEnvironment, taskRun.id, - machinePreset + machinePreset, + taskRun.taskEventStore ?? undefined ); const payload: V3ProdTaskRunExecutionPayload = { @@ -2049,6 +2052,7 @@ class SharedQueueTasks { }, }, machinePreset: true, + taskEventStore: true, }, }); @@ -2071,7 +2075,12 @@ class SharedQueueTasks { const machinePreset = machinePresetFromRun(run) ?? machinePresetFromConfig(run.lockedBy?.machineConfig ?? {}); - const variables = await this.#buildEnvironmentVariables(environment, run.id, machinePreset); + const variables = await this.#buildEnvironmentVariables( + environment, + run.id, + machinePreset, + run.taskEventStore ?? undefined + ); return { traceContext: run.traceContext as Record, @@ -2178,7 +2187,8 @@ class SharedQueueTasks { async #buildEnvironmentVariables( environment: RuntimeEnvironmentForEnvRepo, runId: string, - machinePreset: MachinePreset + machinePreset: MachinePreset, + taskEventStore?: string ): Promise> { const variables = await resolveVariablesForEnvironment(environment); @@ -2187,6 +2197,14 @@ class SharedQueueTasks { machine_preset: machinePreset.name, }); + if (taskEventStore) { + const resourceAttributes = JSON.stringify({ + [SemanticInternalAttributes.TASK_EVENT_STORE]: taskEventStore, + }); + + variables.push(...[{ key: "OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes }]); + } + return [ ...variables, ...[ diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index 888a6f46bb..b36ad1a939 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -1,4 +1,4 @@ -import { SemanticResourceAttributes } from "@opentelemetry/semantic-conventions"; +import { trace, Tracer } from "@opentelemetry/api"; import { SemanticInternalAttributes } from "@trigger.dev/core/v3"; import { AnyValue, @@ -12,94 +12,107 @@ import { SeverityNumber, Span, Span_Event, - Span_Link, Span_SpanKind, Status_StatusCode, } from "@trigger.dev/otlp-importer"; -import { +import { logger } from "~/services/logger.server"; +import { ClickhouseEventRepository } from "./eventRepository/clickhouseEventRepository.server"; +import { clickhouseEventRepository } from "./eventRepository/clickhouseEventRepositoryInstance.server"; +import { generateSpanId } from "./eventRepository/common.server"; +import { EventRepository, eventRepository } from "./eventRepository/eventRepository.server"; +import type { CreatableEventKind, CreatableEventStatus, - EventRepository, - eventRepository, - type CreatableEvent, - CreatableEventEnvironmentType, -} from "./eventRepository.server"; -import { logger } from "~/services/logger.server"; -import { trace, Tracer } from "@opentelemetry/api"; + CreateEventInput, + IEventRepository, +} from "./eventRepository/eventRepository.types"; import { startSpan } from "./tracing.server"; import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server"; - -export type OTLPExporterConfig = { - batchSize: number; - batchInterval: number; -}; +import { env } from "~/env.server"; class OTLPExporter { private _tracer: Tracer; constructor( private readonly 
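
Threading `taskEventStore` through `OTEL_RESOURCE_ATTRIBUTES` means the worker's telemetry carries its own routing hint back to the exporter. A standalone sketch of the round-trip; the attribute key is a stand-in, since the actual value of `SemanticInternalAttributes.TASK_EVENT_STORE` is not shown in this diff:

```ts
const TASK_EVENT_STORE_KEY = "trigger.dev/task-event-store"; // assumed key name

// Producer side: what gets injected into the run's environment variables.
const resourceAttributes = JSON.stringify({ [TASK_EVENT_STORE_KEY]: "clickhouse" });
const envVars = [{ key: "OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes }];

// Consumer side: recover the store from the parsed resource attributes,
// falling back to a default when the attribute is absent.
function storeFromResource(attrs: Record<string, string>, defaultStore: string) {
  return attrs[TASK_EVENT_STORE_KEY] ?? defaultStore;
}

console.log(storeFromResource(JSON.parse(resourceAttributes), "postgres")); // "clickhouse"
```

Note that the value is JSON here, mirroring the diff, rather than the usual comma-separated `key=value` OTel convention, so the consumer is presumably trigger.dev's own SDK rather than a stock OTel resource detector.
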
_eventRepository: EventRepository, + private readonly _clickhouseEventRepository: ClickhouseEventRepository, private readonly _verbose: boolean, private readonly _spanAttributeValueLengthLimit: number ) { this._tracer = trace.getTracer("otlp-exporter"); } - async exportTraces( - request: ExportTraceServiceRequest, - immediate: boolean = false - ): Promise { + async exportTraces(request: ExportTraceServiceRequest): Promise { return await startSpan(this._tracer, "exportTraces", async (span) => { this.#logExportTracesVerbose(request); - const events = this.#filterResourceSpans(request.resourceSpans).flatMap((resourceSpan) => { - return convertSpansToCreateableEvents(resourceSpan, this._spanAttributeValueLengthLimit); - }); - - const enrichedEvents = enrichCreatableEvents(events); - - this.#logEventsVerbose(enrichedEvents, "exportTraces"); + const eventsWithStores = this.#filterResourceSpans(request.resourceSpans).flatMap( + (resourceSpan) => { + return convertSpansToCreateableEvents(resourceSpan, this._spanAttributeValueLengthLimit); + } + ); - span.setAttribute("event_count", enrichedEvents.length); + const eventCount = await this.#exportEvents(eventsWithStores); - if (immediate) { - await this._eventRepository.insertManyImmediate(enrichedEvents); - } else { - await this._eventRepository.insertMany(enrichedEvents); - } + span.setAttribute("event_count", eventCount); return ExportTraceServiceResponse.create(); }); } - async exportLogs( - request: ExportLogsServiceRequest, - immediate: boolean = false - ): Promise { + async exportLogs(request: ExportLogsServiceRequest): Promise { return await startSpan(this._tracer, "exportLogs", async (span) => { this.#logExportLogsVerbose(request); - const events = this.#filterResourceLogs(request.resourceLogs).flatMap((resourceLog) => { - return convertLogsToCreateableEvents(resourceLog, this._spanAttributeValueLengthLimit); - }); + const eventsWithStores = this.#filterResourceLogs(request.resourceLogs).flatMap( + (resourceLog) => { + return convertLogsToCreateableEvents(resourceLog, this._spanAttributeValueLengthLimit); + } + ); + + const eventCount = await this.#exportEvents(eventsWithStores); + + span.setAttribute("event_count", eventCount); + + return ExportLogsServiceResponse.create(); + }); + } + + async #exportEvents( + eventsWithStores: { events: Array; taskEventStore: string }[] + ) { + const eventsGroupedByStore = eventsWithStores.reduce((acc, { events, taskEventStore }) => { + acc[taskEventStore] = acc[taskEventStore] || []; + acc[taskEventStore].push(...events); + return acc; + }, {} as Record>); + + let eventCount = 0; + + for (const [store, events] of Object.entries(eventsGroupedByStore)) { + const eventRepository = this.#getEventRepositoryForStore(store); const enrichedEvents = enrichCreatableEvents(events); - this.#logEventsVerbose(enrichedEvents, "exportLogs"); + this.#logEventsVerbose(enrichedEvents, `exportEvents ${store}`); - span.setAttribute("event_count", enrichedEvents.length); + eventCount += enrichedEvents.length; - if (immediate) { - await this._eventRepository.insertManyImmediate(enrichedEvents); - } else { - await this._eventRepository.insertMany(enrichedEvents); - } + await eventRepository.insertMany(enrichedEvents); + } - return ExportLogsServiceResponse.create(); - }); + return eventCount; } - #logEventsVerbose(events: CreatableEvent[], prefix: string) { + #getEventRepositoryForStore(store: string): IEventRepository { + if (store === "clickhouse") { + return this._clickhouseEventRepository; + } + + return 
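
`#exportEvents` buckets each resource's events by `taskEventStore` so a single OTLP request can fan out to both repositories. The grouping step, restated standalone:

```ts
type Batch<T> = { events: T[]; taskEventStore: string };

function groupByStore<T>(batches: Batch<T>[]): Record<string, T[]> {
  return batches.reduce((acc, { events, taskEventStore }) => {
    (acc[taskEventStore] ??= []).push(...events);
    return acc;
  }, {} as Record<string, T[]>);
}

const grouped = groupByStore([
  { events: ["a"], taskEventStore: "postgres" },
  { events: ["b", "c"], taskEventStore: "clickhouse" },
  { events: ["d"], taskEventStore: "postgres" },
]);
// { postgres: ["a", "d"], clickhouse: ["b", "c"] }
```
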
this._eventRepository; + } + + #logEventsVerbose(events: CreateEventInput[], prefix: string) { if (!this._verbose) return; events.forEach((event) => { @@ -184,12 +197,16 @@ class OTLPExporter { function convertLogsToCreateableEvents( resourceLog: ResourceLogs, spanAttributeValueLengthLimit: number -): Array { +): { events: Array; taskEventStore: string } { const resourceAttributes = resourceLog.resource?.attributes ?? []; const resourceProperties = extractEventProperties(resourceAttributes); - return resourceLog.scopeLogs.flatMap((scopeLog) => { + const taskEventStore = + extractStringAttribute(resourceAttributes, [SemanticInternalAttributes.TASK_EVENT_STORE]) ?? + env.EVENT_REPOSITORY_DEFAULT_STORE; + + const events = resourceLog.scopeLogs.flatMap((scopeLog) => { return scopeLog.logRecords .map((log) => { const logLevel = logLevelToEventLevel(log.severityNumber); @@ -203,9 +220,24 @@ function convertLogsToCreateableEvents( SemanticInternalAttributes.METADATA ); + const properties = + convertKeyValueItemsToMap( + truncateAttributes(log.attributes ?? [], spanAttributeValueLengthLimit), + [], + undefined, + [ + SemanticInternalAttributes.USAGE, + SemanticInternalAttributes.SPAN, + SemanticInternalAttributes.METADATA, + SemanticInternalAttributes.STYLE, + SemanticInternalAttributes.METRIC_EVENTS, + SemanticInternalAttributes.TRIGGER, + ] + ) ?? {}; + return { traceId: binaryToHex(log.traceId), - spanId: eventRepository.generateSpanId(), + spanId: generateSpanId(), parentId: binaryToHex(log.spanId), message: isStringValue(log.body) ? log.body.stringValue.slice(0, 4096) @@ -216,64 +248,20 @@ function convertLogsToCreateableEvents( isError: logLevel === "ERROR", status: logLevelToEventStatus(log.severityNumber), startTime: log.timeUnixNano, - properties: { - ...convertKeyValueItemsToMap( - truncateAttributes(log.attributes ?? [], spanAttributeValueLengthLimit), - [SemanticInternalAttributes.SPAN_ID, SemanticInternalAttributes.SPAN_PARTIAL] - ), - }, + properties, style: convertKeyValueItemsToMap( pickAttributes(log.attributes ?? [], SemanticInternalAttributes.STYLE), [] ), - output: detectPrimitiveValue( - convertKeyValueItemsToMap( - pickAttributes(log.attributes ?? [], SemanticInternalAttributes.OUTPUT), - [] - ), - SemanticInternalAttributes.OUTPUT - ), - payload: detectPrimitiveValue( - convertKeyValueItemsToMap( - pickAttributes(log.attributes ?? [], SemanticInternalAttributes.PAYLOAD), - [] - ), - SemanticInternalAttributes.PAYLOAD - ), - metadata: logProperties.metadata ?? resourceProperties.metadata, - serviceName: logProperties.serviceName ?? resourceProperties.serviceName ?? "unknown", - serviceNamespace: - logProperties.serviceNamespace ?? resourceProperties.serviceNamespace ?? "unknown", + metadata: logProperties.metadata ?? resourceProperties.metadata ?? {}, environmentId: logProperties.environmentId ?? resourceProperties.environmentId ?? "unknown", - environmentType: - logProperties.environmentType ?? resourceProperties.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT" as const, // We've deprecated this but we need to keep it for backwards compatibility organizationId: logProperties.organizationId ?? resourceProperties.organizationId ?? "unknown", projectId: logProperties.projectId ?? resourceProperties.projectId ?? "unknown", - projectRef: logProperties.projectRef ?? resourceProperties.projectRef ?? "unknown", runId: logProperties.runId ?? resourceProperties.runId ?? "unknown", - runIsTest: logProperties.runIsTest ?? resourceProperties.runIsTest ?? 
false, taskSlug: logProperties.taskSlug ?? resourceProperties.taskSlug ?? "unknown", - taskPath: logProperties.taskPath ?? resourceProperties.taskPath ?? "unknown", - workerId: logProperties.workerId ?? resourceProperties.workerId ?? "unknown", - workerVersion: - logProperties.workerVersion ?? resourceProperties.workerVersion ?? "unknown", - queueId: logProperties.queueId ?? resourceProperties.queueId ?? "unknown", - queueName: logProperties.queueName ?? resourceProperties.queueName ?? "unknown", - batchId: logProperties.batchId ?? resourceProperties.batchId, - idempotencyKey: logProperties.idempotencyKey ?? resourceProperties.idempotencyKey, - machinePreset: logProperties.machinePreset ?? resourceProperties.machinePreset, - machinePresetCpu: logProperties.machinePresetCpu ?? resourceProperties.machinePresetCpu, - machinePresetMemory: - logProperties.machinePresetMemory ?? resourceProperties.machinePresetMemory, - machinePresetCentsPerMs: - logProperties.machinePresetCentsPerMs ?? resourceProperties.machinePresetCentsPerMs, - attemptId: - extractStringAttribute( - log.attributes ?? [], - [SemanticInternalAttributes.METADATA, SemanticInternalAttributes.ATTEMPT_ID].join(".") - ) ?? resourceProperties.attemptId, attemptNumber: extractNumberAttribute( log.attributes ?? [], @@ -285,17 +273,23 @@ function convertLogsToCreateableEvents( }) .filter(Boolean); }); + + return { events, taskEventStore }; } function convertSpansToCreateableEvents( resourceSpan: ResourceSpans, spanAttributeValueLengthLimit: number -): Array { +): { events: Array; taskEventStore: string } { const resourceAttributes = resourceSpan.resource?.attributes ?? []; const resourceProperties = extractEventProperties(resourceAttributes); - return resourceSpan.scopeSpans.flatMap((scopeSpan) => { + const taskEventStore = + extractStringAttribute(resourceAttributes, [SemanticInternalAttributes.TASK_EVENT_STORE]) ?? + env.EVENT_REPOSITORY_DEFAULT_STORE; + + const events = resourceSpan.scopeSpans.flatMap((scopeSpan) => { return scopeSpan.spans .map((span) => { const isPartial = isPartialSpan(span); @@ -309,6 +303,21 @@ function convertSpansToCreateableEvents( SemanticInternalAttributes.METADATA ); + const properties = + convertKeyValueItemsToMap( + truncateAttributes(span.attributes ?? [], spanAttributeValueLengthLimit), + [], + undefined, + [ + SemanticInternalAttributes.USAGE, + SemanticInternalAttributes.SPAN, + SemanticInternalAttributes.METADATA, + SemanticInternalAttributes.STYLE, + SemanticInternalAttributes.METRIC_EVENTS, + SemanticInternalAttributes.TRIGGER, + ] + ) ?? {}; + return { traceId: binaryToHex(span.traceId), spanId: isPartial @@ -326,76 +335,22 @@ function convertSpansToCreateableEvents( level: "TRACE" as const, status: spanStatusToEventStatus(span.status), startTime: span.startTimeUnixNano, - links: spanLinksToEventLinks(span.links ?? []), events: spanEventsToEventEvents(span.events ?? []), duration: span.endTimeUnixNano - span.startTimeUnixNano, - properties: { - ...convertKeyValueItemsToMap( - truncateAttributes(span.attributes ?? [], spanAttributeValueLengthLimit), - [SemanticInternalAttributes.SPAN_ID, SemanticInternalAttributes.SPAN_PARTIAL] - ), - }, + properties, style: convertKeyValueItemsToMap( pickAttributes(span.attributes ?? [], SemanticInternalAttributes.STYLE), [] ), - output: detectPrimitiveValue( - convertKeyValueItemsToMap( - pickAttributes(span.attributes ?? 
[], SemanticInternalAttributes.OUTPUT), - [] - ), - SemanticInternalAttributes.OUTPUT - ), - outputType: pickAttributeStringValue( - span.attributes ?? [], - SemanticInternalAttributes.OUTPUT_TYPE - ), - payload: detectPrimitiveValue( - convertKeyValueItemsToMap( - pickAttributes(span.attributes ?? [], SemanticInternalAttributes.PAYLOAD), - [] - ), - SemanticInternalAttributes.PAYLOAD - ), - payloadType: - pickAttributeStringValue( - span.attributes ?? [], - SemanticInternalAttributes.PAYLOAD_TYPE - ) ?? "application/json", - metadata: spanProperties.metadata ?? resourceProperties.metadata, - serviceName: spanProperties.serviceName ?? resourceProperties.serviceName ?? "unknown", - serviceNamespace: - spanProperties.serviceNamespace ?? resourceProperties.serviceNamespace ?? "unknown", + metadata: spanProperties.metadata ?? resourceProperties.metadata ?? {}, environmentId: spanProperties.environmentId ?? resourceProperties.environmentId ?? "unknown", - environmentType: - spanProperties.environmentType ?? resourceProperties.environmentType ?? "DEVELOPMENT", + environmentType: "DEVELOPMENT" as const, organizationId: spanProperties.organizationId ?? resourceProperties.organizationId ?? "unknown", projectId: spanProperties.projectId ?? resourceProperties.projectId ?? "unknown", - projectRef: spanProperties.projectRef ?? resourceProperties.projectRef ?? "unknown", runId: spanProperties.runId ?? resourceProperties.runId ?? "unknown", - runIsTest: spanProperties.runIsTest ?? resourceProperties.runIsTest ?? false, taskSlug: spanProperties.taskSlug ?? resourceProperties.taskSlug ?? "unknown", - taskPath: spanProperties.taskPath ?? resourceProperties.taskPath ?? "unknown", - workerId: spanProperties.workerId ?? resourceProperties.workerId ?? "unknown", - workerVersion: - spanProperties.workerVersion ?? resourceProperties.workerVersion ?? "unknown", - queueId: spanProperties.queueId ?? resourceProperties.queueId ?? "unknown", - queueName: spanProperties.queueName ?? resourceProperties.queueName ?? "unknown", - batchId: spanProperties.batchId ?? resourceProperties.batchId, - idempotencyKey: spanProperties.idempotencyKey ?? resourceProperties.idempotencyKey, - machinePreset: spanProperties.machinePreset ?? resourceProperties.machinePreset, - machinePresetCpu: spanProperties.machinePresetCpu ?? resourceProperties.machinePresetCpu, - machinePresetMemory: - spanProperties.machinePresetMemory ?? resourceProperties.machinePresetMemory, - machinePresetCentsPerMs: - spanProperties.machinePresetCentsPerMs ?? resourceProperties.machinePresetCentsPerMs, - attemptId: - extractStringAttribute( - span.attributes ?? [], - [SemanticInternalAttributes.METADATA, SemanticInternalAttributes.ATTEMPT_ID].join(".") - ) ?? resourceProperties.attemptId, attemptNumber: extractNumberAttribute( span.attributes ?? [], @@ -403,96 +358,32 @@ function convertSpansToCreateableEvents( "." ) ) ?? resourceProperties.attemptNumber, - usageDurationMs: - extractDoubleAttribute( - span.attributes ?? [], - SemanticInternalAttributes.USAGE_DURATION_MS - ) ?? - extractNumberAttribute( - span.attributes ?? [], - SemanticInternalAttributes.USAGE_DURATION_MS - ), - usageCostInCents: extractDoubleAttribute( - span.attributes ?? 
[], - SemanticInternalAttributes.USAGE_COST_IN_CENTS - ), }; }) .filter(Boolean); }); + + return { events, taskEventStore }; } function extractEventProperties(attributes: KeyValue[], prefix?: string) { return { - metadata: convertKeyValueItemsToMap(attributes, [SemanticInternalAttributes.TRIGGER]), - serviceName: extractStringAttribute(attributes, SemanticResourceAttributes.SERVICE_NAME), - serviceNamespace: extractStringAttribute( - attributes, - SemanticResourceAttributes.SERVICE_NAMESPACE - ), + metadata: convertSelectedKeyValueItemsToMap(attributes, [SemanticInternalAttributes.METADATA]), environmentId: extractStringAttribute(attributes, [ prefix, SemanticInternalAttributes.ENVIRONMENT_ID, ]), - environmentType: extractStringAttribute(attributes, [ - prefix, - SemanticInternalAttributes.ENVIRONMENT_TYPE, - ]) as CreatableEventEnvironmentType, organizationId: extractStringAttribute(attributes, [ prefix, SemanticInternalAttributes.ORGANIZATION_ID, ]), projectId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.PROJECT_ID]), - projectRef: extractStringAttribute(attributes, [ - prefix, - SemanticInternalAttributes.PROJECT_REF, - ]), runId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.RUN_ID]), - runIsTest: extractBooleanAttribute( - attributes, - [prefix, SemanticInternalAttributes.RUN_IS_TEST], - false - ), - attemptId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.ATTEMPT_ID]), attemptNumber: extractNumberAttribute(attributes, [ prefix, SemanticInternalAttributes.ATTEMPT_NUMBER, ]), taskSlug: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.TASK_SLUG]), - taskPath: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.TASK_PATH]), - taskExportName: "@deprecated", - workerId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.WORKER_ID]), - workerVersion: extractStringAttribute(attributes, [ - prefix, - SemanticInternalAttributes.WORKER_VERSION, - ]), - queueId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.QUEUE_ID]), - queueName: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.QUEUE_NAME]), - batchId: extractStringAttribute(attributes, [prefix, SemanticInternalAttributes.BATCH_ID]), - idempotencyKey: extractStringAttribute(attributes, [ - prefix, - SemanticInternalAttributes.IDEMPOTENCY_KEY, - ]), - machinePreset: extractStringAttribute(attributes, [ - prefix, - SemanticInternalAttributes.MACHINE_PRESET_NAME, - ]), - machinePresetCpu: - extractDoubleAttribute(attributes, [prefix, SemanticInternalAttributes.MACHINE_PRESET_CPU]) ?? - extractNumberAttribute(attributes, [prefix, SemanticInternalAttributes.MACHINE_PRESET_CPU]), - machinePresetMemory: - extractDoubleAttribute(attributes, [ - prefix, - SemanticInternalAttributes.MACHINE_PRESET_MEMORY, - ]) ?? 
- extractNumberAttribute(attributes, [ - prefix, - SemanticInternalAttributes.MACHINE_PRESET_MEMORY, - ]), - machinePresetCentsPerMs: extractDoubleAttribute(attributes, [ - prefix, - SemanticInternalAttributes.MACHINE_PRESET_CENTS_PER_MS, - ]), }; } @@ -507,29 +398,64 @@ function pickAttributes(attributes: KeyValue[], prefix: string): KeyValue[] { }); } -function pickAttributeStringValue(attributes: KeyValue[], key: string): string | undefined { - const attribute = attributes.find((attribute) => attribute.key === key); +function convertKeyValueItemsToMap( + attributes: KeyValue[], + filteredKeys: string[] = [], + prefix?: string, + filteredPrefixes: string[] = [] +): Record | undefined { + if (!attributes) return; + if (!attributes.length) return; - if (!attribute) return undefined; + let filteredAttributes = attributes.filter((attribute) => !filteredKeys.includes(attribute.key)); - return isStringValue(attribute.value) ? attribute.value.stringValue : undefined; + if (!filteredAttributes.length) return; + + if (filteredPrefixes.length) { + filteredAttributes = filteredAttributes.filter( + (attribute) => !filteredPrefixes.some((prefix) => attribute.key.startsWith(prefix)) + ); + } + + if (!filteredAttributes.length) return; + + const result = filteredAttributes.reduce( + (map: Record, attribute) => { + map[`${prefix ? `${prefix}.` : ""}${attribute.key}`] = isStringValue(attribute.value) + ? attribute.value.stringValue + : isIntValue(attribute.value) + ? Number(attribute.value.intValue) + : isDoubleValue(attribute.value) + ? attribute.value.doubleValue + : isBoolValue(attribute.value) + ? attribute.value.boolValue + : isBytesValue(attribute.value) + ? binaryToHex(attribute.value.bytesValue) + : undefined; + + return map; + }, + {} + ); + + return result; } -function convertKeyValueItemsToMap( +function convertSelectedKeyValueItemsToMap( attributes: KeyValue[], - filteredKeys: string[] = [], + selectedPrefixes: string[] = [], prefix?: string ): Record | undefined { if (!attributes) return; if (!attributes.length) return; - const filteredAttributes = attributes.filter( - (attribute) => !filteredKeys.includes(attribute.key) + let selectedAttributes = attributes.filter((attribute) => + selectedPrefixes.some((prefix) => attribute.key.startsWith(prefix)) ); - if (!filteredAttributes.length) return; + if (!selectedAttributes.length) return; - const result = filteredAttributes.reduce( + const result = selectedAttributes.reduce( (map: Record, attribute) => { map[`${prefix ? `${prefix}.` : ""}${attribute.key}`] = isStringValue(attribute.value) ? attribute.value.stringValue @@ -564,18 +490,7 @@ function detectPrimitiveValue( return attributes; } -function spanLinksToEventLinks(links: Span_Link[]): CreatableEvent["links"] { - return links.map((link) => { - return { - traceId: binaryToHex(link.traceId), - spanId: binaryToHex(link.spanId), - tracestate: link.traceState, - properties: convertKeyValueItemsToMap(link.attributes ?? 
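
The reworked `convertKeyValueItemsToMap` drops whole attribute families by prefix (usage, span, metadata, style, metric events, trigger) instead of enumerating exact keys, while the new `convertSelectedKeyValueItemsToMap` does the inverse for metadata. A simplified sketch of both directions, with stand-in prefixes rather than the real `SemanticInternalAttributes` values:

```ts
type KV = { key: string; value: string | number | boolean };

// Drop exact keys plus anything under a filtered prefix (the properties path).
function toMap(attrs: KV[], filteredKeys: string[], filteredPrefixes: string[]) {
  const kept = attrs
    .filter((a) => !filteredKeys.includes(a.key))
    .filter((a) => !filteredPrefixes.some((p) => a.key.startsWith(p)));
  return Object.fromEntries(kept.map((a) => [a.key, a.value] as const));
}

// Keep only keys under a selected prefix (the metadata path).
function toSelectedMap(attrs: KV[], selectedPrefixes: string[]) {
  const kept = attrs.filter((a) => selectedPrefixes.some((p) => a.key.startsWith(p)));
  return Object.fromEntries(kept.map((a) => [a.key, a.value] as const));
}

const attrs: KV[] = [
  { key: "user.name", value: "ada" },
  { key: "$usage.durationMs", value: 12 },
  { key: "$metadata.attemptId", value: "att_1" },
];

console.log(toMap(attrs, [], ["$usage", "$metadata"])); // { "user.name": "ada" }
console.log(toSelectedMap(attrs, ["$metadata"])); // { "$metadata.attemptId": "att_1" }
```
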
[]), - }; - }); -} - -function spanEventsToEventEvents(events: Span_Event[]): CreatableEvent["events"] { +function spanEventsToEventEvents(events: Span_Event[]): CreateEventInput["events"] { return events.map((event) => { return { name: event.name, @@ -624,7 +539,7 @@ function spanKindToEventKind(kind: Span["kind"]): CreatableEventKind { } } -function logLevelToEventLevel(level: SeverityNumber): CreatableEvent["level"] { +function logLevelToEventLevel(level: SeverityNumber): CreateEventInput["level"] { switch (level) { case SeverityNumber.TRACE: case SeverityNumber.TRACE2: @@ -874,6 +789,7 @@ function truncateAttributes(attributes: KeyValue[], maximumLength: number = 1024 export const otlpExporter = new OTLPExporter( eventRepository, + clickhouseEventRepository, process.env.OTLP_EXPORTER_VERBOSE === "1", process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT ? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10) diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts index 726cd68ff7..7c0b7003e0 100644 --- a/apps/webapp/app/v3/runEngineHandlers.server.ts +++ b/apps/webapp/app/v3/runEngineHandlers.server.ts @@ -8,14 +8,11 @@ import { logger } from "~/services/logger.server"; import { updateMetadataService } from "~/services/metadata/updateMetadataInstance.server"; import { reportInvocationUsage } from "~/services/platform.v3.server"; import { MetadataTooLargeError } from "~/utils/packets"; -import { - createExceptionPropertiesFromError, - eventRepository, - recordRunDebugLog, -} from "./eventRepository.server"; import { roomFromFriendlyRunId, socketIo } from "./handleSocketIo.server"; import { engine } from "./runEngine.server"; import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server"; +import { resolveEventRepositoryForStore, recordRunDebugLog } from "./eventRepository/index.server"; +import { createExceptionPropertiesFromError } from "./eventRepository/common.server"; export function registerRunEngineEventBusHandlers() { engine.eventBus.on("runSucceeded", async ({ time, run }) => { @@ -38,6 +35,7 @@ export function registerRunEngineEventBusHandlers() { environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -50,6 +48,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + const [completeSuccessfulRunEventError] = await tryCatch( eventRepository.completeSuccessfulRunEvent({ run: taskRun, @@ -102,6 +102,7 @@ export function registerRunEngineEventBusHandlers() { environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -114,6 +115,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(taskRun.taskEventStore); + const [completeFailedRunEventError] = await tryCatch( eventRepository.completeFailedRunEvent({ run: taskRun, @@ -153,6 +156,7 @@ export function registerRunEngineEventBusHandlers() { environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -165,6 +169,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(taskRun.taskEventStore); + const [createAttemptFailedRunEventError] = await tryCatch( eventRepository.createAttemptFailedRunEvent({ run: taskRun, @@ -245,6 +251,7 @@ export function registerRunEngineEventBusHandlers() { 
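
Several call sites in this diff convert `Date` milliseconds to nanosecond `BigInt`s via `* 1_000_000` before recording events. One subtlety worth noting: the multiplication happens in `number` space, where current epoch-nanosecond values exceed `Number.MAX_SAFE_INTEGER`, so the product is rounded to the nearest representable double (a multiple of 256 ns at today's timestamps). Converting first and scaling in `BigInt` space keeps it exact:

```ts
const time = new Date();

// As written in the handlers: ms -> ns while still a number (rounds the low bits).
const approximate = BigInt(time.getTime() * 1_000_000);

// Exact alternative: convert to BigInt first, then scale.
const exact = BigInt(time.getTime()) * 1_000_000n;
```

For span timelines measured in milliseconds this rounding is harmless, which may be why the simpler form is used.
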
environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -263,6 +270,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(blockedRun.taskEventStore); + const [completeCachedRunEventError] = await tryCatch( eventRepository.completeCachedRunEvent({ run: cachedRun, @@ -308,6 +317,7 @@ export function registerRunEngineEventBusHandlers() { environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -320,6 +330,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(taskRun.taskEventStore); + const [completeExpiredRunEventError] = await tryCatch( eventRepository.completeExpiredRunEvent({ run: taskRun, @@ -356,6 +368,7 @@ export function registerRunEngineEventBusHandlers() { environmentType: true, isTest: true, organizationId: true, + taskEventStore: true, }, }) ); @@ -368,6 +381,8 @@ export function registerRunEngineEventBusHandlers() { return; } + const eventRepository = resolveEventRepositoryForStore(taskRun.taskEventStore); + const error = createJsonErrorObject(run.error); const [cancelRunEventError] = await tryCatch( @@ -394,6 +409,8 @@ export function registerRunEngineEventBusHandlers() { retryMessage += ` after OOM`; } + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + await eventRepository.recordEvent(retryMessage, { startTime: BigInt(time.getTime() * 1000000), taskSlug: run.taskIdentifier, @@ -407,7 +424,6 @@ export function registerRunEngineEventBusHandlers() { style: { icon: "schedule-attempt", }, - queueName: run.queue, }, context: run.traceContext as Record, endTime: retryAt, diff --git a/apps/webapp/app/v3/services/cancelAttempt.server.ts b/apps/webapp/app/v3/services/cancelAttempt.server.ts index 04d61f42f8..79b05ede26 100644 --- a/apps/webapp/app/v3/services/cancelAttempt.server.ts +++ b/apps/webapp/app/v3/services/cancelAttempt.server.ts @@ -1,11 +1,9 @@ import { $transaction, type PrismaClientOrTransaction, prisma } from "~/db.server"; import { type AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; -import { eventRepository } from "../eventRepository.server"; import { isCancellableRunStatus } from "../taskStatus"; import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; -import { getTaskEventStoreTableForRun } from "../taskEventStore.server"; export class CancelAttemptService extends BaseService { public async call( diff --git a/apps/webapp/app/v3/services/cancelTaskRun.server.ts b/apps/webapp/app/v3/services/cancelTaskRun.server.ts index ef2bc5ee1c..0c27a0b957 100644 --- a/apps/webapp/app/v3/services/cancelTaskRun.server.ts +++ b/apps/webapp/app/v3/services/cancelTaskRun.server.ts @@ -1,8 +1,5 @@ import { RunEngineVersion, type TaskRun } from "@trigger.dev/database"; -import { logger } from "~/services/logger.server"; -import { eventRepository } from "../eventRepository.server"; import { engine } from "../runEngine.server"; -import { getTaskEventStoreTableForRun } from "../taskEventStore.server"; import { BaseService } from "./baseService.server"; import { CancelTaskRunServiceV1 } from "./cancelTaskRunV1.server"; diff --git a/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts b/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts index ea152233ab..4b4482a1da 100644 --- 
a/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts +++ b/apps/webapp/app/v3/services/cancelTaskRunV1.server.ts @@ -1,7 +1,6 @@ import { type Prisma } from "@trigger.dev/database"; import assertNever from "assert-never"; import { logger } from "~/services/logger.server"; -import { eventRepository } from "../eventRepository.server"; import { socketIo } from "../handleSocketIo.server"; import { devPubSub } from "../marqs/devPubSub.server"; import { CANCELLABLE_ATTEMPT_STATUSES, isCancellableRunStatus } from "../taskStatus"; @@ -11,6 +10,7 @@ import { CancelTaskAttemptDependenciesService } from "./cancelTaskAttemptDepende import { CancelableTaskRun } from "./cancelTaskRun.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { tryCatch } from "@trigger.dev/core/utils"; +import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; type ExtendedTaskRun = Prisma.TaskRunGetPayload<{ include: { @@ -101,6 +101,8 @@ export class CancelTaskRunServiceV1 extends BaseService { }, }); + const eventRepository = resolveEventRepositoryForStore(cancelledTaskRun.taskEventStore); + const [cancelRunEventError] = await tryCatch( eventRepository.cancelRunEvent({ reason: opts.reason, diff --git a/apps/webapp/app/v3/services/completeAttempt.server.ts b/apps/webapp/app/v3/services/completeAttempt.server.ts index 7998204b62..99169331a9 100644 --- a/apps/webapp/app/v3/services/completeAttempt.server.ts +++ b/apps/webapp/app/v3/services/completeAttempt.server.ts @@ -1,4 +1,4 @@ -import { Attributes } from "@opentelemetry/api"; +import { tryCatch } from "@trigger.dev/core/utils"; import { MachinePresetName, TaskRunContext, @@ -21,19 +21,17 @@ import { PrismaClientOrTransaction } from "~/db.server"; import { env } from "~/env.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; -import { safeJsonParse } from "~/utils/json"; import { marqs } from "~/v3/marqs/index.server"; -import { createExceptionPropertiesFromError, eventRepository } from "../eventRepository.server"; import { FailedTaskRunRetryHelper } from "../failedTaskRun.server"; import { socketIo } from "../handleSocketIo.server"; -import { getTaskEventStoreTableForRun } from "../taskEventStore.server"; +import { createExceptionPropertiesFromError } from "../eventRepository/common.server"; import { FAILED_RUN_STATUSES, isFinalAttemptStatus, isFinalRunStatus } from "../taskStatus"; import { BaseService } from "./baseService.server"; import { CancelAttemptService } from "./cancelAttempt.server"; import { CreateCheckpointService } from "./createCheckpoint.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { RetryAttemptService } from "./retryAttempt.server"; -import { tryCatch } from "@trigger.dev/core/utils"; +import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; type FoundAttempt = Awaited>; @@ -165,6 +163,8 @@ export class CompleteAttemptService extends BaseService { env, }); + const eventRepository = resolveEventRepositoryForStore(taskRunAttempt.taskRun.taskEventStore); + const [completeSuccessfulRunEventError] = await tryCatch( eventRepository.completeSuccessfulRunEvent({ run: taskRunAttempt.taskRun, @@ -316,6 +316,8 @@ export class CompleteAttemptService extends BaseService { exitRun(taskRunAttempt.taskRunId); } + const eventRepository = resolveEventRepositoryForStore(taskRunAttempt.taskRun.taskEventStore); + const [completeFailedRunEventError] = await tryCatch( 
eventRepository.completeFailedRunEvent({ run: taskRunAttempt.taskRun, @@ -536,6 +538,8 @@ export class CompleteAttemptService extends BaseService { }) { const retryAt = new Date(executionRetry.timestamp); + const eventRepository = resolveEventRepositoryForStore(taskRunAttempt.taskRun.taskEventStore); + // Retry the task run await eventRepository.recordEvent( `Retry #${execution.attempt.number} delay${oomMachine ? " after OOM" : ""}`, @@ -555,8 +559,6 @@ export class CompleteAttemptService extends BaseService { style: { icon: "schedule-attempt", }, - queueId: taskRunAttempt.queueId, - queueName: taskRunAttempt.taskRun.queue, }, context: taskRunAttempt.taskRun.traceContext as Record, spanIdSeed: `retry-${taskRunAttempt.number + 1}`, diff --git a/apps/webapp/app/v3/services/crashTaskRun.server.ts b/apps/webapp/app/v3/services/crashTaskRun.server.ts index 61ef107dab..9ed7d8b7aa 100644 --- a/apps/webapp/app/v3/services/crashTaskRun.server.ts +++ b/apps/webapp/app/v3/services/crashTaskRun.server.ts @@ -1,14 +1,13 @@ +import { tryCatch } from "@trigger.dev/core/utils"; +import { sanitizeError, TaskRunErrorCodes, TaskRunInternalError } from "@trigger.dev/core/v3"; import { TaskRun, TaskRunAttempt } from "@trigger.dev/database"; -import { eventRepository } from "../eventRepository.server"; -import { BaseService } from "./baseService.server"; -import { logger } from "~/services/logger.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { logger } from "~/services/logger.server"; +import { FailedTaskRunRetryHelper } from "../failedTaskRun.server"; import { CRASHABLE_ATTEMPT_STATUSES, isCrashableRunStatus } from "../taskStatus"; -import { sanitizeError, TaskRunErrorCodes, TaskRunInternalError } from "@trigger.dev/core/v3"; +import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; -import { FailedTaskRunRetryHelper } from "../failedTaskRun.server"; -import { getTaskEventStoreTableForRun } from "../taskEventStore.server"; -import { tryCatch } from "@trigger.dev/core/utils"; +import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; export type CrashTaskRunServiceOptions = { reason?: string; @@ -121,6 +120,8 @@ export class CrashTaskRunService extends BaseService { }, }); + const eventRepository = resolveEventRepositoryForStore(crashedTaskRun.taskEventStore); + const [createAttemptFailedEventError] = await tryCatch( eventRepository.completeFailedRunEvent({ run: crashedTaskRun, diff --git a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts index 6658907b0d..aa69f4c9f6 100644 --- a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts +++ b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts @@ -1,11 +1,10 @@ import { PrismaClientOrTransaction } from "~/db.server"; import { logger } from "~/services/logger.server"; import { commonWorker } from "../commonWorker.server"; -import { eventRepository } from "../eventRepository.server"; import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; -import { getTaskEventStoreTableForRun } from "../taskEventStore.server"; import { tryCatch } from "@trigger.dev/core/utils"; +import { resolveEventRepositoryForStore } from "../eventRepository/index.server"; export class ExpireEnqueuedRunService extends BaseService { public static async ack(runId: string, tx?: PrismaClientOrTransaction) { @@ -79,6 +78,8 @@ export class 
ExpireEnqueuedRunService extends BaseService { }, }); + const eventRepository = resolveEventRepositoryForStore(run.taskEventStore); + if (run.ttl) { const [completeExpiredRunEventError] = await tryCatch( eventRepository.completeExpiredRunEvent({ diff --git a/apps/webapp/app/v3/services/tracePubSub.server.ts b/apps/webapp/app/v3/services/tracePubSub.server.ts new file mode 100644 index 0000000000..f94967d6b9 --- /dev/null +++ b/apps/webapp/app/v3/services/tracePubSub.server.ts @@ -0,0 +1,75 @@ +import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server"; +import { EventEmitter } from "node:events"; +import { env } from "~/env.server"; +import { singleton } from "~/utils/singleton"; + +export type TracePubSubOptions = { + redis: RedisWithClusterOptions; +}; + +export class TracePubSub { + private _publisher: RedisClient; + private _subscriberCount = 0; + + constructor(private _options: TracePubSubOptions) { + this._publisher = createRedisClient("trigger:eventRepoPublisher", this._options.redis); + } + + // TODO: do this more efficiently + async publish(traceIds: string[]) { + if (traceIds.length === 0) return; + const uniqueTraces = new Set(traceIds.map((e) => `events:${e}`)); + + await Promise.allSettled( + Array.from(uniqueTraces).map((traceId) => + this._publisher.publish(traceId, new Date().toISOString()) + ) + ); + } + + async subscribeToTrace(traceId: string) { + const redis = createRedisClient("trigger:eventRepoSubscriber", this._options.redis); + + const channel = `events:${traceId}`; + + // Subscribe to the channel. + await redis.subscribe(channel); + + // Increment the subscriber count. + this._subscriberCount++; + + const eventEmitter = new EventEmitter(); + + // Define the message handler. + redis.on("message", (_, message) => { + eventEmitter.emit("message", message); + }); + + // Return a function that can be used to unsubscribe. 
+ const unsubscribe = async () => { + await redis.unsubscribe(channel); + redis.quit(); + this._subscriberCount--; + }; + + return { + unsubscribe, + eventEmitter, + }; + } +} + +export const tracePubSub = singleton("tracePubSub", initializeTracePubSub); + +function initializeTracePubSub() { + return new TracePubSub({ + redis: { + port: env.PUBSUB_REDIS_PORT, + host: env.PUBSUB_REDIS_HOST, + username: env.PUBSUB_REDIS_USERNAME, + password: env.PUBSUB_REDIS_PASSWORD, + tlsDisabled: env.PUBSUB_REDIS_TLS_DISABLED === "true", + clusterMode: env.PUBSUB_REDIS_CLUSTER_MODE_ENABLED === "1", + }, + }); +} diff --git a/apps/webapp/app/v3/services/triggerTask.server.ts b/apps/webapp/app/v3/services/triggerTask.server.ts index f2e0d3c08a..5f56a35af2 100644 --- a/apps/webapp/app/v3/services/triggerTask.server.ts +++ b/apps/webapp/app/v3/services/triggerTask.server.ts @@ -9,11 +9,12 @@ import { RunEngineTriggerTaskService } from "~/runEngine/services/triggerTask.se import { DefaultTriggerTaskValidator } from "~/runEngine/validators/triggerTaskValidator"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { determineEngineVersion } from "../engineVersion.server"; -import { eventRepository } from "../eventRepository.server"; +import { eventRepository } from "../eventRepository/eventRepository.server"; import { tracer } from "../tracer.server"; import { WithRunEngine } from "./baseService.server"; import { TriggerTaskServiceV1 } from "./triggerTaskV1.server"; import { env } from "~/env.server"; +import { clickhouseEventRepository } from "../eventRepository/clickhouseEventRepositoryInstance.server"; export type TriggerTaskServiceOptions = { idempotencyKey?: string; @@ -93,7 +94,10 @@ export class TriggerTaskService extends WithRunEngine { body: TriggerTaskRequestBody, options: TriggerTaskServiceOptions = {} ): Promise { - const traceEventConcern = new DefaultTraceEventsConcern(eventRepository); + const traceEventConcern = new DefaultTraceEventsConcern( + eventRepository, + clickhouseEventRepository + ); const service = new RunEngineTriggerTaskService({ prisma: this._prisma, diff --git a/apps/webapp/app/v3/services/triggerTaskV1.server.ts b/apps/webapp/app/v3/services/triggerTaskV1.server.ts index 3a5ac0f961..c193f142d6 100644 --- a/apps/webapp/app/v3/services/triggerTaskV1.server.ts +++ b/apps/webapp/app/v3/services/triggerTaskV1.server.ts @@ -1,9 +1,8 @@ import { IOPacket, packetRequiresOffloading, - SemanticInternalAttributes, - taskRunErrorToString, taskRunErrorEnhancer, + taskRunErrorToString, TriggerTaskRequestBody, } from "@trigger.dev/core/v3"; import { @@ -12,6 +11,7 @@ import { stringifyDuration, } from "@trigger.dev/core/v3/isomorphic"; import { Prisma } from "@trigger.dev/database"; +import { z } from "zod"; import { env } from "~/env.server"; import { createTag, MAX_TAGS_PER_RUN } from "~/models/taskRunTag.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; @@ -22,7 +22,7 @@ import { parseDelay } from "~/utils/delays"; import { resolveIdempotencyKeyTTL } from "~/utils/idempotencyKeys.server"; import { handleMetadataPacket } from "~/utils/packets"; import { marqs } from "~/v3/marqs/index.server"; -import { eventRepository } from "../eventRepository.server"; +import { getEventRepository } from "../eventRepository/index.server"; import { generateFriendlyId } from "../friendlyIdentifiers"; import { findCurrentWorkerFromEnvironment } from "../models/workerDeployment.server"; import { guardQueueSizeLimitsForEnv } from "../queueSizeLimits.server"; @@ 
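
A usage sketch for `TracePubSub`: publishers fan run updates out on a per-trace channel, and each subscriber gets an `EventEmitter` plus an `unsubscribe` handle it must call to release its dedicated Redis connection:

```ts
import { tracePubSub } from "~/v3/services/tracePubSub.server";

async function watchTrace(traceId: string) {
  const { eventEmitter, unsubscribe } = await tracePubSub.subscribeToTrace(traceId);

  eventEmitter.on("message", (publishedAt: string) => {
    // React to new events for this trace, e.g. refresh the live run view.
  });

  // Publishers call this after flushing new events for one or more traces:
  await tracePubSub.publish([traceId]);

  // Each subscriber holds its own Redis connection; release it when done.
  await unsubscribe();
}
```
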
-33,6 +33,7 @@ import { startActiveSpan } from "../tracer.server";
 import { clampMaxDuration } from "../utils/maxDuration";
 import { BaseService, ServiceValidationError } from "./baseService.server";
 import { EnqueueDelayedRunService } from "./enqueueDelayedRun.server";
+import { enqueueRun } from "./enqueueRun.server";
 import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server";
 import {
   MAX_ATTEMPTS,
@@ -40,9 +41,6 @@ import {
   TriggerTaskServiceOptions,
   TriggerTaskServiceResult,
 } from "./triggerTask.server";
-import { getTaskEventStore } from "../taskEventStore.server";
-import { enqueueRun } from "./enqueueRun.server";
-import { z } from "zod";

 // This is here for backwards compatibility for v3 users
 const QueueOptions = z.object({
@@ -290,26 +288,24 @@ export class TriggerTaskServiceV1 extends BaseService {
         })
       : undefined;

+    const { repository, store } = await getEventRepository(
+      environment.organization.featureFlags as Record<string, unknown>
+    );
+
     try {
-      const result = await eventRepository.traceEvent(
+      const result = await repository.traceEvent(
         taskId,
         {
           context: options.traceContext,
           spanParentAsLink: options.spanParentAsLink,
-          parentAsLinkType: options.parentAsLinkType,
           kind: "SERVER",
           environment,
           taskSlug: taskId,
           attributes: {
-            properties: {
-              [SemanticInternalAttributes.SHOW_ACTIONS]: true,
-            },
+            properties: {},
             style: {
               icon: options.customIcon ?? "task",
             },
-            runIsTest: body.options?.test ?? false,
-            batchId: options.batchId,
-            idempotencyKey,
           },
           incomplete: true,
           immediate: true,
@@ -340,7 +336,6 @@ export class TriggerTaskServiceV1 extends BaseService {
       queueName = sanitizeQueueName(`task/${taskId}`);
     }

-    event.setAttribute("queueName", queueName);
     span.setAttribute("queueName", queueName);

     //upsert tags
@@ -405,7 +400,7 @@ export class TriggerTaskServiceV1 extends BaseService {
       queuedAt: delayUntil ? undefined : new Date(),
       queueTimestamp,
       maxAttempts: body.options?.maxAttempts,
-      taskEventStore: getTaskEventStore(),
+      taskEventStore: store,
       ttl,
       tags:
         tagIds.length === 0
diff --git a/apps/webapp/app/v3/services/worker/workerGroupTokenService.server.ts b/apps/webapp/app/v3/services/worker/workerGroupTokenService.server.ts
index 37aab78e62..befe2a0a89 100644
--- a/apps/webapp/app/v3/services/worker/workerGroupTokenService.server.ts
+++ b/apps/webapp/app/v3/services/worker/workerGroupTokenService.server.ts
@@ -1,10 +1,11 @@
-import { createCache, DefaultStatefulContext, MemoryStore, Namespace } from "@internal/cache";
+import { createCache, createMemoryStore, DefaultStatefulContext, Namespace } from "@internal/cache";
 import {
   CheckpointInput,
   CompleteRunAttemptResult,
   DequeuedMessage,
   ExecutionResult,
   MachinePreset,
+  SemanticInternalAttributes,
   StartRunAttemptResult,
   TaskRunExecutionResult,
 } from "@trigger.dev/core/v3";
@@ -38,7 +39,7 @@ function createAuthenticatedWorkerInstanceCache() {
     authenticatedWorkerInstance: new Namespace(
       new DefaultStatefulContext(),
       {
-        stores: [new MemoryStore({ persistentMap: new Map() })],
+        stores: [createMemoryStore(1000, 0.001)],
         fresh: 60_000 * 10, // 10 minutes
         stale: 60_000 * 11, // 11 minutes
       }
     )
@@ -444,7 +445,8 @@ export class AuthenticatedWorkerInstance extends WithRunEngine {
           environment,
           engineResult.run.id,
           engineResult.execution.machine ?? defaultMachinePreset,
-          environment.parentEnvironment ?? undefined
+          environment.parentEnvironment ?? undefined,
+          engineResult.run.taskEventStore ??
undefined
         )
       : {};
@@ -544,7 +546,8 @@ export class AuthenticatedWorkerInstance extends WithRunEngine {
     environment: RuntimeEnvironment,
     runId: string,
     machinePreset: MachinePreset,
-    parentEnvironment?: RuntimeEnvironment
+    parentEnvironment?: RuntimeEnvironment,
+    taskEventStore?: string
   ): Promise<Record<string, string>> {
     const variables = await resolveVariablesForEnvironment(environment, parentEnvironment);
@@ -561,6 +564,19 @@
       ]
     );

+    if (taskEventStore) {
+      const resourceAttributes = JSON.stringify({
+        [SemanticInternalAttributes.TASK_EVENT_STORE]: taskEventStore,
+      });
+
+      variables.push(
+        ...[
+          { key: "OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes },
+          { key: "TRIGGER_OTEL_RESOURCE_ATTRIBUTES", value: resourceAttributes },
+        ]
+      );
+    }
+
     return variables.reduce((acc: Record<string, string>, curr) => {
       acc[curr.key] = curr.value;
       return acc;
diff --git a/apps/webapp/app/v3/taskEventStore.server.ts b/apps/webapp/app/v3/taskEventStore.server.ts
index 251db6642c..6a80aa9926 100644
--- a/apps/webapp/app/v3/taskEventStore.server.ts
+++ b/apps/webapp/app/v3/taskEventStore.server.ts
@@ -9,7 +9,6 @@ export type TraceEvent = Pick<
   | "spanId"
   | "parentId"
   | "runId"
-  | "idempotencyKey"
   | "message"
   | "style"
   | "startTime"
@@ -19,7 +18,6 @@
   | "isCancelled"
   | "level"
   | "events"
-  | "environmentType"
   | "kind"
   | "attemptNumber"
 >;
@@ -29,7 +27,6 @@ export type DetailedTraceEvent = Pick<
   | "spanId"
   | "parentId"
   | "runId"
-  | "idempotencyKey"
   | "message"
   | "style"
   | "startTime"
@@ -39,15 +36,9 @@
   | "isCancelled"
   | "level"
   | "events"
-  | "environmentType"
   | "kind"
   | "taskSlug"
-  | "taskPath"
-  | "workerVersion"
-  | "queueName"
-  | "machinePreset"
   | "properties"
-  | "output"
   | "attemptNumber"
 >;
@@ -179,7 +170,6 @@
           "spanId",
           "parentId",
           "runId",
-          "idempotencyKey",
           LEFT(message, 256) as message,
           style,
           "startTime",
@@ -189,7 +179,6 @@
           "isCancelled",
           level,
           events,
-          "environmentType",
           "kind",
           "attemptNumber"
         FROM "TaskEventPartitioned"
@@ -212,7 +201,6 @@
           "spanId",
           "parentId",
           "runId",
-          "idempotencyKey",
           LEFT(message, 256) as message,
           style,
           "startTime",
@@ -222,7 +210,6 @@
           "isCancelled",
           level,
           events,
-          "environmentType",
           "kind",
           "attemptNumber"
         FROM "TaskEvent"
@@ -259,7 +246,6 @@
           "spanId",
           "parentId",
           "runId",
-          "idempotencyKey",
           message,
           style,
           "startTime",
@@ -269,15 +255,9 @@
           "isCancelled",
           level,
           events,
-          "environmentType",
           "kind",
           "taskSlug",
-          "taskPath",
-          "workerVersion",
-          "queueName",
-          "machinePreset",
           properties,
-          output,
           "attemptNumber"
         FROM "TaskEventPartitioned"
         WHERE
@@ -298,7 +278,6 @@
           "spanId",
           "parentId",
           "runId",
-          "idempotencyKey",
           message,
           style,
           "startTime",
@@ -308,15 +287,9 @@
           "isCancelled",
           level,
           events,
-          "environmentType",
           "kind",
           "taskSlug",
-          "taskPath",
-          "workerVersion",
-          "queueName",
-          "machinePreset",
           properties,
-          output,
           "attemptNumber"
         FROM "TaskEvent"
         WHERE "traceId" = ${traceId}
diff --git a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts
index 70cf1e3749..f718c13d2d 100644
--- a/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts
+++ b/apps/webapp/app/v3/utils/enrichCreatableEvents.server.ts
@@ -1,12 +1,12 @@
-import type { CreatableEvent }
from "../eventRepository.server"; +import type { CreateEventInput } from "../eventRepository/eventRepository.types"; -export function enrichCreatableEvents(events: CreatableEvent[]) { +export function enrichCreatableEvents(events: CreateEventInput[]) { return events.map((event) => { return enrichCreatableEvent(event); }); } -function enrichCreatableEvent(event: CreatableEvent): CreatableEvent { +function enrichCreatableEvent(event: CreateEventInput): CreateEventInput { const message = formatPythonStyle(event.message, event.properties); event.message = message; @@ -15,10 +15,14 @@ function enrichCreatableEvent(event: CreatableEvent): CreatableEvent { return event; } -function enrichStyle(event: CreatableEvent) { +function enrichStyle(event: CreateEventInput) { const baseStyle = event.style ?? {}; const props = event.properties; + if (!props) { + return baseStyle; + } + // Direct property access and early returns // GenAI System check const system = props["gen_ai.system"]; @@ -66,7 +70,7 @@ function formatPythonStyle(template: string, values: Record): strin return template.replace(/\{([^}]+?)(?:!r)?\}/g, (match, key) => { const hasRepr = match.endsWith("!r}"); const actualKey = hasRepr ? key : key; - const value = values[actualKey]; + const value = values?.[actualKey]; if (value === undefined) { return match; diff --git a/apps/webapp/test/authorizationRateLimitMiddleware.test.ts b/apps/webapp/test/authorizationRateLimitMiddleware.test.ts index af88d4c4af..5c02abfbab 100644 --- a/apps/webapp/test/authorizationRateLimitMiddleware.test.ts +++ b/apps/webapp/test/authorizationRateLimitMiddleware.test.ts @@ -368,6 +368,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("authorizationRateLimitMiddleware", limiterCache: { fresh: 1000, // 1 second stale: 2000, // 2 seconds + maxItems: 1000, }, limiterConfigOverride: async (authorizationValue) => { configOverrideCalls++; diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index e21e0dbb2e..36dabd008c 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -79,16 +79,19 @@ class MockTriggerTaskValidator implements TriggerTaskValidator { class MockTraceEventConcern implements TraceEventConcern { async traceRun( request: TriggerTaskRequest, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise { - return await callback({ - traceId: "test", - spanId: "test", - traceContext: {}, - traceparent: undefined, - setAttribute: () => {}, - failWithError: () => {}, - }); + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + }, + "test" + ); } async traceIdempotentRun( @@ -99,16 +102,19 @@ class MockTraceEventConcern implements TraceEventConcern { incomplete: boolean; isError: boolean; }, - callback: (span: TracedEventSpan) => Promise + callback: (span: TracedEventSpan, store: string) => Promise ): Promise { - return await callback({ - traceId: "test", - spanId: "test", - traceContext: {}, - traceparent: undefined, - setAttribute: () => {}, - failWithError: () => {}, - }); + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + }, + "test" + ); } } diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 7d5a81e30f..ad45dd3dbc 100644 --- a/docker/docker-compose.yml 
+++ b/docker/docker-compose.yml
@@ -4,7 +4,8 @@ volumes:
   database-data:
   database-data-alt:
   redis-data:
-  clickhouse:
+  clickhouse-data:
+  clickhouse-logs:

 networks:
   app_network:
@@ -76,17 +77,23 @@ services:
       - database

   clickhouse:
-    image: bitnamilegacy/clickhouse:latest
+    image: clickhouse/clickhouse-server:25.6.2
     restart: always
     container_name: clickhouse
+    ulimits:
+      nofile:
+        soft: 262144
+        hard: 262144
     environment:
-      CLICKHOUSE_ADMIN_USER: default
-      CLICKHOUSE_ADMIN_PASSWORD: password
+      CLICKHOUSE_USER: default
+      CLICKHOUSE_PASSWORD: password
+      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
     ports:
      - "8123:8123"
      - "9000:9000"
     volumes:
-      - clickhouse:/bitnami/clickhouse
+      - clickhouse-data:/var/lib/clickhouse
+      - clickhouse-logs:/var/log/clickhouse-server
     networks:
       - app_network
     healthcheck:
@@ -105,10 +112,10 @@ services:
         "--query",
         "SELECT 1",
       ]
-      interval: 3s
-      timeout: 5s
-      retries: 5
-      start_period: 10s
+      interval: "3s"
+      timeout: "5s"
+      retries: 5
+      start_period: "10s"

   clickhouse_migrator:
     build:
diff --git a/internal-packages/cache/package.json b/internal-packages/cache/package.json
index 63dd03a4b4..267ff2d92d 100644
--- a/internal-packages/cache/package.json
+++ b/internal-packages/cache/package.json
@@ -6,10 +6,10 @@
   "types": "./src/index.ts",
   "type": "module",
   "dependencies": {
+    "@internal/redis": "workspace:*",
+    "@trigger.dev/core": "workspace:*",
     "@unkey/cache": "^1.5.0",
     "@unkey/error": "^0.2.0",
-    "@trigger.dev/core": "workspace:*",
-    "@internal/redis": "workspace:*",
     "superjson": "^2.2.1"
   },
   "scripts": {
diff --git a/internal-packages/cache/src/index.ts b/internal-packages/cache/src/index.ts
index d378191d29..479d2fce1b 100644
--- a/internal-packages/cache/src/index.ts
+++ b/internal-packages/cache/src/index.ts
@@ -6,5 +6,5 @@ export {
   type CacheError,
 } from "@unkey/cache";
 export { type Result, Ok, Err } from "@unkey/error";
-export { MemoryStore } from "@unkey/cache/stores";
 export { RedisCacheStore } from "./stores/redis.js";
+export { createMemoryStore, type MemoryStore } from "./stores/memory.js";
diff --git a/internal-packages/cache/src/stores/memory.ts b/internal-packages/cache/src/stores/memory.ts
new file mode 100644
index 0000000000..76c1fc3d2d
--- /dev/null
+++ b/internal-packages/cache/src/stores/memory.ts
@@ -0,0 +1,13 @@
+import { MemoryStore } from "@unkey/cache/stores";
+
+export type { MemoryStore };
+
+export function createMemoryStore(maxItems: number, frequency: number = 0.01) {
+  return new MemoryStore({
+    persistentMap: new Map(),
+    unstableEvictOnSet: {
+      frequency,
+      maxItems,
+    },
+  });
+}
diff --git a/internal-packages/clickhouse/package.json b/internal-packages/clickhouse/package.json
index efa7cffd12..da55314636 100644
--- a/internal-packages/clickhouse/package.json
+++ b/internal-packages/clickhouse/package.json
@@ -6,7 +6,7 @@
   "types": "./dist/src/index.d.ts",
   "type": "module",
   "dependencies": {
-    "@clickhouse/client": "^1.11.1",
+    "@clickhouse/client": "^1.12.1",
     "@internal/tracing": "workspace:*",
     "@trigger.dev/core": "workspace:*",
     "zod": "3.25.76",
diff --git a/internal-packages/clickhouse/schema/007_add_task_events_v1.sql b/internal-packages/clickhouse/schema/007_add_task_events_v1.sql
new file mode 100644
index 0000000000..7d8e7b68d3
--- /dev/null
+++ b/internal-packages/clickhouse/schema/007_add_task_events_v1.sql
@@ -0,0 +1,52 @@
+-- +goose Up
+CREATE TABLE IF NOT EXISTS trigger_dev.task_events_v1
+(
+    -- This is the main "tenant" ID
+    environment_id String,
+    -- The organization ID here so we can do MV rollups of usage
+    organization_id
String, + -- The project ID here so we can do MV rollups of usage + project_id String, + -- The task slug (e.g. "my-task") + task_identifier String CODEC(ZSTD(1)), + -- The non-friendly ID for the run + run_id String CODEC(ZSTD(1)), + -- nanoseconds since the epoch + start_time DateTime64(9) CODEC(Delta(8), ZSTD(1)), + trace_id String CODEC(ZSTD(1)), + span_id String CODEC(ZSTD(1)), + -- will be an empty string for root spans + parent_span_id String CODEC(ZSTD(1)), + -- Log body, event name, or span name + message String CODEC(ZSTD(1)), + -- this is the new level column, can be + -- SPAN, SPAN_EVENT, DEBUG_EVENT, LOG_DEBUG, LOG_LOG, LOG_SUCCESS, LOG_INFO, LOG_WARN, LOG_ERROR, ANCESTOR_OVERRIDE + kind LowCardinality(String) CODEC(ZSTD(1)), + -- isError, isPartial, isCancelled will now be in this status column + -- OK, ERROR, PARTIAL, CANCELLED + status LowCardinality(String) CODEC(ZSTD(1)), + -- span/log/event attributes and resource attributes + -- includes error attributes, gen_ai attributes, and other attributes + attributes JSON CODEC(ZSTD(1)), + attributes_text String MATERIALIZED toJSONString(attributes), + -- This is the metadata column, includes style for styling the event in the UI + -- is a JSON stringified object, e.g. {"style":{"icon":"play","variant":"primary"},"error":{"message":"Error message","attributes":{"error.type":"ErrorType","error.code":"123"}}} + metadata String CODEC(ZSTD(1)), + -- nanoseconds since the start time, only non-zero for spans + duration UInt64 CODEC(ZSTD(1)), + -- The TTL for the event, will be deleted 7 days after the event expires + expires_at DateTime64(3), + + INDEX idx_run_id run_id TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_span_id span_id TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_duration duration TYPE minmax GRANULARITY 1, + INDEX idx_attributes_text attributes_text TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8 +) +ENGINE = MergeTree +PARTITION BY toDate(start_time) +ORDER BY (environment_id, toUnixTimestamp(start_time), trace_id) +TTL toDateTime(expires_at) + INTERVAL 7 DAY +SETTINGS ttl_only_drop_parts = 1; + +-- +goose Down +DROP TABLE IF EXISTS trigger_dev.task_events_v1; \ No newline at end of file diff --git a/internal-packages/clickhouse/schema/008_add_task_events_v1_mvs.sql b/internal-packages/clickhouse/schema/008_add_task_events_v1_mvs.sql new file mode 100644 index 0000000000..6a4b9b93e3 --- /dev/null +++ b/internal-packages/clickhouse/schema/008_add_task_events_v1_mvs.sql @@ -0,0 +1,55 @@ +-- +goose Up +CREATE TABLE IF NOT EXISTS trigger_dev.task_event_usage_by_minute_v1 +( + organization_id String, + project_id String, + environment_id String, + bucket_start DateTime, + event_count UInt64 +) +ENGINE = SummingMergeTree() +PARTITION BY toYYYYMM(bucket_start) +ORDER BY (organization_id, project_id, environment_id, bucket_start) +TTL bucket_start + INTERVAL 8 DAY; + +CREATE TABLE IF NOT EXISTS trigger_dev.task_event_usage_by_hour_v1 +( + organization_id String, + project_id String, + environment_id String, + bucket_start DateTime, + event_count UInt64 +) +ENGINE = SummingMergeTree() +PARTITION BY toYYYYMM(bucket_start) +ORDER BY (organization_id, project_id, environment_id, bucket_start) +TTL bucket_start + INTERVAL 400 DAY; + +CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.mv_task_event_usage_by_minute_v1 +TO trigger_dev.task_event_usage_by_minute_v1 AS +SELECT + organization_id, + project_id, + environment_id, + toStartOfMinute(start_time) AS bucket_start, + count() AS event_count +FROM trigger_dev.task_events_v1 
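+-- The two materialized views form a cascade: per-minute buckets roll up into
+-- per-hour buckets. A minimal sketch of reading the hourly rollup (table and
+-- columns from the DDL above; the organization_id value is illustrative):
+--   SELECT toStartOfDay(bucket_start) AS day, sum(event_count) AS events
+--   FROM trigger_dev.task_event_usage_by_hour_v1
+--   WHERE organization_id = 'org_123'
+--   GROUP BY day ORDER BY day;
+-- sum() is required because SummingMergeTree only collapses duplicate keys at
+-- merge time, not at query time.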
+GROUP BY organization_id, project_id, environment_id, bucket_start; + +CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.mv_task_event_usage_by_hour_v1 +TO trigger_dev.task_event_usage_by_hour_v1 AS +SELECT + organization_id, + project_id, + environment_id, + toStartOfHour(bucket_start) AS bucket_start, + sum(event_count) AS event_count +FROM trigger_dev.task_event_usage_by_minute_v1 +GROUP BY organization_id, project_id, environment_id, bucket_start; + + +-- +goose Down +DROP MATERIALIZED VIEW IF EXISTS trigger_dev.mv_task_event_usage_by_minute_v1; +DROP MATERIALIZED VIEW IF EXISTS trigger_dev.mv_task_event_usage_by_hour_v1; +DROP TABLE IF EXISTS trigger_dev.task_event_usage_by_hour_v1; +DROP TABLE IF EXISTS trigger_dev.task_event_usage_by_minute_v1; \ No newline at end of file diff --git a/internal-packages/clickhouse/src/client/client.ts b/internal-packages/clickhouse/src/client/client.ts index f1461798ef..3ed7303262 100644 --- a/internal-packages/clickhouse/src/client/client.ts +++ b/internal-packages/clickhouse/src/client/client.ts @@ -4,6 +4,8 @@ import { ClickHouseLogLevel, type ClickHouseSettings, createClient, + type ResultSet, + type Row, } from "@clickhouse/client"; import { recordSpanError, Span, startSpan, trace, Tracer } from "@internal/tracing"; import { flattenAttributes, tryCatch } from "@trigger.dev/core/v3"; @@ -11,16 +13,18 @@ import { z } from "zod"; import { InsertError, QueryError } from "./errors.js"; import type { ClickhouseInsertFunction, + ClickhouseQueryBuilderFastFunction, ClickhouseQueryBuilderFunction, ClickhouseQueryFunction, ClickhouseReader, ClickhouseWriter, + ColumnExpression, } from "./types.js"; import { generateErrorMessage } from "zod-error"; import { Logger, type LogLevel } from "@trigger.dev/core/logger"; import type { Agent as HttpAgent } from "http"; import type { Agent as HttpsAgent } from "https"; -import { ClickhouseQueryBuilder } from "./queryBuilder.js"; +import { ClickhouseQueryBuilder, ClickhouseQueryFastBuilder } from "./queryBuilder.js"; import { randomUUID } from "node:crypto"; export type ClickhouseConfig = { @@ -225,6 +229,112 @@ export class ClickhouseClient implements ClickhouseReader, ClickhouseWriter { }; } + public queryFast, TParams extends Record>(req: { + name: string; + query: string; + columns: Array; + settings?: ClickHouseSettings; + }): ClickhouseQueryFunction { + return async (params, options) => { + const queryId = randomUUID(); + + return await startSpan(this.tracer, "queryFast", async (span) => { + this.logger.debug("Querying clickhouse fast", { + name: req.name, + query: req.query.replace(/\s+/g, " "), + params, + settings: req.settings, + attributes: options?.attributes, + queryId, + }); + + span.setAttributes({ + "clickhouse.clientName": this.name, + "clickhouse.operationName": req.name, + "clickhouse.queryId": queryId, + ...flattenAttributes(req.settings, "clickhouse.settings"), + ...flattenAttributes(options?.attributes), + }); + + const [clickhouseError, resultSet] = await tryCatch( + this.client.query({ + query: req.query, + query_params: params, + format: "JSONCompactEachRow", + query_id: queryId, + ...options?.params, + clickhouse_settings: { + ...req.settings, + ...options?.params?.clickhouse_settings, + }, + }) + ); + + if (clickhouseError) { + this.logger.error("Error querying clickhouse", { + name: req.name, + error: clickhouseError, + query: req.query, + params, + queryId, + }); + + recordClickhouseError(span, clickhouseError); + + return [ + new QueryError(`Unable to query clickhouse: 
${clickhouseError.message}`, { + query: req.query, + }), + null, + ]; + } + + span.setAttributes({ + "clickhouse.query_id": resultSet.query_id, + ...flattenAttributes(resultSet.response_headers, "clickhouse.response_headers"), + }); + + const summaryHeader = resultSet.response_headers["x-clickhouse-summary"]; + + if (typeof summaryHeader === "string") { + span.setAttributes({ + ...flattenAttributes(JSON.parse(summaryHeader), "clickhouse.summary"), + }); + } + + const resultRows: Array = []; + + for await (const rows of resultSet.stream()) { + if (rows.length === 0) { + continue; + } + + for (const row of rows) { + const rowData = row.json(); + + const hydratedRow: Record = {}; + for (let i = 0; i < req.columns.length; i++) { + const column = req.columns[i]; + + if (typeof column === "string") { + hydratedRow[column] = rowData[i]; + } else { + hydratedRow[column.name] = rowData[i]; + } + } + resultRows.push(hydratedRow as TOut); + } + } + + span.setAttributes({ + "clickhouse.rows": resultRows.length, + }); + + return [null, resultRows]; + }); + }; + } + public queryBuilder>(req: { name: string; baseQuery: string; @@ -238,6 +348,19 @@ export class ClickhouseClient implements ClickhouseReader, ClickhouseWriter { }); } + public queryBuilderFast>(req: { + name: string; + table: string; + columns: string[]; + settings?: ClickHouseSettings; + }): ClickhouseQueryBuilderFastFunction { + return (chSettings) => + new ClickhouseQueryFastBuilder(req.name, req.table, req.columns, this, { + ...req.settings, + ...chSettings?.settings, + }); + } + public insert>(req: { name: string; table: string; @@ -341,6 +464,87 @@ export class ClickhouseClient implements ClickhouseReader, ClickhouseWriter { }); }; } + + public insertUnsafe>(req: { + name: string; + table: string; + settings?: ClickHouseSettings; + }): ClickhouseInsertFunction { + return async (events, options) => { + const queryId = randomUUID(); + + return await startSpan(this.tracer, "insert", async (span) => { + this.logger.debug("Inserting into clickhouse", { + clientName: this.name, + name: req.name, + table: req.table, + events: Array.isArray(events) ? events.length : 1, + settings: req.settings, + attributes: options?.attributes, + options, + queryId, + }); + + span.setAttributes({ + "clickhouse.clientName": this.name, + "clickhouse.tableName": req.table, + "clickhouse.operationName": req.name, + "clickhouse.queryId": queryId, + ...flattenAttributes(req.settings, "clickhouse.settings"), + ...flattenAttributes(options?.attributes), + }); + + const [clickhouseError, result] = await tryCatch( + this.client.insert({ + table: req.table, + format: "JSONEachRow", + values: Array.isArray(events) ? 
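+            // `insert` takes a single record or an array; the JSONEachRow format
+            // wants one JSON object per row, so a lone record is wrapped below.
+            // A minimal usage sketch (names from this file; the row shape is
+            // whatever TRecord is instantiated with):
+            //   const insertFn = writer.insertUnsafe<{ id: string }>({
+            //     name: "example",
+            //     table: "trigger_dev.example_v1",
+            //   });
+            //   await insertFn([{ id: "1" }, { id: "2" }]);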
events : [events], + query_id: queryId, + ...options?.params, + clickhouse_settings: { + ...req.settings, + ...options?.params?.clickhouse_settings, + }, + }) + ); + + if (clickhouseError) { + this.logger.error("Error inserting into clickhouse", { + name: req.name, + error: clickhouseError, + table: req.table, + }); + + recordClickhouseError(span, clickhouseError); + + return [new InsertError(clickhouseError.message), null]; + } + + this.logger.debug("Inserted into clickhouse", { + clientName: this.name, + name: req.name, + table: req.table, + result, + queryId, + }); + + span.setAttributes({ + "clickhouse.query_id": result.query_id, + "clickhouse.executed": result.executed, + "clickhouse.summary.read_rows": result.summary?.read_rows, + "clickhouse.summary.read_bytes": result.summary?.read_bytes, + "clickhouse.summary.written_rows": result.summary?.written_rows, + "clickhouse.summary.written_bytes": result.summary?.written_bytes, + "clickhouse.summary.total_rows_to_read": result.summary?.total_rows_to_read, + "clickhouse.summary.result_rows": result.summary?.result_rows, + "clickhouse.summary.result_bytes": result.summary?.result_bytes, + "clickhouse.summary.elapsed_ns": result.summary?.elapsed_ns, + }); + + return [null, result]; + }); + }; + } } function recordClickhouseError(span: Span, error: Error) { diff --git a/internal-packages/clickhouse/src/client/noop.ts b/internal-packages/clickhouse/src/client/noop.ts index 99524af6c3..6815baf4f6 100644 --- a/internal-packages/clickhouse/src/client/noop.ts +++ b/internal-packages/clickhouse/src/client/noop.ts @@ -1,10 +1,14 @@ import { Result } from "@trigger.dev/core/v3"; import { InsertError, QueryError } from "./errors.js"; -import { ClickhouseQueryBuilderFunction, ClickhouseWriter } from "./types.js"; +import { + ClickhouseQueryBuilderFastFunction, + ClickhouseQueryBuilderFunction, + ClickhouseWriter, +} from "./types.js"; import { ClickhouseReader } from "./types.js"; import { z } from "zod"; import { ClickHouseSettings, InsertResult } from "@clickhouse/client"; -import { ClickhouseQueryBuilder } from "./queryBuilder.js"; +import { ClickhouseQueryBuilder, ClickhouseQueryFastBuilder } from "./queryBuilder.js"; export class NoopClient implements ClickhouseReader, ClickhouseWriter { public async close() { @@ -21,6 +25,16 @@ export class NoopClient implements ClickhouseReader, ClickhouseWriter { new ClickhouseQueryBuilder(req.name, req.baseQuery, this, req.schema, req.settings); } + public queryBuilderFast>(req: { + name: string; + table: string; + columns: string[]; + settings?: ClickHouseSettings; + }): ClickhouseQueryBuilderFastFunction { + return () => + new ClickhouseQueryFastBuilder(req.name, req.table, req.columns, this, req.settings); + } + public query, TOut extends z.ZodSchema>(req: { query: string; params?: TIn; @@ -37,6 +51,17 @@ export class NoopClient implements ClickhouseReader, ClickhouseWriter { }; } + public queryFast, TParams extends Record>(req: { + name: string; + query: string; + columns: string[]; + settings?: ClickHouseSettings; + }): (params: TParams) => Promise> { + return async (params: TParams) => { + return [null, []]; + }; + } + public insert>(req: { name: string; table: string; @@ -74,4 +99,31 @@ export class NoopClient implements ClickhouseReader, ClickhouseWriter { ]; }; } + + public insertUnsafe>(req: { + name: string; + table: string; + settings?: ClickHouseSettings; + }): (events: TRecord | TRecord[]) => Promise> { + return async (events: TRecord | TRecord[]) => { + return [ + null, + { + executed: true, + 
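+          // The noop client fabricates an all-zero InsertResult so callers that
+          // destructure the [error, result] tuple keep working when ClickHouse
+          // is not configured; none of these numbers reflect real work.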
query_id: "noop", + summary: { + read_rows: "0", + read_bytes: "0", + written_rows: "0", + written_bytes: "0", + total_rows_to_read: "0", + result_rows: "0", + result_bytes: "0", + elapsed_ns: "0", + }, + response_headers: {}, + }, + ]; + }; + } } diff --git a/internal-packages/clickhouse/src/client/queryBuilder.ts b/internal-packages/clickhouse/src/client/queryBuilder.ts index d944a07bcc..78383fd270 100644 --- a/internal-packages/clickhouse/src/client/queryBuilder.ts +++ b/internal-packages/clickhouse/src/client/queryBuilder.ts @@ -1,5 +1,5 @@ import { z } from "zod"; -import { ClickhouseQueryFunction, ClickhouseReader } from "./types.js"; +import { ClickhouseQueryFunction, ClickhouseReader, ColumnExpression } from "./types.js"; import { ClickHouseSettings } from "@clickhouse/client"; export type QueryParamValue = string | number | boolean | Array | null; export type QueryParams = Record; @@ -91,3 +91,99 @@ export class ClickhouseQueryBuilder { return { query, params: this.params }; } } + +export class ClickhouseQueryFastBuilder> { + private name: string; + private table: string; + private columns: Array; + private reader: ClickhouseReader; + private settings: ClickHouseSettings | undefined; + private whereClauses: string[] = []; + private params: QueryParams = {}; + private orderByClause: string | null = null; + private limitClause: string | null = null; + private groupByClause: string | null = null; + + constructor( + name: string, + table: string, + columns: Array, + reader: ClickhouseReader, + settings?: ClickHouseSettings + ) { + this.name = name; + this.table = table; + this.columns = columns; + this.reader = reader; + this.settings = settings; + } + + where(clause: string, params?: QueryParams): this { + this.whereClauses.push(clause); + if (params) { + Object.assign(this.params, params); + } + return this; + } + + whereIf(condition: any, clause: string, params?: QueryParams): this { + if (condition) { + this.where(clause, params); + } + return this; + } + + groupBy(clause: string): this { + this.groupByClause = clause; + return this; + } + + orderBy(clause: string): this { + this.orderByClause = clause; + return this; + } + + limit(limit: number): this { + this.limitClause = `LIMIT ${limit}`; + return this; + } + + execute(): ReturnType> { + const { query, params } = this.build(); + + const queryFunction = this.reader.queryFast>({ + name: this.name, + query, + columns: this.columns, + settings: this.settings, + }); + + return queryFunction(params); + } + + build(): { query: string; params: QueryParams } { + let query = `SELECT ${this.buildColumns().join(", ")} FROM ${this.table}`; + if (this.whereClauses.length > 0) { + query += " WHERE " + this.whereClauses.join(" AND "); + } + if (this.groupByClause) { + query += ` GROUP BY ${this.groupByClause}`; + } + if (this.orderByClause) { + query += ` ORDER BY ${this.orderByClause}`; + } + if (this.limitClause) { + query += ` ${this.limitClause}`; + } + return { query, params: this.params }; + } + + buildColumns(): string[] { + return this.columns.map((column) => { + if (typeof column === "string") { + return column; + } + return [column.expression, column.name].join(" AS "); + }); + } +} diff --git a/internal-packages/clickhouse/src/client/types.ts b/internal-packages/clickhouse/src/client/types.ts index 02ad3de5d0..f0874c814d 100644 --- a/internal-packages/clickhouse/src/client/types.ts +++ b/internal-packages/clickhouse/src/client/types.ts @@ -3,7 +3,7 @@ import type { z } from "zod"; import type { InsertError, QueryError } from 
"./errors.js"; import { ClickHouseSettings } from "@clickhouse/client"; import type { BaseQueryParams, InsertResult } from "@clickhouse/client"; -import { ClickhouseQueryBuilder } from "./queryBuilder.js"; +import { ClickhouseQueryBuilder, ClickhouseQueryFastBuilder } from "./queryBuilder.js"; export type ClickhouseQueryFunction = ( params: TInput, @@ -17,6 +17,15 @@ export type ClickhouseQueryBuilderFunction = (options?: { settings?: ClickHouseSettings; }) => ClickhouseQueryBuilder; +export type ClickhouseQueryBuilderFastFunction> = (options?: { + settings?: ClickHouseSettings; +}) => ClickhouseQueryFastBuilder; + +export type ColumnExpression = { + name: string; + expression: string; +}; + export interface ClickhouseReader { query, TOut extends z.ZodSchema>(req: { /** @@ -47,6 +56,32 @@ export interface ClickhouseReader { settings?: ClickHouseSettings; }): ClickhouseQueryFunction, z.output>; + queryFast, TParams extends Record>(req: { + /** + * The name of the operation. + * This will be used to identify the operation in the span. + */ + name: string; + /** + * The SQL query to run. + * Use {paramName: Type} to define parameters + * Example: `SELECT * FROM table WHERE id = {id: String}` + */ + query: string; + + /** + * The columns returned by the query, in the order + * + * @example ["run_id", "created_at", "updated_at"] + */ + columns: Array; + /** + * The settings to use for the query. + * These will be merged with the default settings. + */ + settings?: ClickHouseSettings; + }): ClickhouseQueryFunction; + queryBuilder>(req: { /** * The name of the operation. @@ -71,6 +106,31 @@ export interface ClickhouseReader { settings?: ClickHouseSettings; }): ClickhouseQueryBuilderFunction>; + queryBuilderFast>(req: { + /** + * The name of the operation. + * This will be used to identify the operation in the span. + */ + name: string; + /** + * The table to query + * + * @example trigger_dev.task_runs_v1 + */ + table: string; + /** + * The columns to query + * + * @example ["run_id", "created_at", "updated_at"] + */ + columns: Array; + /** + * The settings to use for the query. + * These will be merged with the default settings. 
+     */
+    settings?: ClickHouseSettings;
+  }): ClickhouseQueryBuilderFastFunction<TOut>;
+
   close(): Promise<void>;
 }
@@ -90,5 +150,11 @@ export interface ClickhouseWriter {
     settings?: ClickHouseSettings;
   }): ClickhouseInsertFunction<z.input<TSchema>>;

+  insertUnsafe<TRecord extends Record<string, unknown>>(req: {
+    name: string;
+    table: string;
+    settings?: ClickHouseSettings;
+  }): ClickhouseInsertFunction<TRecord>;
+
   close(): Promise<void>;
 }
diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts
index 599492eb53..a5e3835fa7 100644
--- a/internal-packages/clickhouse/src/index.ts
+++ b/internal-packages/clickhouse/src/index.ts
@@ -12,11 +12,18 @@ import {
   getTaskUsageByOrganization,
   getTaskRunsCountQueryBuilder,
 } from "./taskRuns.js";
+import {
+  getSpanDetailsQueryBuilder,
+  getTraceDetailedSummaryQueryBuilder,
+  getTraceSummaryQueryBuilder,
+  insertTaskEvents,
+} from "./taskEvents.js";
 import { Logger, type LogLevel } from "@trigger.dev/core/logger";
 import type { Agent as HttpAgent } from "http";
 import type { Agent as HttpsAgent } from "https";

 export type * from "./taskRuns.js";
+export type * from "./taskEvents.js";
 export type * from "./client/queryBuilder.js";

 export type ClickhouseCommonConfig = {
@@ -153,4 +160,13 @@ export class ClickHouse {
       getTaskUsageByOrganization: getTaskUsageByOrganization(this.reader),
     };
   }
+
+  get taskEvents() {
+    return {
+      insert: insertTaskEvents(this.writer),
+      traceSummaryQueryBuilder: getTraceSummaryQueryBuilder(this.reader),
+      traceDetailedSummaryQueryBuilder: getTraceDetailedSummaryQueryBuilder(this.reader),
+      spanDetailsQueryBuilder: getSpanDetailsQueryBuilder(this.reader),
+    };
+  }
 }
diff --git a/internal-packages/clickhouse/src/taskEvents.ts b/internal-packages/clickhouse/src/taskEvents.ts
new file mode 100644
index 0000000000..dae2e4fbd0
--- /dev/null
+++ b/internal-packages/clickhouse/src/taskEvents.ts
@@ -0,0 +1,133 @@
+import { ClickHouseSettings } from "@clickhouse/client";
+import { z } from "zod";
+import { ClickhouseReader, ClickhouseWriter } from "./client/types.js";
+
+export const TaskEventV1Input = z.object({
+  environment_id: z.string(),
+  organization_id: z.string(),
+  project_id: z.string(),
+  task_identifier: z.string(),
+  run_id: z.string(),
+  start_time: z.string(),
+  duration: z.string(),
+  trace_id: z.string(),
+  span_id: z.string(),
+  parent_span_id: z.string(),
+  message: z.string(),
+  kind: z.string(),
+  status: z.string(),
+  attributes: z.unknown(),
+  metadata: z.string(),
+  expires_at: z.string(),
+});
+
+export type TaskEventV1Input = z.input<typeof TaskEventV1Input>;
+
+export function insertTaskEvents(ch: ClickhouseWriter, settings?: ClickHouseSettings) {
+  return ch.insertUnsafe<TaskEventV1Input>({
+    name: "insertTaskEvents",
+    table: "trigger_dev.task_events_v1",
+    settings: {
+      enable_json_type: 1,
+      type_json_skip_duplicated_paths: 1,
+      input_format_json_throw_on_bad_escape_sequence: 0,
+      input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects: 1,
+      ...settings,
+    },
+  });
+}
+
+export const TaskEventSummaryV1Result = z.object({
+  span_id: z.string(),
+  parent_span_id: z.string(),
+  run_id: z.string(),
+  start_time: z.string(),
+  duration: z.number().or(z.string()),
+  status: z.string(),
+  kind: z.string(),
+  metadata: z.string(),
+  message: z.string(),
+});
+
+export type TaskEventSummaryV1Result = z.output<typeof TaskEventSummaryV1Result>;
+
+export function getTraceSummaryQueryBuilder(ch: ClickhouseReader, settings?: ClickHouseSettings) {
+  return ch.queryBuilderFast<TaskEventSummaryV1Result>({
+    name: "getTraceEvents",
+    table: "trigger_dev.task_events_v1",
+    columns: [
+      "span_id",
+      "parent_span_id",
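+      // Rows come back as JSONCompactEachRow arrays and are hydrated by
+      // position (see queryFast), so this order must match the SELECT; the
+      // LEFT(...) expression column below truncates messages to keep trace
+      // summaries small.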
"run_id", + "start_time", + "duration", + "status", + "kind", + "metadata", + { name: "message", expression: "LEFT(message, 256)" }, + ], + settings, + }); +} + +export const TaskEventDetailedSummaryV1Result = z.object({ + span_id: z.string(), + parent_span_id: z.string(), + run_id: z.string(), + start_time: z.string(), + duration: z.number().or(z.string()), + status: z.string(), + kind: z.string(), + metadata: z.string(), + message: z.string(), + attributes_text: z.string(), +}); + +export type TaskEventDetailedSummaryV1Result = z.output; + +export function getTraceDetailedSummaryQueryBuilder( + ch: ClickhouseReader, + settings?: ClickHouseSettings +) { + return ch.queryBuilderFast({ + name: "getTaskEventDetailedSummary", + table: "trigger_dev.task_events_v1", + columns: [ + "span_id", + "parent_span_id", + "run_id", + "start_time", + "duration", + "status", + "kind", + "metadata", + { name: "message", expression: "LEFT(message, 256)" }, + "attributes_text", + ], + settings, + }); +} + +export const TaskEventDetailsV1Result = z.object({ + span_id: z.string(), + parent_span_id: z.string(), + start_time: z.string(), + duration: z.number().or(z.string()), + status: z.string(), + kind: z.string(), + metadata: z.string(), + message: z.string(), + attributes_text: z.string(), +}); + +export type TaskEventDetailsV1Result = z.input; + +export function getSpanDetailsQueryBuilder(ch: ClickhouseReader, settings?: ClickHouseSettings) { + return ch.queryBuilder({ + name: "getSpanDetails", + baseQuery: + "SELECT span_id, parent_span_id, start_time, duration, status, kind, metadata, message, attributes_text FROM trigger_dev.task_events_v1", + schema: TaskEventDetailsV1Result, + settings, + }); +} diff --git a/internal-packages/database/prisma/migrations/20250922145116_add_feature_flags_to_organizations/migration.sql b/internal-packages/database/prisma/migrations/20250922145116_add_feature_flags_to_organizations/migration.sql new file mode 100644 index 0000000000..afbbc1ccf6 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20250922145116_add_feature_flags_to_organizations/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."Organization" ADD COLUMN "featureFlags" JSONB; \ No newline at end of file diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index e7e47b0707..4ecf079435 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -202,6 +202,8 @@ model Organization { apiRateLimiterConfig Json? realtimeRateLimiterConfig Json? + featureFlags Json? 
+ projects Project[] members OrgMember[] invites OrgMemberInvite[] diff --git a/internal-packages/run-engine/src/engine/billingCache.ts b/internal-packages/run-engine/src/engine/billingCache.ts index 45fd3dc382..19c67d398e 100644 --- a/internal-packages/run-engine/src/engine/billingCache.ts +++ b/internal-packages/run-engine/src/engine/billingCache.ts @@ -8,6 +8,7 @@ import { type UnkeyCache, type CacheError, type Result, + createMemoryStore, } from "@internal/cache"; import type { RedisOptions } from "@internal/redis"; import type { Logger } from "@trigger.dev/core/logger"; @@ -41,7 +42,6 @@ export class BillingCache { // Initialize cache const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); const redisCacheStore = new RedisCacheStore({ name: "billing-cache", connection: { @@ -53,7 +53,7 @@ export class BillingCache { this.cache = createCache({ currentPlan: new Namespace(ctx, { - stores: [memory, redisCacheStore], + stores: [createMemoryStore(1000), redisCacheStore], fresh: BILLING_FRESH_TTL, stale: BILLING_STALE_TTL, }), diff --git a/internal-packages/run-engine/src/engine/eventBus.ts b/internal-packages/run-engine/src/engine/eventBus.ts index c602e47b30..2e4adeed4b 100644 --- a/internal-packages/run-engine/src/engine/eventBus.ts +++ b/internal-packages/run-engine/src/engine/eventBus.ts @@ -244,6 +244,7 @@ export type EventBusEvents = { updatedAt: Date; createdAt: Date; error: TaskRunError; + taskEventStore?: string; }; organization: { id: string; diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index ad3f1110cf..a884ca9ba6 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -1,5 +1,6 @@ import { createCache, + createMemoryStore, DefaultStatefulContext, MemoryStore, Namespace, @@ -125,8 +126,7 @@ export class RunAttemptSystem { this.delayedRunSystem = options.delayedRunSystem; const ctx = new DefaultStatefulContext(); - // TODO: use an LRU cache for memory store - const memory = new MemoryStore({ persistentMap: new Map() }); + const memory = createMemoryStore(5000, 0.001); const redisCacheStore = new RedisCacheStore({ name: "run-attempt-system", connection: { @@ -444,6 +444,7 @@ export class RunAttemptSystem { parentTaskRunId: true, rootTaskRunId: true, workerQueue: true, + taskEventStore: true, }, }); @@ -996,6 +997,7 @@ export class RunAttemptSystem { updatedAt: run.updatedAt, error: completion.error, createdAt: run.createdAt, + taskEventStore: run.taskEventStore, }, organization: { id: run.runtimeEnvironment.organizationId, diff --git a/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts b/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts index b67e77d151..46396fda41 100644 --- a/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts +++ b/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts @@ -6,6 +6,7 @@ import { Namespace, type UnkeyCache, MemoryStore, + createMemoryStore, } from "@internal/cache"; import { randomUUID } from "crypto"; import seedrandom from "seedrandom"; @@ -106,7 +107,7 @@ export class FairQueueSelectionStrategy implements RunQueueSelectionStrategy { constructor(private options: FairQueueSelectionStrategyOptions) { const ctx = new DefaultStatefulContext(); - const memory = new MemoryStore({ persistentMap: new Map() }); + const 
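+    // createMemoryStore(maxItems, frequency) wraps @unkey/cache's MemoryStore
+    // with unstableEvictOnSet: on roughly `frequency` of set() calls it evicts
+    // down to `maxItems`, so this store stays at ~1000 entries with the default
+    // 1% sampling (see internal-packages/cache/src/stores/memory.ts above).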
memory = createMemoryStore(1000); this._cache = createCache({ concurrencyLimit: new Namespace(ctx, { diff --git a/packages/cli-v3/package.json b/packages/cli-v3/package.json index d575da4eaf..fdfbfd1919 100644 --- a/packages/cli-v3/package.json +++ b/packages/cli-v3/package.json @@ -155,4 +155,4 @@ } } } -} \ No newline at end of file +} diff --git a/packages/cli-v3/src/commands/dev.ts b/packages/cli-v3/src/commands/dev.ts index 3253fdc573..c58c9224fe 100644 --- a/packages/cli-v3/src/commands/dev.ts +++ b/packages/cli-v3/src/commands/dev.ts @@ -146,10 +146,12 @@ export async function devCommand(options: DevCommandOptions) { typeof options.skipRulesInstall === "boolean" && options.skipRulesInstall; if (!skipRulesInstall) { - await initiateRulesInstallWizard({ - manifestPath: options.rulesInstallManifestPath, - branch: options.rulesInstallBranch, - }); + await tryCatch( + initiateRulesInstallWizard({ + manifestPath: options.rulesInstallManifestPath, + branch: options.rulesInstallBranch, + }) + ); } } diff --git a/packages/cli-v3/src/rules/manifest.ts b/packages/cli-v3/src/rules/manifest.ts index f3bf73ba95..6e8e90bd9b 100644 --- a/packages/cli-v3/src/rules/manifest.ts +++ b/packages/cli-v3/src/rules/manifest.ts @@ -114,7 +114,10 @@ export class GithubRulesManifestLoader implements RulesManifestLoader { async loadManifestContent(): Promise { const response = await fetch( - `https://raw.githubusercontent.com/triggerdotdev/trigger.dev/refs/heads/${this.branch}/rules/manifest.json` + `https://raw.githubusercontent.com/triggerdotdev/trigger.dev/refs/heads/${this.branch}/rules/manifest.json`, + { + signal: AbortSignal.timeout(5000), + } ); if (!response.ok) { diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index fe59e712ce..376a8522de 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -118,6 +118,7 @@ const BaseRunMetadata = z.object({ friendlyId: z.string(), status: z.enum(Object.values(TaskRunStatus) as [TaskRunStatus]), attemptNumber: z.number().nullish(), + taskEventStore: z.string().optional(), }); export const ExecutionResult = z.object({ diff --git a/packages/core/src/v3/semanticInternalAttributes.ts b/packages/core/src/v3/semanticInternalAttributes.ts index b5e0af63a5..5916970b09 100644 --- a/packages/core/src/v3/semanticInternalAttributes.ts +++ b/packages/core/src/v3/semanticInternalAttributes.ts @@ -26,6 +26,7 @@ export const SemanticInternalAttributes = { SKIP_SPAN_PARTIAL: "$span.skip_partial", SPAN_PARTIAL: "$span.partial", SPAN_ID: "$span.span_id", + SPAN: "$span", ENTITY_TYPE: "$entity.type", ENTITY_ID: "$entity.id", OUTPUT: "$output", @@ -53,6 +54,7 @@ export const SemanticInternalAttributes = { IDEMPOTENCY_KEY: "ctx.run.idempotencyKey", USAGE_DURATION_MS: "$usage.durationMs", USAGE_COST_IN_CENTS: "$usage.costInCents", + USAGE: "$usage", RATE_LIMIT_LIMIT: "response.rateLimit.limit", RATE_LIMIT_REMAINING: "response.rateLimit.remaining", RATE_LIMIT_RESET: "response.rateLimit.reset", @@ -61,4 +63,5 @@ export const SemanticInternalAttributes = { EXECUTION_ENVIRONMENT: "exec_env", WARM_START: "warm_start", ATTEMPT_EXECUTION_COUNT: "$trigger.executionCount", + TASK_EVENT_STORE: "$trigger.taskEventStore", }; diff --git a/packages/core/src/v3/serverOnly/index.ts b/packages/core/src/v3/serverOnly/index.ts index 10d915a5d3..d4a74633dc 100644 --- a/packages/core/src/v3/serverOnly/index.ts +++ b/packages/core/src/v3/serverOnly/index.ts @@ -7,3 +7,4 @@ export * from "./k8s.js"; export * 
from "./jumpHash.js"; export * from "../apiClient/version.js"; export * from "./placementTags.js"; +export * from "./resourceMonitor.js"; diff --git a/packages/core/src/v3/serverOnly/resourceMonitor.ts b/packages/core/src/v3/serverOnly/resourceMonitor.ts new file mode 100644 index 0000000000..2d91d2e23d --- /dev/null +++ b/packages/core/src/v3/serverOnly/resourceMonitor.ts @@ -0,0 +1,761 @@ +import { exec } from "node:child_process"; +import { promises as fs } from "node:fs"; +import os from "node:os"; +import { PerformanceObserver, constants } from "node:perf_hooks"; +import { promisify } from "node:util"; +import { getHeapStatistics } from "node:v8"; + +const execAsync = promisify(exec); + +export type DiskMetrics = { + total: number; + used: number; + free: number; + percentUsed: number; + warning?: string; +}; + +export type MemoryMetrics = { + total: number; + free: number; + used: number; + percentUsed: number; +}; + +export type NodeProcessMetrics = { + memoryUsage: number; + memoryUsagePercent: number; + heapUsed: number; + heapSizeLimit: number; + heapUsagePercent: number; + availableHeap: number; + isNearHeapLimit: boolean; +}; + +export type TargetProcessMetrics = { + method: string; + processName: string; + count: number; + processes: ProcessInfo[]; + averages: { + cpu: number; + memory: number; + rss: number; + vsz: number; + } | null; + totals: { + cpu: number; + memory: number; + rss: number; + vsz: number; + } | null; +}; + +export type ProcessMetrics = { + node: NodeProcessMetrics; + targetProcess: TargetProcessMetrics | null; +}; + +type GCSummary = { + count: number; + totalDuration: number; // ms + avgDuration: number; // ms + maxDuration: number; // ms + kinds: Record< + string, + { + // breakdown by kind + count: number; + totalDuration: number; + avgDuration: number; + maxDuration: number; + } + >; +}; + +type ProcessInfo = { + user: string; + pid: number; + cpu: number; + mem: number; + vsz: number; + rss: number; + command: string; +}; + +export type SystemMetrics = { + disk: DiskMetrics; + memory: MemoryMetrics; +}; + +export type ResourceMonitorConfig = { + dirName?: string; + processName?: string; + ctx: Record; + compactLogging?: boolean; + verbose?: boolean; +}; + +// Constants +const DISK_LIMIT_GB = 10; +const DISK_LIMIT_BYTES = DISK_LIMIT_GB * 1024 * 1024 * 1024; // 10Gi in bytes + +export class ResourceMonitor { + private logInterval: NodeJS.Timeout | null = null; + private dirName: string; + private processName: string | undefined; + private ctx: Record; + private verbose: boolean; + private compactLogging: boolean; + private gcObserver: PerformanceObserver | null = null; + private bufferedGcEntries: PerformanceEntry[] = []; + + constructor(config: ResourceMonitorConfig) { + this.dirName = config.dirName ?? "/tmp"; + this.processName = config.processName; + this.ctx = config.ctx; + this.verbose = config.verbose ?? true; + this.compactLogging = config.compactLogging ?? 
false; + } + + /** + * Start periodic resource monitoring + * @param intervalMs Monitoring interval in milliseconds + */ + startMonitoring(intervalMs = 10000): void { + if (intervalMs < 1000) { + intervalMs = 1000; + console.warn("ResourceMonitor: intervalMs is less than 1000, setting to 1000"); + } + + if (this.logInterval) { + clearInterval(this.logInterval); + } + + this.logInterval = setInterval(this.logResources.bind(this), intervalMs); + + this.gcObserver = new PerformanceObserver((list) => { + this.bufferedGcEntries.push(...list.getEntries()); + }); + + this.gcObserver.observe({ entryTypes: ["gc"], buffered: true }); + } + + /** + * Stop resource monitoring + */ + stopMonitoring(): void { + if (this.logInterval) { + clearInterval(this.logInterval); + this.logInterval = null; + } + + if (this.gcObserver) { + this.gcObserver.disconnect(); + this.gcObserver = null; + } + } + + private async logResources() { + try { + await this.logResourceSnapshot("ResourceMonitor"); + } catch (error) { + console.error( + `Resource monitoring error: ${error instanceof Error ? error.message : String(error)}` + ); + } + } + + /** + * Get combined system metrics (disk and memory) + */ + private async getSystemMetrics(): Promise { + const [disk, memory] = await Promise.all([this.getDiskMetrics(), this.getMemoryMetrics()]); + return { disk, memory }; + } + + /** + * Get disk space information + */ + private async getDiskMetrics(): Promise { + try { + // Even with permission errors, du will output a total + const { stdout, stderr } = await execAsync(`du -sb ${this.dirName} || true`); + + // Get the last line of stdout which contains the total + const lastLine = stdout.split("\n").filter(Boolean).pop() || ""; + const usedBytes = parseInt(lastLine.split("\t")[0] ?? "", 10); + + const effectiveTotal = DISK_LIMIT_BYTES; + const effectiveUsed = Math.min(usedBytes, DISK_LIMIT_BYTES); + const effectiveFree = effectiveTotal - effectiveUsed; + const percentUsed = (effectiveUsed / effectiveTotal) * 100; + + const metrics: DiskMetrics = { + total: effectiveTotal, + used: effectiveUsed, + free: effectiveFree, + percentUsed, + }; + + // If we had permission errors, add a warning + if (stderr.includes("Permission denied") || stderr.includes("cannot access")) { + metrics.warning = "Some directories were not accessible"; + } else if (stderr.includes("No such file or directory")) { + metrics.warning = "The directory does not exist"; + } + + return metrics; + } catch (error) { + console.error( + `Error getting disk metrics: ${error instanceof Error ? error.message : String(error)}` + ); + return { + free: DISK_LIMIT_BYTES, + total: DISK_LIMIT_BYTES, + used: 0, + percentUsed: 0, + warning: "Failed to measure disk usage", + }; + } + } + + /** + * Get memory metrics + */ + private getMemoryMetrics(): MemoryMetrics { + const total = os.totalmem(); + const free = os.freemem(); + const used = total - free; + const percentUsed = (used / total) * 100; + + return { total, free, used, percentUsed }; + } + + /** + * Get process-specific metrics using /proc filesystem + */ + private async getProcMetrics(pids: number[]): Promise { + return Promise.all( + pids.map(async (pid) => { + try { + // Read process status + const status = await fs.readFile(`/proc/${pid}/status`, "utf8"); + const cmdline = await fs.readFile(`/proc/${pid}/cmdline`, "utf8"); + const stat = await fs.readFile(`/proc/${pid}/stat`, "utf8"); + + // Parse VmRSS (resident set size) from status + const rss = parseInt(status.match(/VmRSS:\s+(\d+)/)?.[1] ?? 
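+        // VmRSS and VmSize in /proc/<pid>/status are reported in kB; they are
+        // scaled by 1024 later when converted to bytes for display.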
"0", 10); + // Parse VmSize (virtual memory size) from status + const vsz = parseInt(status.match(/VmSize:\s+(\d+)/)?.[1] ?? "0", 10); + // Get process owner + const user = (await fs.stat(`/proc/${pid}`)).uid.toString(); + + // Parse CPU stats from /proc/[pid]/stat + const stats = stat.split(" "); + const utime = parseInt(stats[13] ?? "0", 10); + const stime = parseInt(stats[14] ?? "0", 10); + const starttime = parseInt(stats[21] ?? "0", 10); + + // Calculate CPU percentage + const totalTime = utime + stime; + const uptime = os.uptime(); + const hertz = 100; // Usually 100 on Linux + const elapsedTime = uptime - starttime / hertz; + const cpuUsage = 100 * (totalTime / hertz / elapsedTime); + + // Calculate memory percentage against total system memory + const totalMem = os.totalmem(); + const memoryPercent = (rss * 1024 * 100) / totalMem; + + return { + user, + pid, + cpu: cpuUsage, + mem: memoryPercent, + vsz, + rss, + command: cmdline.replace(/\0/g, " ").trim(), + }; + } catch (error) { + return null; + } + }) + ).then((results) => results.filter((r): r is ProcessInfo => r !== null)); + } + + /** + * Find PIDs for a process name using /proc filesystem + */ + private async findPidsByName(processName?: string): Promise { + if (!processName) { + return []; + } + + try { + const pids: number[] = []; + const procDirs = await fs.readdir("/proc"); + + for (const dir of procDirs) { + if (!/^\d+$/.test(dir)) continue; + + const processPid = parseInt(dir, 10); + + // Ignore processes that have a lower PID than our own PID + if (processPid <= process.pid) { + continue; + } + + try { + const cmdline = await fs.readFile(`/proc/${dir}/cmdline`, "utf8"); + if (cmdline.includes(processName)) { + pids.push(parseInt(dir, 10)); + } + } catch { + // Ignore errors reading individual process info + continue; + } + } + + return pids; + } catch { + return []; + } + } + + /** + * Get process-specific metrics + */ + private async getProcessMetrics(): Promise { + // Get Node.js process metrics + const totalMemory = os.totalmem(); + // Convert GB to bytes (machine.memory is in GB) + const machineMemoryBytes = totalMemory; + const nodeMemoryUsage = process.memoryUsage(); + + // Node process percentage is based on machine memory if available, otherwise system memory + const nodeMemoryPercent = (nodeMemoryUsage.rss / machineMemoryBytes) * 100; + const heapStats = getHeapStatistics(); + + const nodeMetrics: NodeProcessMetrics = { + memoryUsage: nodeMemoryUsage.rss, + memoryUsagePercent: nodeMemoryPercent, + heapUsed: nodeMemoryUsage.heapUsed, + heapSizeLimit: heapStats.heap_size_limit, + heapUsagePercent: (heapStats.used_heap_size / heapStats.heap_size_limit) * 100, + availableHeap: heapStats.total_available_size, + isNearHeapLimit: heapStats.used_heap_size / heapStats.heap_size_limit > 0.8, + }; + + let method = "ps"; + + try { + let processes: ProcessInfo[] = []; + + // Try ps first, fall back to /proc if it fails + try { + const { stdout: psOutput } = await execAsync( + `ps aux | grep ${this.processName} | grep -v grep` + ); + + if (psOutput.trim()) { + processes = psOutput + .trim() + .split("\n") + .filter((line) => { + const parts = line.trim().split(/\s+/); + const pid = parseInt(parts[1] ?? "0", 10); + + // Ignore processes that have a lower PID than our own PID + return pid > process.pid; + }) + .map((line) => { + const parts = line.trim().split(/\s+/); + return { + user: parts[0] ?? "", + pid: parseInt(parts[1] ?? "0", 10), + cpu: parseFloat(parts[2] ?? "0"), + mem: parseFloat(parts[3] ?? 
"0"), + vsz: parseInt(parts[4] ?? "0", 10), + rss: parseInt(parts[5] ?? "0", 10), + command: parts.slice(10).join(" "), + }; + }); + } + } catch { + // ps failed, try /proc instead + method = "proc"; + const pids = await this.findPidsByName(this.processName); + processes = await this.getProcMetrics(pids); + } + + if (processes.length === 0) { + return { + node: nodeMetrics, + targetProcess: this.processName + ? { + method, + processName: this.processName, + count: 0, + processes: [], + averages: null, + totals: null, + } + : null, + }; + } + + // For CPU: + // - ps shows CPU percentage per core (e.g., 100% = 1 core) + // - machine.cpu is in cores (e.g., 0.5 = half a core) + // - we want to show percentage of allocated CPU (e.g., 100% = using all allocated CPU) + const availableCpu = os.cpus().length; + const cpuNormalizer = availableCpu * 100; // Convert to basis points for better precision with fractional CPUs + + // For Memory: + // - ps 'mem' is already a percentage of system memory + // - we need to convert it to a percentage of machine memory + // - if machine memory is 0.5GB and system has 16GB, we multiply the percentage by 32 + const memoryScaleFactor = this.ctx.machine ? totalMemory / machineMemoryBytes : 1; + + const totals = processes.reduce( + (acc, proc) => ({ + cpu: acc.cpu + proc.cpu, + // Scale memory percentage to machine memory + // TODO: test this + memory: acc.memory + proc.mem * memoryScaleFactor, + rss: acc.rss + proc.rss, + vsz: acc.vsz + proc.vsz, + }), + { cpu: 0, memory: 0, rss: 0, vsz: 0 } + ); + + const count = processes.length; + + const averages = { + cpu: totals.cpu / (count * cpuNormalizer), + memory: totals.memory / count, + rss: totals.rss / count, + vsz: totals.vsz / count, + }; + + return { + node: nodeMetrics, + targetProcess: this.processName + ? { + method, + processName: this.processName, + count, + processes, + averages, + totals: { + cpu: totals.cpu / cpuNormalizer, + memory: totals.memory, + rss: totals.rss, + vsz: totals.vsz, + }, + } + : null, + }; + } catch (error) { + return { + node: nodeMetrics, + targetProcess: this.processName + ? { + method, + processName: this.processName, + count: 0, + processes: [], + averages: null, + totals: null, + } + : null, + }; + } + } + + /** + * Log a snapshot of current resource usage + */ + async logResourceSnapshot(label = "Resource Snapshot"): Promise { + try { + const payload = await this.getResourceSnapshotPayload(); + const enhancedLabel = this.compactLogging + ? this.createCompactLabel(payload, label) + : this.createEnhancedLabel(payload, label); + + if (payload.process.node.isNearHeapLimit) { + console.warn(`${enhancedLabel}: Node is near heap limit`); + } else { + console.log(enhancedLabel); + } + + if (this.verbose) { + console.dir(payload, { depth: 6 }); + } + } catch (error) { + console.error( + `Error logging resource snapshot: ${error instanceof Error ? 
+
+  /**
+   * Create an enhanced log label with key metrics for quick scanning
+   */
+  private createEnhancedLabel(payload: any, baseLabel: string): string {
+    const parts: string[] = [baseLabel];
+
+    // System resources with text indicators
+    const diskPercent = parseFloat(payload.system.disk.percentUsed);
+    const memoryPercent = parseFloat(payload.system.memory.percentUsed);
+    const diskIndicator = this.getTextIndicator(diskPercent, 80, 90);
+    const memIndicator = this.getTextIndicator(memoryPercent, 80, 90);
+    parts.push(`Disk:${diskPercent.toFixed(1).padStart(5)}%${diskIndicator}`);
+    parts.push(`Mem:${memoryPercent.toFixed(1).padStart(5)}%${memIndicator}`);
+
+    // Node process metrics with text indicators
+    const nodeMemPercent = parseFloat(payload.process.node.memoryUsagePercent);
+    const heapPercent = parseFloat(payload.process.node.heapUsagePercent);
+    const nodeIndicator = this.getTextIndicator(nodeMemPercent, 70, 85);
+    const heapIndicator = this.getTextIndicator(heapPercent, 70, 85);
+    parts.push(`Node:${nodeMemPercent.toFixed(1).padStart(4)}%${nodeIndicator}`);
+    parts.push(`Heap:${heapPercent.toFixed(1).padStart(4)}%${heapIndicator}`);
+
+    // Target process metrics (if available)
+    if (payload.process.targetProcess && payload.process.targetProcess.count > 0) {
+      const targetCpu = payload.process.targetProcess.totals?.cpuPercent || "0";
+      const targetMem = payload.process.targetProcess.totals?.memoryPercent || "0";
+      const targetCpuNum = parseFloat(targetCpu);
+      const targetMemNum = parseFloat(targetMem);
+      const cpuIndicator = this.getTextIndicator(targetCpuNum, 80, 90);
+      const memIndicator = this.getTextIndicator(targetMemNum, 80, 90);
+      parts.push(
+        `${payload.process.targetProcess.processName}:${targetCpu.padStart(
+          4
+        )}%${cpuIndicator}/${targetMem.padStart(4)}%${memIndicator}`
+      );
+    }
+
+    // GC activity with performance indicators
+    if (payload.gc && payload.gc.count > 0) {
+      const avgDuration = payload.gc.avgDuration;
+      const gcIndicator = this.getTextIndicator(avgDuration, 5, 10, true);
+      parts.push(
+        `GC:${payload.gc.count.toString().padStart(2)}(${avgDuration
+          .toFixed(1)
+          .padStart(4)}ms)${gcIndicator}`
+      );
+    }
+
+    // Machine constraints
+    if (payload.constraints) {
+      parts.push(`[${payload.constraints.cpu}CPU/${payload.constraints.memoryGB}GB]`);
+    }
+
+    // Warning indicators (only show critical ones in the main label)
+    const criticalWarnings: string[] = [];
+    if (payload.process.node.isNearHeapLimit) criticalWarnings.push("HEAP_LIMIT");
+    if (diskPercent > 90) criticalWarnings.push("DISK_CRITICAL");
+    if (memoryPercent > 95) criticalWarnings.push("MEM_CRITICAL");
+    if (payload.system.disk.warning) criticalWarnings.push("DISK_WARN");
+
+    if (criticalWarnings.length > 0) {
+      parts.push(`[${criticalWarnings.join(",")}]`);
+    }
+
+    return parts.join(" | ");
+  }
+
+  /**
+   * Get a text-based indicator for a value; higher is worse in both modes
+   * (usage percentages, or GC pause durations in ms when isDuration is true)
+   */
+  private getTextIndicator(
+    value: number,
+    warningThreshold: number,
+    criticalThreshold: number,
+    isDuration = false
+  ): string {
+    // The same thresholds apply to percentages and durations; isDuration only
+    // documents intent at the call sites
+    if (value >= criticalThreshold) return " [CRIT]";
+    if (value >= warningThreshold) return " [WARN]";
+    return " [OK]";
+  }
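+
+  // For example (hypothetical inputs): getTextIndicator(92, 80, 90) returns
+  // " [CRIT]", getTextIndicator(85, 80, 90) returns " [WARN]", and a 7ms
+  // average GC pause, getTextIndicator(7, 5, 10, true), returns " [WARN]".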
+
+  /**
+   * Create a compact version of the enhanced label for high-frequency logging
+   */
+  private createCompactLabel(payload: any, baseLabel: string): string {
+    const parts: string[] = [baseLabel];
+
+    // Only show critical metrics in compact mode
+    const diskPercent = parseFloat(payload.system.disk.percentUsed);
+    const memoryPercent = parseFloat(payload.system.memory.percentUsed);
+    const heapPercent = parseFloat(payload.process.node.heapUsagePercent);
+
+    // Use single character indicators for compactness
+    const diskIndicator = diskPercent > 90 ? "!" : diskPercent > 80 ? "?" : ".";
+    const memIndicator = memoryPercent > 95 ? "!" : memoryPercent > 80 ? "?" : ".";
+    const heapIndicator = heapPercent > 85 ? "!" : heapPercent > 70 ? "?" : ".";
+
+    parts.push(`D:${diskPercent.toFixed(0).padStart(2)}%${diskIndicator}`);
+    parts.push(`M:${memoryPercent.toFixed(0).padStart(2)}%${memIndicator}`);
+    parts.push(`H:${heapPercent.toFixed(0).padStart(2)}%${heapIndicator}`);
+
+    // GC activity (only if significant)
+    if (payload.gc && payload.gc.count > 0 && payload.gc.avgDuration > 2) {
+      const gcIndicator =
+        payload.gc.avgDuration > 10 ? "!" : payload.gc.avgDuration > 5 ? "?" : ".";
+      parts.push(`GC:${payload.gc.count}${gcIndicator}`);
+    }
+
+    return parts.join(" ");
+  }
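+
+  // A trimmed sketch of the payload shape assembled below; the values shown
+  // are illustrative only:
+  // {
+  //   system: { disk: { percentUsed: "41.2", warning: false, ... }, memory: { percentUsed: "83.5", ... } },
+  //   gc: { count: 3, avgDuration: 2.1, ... },
+  //   process: { node: { heapUsagePercent: "71.4", isNearHeapLimit: false, ... }, targetProcess: null },
+  //   timestamp: "2024-01-01T00:00:00.000Z",
+  // }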
+
+  async getResourceSnapshotPayload() {
+    const [systemMetrics, processMetrics] = await Promise.all([
+      this.getSystemMetrics(),
+      this.getProcessMetrics(),
+    ]);
+
+    const gcSummary = summarizeGCEntries(this.bufferedGcEntries);
+    this.bufferedGcEntries = [];
+
+    // Formatting helpers: bytes to a two-decimal MB string, and a one-decimal percent string
+    const formatBytes = (bytes: number) => (bytes / (1024 * 1024)).toFixed(2);
+    const formatPercent = (value: number) => value.toFixed(1);
+
+    return {
+      system: {
+        disk: {
+          limitGiB: DISK_LIMIT_GB,
+          dirName: this.dirName,
+          usedGiB: (systemMetrics.disk.used / (1024 * 1024 * 1024)).toFixed(2),
+          freeGiB: (systemMetrics.disk.free / (1024 * 1024 * 1024)).toFixed(2),
+          percentUsed: formatPercent(systemMetrics.disk.percentUsed),
+          warning: systemMetrics.disk.warning,
+        },
+        memory: {
+          freeGB: (systemMetrics.memory.free / (1024 * 1024 * 1024)).toFixed(2),
+          percentUsed: formatPercent(systemMetrics.memory.percentUsed),
+        },
+      },
+      gc: gcSummary,
+      constraints: {
+        cpu: os.cpus().length,
+        memoryGB: Math.floor(os.totalmem() / (1024 * 1024 * 1024)),
+        note: "Using system resources (no machine constraints specified)",
+      },
+      process: {
+        node: {
+          memoryUsageMB: formatBytes(processMetrics.node.memoryUsage),
+          memoryUsagePercent: formatPercent(processMetrics.node.memoryUsagePercent),
+          heapUsedMB: formatBytes(processMetrics.node.heapUsed),
+          heapSizeLimitMB: formatBytes(processMetrics.node.heapSizeLimit),
+          heapUsagePercent: formatPercent(processMetrics.node.heapUsagePercent),
+          availableHeapMB: formatBytes(processMetrics.node.availableHeap),
+          isNearHeapLimit: processMetrics.node.isNearHeapLimit,
+          ...(this.verbose
+            ? {
+                heapStats: getHeapStatistics(),
+              }
+            : {}),
+        },
+        targetProcess: processMetrics.targetProcess
+          ? {
+              method: processMetrics.targetProcess.method,
+              processName: processMetrics.targetProcess.processName,
+              count: processMetrics.targetProcess.count,
+              averages: processMetrics.targetProcess.averages
+                ? {
+                    cpuPercent: formatPercent(processMetrics.targetProcess.averages.cpu * 100),
+                    memoryPercent: formatPercent(processMetrics.targetProcess.averages.memory),
+                    rssMB: formatBytes(processMetrics.targetProcess.averages.rss * 1024),
+                    vszMB: formatBytes(processMetrics.targetProcess.averages.vsz * 1024),
+                  }
+                : null,
+              totals: processMetrics.targetProcess.totals
+                ? {
+                    cpuPercent: formatPercent(processMetrics.targetProcess.totals.cpu * 100),
+                    memoryPercent: formatPercent(processMetrics.targetProcess.totals.memory),
+                    rssMB: formatBytes(processMetrics.targetProcess.totals.rss * 1024),
+                    vszMB: formatBytes(processMetrics.targetProcess.totals.vsz * 1024),
+                  }
+                : null,
+            }
+          : null,
+      },
+      timestamp: new Date().toISOString(),
+    };
+  }
+}
+
+function summarizeGCEntries(entries: PerformanceEntry[]): GCSummary {
+  if (entries.length === 0) {
+    return {
+      count: 0,
+      totalDuration: 0,
+      avgDuration: 0,
+      maxDuration: 0,
+      kinds: {},
+    };
+  }
+
+  let totalDuration = 0;
+  let maxDuration = 0;
+  const kinds: Record<string, { count: number; totalDuration: number; maxDuration: number }> = {};
+
+  for (const e of entries) {
+    const duration = e.duration;
+    totalDuration += duration;
+    if (duration > maxDuration) maxDuration = duration;
+
+    const kind = kindName((e as any)?.detail?.kind ?? "unknown");
+    if (!kinds[kind]) {
+      kinds[kind] = { count: 0, totalDuration: 0, maxDuration: 0 };
+    }
+    kinds[kind].count += 1;
+    kinds[kind].totalDuration += duration;
+    if (duration > kinds[kind].maxDuration) kinds[kind].maxDuration = duration;
+  }
+
+  // Finalize averages
+  const avgDuration = totalDuration / entries.length;
+  const kindsWithAvg: GCSummary["kinds"] = {};
+  for (const [kind, stats] of Object.entries(kinds)) {
+    kindsWithAvg[kind] = {
+      count: stats.count,
+      totalDuration: stats.totalDuration,
+      avgDuration: stats.totalDuration / stats.count,
+      maxDuration: stats.maxDuration,
+    };
+  }
+
+  return {
+    count: entries.length,
+    totalDuration,
+    avgDuration,
+    maxDuration,
+    kinds: kindsWithAvg,
+  };
+}
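+
+// A minimal sketch of how GC entries reach summarizeGCEntries. The exact wiring
+// lives elsewhere in this class and is not part of this hunk, so treat it as an
+// assumption; PerformanceObserver is exported from node's "perf_hooks":
+//
+//   const observer = new PerformanceObserver((list) => {
+//     this.bufferedGcEntries.push(...list.getEntries());
+//   });
+//   observer.observe({ entryTypes: ["gc"] });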
+
+const kindName = (k: number | string) => {
+  if (typeof k === "number") {
+    return (
+      {
+        [constants.NODE_PERFORMANCE_GC_MAJOR]: "major",
+        [constants.NODE_PERFORMANCE_GC_MINOR]: "minor",
+        [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: "incremental",
+        [constants.NODE_PERFORMANCE_GC_WEAKCB]: "weak-cb",
+      }[k] ?? `kind:${k}`
+    );
+  }
+  return k;
+};
diff --git a/packages/core/src/v3/utils/flattenAttributes.ts b/packages/core/src/v3/utils/flattenAttributes.ts
index 78acfca262..568346d48a 100644
--- a/packages/core/src/v3/utils/flattenAttributes.ts
+++ b/packages/core/src/v3/utils/flattenAttributes.ts
@@ -239,7 +239,8 @@ function isRecord(value: unknown): value is Record<string, unknown> {
 }
 
 export function unflattenAttributes(
-  obj: Attributes
+  obj: Attributes,
+  filteredKeys?: string[]
 ): Record<string, unknown> | string | number | boolean | null | undefined {
   if (typeof obj !== "object" || obj === null || Array.isArray(obj)) {
     return obj;
@@ -261,6 +262,10 @@ export function unflattenAttributes(
   const result: Record<string, unknown> = {};
 
   for (const [key, value] of Object.entries(obj)) {
+    if (filteredKeys?.includes(key)) {
+      continue;
+    }
+
     const parts = key.split(".").reduce(
       (acc, part) => {
         if (part.startsWith("[") && part.endsWith("]")) {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index a7f137024e..64531f614e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -969,8 +969,8 @@ importers:
   internal-packages/clickhouse:
     dependencies:
       '@clickhouse/client':
-        specifier: ^1.11.1
-        version: 1.11.1
+        specifier: ^1.12.1
+        version: 1.12.1
       '@internal/tracing':
         specifier: workspace:*
         version: link:../tracing
@@ -5385,6 +5385,10 @@ packages:
     resolution: {integrity: sha512-bme0le2yhDSAh13d2fxhSW5ZrNoVqZ3LTyac8jK6hNH0qkksXnjYkLS6KQalPU6NMpffxHmpI4+/Gi2MnX0NCA==}
     dev: false
 
+  /@clickhouse/client-common@1.12.1:
+    resolution: {integrity: sha512-ccw1N6hB4+MyaAHIaWBwGZ6O2GgMlO99FlMj0B0UEGfjxM9v5dYVYql6FpP19rMwrVAroYs/IgX2vyZEBvzQLg==}
+    dev: false
+
   /@clickhouse/client@1.11.1:
     resolution: {integrity: sha512-u9h++h72SmWystijNqfNvMkfA+5+Y1LNfmLL/odCL3VgI3oyAPP9ubSw/Yrt2zRZkLKehMMD1kuOej0QHbSoBA==}
     engines: {node: '>=16'}
     dependencies:
       '@clickhouse/client-common': 1.11.1
     dev: false
 
+  /@clickhouse/client@1.12.1:
+    resolution: {integrity: sha512-7ORY85rphRazqHzImNXMrh4vsaPrpetFoTWpZYueCO2bbO6PXYDXp/GQ4DgxnGIqbWB/Di1Ai+Xuwq2o7DJ36A==}
+    engines: {node: '>=16'}
+    dependencies:
+      '@clickhouse/client-common': 1.12.1
+    dev: false
+
   /@codemirror/autocomplete@6.18.6:
     resolution: {integrity: sha512-PHHBXFomUs5DF+9tCOM/UoW6XQ4R44lLNNhRaW9PKPTU0D7lIjRg3ElxaJnTwsl/oHiR93WSXDBrekhoUGCPtg==}
     dependencies:
diff --git a/references/hello-world/src/trigger/init.ts b/references/hello-world/src/trigger/init.ts
index 8512d395ca..6603befd65 100644
--- a/references/hello-world/src/trigger/init.ts
+++ b/references/hello-world/src/trigger/init.ts
@@ -1,10 +1,10 @@
 import { logger, tasks } from "@trigger.dev/sdk";
 // import { setDb } from "../db.js";
 
-tasks.middleware("db", ({ ctx, payload, next }) => {
-  logger.info("Hello, world from the middleware", { ctx, payload });
-  return next();
-});
+// tasks.middleware("db", ({ ctx, payload, next }) => {
+//   logger.info("Hello, world from the middleware", { ctx, payload });
+//   return next();
+// });
 
 tasks.onCancel(async ({ ctx, payload }) => {
   logger.info("Hello, world from the global cancel", { ctx, payload });
diff --git a/references/hello-world/src/trigger/telemetry.ts b/references/hello-world/src/trigger/telemetry.ts
new file mode 100644
index 0000000000..5dbecb8963
--- /dev/null
+++ b/references/hello-world/src/trigger/telemetry.ts
@@ -0,0 +1,1634 @@
+import { logger, task } from "@trigger.dev/sdk";
+import { setTimeout } from "timers/promises";
+
+export const simpleSuccessTask = task({
+  id: "otel/simple-success-task",
+  run: async (payload: any, { ctx }) => {
+    logger.log("Hello log 1", { ctx });
logger.info("Hello info 1"); + logger.warn("Hello warn 1"); + logger.error("Hello error 1"); + + await setTimeout(15000); + + logger.log("Hello log 2"); + logger.info("Hello info 2"); + logger.warn("Hello warn 2"); + logger.error("Hello error 2"); + + return { message: "Hello, world!" }; + }, +}); + +export const simpleFailureTask = task({ + id: "otel/simple-failure-task", + retry: { + maxAttempts: 1, + }, + run: async (payload: any, { ctx }) => { + await setTimeout(5000); + + throw new Error("Hello error"); + }, +}); + +export const failureWithRetries = task({ + id: "otel/failure-with-retries", + retry: { + maxAttempts: 3, + }, + run: async (payload: any, { ctx }) => { + await setTimeout(15000); + + throw new Error("Hello error"); + }, +}); + +export const taskWithChildTasks = task({ + id: "otel/task-with-child-tasks", + run: async (payload: any, { ctx }) => { + await simpleSuccessTask.triggerAndWait({}); + }, +}); + +export const generateLogsParentTask = task({ + id: "otel/generate-logs-parent", + run: async (payload: any) => { + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + await generateLogsTask.triggerAndWait({}); + }, +}); + +export const generateLogsTask = task({ + id: "otel/generate-logs", + run: async (payload: any, { ctx }) => { + await generateLogs(101); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + 
}); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await 
generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () 
=> { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await 
logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await 
generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 1", async () => { + await generateLogs(101); + }); + + await logger.trace("span 2", async () => { + await generateLogs(101); + + await logger.trace("span 2.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 2.1.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + + await logger.trace("span 3", async () => { + await generateLogs(101); + + await logger.trace("span 3.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.1.1.1", async () => { + await generateLogs(101); + }); + }); + + await logger.trace("span 3.2.1", async () => { + await generateLogs(101); + + await logger.trace("span 3.2.1.1", async () => { + await generateLogs(101); + }); + }); + }); + }); + }, +}); + +async function generateLogs(count: number) { + await Promise.all( + Array.from({ length: count }).map(async () => { + await setTimeout(1000); + + const logMessage = generateRandomLogMessage(); + + switch (logMessage.level) { + case "DEBUG": { + logger.debug(logMessage.message, { ...logMessage.metadata, ...generateRandomObject() }); + break; + } + case "INFO": { + logger.info(logMessage.message, { ...logMessage.metadata, ...generateRandomObject() }); + break; + } + case "WARN": { + logger.warn(logMessage.message, { ...logMessage.metadata, ...generateRandomObject() }); + break; + } + case "FATAL": + case "ERROR": { + logger.error(logMessage.message, { ...logMessage.metadata, ...generateRandomObject() }); + break; + } + } + }) + ); +} + +type RandomValue = string | number | boolean | null | RandomObject | RandomValue[]; + +interface RandomObject { + [key: string]: RandomValue; +} + +function generateRandomObject(depth: number = 3, maxKeys: number = 8): RandomObject { + const obj: RandomObject = {}; + const numKeys = Math.floor(Math.random() * maxKeys) + 1; + + for (let i = 0; i < numKeys; i++) { + const key = generateRandomKey(); + obj[key] = generateRandomValue(depth); + } + + return obj; +} + +function generateRandomValue(depth: number): RandomValue { + if (depth <= 0) { + return generatePrimitiveValue(); + } + + const valueTypes = ["primitive", "object", "array"]; + const weights = depth > 1 ? 
[0.6, 0.2, 0.2] : [0.8, 0.1, 0.1]; + const selectedType = weightedRandomChoice(valueTypes, weights); + + switch (selectedType) { + case "primitive": + return generatePrimitiveValue(); + case "object": + return generateRandomObject(depth - 1, 5); + case "array": + return generateRandomArray(depth - 1); + default: + return generatePrimitiveValue(); + } +} + +function generatePrimitiveValue(): string | number | boolean | null { + const primitiveTypes = ["string", "number", "boolean", "null"]; + const type = primitiveTypes[Math.floor(Math.random() * primitiveTypes.length)]; + + switch (type) { + case "string": + return generateRandomString(); + case "number": + return generateRandomNumber(); + case "boolean": + return Math.random() > 0.5; + case "null": + return null; + default: + return generateRandomString(); + } +} + +function generateArrayOfPrimitiveValues(length: number): RandomValue[] { + const primitiveTypes = ["string", "number", "boolean", "null"]; + const type = primitiveTypes[Math.floor(Math.random() * primitiveTypes.length)]; + + switch (type) { + case "string": + return Array.from({ length }, () => generateRandomString()); + case "number": + return Array.from({ length }, () => generateRandomNumber()); + case "boolean": + return Array.from({ length }, () => Math.random() > 0.5); + case "null": + return Array.from({ length }, () => null); + default: + return Array.from({ length }, () => generateRandomString()); + } +} + +function generateRandomString(): string { + const stringTypes = [ + "name", + "email", + "city", + "company", + "product", + "description", + "color", + "status", + "category", + "id", + "url", + "phone", + ]; + + const type = stringTypes[Math.floor(Math.random() * stringTypes.length)]; + + const samples = { + name: ["John Smith", "Sarah Johnson", "Michael Brown", "Emma Wilson", "David Lee"], + email: ["user@example.com", "admin@company.org", "contact@business.net", "info@service.co.uk"], + city: ["London", "Manchester", "Birmingham", "Edinburgh", "Cardiff", "Belfast"], + company: [ + "TechCorp Ltd", + "Global Solutions", + "Innovation Hub", + "Digital Dynamics", + "Future Systems", + ], + product: ["Wireless Headphones", "Smart Watch", "Laptop Stand", "Coffee Maker", "Desk Lamp"], + description: [ + "High-quality product with excellent features", + "Reliable and efficient solution", + "Modern design meets functionality", + ], + color: ["red", "blue", "green", "yellow", "purple", "orange", "black", "white"], + status: ["active", "inactive", "pending", "completed", "cancelled", "processing"], + category: ["electronics", "clothing", "books", "home", "sports", "automotive"], + id: () => `${Math.random().toString(36).substr(2, 9)}`, + url: ["https://example.com", "https://api.service.com/v1", "https://docs.platform.org"], + phone: ["+44 20 7123 4567", "+44 161 234 5678", "+44 121 345 6789"], + }; + + const sampleArray = samples[type as keyof typeof samples]; + if (typeof sampleArray === "function") { + return sampleArray(); + } + return sampleArray[Math.floor(Math.random() * sampleArray.length)]; +} + +function generateRandomNumber(): number { + const numberTypes = ["integer", "decimal", "large", "small"]; + const type = numberTypes[Math.floor(Math.random() * numberTypes.length)]; + + switch (type) { + case "integer": + return Math.floor(Math.random() * 1000); + case "decimal": + return Math.round(Math.random() * 100 * 100) / 100; + case "large": + return Math.floor(Math.random() * 1000000); + case "small": + return Math.floor(Math.random() * 10); + default: + return 
Math.floor(Math.random() * 100);
+  }
+}
+
+function generateRandomKey(): string {
+  const commonKeys = [
+    "id",
+    "name",
+    "email",
+    "age",
+    "address",
+    "phone",
+    "company",
+    "title",
+    "description",
+    "price",
+    "quantity",
+    "status",
+    "createdAt",
+    "updatedAt",
+    "isActive",
+    "category",
+    "tags",
+    "metadata",
+    "config",
+    "settings",
+    "userId",
+    "productId",
+    "orderId",
+    "customerId",
+    "location",
+    "type",
+    "value",
+    "label",
+    "color",
+    "size",
+    "weight",
+    "dimensions",
+    "features",
+  ];
+
+  return commonKeys[Math.floor(Math.random() * commonKeys.length)];
+}
+
+// Arrays only hold primitive values, so the remaining depth budget is not consumed here
+function generateRandomArray(_depth: number): RandomValue[] {
+  const arrayLength = Math.floor(Math.random() * 5) + 1;
+
+  return generateArrayOfPrimitiveValues(arrayLength);
+}
+
+function weightedRandomChoice<T>(choices: T[], weights: number[]): T {
+  const totalWeight = weights.reduce((sum, weight) => sum + weight, 0);
+  let random = Math.random() * totalWeight;
+
+  for (let i = 0; i < choices.length; i++) {
+    random -= weights[i];
+    if (random <= 0) {
+      return choices[i];
+    }
+  }
+
+  return choices[choices.length - 1];
+}
+
+interface LogMessage {
+  timestamp: string;
+  level: "DEBUG" | "INFO" | "WARN" | "ERROR" | "FATAL";
+  component: string;
+  message: string;
+  metadata?: Record<string, any>;
+}
+
+function generateRandomLogMessage(includeMetadata: boolean = true): LogMessage {
+  const level = generateLogLevel();
+  const component = generateComponent();
+  const message = generateMessage(level, component);
+  const timestamp = generateTimestamp();
+
+  const logMessage: LogMessage = {
+    timestamp,
+    level,
+    component,
+    message,
+  };
+
+  // Roughly 70% of messages get metadata attached
+  if (includeMetadata && Math.random() > 0.3) {
+    logMessage.metadata = generateMetadata(level);
+  }
+
+  return logMessage;
+}
+
+function generateLogLevel(): LogMessage["level"] {
+  const levels: LogMessage["level"][] = ["DEBUG", "INFO", "WARN", "ERROR", "FATAL"];
+  const weights = [0.3, 0.4, 0.15, 0.13, 0.02]; // INFO and DEBUG most common
+
+  return weightedRandomChoice(levels, weights);
+}
+
+function generateComponent(): string {
+  const components = [
+    "AuthService",
+    "DatabaseManager",
+    "UserController",
+    "PaymentProcessor",
+    "EmailService",
+    "CacheManager",
+    "FileUploader",
+    "APIGateway",
+    "SecurityManager",
+    "NotificationService",
+    "OrderService",
+    "ProductCatalog",
+    "SessionManager",
+    "ConfigLoader",
+    "MetricsCollector",
+    "HealthChecker",
+    "MessageQueue",
+    "SearchEngine",
+    "ReportGenerator",
+    "BackupService",
+    "LoadBalancer",
+    "RateLimiter",
+    "ValidationService",
+    "AuditLogger",
+  ];
+
+  return components[Math.floor(Math.random() * components.length)];
+}
+
+// Note: component is currently unused; the templates generate their own service names
+function generateMessage(level: LogMessage["level"], component: string): string {
+  const messageTemplates = {
+    DEBUG: [
+      `Executing method ${generateMethodName()} with parameters: ${generateParameters()}`,
+      `Cache hit for key: ${generateCacheKey()}`,
+      `Processing request with ID: ${generateId()}`,
+      `Database query executed in ${generateDuration()}ms`,
+      `Validating input data for ${generateEntityName()}`,
+      `Loading configuration from ${generateFilePath()}`,
+      `Initializing connection pool with ${generateNumber(5, 20)} connections`,
+      `Parsing JSON payload of size ${generateFileSize()}`,
+      `Applying business rule: ${generateBusinessRule()}`,
+    ],
+    INFO: [
+      `User ${generateUsername()} successfully logged in from ${generateIPAddress()}`,
+      `Order ${generateOrderId()} created successfully for customer ${generateCustomerId()}`,
+      `Email sent to ${generateEmail()} with subject: "${generateEmailSubject()}"`,
+      `File ${generateFileName()} uploaded successfully (${generateFileSize()})`,
+      `Payment of £${generatePrice()} processed for transaction ${generateTransactionId()}`,
+      `New user registered: ${generateUsername()} (${generateEmail()})`,
+      `Service started successfully on port ${generatePort()}`,
+      `Database migration ${generateMigrationName()} completed successfully`,
+      `Report ${generateReportName()} generated in ${generateDuration()}ms`,
+      `Cache cleared for namespace: ${generateNamespace()}`,
+    ],
+    WARN: [
+      `High memory usage detected: ${generatePercentage()}% of available memory`,
+      `Slow query detected: ${generateDuration()}ms execution time`,
+      `Rate limit approaching for user ${generateUsername()}: ${generateNumber(80, 95)}% of limit`,
+      `Deprecated API endpoint accessed: ${generateAPIEndpoint()}`,
+      `Connection pool nearly exhausted: ${generateNumber(18, 20)}/20 connections in use`,
+      `Large file upload detected: ${generateFileSize()} from ${generateIPAddress()}`,
+      `Failed login attempt for user ${generateUsername()} from ${generateIPAddress()}`,
+      `Queue size growing: ${generateNumber(500, 1000)} pending messages`,
+      `SSL certificate expires in ${generateNumber(1, 30)} days`,
+      `Disk space low: ${generatePercentage()}% full on ${generateDiskPath()}`,
+    ],
+    ERROR: [
+      `Failed to connect to database: ${generateErrorMessage()}`,
+      `Payment processing failed for transaction ${generateTransactionId()}: ${generatePaymentError()}`,
+      `Email delivery failed to ${generateEmail()}: ${generateEmailError()}`,
+      `File upload failed: ${generateFileError()}`,
+      `Authentication failed for user ${generateUsername()}: ${generateAuthError()}`,
+      `API request to ${generateAPIEndpoint()} failed: ${generateHTTPError()}`,
+      `Database query failed: ${generateSQLError()}`,
+      `Service unavailable: ${generateServiceError()}`,
+      `Configuration error: ${generateConfigError()}`,
+      `Validation failed for ${generateEntityName()}: ${generateValidationError()}`,
+    ],
+    FATAL: [
+      `Database connection pool exhausted, shutting down service`,
+      `Out of memory error: Unable to allocate ${generateFileSize()}`,
+      `Critical security breach detected from ${generateIPAddress()}`,
+      `System disk full: Unable to write logs or process requests`,
+      `Service dependency ${generateServiceName()} is completely unavailable`,
+      `Unhandled exception caused service crash: ${generateCriticalError()}`,
+      `Configuration file corrupted: Unable to start service`,
+      `License validation failed: Service cannot continue`,
+      `Critical data corruption detected in ${generateTableName()}`,
+      `Network interface failure: All connections lost`,
+    ],
+  };
+
+  const templates = messageTemplates[level];
+  return templates[Math.floor(Math.random() * templates.length)];
+}
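+
+// Example of one fully expanded template (values illustrative; every helper
+// below returns randomized data):
+//   generateMessage("INFO", "OrderService")
+//   → 'Order ORD-1700000000000-042 created successfully for customer CUST00317'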
"${generateEmailSubject()}"`, + `File ${generateFileName()} uploaded successfully (${generateFileSize()})`, + `Payment of £${generatePrice()} processed for transaction ${generateTransactionId()}`, + `New user registered: ${generateUsername()} (${generateEmail()})`, + `Service started successfully on port ${generatePort()}`, + `Database migration ${generateMigrationName()} completed successfully`, + `Report ${generateReportName()} generated in ${generateDuration()}ms`, + `Cache cleared for namespace: ${generateNamespace()}`, + ], + WARN: [ + `High memory usage detected: ${generatePercentage()}% of available memory`, + `Slow query detected: ${generateDuration()}ms execution time`, + `Rate limit approaching for user ${generateUsername()}: ${generateNumber(80, 95)}% of limit`, + `Deprecated API endpoint accessed: ${generateAPIEndpoint()}`, + `Connection pool nearly exhausted: ${generateNumber(18, 20)}/20 connections in use`, + `Large file upload detected: ${generateFileSize()} from ${generateIPAddress()}`, + `Failed login attempt for user ${generateUsername()} from ${generateIPAddress()}`, + `Queue size growing: ${generateNumber(500, 1000)} pending messages`, + `SSL certificate expires in ${generateNumber(1, 30)} days`, + `Disk space low: ${generatePercentage()}% full on ${generateDiskPath()}`, + ], + ERROR: [ + `Failed to connect to database: ${generateErrorMessage()}`, + `Payment processing failed for transaction ${generateTransactionId()}: ${generatePaymentError()}`, + `Email delivery failed to ${generateEmail()}: ${generateEmailError()}`, + `File upload failed: ${generateFileError()}`, + `Authentication failed for user ${generateUsername()}: ${generateAuthError()}`, + `API request to ${generateAPIEndpoint()} failed: ${generateHTTPError()}`, + `Database query failed: ${generateSQLError()}`, + `Service unavailable: ${generateServiceError()}`, + `Configuration error: ${generateConfigError()}`, + `Validation failed for ${generateEntityName()}: ${generateValidationError()}`, + ], + FATAL: [ + `Database connection pool exhausted, shutting down service`, + `Out of memory error: Unable to allocate ${generateFileSize()}`, + `Critical security breach detected from ${generateIPAddress()}`, + `System disk full: Unable to write logs or process requests`, + `Service dependency ${generateServiceName()} is completely unavailable`, + `Unhandled exception caused service crash: ${generateCriticalError()}`, + `Configuration file corrupted: Unable to start service`, + `License validation failed: Service cannot continue`, + `Critical data corruption detected in ${generateTableName()}`, + `Network interface failure: All connections lost`, + ], + }; + + const templates = messageTemplates[level]; + return templates[Math.floor(Math.random() * templates.length)]; +} + +function generateTimestamp(): string { + const now = new Date(); + const randomOffset = Math.floor(Math.random() * 86400000); // Random time within last 24 hours + const timestamp = new Date(now.getTime() - randomOffset); + return timestamp.toISOString(); +} + +function generateMetadata(level: LogMessage["level"]): Record { + const baseMetadata: Record = { + requestId: generateId(), + userId: Math.random() > 0.3 ? 
generateUserId() : null, + sessionId: generateSessionId(), + userAgent: generateUserAgent(), + ipAddress: generateIPAddress(), + }; + + // Add level-specific metadata + switch (level) { + case "ERROR": + case "FATAL": + baseMetadata.stackTrace = generateStackTrace(); + baseMetadata.errorCode = generateErrorCode(); + break; + case "WARN": + baseMetadata.threshold = generateNumber(70, 95); + baseMetadata.currentValue = generateNumber(75, 100); + break; + case "INFO": + baseMetadata.duration = generateDuration(); + baseMetadata.statusCode = 200; + break; + } + + return baseMetadata; +} + +// Helper functions for generating realistic data +function generateMethodName(): string { + const prefixes = ["get", "set", "create", "update", "delete", "process", "validate", "calculate"]; + const suffixes = ["User", "Order", "Payment", "Data", "Config", "Report", "Session", "Token"]; + return `${prefixes[Math.floor(Math.random() * prefixes.length)]}${ + suffixes[Math.floor(Math.random() * suffixes.length)] + }`; +} + +function generateParameters(): string { + const params = []; + const numParams = Math.floor(Math.random() * 3) + 1; + for (let i = 0; i < numParams; i++) { + params.push( + `param${i + 1}=${Math.random() > 0.5 ? `"${generateId()}"` : generateNumber(1, 1000)}` + ); + } + return `{${params.join(", ")}}`; +} + +function generateId(): string { + return Math.random().toString(36).substr(2, 12); +} + +function generateUserId(): string { + return `user_${Math.random().toString(36).substr(2, 8)}`; +} + +function generateSessionId(): string { + return `sess_${Math.random().toString(36).substr(2, 16)}`; +} + +function generateOrderId(): string { + return `ORD-${Date.now()}-${Math.floor(Math.random() * 1000) + .toString() + .padStart(3, "0")}`; +} + +function generateTransactionId(): string { + return `TXN${Math.random().toString(36).substr(2, 10).toUpperCase()}`; +} + +function generateCustomerId(): string { + return `CUST${Math.floor(Math.random() * 100000) + .toString() + .padStart(5, "0")}`; +} + +function generateUsername(): string { + const usernames = [ + "john.smith", + "sarah.jones", + "mike.brown", + "emma.wilson", + "david.lee", + "admin", + "guest", + "testuser", + ]; + return usernames[Math.floor(Math.random() * usernames.length)]; +} + +function generateEmail(): string { + const domains = ["example.com", "company.org", "business.co.uk", "service.net"]; + const username = generateUsername().replace(".", ""); + return `${username}@${domains[Math.floor(Math.random() * domains.length)]}`; +} + +function generateIPAddress(): string { + return `${Math.floor(Math.random() * 256)}.${Math.floor(Math.random() * 256)}.${Math.floor( + Math.random() * 256 + )}.${Math.floor(Math.random() * 256)}`; +} + +function generateDuration(): number { + return Math.floor(Math.random() * 5000) + 10; +} + +function generateNumber(min: number, max: number): number { + return Math.floor(Math.random() * (max - min + 1)) + min; +} + +function generatePercentage(): number { + return Math.floor(Math.random() * 100) + 1; +} + +function generateFileSize(): string { + const size = Math.floor(Math.random() * 1000) + 1; + const units = ["KB", "MB", "GB"]; + const unit = units[Math.floor(Math.random() * units.length)]; + return `${size}${unit}`; +} + +function generateFileName(): string { + const names = ["document", "report", "image", "data", "config", "backup"]; + const extensions = [".pdf", ".xlsx", ".jpg", ".json", ".xml", ".zip"]; + return `${names[Math.floor(Math.random() * names.length)]}${Math.floor(Math.random() * 
1000)}${ + extensions[Math.floor(Math.random() * extensions.length)] + }`; +} + +function generatePrice(): string { + return (Math.random() * 1000 + 10).toFixed(2); +} + +function generatePort(): number { + return Math.floor(Math.random() * 9000) + 1000; +} + +function generateCacheKey(): string { + return `cache:${generateEntityName()}:${generateId()}`; +} + +function generateEntityName(): string { + const entities = ["user", "order", "product", "payment", "session", "config", "report"]; + return entities[Math.floor(Math.random() * entities.length)]; +} + +function generateAPIEndpoint(): string { + const versions = ["v1", "v2", "v3"]; + const resources = ["users", "orders", "products", "payments", "reports"]; + return `/api/${versions[Math.floor(Math.random() * versions.length)]}/${ + resources[Math.floor(Math.random() * resources.length)] + }`; +} + +function generateErrorMessage(): string { + const errors = [ + "Connection timeout after 30 seconds", + "Invalid credentials provided", + "Resource not found", + "Permission denied", + "Service temporarily unavailable", + "Invalid request format", + "Rate limit exceeded", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateUserAgent(): string { + const agents = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", + "PostmanRuntime/7.29.2", + "curl/7.68.0", + ]; + return agents[Math.floor(Math.random() * agents.length)]; +} + +function generateStackTrace(): string { + return `at ${generateMethodName()}(${generateFileName()}:${generateNumber(10, 500)})`; +} + +function generateErrorCode(): string { + const codes = ["E001", "E404", "E500", "E403", "E401", "E503", "E400"]; + return codes[Math.floor(Math.random() * codes.length)]; +} + +// Additional helper functions for specific error types +function generatePaymentError(): string { + const errors = [ + "Insufficient funds", + "Card expired", + "Invalid card number", + "Payment gateway timeout", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateEmailError(): string { + const errors = [ + "SMTP server unavailable", + "Invalid email address", + "Message too large", + "Recipient blocked", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateAuthError(): string { + const errors = [ + "Invalid password", + "Account locked", + "Token expired", + "Two-factor authentication required", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateHTTPError(): string { + const codes = [400, 401, 403, 404, 500, 502, 503, 504]; + const code = codes[Math.floor(Math.random() * codes.length)]; + return `HTTP ${code}`; +} + +function generateSQLError(): string { + const errors = [ + "Syntax error", + "Table does not exist", + "Duplicate key violation", + "Foreign key constraint", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateServiceError(): string { + const services = ["Redis", "Elasticsearch", "RabbitMQ", "MongoDB"]; + return `${services[Math.floor(Math.random() * services.length)]} connection refused`; +} + +function generateConfigError(): string { + const errors = [ + "Missing required property", + "Invalid configuration format", + "Environment variable not set", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateValidationError(): string { + const errors = 
[ + "Required field missing", + "Invalid email format", + "Value out of range", + "Invalid date format", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateCriticalError(): string { + const errors = [ + "NullPointerException", + "OutOfMemoryError", + "StackOverflowError", + "SecurityException", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +function generateServiceName(): string { + const services = ["UserService", "PaymentGateway", "NotificationHub", "DatabaseCluster"]; + return services[Math.floor(Math.random() * services.length)]; +} + +function generateTableName(): string { + const tables = ["users", "orders", "payments", "products", "sessions"]; + return tables[Math.floor(Math.random() * tables.length)]; +} + +function generateFilePath(): string { + const paths = [ + "/etc/app/config.yml", + "/var/log/app.log", + "/opt/app/data.json", + "/home/user/.env", + ]; + return paths[Math.floor(Math.random() * paths.length)]; +} + +function generateDiskPath(): string { + const paths = ["/var/log", "/tmp", "/home", "/opt"]; + return paths[Math.floor(Math.random() * paths.length)]; +} + +function generateMigrationName(): string { + return `migration_${Date.now()}_${generateEntityName()}_table`; +} + +function generateReportName(): string { + const types = ["daily", "weekly", "monthly", "quarterly"]; + const subjects = ["sales", "users", "performance", "security"]; + return `${types[Math.floor(Math.random() * types.length)]}_${ + subjects[Math.floor(Math.random() * subjects.length)] + }_report`; +} + +function generateNamespace(): string { + const namespaces = ["user:sessions", "product:catalog", "order:cache", "auth:tokens"]; + return namespaces[Math.floor(Math.random() * namespaces.length)]; +} + +function generateBusinessRule(): string { + const rules = [ + "validate_payment_amount", + "check_inventory_levels", + "apply_discount_rules", + "verify_user_permissions", + ]; + return rules[Math.floor(Math.random() * rules.length)]; +} + +function generateEmailSubject(): string { + const subjects = [ + "Order Confirmation", + "Password Reset", + "Welcome to Our Service", + "Monthly Newsletter", + ]; + return subjects[Math.floor(Math.random() * subjects.length)]; +} + +function generateFileError(): string { + const errors = [ + "File too large", + "Invalid file type", + "Disk space insufficient", + "Permission denied", + ]; + return errors[Math.floor(Math.random() * errors.length)]; +} + +// Utility function to format log message as string +function formatLogMessage(logMessage: LogMessage): string { + const metadataStr = logMessage.metadata ? ` | ${JSON.stringify(logMessage.metadata)}` : ""; + + return `${logMessage.timestamp} [${logMessage.level}] ${logMessage.component}: ${logMessage.message}${metadataStr}`; +} diff --git a/references/hello-world/trigger.config.ts b/references/hello-world/trigger.config.ts index 2b4a68912f..7e0b3a3c9d 100644 --- a/references/hello-world/trigger.config.ts +++ b/references/hello-world/trigger.config.ts @@ -9,7 +9,7 @@ export default defineConfig({ enabled: true, maxExecutionsPerProcess: 20, }, - logLevel: "log", + logLevel: "debug", maxDuration: 3600, retries: { enabledInDev: true,