Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions apps/mesh/src/ai-providers/adapters/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ export const openrouterAdapter: ProviderAdapter = {
mod === "image" ? "vision" : mod,
),
...m.architecture.output_modalities,
...(m.architecture.output_modalities?.includes("image")
? (["image-generation"] as const)
: []),
...(m.supported_parameters?.includes("tools")
? (["tools"] as const)
: []),
Expand Down
3 changes: 3 additions & 0 deletions apps/mesh/src/ai-providers/factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ function mapOpenRouterModel(m: OpenRouterAPIModel): ModelInfo {
...new Set([
...m.architecture.input_modalities,
...m.architecture.output_modalities,
...(m.architecture.output_modalities?.includes("image")
? (["image-generation"] as const)
: []),
...(canTools ? (["tools"] as const) : []),
...(canReasoning ? (["reasoning"] as const) : []),
]),
Expand Down
182 changes: 182 additions & 0 deletions apps/mesh/src/api/routes/decopilot/built-in-tools/generate-image.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/**
* generate_image Built-in Tool
*
* Server-side tool that generates images using the AI SDK's generateImage()
* function. The image is written as a file part to the stream, and a short
* text result is returned to the model.
*/

import type { MeshContext } from "@/core/mesh-context";
import type { MeshProvider } from "@/ai-providers/types";
import { monitorLlmCall } from "@/monitoring/emit-llm-call";
import { recordLlmCallMetrics } from "@/monitoring/record-llm-call-metrics";
import type { UIMessageStreamWriter } from "ai";
import { generateImage, tool, zodSchema } from "ai";
import { z } from "zod";
import type { ModelsConfig } from "../types";

// MIME types permitted to be streamed back to the client as generated
// images; anything else is rejected before a file part is written.
const ALLOWED_IMAGE_TYPES: ReadonlySet<string> = new Set(
  ["image/png", "image/jpeg", "image/webp", "image/gif"],
);

// Tool input contract: a required prompt (length-bounded to guard against
// runaway payloads) and an optional aspect ratio restricted to the ratios
// this tool supports. The `.describe()` strings are surfaced to the model
// as parameter documentation.
const GenerateImageInputSchema = z.object({
  prompt: z
    .string()
    .min(1)
    .max(10_000)
    .describe(
      "Detailed description of the image to generate. Be specific about style, composition, colors, and subject.",
    ),
  aspect_ratio: z
    .enum(["1:1", "16:9", "9:16", "4:3", "3:4"])
    .optional()
    .describe("Aspect ratio for the generated image. Defaults to 1:1."),
});

// Tool description shown to the model; sentence-per-line for readability.
const GENERATE_IMAGE_DESCRIPTION = [
  "Generate an image from a text description.",
  "The generated image is displayed inline to the user.",
  "Use this when the user asks you to create, draw, or generate an image or picture.",
].join(" ");

// Tool behavior hints attached to the stream's tool metadata.
// readOnlyHint: the tool mutates no server state visible to the caller.
// idempotentHint is false — presumably because repeated calls with the same
// prompt yield different images (generation is non-deterministic); confirm.
// openWorldHint is true: the tool calls out to an external model provider.
const GENERATE_IMAGE_ANNOTATIONS = {
  readOnlyHint: true,
  destructiveHint: false,
  idempotentHint: false,
  openWorldHint: true,
} as const;

/** Configuration for the generate_image tool factory. */
export interface GenerateImageParams {
  /** Provider whose `aiSdk.imageModel()` is used to run the generation. */
  provider: MeshProvider;
  /** Image model identifier; also used as the metrics/monitoring model label. */
  imageModelId: string;
  /** Fallback aspect ratio when the tool call omits one; "1:1" if unset. */
  defaultAspectRatio?: string;
  /** Model config; `credentialId` is attached to monitoring events. */
  models: ModelsConfig;
  // Telemetry scope fields forwarded verbatim to metrics/monitoring.
  organizationId: string;
  agentId: string;
  userId: string;
  threadId: string;
}

/**
 * Create the generate_image tool bound to a stream writer and request scope.
 *
 * The tool generates an image with the provider's image model, streams it to
 * the client as a data-URL file part plus a tool-metadata part, and returns a
 * short text confirmation to the language model. Metrics and monitoring
 * events are emitted exactly once per attempt (success or failure); aborted
 * requests are re-thrown without telemetry.
 *
 * @param writer - UI message stream the file/metadata parts are written to.
 * @param params - Provider, model id, and telemetry scope (see GenerateImageParams).
 * @param ctx - Mesh request context (request id / user agent for monitoring).
 * @returns An AI SDK tool whose execute() resolves to a confirmation string
 *   or throws an Error describing the failure.
 */
export function createGenerateImageTool(
  writer: UIMessageStreamWriter,
  params: GenerateImageParams,
  ctx: MeshContext,
) {
  const {
    provider,
    imageModelId,
    defaultAspectRatio,
    models,
    organizationId,
    agentId,
    userId,
    threadId,
  } = params;

  // Ratios accepted by GenerateImageInputSchema. `defaultAspectRatio` arrives
  // as a plain string from config, so it is validated against this set rather
  // than being cast unchecked to the template-literal type.
  const validAspectRatios = new Set(["1:1", "16:9", "9:16", "4:3", "3:4"]);

  // Emit metrics + monitoring for one generation attempt. Centralized so the
  // success and failure paths cannot drift apart.
  function emitTelemetry(opts: {
    durationMs: number;
    isError: boolean;
    errorType?: string;
    errorMessage?: string;
  }): void {
    recordLlmCallMetrics({
      ctx,
      organizationId,
      modelId: imageModelId,
      durationMs: opts.durationMs,
      isError: opts.isError,
      ...(opts.errorType !== undefined && { errorType: opts.errorType }),
    });
    monitorLlmCall({
      ctx,
      organizationId,
      agentId,
      modelId: imageModelId,
      modelTitle: imageModelId,
      credentialId: models.credentialId,
      threadId,
      durationMs: opts.durationMs,
      isError: opts.isError,
      ...(opts.isError
        ? { errorMessage: opts.errorMessage }
        : { finishReason: "stop" as const }),
      userId,
      requestId: ctx.metadata.requestId,
      userAgent: ctx.metadata.userAgent ?? null,
    });
  }

  return tool({
    description: GENERATE_IMAGE_DESCRIPTION,
    inputSchema: zodSchema(GenerateImageInputSchema),
    execute: async ({ prompt, aspect_ratio }, { abortSignal, toolCallId }) => {
      // Resolution order: explicit tool argument > validated configured
      // default > "1:1". An invalid configured default falls back to "1:1"
      // instead of reaching the provider through an unchecked cast.
      const aspectRatio = (aspect_ratio ??
        (defaultAspectRatio !== undefined &&
        validAspectRatios.has(defaultAspectRatio)
          ? defaultAspectRatio
          : "1:1")) as `${number}:${number}`;

      const startTime = Date.now();

      try {
        const result = await generateImage({
          model: provider.aiSdk.imageModel(imageModelId),
          prompt,
          aspectRatio,
          abortSignal,
        });

        // Validate the payload BEFORE recording success telemetry. The
        // previous ordering recorded a success event and then an error event
        // for the same call when this check failed.
        const base64 = result.image.base64;
        const rawMediaType = result.image.mediaType ?? "image/png";
        if (!ALLOWED_IMAGE_TYPES.has(rawMediaType)) {
          throw new Error(`Unsupported generated image type: ${rawMediaType}`);
        }

        const durationMs = Date.now() - startTime;
        emitTelemetry({ durationMs, isError: false });

        // Write the image as a file part directly to the stream.
        writer.write({
          type: "file",
          url: `data:${rawMediaType};base64,${base64}`,
          mediaType: rawMediaType,
        });

        // Write tool metadata (annotations + latency), keyed by the call id
        // so the client can attach it to this specific tool invocation.
        writer.write({
          type: "data-tool-metadata",
          id: toolCallId,
          data: {
            annotations: GENERATE_IMAGE_ANNOTATIONS,
            latencyMs: durationMs,
          },
        });

        return `Image generated successfully (${aspectRatio}).`;
      } catch (error) {
        // A cancelled request is not a provider failure — re-throw without
        // recording error telemetry.
        if (abortSignal?.aborted) {
          throw error;
        }

        const durationMs = Date.now() - startTime;
        const errorMsg = error instanceof Error ? error.message : String(error);
        emitTelemetry({
          durationMs,
          isError: true,
          errorType: error instanceof Error ? error.name : "Error",
          errorMessage: errorMsg,
        });

        // Re-thrown so the model sees the failure and can retry with a
        // rephrased prompt.
        throw new Error(
          `Image generation failed: ${errorMsg}. Try describing what you'd like to see as an image.`,
        );
      }
    },
  });
}
36 changes: 35 additions & 1 deletion apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,27 @@ import { createSandboxTool, type VirtualClient } from "./sandbox";
import { createSubtaskTool } from "./subtask";
import { userAskTool } from "./user-ask";
import { proposePlanTool } from "./propose-plan";
import { createGenerateImageTool } from "./generate-image";
import type { ModelsConfig } from "../types";
import { MeshProvider } from "@/ai-providers/types";

/**
 * Per-request configuration enabling the generate_image built-in tool.
 * Supplied only when the caller selected an image model for this stream.
 */
export interface ImageConfig {
  /** Image model to invoke via the provider's `aiSdk.imageModel()`. */
  imageModelId: string;
  /** Fallback aspect ratio when the tool call omits one. */
  defaultAspectRatio?: string;
  // Telemetry scope fields forwarded to the tool for metrics/monitoring.
  organizationId: string;
  agentId: string;
  userId: string;
  threadId: string;
}

/** Inputs shared by the built-in tool factories. */
export interface BuiltinToolParams {
  provider: MeshProvider;
  organization: OrganizationScope;
  models: ModelsConfig;
  /** Execution gate; getBuiltInTools defaults this to "readonly". */
  toolApprovalLevel?: ToolApprovalLevel;
  /** Shared map of tool outputs — presumably keyed by tool call id; confirm. */
  toolOutputMap: Map<string, string>;
  passthroughClient: VirtualClient;
  /**
   * When set (and the provider exposes an imageModel factory), the
   * generate_image tool is added to the returned tool set.
   */
  imageConfig?: ImageConfig;
}

/**
Expand All @@ -45,8 +56,9 @@ export function getBuiltInTools(
toolApprovalLevel = "readonly",
toolOutputMap,
passthroughClient,
imageConfig,
} = params;
return {
const tools = {
user_ask: userAskTool,
propose_plan: proposePlanTool,
subtask: createSubtaskTool(
Expand Down Expand Up @@ -84,4 +96,26 @@ export function getBuiltInTools(
toolOutputMap,
}),
} as const;

if (imageConfig && typeof provider.aiSdk.imageModel === "function") {
return {
...tools,
generate_image: createGenerateImageTool(
writer,
{
provider,
imageModelId: imageConfig.imageModelId,
defaultAspectRatio: imageConfig.defaultAspectRatio,
models,
organizationId: imageConfig.organizationId,
agentId: imageConfig.agentId,
userId: imageConfig.userId,
threadId: imageConfig.threadId,
},
ctx,
),
} as const;
}

return tools;
}
2 changes: 2 additions & 0 deletions apps/mesh/src/api/routes/decopilot/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ export function createDecopilotRoutes(deps: DecopilotDeps) {
memory: memoryConfig,
thread_id,
toolApprovalLevel,
imageModel,
} = await validateRequest(c);

const userId = ctx.auth?.user?.id;
Expand Down Expand Up @@ -160,6 +161,7 @@ export function createDecopilotRoutes(deps: DecopilotDeps) {
userId,
threadId: resolvedThreadId,
windowSize,
imageModel,
},
ctx,
{ runRegistry, streamBuffer, cancelBroadcast },
Expand Down
6 changes: 6 additions & 0 deletions apps/mesh/src/api/routes/decopilot/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ export const StreamRequestSchema = z.object({
temperature: z.number().default(0.5),
thread_id: z.string().optional(),
toolApprovalLevel: z.enum(["auto", "readonly", "plan"]).default("readonly"),
imageModel: z
.object({
id: z.string(),
aspectRatio: z.enum(["1:1", "16:9", "9:16", "4:3", "3:4"]).optional(),
})
.optional(),
});

export type StreamRequest = z.infer<typeof StreamRequestSchema>;
17 changes: 17 additions & 0 deletions apps/mesh/src/api/routes/decopilot/stream-core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export interface StreamCoreInput {
triggerId?: string;
windowSize?: number;
abortSignal?: AbortSignal;
imageModel?: { id: string; aspectRatio?: string };
}

export interface StreamCoreDeps {
Expand Down Expand Up @@ -280,6 +281,16 @@ export async function streamCore(
toolApprovalLevel: input.toolApprovalLevel,
toolOutputMap,
passthroughClient,
...(input.imageModel && {
imageConfig: {
imageModelId: input.imageModel.id,
defaultAspectRatio: input.imageModel.aspectRatio,
organizationId: input.organizationId,
agentId: input.agent.id,
userId: input.userId,
threadId: mem.thread.id,
},
}),
},
ctx,
);
Expand Down Expand Up @@ -340,12 +351,18 @@ export async function streamCore(
"Only read-only tools can be enabled via enable_tools."
: null;

// Image generation hint when an image model is selected
const imagePrompt = input.imageModel
? `<image-generation>\nThe user has selected an image generation model. When they describe something they want as an image, use the generate_image tool immediately without asking for confirmation.\n</image-generation>`
: null;

const systemPrompts = [
basePrompt,
planModePrompt,
toolCatalog,
promptCatalog,
agentPrompt,
imagePrompt,
].filter((s): s is string => Boolean(s?.trim()));

const {
Expand Down
30 changes: 30 additions & 0 deletions apps/mesh/src/web/components/chat/IMAGE-GEN-FOLLOWUPS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Image Generation — Follow-up Items

Tracked items deferred from the initial implementation PR.

## 1. Base64 → Object Storage Migration

**Priority:** High
**Impact:** Database bloat, slow thread loading, large SSE payloads

Currently, generated images are stored as base64 data URLs directly in thread message `parts` JSON. A single 1024x1024 PNG adds roughly 1-5 MB per image to the database row, and base64 encoding inflates that by a further ~33% over the raw bytes.

**Fix:** Upload generated images to object storage (S3/R2) on the server, store only the HTTPS URL in the message parts. Add a size guard (reject images > 5MB decoded) as a stopgap until migration is complete.

## 2. Conversation History Not Sent to Image Model

**Priority:** Medium
**Impact:** Multi-turn image refinement doesn't work

`generateImage()` is stateless — only the current message prompt is sent. Follow-up refinements like "make it darker" or "add a cat" won't have context from prior messages. Each generation is independent.

**Fix:** If multi-turn image generation is desired, switch to `streamText` with output modalities for models that support it (Gemini), or prepend conversation summary to the prompt.

## 3. `toMetadataModelInfo` Doesn't Serialize `image-generation` Capability

**Priority:** Low
**Impact:** Server can't infer from metadata that a conversation used image generation

The `toMetadataModelInfo` helper in `chat-store.ts` maps capabilities to a boolean object but only includes `vision`, `text`, and `reasoning`. The `image-generation` capability is silently dropped.

**Fix:** Add `imageGeneration: caps.includes("image-generation") || undefined` to the capabilities mapping.
Loading
Loading