decocms · vibegui · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026
diff --git a/apps/mesh/src/ai-providers/adapters/openrouter.ts b/apps/mesh/src/ai-providers/adapters/openrouter.ts
@@ -78,6 +78,9 @@ export const openrouterAdapter: ProviderAdapter = {
                   mod === "image" ? "vision" : mod,
                 ),
                 ...m.architecture.output_modalities,
+                ...(m.architecture.output_modalities?.includes("image")
+                  ? (["image-generation"] as const)
+                  : []),
                 ...(m.supported_parameters?.includes("tools")
                   ? (["tools"] as const)
                   : []),

diff --git a/apps/mesh/src/ai-providers/factory.ts b/apps/mesh/src/ai-providers/factory.ts
@@ -24,6 +24,9 @@ function mapOpenRouterModel(m: OpenRouterAPIModel): ModelInfo {
       ...new Set([
         ...m.architecture.input_modalities,
         ...m.architecture.output_modalities,
+        ...(m.architecture.output_modalities?.includes("image")
+          ? (["image-generation"] as const)
+          : []),
         ...(canTools ? (["tools"] as const) : []),
         ...(canReasoning ? (["reasoning"] as const) : []),
       ]),

diff --git a/apps/mesh/src/api/routes/decopilot/built-in-tools/generate-image.ts b/apps/mesh/src/api/routes/decopilot/built-in-tools/generate-image.ts
@@ -0,0 +1,182 @@
+/**
+ * generate_image Built-in Tool
+ *
+ * Server-side tool that generates images using the AI SDK's generateImage()
+ * function. The image is written as a file part to the stream, and a short
+ * text result is returned to the model.
+ */
+
+import type { MeshContext } from "@/core/mesh-context";
+import type { MeshProvider } from "@/ai-providers/types";
+import { monitorLlmCall } from "@/monitoring/emit-llm-call";
+import { recordLlmCallMetrics } from "@/monitoring/record-llm-call-metrics";
+import type { UIMessageStreamWriter } from "ai";
+import { generateImage, tool, zodSchema } from "ai";
+import { z } from "zod";
+import type { ModelsConfig } from "../types";
+
+const ALLOWED_IMAGE_TYPES = new Set([ // media types the tool will write to the stream; any other generated type is rejected with an error
+  "image/png",
+  "image/jpeg",
+  "image/webp",
+  "image/gif",
+]);
+
+const GenerateImageInputSchema = z.object({ // arguments the model supplies when it calls generate_image
+  prompt: z // required text, 1 to 10,000 characters
+    .string()
+    .min(1)
+    .max(10_000)
+    .describe(
+      "Detailed description of the image to generate. Be specific about style, composition, colors, and subject.",
+    ),
+  aspect_ratio: z // optional; when omitted the tool uses its configured default ratio, then "1:1"
+    .enum(["1:1", "16:9", "9:16", "4:3", "3:4"])
+    .optional()
+    .describe("Aspect ratio for the generated image. Defaults to 1:1."),
+});
+
+const GENERATE_IMAGE_DESCRIPTION = // description passed to tool(); tells the model when to call generate_image
+  "Generate an image from a text description. The generated image is displayed " +
+  "inline to the user. Use this when the user asks you to create, draw, or " +
+  "generate an image or picture.";
+
+const GENERATE_IMAGE_ANNOTATIONS = { // written to the stream inside the data-tool-metadata part after each successful generation
+  readOnlyHint: true,
+  destructiveHint: false,
+  idempotentHint: false, // NOTE(review): presumably because the same prompt can yield a different image — confirm
+  openWorldHint: true, // NOTE(review): presumably because the call reaches an external image provider — confirm
+} as const;
+
+export interface GenerateImageParams { // dependencies and identifiers captured by createGenerateImageTool
+  provider: MeshProvider; // provider.aiSdk.imageModel(imageModelId) supplies the image model
+  imageModelId: string; // also reported as both modelId and modelTitle in monitoring records
+  defaultAspectRatio?: string; // used when a tool call omits aspect_ratio; not validated by the tool itself
+  models: ModelsConfig; // only models.credentialId is read, for monitoring records
+  organizationId: string; // attached to metrics and monitoring records
+  agentId: string; // attached to monitoring records
+  userId: string; // attached to monitoring records
+  threadId: string; // attached to monitoring records
+}
+
+/**
+ * Creates the `generate_image` tool bound to one UI message stream.
+ *
+ * On success the image is written to `writer` as a data-URL file part, followed
+ * by a `data-tool-metadata` part, and a short status string is returned to the
+ * model. Calls are reported through recordLlmCallMetrics and monitorLlmCall;
+ * an aborted call is rethrown as-is and is not reported.
+ *
+ * @param writer - Stream writer that receives the file and metadata parts.
+ * @param params - Provider, image model id, default aspect ratio and the ids
+ *   attached to monitoring records.
+ * @param ctx - Mesh context forwarded to the monitoring helpers.
+ * @returns The tool definition produced by `tool()`.
+ * @throws From `execute`: `Error("Image generation failed: …")` for any
+ *   non-abort failure, including a generated image whose media type is not in
+ *   ALLOWED_IMAGE_TYPES.
+ */
+export function createGenerateImageTool(
+  writer: UIMessageStreamWriter,
+  params: GenerateImageParams,
+  ctx: MeshContext,
+) {
+  const {
+    provider,
+    imageModelId,
+    defaultAspectRatio,
+    models,
+    organizationId,
+    agentId,
+    userId,
+    threadId,
+  } = params;
+
+  return tool({
+    description: GENERATE_IMAGE_DESCRIPTION,
+    inputSchema: zodSchema(GenerateImageInputSchema),
+    execute: async ({ prompt, aspect_ratio }, { abortSignal, toolCallId }) => {
+      // Tool input wins, then the configured default, then "1:1", so the ratio
+      // is never undefined past this point.
+      const aspectRatio = (aspect_ratio ??
+        defaultAspectRatio ??
+        "1:1") as `${number}:${number}`;
+
+      const startTime = Date.now();
+
+      try {
+        const result = await generateImage({
+          model: provider.aiSdk.imageModel(imageModelId),
+          prompt,
+          aspectRatio,
+          abortSignal,
+        });
+
+        const durationMs = Date.now() - startTime;
+
+        // Validate before reporting success. A throw here lands in the catch
+        // below, which reports the call as failed; reporting success first
+        // would log the same call as both a success and an error.
+        const base64 = result.image.base64;
+        const rawMediaType = result.image.mediaType ?? "image/png";
+        if (!ALLOWED_IMAGE_TYPES.has(rawMediaType)) {
+          throw new Error(`Unsupported generated image type: ${rawMediaType}`);
+        }
+
+        recordLlmCallMetrics({
+          ctx,
+          organizationId,
+          modelId: imageModelId,
+          durationMs,
+          isError: false,
+        });
+        monitorLlmCall({
+          ctx,
+          organizationId,
+          agentId,
+          modelId: imageModelId,
+          modelTitle: imageModelId,
+          credentialId: models.credentialId,
+          threadId,
+          durationMs,
+          isError: false,
+          finishReason: "stop",
+          userId,
+          requestId: ctx.metadata.requestId,
+          userAgent: ctx.metadata.userAgent ?? null,
+        });
+
+        // Write the image as a file part directly to the stream
+        writer.write({
+          type: "file",
+          url: `data:${rawMediaType};base64,${base64}`,
+          mediaType: rawMediaType,
+        });
+
+        // Write tool metadata
+        writer.write({
+          type: "data-tool-metadata",
+          id: toolCallId,
+          data: {
+            annotations: GENERATE_IMAGE_ANNOTATIONS,
+            latencyMs: durationMs,
+          },
+        });
+
+        return `Image generated successfully (${aspectRatio}).`;
+      } catch (error) {
+        // Don't record abort as an error
+        if (abortSignal?.aborted) {
+          throw error;
+        }
+
+        const durationMs = Date.now() - startTime;
+        // Computed once; used for both the monitoring record and the rethrow.
+        const errorMsg = error instanceof Error ? error.message : String(error);
+        recordLlmCallMetrics({
+          ctx,
+          organizationId,
+          modelId: imageModelId,
+          durationMs,
+          isError: true,
+          errorType: error instanceof Error ? error.name : "Error",
+        });
+        monitorLlmCall({
+          ctx,
+          organizationId,
+          agentId,
+          modelId: imageModelId,
+          modelTitle: imageModelId,
+          credentialId: models.credentialId,
+          threadId,
+          durationMs,
+          isError: true,
+          errorMessage: errorMsg,
+          userId,
+          requestId: ctx.metadata.requestId,
+          userAgent: ctx.metadata.userAgent ?? null,
+        });
+
+        throw new Error(
+          `Image generation failed: ${errorMsg}. Try describing what you'd like to see as an image.`,
+        );
+      }
+    },
+  });
+}
diff --git a/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts b/apps/mesh/src/api/routes/decopilot/built-in-tools/index.ts
@@ -16,16 +16,27 @@ import { createSandboxTool, type VirtualClient } from "./sandbox";
 import { createSubtaskTool } from "./subtask";
 import { userAskTool } from "./user-ask";
 import { proposePlanTool } from "./propose-plan";
+import { createGenerateImageTool } from "./generate-image";
 import type { ModelsConfig } from "../types";
 import { MeshProvider } from "@/ai-providers/types";
 
+export interface ImageConfig { // image-generation settings; getBuiltInTools forwards every field to createGenerateImageTool
+  imageModelId: string; // id of the image model the generate_image tool will call
+  defaultAspectRatio?: string; // fallback ratio for tool calls that do not pass aspect_ratio
+  organizationId: string; // forwarded unchanged to the generate_image tool
+  agentId: string; // forwarded unchanged to the generate_image tool
+  userId: string; // forwarded unchanged to the generate_image tool
+  threadId: string; // forwarded unchanged to the generate_image tool
+}
+
 export interface BuiltinToolParams {
   provider: MeshProvider;
   organization: OrganizationScope;
   models: ModelsConfig;
   toolApprovalLevel?: ToolApprovalLevel;
   toolOutputMap: Map<string, string>;
   passthroughClient: VirtualClient;
+  imageConfig?: ImageConfig; // optional; generate_image is registered only when this is set and provider.aiSdk.imageModel is a function
 }
 
 /**
@@ -45,8 +56,9 @@ export function getBuiltInTools(
     toolApprovalLevel = "readonly",
     toolOutputMap,
     passthroughClient,
+    imageConfig,
   } = params;
-  return {
+  const tools = {
     user_ask: userAskTool,
     propose_plan: proposePlanTool,
     subtask: createSubtaskTool(
@@ -84,4 +96,26 @@ export function getBuiltInTools(
       toolOutputMap,
     }),
   } as const;
+
+  if (imageConfig && typeof provider.aiSdk.imageModel === "function") {
+    return {
+      ...tools,
+      generate_image: createGenerateImageTool(
+        writer,
+        {
+          provider,
+          imageModelId: imageConfig.imageModelId,
+          defaultAspectRatio: imageConfig.defaultAspectRatio,
+          models,
+          organizationId: imageConfig.organizationId,
+          agentId: imageConfig.agentId,
+          userId: imageConfig.userId,
+          threadId: imageConfig.threadId,
+        },
+        ctx,
+      ),
+    } as const;
+  }
+
+  return tools;
 }
diff --git a/apps/mesh/src/api/routes/decopilot/routes.ts b/apps/mesh/src/api/routes/decopilot/routes.ts
@@ -118,6 +118,7 @@ export function createDecopilotRoutes(deps: DecopilotDeps) {
         memory: memoryConfig,
         thread_id,
         toolApprovalLevel,
+        imageModel,
       } = await validateRequest(c);
 
       const userId = ctx.auth?.user?.id;
@@ -160,6 +161,7 @@ export function createDecopilotRoutes(deps: DecopilotDeps) {
           userId,
           threadId: resolvedThreadId,
           windowSize,
+          imageModel,
         },
         ctx,
         { runRegistry, streamBuffer, cancelBroadcast },

diff --git a/apps/mesh/src/api/routes/decopilot/schemas.ts b/apps/mesh/src/api/routes/decopilot/schemas.ts
@@ -85,6 +85,12 @@ export const StreamRequestSchema = z.object({
   temperature: z.number().default(0.5),
   thread_id: z.string().optional(),
   toolApprovalLevel: z.enum(["auto", "readonly", "plan"]).default("readonly"),
+  imageModel: z
+    .object({
+      id: z.string(),
+      aspectRatio: z.enum(["1:1", "16:9", "9:16", "4:3", "3:4"]).optional(),
+    })
+    .optional(),
 });
 
 export type StreamRequest = z.infer<typeof StreamRequestSchema>;
diff --git a/apps/mesh/src/api/routes/decopilot/stream-core.ts b/apps/mesh/src/api/routes/decopilot/stream-core.ts
@@ -77,6 +77,7 @@ export interface StreamCoreInput {
   triggerId?: string;
   windowSize?: number;
   abortSignal?: AbortSignal;
+  imageModel?: { id: string; aspectRatio?: string };
 }
 
 export interface StreamCoreDeps {
@@ -280,6 +281,16 @@ export async function streamCore(
             toolApprovalLevel: input.toolApprovalLevel,
             toolOutputMap,
             passthroughClient,
+            ...(input.imageModel && {
+              imageConfig: {
+                imageModelId: input.imageModel.id,
+                defaultAspectRatio: input.imageModel.aspectRatio,
+                organizationId: input.organizationId,
+                agentId: input.agent.id,
+                userId: input.userId,
+                threadId: mem.thread.id,
+              },
+            }),
           },
           ctx,
         );
@@ -340,12 +351,18 @@ export async function streamCore(
               "Only read-only tools can be enabled via enable_tools."
             : null;
 
+        // Image generation hint when an image model is selected
+        const imagePrompt = input.imageModel
+          ? `<image-generation>\nThe user has selected an image generation model. When they describe something they want as an image, use the generate_image tool immediately without asking for confirmation.\n</image-generation>`
+          : null;
+
         const systemPrompts = [
           basePrompt,
           planModePrompt,
           toolCatalog,
           promptCatalog,
           agentPrompt,
+          imagePrompt,
         ].filter((s): s is string => Boolean(s?.trim()));
 
         const {

diff --git a/apps/mesh/src/web/components/chat/IMAGE-GEN-FOLLOWUPS.md b/apps/mesh/src/web/components/chat/IMAGE-GEN-FOLLOWUPS.md
@@ -0,0 +1,30 @@
+# Image Generation — Follow-up Items
+
+Tracked items deferred from the initial implementation PR.
+
+## 1. Base64 → Object Storage Migration
+
+**Priority:** High
+**Impact:** Database bloat, slow thread loading, large SSE payloads
+
+Currently, generated images are stored as base64 data URLs directly in the thread message `parts` JSON. A single 1024x1024 PNG adds roughly 1-5 MB to the database row.
+
+**Fix:** Upload generated images to object storage (S3/R2) on the server, store only the HTTPS URL in the message parts. Add a size guard (reject images > 5MB decoded) as a stopgap until migration is complete.
+
+## 2. Conversation History Not Sent to Image Model
+
+**Priority:** Medium
+**Impact:** Multi-turn image refinement doesn't work
+
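+`generateImage()` is stateless — the image model receives only the `prompt` string from the current tool call, never earlier messages or previously generated images. Follow-up refinements like "make it darker" or "add a cat" therefore have no context from prior turns unless the chat model restates the full description in the new prompt. Each generation is independent.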
+
+**Fix:** If multi-turn image generation is desired, switch to `streamText` with output modalities for models that support it (Gemini), or prepend conversation summary to the prompt.
+
+## 3. `toMetadataModelInfo` Doesn't Serialize `image-generation` Capability
+
+**Priority:** Low
+**Impact:** Server can't infer from metadata that a conversation used image generation
+
+The `toMetadataModelInfo` helper in `chat-store.ts` maps capabilities to a boolean object but only includes `vision`, `text`, and `reasoning`. The `image-generation` capability is silently dropped.
+
+**Fix:** Add `imageGeneration: caps.includes("image-generation") || undefined` to the capabilities mapping.