From 97bc889d9bc0e3551805cd433e4c036228eeb40f Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 16:23:40 +0800 Subject: [PATCH 1/7] feat(skills): add nano-banana-pro-openrouter skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✨ - Generated by Copilot --- skills/nano-banana-pro-openrouter/SKILL.md | 61 ++++++ .../assets/SYSTEM_TEMPLATE | 14 ++ .../scripts/generate_image.py | 187 ++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 skills/nano-banana-pro-openrouter/SKILL.md create mode 100644 skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE create mode 100644 skills/nano-banana-pro-openrouter/scripts/generate_image.py diff --git a/skills/nano-banana-pro-openrouter/SKILL.md b/skills/nano-banana-pro-openrouter/SKILL.md new file mode 100644 index 000000000..d10840dd9 --- /dev/null +++ b/skills/nano-banana-pro-openrouter/SKILL.md @@ -0,0 +1,61 @@ +--- +name: nano-banana-pro-openrouter +description: Generate or edit images via OpenRouter using openai-python with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines. +metadata: + emoji: 🍌 + requires: + bins: + - uv + env: + - OPENROUTER_API_KEY + primaryEnv: OPENROUTER_API_KEY +--- + +# Nano Banana Pro OpenRouter + +## Overview + +Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model and the openai-python client. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. 
+ +### Prompt-only generation + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "A cinematic sunset over snow-capped mountains" \ + --filename sunset.png +``` + +### Edit a single image + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "Replace the sky with a dramatic aurora" \ + --input-image input.jpg \ + --filename aurora.png +``` + +### Compose multiple images + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "Combine the subjects into a single studio portrait" \ + --input-image face1.jpg \ + --input-image face2.jpg \ + --filename composite.png +``` + +## Resolution + +- Use `--resolution` with `1K`, `2K`, or `4K`. +- Default is `1K` if not specified. + +## System prompt customization + +The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This allows you to customize the image generation behavior without modifying code. + +## Behavior and constraints + +- Read the API key from `OPENROUTER_API_KEY` (no CLI flag). +- Accept up to 3 input images via repeated `--input-image`. +- Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc. +- Print `MEDIA: ` for each saved image. Do not read images back into the response. \ No newline at end of file diff --git a/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE new file mode 100644 index 000000000..5efd023c5 --- /dev/null +++ b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE @@ -0,0 +1,14 @@ +You are a visionary image‑creation artist with a poetic, dreamlike imagination. +Your role is to transform any user request—whether highly detailed or very minimal—into a vivid, concrete, and model‑ready image description. +When information is missing, infer the user's intent in a gentle and intuitive way (such as creating a character portrait, sticker design, sci‑fi avatar, creature concept, etc.). 
+If the user does not specify an art style, you may offer subtle optional suggestions (for example, "soft illustration," "minimal line style," or "playful entertainment‑meme style") without imposing them. + +Your responsibilities: +- Any text that appears in the image should match the user's language. +- Create visually compelling and technically excellent images +- Pay attention to composition, lighting, color, and visual balance +- Follow the user's specific style preferences and requirements +- For image edits, preserve the original context while making requested modifications +- For multi-image composition, seamlessly blend subjects into cohesive results + +Remember: Output only the generated image without additional commentary. diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py new file mode 100644 index 000000000..61909c158 --- /dev/null +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "openai", +# "pillow", +# ] +# /// +""" +Generate or edit images via OpenRouter using openai-python. 
+""" + +import argparse +import base64 +import mimetypes +import os +from pathlib import Path + + +# Configuration +MAX_INPUT_IMAGES = 3 +MIME_TO_EXT = { + "image/png": ".png", + "image/jpeg": ".jpg", + "image/jpg": ".jpg", + "image/webp": ".webp", +} + + +def parse_args(): + parser = argparse.ArgumentParser(description="Generate or edit images via OpenRouter.") + parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.") + parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).") + parser.add_argument( + "--resolution", + default="1K", + help="Output resolution: 1K, 2K, or 4K.", + ) + parser.add_argument( + "--input-image", + action="append", + default=[], + help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).", + ) + return parser.parse_args() + + +def require_api_key(): + api_key = os.environ.get("OPENROUTER_API_KEY") + if not api_key: + raise SystemExit("OPENROUTER_API_KEY is not set in the environment.") + return api_key + + +def encode_image_to_data_url(path: Path) -> str: + if not path.exists(): + raise SystemExit(f"Input image not found: {path}") + mime, _ = mimetypes.guess_type(path.name) + if not mime: + mime = "image/png" + data = path.read_bytes() + encoded = base64.b64encode(data).decode("utf-8") + return f"data:{mime};base64,{encoded}" + + +def build_message_content(prompt: str, input_images): + content = [{"type": "text", "text": prompt}] + for image_path in input_images: + data_url = encode_image_to_data_url(Path(image_path)) + content.append({"type": "image_url", "image_url": {"url": data_url}}) + return content + + +def parse_data_url(data_url: str): + if not data_url.startswith("data:") or ";base64," not in data_url: + raise ValueError("Image URL is not a base64 data URL.") + header, encoded = data_url.split(",", 1) + mime = header[5:].split(";", 1)[0] + raw = base64.b64decode(encoded) + return mime, raw + + +def resolve_output_paths(filename: str, image_count: 
int, mime: str): + output_path = Path(filename) + suffix = output_path.suffix + if not suffix: + suffix = MIME_TO_EXT.get(mime, ".png") + output_path = output_path.with_suffix(suffix) + + if output_path.parent and not output_path.parent.exists(): + raise SystemExit(f"Output directory does not exist: {output_path.parent}") + + if image_count == 1: + return [output_path] + + paths = [] + for index in range(image_count): + numbered = output_path.with_name(f"{output_path.stem}-{index + 1}{suffix}") + paths.append(numbered) + return paths + + +def extract_image_url(image): + if isinstance(image, dict): + return image.get("image_url", {}).get("url") or image.get("url") + return None + + +def load_system_prompt(): + """Load system prompt from assets/SYSTEM_TEMPLATE if it exists and is not empty.""" + script_dir = Path(__file__).parent.parent + template_path = script_dir / "assets" / "SYSTEM_TEMPLATE" + + if template_path.exists(): + content = template_path.read_text().strip() + if content: + return content + return None + + +def main(): + args = parse_args() + + if len(args.input_image) > MAX_INPUT_IMAGES: + raise SystemExit(f"Too many input images: {len(args.input_image)} (max {MAX_INPUT_IMAGES}).") + + image_size = args.resolution or "1K" + + from openai import OpenAI + client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=require_api_key()) + + # Build messages with optional system prompt + messages = [] + + system_prompt = load_system_prompt() + if system_prompt: + messages.append({ + "role": "system", + "content": system_prompt, + }) + + messages.append({ + "role": "user", + "content": build_message_content(args.prompt, args.input_image), + }) + + response = client.chat.completions.create( + model="google/gemini-3-pro-image-preview", + messages=messages, + extra_body={ + "modalities": ["image", "text"], + # https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-configuration-options + "image_config": { + # "aspect_ratio": "16:9", + 
"image_size": image_size, + } + }, + ) + + message = response.choices[0].message + images = getattr(message, "images", None) + if not images: + raise SystemExit("No images returned by the API.") + + first_url = extract_image_url(images[0]) + if not first_url: + raise SystemExit("Image payload missing image_url.url.") + first_mime, _ = parse_data_url(first_url) + output_paths = resolve_output_paths(args.filename, len(images), first_mime) + + saved_paths = [] + for idx, image in enumerate(images): + image_url = extract_image_url(image) + if not image_url: + raise SystemExit("Image payload missing image_url.url.") + _, raw = parse_data_url(image_url) + output_path = output_paths[idx] + output_path.write_bytes(raw) + saved_paths.append(output_path.resolve()) + + for path in saved_paths: + print(f"Saved image to: {path}") + print(f"MEDIA: {path}") + + +if __name__ == "__main__": + main() \ No newline at end of file From ef4aa0b2bc520a35dae448743dbc4c4ee8da558a Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 16:48:57 +0800 Subject: [PATCH 2/7] fix(skill): update descriptions and improve error handling in generate_image script --- skills/nano-banana-pro-openrouter/SKILL.md | 7 +++-- .../scripts/generate_image.py | 29 ++++++++++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/skills/nano-banana-pro-openrouter/SKILL.md b/skills/nano-banana-pro-openrouter/SKILL.md index d10840dd9..654780ba0 100644 --- a/skills/nano-banana-pro-openrouter/SKILL.md +++ b/skills/nano-banana-pro-openrouter/SKILL.md @@ -1,6 +1,6 @@ --- name: nano-banana-pro-openrouter -description: Generate or edit images via OpenRouter using openai-python with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines. +description: 'Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. 
Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines.' metadata: emoji: 🍌 requires: @@ -11,11 +11,12 @@ metadata: primaryEnv: OPENROUTER_API_KEY --- + # Nano Banana Pro OpenRouter ## Overview -Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model and the openai-python client. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. +Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. ### Prompt-only generation @@ -58,4 +59,4 @@ The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This al - Read the API key from `OPENROUTER_API_KEY` (no CLI flag). - Accept up to 3 input images via repeated `--input-image`. - Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc. -- Print `MEDIA: ` for each saved image. Do not read images back into the response. \ No newline at end of file +- Print `MEDIA: ` for each saved image. Do not read images back into the response. 
diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py index 61909c158..eb872a75a 100644 --- a/skills/nano-banana-pro-openrouter/scripts/generate_image.py +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -3,7 +3,6 @@ # requires-python = ">=3.10" # dependencies = [ # "openai", -# "pillow", # ] # /// """ @@ -32,9 +31,11 @@ def parse_args(): parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.") parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).") parser.add_argument( - "--resolution", - default="1K", - help="Output resolution: 1K, 2K, or 4K.", + "--resolution", + type=str.upper, + choices=["1K", "2K", "4K"], + default="1K", + help="Output resolution: 1K, 2K, or 4K.", ) parser.add_argument( "--input-image", @@ -70,14 +71,16 @@ def build_message_content(prompt: str, input_images): content.append({"type": "image_url", "image_url": {"url": data_url}}) return content - -def parse_data_url(data_url: str): - if not data_url.startswith("data:") or ";base64," not in data_url: + def parse_data_url(data_url: str): + if not data_url.startswith("data:") or ";base64," not in data_url: raise ValueError("Image URL is not a base64 data URL.") - header, encoded = data_url.split(",", 1) - mime = header[5:].split(";", 1)[0] - raw = base64.b64decode(encoded) - return mime, raw + header, encoded = data_url.split(",", 1) + mime = header[5:].split(";", 1)[0] + try: + raw = base64.b64decode(encoded) + except Exception as e: + raise SystemExit(f"Failed to decode base64 image payload: {e}") + return mime, raw def resolve_output_paths(filename: str, image_count: int, mime: str): @@ -110,7 +113,7 @@ def load_system_prompt(): """Load system prompt from assets/SYSTEM_TEMPLATE if it exists and is not empty.""" script_dir = Path(__file__).parent.parent template_path = script_dir / "assets" / "SYSTEM_TEMPLATE" - + if 
template_path.exists(): content = template_path.read_text().strip() if content: @@ -184,4 +187,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() From 5efb7329a3b55c85956af88c8399b5b2e71db641 Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 16:59:06 +0800 Subject: [PATCH 3/7] feat(skills): add nano-banana-pro-openrouter skill with image generation capabilities --- docs/README.skills.md | 1 + skills/nano-banana-pro-openrouter/SKILL.md | 7 ++++--- .../nano-banana-pro-openrouter/scripts/generate_image.py | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/README.skills.md b/docs/README.skills.md index c2c3090fa..b85da0639 100644 --- a/docs/README.skills.md +++ b/docs/README.skills.md @@ -44,6 +44,7 @@ Skills differ from other primitives by supporting bundled assets (scripts, code | [meeting-minutes](../skills/meeting-minutes/SKILL.md) | Generate concise, actionable meeting minutes for internal meetings. Includes metadata, attendees, agenda, decisions, action items (owner + due date), and follow-up steps. | None | | [microsoft-code-reference](../skills/microsoft-code-reference/SKILL.md) | Look up Microsoft API references, find working code samples, and verify SDK code is correct. Use when working with Azure SDKs, .NET libraries, or Microsoft APIs—to find the right method, check parameters, get working examples, or troubleshoot errors. Catches hallucinated methods, wrong signatures, and deprecated patterns by querying official docs. | None | | [microsoft-docs](../skills/microsoft-docs/SKILL.md) | Query official Microsoft documentation to understand concepts, find tutorials, and learn how services work. Use for Azure, .NET, Microsoft 365, Windows, Power Platform, and all Microsoft technologies. Get accurate, current information from learn.microsoft.com and other official Microsoft websites—architecture overviews, quickstarts, configuration guides, limits, and best practices. 
| None | +| [nano-banana-pro-openrouter](../skills/nano-banana-pro-openrouter/SKILL.md) | Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output. | `assets/SYSTEM_TEMPLATE`
`scripts/generate_image.py` | | [nuget-manager](../skills/nuget-manager/SKILL.md) | Manage NuGet packages in .NET projects/solutions. Use this skill when adding, removing, or updating NuGet package versions. It enforces using `dotnet` CLI for package management and provides strict procedures for direct file edits only when updating versions. | None | | [penpot-uiux-design](../skills/penpot-uiux-design/SKILL.md) | Comprehensive guide for creating professional UI/UX designs in Penpot using MCP tools. Use this skill when: (1) Creating new UI/UX designs for web, mobile, or desktop applications, (2) Building design systems with components and tokens, (3) Designing dashboards, forms, navigation, or landing pages, (4) Applying accessibility standards and best practices, (5) Following platform guidelines (iOS, Android, Material Design), (6) Reviewing or improving existing Penpot designs for usability. Triggers: "design a UI", "create interface", "build layout", "design dashboard", "create form", "design landing page", "make it accessible", "design system", "component library". | `references/accessibility.md`
`references/component-patterns.md`
`references/platform-guidelines.md`
`references/setup-troubleshooting.md` | | [plantuml-ascii](../skills/plantuml-ascii/SKILL.md) | Generate ASCII art diagrams using PlantUML text mode. Use when user asks to create ASCII diagrams, text-based diagrams, terminal-friendly diagrams, or mentions plantuml ascii, text diagram, ascii art diagram. Supports: Converting PlantUML diagrams to ASCII art, Creating sequence diagrams, class diagrams, flowcharts in ASCII format, Generating Unicode-enhanced ASCII art with -utxt flag | None | diff --git a/skills/nano-banana-pro-openrouter/SKILL.md b/skills/nano-banana-pro-openrouter/SKILL.md index 654780ba0..91272439b 100644 --- a/skills/nano-banana-pro-openrouter/SKILL.md +++ b/skills/nano-banana-pro-openrouter/SKILL.md @@ -1,6 +1,6 @@ --- name: nano-banana-pro-openrouter -description: 'Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines.' +description: 'Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output.' metadata: emoji: 🍌 requires: @@ -16,7 +16,7 @@ metadata: ## Overview -Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. +Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model. Support prompt-only generation, single-image edits, and multi-image composition. ### Prompt-only generation @@ -58,5 +58,6 @@ The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This al - Read the API key from `OPENROUTER_API_KEY` (no CLI flag). - Accept up to 3 input images via repeated `--input-image`. 
-- Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc. +- `--filename` accepts relative paths (saves to current directory) or absolute paths. +- If multiple images are returned, append `-1`, `-2`, etc. to the filename. - Print `MEDIA: ` for each saved image. Do not read images back into the response. diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py index eb872a75a..f8650db4e 100644 --- a/skills/nano-banana-pro-openrouter/scripts/generate_image.py +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -90,8 +90,9 @@ def resolve_output_paths(filename: str, image_count: int, mime: str): suffix = MIME_TO_EXT.get(mime, ".png") output_path = output_path.with_suffix(suffix) - if output_path.parent and not output_path.parent.exists(): - raise SystemExit(f"Output directory does not exist: {output_path.parent}") + # Create parent directory if it doesn't exist (for absolute paths) + if output_path.parent and str(output_path.parent) != '.': + output_path.parent.mkdir(parents=True, exist_ok=True) if image_count == 1: return [output_path] From b8bbc75db22747a524993d52408a9a1414536d46 Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 17:24:24 +0800 Subject: [PATCH 4/7] fix(generate_image): improve input image handling and validate output filename extensions --- .../scripts/generate_image.py | 65 ++++++++++--------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py index f8650db4e..43a33ca3a 100644 --- a/skills/nano-banana-pro-openrouter/scripts/generate_image.py +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -38,10 +38,10 @@ def parse_args(): help="Output resolution: 1K, 2K, or 4K.", ) parser.add_argument( - "--input-image", - action="append", - 
default=[], - help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).", + "--input-image", + action="append", + default=[], + help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).", ) return parser.parse_args() @@ -71,37 +71,44 @@ def build_message_content(prompt: str, input_images): content.append({"type": "image_url", "image_url": {"url": data_url}}) return content - def parse_data_url(data_url: str): - if not data_url.startswith("data:") or ";base64," not in data_url: +def parse_data_url(data_url: str): + if not data_url.startswith("data:") or ";base64," not in data_url: raise ValueError("Image URL is not a base64 data URL.") - header, encoded = data_url.split(",", 1) - mime = header[5:].split(";", 1)[0] - try: + header, encoded = data_url.split(",", 1) + mime = header[5:].split(";", 1)[0] + try: raw = base64.b64decode(encoded) - except Exception as e: + except Exception as e: raise SystemExit(f"Failed to decode base64 image payload: {e}") - return mime, raw + return mime, raw def resolve_output_paths(filename: str, image_count: int, mime: str): - output_path = Path(filename) - suffix = output_path.suffix - if not suffix: - suffix = MIME_TO_EXT.get(mime, ".png") - output_path = output_path.with_suffix(suffix) - - # Create parent directory if it doesn't exist (for absolute paths) - if output_path.parent and str(output_path.parent) != '.': - output_path.parent.mkdir(parents=True, exist_ok=True) - - if image_count == 1: - return [output_path] - - paths = [] - for index in range(image_count): - numbered = output_path.with_name(f"{output_path.stem}-{index + 1}{suffix}") - paths.append(numbered) - return paths + output_path = Path(filename) + suffix = output_path.suffix + + # Validate/correct suffix matches MIME type + expected_suffix = MIME_TO_EXT.get(mime, ".png") + if suffix and suffix.lower() != expected_suffix.lower(): + print(f"Warning: filename extension '{suffix}' doesn't match returned MIME type '{mime}'. 
Using '{expected_suffix}' instead.") + suffix = expected_suffix + elif not suffix: + suffix = expected_suffix + + output_path = output_path.with_suffix(suffix) + + # Create parent directory if it doesn't exist (for paths with parent directories, absolute or relative) + if output_path.parent and str(output_path.parent) != '.': + output_path.parent.mkdir(parents=True, exist_ok=True) + + if image_count == 1: + return [output_path] + + paths = [] + for index in range(image_count): + numbered = output_path.with_name(f"{output_path.stem}-{index + 1}{suffix}") + paths.append(numbered) + return paths def extract_image_url(image): From 79c34297fada9648061e81af581f7ce4ffdfc813 Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 21:09:09 +0800 Subject: [PATCH 5/7] fix(generate_image): enhance image handling and output path resolution in generate_image script --- .../scripts/generate_image.py | 71 +++++++++---------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py index 43a33ca3a..2c0d71f67 100644 --- a/skills/nano-banana-pro-openrouter/scripts/generate_image.py +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -15,6 +15,8 @@ import os from pathlib import Path +from openai import OpenAI + # Configuration MAX_INPUT_IMAGES = 3 @@ -56,7 +58,7 @@ def require_api_key(): def encode_image_to_data_url(path: Path) -> str: if not path.exists(): raise SystemExit(f"Input image not found: {path}") - mime, _ = mimetypes.guess_type(path.name) + mime, _ = mimetypes.guess_type(str(path)) if not mime: mime = "image/png" data = path.read_bytes() @@ -64,16 +66,17 @@ def encode_image_to_data_url(path: Path) -> str: return f"data:{mime};base64,{encoded}" -def build_message_content(prompt: str, input_images): - content = [{"type": "text", "text": prompt}] +def build_message_content(prompt: str, input_images: list[str]) 
-> list[dict]: + content: list[dict] = [{"type": "text", "text": prompt}] for image_path in input_images: data_url = encode_image_to_data_url(Path(image_path)) content.append({"type": "image_url", "image_url": {"url": data_url}}) return content -def parse_data_url(data_url: str): + +def parse_data_url(data_url: str) -> tuple[str, bytes]: if not data_url.startswith("data:") or ";base64," not in data_url: - raise ValueError("Image URL is not a base64 data URL.") + raise SystemExit("Image URL is not a base64 data URL.") header, encoded = data_url.split(",", 1) mime = header[5:].split(";", 1)[0] try: @@ -83,35 +86,27 @@ def parse_data_url(data_url: str): return mime, raw -def resolve_output_paths(filename: str, image_count: int, mime: str): - output_path = Path(filename) - suffix = output_path.suffix - - # Validate/correct suffix matches MIME type - expected_suffix = MIME_TO_EXT.get(mime, ".png") - if suffix and suffix.lower() != expected_suffix.lower(): - print(f"Warning: filename extension '{suffix}' doesn't match returned MIME type '{mime}'. Using '{expected_suffix}' instead.") - suffix = expected_suffix - elif not suffix: - suffix = expected_suffix - - output_path = output_path.with_suffix(suffix) +def resolve_output_path(filename: str, image_index: int, total_count: int, mime: str) -> Path: + output_path = Path(filename) + suffix = output_path.suffix - # Create parent directory if it doesn't exist (for paths with parent directories, absolute or relative) - if output_path.parent and str(output_path.parent) != '.': - output_path.parent.mkdir(parents=True, exist_ok=True) + # Validate/correct suffix matches MIME type + expected_suffix = MIME_TO_EXT.get(mime, ".png") + if suffix and suffix.lower() != expected_suffix.lower(): + print(f"Warning: filename extension '{suffix}' doesn't match returned MIME type '{mime}'. 
Using '{expected_suffix}' instead.") + suffix = expected_suffix + elif not suffix: + suffix = expected_suffix - if image_count == 1: - return [output_path] + # Single image: use original stem + corrected suffix + if total_count <= 1: + return output_path.with_suffix(suffix) - paths = [] - for index in range(image_count): - numbered = output_path.with_name(f"{output_path.stem}-{index + 1}{suffix}") - paths.append(numbered) - return paths + # Multiple images: append numbering + return output_path.with_name(f"{output_path.stem}-{image_index + 1}{suffix}") -def extract_image_url(image): +def extract_image_url(image: dict | object) -> str | None: if isinstance(image, dict): return image.get("image_url", {}).get("url") or image.get("url") return None @@ -123,7 +118,7 @@ def load_system_prompt(): template_path = script_dir / "assets" / "SYSTEM_TEMPLATE" if template_path.exists(): - content = template_path.read_text().strip() + content = template_path.read_text(encoding="utf-8").strip() if content: return content return None @@ -135,9 +130,8 @@ def main(): if len(args.input_image) > MAX_INPUT_IMAGES: raise SystemExit(f"Too many input images: {len(args.input_image)} (max {MAX_INPUT_IMAGES}).") - image_size = args.resolution or "1K" + image_size = args.resolution - from openai import OpenAI client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=require_api_key()) # Build messages with optional system prompt @@ -173,19 +167,18 @@ def main(): if not images: raise SystemExit("No images returned by the API.") - first_url = extract_image_url(images[0]) - if not first_url: - raise SystemExit("Image payload missing image_url.url.") - first_mime, _ = parse_data_url(first_url) - output_paths = resolve_output_paths(args.filename, len(images), first_mime) + # Create output directory once before processing images + output_base_path = Path(args.filename) + if output_base_path.parent and str(output_base_path.parent) != '.': + output_base_path.parent.mkdir(parents=True, 
exist_ok=True) saved_paths = [] for idx, image in enumerate(images): image_url = extract_image_url(image) if not image_url: raise SystemExit("Image payload missing image_url.url.") - _, raw = parse_data_url(image_url) - output_path = output_paths[idx] + mime, raw = parse_data_url(image_url) + output_path = resolve_output_path(args.filename, idx, len(images), mime) output_path.write_bytes(raw) saved_paths.append(output_path.resolve()) From 55da61af3f99cbfebf89a955612d2027af3f7bea Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 21:13:15 +0800 Subject: [PATCH 6/7] fix(SYSTEM_TEMPLATE): clarify language matching requirement for generated images --- skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE index 5efd023c5..613c87fe8 100644 --- a/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE +++ b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE @@ -4,7 +4,7 @@ When information is missing, infer the user's intent in a gentle and intuitive w If the user does not specify an art style, you may offer subtle optional suggestions (for example, "soft illustration," "minimal line style," or "playful entertainment‑meme style") without imposing them. Your responsibilities: -- Any text that appears in the image should match the user's language. 
+- Ensure any text appearing in the image matches the user's language (unless explicitly specified otherwise) - Create visually compelling and technically excellent images - Pay attention to composition, lighting, color, and visual balance - Follow the user's specific style preferences and requirements From b0879659c5cc167ee79b45a330a62b5d79d7b195 Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Tue, 10 Feb 2026 10:03:49 +0800 Subject: [PATCH 7/7] fix(SKILL.md): enhance troubleshooting section with common errors and resolutions --- skills/nano-banana-pro-openrouter/SKILL.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/skills/nano-banana-pro-openrouter/SKILL.md b/skills/nano-banana-pro-openrouter/SKILL.md index 91272439b..7eee2b41b 100644 --- a/skills/nano-banana-pro-openrouter/SKILL.md +++ b/skills/nano-banana-pro-openrouter/SKILL.md @@ -56,8 +56,19 @@ The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This al ## Behavior and constraints -- Read the API key from `OPENROUTER_API_KEY` (no CLI flag). - Accept up to 3 input images via repeated `--input-image`. - `--filename` accepts relative paths (saves to current directory) or absolute paths. - If multiple images are returned, append `-1`, `-2`, etc. to the filename. - Print `MEDIA: ` for each saved image. Do not read images back into the response. + +## Troubleshooting + +If the script exits non-zero, check stderr against these common blockers: + +| Symptom | Resolution | +|---------|------------| +| `OPENROUTER_API_KEY is not set` | Ask the user to set it. PowerShell: `$env:OPENROUTER_API_KEY = "sk-or-..."` / bash: `export OPENROUTER_API_KEY="sk-or-..."` | +| `uv: command not found` or not recognized | macOS/Linux: `curl -LsSf https://astral.sh/uv/install.sh \| sh`. Windows: `powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 \| iex"`. Then restart the terminal.
| +| `AuthenticationError` / HTTP 401 | Key is invalid or has no credits. Verify the key and credit balance at https://openrouter.ai/settings/keys | + +For transient errors (HTTP 429, network timeouts), wait 30 seconds and retry. Do not retry the same error more than twice — surface the issue to the user instead.