hpcaitech · JoeLiu996 · Jan 21, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 29, 2026
diff --git a/.gitignore b/.gitignore
@@ -143,6 +143,9 @@ ENV/
 env.bak/
 venv.bak/
 
+code.diff
+metrics.jsonl
+
 # Spyder project settings
 .spyderproject
 .spyproject

diff --git a/pyproject.toml b/pyproject.toml
@@ -41,6 +41,7 @@ dependencies = [
     # AIOHTTP support
     "aiohttp",
     "httpx_aiohttp>=0.1.8",
+    "torchvision",
 ]
 requires-python = ">= 3.9"
 classifiers = [

diff --git a/src/hpcai/cookbook/data.py b/src/hpcai/cookbook/data.py
@@ -8,7 +8,7 @@
 import torch
 from hpcai.types import Datum, ModelInput, TensorData
 from hpcai.cookbook.renderers import Message, Renderer, TrainOnWhat
-from hpcai.cookbook.common import datum_from_tokens_weights
+from hpcai.cookbook.supervised.common import datum_from_model_input_weights
 
 
 def datum_from_tokens_weights(
@@ -49,4 +49,4 @@ def conversation_to_datum(
 ) -> Datum:
     """Common function to process a list of messages into a Datum."""
     tokens, weights = renderer.build_supervised_example(conversation, train_on_what=train_on_what)
-    return datum_from_tokens_weights(tokens, weights, max_length)
+    return datum_from_model_input_weights(tokens, weights, max_length)
diff --git a/src/hpcai/cookbook/eval/inspect_evaluators.py b/src/hpcai/cookbook/eval/inspect_evaluators.py
@@ -97,7 +97,7 @@ async def __call__(self, sampling_client: hpcai.SamplingClient) -> dict[str, flo
             debug_errors=self.config.debug_errors,
             # Never retry - the hpcai SDK is doing this for us already
             retry_on_error=0,
-            # Although Tinker sampling tries very hard to only throw unrecoverable failures,
+            # Although sampling tries very hard to only throw unrecoverable failures,
             # the inspect evaluation can still fail if e.g. the parser returns an error for
             # a given sample.
             fail_on_error=False,

diff --git a/src/hpcai/cookbook/image_processing_utils.py b/src/hpcai/cookbook/image_processing_utils.py
@@ -0,0 +1,56 @@
+# Copyright 2026 Thinking Machines Lab
+#
+# Licensed under the Apache License, Version 2.0
+#
+# Modifications:
+# - Adapted for HPC-AI cloud fine-tuning workflow
+# Copyright © 2026 HPC-AI.COM
+
+"""
+Utilities for working with image processors. Create new types to avoid needing to import AutoImageProcessor and BaseImageProcessor.
+Avoid importing AutoImageProcessor and BaseImageProcessor until runtime, because they're slow imports.
+"""
+
+from __future__ import annotations
+
+from functools import cache
+from typing import TYPE_CHECKING, Any, TypeAlias
+
+from PIL import Image
+
+if TYPE_CHECKING:
+    # Only type checkers take this branch: the transformers import takes a few seconds.
+    from transformers.image_processing_utils import BaseImageProcessor
+
+    ImageProcessor: TypeAlias = BaseImageProcessor
+else:
+    # At runtime the name is bound to Any so other modules can still import it as a type.
+    ImageProcessor: TypeAlias = Any
+
+
+@cache
+def get_image_processor(model_name: str) -> ImageProcessor:
+    """Load model_name's image processor (use_fast=True); @cache reuses it on repeat calls."""
+    from transformers.models.auto.image_processing_auto import AutoImageProcessor
+    return AutoImageProcessor.from_pretrained(model_name, use_fast=True)
+
+
+def resize_image(image: Image.Image, max_size: int) -> Image.Image:
+    """
+    Resize an image so that its longest side is at most max_size pixels.
+    Preserves aspect ratio (the shorter side is kept at >= 1 px) and uses LANCZOS resampling.
+    Returns the original image object if its longest side is already <= max_size.
+    """
+
+    width, height = image.size
+    if max(width, height) <= max_size:
+        return image
+
+    if width > height:
+        new_width = max_size
+        new_height = max(1, int(height * max_size / width))  # int() may truncate to 0
+    else:
+        new_height = max_size
+        new_width = max(1, int(width * max_size / height))  # int() may truncate to 0
+
+    return image.resize((new_width, new_height), Image.Resampling.LANCZOS)
diff --git a/src/hpcai/cookbook/model_info.py b/src/hpcai/cookbook/model_info.py
@@ -55,6 +55,7 @@ def get_qwen_info() -> dict[str, ModelAttributes]:
         "Qwen3-4B-Instruct-2507": ModelAttributes(org, "3", "4B", True),
         "Qwen3-30B-A3B-Instruct-2507": ModelAttributes(org, "3", "30B-A3B", True),
         "Qwen3-235B-A22B-Instruct-2507": ModelAttributes(org, "3", "235B-A22B", True),
+        "Qwen3-VL-8B-Instruct": ModelAttributes(org, "3", "8B", True, is_vl=True),  # VL -> qwen3_vl
     }
 
 
@@ -101,7 +102,9 @@ def get_recommended_renderer_names(model_name: str) -> list[str]:
         return ["llama3"]
     elif attributes.organization == "Qwen":
         if attributes.version_str == "3":
-            if "-Instruct" in model_name:
+            if attributes.is_vl:
+                return ["qwen3_vl"]
+            elif "-Instruct" in model_name:
                 return ["qwen3_instruct"]
             else:
                 return ["qwen3", "qwen3_disable_thinking"]