Skip to content

Commit 5b5c125

Browse files
authored
refactor(component,openai): resize the images before sending them to OpenAI (#865)
Because: - OpenAI does not support large images. This commit: - Resizes images whose width or height exceeds 8192 pixels, preserving the aspect ratio, before sending them to OpenAI.
1 parent 4e7e260 commit 5b5c125

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

pkg/component/ai/openai/v0/main.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,25 @@ func (e *execution) worker(ctx context.Context, client *httpclient.Client, job *
183183
userContents := []Content{}
184184
userContents = append(userContents, Content{Type: "text", Text: &inputStruct.Prompt})
185185
for _, image := range inputStruct.Images {
186+
if image.Height().Integer() > 8192 || image.Width().Integer() > 8192 {
187+
// Calculate new dimensions maintaining aspect ratio with max length 8192
188+
ratio := float64(image.Width().Integer()) / float64(image.Height().Integer())
189+
var newWidth, newHeight int
190+
191+
if image.Width().Integer() > image.Height().Integer() {
192+
newWidth = 8192
193+
newHeight = int(float64(newWidth) / ratio)
194+
} else {
195+
newHeight = 8192
196+
newWidth = int(float64(newHeight) * ratio)
197+
}
198+
199+
image, err = image.Resize(newWidth, newHeight)
200+
if err != nil {
201+
job.Error.Error(ctx, err)
202+
return
203+
}
204+
}
186205
i, err := image.DataURI()
187206
if err != nil {
188207
job.Error.Error(ctx, err)

pkg/data/format/format.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type Image interface {
5656
Width() Number
5757
Height() Number
5858
Convert(contentType string) (val Image, err error)
59+
Resize(width, height int) (val Image, err error)
5960
}
6061

6162
type Video interface {

pkg/data/image.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,34 @@ func (i *imageData) Get(p *path.Path) (v format.Value, err error) {
173173

174174
return result.Get(remainingPath)
175175
}
176+
177+
func (i *imageData) Resize(width, height int) (format.Image, error) {
178+
img, _, err := goimage.Decode(bytes.NewReader(i.raw))
179+
if err != nil {
180+
return nil, fmt.Errorf("error decoding image for resize: %v", err)
181+
}
182+
183+
// Create new image with desired dimensions
184+
resized := goimage.NewRGBA(goimage.Rect(0, 0, width, height))
185+
186+
// Simple nearest-neighbor scaling
187+
scaleX := float64(img.Bounds().Dx()) / float64(width)
188+
scaleY := float64(img.Bounds().Dy()) / float64(height)
189+
190+
for y := 0; y < height; y++ {
191+
for x := 0; x < width; x++ {
192+
srcX := int(float64(x) * scaleX)
193+
srcY := int(float64(y) * scaleY)
194+
resized.Set(x, y, img.At(srcX, srcY))
195+
}
196+
}
197+
198+
// Encode resized image to PNG format
199+
buf := new(bytes.Buffer)
200+
if err := png.Encode(buf, resized); err != nil {
201+
return nil, fmt.Errorf("error encoding resized image: %v", err)
202+
}
203+
204+
// Create new image data from encoded bytes
205+
return NewImageFromBytes(buf.Bytes(), PNG, "")
206+
}

0 commit comments

Comments
 (0)