Skip to content

Commit e2da340

Browse files
authored
Add vision support (ChatGPTNextWeb#4076)
1 parent 05b6d98 commit e2da340

16 files changed

+650
-73
lines changed

app/client/api.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,17 @@ export type MessageRole = (typeof ROLES)[number];
1414
export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
1515
export type ChatModel = ModelType;
1616

17+
export interface MultimodalContent {
18+
type: "text" | "image_url";
19+
text?: string;
20+
image_url?: {
21+
url: string;
22+
};
23+
}
24+
1725
export interface RequestMessage {
1826
role: MessageRole;
19-
content: string;
27+
content: string | MultimodalContent[];
2028
}
2129

2230
export interface LLMConfig {

app/client/platforms/google.ts

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
33
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
44
import { getClientConfig } from "@/app/config/client";
55
import { DEFAULT_API_HOST } from "@/app/constant";
6+
import {
7+
getMessageTextContent,
8+
getMessageImages,
9+
isVisionModel,
10+
} from "@/app/utils";
11+
612
export class GeminiProApi implements LLMApi {
713
extractMessage(res: any) {
814
console.log("[Response] gemini-pro response: ", res);
@@ -15,10 +21,33 @@ export class GeminiProApi implements LLMApi {
1521
}
1622
async chat(options: ChatOptions): Promise<void> {
1723
// const apiClient = this;
18-
const messages = options.messages.map((v) => ({
19-
role: v.role.replace("assistant", "model").replace("system", "user"),
20-
parts: [{ text: v.content }],
21-
}));
24+
const visionModel = isVisionModel(options.config.model);
25+
let multimodal = false;
26+
const messages = options.messages.map((v) => {
27+
let parts: any[] = [{ text: getMessageTextContent(v) }];
28+
if (visionModel) {
29+
const images = getMessageImages(v);
30+
if (images.length > 0) {
31+
multimodal = true;
32+
parts = parts.concat(
33+
images.map((image) => {
34+
const imageType = image.split(";")[0].split(":")[1];
35+
const imageData = image.split(",")[1];
36+
return {
37+
inline_data: {
38+
mime_type: imageType,
39+
data: imageData,
40+
},
41+
};
42+
}),
43+
);
44+
}
45+
}
46+
return {
47+
role: v.role.replace("assistant", "model").replace("system", "user"),
48+
parts: parts,
49+
};
50+
});
2251

2352
// google requires that role in neighboring messages must not be the same
2453
for (let i = 0; i < messages.length - 1; ) {
@@ -33,7 +62,9 @@ export class GeminiProApi implements LLMApi {
3362
i++;
3463
}
3564
}
36-
65+
// if (visionModel && messages.length > 1) {
66+
// options.onError?.(new Error("Multiturn chat is not enabled for models/gemini-pro-vision"));
67+
// }
3768
const modelConfig = {
3869
...useAppConfig.getState().modelConfig,
3970
...useChatStore.getState().currentSession().mask.modelConfig,
@@ -80,13 +111,16 @@ export class GeminiProApi implements LLMApi {
80111
const controller = new AbortController();
81112
options.onController?.(controller);
82113
try {
83-
let chatPath = this.path(Google.ChatPath);
114+
let googleChatPath = visionModel
115+
? Google.VisionChatPath
116+
: Google.ChatPath;
117+
let chatPath = this.path(googleChatPath);
84118

85119
// let baseUrl = accessStore.googleUrl;
86120

87121
if (!baseUrl) {
88122
baseUrl = isApp
89-
? DEFAULT_API_HOST + "/api/proxy/google/" + Google.ChatPath
123+
? DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath
90124
: chatPath;
91125
}
92126

@@ -152,6 +186,19 @@ export class GeminiProApi implements LLMApi {
152186
value,
153187
}): Promise<any> {
154188
if (done) {
189+
if (response.status !== 200) {
190+
try {
191+
let data = JSON.parse(ensureProperEnding(partialData));
192+
if (data && data[0].error) {
193+
options.onError?.(new Error(data[0].error.message));
194+
} else {
195+
options.onError?.(new Error("Request failed"));
196+
}
197+
} catch (_) {
198+
options.onError?.(new Error("Request failed"));
199+
}
200+
}
201+
155202
console.log("Stream complete");
156203
// options.onFinish(responseText + remainText);
157204
finished = true;

app/client/platforms/openai.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@ import {
99
} from "@/app/constant";
1010
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
1111

12-
import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
12+
import {
13+
ChatOptions,
14+
getHeaders,
15+
LLMApi,
16+
LLMModel,
17+
LLMUsage,
18+
MultimodalContent,
19+
} from "../api";
1320
import Locale from "../../locales";
1421
import {
1522
EventStreamContentType,
@@ -18,6 +25,11 @@ import {
1825
import { prettyObject } from "@/app/utils/format";
1926
import { getClientConfig } from "@/app/config/client";
2027
import { makeAzurePath } from "@/app/azure";
28+
import {
29+
getMessageTextContent,
30+
getMessageImages,
31+
isVisionModel,
32+
} from "@/app/utils";
2133

2234
export interface OpenAIListModelResponse {
2335
object: string;
@@ -72,9 +84,10 @@ export class ChatGPTApi implements LLMApi {
7284
}
7385

7486
async chat(options: ChatOptions) {
87+
const visionModel = isVisionModel(options.config.model);
7588
const messages = options.messages.map((v) => ({
7689
role: v.role,
77-
content: v.content,
90+
content: visionModel ? v.content : getMessageTextContent(v),
7891
}));
7992

8093
const modelConfig = {

app/components/chat.module.scss

Lines changed: 122 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,47 @@
11
@import "../styles/animation.scss";
22

3+
.attach-images {
4+
position: absolute;
5+
left: 30px;
6+
bottom: 32px;
7+
display: flex;
8+
}
9+
10+
.attach-image {
11+
cursor: default;
12+
width: 64px;
13+
height: 64px;
14+
border: rgba($color: #888, $alpha: 0.2) 1px solid;
15+
border-radius: 5px;
16+
margin-right: 10px;
17+
background-size: cover;
18+
background-position: center;
19+
background-color: var(--white);
20+
21+
.attach-image-mask {
22+
width: 100%;
23+
height: 100%;
24+
opacity: 0;
25+
transition: all ease 0.2s;
26+
}
27+
28+
.attach-image-mask:hover {
29+
opacity: 1;
30+
}
31+
32+
.delete-image {
33+
width: 24px;
34+
height: 24px;
35+
cursor: pointer;
36+
display: flex;
37+
align-items: center;
38+
justify-content: center;
39+
border-radius: 5px;
40+
float: right;
41+
background-color: var(--white);
42+
}
43+
}
44+
345
.chat-input-actions {
446
display: flex;
547
flex-wrap: wrap;
@@ -189,12 +231,10 @@
189231

190232
animation: slide-in ease 0.3s;
191233

192-
$linear: linear-gradient(
193-
to right,
194-
rgba(0, 0, 0, 0),
195-
rgba(0, 0, 0, 1),
196-
rgba(0, 0, 0, 0)
197-
);
234+
$linear: linear-gradient(to right,
235+
rgba(0, 0, 0, 0),
236+
rgba(0, 0, 0, 1),
237+
rgba(0, 0, 0, 0));
198238
mask-image: $linear;
199239

200240
@mixin show {
@@ -327,7 +367,7 @@
327367
}
328368
}
329369

330-
.chat-message-user > .chat-message-container {
370+
.chat-message-user>.chat-message-container {
331371
align-items: flex-end;
332372
}
333373

@@ -349,6 +389,7 @@
349389
padding: 7px;
350390
}
351391
}
392+
352393
/* Specific styles for iOS devices */
353394
@media screen and (max-device-width: 812px) and (-webkit-min-device-pixel-ratio: 2) {
354395
@supports (-webkit-touch-callout: none) {
@@ -381,6 +422,64 @@
381422
transition: all ease 0.3s;
382423
}
383424

425+
.chat-message-item-image {
426+
width: 100%;
427+
margin-top: 10px;
428+
}
429+
430+
.chat-message-item-images {
431+
width: 100%;
432+
display: grid;
433+
justify-content: left;
434+
grid-gap: 10px;
435+
grid-template-columns: repeat(var(--image-count), auto);
436+
margin-top: 10px;
437+
}
438+
439+
.chat-message-item-image-multi {
440+
object-fit: cover;
441+
background-size: cover;
442+
background-position: center;
443+
background-repeat: no-repeat;
444+
}
445+
446+
.chat-message-item-image,
447+
.chat-message-item-image-multi {
448+
box-sizing: border-box;
449+
border-radius: 10px;
450+
border: rgba($color: #888, $alpha: 0.2) 1px solid;
451+
}
452+
453+
454+
@media only screen and (max-width: 600px) {
455+
$calc-image-width: calc(100vw/3*2/var(--image-count));
456+
457+
.chat-message-item-image-multi {
458+
width: $calc-image-width;
459+
height: $calc-image-width;
460+
}
461+
462+
.chat-message-item-image {
463+
max-width: calc(100vw/3*2);
464+
}
465+
}
466+
467+
@media screen and (min-width: 600px) {
468+
$max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count));
469+
$image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count));
470+
471+
.chat-message-item-image-multi {
472+
width: $image-width;
473+
height: $image-width;
474+
max-width: $max-image-width;
475+
max-height: $max-image-width;
476+
}
477+
478+
.chat-message-item-image {
479+
max-width: calc(calc(1200px - var(--sidebar-width))/3*2);
480+
}
481+
}
482+
384483
.chat-message-action-date {
385484
font-size: 12px;
386485
opacity: 0.2;
@@ -395,7 +494,7 @@
395494
z-index: 1;
396495
}
397496

398-
.chat-message-user > .chat-message-container > .chat-message-item {
497+
.chat-message-user>.chat-message-container>.chat-message-item {
399498
background-color: var(--second);
400499

401500
&:hover {
@@ -460,6 +559,7 @@
460559

461560
@include single-line();
462561
}
562+
463563
.hint-content {
464564
font-size: 12px;
465565

@@ -474,15 +574,26 @@
474574
}
475575

476576
.chat-input-panel-inner {
577+
cursor: text;
477578
display: flex;
478579
flex: 1;
580+
border-radius: 10px;
581+
border: var(--border-in-light);
582+
}
583+
584+
.chat-input-panel-inner-attach {
585+
padding-bottom: 80px;
586+
}
587+
588+
.chat-input-panel-inner:has(.chat-input:focus) {
589+
border: 1px solid var(--primary);
479590
}
480591

481592
.chat-input {
482593
height: 100%;
483594
width: 100%;
484595
border-radius: 10px;
485-
border: var(--border-in-light);
596+
border: none;
486597
box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.03);
487598
background-color: var(--white);
488599
color: var(--black);
@@ -494,9 +605,7 @@
494605
min-height: 68px;
495606
}
496607

497-
.chat-input:focus {
498-
border: 1px solid var(--primary);
499-
}
608+
.chat-input:focus {}
500609

501610
.chat-input-send {
502611
background-color: var(--primary);
@@ -515,4 +624,4 @@
515624
.chat-input-send {
516625
bottom: 30px;
517626
}
518-
}
627+
}

0 commit comments

Comments
 (0)