
Commit

feat(ai-model): support JSON 100% limit capability for gpt-4o-2024-08-06 model (#86)
zhoushaw authored Sep 5, 2024
1 parent cfa92b3 commit c5077a2
Showing 13 changed files with 240 additions and 25 deletions.
2 changes: 1 addition & 1 deletion packages/midscene/package.json
@@ -59,7 +59,7 @@
},
"dependencies": {
"node-fetch": "2.6.7",
"openai": "4.47.1",
"openai": "4.57.1",
"optional": "0.1.4",
"@midscene/shared": "workspace:*"
},
50 changes: 50 additions & 0 deletions packages/midscene/src/ai-model/automation/planning.ts
@@ -1,3 +1,5 @@
import type { ResponseFormatJSONSchema } from 'openai/resources';

export function systemPromptToTaskPlanning() {
return `
You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.
@@ -51,3 +53,51 @@ export function systemPromptToTaskPlanning() {
}
`;
}

export const planSchema: ResponseFormatJSONSchema = {
type: 'json_schema',
json_schema: {
name: 'action_items',
strict: true,
schema: {
type: 'object',
properties: {
queryLanguage: {
type: 'string',
description: 'Language of the description of the task',
},
actions: {
type: 'array',
items: {
type: 'object',
properties: {
thought: {
type: 'string',
description:
'Reasons for generating this task, and why this task is feasible on this page',
},
type: {
type: 'string',
description: 'Type of action, like "Tap", "Hover", etc.',
},
param: {
type: ['object', 'null'],
description: 'Parameter towards the task type, can be null',
},
},
required: ['thought', 'type', 'param'],
additionalProperties: false,
},
description: 'List of actions to be performed',
},
error: {
type: ['string', 'null'],
description:
        'Overall error messages. If any error occurs during task planning, summarize the errors and put the messages here',
},
},
required: ['queryLanguage', 'actions', 'error'],
additionalProperties: false,
},
},
};
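
For context on how a strict json_schema response format such as planSchema is consumed, here is a minimal sketch against the OpenAI Node SDK (4.57.x). The model name, prompts, and parsing below are illustrative assumptions rather than code from this commit, and the import paths assume the sketch sits next to the ai-model sources:

import OpenAI from 'openai';
import { planSchema } from './automation/planning';

// Minimal sketch: with strict structured outputs, gpt-4o-2024-08-06 is guaranteed
// to return JSON that conforms to the schema, so the parsed object always has the
// queryLanguage / actions / error fields declared above.
async function demoPlanCall() {
  const openai = new OpenAI(); // assumes OPENAI_API_KEY is set in the environment
  const completion = await openai.chat.completions.create({
    model: 'gpt-4o-2024-08-06',
    messages: [
      { role: 'system', content: 'Plan UI automation actions for the given page.' }, // placeholder prompt
      { role: 'user', content: 'Tap the "Sign in" button' },                          // placeholder instruction
    ],
    response_format: planSchema,
  });
  return JSON.parse(completion.choices[0].message.content ?? '{}');
}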
3 changes: 2 additions & 1 deletion packages/midscene/src/ai-model/common.ts
@@ -1,3 +1,4 @@
import type OpenAI from 'openai';
import type {
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
Expand Down Expand Up @@ -32,7 +33,7 @@ export async function callAiFn<T>(options: {
}) {
const { useModel, msgs, AIActionType: AIActionTypeValue } = options;
if (useOpenAIModel(useModel)) {
-    const parseResult = await callToGetJSONObject<T>(msgs);
+    const parseResult = await callToGetJSONObject<T>(msgs, AIActionTypeValue);
return parseResult;
}

39 changes: 35 additions & 4 deletions packages/midscene/src/ai-model/openai/index.ts
@@ -3,6 +3,10 @@ import { AIResponseFormat } from '@/types';
import { wrapOpenAI } from 'langsmith/wrappers';
import OpenAI, { type ClientOptions } from 'openai';
import type { ChatCompletionMessageParam } from 'openai/resources';
import { planSchema } from '../automation/planning';
import { AIActionType } from '../common';
import { findElementSchema } from '../prompt/element_inspector';
import { assertSchema } from '../prompt/util';

export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
'MIDSCENE_OPENAI_INIT_CONFIG_JSON';
@@ -48,7 +52,9 @@ async function createOpenAI() {

export async function call(
messages: ChatCompletionMessageParam[],
-  responseFormat?: AIResponseFormat,
+  responseFormat?:
+    | OpenAI.ChatCompletionCreateParams['response_format']
+    | OpenAI.ResponseFormatJSONObject,
): Promise<string> {
const openai = await createOpenAI();

@@ -58,21 +64,46 @@
const completion = await openai.chat.completions.create({
model,
messages,
-    response_format: { type: responseFormat },
+    response_format: responseFormat,
temperature: 0.2,
});
shouldPrintTiming && console.timeEnd('Midscene - AI call');
shouldPrintTiming && console.log('Midscene - AI usage', completion.usage);

const { content } = completion.choices[0].message;
assert(content, 'empty content');
return content;
}

export async function callToGetJSONObject<T>(
messages: ChatCompletionMessageParam[],
AIActionTypeValue: AIActionType,
): Promise<T> {
-  const response = await call(messages, AIResponseFormat.JSON);
  // gpt-4o-2024-05-13 only supports the json_object response format
let responseFormat:
| OpenAI.ChatCompletionCreateParams['response_format']
| OpenAI.ResponseFormatJSONObject = {
type: AIResponseFormat.JSON,
};

if (model === 'gpt-4o-2024-08-06') {
switch (AIActionTypeValue) {
case AIActionType.ASSERT:
responseFormat = assertSchema;
break;
case AIActionType.INSPECT_ELEMENT:
responseFormat = findElementSchema;
break;
case AIActionType.EXTRACT_DATA:
        // TODO: strict json_schema output only supports a subset of JSON Schema, and the way the
        // extract API is used needs to be adjusted to constrain the user's data the same way.
        // responseFormat = extractDataSchema;
break;
case AIActionType.PLAN:
responseFormat = planSchema;
break;
}
}

const response = await call(messages, responseFormat);
assert(response, 'empty response');
return JSON.parse(response);
}
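
A rough usage sketch of the updated callToGetJSONObject signature follows; the import paths assume a caller inside src/ai-model, and the message contents and expected plan shape are assumptions for illustration only:

import type { ChatCompletionMessageParam } from 'openai/resources';
import { systemPromptToTaskPlanning } from './automation/planning';
import { AIActionType } from './common';
import { callToGetJSONObject } from './openai';

// Sketch: the caller now passes an action type so the matching strict schema
// (planSchema for PLAN) is selected when the configured model is gpt-4o-2024-08-06;
// other models keep the plain json_object response format.
async function planDemo() {
  const msgs: ChatCompletionMessageParam[] = [
    { role: 'system', content: systemPromptToTaskPlanning() },
    { role: 'user', content: 'Hover over the search box, then type "midscene"' }, // placeholder instruction
  ];
  // The generic argument is an assumed shape mirroring planSchema.
  return callToGetJSONObject<{ queryLanguage: string; actions: unknown[]; error: string | null }>(
    msgs,
    AIActionType.PLAN,
  );
}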
47 changes: 47 additions & 0 deletions packages/midscene/src/ai-model/prompt/element_inspector.ts
@@ -1,3 +1,5 @@
import type { ResponseFormatJSONSchema } from 'openai/resources';

export function systemPromptToFindElement() {
return `
## Role:
@@ -135,3 +137,48 @@ export function multiDescription(multi: boolean) {
? 'multiple elements matching the description (two or more)'
: 'The element closest to the description (only one)';
}

export const findElementSchema: ResponseFormatJSONSchema = {
type: 'json_schema',
json_schema: {
name: 'find_elements',
strict: true,
schema: {
type: 'object',
properties: {
elements: {
type: 'array',
items: {
type: 'object',
properties: {
reason: {
type: 'string',
description: 'Reason for finding this element',
},
text: {
type: 'string',
description: 'Text content of the element',
},
id: {
type: 'string',
description: 'ID of this element',
},
},
required: ['reason', 'text', 'id'],
additionalProperties: false,
},
description: 'List of found elements',
},
errors: {
type: 'array',
items: {
type: 'string',
},
description: 'List of error messages, if any',
},
},
required: ['elements', 'errors'],
additionalProperties: false,
},
},
};
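
For readability, the response shape that findElementSchema enforces can be written as a TypeScript type; the type name is illustrative and not exported by the package:

// Illustrative mirror of findElementSchema; strict mode guarantees every field is present.
interface FindElementsResponse {
  elements: Array<{
    reason: string; // reason for finding this element
    text: string;   // text content of the element
    id: string;     // id of the matched element
  }>;
  errors: string[]; // empty when the lookup succeeds
}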
55 changes: 55 additions & 0 deletions packages/midscene/src/ai-model/prompt/util.ts
@@ -7,6 +7,7 @@ import type {
UIContext,
UISection,
} from '@/types';
import type { ResponseFormatJSONSchema } from 'openai/resources';

const characteristic =
'You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.';
@@ -123,6 +124,37 @@ Return in the following JSON format:
`;
}

export const extractDataSchema: ResponseFormatJSONSchema = {
type: 'json_schema',
json_schema: {
name: 'extract_data',
strict: true,
schema: {
type: 'object',
properties: {
language: {
type: 'string',
enum: ['en', 'zh'],
description: 'The language of the page',
},
data: {
type: 'object',
description: 'The extracted data from extract_data_from_UI skill',
},
errors: {
type: 'array',
items: {
type: 'string',
},
description: 'Error messages, if any',
},
},
required: ['language', 'data', 'errors'],
additionalProperties: false,
},
},
};
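
Note that extractDataSchema is defined here but is not yet selected in callToGetJSONObject; per the TODO in the EXTRACT_DATA branch, it stays commented out for now. Once wired in, its guaranteed shape would mirror this illustrative type (the name and generic are assumptions, not exported by the package):

// Illustrative mirror of extractDataSchema; the open-ended "data" object is
// what the TODO in callToGetJSONObject refers to.
interface ExtractDataResponse<T = Record<string, unknown>> {
  language: 'en' | 'zh'; // language of the page
  data: T;               // data extracted according to the caller's query
  errors: string[];      // error messages, if any
}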

export function systemPromptToAssert() {
return `
${characteristic}
@@ -138,6 +170,29 @@ Return in the following JSON format:
`;
}

export const assertSchema: ResponseFormatJSONSchema = {
type: 'json_schema',
json_schema: {
name: 'assert',
strict: true,
schema: {
type: 'object',
properties: {
thought: {
type: 'string',
description: 'The thought process behind the assertion',
},
pass: {
type: 'boolean',
description: 'Whether the assertion passed or failed',
},
},
required: ['thought', 'pass'],
additionalProperties: false,
},
},
};
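
Likewise, the assertion result constrained by assertSchema corresponds to a small TypeScript type (illustrative name only):

// Illustrative mirror of assertSchema.
interface AssertResponse {
  thought: string; // reasoning behind the verdict
  pass: boolean;   // whether the asserted condition holds on the page
}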

/*
To modify the response format:
1. update the function `describeSectionResponseFormat` here
@@ -2,6 +2,7 @@ import { readFileSync } from 'node:fs';
import path from 'node:path';
import { AiInspectElement } from '@/ai-model';
import { expect, test } from 'vitest';
import { repeatTime } from '../util';
import {
getPageTestData,
repeat,
@@ -36,8 +37,6 @@
},
];

-const repeatTime = process.env.GITHUB_ACTIONS ? 1 : 5;

repeat(repeatTime, (repeatIndex) => {
test(
'xicha: inspect element',
@@ -619,6 +619,29 @@
"htmlNode": null,
"indexId": "26"
},
{
"id": "3f353e2096",
"nodePath": "0-5-5-1-1-1-5-5-1-3",
"nodeHashId": "3f353e2096",
"nodeType": "TEXT Node",
"locator": "",
"attributes": {
"nodeType": "TEXT Node"
},
"center": [
239,
604
],
"content": ".",
"rect": {
"left": 237,
"top": 597,
"width": 3,
"height": 13
},
"htmlNode": null,
"indexId": "27"
},
{
"id": "7eb7a9b4da",
"nodePath": "0-5-5-1-1-3-3",
@@ -643,6 +666,6 @@
395
],
"htmlNode": null,
"indexId": "27"
"indexId": "28"
}
]
5 changes: 2 additions & 3 deletions packages/midscene/tests/ai/inspector/todo_inspector.test.ts
@@ -3,6 +3,7 @@ import { AiInspectElement } from '@/ai-model';
import { useCozeModel } from '@/ai-model/coze';
import { AiAssert } from '@/ai-model/inspect';
import { expect, it } from 'vitest';
import { repeatTime } from '../util';
import {
getPageTestData,
repeat,
@@ -39,8 +40,6 @@
modelList.push('coze');
}

-const repeatTime = process.env.GITHUB_ACTIONS ? 1 : 2;

modelList.forEach((model) => {
repeat(repeatTime, (repeatIndex) => {
it(
@@ -82,7 +81,7 @@ modelList.forEach((model) => {
);
});

-repeat(2, () => {
+repeat(repeatTime, () => {
it(
`todo: assert ${model}`,
async () => {
2 changes: 2 additions & 0 deletions packages/midscene/tests/ai/util.ts
@@ -15,3 +15,5 @@ export function makePlanResultStable(plans: PlanningAction[]) {
export const modelList: Array<'openAI' | 'coze'> = useCozeModel('coze')
? ['openAI', 'coze']
: ['openAI'];

export const repeatTime = process.env.GITHUB_ACTIONS ? 2 : 6;
2 changes: 1 addition & 1 deletion packages/web-integration/package.json
@@ -74,7 +74,7 @@
},
"files": ["dist", "README.md"],
"dependencies": {
"openai": "4.47.1",
"openai": "4.57.1",
"inquirer": "10.1.5",
"@midscene/core": "workspace:*",
"@midscene/shared": "workspace:*",
2 changes: 1 addition & 1 deletion packages/web-integration/playwright.config.ts
@@ -18,7 +18,7 @@ dotenv.config({
*/
export default defineConfig({
// testDir: './tests/ai/e2e',
-  // testIgnore: 'generate-test-data.spec.ts',
+  testIgnore: 'generate-test-data.spec.ts',
timeout: 900 * 1000,
/* Run tests in files in parallel */
fullyParallel: true,