Skip to content

Commit 38c844c

Browse files
authored
fix(extract-data): resolve match multiple element (#280)
1 parent afd0934 commit 38c844c

File tree

4 files changed

+35
-5
lines changed

4 files changed

+35
-5
lines changed

packages/midscene/src/ai-model/prompt/ui-tars-planning.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ export function parseActionFromVlm(
5858
if (mode === 'bc') {
5959
// Parse thought/reflection based on different text patterns
6060
if (text.startsWith('Thought:')) {
61-
const thoughtMatch = text.match(/Thought: (.+?)(?=\s*Action:|$)/);
61+
const thoughtMatch = text.match(/Thought: (.+?)(?=\s*Action:|$)/s);
6262
if (thoughtMatch) {
6363
thought = thoughtMatch[1].trim();
6464
}

packages/midscene/src/ai-model/prompt/util.ts

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -209,15 +209,25 @@ export function elementByPositionWithElementInfo(
209209
},
210210
) {
211211
assert(typeof position !== 'undefined', 'position is required for query');
212-
const item = elementsInfo.find((item) => {
212+
const matchingElements = elementsInfo.filter((item) => {
213213
return (
214214
item.rect.left <= position.x &&
215215
position.x <= item.rect.left + item.rect.width &&
216216
item.rect.top <= position.y &&
217217
position.y <= item.rect.top + item.rect.height
218218
);
219219
});
220-
return item;
220+
221+
if (matchingElements.length === 0) {
222+
return undefined;
223+
}
224+
225+
// Find the smallest element by area
226+
return matchingElements.reduce((smallest, current) => {
227+
const smallestArea = smallest.rect.width * smallest.rect.height;
228+
const currentArea = current.rect.width * current.rect.height;
229+
return currentArea < smallestArea ? current : smallest;
230+
});
221231
}
222232

223233
export const samplePageDescription = `

packages/midscene/tests/ai/parse-action.test.ts

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import { describe, expect, it } from 'vitest';
77
describe('parse action from vlm', () => {
88
it('should parse action with Thought format', () => {
99
const text = `Thought: 点击登录按钮
10-
Action: click(start_box='(200,300,400,500)')`;
10+
Action: click(start_box='(200,300,400,500)')`;
1111

1212
const actions = parseActionFromVlm(text);
1313
expect(actions).toHaveLength(1);
@@ -21,6 +21,25 @@ Action: click(start_box='(200,300,400,500)')`;
2121
});
2222
});
2323

24+
it('should parse action with Thought format2', () => {
25+
const text = `Thought: To proceed with the task of opening Twitter and posting a tweet, I need to first access the Google search page. The highlighted "Google 搜索" button is the appropriate element to interact with, as it will allow me to search for Twitter and navigate to its website.
26+
Click on the "Google 搜索" button to initiate a search for Twitter.
27+
Action: click(start_box='(460,452)')`;
28+
29+
const actions = parseActionFromVlm(text);
30+
expect(actions).toHaveLength(1);
31+
expect(actions[0]).toEqual({
32+
reflection: null,
33+
thought:
34+
'To proceed with the task of opening Twitter and posting a tweet, I need to first access the Google search page. The highlighted "Google 搜索" button is the appropriate element to interact with, as it will allow me to search for Twitter and navigate to its website.\n Click on the "Google 搜索" button to initiate a search for Twitter.',
35+
action_type: 'click',
36+
action_inputs: {
37+
start_box: '[0.46,0.452,0.46,0.452]',
38+
},
39+
});
40+
});
41+
//
42+
2443
it('should parse action with Reflection format', () => {
2544
const text = `Action_Summary: 输入用户名
2645
Action: type(content='username')`;

packages/midscene/tsconfig.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,8 @@
1616
"rootDir": ".",
1717
"skipLibCheck": true,
1818
"strict": true,
19-
"module": "ESNext"
19+
"module": "ESNext",
20+
"target": "es2018"
2021
},
2122
"exclude": ["**/node_modules"],
2223
"include": ["src", "tests", "report"]

0 commit comments

Comments
 (0)