Skip to content

Commit

Permalink
feat: improved computer use functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
BFergerson committed Oct 24, 2024
1 parent 87af569 commit 32f8ccc
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 36 deletions.
62 changes: 62 additions & 0 deletions src/tools/attach_file.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
```yaml
type: function
function:
name: attach_file
description: Attaches a file to a file input element on the page.
parameters:
type: object
properties:
file_location:
type: string
description: The location of the file to attach
form_id:
type: string
description: The form id to attach the file to
required:
- file_location
- form_id
exec:
language: javascript
execute_on_all_frames: true
trigger_response: true
```
```javascript
console.log("Attaching file to form: {{ form_id }} - Frame: {{ voqal_frame_id }}");
let element = document.getElementById("{{ form_id }}");
console.log({element, form_id: "{{ form_id }}"});

if (element == null) {
console.error("Element not found - Frame: {{ voqal_frame_id }}");
return;
}

let value = "{{ file_location }}";
console.log("File: " + value);
const selfHost = window.location.origin;
const storageLocation = selfHost + "/{{ voqal_storage_uuid }}/" + value;
console.log("Storage Location: " + storageLocation);
loadFileToInput(storageLocation, element);
/**
 * Fetches the resource at `url` and installs it as the selected file of `fileInput`,
 * then fires a bubbling `change` event so page listeners notice the new selection.
 *
 * Failures (network error, non-2xx response) are logged to the console and swallowed;
 * the returned promise never rejects.
 *
 * @param {string} url - Location to download the file from; its last path segment becomes the file name.
 * @param {HTMLInputElement} fileInput - The element whose `files` list is replaced.
 * @returns {Promise<void>}
 */
async function loadFileToInput(url, fileInput) {
    try {
        const response = await fetch(url);
        // fetch() only rejects on network failure; without this check an HTTP error
        // page (404, 500, ...) would be attached as if it were the requested file.
        if (!response.ok) {
            throw new Error('Request for ' + url + ' failed with status ' + response.status);
        }

        const fileContent = await response.blob();
        const urlSegments = url.split('/');
        const fileName = urlSegments[urlSegments.length - 1];
        // Carry the blob's MIME type over so the upload is not sent with an empty type.
        const myFile = new File([fileContent], fileName, { type: fileContent.type });

        // A FileList cannot be constructed directly; DataTransfer is used to build one.
        const dataTransfer = new DataTransfer();
        dataTransfer.items.add(myFile);
        fileInput.files = dataTransfer.files;
        console.log('File loaded and set in input:', fileInput.files);
        fileInput.dispatchEvent(new Event('change', { bubbles: true }));
    } catch (error) {
        console.error('Error loading file:', error);
    }
}
```
69 changes: 69 additions & 0 deletions src/tools/click_button.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
```yaml
type: function
function:
name: click_button
description: Click a button on the current page. Use get_button_ids first to get the button coordinates.
parameters:
type: object
properties:
x_coord:
type: number
description: The x coordinate of the button
y_coord:
type: number
description: The y coordinate of the button
required:
- x_coord
- y_coord
exec: kotlin
```
```kotlin
import java.awt.MouseInfo
import java.awt.Robot
import java.awt.event.InputEvent
import kotlin.math.atan2
import kotlin.math.cos
import kotlin.math.sin

/**
 * Moves the mouse pointer from its current location to ([targetX], [targetY]) through a
 * series of intermediate positions on the straight line between the two points.
 *
 * @param targetX destination x coordinate, as passed to [Robot.mouseMove]
 * @param targetY destination y coordinate, as passed to [Robot.mouseMove]
 * @param steps number of intermediate positions; when zero or negative the pointer
 *   jumps straight to the target
 * @param delay pause in milliseconds after each intermediate move
 */
fun moveMouseSmoothly(targetX: Int, targetY: Int, steps: Int = 75, delay: Long = 5L) {
    val robot = Robot()

    // getPointerInfo() can return null; read it once so x and y come from the same
    // snapshot, and fall back to a direct jump when no start point is available.
    val start = MouseInfo.getPointerInfo()?.location
    if (start == null) {
        robot.mouseMove(targetX, targetY)
        return
    }

    val deltaX = targetX - start.x
    val deltaY = targetY - start.y
    if (deltaX == 0 && deltaY == 0) return

    for (i in 0 until steps) {
        // Integer interpolation (widened to Long against overflow) avoids the
        // floating-point truncation that could leave a step one pixel short.
        val x = start.x + (deltaX.toLong() * i / steps).toInt()
        val y = start.y + (deltaY.toLong() * i / steps).toInt()
        robot.mouseMove(x, y)
        Thread.sleep(delay)
    }

    // Always finish exactly on the requested point.
    robot.mouseMove(targetX, targetY)
}

/**
 * Presses and immediately releases a mouse button at the current pointer position.
 *
 * @param button 1 for the left button, 2 for the middle button, 3 for the right button
 * @throws IllegalArgumentException when [button] is not 1, 2 or 3
 */
fun clickMouse(button: Int = 1) {
    val bot = Robot()

    // Button number -> AWT extended modifier mask understood by Robot.
    val masksByButton = mapOf(
        1 to InputEvent.BUTTON1_DOWN_MASK,
        2 to InputEvent.BUTTON2_DOWN_MASK,
        3 to InputEvent.BUTTON3_DOWN_MASK,
    )
    val mask = masksByButton[button]
        ?: throw IllegalArgumentException("Invalid mouse button: $button")

    bot.mousePress(mask)
    bot.mouseRelease(mask)
}

// Offset the requested coordinates by the browser component's on-screen origin,
// glide the pointer there, then left-click.
val browserOrigin = browser.uiComponent.locationOnScreen
val screenX = browserOrigin.x + x_coord.toInt()
val screenY = browserOrigin.y + y_coord.toInt()
moveMouseSmoothly(screenX, screenY)
clickMouse()
```
24 changes: 24 additions & 0 deletions src/tools/get_available_files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
```yaml
type: function
function:
name: get_available_files
description: Returns a list of the files available in the local storage. This is how you get files from the "storage".
exec:
language: kotlin
manual_confirm: true
```
```kotlin
import java.io.File

// Collect the names of the entries in the "storage" directory under installDir and
// pass them to contextManager.confirmFinished under the current response id.
val storageLocation = File(installDir, "storage")
// listFiles() returns null when the directory does not exist or cannot be read;
// treat that as "no files" instead of failing with a NullPointerException.
val files = storageLocation.listFiles().orEmpty()
contextManager.confirmFinished(
    mapOf(
        "respId" to voqal_resp_id,
        "data" to mapOf(
            "files" to files.map { it.name }
        )
    )
)
```
52 changes: 36 additions & 16 deletions src/tools/get_form_ids.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,64 @@
type: function
function:
name: get_form_ids
description: Returns a list of the form ids on the current page. Useful as a precursor to input_form.
description: Returns a list of the form ids on the current page. Useful as a precursor to input_form/click_button.
exec:
language: javascript
manual_confirm: true
execute_on_all_frames: true
```
```javascript
function getInputDescriptions() {
const elementDescriptionPairs = [];

document.querySelectorAll('input, textarea').forEach(element => {
const type = element.type || 'unknown';
if (type === 'submit' || type === 'button' || type === 'hidden') {
return;
}

const elementDescriptions = [];
document.querySelectorAll('input, textarea, button').forEach(element => {
const description = getElementDescription(element);
if (description == null) {
return;
}

elementDescriptionPairs.push({element, description, id: element.id});
elementDescriptions.push({description});
});

return elementDescriptionPairs;
return elementDescriptions;
}

function getElementDescription(el) {
let label = document.querySelector(`label[for="${el.id}"]`);
const type = el.type || 'unknown';
let description = {
type: type,
label: '',
elementId: getId(el),
elementName: el.name || '',
type: type,
label: '',
elementName: el.name || '',
innerText: el.innerText || '',
hidden: el.hidden,
visible: el.offsetParent !== null,
position: getPosition(el)
};

const label = document.querySelector(`label[for="${el.id}"]`);
if (label) {
label = label.textContent.trim();
description.label = label.textContent.trim();
}
const ariaLabel = el.getAttribute('aria-label');
if (ariaLabel) {
label = ariaLabel;
description.label = ariaLabel;
}

return label;
return description;
}

/**
 * Returns the id of `node`, assigning a generated `voqal_`-prefixed id first when the
 * node has none, so every described element can be addressed by id afterwards.
 *
 * @param {{id: string}} node - Element (or element-like object) to identify; mutated when it has no id.
 * @returns {string} The existing or newly assigned id.
 */
function getId(node) {
    if (node.id) {
        return node.id;
    }

    // crypto.randomUUID is only exposed in secure contexts (https/localhost); on plain
    // http pages fall back to a timestamp + random suffix instead of throwing.
    const canUseUuid = typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function';
    const suffix = canUseUuid
        ? crypto.randomUUID()
        : Date.now().toString(36) + '-' + Math.random().toString(36).slice(2);

    node.id = 'voqal_' + suffix;
    return node.id;
}

/**
 * Computes the centre point of an element's bounding client rectangle.
 *
 * @param {Element} el - Element to measure.
 * @returns {{x: number, y: number}} Centre of the rectangle returned by getBoundingClientRect().
 */
function getPosition(el) {
    const { left, top, width, height } = el.getBoundingClientRect();
    return {
        x: left + width / 2,
        y: top + height / 2
    };
}

const resp = {
Expand Down
86 changes: 66 additions & 20 deletions src/tools/input_form.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,87 @@
type: function
function:
name: input_form
description: Types the given text in the form of the given form id.
description: Types the given text in the forms based on the provided form IDs and texts.
parameters:
type: object
properties:
form_id:
type: string
description: The form id to type text into
text:
type: string
description: The text to type
inputs:
type: array
description: A list of form inputs, each containing a form ID and the corresponding text to type.
items:
type: object
properties:
form_id:
type: string
description: The form ID to type text into.
text:
type: string
description: The text to type into the form.
required:
- form_id
- text
required:
- form_id
- text
exec: javascript
- inputs
exec:
language: javascript
manual_confirm: true
trigger_response: true
execute_on_all_frames: true
```
```javascript
const element = getTypableElementById("{{ form_id }}");
if (element) {
element.value = "{{ text }}";
} else {
alert('Element with id ' + "{{ form_id }}" + ' not found.');
const inputsArray = {{ context }}.inputs;
for (const input of inputsArray) {
const element = getTypableElementById(input.form_id);
if (element) {
scrollToElement(element);

element.value = "";
typeText(element, input.text);
} else {
console.log('Element with id ' + input.form_id + ' not found.');
}
}

// Send a "success" payload for this response id back through window.cefQuery.
// NOTE(review): `resolve` is not defined in the visible script — presumably supplied
// by whatever wraps this code at execution time; confirm.
const payload = JSON.stringify({
    respId: "{{ voqal_resp_id }}",
    data: [{ status: "success" }]
});
window.cefQuery({
    request: payload,
    onSuccess: (response) => {
        resolve(response);
    }
});

/**
 * Finds the first INPUT or TEXTAREA element carrying the given id.
 * All elements sharing the id are considered, since pages may reuse an id.
 *
 * @param {string} id - The element id to look up.
 * @returns {Element|null} The first typable element with that id, or null when there is none.
 */
function getTypableElementById(id) {
    // Escape the id so values that are not plain CSS identifiers (leading digit,
    // '.', ':', spaces, ...) still form a valid "#id" selector instead of throwing
    // or matching the wrong thing.
    const elements = document.querySelectorAll(`#${CSS.escape(id)}`);

    const typable = Array.from(elements).find(
        (element) => element.tagName === "INPUT" || element.tagName === "TEXTAREA"
    );
    return typable || null;
}

return null; // Return null if no typable element is found
/**
 * Smoothly scrolls the given element into view, vertically centred.
 *
 * @param {Element} element - Element to bring into view.
 */
function scrollToElement(element) {
    const scrollOptions = { behavior: "smooth", block: "center" };
    element.scrollIntoView(scrollOptions);
}

/**
 * Appends `text` to `element.value` one character per timer tick, then dispatches a
 * single bubbling `input` event once the whole text has been written.
 *
 * @param {{value: string, dispatchEvent: Function}} element - Input or textarea to type into.
 * @param {string} text - Text to append; null/undefined is treated as an empty string,
 *   other non-string values are converted with String().
 * @param {number} [delay=20] - Milliseconds between characters.
 */
function typeText(element, text, delay = 20) {
    const characters = text == null ? "" : String(text);
    const notifyInput = () => element.dispatchEvent(new Event("input", { bubbles: true }));

    // With nothing to type, the timer loop would append "undefined" on every tick and
    // never stop (its index would step past length 0), so finish right away.
    if (characters.length === 0) {
        notifyInput();
        return;
    }

    let currentIndex = 0;
    const interval = setInterval(() => {
        element.value += characters[currentIndex];
        currentIndex++;
        // ">=" rather than "===" so the timer can never run past the end of the text.
        if (currentIndex >= characters.length) {
            clearInterval(interval);
            notifyInput();
        }
    }, delay);
}
```

0 comments on commit 32f8ccc

Please sign in to comment.