From 32f8ccc3d29ef59330bf0270721bbb82b0654555 Mon Sep 17 00:00:00 2001
From: Brandon Fergerson
Date: Wed, 23 Oct 2024 10:16:20 -0300
Subject: [PATCH] feat: improved computer use functionality

---
Review notes (this section is ignored by `git am`):
The line structure of this patch was reconstructed and the fixes below were
folded into the added lines, so the blob ids on the `index` lines no longer
match the content.
 * attach_file: reject non-2xx fetch responses; keep the blob MIME type.
 * click_button: description referenced a non-existent get_button_ids tool.
 * get_available_files: handle File.listFiles() returning null.
 * get_form_ids: drop duplicated object keys, escape ids used in selectors,
   fall back when crypto.randomUUID() is unavailable (insecure context).
 * input_form: escape ids used in selectors; typeText no longer appends
   "undefined" forever when the text is empty.

 src/tools/attach_file.md         | 66 +++++++++++++++++++++++++++++++++
 src/tools/click_button.md        | 69 +++++++++++++++++++++++++++++++++++
 src/tools/get_available_files.md | 25 +++++++++++++
 src/tools/get_form_ids.md        | 56 ++++++++++++++++++++--------
 src/tools/input_form.md          | 96 ++++++++++++++++++++++++++++++++++++++----------
 5 files changed, 275 insertions(+), 37 deletions(-)
 create mode 100644 src/tools/attach_file.md
 create mode 100644 src/tools/click_button.md
 create mode 100644 src/tools/get_available_files.md

diff --git a/src/tools/attach_file.md b/src/tools/attach_file.md
new file mode 100644
index 0000000..e517bb5
--- /dev/null
+++ b/src/tools/attach_file.md
@@ -0,0 +1,66 @@
+```yaml
+type: function
+function:
+  name: attach_file
+  description: Attaches a file to a file input element on the page.
+  parameters:
+    type: object
+    properties:
+      file_location:
+        type: string
+        description: The location of the file to attach
+      form_id:
+        type: string
+        description: The form id to attach the file to
+    required:
+      - file_location
+      - form_id
+  exec:
+    language: javascript
+    execute_on_all_frames: true
+    trigger_response: true
+```
+
+```javascript
+console.log("Attaching file to form: {{ form_id }} - Frame: {{ voqal_frame_id }}");
+let element = document.getElementById("{{ form_id }}");
+console.log({element, form_id: "{{ form_id }}"});
+
+if (element == null) {
+    console.error("Element not found - Frame: {{ voqal_frame_id }}");
+    return;
+}
+
+let value = "{{ file_location }}";
+console.log("File: " + value);
+
+const selfHost = window.location.origin;
+const storageLocation = selfHost + "/{{ voqal_storage_uuid }}/" + value;
+console.log("Storage Location: " + storageLocation);
+
+loadFileToInput(storageLocation, element);
+
+async function loadFileToInput(url, fileInput) {
+    try {
+        const response = await fetch(url);
+        // fetch() also resolves on HTTP errors; never attach an error page as the file.
+        if (!response.ok) {
+            throw new Error('Failed to fetch file (HTTP ' + response.status + '): ' + url);
+        }
+        const fileContent = await response.blob();
+        const urlSegments = url.split('/');
+        const fileName = urlSegments[urlSegments.length - 1];
+        const myFile = new File([fileContent], fileName, { type: fileContent.type });
+
+        const dataTransfer = new DataTransfer();
+        dataTransfer.items.add(myFile);
+
+        fileInput.files = dataTransfer.files;
+        console.log('File loaded and set in input:', fileInput.files);
+
+        fileInput.dispatchEvent(new Event('change', { bubbles: true }));
+    } catch (error) {
+        console.error('Error loading file:', error);
+    }
+}
+```
\ No newline at end of file
diff --git a/src/tools/click_button.md b/src/tools/click_button.md
new file mode 100644
index 0000000..3a22a13
--- /dev/null
+++ b/src/tools/click_button.md
@@ -0,0 +1,69 @@
+```yaml
+type: function
+function:
+  name: click_button
+  description: Click a button on the current page. Use get_form_ids first to get the button coordinates.
+  parameters:
+    type: object
+    properties:
+      x_coord:
+        type: number
+        description: The x coordinate of the button
+      y_coord:
+        type: number
+        description: The y coordinate of the button
+    required:
+      - x_coord
+      - y_coord
+  exec: kotlin
+```
+
+```kotlin
+import java.awt.MouseInfo
+import java.awt.Robot
+import java.awt.event.InputEvent
+import kotlin.math.atan2
+import kotlin.math.cos
+import kotlin.math.sin
+
+fun moveMouseSmoothly(targetX: Int, targetY: Int, steps: Int = 75, delay: Long = 5L) {
+    val robot = Robot()
+    val startX = MouseInfo.getPointerInfo().location.x
+    val startY = MouseInfo.getPointerInfo().location.y
+
+    val deltaX = targetX - startX
+    val deltaY = targetY - startY
+    val distance = Math.hypot(deltaX.toDouble(), deltaY.toDouble())
+    if (distance == 0.0) return
+
+    val stepSize = distance / steps
+    val angle = atan2(deltaY.toDouble(), deltaX.toDouble())
+
+    for (i in 0 until steps) {
+        val x = startX + (i * stepSize * cos(angle)).toInt()
+        val y = startY + (i * stepSize * sin(angle)).toInt()
+        robot.mouseMove(x, y)
+        Thread.sleep(delay)
+    }
+
+    robot.mouseMove(targetX, targetY)
+}
+
+fun clickMouse(button: Int = 1) {
+    val robot = Robot()
+
+    val buttonMask = when (button) {
+        1 -> InputEvent.BUTTON1_DOWN_MASK // Left button
+        2 -> InputEvent.BUTTON2_DOWN_MASK // Middle button
+        3 -> InputEvent.BUTTON3_DOWN_MASK // Right button
+        else -> throw IllegalArgumentException("Invalid mouse button: $button")
+    }
+
+    robot.mousePress(buttonMask)
+    robot.mouseRelease(buttonMask)
+}
+
+val browserCoords = browser.uiComponent.getLocationOnScreen()
+moveMouseSmoothly(x_coord.toInt() + browserCoords.x, y_coord.toInt() + browserCoords.y)
+clickMouse()
+```
\ No newline at end of file
diff --git a/src/tools/get_available_files.md b/src/tools/get_available_files.md
new file mode 100644
index 0000000..f41496b
--- /dev/null
+++ b/src/tools/get_available_files.md
@@ -0,0 +1,25 @@
+```yaml
+type: function
+function:
+  name: get_available_files
+  description: Returns a list of the files available in the local storage. This is how you get files from the "storage".
+  exec:
+    language: kotlin
+    manual_confirm: true
+```
+
+```kotlin
+import java.io.File
+
+val storageLocation = File(installDir, "storage")
+// listFiles() returns null when the directory is missing or unreadable.
+val files = storageLocation.listFiles().orEmpty()
+contextManager.confirmFinished(
+    mapOf(
+        "respId" to voqal_resp_id,
+        "data" to mapOf(
+            "files" to files.map { it.name }
+        )
+    )
+)
+```
diff --git a/src/tools/get_form_ids.md b/src/tools/get_form_ids.md
index 5ff2ce4..8dcc18e 100644
--- a/src/tools/get_form_ids.md
+++ b/src/tools/get_form_ids.md
@@ -2,44 +2,68 @@
 type: function
 function:
   name: get_form_ids
-  description: Returns a list of the form ids on the current page. Useful as a precursor to input_form.
+  description: Returns a list of the form ids on the current page. Useful as a precursor to input_form/click_button.
   exec:
     language: javascript
     manual_confirm: true
+    execute_on_all_frames: true
 ```
 
 ```javascript
 function getInputDescriptions() {
-    const elementDescriptionPairs = [];
-
-    document.querySelectorAll('input, textarea').forEach(element => {
-        const type = element.type || 'unknown';
-        if (type === 'submit' || type === 'button' || type === 'hidden') {
-            return;
-        }
-
+    const elementDescriptions = [];
+    document.querySelectorAll('input, textarea, button').forEach(element => {
         const description = getElementDescription(element);
         if (description == null) {
             return;
         }
-
-        elementDescriptionPairs.push({element, description, id: element.id});
+        elementDescriptions.push({description});
     });
-    return elementDescriptionPairs;
+    return elementDescriptions;
 }
 
 function getElementDescription(el) {
-    let label = document.querySelector(`label[for="${el.id}"]`);
+    const type = el.type || 'unknown';
+    let description = {
+        type: type,
+        label: '',
+        elementId: getId(el),
+        elementName: el.name || '',
+        innerText: el.innerText || '',
+        hidden: el.hidden,
+        visible: el.offsetParent !== null,
+        position: getPosition(el)
+    };
+
+    const label = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
     if (label) {
-        label = label.textContent.trim();
+        description.label = label.textContent.trim();
     }
 
     const ariaLabel = el.getAttribute('aria-label');
     if (ariaLabel) {
-        label = ariaLabel;
+        description.label = ariaLabel;
     }
 
-    return label;
+    return description;
+}
+
+function getId(node) {
+    if (!node.id) {
+        // crypto.randomUUID() only exists in secure contexts; fall back on plain-HTTP pages.
+        const suffix = (window.crypto && typeof crypto.randomUUID === 'function')
+            ? crypto.randomUUID()
+            : Date.now().toString(36) + Math.random().toString(36).slice(2);
+        node.id = 'voqal_' + suffix;
+    }
+    return node.id;
+}
+
+function getPosition(el) {
+    const rect = el.getBoundingClientRect();
+    const x = rect.left + rect.width / 2;
+    const y = rect.top + rect.height / 2;
+    return {x, y};
 }
 
 const resp = {
diff --git a/src/tools/input_form.md b/src/tools/input_form.md
index 580aede..9dd7ef7 100644
--- a/src/tools/input_form.md
+++ b/src/tools/input_form.md
@@ -2,41 +2,95 @@
 type: function
 function:
   name: input_form
-  description: Types the given text in the form of the given form id.
+  description: Types the given text in the forms based on the provided form IDs and texts.
   parameters:
     type: object
     properties:
-      form_id:
-        type: string
-        description: The form id to type text into
-      text:
-        type: string
-        description: The text to type
+      inputs:
+        type: array
+        description: A list of form inputs, each containing a form ID and the corresponding text to type.
+        items:
+          type: object
+          properties:
+            form_id:
+              type: string
+              description: The form ID to type text into.
+            text:
+              type: string
+              description: The text to type into the form.
+          required:
+            - form_id
+            - text
     required:
-      - form_id
-      - text
-  exec: javascript
+      - inputs
+  exec:
+    language: javascript
+    manual_confirm: true
+    trigger_response: true
+    execute_on_all_frames: true
 ```
 
 ```javascript
-const element = getTypableElementById("{{ form_id }}");
-if (element) {
-    element.value = "{{ text }}";
-} else {
-    alert('Element with id ' + "{{ form_id }}" + ' not found.');
+const inputsArray = {{ context }}.inputs;
+for (const input of inputsArray) {
+    const element = getTypableElementById(input.form_id);
+    if (element) {
+        scrollToElement(element);
+
+        element.value = "";
+        typeText(element, input.text);
+    } else {
+        console.log('Element with id ' + input.form_id + ' not found.');
+    }
 }
 
+const resp = {
+    respId: "{{ voqal_resp_id }}",
+    data: [{
+        status: "success",
+    }]
+}
+window.cefQuery({
+    request: JSON.stringify(resp),
+    onSuccess: function (response) {
+        resolve(response);
+    }
+});
+
 function getTypableElementById(id) {
-    // Get all elements with the specified ID
-    const elements = document.querySelectorAll(`#${id}`);
-
-    // Iterate through the NodeList and return the first typable element
+    // CSS.escape keeps ids such as ":r1:" or "1name" from producing an invalid selector.
+    const elements = document.querySelectorAll(`#${CSS.escape(id)}`);
     for (let element of elements) {
         if (element.tagName === "INPUT" || element.tagName === "TEXTAREA") {
-            return element; // Return the first typable element found
+            return element;
         }
     }
+    return null;
+}
 
-    return null; // Return null if no typable element is found
+function scrollToElement(element) {
+    element.scrollIntoView({
+        behavior: "smooth",
+        block: "center"
+    });
+}
+
+function typeText(element, text, delay = 20) {
+    // Coerce to a string so indexing is well defined; with nothing to type, the field
+    // is already cleared, so notify listeners once instead of starting an endless timer.
+    const content = (text == null) ? "" : String(text);
+    if (content.length === 0) {
+        element.dispatchEvent(new Event("input", { bubbles: true }));
+        return;
+    }
+    let currentIndex = 0;
+    const interval = setInterval(() => {
+        element.value += content[currentIndex];
+        currentIndex++;
+        if (currentIndex >= content.length) {
+            clearInterval(interval);
+            element.dispatchEvent(new Event("input", { bubbles: true }));
+        }
+    }, delay);
 }
 ```
\ No newline at end of file