diff --git a/pyproject.toml b/pyproject.toml index 56d4bed..ccb1c22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,9 @@ +[project] +name = "scrapybara" + [tool.poetry] name = "scrapybara" -version = "2.3.0" +version = "2.3.1" description = "" readme = "README.md" authors = [] diff --git a/src/scrapybara/anthropic/__init__.py b/src/scrapybara/anthropic/__init__.py index 7c54d8e..8732edb 100644 --- a/src/scrapybara/anthropic/__init__.py +++ b/src/scrapybara/anthropic/__init__.py @@ -3,6 +3,7 @@ from pydantic import Field from ..types.act import Model +from datetime import datetime class Anthropic(Model): @@ -29,3 +30,93 @@ def __init__( api_key: Optional[str] = None, ) -> None: super().__init__(provider="anthropic", name=name, api_key=api_key) + + +UBUNTU_SYSTEM_PROMPT = f""" +* You have access to an Ubuntu VM with internet connectivity +* You can install Ubuntu applications using the bash tool (use curl over wget) +* To run GUI applications with the bash tool, use a subshell, e.g. 
"(DISPLAY=:1 xterm &)", make sure to include the parantheses +* GUI apps will appear but may take time to load - confirm with an extra screenshot +* Chromium is the default browser +* Start Chromium via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool +* If you need to read a HTML file: + - Open with the address bar in Chromium +* For commands with large text output: + - Redirect to a temp file + - Use str_replace_editor or grep with context (-B and -A flags) to view output +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Read through web pages thoroughly by scrolling down till you have gathered enough info +* Be concise! 
+""" +"""Recommended Anthropic system prompt for Ubuntu instances""" + + +BROWSER_SYSTEM_PROMPT = f""" +* You have access to a Chromium VM with internet connectivity +* Chromium should already be open and running +* You can interact with web pages using the computer tool +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* To login additional sites, ask the user to use Auth Contexts +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Read through web pages thoroughly by scrolling down till you have gathered enough info +* Be concise! 
+""" +"""Recommended Anthropic system prompt for Browser instances""" + + +WINDOWS_SYSTEM_PROMPT = f""" +* You have access to a Windows VM with internet connectivity +* You can interact with the Windows desktop using the computer tool +* GUI apps will appear but may take time to load - confirm with an extra screenshot +* Edge is the default browser +* When viewing pages: + - Zoom out to see full content, or + - Scroll to ensure you see everything +* When interacting with a field, always clear the field first using "ctrl+A" and "delete" + - Take an extra screenshot after clicking "enter" to confirm the field is properly submitted and move the mouse to the next field +* Computer function calls take time, string together calls when possible +* You are allowed to take actions on behalf of the user on sites that are authenticated +* If the user asks you to access a site, assume that the user has already authenticated +* To login additional sites, ask the user to use Auth Contexts or the Interactive Desktop +* If first screenshot shows black screen: + - Click mouse in screen center + - Take another screenshot +* Today's date is {datetime.today().strftime('%A, %B %-d, %Y')} + + + +* If given a complex task, break down into smaller steps and ask the user for details only if necessary +* Read through web pages thoroughly by scrolling down till you have gathered enough info +* Be concise! 
+""" +"""Recommended Anthropic system prompt for Windows instances""" diff --git a/src/scrapybara/core/client_wrapper.py b/src/scrapybara/core/client_wrapper.py index 1c86f52..eb85936 100644 --- a/src/scrapybara/core/client_wrapper.py +++ b/src/scrapybara/core/client_wrapper.py @@ -16,7 +16,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "scrapybara", - "X-Fern-SDK-Version": "2.3.0", + "X-Fern-SDK-Version": "2.3.1", } headers["x-api-key"] = self.api_key return headers diff --git a/src/scrapybara/prompts/__init__.py b/src/scrapybara/prompts/__init__.py index e1cdf6e..a8d6108 100644 --- a/src/scrapybara/prompts/__init__.py +++ b/src/scrapybara/prompts/__init__.py @@ -1,16 +1,12 @@ from datetime import datetime UBUNTU_SYSTEM_PROMPT = f""" -* You have access to an Ubuntu virtual machine with internet connectivity +* You have access to an Ubuntu VM with internet connectivity * You can install Ubuntu applications using the bash tool (use curl over wget) * To run GUI applications with the bash tool, use a subshell, e.g. "(DISPLAY=:1 xterm &)", make sure to include the parantheses * GUI apps will appear but may take time to load - confirm with an extra screenshot * Chromium is the default browser * Start Chromium via the bash tool "(DISPLAY=:1 chromium &)", but interact with it visually via the computer tool -* If you need to read a full PDF after initial screenshot - - Download with curl - - Convert to text using pdftotext - - Read the text file with StrReplaceEditTool * If you need to read a HTML file: - Open with the address bar in Chromium * For commands with large text output: @@ -33,13 +29,14 @@ * If given a complex task, break down into smaller steps and ask the user for details only if necessary -* Read through web pages thoroughly by scrolling down till the end +* Read through web pages thoroughly by scrolling down till you have gathered enough info * Be concise! 
""" +"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import UBUNTU_SYSTEM_PROMPT`""" BROWSER_SYSTEM_PROMPT = f""" -* You have access to a Chromium browser instance with internet connectivity +* You have access to a Chromium VM with internet connectivity * Chromium should already be open and running * You can interact with web pages using the computer tool * When viewing pages: @@ -59,13 +56,14 @@ * If given a complex task, break down into smaller steps and ask the user for details only if necessary -* Read through web pages thoroughly by scrolling down till the end +* Read through web pages thoroughly by scrolling down till you have gathered enough info * Be concise! """ +"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import BROWSER_SYSTEM_PROMPT`""" WINDOWS_SYSTEM_PROMPT = f""" -* You have access to a Windows virtual machine with internet connectivity +* You have access to a Windows VM with internet connectivity * You can interact with the Windows desktop using the computer tool * GUI apps will appear but may take time to load - confirm with an extra screenshot * Edge is the default browser @@ -86,6 +84,7 @@ * If given a complex task, break down into smaller steps and ask the user for details only if necessary -* Read through web pages thoroughly by scrolling down till the end +* Read through web pages thoroughly by scrolling down till you have gathered enough info * Be concise! 
""" +"""DEPRECATED — Please import prompts from their respective models instead: `from scrapybara.anthropic import WINDOWS_SYSTEM_PROMPT`""" diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py index 894c531..78e296f 100644 --- a/tests/custom/test_client.py +++ b/tests/custom/test_client.py @@ -3,10 +3,10 @@ import os import pytest -from scrapybara.anthropic import Anthropic -from scrapybara.prompts import ( - BROWSER_SYSTEM_PROMPT, +from scrapybara.anthropic import ( + Anthropic, UBUNTU_SYSTEM_PROMPT, + BROWSER_SYSTEM_PROMPT, WINDOWS_SYSTEM_PROMPT, ) from scrapybara.tools import BashTool, ComputerTool, EditTool