diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c59fa521..b6aaac22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,6 @@ repos: rev: v3.1.0 hooks: - id: add-trailing-comma - # alternative: isort - # optional comments: # noreorder - - repo: https://github.com/asottile/reorder-python-imports - rev: v3.14.0 - hooks: - - id: reorder-python-imports - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.8.2 diff --git a/choreographer/DIR_INDEX.txt b/choreographer/DIR_INDEX.txt new file mode 100644 index 00000000..709f3915 --- /dev/null +++ b/choreographer/DIR_INDEX.txt @@ -0,0 +1,31 @@ +Files +----- + +_browser.py + +A behemoth: contains process control, future storage, and user's primary interface. + +_cli_utils.py + +Functions to be used as scripts from commandline. + +_pipe.py + +The communication layer between choreographer and the browser. + +_tab.py + +Also part of user's primary interface. Intuitive, the user interacts with a "Browser" +which has "Tabs". + +Directories +----------- + +_devtools_protocol_layer/ + +The browser-tab interface is intuitive, but here we have the interface that Chrome's +Devtools Protocol actually provides. + +_system_utils/ + +Some utilities we use to encourage cross-platform compatibility. diff --git a/choreographer/__init__.py b/choreographer/__init__.py index d133b772..6c5fa1a4 100644 --- a/choreographer/__init__.py +++ b/choreographer/__init__.py @@ -1,17 +1,24 @@ -from .browser import Browser -from .browser import browser_which -from .browser import get_browser_path -from .cli_utils import get_browser -from .cli_utils import get_browser_sync -from .tempfile import TempDirectory -from .tempfile import TempDirWarning +"""choreographer is a browser controller for python.""" + +import choreographer._devtools_protocol_layer as protocol + +from ._browser import Browser, BrowserClosedError, browser_which, get_browser_path +from ._cli_utils import get_browser, get_browser_sync +from ._pipe import BlockWarning, PipeClosedError +from ._system_utils._tempfile import TempDirectory, TempDirWarning +from ._tab import Tab __all__ = [ - Browser, - get_browser, - get_browser_sync, - browser_which, - get_browser_path, - TempDirectory, - TempDirWarning, + "BlockWarning", + "Browser", + "BrowserClosedError", + "PipeClosedError", + "Tab", + "TempDirWarning", + "TempDirectory", + "browser_which", + "get_browser", + "get_browser_path", + "get_browser_sync", + "protocol", ] diff --git a/choreographer/browser.py b/choreographer/_browser.py similarity index 83% rename from choreographer/browser.py rename to choreographer/_browser.py index 5f797661..69c8997c 100644 --- a/choreographer/browser.py +++ b/choreographer/_browser.py @@ -7,20 +7,27 @@ import sys import warnings from collections import OrderedDict +from functools import partial +from pathlib import Path from threading import Thread -from .pipe import Pipe -from .pipe import PipeClosedError -from .protocol import DevtoolsProtocolError -from .protocol import ExperimentalFeatureWarning -from .protocol import Protocol -from .protocol import TARGET_NOT_FOUND -from .session import Session -from .system import browser_which -from .tab import Tab -from .target import Target -from .tempfile import TempDirectory -from .tempfile import TempDirWarning +from ._devtools_protocol_layer._protocol import ( + TARGET_NOT_FOUND, + DevtoolsProtocolError, + ExperimentalFeatureWarning, + Protocol, +) +from ._devtools_protocol_layer._session import Session +from ._devtools_protocol_layer._target import Target +from ._pipe import Pipe, PipeClosedError +from ._system_utils._system import browser_which +from ._system_utils._tempfile import TempDirectory, TempDirWarning +from ._tab import Tab + +# importing the below via __file__ causes __name__ weirdness when its exe'd ??? +chromewrapper_path = ( + Path(__file__).resolve().parent / "_system_utils" / "_chrome_wrapper.py" +) class UnhandledMessageWarning(UserWarning): @@ -55,9 +62,10 @@ def _check_loop(self): print("We are in a selector event loop, use loop_hack", file=sys.stderr) self._loop_hack = True - def __init__( + def __init__( # noqa: PLR0915, PLR0912, C901 It's too complex self, path=None, + *, headless=True, debug=False, debug_browser=False, @@ -66,7 +74,7 @@ def __init__( ### Set some defaults self._env = os.environ.copy() # environment for subprocesses self._loop_hack = False # see _check_loop - self.lock = None # TODO where else is this set + self.lock = None self.tabs = OrderedDict() self.sandboxed = False # this is if our processes can't use /tmp @@ -81,7 +89,10 @@ def __init__( path = get_browser_path(skip_local=skip_local) if not path: raise BrowserFailedError( - "Could not find an acceptable browser. Please call `choreo.get_browser()`, set environmental variable BROWSER_PATH or pass `path=/path/to/browser` into the Browser() constructor. The latter two work with Edge.", + "Could not find an acceptable browser. Please call " + "`choreo.get_browser()`, set environmental variable " + "BROWSER_PATH or pass `path=/path/to/browser` into " + "the Browser() constructor. The latter two work with Edge.", ) if "snap" in str(path): self.sandboxed = True # not chromium sandbox, snap sandbox @@ -100,7 +111,7 @@ def __init__( try: self.loop = kwargs.pop("loop", asyncio.get_running_loop()) - except Exception: + except RuntimeError: self.loop = False if self.loop: self.futures = {} @@ -126,8 +137,12 @@ def __init__( try: stderr.fileno() except io.UnsupportedOperation: - warnings.warn( - "A value has been passed to debug_browser which is not compatible with python's Popen. This may be because one was passed to Browser or because sys.stderr has been overridden by a framework. Browser logs will not be handled by python in this case.", + warnings.warn( # noqa: B028 + "A value has been passed to debug_browser which " + "is not compatible with python's Popen. This may " + "be because one was passed to Browser or because " + "sys.stderr has been overridden by a framework. " + "Browser logs will not be handled by python in this case.", ) stderr = None @@ -164,19 +179,17 @@ def __enter__(self): return self # for use with `await Browser()` - # TODO: why have to call __await__ when __aenter__ returns a future def __await__(self): return self.__aenter__().__await__() + # ? why have to call __await__ when __aenter__ returns a future + def _open(self): if platform.system() != "Windows": - self.subprocess = subprocess.Popen( + self.subprocess = subprocess.Popen( # noqa: S603, false positive, input fine [ sys.executable, - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "chrome_wrapper.py", - ), + chromewrapper_path, ], close_fds=True, stdin=self.pipe.read_to_chromium, @@ -185,7 +198,7 @@ def _open(self): env=self._env, ) else: - from .chrome_wrapper import open_browser + from ._system_utils._chrome_wrapper import open_browser self.subprocess = open_browser( to_chromium=self.pipe.read_to_chromium, @@ -200,10 +213,7 @@ async def _open_async(self): if platform.system() != "Windows": self.subprocess = await asyncio.create_subprocess_exec( sys.executable, - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "chrome_wrapper.py", - ), + chromewrapper_path, stdin=self.pipe.read_to_chromium, stdout=self.pipe.write_from_chromium, stderr=self._stderr, @@ -211,7 +221,7 @@ async def _open_async(self): env=self._env, ) else: - from .chrome_wrapper import open_browser + from ._system_utils._chrome_wrapper import open_browser self.subprocess = await open_browser( to_chromium=self.pipe.read_to_chromium, @@ -226,7 +236,8 @@ async def _open_async(self): self.future_self.set_result(self) except (BrowserClosedError, BrowserFailedError, asyncio.CancelledError) as e: raise BrowserFailedError( - "The browser seemed to close immediately after starting. Perhaps adding debug_browser=True will help.", + "The browser seemed to close immediately after starting. " + "Perhaps adding debug_browser=True will help.", ) from e async def _is_closed_async(self, wait=0): @@ -240,26 +251,23 @@ async def _is_closed_async(self, wait=0): if wait == 0: # this never works cause processing wait = 0.15 await asyncio.wait_for(waiter, wait) - return True - except Exception: + except TimeoutError: return False + return True def _is_closed(self, wait=0): if wait == 0: - if self.subprocess.poll() is None: - return False - else: - return True + return self.subprocess.poll() is None else: try: self.subprocess.wait(wait) - return True - except: # noqa + except subprocess.TimeoutExpired: return False + return True # _sync_close and _async_close are basically the same thing - def _sync_close(self): + def _sync_close(self): # noqa: PLR0912, C901 if self._is_closed(): if self.debug: print("Browser was already closed.", file=sys.stderr) @@ -282,8 +290,8 @@ def _sync_close(self): # Start a kill if platform.system() == "Windows": if not self._is_closed(): - subprocess.call( - ["taskkill", "/F", "/T", "/PID", str(self.subprocess.pid)], + subprocess.call( # noqa: S603, false positive, input fine + ["taskkill", "/F", "/T", "/PID", str(self.subprocess.pid)], # noqa: S607 windows full path... stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, ) @@ -299,12 +307,11 @@ def _sync_close(self): return self.subprocess.kill() - if self._is_closed(wait=4): - if self.debug: - print("kill() closed the browser", file=sys.stderr) + if self._is_closed(wait=4) and self.debug: + print("kill() closed the browser", file=sys.stderr) return - async def _async_close(self): + async def _async_close(self): # noqa: PLR0912, C901 if await self._is_closed_async(): if self.debug: print("Browser was already closed.", file=sys.stderr) @@ -323,7 +330,9 @@ async def _async_close(self): # Start a kill if platform.system() == "Windows": if not await self._is_closed_async(): - subprocess.call( + # could we use native asyncio process here? or hackcheck? + await asyncio.to_thread( + subprocess.call, ["taskkill", "/F", "/T", "/PID", str(self.subprocess.pid)], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, @@ -340,22 +349,22 @@ async def _async_close(self): return self.subprocess.kill() - if await self._is_closed_async(wait=4): - if self.debug: - print("kill() closed the browser", file=sys.stderr) + if await self._is_closed_async(wait=4) and self.debug: + print("kill() closed the browser", file=sys.stderr) return - def close(self): + def close(self): # noqa: C901 if self.loop: - async def close_task(): + async def close_task(): # noqa: PLR0912, C901 if self.lock.locked(): return await self.lock.acquire() if not self.future_self.done(): self.future_self.set_exception( BrowserFailedError( - "Close() was called before the browser finished opening- maybe it crashed?", + "Close() was called before the browser " + "finished opening- maybe it crashed?", ), ) for future in self.futures.values(): @@ -402,6 +411,7 @@ async def close_task(): pass self.pipe.close() self.tmp_dir.clean() + return None async def _watchdog(self): self._watchdog_healthy = True @@ -424,10 +434,10 @@ async def _watchdog(self): if self.tmp_dir.exists: self.tmp_dir.delete_manually() - def __exit__(self, type, value, traceback): + def __exit__(self, exc_type, exc_value, exc_traceback): self.close() - async def __aexit__(self, type, value, traceback): + async def __aexit__(self, exc_type, exc_value, exc_traceback): await self.close() # Basic synchronous functions @@ -443,19 +453,21 @@ def _remove_tab(self, target_id): def get_tab(self): if self.tabs.values(): - return list(self.tabs.values())[0] + return next(iter(self.tabs.values())) + return None # Better functions that require asynchronous async def create_tab(self, url="", width=None, height=None): if self.lock.locked(): raise BrowserClosedError("create_tab() called on a closed browser.") if self.headless and (width or height): - warnings.warn( - "Width and height only work for headless chrome mode, they will be ignored.", + warnings.warn( # noqa: B028 + "Width and height only work for headless chrome mode, " + "they will be ignored.", ) width = None height = None - params = dict(url=url) + params = {"url": url} if width: params["width"] = width if height: @@ -493,8 +505,9 @@ async def close_tab(self, target_id): async def create_session(self): if self.lock.locked(): raise BrowserClosedError("create_session() called on a closed browser") - warnings.warn( - "Creating new sessions on Browser() only works with some versions of Chrome, it is experimental.", + warnings.warn( # noqa: B028 + "Creating new sessions on Browser() only works with some " + "versions of Chrome, it is experimental.", ExperimentalFeatureWarning, ) response = await self.browser.send_command("Target.attachToBrowserTarget") @@ -529,12 +542,13 @@ async def populate_targets(self): if e.code == TARGET_NOT_FOUND: if self.debug: print( - f"Target {target_id} not found (could be closed before)", + f"Target {target_id} not found " + "(could be closed before)", file=sys.stderr, ) continue else: - raise e + raise self._add_tab(new_tab) if self.debug: print(f"The target {target_id} was added", file=sys.stderr) @@ -549,15 +563,14 @@ def run_output_thread(self, debug=None): def run_print(debug): if debug: print("Starting run_print loop", file=sys.stderr) - while True: - try: + try: + while True: responses = self.pipe.read_jsons(debug=debug) for response in responses: print(json.dumps(response, indent=4)) - except PipeClosedError: - if self.debug: - print("PipeClosedError caught", file=sys.stderr) - break + except PipeClosedError: + if self.debug: + print("PipeClosedError caught", file=sys.stderr) Thread(target=run_print, args=(debug,)).start() @@ -569,23 +582,26 @@ def _get_target_for_session(self, session_id): return self return None - def run_read_loop(self): + def run_read_loop(self): # noqa: PLR0915, C901 complexity def check_error(result): e = result.exception() if e: self.close() if self.debug: - print(f"Error in run_read_loop: {str(e)}", file=sys.stderr) + print(f"Error in run_read_loop: {e!s}", file=sys.stderr) if not isinstance(e, asyncio.CancelledError): raise e - async def read_loop(): + async def read_loop(): # noqa: PLR0915, PLR0912, C901 complexity try: + read_jsons = partial( + self.pipe.read_jsons, + blocking=True, + debug=self.debug, + ) responses = await self.loop.run_in_executor( self.executor, - self.pipe.read_jsons, - True, # blocking argument to read_jsons - self.debug, # debug argument to read_jsons + read_jsons, ) for response in responses: error = self.protocol.get_error(response) @@ -609,7 +625,8 @@ async def read_loop(): ) or (response["method"] == query) if self.debug: print( - f"Checking subscription key: {query} against event method {response['method']}", + f"Checking subscription key: {query} " + "against event method {response['method']}", file=sys.stderr, ) if match: @@ -621,7 +638,7 @@ async def read_loop(): event_session_id ].unsubscribe(query) - ### THIS IS FOR SUBSCRIBE_ONCE (that's not clear from variable names) + ### THIS IS FOR SUBSCRIBE_ONCE for query, futures in list(subscriptions_futures.items()): match = ( query.endswith("*") @@ -629,7 +646,8 @@ async def read_loop(): ) or (response["method"] == query) if self.debug: print( - f"Checking subscription key: {query} against event method {response['method']}", + f"Checking subscription key: {query} against " + "event method {response['method']}", file=sys.stderr, ) if match: @@ -657,11 +675,15 @@ async def read_loop(): continue # browser closing anyway target_closed = self._get_target_for_session(session_closed) if target_closed: - target_closed._remove_session(session_closed) + target_closed._remove_session(session_closed) # noqa: SLF001 + # TODO(Andrew): private access # noqa: FIX002, TD003 + _ = self.protocol.sessions.pop(session_closed, None) if self.debug: print( - f"Use intern subscription key: 'Target.detachedFromTarget'. Session {session_closed} was closed.", + "Use intern subscription key: " + "'Target.detachedFromTarget'. " + f"Session {session_closed} was closed.", file=sys.stderr, ) @@ -683,8 +705,8 @@ async def read_loop(): else: future.set_result(response) else: - warnings.warn( - f"Unhandled message type:{str(response)}", + warnings.warn( # noqa: B028 + f"Unhandled message type:{response!s}", UnhandledMessageWarning, ) except PipeClosedError: @@ -712,7 +734,7 @@ def write_json(self, obj): def check_future(fut): if fut.exception(): if self.debug: - print(f"Write json future error: {str(fut)}", file=sys.stderr) + print(f"Write json future error: {fut!s}", file=sys.stderr) if not future.done(): print("Setting future based on pipe error", file=sys.stderr) future.set_exception(fut.exception()) diff --git a/choreographer/_cli_utils.py b/choreographer/_cli_utils.py new file mode 100644 index 00000000..a0bbdd58 --- /dev/null +++ b/choreographer/_cli_utils.py @@ -0,0 +1,153 @@ +import argparse +import asyncio +import json +import platform +import shutil +import sys +import urllib.request +import warnings +import zipfile +from pathlib import Path + +# we use arch instead of platform when singular since platform is a package +platforms = ["linux64", "win32", "win64", "mac-x64", "mac-arm64"] + +default_local_exe_path = Path(__file__).resolve().parent / "browser_exe" + +platform_detected = platform.system() +arch_size_detected = "64" if sys.maxsize > 2**32 else "32" +arch_detected = "arm" if platform.processor() == "arm" else "x" + +if platform_detected == "Windows": + chrome_platform_detected = "win" + arch_size_detected +elif platform_detected == "Linux": + chrome_platform_detected = "linux" + arch_size_detected +elif platform_detected == "Darwin": + chrome_platform_detected = "mac-" + arch_detected + arch_size_detected + +default_exe_name = None +if platform_detected.startswith("Linux"): + default_exe_name = ( + default_local_exe_path / f"chrome-{chrome_platform_detected}" / "chrome" + ) +elif platform_detected.startswith("Darwin"): + default_exe_name = ( + default_local_exe_path + / f"chrome-{chrome_platform_detected}" + / "Google Chrome for Testing.app" + / "Contents" + / "MacOS" + / "Google Chrome for Testing" + ) +elif platform_detected.startswith("Win"): + default_exe_name = ( + default_local_exe_path / f"chrome-{chrome_platform_detected}" / "chrome.exe" + ) + + +# https://stackoverflow.com/questions/39296101/python-zipfile-removes-execute-permissions-from-binaries +class ZipFilePermissions(zipfile.ZipFile): + def _extract_member(self, member, targetpath, pwd): + if not isinstance(member, zipfile.ZipInfo): + member = self.getinfo(member) + + path = super()._extract_member(member, targetpath, pwd) + # High 16 bits are os specific (bottom is st_mode flag) + attr = member.external_attr >> 16 + if attr != 0: + Path(path).chmod(attr) + return path + + +def get_browser_cli(): + if "ubuntu" in platform.version().lower(): + warnings.warn( # noqa: B028 + "You are using `get_browser()` on Ubuntu." + " Ubuntu is **very strict** about where binaries come from." + " You have to disable the sandbox with use_sandbox=False" + " when you initialize the browser OR you can install from Ubuntu's" + " package manager.", + UserWarning, + ) + parser = argparse.ArgumentParser(description="tool to help debug problems") + parser.add_argument("--i", "-i", type=int, dest="i") + parser.add_argument("--arch", dest="arch") + parser.add_argument("--path", dest="path") + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + ) + parser.set_defaults(i=-1) + parser.set_defaults(path=default_local_exe_path) + parser.set_defaults(arch=chrome_platform_detected) + parser.set_defaults(verbose=False) + parsed = parser.parse_args() + i = parsed.i + arch = parsed.arch + path = Path(parsed.path) + verbose = parsed.verbose + if not arch or arch not in platforms: + raise RuntimeError( + "You must specify a platform: " + f"linux64, win32, win64, mac-x64, mac-arm64, not {platform}", + ) + print(get_browser_sync(arch=arch, i=i, path=path, verbose=verbose)) + + +def get_browser_sync( + arch=chrome_platform_detected, + i=-1, + path=default_local_exe_path, + *, + verbose=False, +): + path = Path(path) + browser_list = json.loads( + urllib.request.urlopen( + "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json", + ).read(), + ) + version_obj = browser_list["versions"][i] + if verbose: + print(version_obj["version"]) + print(version_obj["revision"]) + chromium_sources = version_obj["downloads"]["chrome"] + url = None + for src in chromium_sources: + if src["platform"] == arch: + url = src["url"] + break + if not path.exists(): + path.mkdir(parents=True) + filename = path / "chrome.zip" + with urllib.request.urlopen(url) as response, filename.open("wb") as out_file: # noqa: S310 audit url + shutil.copyfileobj(response, out_file) + with ZipFilePermissions(filename, "r") as zip_ref: + zip_ref.extractall(path) + + if arch.startswith("linux"): + exe_name = path / f"chrome-{arch}" / "chrome" + elif arch.startswith("mac"): + exe_name = ( + path + / f"chrome-{arch}" + / "Google Chrome for Testing.app" + / "Contents" + / "MacOS" + / "Google Chrome for Testing" + ) + elif arch.startswith("win"): + exe_name = path / f"chrome-{arch}" / "chrome.exe" + + return exe_name + + +# to_thread everything +async def get_browser( + arch=chrome_platform_detected, + i=-1, + path=default_local_exe_path, +): + return await asyncio.to_thread(get_browser_sync, arch=arch, i=i, path=path) diff --git a/choreographer/_cli_utils_no_qa.py b/choreographer/_cli_utils_no_qa.py new file mode 100644 index 00000000..131658a7 --- /dev/null +++ b/choreographer/_cli_utils_no_qa.py @@ -0,0 +1,105 @@ +import argparse +import asyncio +import platform +import subprocess +import sys +import time + +# diagnose function is too weird and ruff guts it +# ruff has line-level and file-level QA suppression +# so lets give diagnose a separate file + +# ruff: noqa: PLR0915, C901, S603, BLE001, S607, PERF203, TRY002 + +# in order, exceptions are: +# - function complexity (statements?) +# - function complexity (algo measure) +# - validate subprocess input arguments +# - blind exception +# - partial executable path +# - performance overhead of try-except in loop +# - make own exceptions + + +def diagnose(): + from choreographer import Browser, browser_which + + parser = argparse.ArgumentParser(description="tool to help debug problems") + parser.add_argument("--no-run", dest="run", action="store_false") + parser.add_argument("--show", dest="headless", action="store_false") + parser.set_defaults(run=True) + parser.set_defaults(headless=True) + args = parser.parse_args() + run = args.run + headless = args.headless + fail = [] + print("*".center(50, "*")) + print("SYSTEM:".center(50, "*")) + print(platform.system()) + print(platform.release()) + print(platform.version()) + print(platform.uname()) + print("BROWSER:".center(50, "*")) + print(browser_which(debug=True)) + print("VERSION INFO:".center(50, "*")) + try: + print("PIP:".center(25, "*")) + print(subprocess.check_output([sys.executable, "-m", "pip", "freeze"]).decode()) + except BaseException as e: + print(f"Error w/ pip: {e}") + try: + print("UV:".center(25, "*")) + print(subprocess.check_output(["uv", "pip", "freeze"]).decode()) + except BaseException as e: + print(f"Error w/ uv: {e}") + try: + print("GIT:".center(25, "*")) + print( + subprocess.check_output( + ["git", "describe", "--all", "--tags", "--long", "--always"], + ).decode(), + ) + except BaseException as e: + print(f"Error w/ git: {e}") + finally: + print(sys.version) + print(sys.version_info) + print("Done with version info.".center(50, "*")) + if run: + try: + print("Sync Test Headless".center(50, "*")) + browser = Browser(debug=True, debug_browser=True, headless=headless) + time.sleep(3) + browser.close() + except BaseException as e: + fail.append(("Sync test headless", e)) + finally: + print("Done with sync test headless".center(50, "*")) + + async def test_headless(): + browser = await Browser(debug=True, debug_browser=True, headless=headless) + await asyncio.sleep(3) + await browser.close() + + try: + print("Async Test Headless".center(50, "*")) + asyncio.run(test_headless()) + except BaseException as e: + fail.append(("Async test headless", e)) + finally: + print("Done with async test headless".center(50, "*")) + print() + sys.stdout.flush() + sys.stderr.flush() + if fail: + import traceback + + for exception in fail: + try: + print(f"Error in: {exception[0]}") + traceback.print_exception(exception[1]) + except BaseException: + print("Couldn't print traceback for:") + print(str(exception)) + raise BaseException("There was an exception, see above.") + print("Thank you! Please share these results with us!") diff --git a/choreographer/_devtools_protocol_layer/__init__.py b/choreographer/_devtools_protocol_layer/__init__.py new file mode 100644 index 00000000..82a69b76 --- /dev/null +++ b/choreographer/_devtools_protocol_layer/__init__.py @@ -0,0 +1,17 @@ +from ._protocol import ( + DevtoolsProtocolError, + ExperimentalFeatureWarning, + MessageTypeError, + MissingKeyError, +) +from ._session import Session +from ._target import Target + +__all__ = [ + "DevtoolsProtocolError", + "ExperimentalFeatureWarning", + "MessageTypeError", + "MissingKeyError", + "Session", + "Target", +] diff --git a/choreographer/protocol.py b/choreographer/_devtools_protocol_layer/_protocol.py similarity index 82% rename from choreographer/protocol.py rename to choreographer/_devtools_protocol_layer/_protocol.py index b994580a..b8dbe402 100644 --- a/choreographer/protocol.py +++ b/choreographer/_devtools_protocol_layer/_protocol.py @@ -28,7 +28,7 @@ class ExperimentalFeatureWarning(UserWarning): class Protocol: - def __init__(self, debug=False): + def __init__(self, *, debug=False): # Stored Resources # Configuration @@ -38,8 +38,8 @@ def __init__(self, debug=False): self.sessions = {} def calculate_key(self, response): - session_id = response["sessionId"] if "sessionId" in response else "" - message_id = response["id"] if "id" in response else None + session_id = response.get("sessionId", "") + message_id = response.get("id", None) if message_id is None: return None return (session_id, message_id) @@ -61,14 +61,15 @@ def verify_json(self, obj): if len(obj.keys()) != n_keys: raise RuntimeError( - "Message objects must have id and method keys, and may have params and sessionId keys.", + "Message objects must have id and method keys, " + "and may have params and sessionId keys.", ) def match_key(self, response, key): session_id, message_id = key - if "session_id" not in response and session_id == "": - pass - elif "session_id" in response and response["session_id"] == session_id: + if ("session_id" not in response and session_id == "") or ( + "session_id" in response and response["session_id"] == session_id + ): pass else: return False @@ -102,6 +103,4 @@ def get_error(self, response): def is_event(self, response): required_keys = {"method", "params"} - if required_keys <= response.keys() and "id" not in response: - return True - return False + return required_keys <= response.keys() and "id" not in response diff --git a/choreographer/session.py b/choreographer/_devtools_protocol_layer/_session.py similarity index 90% rename from choreographer/session.py rename to choreographer/_devtools_protocol_layer/_session.py index be1dae7e..2c2058d8 100644 --- a/choreographer/session.py +++ b/choreographer/_devtools_protocol_layer/_session.py @@ -26,12 +26,13 @@ def send_command(self, command, params=None): return self.browser.write_json(json_command) - def subscribe(self, string, callback, repeating=True): + def subscribe(self, string, callback, *, repeating=True): if not self.browser.loop: raise ValueError("You may use this method with a loop in Browser") if string in self.subscriptions: raise ValueError( - "You are already subscribed to this string, duplicate subscriptions are not allowed.", + "You are already subscribed to this string, " + "duplicate subscriptions are not allowed.", ) else: self.subscriptions[string] = (callback, repeating) diff --git a/choreographer/target.py b/choreographer/_devtools_protocol_layer/_target.py similarity index 81% rename from choreographer/target.py rename to choreographer/_devtools_protocol_layer/_target.py index ef9df52f..59392c12 100644 --- a/choreographer/target.py +++ b/choreographer/_devtools_protocol_layer/_target.py @@ -1,8 +1,8 @@ import sys from collections import OrderedDict -from .protocol import DevtoolsProtocolError -from .session import Session +from ._protocol import DevtoolsProtocolError +from ._session import Session class Target: @@ -31,11 +31,12 @@ def _remove_session(self, session_id): async def create_session(self): if not self.browser.loop: raise RuntimeError( - "There is no eventloop, or was not passed to browser. Cannot use async methods", + "There is no eventloop, or was not passed " + "to browser. Cannot use async methods", ) response = await self.browser.send_command( "Target.attachToTarget", - params=dict(targetId=self.target_id, flatten=True), + params={"targetId": self.target_id, "flatten": True}, ) if "error" in response: raise RuntimeError("Could not create session") from DevtoolsProtocolError( @@ -49,7 +50,8 @@ async def create_session(self): async def close_session(self, session_id): if not self.browser.loop: raise RuntimeError( - "There is no eventloop, or was not passed to browser. Cannot use async methods", + "There is no eventloop, or was not passed to " + "browser. Cannot use async methods", ) if isinstance(session_id, Session): session_id = session_id.session_id @@ -68,18 +70,18 @@ async def close_session(self, session_id): def send_command(self, command, params=None): if not self.sessions.values(): raise RuntimeError("Cannot send_command without at least one valid session") - return list(self.sessions.values())[0].send_command(command, params) + return next(iter(self.sessions.values())).send_command(command, params) def _get_first_session(self): if not self.sessions.values(): raise RuntimeError( "Cannot use this method without at least one valid session", ) - return list(self.sessions.values())[0] + return next(iter(self.sessions.values())) - def subscribe(self, string, callback, repeating=True): + def subscribe(self, string, callback, *, repeating=True): session = self._get_first_session() - session.subscribe(string, callback, repeating) + session.subscribe(string, callback, repeating=repeating) def unsubscribe(self, string): session = self._get_first_session() diff --git a/choreographer/pipe.py b/choreographer/_pipe.py similarity index 77% rename from choreographer/pipe.py rename to choreographer/_pipe.py index 13f83b71..1dd6e1e6 100644 --- a/choreographer/pipe.py +++ b/choreographer/_pipe.py @@ -9,14 +9,16 @@ with_block = bool(sys.version_info[:3] >= (3, 12) or platform.system() != "Windows") +class PipeClosedError(IOError): + pass + + class BlockWarning(UserWarning): pass -# TODO: don't know about this -# TODO: use has_attr instead of np.integer, you'll be fine class MultiEncoder(simplejson.JSONEncoder): - """Special json encoder for numpy types""" + """Special json encoder for numpy types.""" def default(self, obj): if hasattr(obj, "dtype") and obj.dtype.kind == "i" and obj.shape == (): @@ -30,12 +32,8 @@ def default(self, obj): return simplejson.JSONEncoder.default(self, obj) -class PipeClosedError(IOError): - pass - - class Pipe: - def __init__(self, debug=False, json_encoder=MultiEncoder): + def __init__(self, *, debug=False, json_encoder=MultiEncoder): self.read_from_chromium, self.write_from_chromium = list(os.pipe()) self.read_to_chromium, self.write_to_chromium = list(os.pipe()) self.debug = debug @@ -51,10 +49,6 @@ def serialize(self, obj): ignore_nan=True, cls=self.json_encoder, ) - # if debug: - # print(f"write_json: {message}", file=sys.stderr) - # windows may print weird characters if we set utf-8 instead of utf-16 - # check this TODO return message.encode("utf-8") + b"\0" def deserialize(self, message): @@ -62,25 +56,27 @@ def deserialize(self, message): def write_json(self, obj, debug=None): if self.shutdown_lock.locked(): - raise PipeClosedError() + raise PipeClosedError if not debug: debug = self.debug if debug: - print("write_json:", file=sys.stderr) + print(f"write_json: {obj}", file=sys.stderr) encoded_message = self.serialize(obj) + if debug: + print(f"write_json: {encoded_message}", file=sys.stderr) try: os.write(self.write_to_chromium, encoded_message) except OSError as e: self.close() - raise PipeClosedError() from e + raise PipeClosedError from e if debug: print("wrote_json.", file=sys.stderr) - def read_jsons(self, blocking=True, debug=None): + def read_jsons(self, *, blocking=True, debug=None): # noqa: PLR0912, C901 branches, complexity if self.shutdown_lock.locked(): - raise PipeClosedError() + raise PipeClosedError if not with_block and not blocking: - warnings.warn( + warnings.warn( # noqa: B028 "Windows python version < 3.12 does not support non-blocking", BlockWarning, ) @@ -97,7 +93,7 @@ def read_jsons(self, blocking=True, debug=None): os.set_blocking(self.read_from_chromium, blocking) except OSError as e: self.close() - raise PipeClosedError() from e + raise PipeClosedError from e try: raw_buffer = None # if we fail in read, we already defined raw_buffer = os.read( @@ -108,7 +104,7 @@ def read_jsons(self, blocking=True, debug=None): # we seem to need {bye} even if chrome closes NOTE if debug: print("read_jsons pipe was closed, raising", file=sys.stderr) - raise PipeClosedError() + raise PipeClosedError while raw_buffer[-1] != 0: # still not great, return what you have if with_block: @@ -121,12 +117,10 @@ def read_jsons(self, blocking=True, debug=None): except OSError as e: self.close() if debug: - print(f"caught OSError in read() {str(e)}", file=sys.stderr) + print(f"caught OSError in read() {e!s}", file=sys.stderr) if not raw_buffer or raw_buffer == b"{bye}\n": - raise PipeClosedError() - # TODO this could be hard to test as it is a real OS corner case - # but possibly raw_buffer is partial - # and we don't check for partials + raise PipeClosedError from e + # this could be hard to test as it is a real OS corner case decoded_buffer = raw_buffer.decode("utf-8") if debug: print(decoded_buffer, file=sys.stderr) @@ -134,14 +128,15 @@ def read_jsons(self, blocking=True, debug=None): if raw_message: try: jsons.append(self.deserialize(raw_message)) - except BaseException as e: + except simplejson.decoder.JSONDecodeError as e: if debug: print( f"Problem with {raw_message} in json: {e}", file=sys.stderr, ) if debug: - # This debug is kinda late but the jsons package helps with decoding, since JSON optionally + # This debug is kinda late but the jsons package + # helps with decoding, since JSON optionally # allows escaping unicode characters, which chrome does (oof) print(f"read_jsons: {jsons[-1]}", file=sys.stderr) return jsons @@ -150,25 +145,28 @@ def _unblock_fd(self, fd): try: if with_block: os.set_blocking(fd, False) - except BaseException as e: + except BaseException as e: # noqa: BLE001 OS errors are not consistent, catch blind + # also, best effort. if self.debug: - print(f"Expected error unblocking {str(fd)}: {str(e)}", file=sys.stderr) + print(f"Expected error unblocking {fd!s}: {e!s}", file=sys.stderr) def _close_fd(self, fd): try: os.close(fd) - except BaseException as e: + except BaseException as e: # noqa: BLE001 OS errors are not consistent, catch blind + # also, best effort. if self.debug: - print(f"Expected error closing {str(fd)}: {str(e)}", file=sys.stderr) + print(f"Expected error closing {fd!s}: {e!s}", file=sys.stderr) def _fake_bye(self): self._unblock_fd(self.write_from_chromium) try: os.write(self.write_from_chromium, b"{bye}\n") - except BaseException as e: + except BaseException as e: # noqa: BLE001 OS errors are not consistent, catch blind + # also, best effort. if self.debug: print( - f"Caught expected error in self-wrte bye: {str(e)}", + f"Caught expected error in self-wrte bye: {e!s}", file=sys.stderr, ) @@ -183,4 +181,4 @@ def close(self): self._close_fd(self.write_to_chromium) # no more writes self._close_fd(self.write_from_chromium) # we're done with writes self._close_fd(self.read_from_chromium) # no more attempts at read - self._close_fd(self.read_to_chromium) # + self._close_fd(self.read_to_chromium) diff --git a/choreographer/_system_utils/__init__.py b/choreographer/_system_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/choreographer/chrome_wrapper.py b/choreographer/_system_utils/_chrome_wrapper.py similarity index 59% rename from choreographer/chrome_wrapper.py rename to choreographer/_system_utils/_chrome_wrapper.py index e46fb4ee..0dfd5d9d 100644 --- a/choreographer/chrome_wrapper.py +++ b/choreographer/_system_utils/_chrome_wrapper.py @@ -1,43 +1,42 @@ import os # importing modules has side effects, so we do this before imports -# linter complains. -env = None - -# chromium reads on 3, writes on 4 -# really this means windows only, but a more readable platform.system() +# not everyone uses the wrapper as a process if __name__ == "__main__": + # chromium reads on 3, writes on 4 os.dup2(0, 3) # make our stdin their input os.dup2(1, 4) # make our stdout their output +import asyncio +import platform +import signal +import subprocess +import sys +from functools import partial -import subprocess # noqa -import signal # noqa -import platform # noqa -import asyncio # noqa -import sys # noqa +_inheritable = True system = platform.system() if system == "Windows": - import msvcrt # noqa + import msvcrt else: - os.set_inheritable(4, True) - os.set_inheritable(3, True) + os.set_inheritable(4, _inheritable) + os.set_inheritable(3, _inheritable) -def open_browser( +def open_browser( # noqa: PLR0913 too many args in func to_chromium, from_chromium, stderr=sys.stderr, env=None, loop=None, + *, loop_hack=False, ): path = env.get("BROWSER_PATH") if not path: raise RuntimeError("No browser path was passed to run") - # TODO: check that browser exists (windows, mac) w/ --version (TODO: how to do on wndows?) user_data_dir = env["USER_DATA_DIR"] @@ -62,11 +61,11 @@ def open_browser( system_dependent = {} if system == "Windows": to_chromium_handle = msvcrt.get_osfhandle(to_chromium) - os.set_handle_inheritable(to_chromium_handle, True) + os.set_handle_inheritable(to_chromium_handle, _inheritable) from_chromium_handle = msvcrt.get_osfhandle(from_chromium) - os.set_handle_inheritable(from_chromium_handle, True) + os.set_handle_inheritable(from_chromium_handle, _inheritable) cli += [ - f"--remote-debugging-io-pipes={str(to_chromium_handle)},{str(from_chromium_handle)}", + f"--remote-debugging-io-pipes={to_chromium_handle!s},{from_chromium_handle!s}", ] system_dependent["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP system_dependent["close_fds"] = False @@ -74,7 +73,7 @@ def open_browser( system_dependent["pass_fds"] = (to_chromium, from_chromium) if not loop: - return subprocess.Popen( + return subprocess.Popen( # noqa: S603 input fine. cli, stderr=stderr, **system_dependent, @@ -82,7 +81,7 @@ def open_browser( elif loop_hack: def run(): - return subprocess.Popen( + return subprocess.Popen( # noqa: S603 input fine. cli, stderr=stderr, **system_dependent, @@ -98,24 +97,22 @@ def run(): ) -# THIS MAY BE PART OF KILL -def kill_proc(*nope): - global process +def kill_proc(process, _sig_num, _frame): process.terminate() - process.wait(3) # 3 seconds to clean up nicely, it's a lot + process.wait(5) # 5 seconds to clean up nicely, it's a lot process.kill() if __name__ == "__main__": process = open_browser(to_chromium=3, from_chromium=4, env=os.environ) - signal.signal(signal.SIGTERM, kill_proc) - signal.signal(signal.SIGINT, kill_proc) + kp = partial(kill_proc, process) + signal.signal(signal.SIGTERM, kp) + signal.signal(signal.SIGINT, kp) process.wait() - # NOTE is bad but we don't detect closed pipe (stdout doesn't close from other end?) - # doesn't seem to impact in sync, maybe because we're doing manual cleanup in sequence - # should try to see if shutting down chrome browser can provoke pipeerror in threadmode and asyncmode - # i think getting rid of this would be really good, and seeing what's going on in both - # async and sync mode would help - # see NOTE in pipe.py (line 38?) + + # not great but it seems that + # pipe isn't always closed when chrome closes + # so we pretend to be chrome and send a bye instead + # also, above depends on async/sync, platform, etc print("{bye}") diff --git a/choreographer/system.py b/choreographer/_system_utils/_system.py similarity index 80% rename from choreographer/system.py rename to choreographer/_system_utils/_system.py index 8f22d41f..de5f1a04 100644 --- a/choreographer/system.py +++ b/choreographer/_system_utils/_system.py @@ -3,7 +3,7 @@ import shutil import sys -from .cli_utils import default_exe_name +from choreographer._cli_utils import default_exe_name chrome = [ "chrome", @@ -19,7 +19,6 @@ chromium = ["chromium", "chromium-browser"] # firefox = // this needs to be tested # brave = // this needs to be tested -# edge = // this needs to be tested system = platform.system() @@ -28,7 +27,8 @@ if system == "Windows": default_path_chrome = [ r"c:\Program Files\Google\Chrome\Application\chrome.exe", - f"c:\\Users\\{os.environ.get('USER', 'default')}\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe", + f"c:\\Users\\{os.environ.get('USER', 'default')}\\AppData\\" + "Local\\Google\\Chrome\\Application\\chrome.exe", ] elif system == "Linux": default_path_chrome = [ @@ -44,8 +44,8 @@ def which_windows_chrome(): try: - import winreg import re + import winreg command = winreg.QueryValueEx( winreg.OpenKey( @@ -57,23 +57,22 @@ def which_windows_chrome(): "", )[0] exe = re.search('"(.*?)"', command).group(1) - return exe - except BaseException: + except BaseException: # noqa: BLE001 don't care why, best effort search return None + return exe def _is_exe(path): - res = False try: - res = os.access(path, os.X_OK) - finally: - return res + return os.access(path, os.X_OK) + except: # noqa: E722 bare except ok, weird errors, best effort. + return False -def browser_which(executable_name=chrome, debug=False, skip_local=False): +def browser_which(executable_name=chrome, *, debug=False, skip_local=False): # noqa: PLR0912, C901 if debug: print(f"Checking {default_exe_name}", file=sys.stderr) - if not skip_local and os.path.exists(default_exe_name): + if not skip_local and default_exe_name.exists(): if debug: print(f"Found {default_exe_name}", file=sys.stderr) return default_exe_name @@ -81,7 +80,7 @@ def browser_which(executable_name=chrome, debug=False, skip_local=False): if isinstance(executable_name, str): executable_name = [executable_name] if platform.system() == "Windows": - os.environ["NoDefaultCurrentDirectoryInExePath"] = "0" + os.environ["NoDefaultCurrentDirectoryInExePath"] = "0" # noqa: SIM112 var name set by windows for exe in executable_name: if debug: print(f"looking for {exe}", file=sys.stderr) diff --git a/choreographer/tempfile.py b/choreographer/_system_utils/_tempfile.py similarity index 72% rename from choreographer/tempfile.py rename to choreographer/_system_utils/_tempfile.py index 5872471c..771b57de 100644 --- a/choreographer/tempfile.py +++ b/choreographer/_system_utils/_tempfile.py @@ -15,18 +15,19 @@ class TempDirWarning(UserWarning): # Python's built-in temporary directory functions are lacking -# In short, they don't handle removal well, and there's lots of API changes over recent versions. +# In short, they don't handle removal well, and there's +# lots of API changes over recent versions. # Here we have our own class to deal with it. class TempDirectory: - def __init__(self, path=None, sneak=False): + def __init__(self, path=None, *, sneak=False): self.debug = True # temporary! TODO self._with_onexc = bool(sys.version_info[:3] >= (3, 12)) args = {} if path: - args = dict(dir=path) + args = {"dir": path} elif sneak: - args = dict(prefix=".choreographer-", dir=Path.home()) + args = {"prefix": ".choreographer-", "dir": Path.home()} if platform.system() != "Windows": self.temp_dir = tempfile.TemporaryDirectory(**args) @@ -46,13 +47,13 @@ def __init__(self, path=None, sneak=False): else: self.temp_dir = tempfile.TemporaryDirectory(**args) - self.path = self.temp_dir.name + self.path = Path(self.temp_dir.name) self.exists = True if self.debug: print(f"TEMP DIR PATH: {self.path}", file=sys.stderr) - def delete_manually(self, check_only=False): - if not os.path.exists(self.path): + def delete_manually(self, *, check_only=False): # noqa: C901, PLR0912 + if not self.path.exists(): self.exists = False if self.debug: print( @@ -68,34 +69,34 @@ def delete_manually(self, check_only=False): n_files += len(files) if not check_only: for f in files: - fp = os.path.join(root, f) + fp = Path(root) / f if self.debug: print(f"Removing file: {fp}", file=sys.stderr) try: - os.chmod(fp, stat.S_IWUSR) - os.remove(fp) + fp.chmod(stat.S_IWUSR) + fp.unlink() if self.debug: print("Success", file=sys.stderr) - except BaseException as e: + except BaseException as e: # noqa: BLE001 yes catch and report errors.append((fp, e)) for d in dirs: - fp = os.path.join(root, d) + fp = Path(root) / d if self.debug: print(f"Removing dir: {fp}", file=sys.stderr) try: - os.chmod(fp, stat.S_IWUSR) - os.rmdir(fp) + fp.chmod(stat.S_IWUSR) + fp.rmdir() if self.debug: print("Success", file=sys.stderr) - except BaseException as e: + except BaseException as e: # noqa: BLE001 yes catch and report errors.append((fp, e)) # clean up directory if not check_only: try: - os.chmod(self.path, stat.S_IWUSR) - os.rmdir(self.path) - except BaseException as e: + self.path.chmod(stat.S_IWUSR) + self.path.rmdir() + except BaseException as e: # noqa: BLE001 yes catch and report errors.append((self.path, e)) if check_only: @@ -104,8 +105,9 @@ def delete_manually(self, check_only=False): else: self.exists = False elif errors: - warnings.warn( - f"The temporary directory could not be deleted, execution will continue. errors: {errors}", + warnings.warn( # noqa: B028 + "The temporary directory could not be deleted, " + f"execution will continue. errors: {errors}", TempDirWarning, ) self.exists = True @@ -114,24 +116,23 @@ def delete_manually(self, check_only=False): return n_dirs, n_files, errors - def clean(self): + def clean(self): # noqa: C901 try: # no faith in this python implementation, always fails with windows # very unstable recently as well, lots new arguments in tempfile package self.temp_dir.cleanup() self.exists = False - return - except BaseException as e: + except BaseException as e: # noqa: BLE001 yes catch and report if self.debug: print( - f"First tempdir deletion failed: TempDirWarning: {str(e)}", + f"First tempdir deletion failed: TempDirWarning: {e!s}", file=sys.stderr, ) - def remove_readonly(func, path, excinfo): + def remove_readonly(func, path, _excinfo): try: - os.chmod(path, stat.S_IWUSR) - func(path) + Path(path).chmod(stat.S_IWUSR) + func(str(path)) except FileNotFoundError: pass @@ -142,13 +143,12 @@ def remove_readonly(func, path, excinfo): shutil.rmtree(self.path, onerror=remove_readonly) self.exists = False del self.temp_dir - return except FileNotFoundError: pass # it worked! - except BaseException as e: + except BaseException as e: # noqa: BLE001 yes catch like this and report and try if self.debug: print( - f"Second tmpdir deletion failed (shutil.rmtree): {str(e)}", + f"Second tmpdir deletion failed (shutil.rmtree): {e!s}", file=sys.stderr, ) self.delete_manually(check_only=True) @@ -163,6 +163,6 @@ def extra_clean(): t.run() if self.debug: print( - f"Tempfile still exists?: {bool(os.path.exists(str(self.path)))}", + f"Tempfile still exists?: {self.path.exists()!s}", file=sys.stderr, ) diff --git a/choreographer/tab.py b/choreographer/_tab.py similarity index 79% rename from choreographer/tab.py rename to choreographer/_tab.py index 964f58ef..7b38db5a 100644 --- a/choreographer/tab.py +++ b/choreographer/_tab.py @@ -1,4 +1,4 @@ -from .target import Target +from ._devtools_protocol_layer._target import Target class Tab(Target): diff --git a/choreographer/cli_utils.py b/choreographer/cli_utils.py deleted file mode 100644 index 80983821..00000000 --- a/choreographer/cli_utils.py +++ /dev/null @@ -1,244 +0,0 @@ -import argparse -import asyncio -import json -import os -import platform -import shutil -import subprocess -import sys -import time -import urllib.request -import warnings -import zipfile - -# we use arch instead of platform when singular since platform is a package -platforms = ["linux64", "win32", "win64", "mac-x64", "mac-arm64"] - -default_local_exe_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "browser_exe", -) - -platform_detected = platform.system() -arch_size_detected = "64" if sys.maxsize > 2**32 else "32" -arch_detected = "arm" if platform.processor() == "arm" else "x" - -if platform_detected == "Windows": - chrome_platform_detected = "win" + arch_size_detected -elif platform_detected == "Linux": - chrome_platform_detected = "linux" + arch_size_detected -elif platform_detected == "Darwin": - chrome_platform_detected = "mac-" + arch_detected + arch_size_detected - -default_exe_name = None -if platform_detected.startswith("Linux"): - default_exe_name = os.path.join( - default_local_exe_path, - f"chrome-{chrome_platform_detected}", - "chrome", - ) -elif platform_detected.startswith("Darwin"): - default_exe_name = os.path.join( - default_local_exe_path, - f"chrome-{chrome_platform_detected}", - "Google Chrome for Testing.app", - "Contents", - "MacOS", - "Google Chrome for Testing", - ) -elif platform_detected.startswith("Win"): - default_exe_name = os.path.join( - default_local_exe_path, - f"chrome-{chrome_platform_detected}", - "chrome.exe", - ) - - -# https://stackoverflow.com/questions/39296101/python-zipfile-removes-execute-permissions-from-binaries -class ZipFilePermissions(zipfile.ZipFile): - def _extract_member(self, member, targetpath, pwd): - if not isinstance(member, zipfile.ZipInfo): - member = self.getinfo(member) - - path = super()._extract_member(member, targetpath, pwd) - # High 16 bits are os specific (bottom is st_mode flag) - attr = member.external_attr >> 16 - if attr != 0: - os.chmod(path, attr) - return path - - -def get_browser_cli(): - if "ubuntu" in platform.version().lower(): - warnings.warn( - "You are using `get_browser()` on Ubuntu." - " Ubuntu is **very strict** about where binaries come from." - " You have to disable the sandbox with use_sandbox=False" - " when you initialize the browser OR you can install from Ubuntu's" - " package manager.", - UserWarning, - ) - parser = argparse.ArgumentParser(description="tool to help debug problems") - parser.add_argument("--i", "-i", type=int, dest="i") - parser.add_argument("--arch", dest="arch") - parser.add_argument("--path", dest="path") # TODO, unused - parser.add_argument( - "-v", - "--verbose", - dest="verbose", - action="store_true", - ) # TODO, unused - parser.set_defaults(i=-1) - parser.set_defaults(path=default_local_exe_path) - parser.set_defaults(arch=chrome_platform_detected) - parser.set_defaults(verbose=False) - parsed = parser.parse_args() - i = parsed.i - arch = parsed.arch - path = parsed.path - verbose = parsed.verbose - if not arch or arch not in platforms: - raise RuntimeError( - f"You must specify a platform: linux64, win32, win64, mac-x64, mac-arm64, not {platform}", - ) - print(get_browser_sync(arch=arch, i=i, path=path, verbose=verbose)) - - -def get_browser_sync( - arch=chrome_platform_detected, - i=-1, - path=default_local_exe_path, - verbose=False, -): - browser_list = json.loads( - urllib.request.urlopen( - "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json", - ).read(), - ) - version_obj = browser_list["versions"][i] - if verbose: - print(version_obj["version"]) - print(version_obj["revision"]) - chromium_sources = version_obj["downloads"]["chrome"] - url = None - for src in chromium_sources: - if src["platform"] == arch: - url = src["url"] - break - if not os.path.exists(path): - os.makedirs(path) - filename = os.path.join(path, "chrome.zip") - with urllib.request.urlopen(url) as response, open(filename, "wb") as out_file: - shutil.copyfileobj(response, out_file) - with ZipFilePermissions(filename, "r") as zip_ref: - zip_ref.extractall(path) - - if arch.startswith("linux"): - exe_name = os.path.join(path, f"chrome-{arch}", "chrome") - elif arch.startswith("mac"): - exe_name = os.path.join( - path, - f"chrome-{arch}", - "Google Chrome for Testing.app", - "Contents", - "MacOS", - "Google Chrome for Testing", - ) - elif arch.startswith("win"): - exe_name = os.path.join(path, f"chrome-{arch}", "chrome.exe") - - return exe_name - - -# to_thread everything -async def get_browser( - arch=chrome_platform_detected, - i=-1, - path=default_local_exe_path, -): - return await asyncio.to_thread(get_browser_sync, arch=arch, i=i, path=path) - - -def diagnose(): - from choreographer import Browser, browser_which - - parser = argparse.ArgumentParser(description="tool to help debug problems") - parser.add_argument("--no-run", dest="run", action="store_false") - parser.add_argument("--show", dest="headless", action="store_false") - parser.set_defaults(run=True) - parser.set_defaults(headless=True) - args = parser.parse_args() - run = args.run - headless = args.headless - fail = [] - print("*".center(50, "*")) - print("SYSTEM:".center(50, "*")) - print(platform.system()) - print(platform.release()) - print(platform.version()) - print(platform.uname()) - print("BROWSER:".center(50, "*")) - print(browser_which(debug=True)) - print("VERSION INFO:".center(50, "*")) - try: - print("PIP:".center(25, "*")) - print(subprocess.check_output([sys.executable, "-m", "pip", "freeze"]).decode()) - except BaseException as e: - print(f"Error w/ pip: {e}") - try: - print("UV:".center(25, "*")) - print(subprocess.check_output(["uv", "pip", "freeze"]).decode()) - except BaseException as e: - print(f"Error w/ uv: {e}") - try: - print("GIT:".center(25, "*")) - print( - subprocess.check_output( - ["git", "describe", "--all", "--tags", "--long", "--always"], - ).decode(), - ) - except BaseException as e: - print(f"Error w/ git: {e}") - finally: - print(sys.version) - print(sys.version_info) - print("Done with version info.".center(50, "*")) - pass - if run: - try: - print("Sync Test Headless".center(50, "*")) - browser = Browser(debug=True, debug_browser=True, headless=headless) - time.sleep(3) - browser.close() - except BaseException as e: - fail.append(("Sync test headless", e)) - finally: - print("Done with sync test headless".center(50, "*")) - - async def test_headless(): - browser = await Browser(debug=True, debug_browser=True, headless=headless) - await asyncio.sleep(3) - await browser.close() - - try: - print("Async Test Headless".center(50, "*")) - asyncio.run(test_headless()) - except BaseException as e: - fail.append(("Async test headless", e)) - finally: - print("Done with async test headless".center(50, "*")) - print("") - sys.stdout.flush() - sys.stderr.flush() - if fail: - import traceback - - for exception in fail: - try: - print(f"Error in: {exception[0]}") - traceback.print_exception(exception[1]) - except BaseException: - print("Couldn't print traceback for:") - print(str(exception)) - raise BaseException("There was an exception, see above.") - print("Thank you! Please share these results with us!") diff --git a/pyproject.toml b/pyproject.toml index 5fa25d89..4858f517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,10 +22,15 @@ maintainers = [ {name = "Andrew Pikul", email = "ajpikul@gmail.com"}, ] dependencies = [ - "simplejson" - ] + "logistro>=1.0.2", + "simplejson", +] -[project.optional-dependencies] +[project.urls] +Homepage = "https://github.com/plotly/choreographer" +Repository = "https://github.com/geopozo/logistro" + +[dependency-groups] dev = [ "pytest", "pytest-asyncio", @@ -35,9 +40,48 @@ dev = [ "numpy" ] +# uv doens't allow dependency groups to have separate python requirements +# it resolves everything all at once +# this group we need to require higher python +# and only resolve if explicitly asked for + +#docs = [ +# "mkquixote @ git+ssh://git@github.com/geopozo/mkquixote", +# "mkdocs>=1.6.1", +# "mkdocs-material>=9.5.49", +#] + [project.scripts] -choreo_diagnose = "choreographer.cli_utils:diagnose" -choreo_get_browser = "choreographer.cli_utils:get_browser_cli" +choreo_diagnose = "choreographer._cli_utils_no_qa:diagnose" +choreo_get_browser = "choreographer._cli_utils:get_browser_cli" + +[tool.ruff.lint] +select = ["ALL"] +ignore = [ + "ANN", # no types + "EM", # allow strings in raise(), despite python being ugly about it + "TRY003", # allow long error messages inside raise() + "D203", # No blank before class docstring (D211 = require blank line) + "D212", # Commit message style docstring is D213, ignore D212 + "COM812", # manual says linter rule conflicts with formatter + "ISC001", # manual says litner rule conflicts with formatter + "RET504", # Allow else if unnecessary because more readable + "RET505", # Allow else if unnecessary because more readable + "RET506", # Allow else if unnecessary because more readable + "RET507", # Allow else if unnecessary because more readable + "RET508", # Allow else if unnecessary because more readable + "RUF012", # We don't do typing, so no typing + "SIM105", # Too opionated (try-except-pass) + "T201", # no print, remove after logistro TODO + "PT003", # scope="function" implied but I like readability + ] + +[tool.ruff.lint.per-file-ignores] +"tests/*" = [ + "D", # ignore docstring errors + "S101", # allow assert + "INP001", # no need for __init__ in test directories + ] # Format breaks this anyway # [tool.ruff.lint] @@ -45,6 +89,7 @@ choreo_get_browser = "choreographer.cli_utils:get_browser_cli" [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" +log_cli = true [tool.poe.tasks] _test_proc = "pytest -W error -n auto -v -rfE --capture=fd tests/test_process.py" diff --git a/tests/conftest.py b/tests/conftest.py index 6d9ea9da..3b54b349 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ ##### Parameterized Arguments # Are used to re-run tests under different conditions +VERBOSITY_FOR_DEBUG = 3 @pytest.fixture(params=[True, False], ids=["enable_sandbox", ""]) @@ -40,7 +41,8 @@ def debug_browser(request): return request.param -# --headless is the default flag for most tests, but you can set --no-headless if you want to watch +# --headless is the default flag for most tests, +# but you can set --no-headless if you want to watch def pytest_addoption(parser): parser.addoption("--headless", action="store_true", dest="headless", default=True) parser.addoption("--no-headless", dest="headless", action="store_false") @@ -50,7 +52,7 @@ def pytest_addoption(parser): @pytest_asyncio.fixture(scope="function", loop_scope="function") async def browser(request): headless = request.config.getoption("--headless") - debug = request.config.get_verbosity() > 2 + debug = request.config.get_verbosity() >= VERBOSITY_FOR_DEBUG debug_browser = None if debug else False browser = await choreo.Browser( headless=headless, @@ -92,7 +94,8 @@ async def wrapped_test_fn(*args, **kwargs): ) except TimeoutError: pytest.fail( - f"Test {item.name} failed a timeout. This can be extended, but shouldn't be. See conftest.py.", + f"Test {item.name} failed a timeout. " + "This can be extended, but shouldn't be. See conftest.py.", ) item.obj = wrapped_test_fn @@ -111,11 +114,12 @@ def pytest_configure(): # buffering mechanics @pytest.fixture(scope="function") def capteesys(request): - from _pytest import capture import warnings + from _pytest import capture + if hasattr(capture, "capteesys"): - warnings.warn( + warnings.warn( # noqa: B028 ( "You are using a polyfill for capteesys, but this" " version of pytest supports it natively- you may" @@ -143,9 +147,9 @@ def _inject_start(): ) self._capture.start_capturing() - capture_fixture._start = _inject_start + capture_fixture._start = _inject_start # noqa: SLF001 private member hack capman.set_fixture(capture_fixture) - capture_fixture._start() + capture_fixture._start() # noqa: SLF001 private member hack yield capture_fixture capture_fixture.close() capman.unset_fixture() diff --git a/tests/test_browser.py b/tests/test_browser.py index 90f29e2d..7dcb3d8a 100644 --- a/tests/test_browser.py +++ b/tests/test_browser.py @@ -8,7 +8,7 @@ @pytest.mark.asyncio async def test_create_and_close_tab(browser): tab = await browser.create_tab("") - assert isinstance(tab, choreo.tab.Tab) + assert isinstance(tab, choreo.Tab) assert tab.target_id in browser.tabs await browser.close_tab(tab) assert tab.target_id not in browser.tabs @@ -18,7 +18,7 @@ async def test_create_and_close_tab(browser): async def test_create_and_close_session(browser): with pytest.warns(choreo.protocol.ExperimentalFeatureWarning): session = await browser.create_session() - assert isinstance(session, choreo.session.Session) + assert isinstance(session, choreo.protocol.Session) assert session.session_id in browser.sessions await browser.close_session(session) assert session.session_id not in browser.sessions @@ -30,7 +30,7 @@ async def test_create_and_close_session(browser): async def test_browser_write_json(browser): # Test valid request with correct id and method response = await browser.write_json({"id": 0, "method": "Target.getTargets"}) - assert "result" in response and "targetInfos" in response["result"] + assert "result" in response and "targetInfos" in response["result"] # noqa: PT018 I like this assertion # Test invalid method name should return error response = await browser.write_json({"id": 2, "method": "dkadklqwmd"}) @@ -79,7 +79,7 @@ async def test_browser_write_json(browser): async def test_browser_send_command(browser): # Test valid request with correct command response = await browser.send_command(command="Target.getTargets") - assert "result" in response and "targetInfos" in response["result"] + assert "result" in response and "targetInfos" in response["result"] # noqa: PT018 I like this assertion # Test invalid method name should return error response = await browser.send_command(command="dkadklqwmd") @@ -102,7 +102,7 @@ async def test_populate_targets(browser): @pytest.mark.asyncio async def test_get_tab(browser): await browser.create_tab("") - assert browser.get_tab() == list(browser.tabs.values())[0] + assert browser.get_tab() == next(iter(browser.tabs.values())) await browser.create_tab() await browser.create_tab("") - assert browser.get_tab() == list(browser.tabs.values())[0] + assert browser.get_tab() == next(iter(browser.tabs.values())) diff --git a/tests/test_placeholder.py b/tests/test_placeholder.py index 7a727ee4..a985b598 100644 --- a/tests/test_placeholder.py +++ b/tests/test_placeholder.py @@ -7,7 +7,7 @@ async def test_placeholder(browser, capteesys): - print("") + print() assert "result" in await browser.send_command("Target.getTargets") out, err = capteesys.readouterr() assert out == "\n", f"stdout should be silent! -{out}-{err}-" diff --git a/tests/test_process.py b/tests/test_process.py index 2696b03d..7a68db2b 100644 --- a/tests/test_process.py +++ b/tests/test_process.py @@ -9,6 +9,8 @@ import choreographer as choreo +# ruff: noqa: PLR0913 (lots of parameters) + @pytest.mark.asyncio(loop_scope="function") async def test_context(capteesys, headless, debug, debug_browser, sandbox, gpu): @@ -23,15 +25,16 @@ async def test_context(capteesys, headless, debug, debug_browser, sandbox, gpu): timeout(pytest.default_timeout), ): if sandbox and "ubuntu" in platform.version().lower(): - pytest.skip("Ubuntu doesn't support sandbox") + pytest.skip("Ubuntu doesn't support sandbox unless installed from snap.") temp_dir = browser.tmp_dir response = await browser.send_command(command="Target.getTargets") - assert "result" in response and "targetInfos" in response["result"] + assert "result" in response and "targetInfos" in response["result"] # noqa: PT018 combined assert assert len(response["result"]["targetInfos"]) != 0 - assert isinstance(browser.get_tab(), choreo.tab.Tab) + assert isinstance(browser.get_tab(), choreo.Tab) assert len(browser.get_tab().sessions) == 1 - print("") # this makes sure that capturing is working - # stdout should be empty, but not because capsys is broken, because nothing was print + print() # this makes sure that capturing is working + # stdout should be empty, but not + # because capsys is broken, because nothing was print assert capteesys.readouterr().out == "\n", "stdout should be silent!" # let asyncio do some cleaning up if it wants, may prevent warnings await asyncio.sleep(0) @@ -44,20 +47,22 @@ async def test_no_context(capteesys, headless, debug, debug_browser, sandbox, gp headless=headless, debug=debug, debug_browser=None if debug_browser else False, - enable_sandbox=False, + enable_sandbox=sandbox, enable_gpu=gpu, ) + if sandbox and "ubuntu" in platform.version().lower(): + pytest.skip("Ubuntu doesn't support sandbox unless installed from snap.") temp_dir = browser.tmp_dir try: async with timeout(pytest.default_timeout): response = await browser.send_command(command="Target.getTargets") - assert "result" in response and "targetInfos" in response["result"] + assert "result" in response and "targetInfos" in response["result"] # noqa: PT018 combined assert assert len(response["result"]["targetInfos"]) != 0 - assert isinstance(browser.get_tab(), choreo.tab.Tab) + assert isinstance(browser.get_tab(), choreo.Tab) assert len(browser.get_tab().sessions) == 1 finally: await browser.close() - print("") # this make sure that capturing is working + print() # this make sure that capturing is working assert capteesys.readouterr().out == "\n", "stdout should be silent!" await asyncio.sleep(0) assert not temp_dir.exists @@ -74,8 +79,9 @@ async def test_watchdog(capteesys, headless, debug, debug_browser): ) if platform.system() == "Windows": - subprocess.call( - ["taskkill", "/F", "/T", "/PID", str(browser.subprocess.pid)], + # Blocking process here because it ensures the kill will occur rn + subprocess.call( # noqa: S603, ASYNC221 sanitize input, blocking process + ["taskkill", "/F", "/T", "/PID", str(browser.subprocess.pid)], # noqa: S607 stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL, ) @@ -84,9 +90,11 @@ async def test_watchdog(capteesys, headless, debug, debug_browser): await asyncio.sleep(1.5) with pytest.raises( - (choreo.browser.PipeClosedError, choreo.browser.BrowserClosedError), + (choreo.PipeClosedError, choreo.BrowserClosedError), ): await browser.send_command(command="Target.getTargets") await browser.close() await asyncio.sleep(0) + print() + assert capteesys.readouterr().out == "\n", "stdout should be silent!" diff --git a/tests/test_serializer.py b/tests/test_serializer.py index b75e922e..8bea73b0 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -1,11 +1,13 @@ -from datetime import datetime +from datetime import datetime, timezone import numpy as np -from choreographer.pipe import Pipe +from choreographer._pipe import Pipe -data = [1, 2.00, 3, float("nan"), float("inf"), float("-inf"), datetime(1970, 1, 1)] -expected_message = b'[1, 2.0, 3, null, null, null, "1970-01-01T00:00:00"]\x00' +_timestamp = datetime(1970, 1, 1, tzinfo=timezone.utc) + +data = [1, 2.00, 3, float("nan"), float("inf"), float("-inf"), _timestamp] +expected_message = b'[1, 2.0, 3, null, null, null, "1970-01-01T00:00:00+00:00"]\x00' converted_type = [int, float, int, type(None), type(None), type(None), str] @@ -21,7 +23,3 @@ def test_de_serialize(): obj_np = pipe.deserialize(message_np[:-1]) # split out \0 for o, t in zip(obj_np, converted_type): assert isinstance(o, t) - - -# TODO: Not sure if this all the data we have to worry about: -# we should also run through mocks and have it print data. diff --git a/tests/test_session.py b/tests/test_session.py index 84f85cf9..48487101 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -19,7 +19,7 @@ async def session(browser): async def test_session_send_command(session): # Test valid request with correct command response = await session.send_command(command="Target.getTargets") - assert "result" in response and "targetInfos" in response["result"] + assert "result" in response and "targetInfos" in response["result"] # noqa: PT018 I like this assertion # Test invalid method name should return error response = await session.send_command(command="dkadklqwmd") diff --git a/tests/test_tab.py b/tests/test_tab.py index 388a6725..83db36dc 100644 --- a/tests/test_tab.py +++ b/tests/test_tab.py @@ -12,8 +12,12 @@ def check_response_dictionary(response_received, response_expected): if isinstance(v, dict): check_response_dictionary(v, response_expected[k]) assert ( - k in response_received and response_received[k] == v - ), "Expected: {response_expected}\nReceived: {response_received}" + response_received.get( + k, + float("NaN"), + ) + == v + ), f"Expected: {response_expected}\nReceived: {response_received}" @pytest_asyncio.fixture(scope="function", loop_scope="function") @@ -22,14 +26,14 @@ async def tab(browser): yield tab_browser try: await browser.close_tab(tab_browser) - except choreo.browser.BrowserClosedError: + except choreo.BrowserClosedError: pass @pytest.mark.asyncio async def test_create_and_close_session(tab): session = await tab.create_session() - assert isinstance(session, choreo.session.Session) + assert isinstance(session, choreo.protocol.Session) await tab.close_session(session) assert session.session_id not in tab.sessions @@ -54,7 +58,7 @@ async def test_tab_send_command(tab): @pytest.mark.asyncio async def test_subscribe_once(tab): subscription_result = tab.subscribe_once("Page.*") - assert "Page.*" in list(tab.sessions.values())[0].subscriptions_futures + assert "Page.*" in next(iter(tab.sessions.values())).subscriptions_futures _ = await tab.send_command("Page.enable") _ = await tab.send_command("Page.reload") _ = await subscription_result @@ -66,12 +70,12 @@ async def test_subscribe_and_unsubscribe(tab): counter = 0 old_counter = counter - async def count_event(r): + async def count_event(_r): nonlocal counter counter += 1 tab.subscribe("Page.*", count_event) - assert "Page.*" in list(tab.sessions.values())[0].subscriptions + assert "Page.*" in next(iter(tab.sessions.values())).subscriptions await tab.send_command("Page.enable") await tab.send_command("Page.reload") await asyncio.sleep(0.5) @@ -80,7 +84,7 @@ async def count_event(r): tab.unsubscribe("Page.*") old_counter = counter - assert "Page.*" not in list(tab.sessions.values())[0].subscriptions + assert "Page.*" not in next(iter(tab.sessions.values())).subscriptions await tab.send_command("Page.enable") await tab.send_command("Page.reload") assert old_counter == counter diff --git a/uv.lock b/uv.lock index dab919f8..03e1d761 100644 --- a/uv.lock +++ b/uv.lock @@ -16,13 +16,14 @@ wheels = [ [[package]] name = "choreographer" -version = "1.0.0a0.post0+git.03c25504.dirty" +version = "1.0.0a0.post51+git.2f98811d.dirty" source = { editable = "." } dependencies = [ + { name = "logistro" }, { name = "simplejson" }, ] -[package.optional-dependencies] +[package.dev-dependencies] dev = [ { name = "async-timeout" }, { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -35,15 +36,20 @@ dev = [ [package.metadata] requires-dist = [ - { name = "async-timeout", marker = "extra == 'dev'" }, - { name = "numpy", marker = "extra == 'dev'" }, - { name = "poethepoet", marker = "extra == 'dev'", specifier = ">=0.31.1" }, - { name = "pytest", marker = "extra == 'dev'" }, - { name = "pytest-asyncio", marker = "extra == 'dev'" }, - { name = "pytest-xdist", marker = "extra == 'dev'" }, + { name = "logistro", specifier = ">=1.0.2" }, { name = "simplejson" }, ] +[package.metadata.requires-dev] +dev = [ + { name = "async-timeout" }, + { name = "numpy" }, + { name = "poethepoet", specifier = ">=0.31.1" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-xdist" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -80,6 +86,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "logistro" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/37/616339aa4c47c8316caa74c6be0e42a7db2e5f11bb4df7b1589e2347c8d2/logistro-1.0.2.tar.gz", hash = "sha256:0d437c0f4fd9abef6eca0a28d85c171fc14f4f17aa1ce0f2f91d2e93f82a5c18", size = 5973 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/fb/a0d753951886ace90e2a92f55f119450e0dedabe81733c9f7655c9009d1c/logistro-1.0.2-py3-none-any.whl", hash = "sha256:f3435161b12a7f6a463aa23885c076ed2d2876255db9b5af3e302bdba6e2ed02", size = 5343 }, +] + [[package]] name = "numpy" version = "2.0.2"