From dbb6cc6eddf0e41583f4be63ab35eab784749eb2 Mon Sep 17 00:00:00 2001 From: Ross Wollman Date: Wed, 13 Apr 2022 13:06:06 -0700 Subject: [PATCH] feat: large file uploads (#1255) Ports: - https://github.com/microsoft/playwright/commit/a8d80621b22b62fcae5135860fd666f05010e927 (feat(chromium): large file uploads (#12860)) - https://github.com/microsoft/playwright/commit/b0103566c980b35fbc5c0b10f253eb8442f08062 (fix(addInitScript): tolerate trailing comments (#13275)) Fixes https://github.com/microsoft/playwright-python/issues/1211 --- playwright/_impl/_element_handle.py | 18 +++- playwright/_impl/_file_chooser.py | 28 ----- playwright/_impl/_frame.py | 14 ++- playwright/_impl/_object_factory.py | 3 + playwright/_impl/_set_input_files_helpers.py | 101 +++++++++++++++++++ playwright/_impl/_writable_stream.py | 42 ++++++++ setup.py | 2 +- tests/assets/input/fileupload.html | 4 +- tests/async/test_add_init_script.py | 7 ++ tests/async/test_browsertype_connect.py | 57 +++++++++++ tests/async/test_input.py | 44 ++++++++ tests/sync/test_add_init_script.py | 7 ++ 12 files changed, 290 insertions(+), 37 deletions(-) create mode 100644 playwright/_impl/_set_input_files_helpers.py create mode 100644 playwright/_impl/_writable_stream.py diff --git a/playwright/_impl/_element_handle.py b/playwright/_impl/_element_handle.py index 23aec8994..5803413dc 100644 --- a/playwright/_impl/_element_handle.py +++ b/playwright/_impl/_element_handle.py @@ -19,8 +19,8 @@ from playwright._impl._api_structures import FilePayload, FloatRect, Position from playwright._impl._connection import ChannelOwner, from_nullable_channel -from playwright._impl._file_chooser import normalize_file_payloads from playwright._impl._helper import ( + Error, KeyboardModifier, MouseButton, async_writefile, @@ -33,6 +33,7 @@ parse_result, serialize_argument, ) +from playwright._impl._set_input_files_helpers import convert_input_files if sys.version_info >= (3, 8): # pragma: no cover from typing import Literal @@ -190,8 +191,19 @@ async def set_input_files( noWaitAfter: bool = None, ) -> None: params = locals_to_params(locals()) - params["files"] = await normalize_file_payloads(files) - await self._channel.send("setInputFiles", params) + frame = await self.owner_frame() + if not frame: + raise Error("Cannot set input files to detached element") + converted = await convert_input_files(files, frame.page.context) + if converted["files"] is not None: + await self._channel.send( + "setInputFiles", {**params, "files": converted["files"]} + ) + else: + await self._channel.send( + "setInputFilePaths", + locals_to_params({**params, **converted, "files": None}), + ) async def focus(self) -> None: await self._channel.send("focus") diff --git a/playwright/_impl/_file_chooser.py b/playwright/_impl/_file_chooser.py index 7add73e07..a15050fc0 100644 --- a/playwright/_impl/_file_chooser.py +++ b/playwright/_impl/_file_chooser.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import base64 -import os from pathlib import Path from typing import TYPE_CHECKING, List, Union from playwright._impl._api_structures import FilePayload -from playwright._impl._helper import async_readfile if TYPE_CHECKING: # pragma: no cover from playwright._impl._element_handle import ElementHandle @@ -56,28 +53,3 @@ async def set_files( noWaitAfter: bool = None, ) -> None: await self._element_handle.set_input_files(files, timeout, noWaitAfter) - - -async def normalize_file_payloads( - files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]] -) -> List: - file_list = files if isinstance(files, list) else [files] - file_payloads: List = [] - for item in file_list: - if isinstance(item, (str, Path)): - file_payloads.append( - { - "name": os.path.basename(item), - "buffer": base64.b64encode(await async_readfile(item)).decode(), - } - ) - else: - file_payloads.append( - { - "name": item["name"], - "mimeType": item["mimeType"], - "buffer": base64.b64encode(item["buffer"]).decode(), - } - ) - - return file_payloads diff --git a/playwright/_impl/_frame.py b/playwright/_impl/_frame.py index 9d6b272ce..471ea7dcb 100644 --- a/playwright/_impl/_frame.py +++ b/playwright/_impl/_frame.py @@ -28,7 +28,6 @@ ) from playwright._impl._element_handle import ElementHandle, convert_select_option_values from playwright._impl._event_context_manager import EventContextManagerImpl -from playwright._impl._file_chooser import normalize_file_payloads from playwright._impl._helper import ( DocumentLoadState, FrameNavigatedEvent, @@ -48,6 +47,7 @@ ) from playwright._impl._locator import FrameLocator, Locator from playwright._impl._network import Response +from playwright._impl._set_input_files_helpers import convert_input_files from playwright._impl._wait_helper import WaitHelper if sys.version_info >= (3, 8): # pragma: no cover @@ -598,8 +598,16 @@ async def set_input_files( noWaitAfter: bool = None, ) -> None: params = locals_to_params(locals()) - params["files"] = await normalize_file_payloads(files) - await self._channel.send("setInputFiles", params) + converted = await convert_input_files(files, self.page.context) + if converted["files"] is not None: + await self._channel.send( + "setInputFiles", {**params, "files": converted["files"]} + ) + else: + await self._channel.send( + "setInputFilePaths", + locals_to_params({**params, **converted, "files": None}), + ) async def type( self, diff --git a/playwright/_impl/_object_factory.py b/playwright/_impl/_object_factory.py index a926d71ca..e04d1adec 100644 --- a/playwright/_impl/_object_factory.py +++ b/playwright/_impl/_object_factory.py @@ -33,6 +33,7 @@ from playwright._impl._selectors import SelectorsOwner from playwright._impl._stream import Stream from playwright._impl._tracing import Tracing +from playwright._impl._writable_stream import WritableStream class DummyObject(ChannelOwner): @@ -89,6 +90,8 @@ def create_remote_object( return WebSocket(parent, type, guid, initializer) if type == "Worker": return Worker(parent, type, guid, initializer) + if type == "WritableStream": + return WritableStream(parent, type, guid, initializer) if type == "Selectors": return SelectorsOwner(parent, type, guid, initializer) return DummyObject(parent, type, guid, initializer) diff --git a/playwright/_impl/_set_input_files_helpers.py b/playwright/_impl/_set_input_files_helpers.py new file mode 100644 index 000000000..a03a41e91 --- /dev/null +++ b/playwright/_impl/_set_input_files_helpers.py @@ -0,0 +1,101 @@ +import base64 +import os +import sys +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional, Union + +if sys.version_info >= (3, 8): # pragma: no cover + from typing import TypedDict +else: # pragma: no cover + from typing_extensions import TypedDict + +from playwright._impl._connection import Channel, from_channel +from playwright._impl._helper import Error, async_readfile +from playwright._impl._writable_stream import WritableStream + +if TYPE_CHECKING: # pragma: no cover + from playwright._impl._browser_context import BrowserContext + +from playwright._impl._api_structures import FilePayload + +SIZE_LIMIT_IN_BYTES = 50 * 1024 * 1024 + + +class InputFilesList(TypedDict): + streams: Optional[List[Channel]] + localPaths: Optional[List[str]] + files: Optional[List[FilePayload]] + + +async def convert_input_files( + files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]], + context: "BrowserContext", +) -> InputFilesList: + file_list = files if isinstance(files, list) else [files] + + has_large_buffer = any( + [ + len(f.get("buffer", "")) > SIZE_LIMIT_IN_BYTES + for f in file_list + if not isinstance(f, (str, Path)) + ] + ) + if has_large_buffer: + raise Error( + "Cannot set buffer larger than 50Mb, please write it to a file and pass its path instead." + ) + + has_large_file = any( + [ + os.stat(f).st_size > SIZE_LIMIT_IN_BYTES + for f in file_list + if isinstance(f, (str, Path)) + ] + ) + if has_large_file: + if context._channel._connection.is_remote: + streams = [] + for file in file_list: + assert isinstance(file, (str, Path)) + stream: WritableStream = from_channel( + await context._channel.send( + "createTempFile", {"name": os.path.basename(file)} + ) + ) + await stream.copy(file) + streams.append(stream._channel) + return InputFilesList(streams=streams, localPaths=None, files=None) + local_paths = [] + for p in file_list: + assert isinstance(p, (str, Path)) + local_paths.append(str(Path(p).absolute().resolve())) + return InputFilesList(streams=None, localPaths=local_paths, files=None) + + return InputFilesList( + streams=None, localPaths=None, files=await _normalize_file_payloads(files) + ) + + +async def _normalize_file_payloads( + files: Union[str, Path, FilePayload, List[Union[str, Path]], List[FilePayload]] +) -> List: + file_list = files if isinstance(files, list) else [files] + file_payloads: List = [] + for item in file_list: + if isinstance(item, (str, Path)): + file_payloads.append( + { + "name": os.path.basename(item), + "buffer": base64.b64encode(await async_readfile(item)).decode(), + } + ) + else: + file_payloads.append( + { + "name": item["name"], + "mimeType": item["mimeType"], + "buffer": base64.b64encode(item["buffer"]).decode(), + } + ) + + return file_payloads diff --git a/playwright/_impl/_writable_stream.py b/playwright/_impl/_writable_stream.py new file mode 100644 index 000000000..702adf153 --- /dev/null +++ b/playwright/_impl/_writable_stream.py @@ -0,0 +1,42 @@ +# Copyright (c) Microsoft Corporation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import os +from pathlib import Path +from typing import Dict, Union + +from playwright._impl._connection import ChannelOwner + +# COPY_BUFSIZE is taken from shutil.py in the standard library +_WINDOWS = os.name == "nt" +COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 64 * 1024 + + +class WritableStream(ChannelOwner): + def __init__( + self, parent: ChannelOwner, type: str, guid: str, initializer: Dict + ) -> None: + super().__init__(parent, type, guid, initializer) + + async def copy(self, path: Union[str, Path]) -> None: + with open(path, "rb") as f: + while True: + data = f.read(COPY_BUFSIZE) + if not data: + break + await self._channel.send( + "write", {"binary": base64.b64encode(data).decode()} + ) + await self._channel.send("close") diff --git a/setup.py b/setup.py index d598e5d69..5d8e764b5 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ InWheel = None from wheel.bdist_wheel import bdist_wheel as BDistWheelCommand -driver_version = "1.21.0-beta-1649712128000" +driver_version = "1.21.0" def extractall(zip: zipfile.ZipFile, path: str) -> None: diff --git a/tests/assets/input/fileupload.html b/tests/assets/input/fileupload.html index 57e90d50a..771dc4c68 100644 --- a/tests/assets/input/fileupload.html +++ b/tests/assets/input/fileupload.html @@ -4,8 +4,8 @@ File upload test -
- + +
diff --git a/tests/async/test_add_init_script.py b/tests/async/test_add_init_script.py index 7317e7523..515891a88 100644 --- a/tests/async/test_add_init_script.py +++ b/tests/async/test_add_init_script.py @@ -74,3 +74,10 @@ async def test_add_init_script_support_multiple_scripts(page): await page.goto("data:text/html,") assert await page.evaluate("window.script1") == 1 assert await page.evaluate("window.script2") == 2 + + +async def test_should_work_with_trailing_comments(page): + await page.add_init_script("// comment") + await page.add_init_script("window.secret = 42;") + await page.goto("data:text/html,") + assert await page.evaluate("secret") == 42 diff --git a/tests/async/test_browsertype_connect.py b/tests/async/test_browsertype_connect.py index 73ab81c0b..9dce38780 100644 --- a/tests/async/test_browsertype_connect.py +++ b/tests/async/test_browsertype_connect.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio +import re from typing import Callable import pytest @@ -241,3 +243,58 @@ async def handle_request(route: Route) -> None: assert await response.json() == {"foo": "bar"} remote.kill() + + +@pytest.mark.only_browser("chromium") +async def test_should_upload_large_file( + browser_type: BrowserType, + launch_server: Callable[[], RemoteServer], + playwright: Playwright, + server: Server, + tmp_path, +): + remote = launch_server() + + browser = await browser_type.connect(remote.ws_endpoint) + context = await browser.new_context() + page = await context.new_page() + + await page.goto(server.PREFIX + "/input/fileupload.html") + large_file_path = tmp_path / "200MB.zip" + data = b"A" * 1024 + with large_file_path.open("wb") as f: + for i in range(0, 200 * 1024 * 1024, len(data)): + f.write(data) + input = page.locator('input[type="file"]') + events = await input.evaluate_handle( + """ + e => { + const events = []; + e.addEventListener('input', () => events.push('input')); + e.addEventListener('change', () => events.push('change')); + return events; + } + """ + ) + + await input.set_input_files(large_file_path) + assert await input.evaluate("e => e.files[0].name") == "200MB.zip" + assert await events.evaluate("e => e") == ["input", "change"] + + [request, _] = await asyncio.gather( + server.wait_for_request("/upload"), + page.click("input[type=submit]"), + ) + + contents = request.args[b"file1"][0] + assert len(contents) == 200 * 1024 * 1024 + assert contents[:1024] == data + # flake8: noqa: E203 + assert contents[len(contents) - 1024 :] == data + match = re.search( + rb'^.*Content-Disposition: form-data; name="(?P.*)"; filename="(?P.*)".*$', + request.post_body, + re.MULTILINE, + ) + assert match.group("name") == b"file1" + assert match.group("filename") == b"200MB.zip" diff --git a/tests/async/test_input.py b/tests/async/test_input.py index 3ff92ebe6..77e0ca1c8 100644 --- a/tests/async/test_input.py +++ b/tests/async/test_input.py @@ -14,6 +14,7 @@ import asyncio import os +import re import pytest @@ -284,3 +285,46 @@ async def _listen_for_wheel_events(page: Page, selector: str) -> None: """, selector, ) + + +@pytest.mark.only_browser("chromium") +async def test_should_upload_large_file(page, server, tmp_path): + await page.goto(server.PREFIX + "/input/fileupload.html") + large_file_path = tmp_path / "200MB.zip" + data = b"A" * 1024 + with large_file_path.open("wb") as f: + for i in range(0, 200 * 1024 * 1024, len(data)): + f.write(data) + input = page.locator('input[type="file"]') + events = await input.evaluate_handle( + """ + e => { + const events = []; + e.addEventListener('input', () => events.push('input')); + e.addEventListener('change', () => events.push('change')); + return events; + } + """ + ) + + await input.set_input_files(large_file_path) + assert await input.evaluate("e => e.files[0].name") == "200MB.zip" + assert await events.evaluate("e => e") == ["input", "change"] + + [request, _] = await asyncio.gather( + server.wait_for_request("/upload"), + page.click("input[type=submit]"), + ) + + contents = request.args[b"file1"][0] + assert len(contents) == 200 * 1024 * 1024 + assert contents[:1024] == data + # flake8: noqa: E203 + assert contents[len(contents) - 1024 :] == data + match = re.search( + rb'^.*Content-Disposition: form-data; name="(?P.*)"; filename="(?P.*)".*$', + request.post_body, + re.MULTILINE, + ) + assert match.group("name") == b"file1" + assert match.group("filename") == b"200MB.zip" diff --git a/tests/sync/test_add_init_script.py b/tests/sync/test_add_init_script.py index 5fc626a18..039862063 100644 --- a/tests/sync/test_add_init_script.py +++ b/tests/sync/test_add_init_script.py @@ -78,3 +78,10 @@ def test_add_init_script_support_multiple_scripts(page: Page) -> None: page.goto("data:text/html,") assert page.evaluate("window.script1") == 1 assert page.evaluate("window.script2") == 2 + + +def test_should_work_with_trailing_comments(page: Page) -> None: + page.add_init_script("// comment") + page.add_init_script("window.secret = 42;") + page.goto("data:text/html,") + assert page.evaluate("secret") == 42