From f9dc9afa82c1e0767335bea020d6173abad88668 Mon Sep 17 00:00:00 2001 From: Iacopo Colonnelli Date: Wed, 3 Aug 2022 13:49:43 +0200 Subject: [PATCH] Import dependencies from cwl_utils after code refactoring (#1686) * Removed sandboxjs and expression from cwltool * req cwl-utils package; type fixes Co-authored-by: "Bruno P. Kinoshita" Co-authored-by: Michael R. Crusoe --- MANIFEST.in | 3 - cwltool.Dockerfile | 4 +- cwltool/builder.py | 2 +- cwltool/cwlNodeEngine.js | 21 -- cwltool/cwlNodeEngineJSConsole.js | 32 --- cwltool/cwlNodeEngineWithContext.js | 37 --- cwltool/expression.py | 421 ---------------------------- cwltool/process.py | 4 +- cwltool/sandboxjs.py | 402 -------------------------- cwltool/validate_js.py | 6 +- cwltool/workflow_job.py | 2 +- mypy-requirements.txt | 2 + requirements.txt | 1 + setup.py | 3 +- tests/test_examples.py | 23 +- tests/test_js_sandbox.py | 21 +- tests/test_validate_js.py | 2 +- 17 files changed, 37 insertions(+), 949 deletions(-) delete mode 100755 cwltool/cwlNodeEngine.js delete mode 100644 cwltool/cwlNodeEngineJSConsole.js delete mode 100644 cwltool/cwlNodeEngineWithContext.js delete mode 100644 cwltool/expression.py delete mode 100644 cwltool/sandboxjs.py diff --git a/MANIFEST.in b/MANIFEST.in index 08a85b4ec..5ad845cc6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -52,9 +52,6 @@ include cwltool/schemas/v1.2/*.yml include cwltool/schemas/v1.2/*.md include cwltool/schemas/v1.2/salad/schema_salad/metaschema/*.yml include cwltool/schemas/v1.2/salad/schema_salad/metaschema/*.md -include cwltool/cwlNodeEngine.js -include cwltool/cwlNodeEngineJSConsole.js -include cwltool/cwlNodeEngineWithContext.js include cwltool/extensions.yml include cwltool/extensions-v1.1.yml include cwltool/jshint/jshint_wrapper.js diff --git a/cwltool.Dockerfile b/cwltool.Dockerfile index 18fbc8523..5976737ee 100644 --- a/cwltool.Dockerfile +++ b/cwltool.Dockerfile @@ -5,9 +5,7 @@ RUN apk add --no-cache git gcc python3-dev libxml2-dev libxslt-dev libc-dev linu WORKDIR /cwltool COPY . . -RUN pip install toml -rmypy-requirements.txt "$(grep schema.salad requirements.txt)" -# schema-salad is needed to be installed (this time as pure Python) for -# cwltool + mypyc +RUN pip install toml -rmypy-requirements.txt RUN CWLTOOL_USE_MYPYC=1 MYPYPATH=mypy-stubs pip wheel --no-binary schema-salad --wheel-dir=/wheels .[deps] RUN rm /wheels/schema_salad* RUN pip install black diff --git a/cwltool/builder.py b/cwltool/builder.py index a422d9769..f86e214ea 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -15,6 +15,7 @@ cast, ) +from cwl_utils import expression from rdflib import Graph, URIRef from rdflib.namespace import OWL, RDFS from ruamel.yaml.comments import CommentedMap @@ -25,7 +26,6 @@ from schema_salad.validate import validate from typing_extensions import TYPE_CHECKING, Type # pylint: disable=unused-import -from . import expression from .errors import WorkflowException from .loghandler import _logger from .mutation import MutationManager diff --git a/cwltool/cwlNodeEngine.js b/cwltool/cwlNodeEngine.js deleted file mode 100755 index b3b6ff8aa..000000000 --- a/cwltool/cwlNodeEngine.js +++ /dev/null @@ -1,21 +0,0 @@ -"use strict"; -process.stdin.setEncoding("utf8"); -var incoming = ""; -process.stdin.on("data", function(chunk) { - incoming += chunk; - var i = incoming.indexOf("\n"); - if (i > -1) { - try{ - var fn = JSON.parse(incoming.substr(0, i)); - incoming = incoming.substr(i+1); - process.stdout.write(JSON.stringify(require("vm").runInNewContext(fn, {})) + "\n"); - } - catch(e){ - console.error(e) - } - /*strings to indicate the process has finished*/ - console.log("r1cepzbhUTxtykz5XTC4"); - console.error("r1cepzbhUTxtykz5XTC4"); - } -}); -process.stdin.on("end", process.exit); diff --git a/cwltool/cwlNodeEngineJSConsole.js b/cwltool/cwlNodeEngineJSConsole.js deleted file mode 100644 index 2ebe8bd0b..000000000 --- a/cwltool/cwlNodeEngineJSConsole.js +++ /dev/null @@ -1,32 +0,0 @@ -"use strict"; -function js_console_log(){ - console.error("[log] "+require("util").format.apply(this, arguments).split("\n").join("\n[log] ")); -} -function js_console_err(){ - console.error("[err] "+require("util").format.apply(this, arguments).split("\n").join("\n[err] ")); -} -process.stdin.setEncoding("utf8"); -var incoming = ""; -process.stdin.on("data", function(chunk) { - incoming += chunk; - var i = incoming.indexOf("\n"); - if (i > -1) { - try{ - var fn = JSON.parse(incoming.substr(0, i)); - incoming = incoming.substr(i+1); - process.stdout.write(JSON.stringify(require("vm").runInNewContext(fn, { - console: { - log: js_console_log, - error: js_console_err - } - })) + "\n"); - } - catch(e){ - console.error(e) - } - /*strings to indicate the process has finished*/ - console.log("r1cepzbhUTxtykz5XTC4"); - console.error("r1cepzbhUTxtykz5XTC4"); - } -}); -process.stdin.on("end", process.exit); diff --git a/cwltool/cwlNodeEngineWithContext.js b/cwltool/cwlNodeEngineWithContext.js deleted file mode 100644 index f3d514884..000000000 --- a/cwltool/cwlNodeEngineWithContext.js +++ /dev/null @@ -1,37 +0,0 @@ -"use strict"; -process.stdin.setEncoding("utf8"); -var incoming = ""; -var firstInput = true; -var context = {}; - -process.stdin.on("data", function(chunk) { - incoming += chunk; - var i = incoming.indexOf("\n"); - while (i > -1) { - try{ - var input = incoming.substr(0, i); - incoming = incoming.substr(i+1); - var fn = JSON.parse(input); - if(firstInput){ - context = require("vm").runInNewContext(fn, {}); - } - else{ - process.stdout.write(JSON.stringify(require("vm").runInNewContext(fn, context)) + "\n"); - } - } - catch(e){ - console.error(e); - } - if(firstInput){ - firstInput = false; - } - else{ - /*strings to indicate the process has finished*/ - console.log("r1cepzbhUTxtykz5XTC4"); - console.error("r1cepzbhUTxtykz5XTC4"); - } - - i = incoming.indexOf("\n"); - } -}); -process.stdin.on("end", process.exit); diff --git a/cwltool/expression.py b/cwltool/expression.py deleted file mode 100644 index 981a0b003..000000000 --- a/cwltool/expression.py +++ /dev/null @@ -1,421 +0,0 @@ -"""Parse CWL expressions.""" - -import copy -import json -import re -from typing import ( - Any, - Dict, - List, - Mapping, - MutableMapping, - MutableSequence, - Optional, - Tuple, - Union, - cast, -) - -from schema_salad.utils import json_dumps - -from .errors import WorkflowException -from .loghandler import _logger -from .sandboxjs import JavascriptException, default_timeout, execjs -from .utils import CWLObjectType, CWLOutputType, bytes2str_in_dicts - - -def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str: - # make sure all the byte strings are converted - # to str in `rootvars` dict. - - return "\n".join( - engine_config - + [f"var {k} = {json_dumps(v, indent=4)};" for k, v in rootvars.items()] - ) - - -# decode all raw strings to unicode -seg_symbol = r"""\w+""" -seg_single = r"""\['([^']|\\')+'\]""" -seg_double = r"""\["([^"]|\\")+"\]""" -seg_index = r"""\[[0-9]+\]""" -segments = rf"(\.{seg_symbol}|{seg_single}|{seg_double}|{seg_index})" -segment_re = re.compile(segments, flags=re.UNICODE) -param_str = rf"\(({seg_symbol}){segments}*\)$" -param_re = re.compile(param_str, flags=re.UNICODE) - - -class SubstitutionError(Exception): - pass - - -def scanner(scan: str) -> Optional[Tuple[int, int]]: - DEFAULT = 0 - DOLLAR = 1 - PAREN = 2 - BRACE = 3 - SINGLE_QUOTE = 4 - DOUBLE_QUOTE = 5 - BACKSLASH = 6 - - i = 0 - stack = [DEFAULT] - start = 0 - while i < len(scan): - state = stack[-1] - c = scan[i] - - if state == DEFAULT: - if c == "$": - stack.append(DOLLAR) - elif c == "\\": - stack.append(BACKSLASH) - elif state == BACKSLASH: - stack.pop() - if stack[-1] == DEFAULT: - return (i - 1, i + 1) - elif state == DOLLAR: - if c == "(": - start = i - 1 - stack.append(PAREN) - elif c == "{": - start = i - 1 - stack.append(BRACE) - else: - stack.pop() - i -= 1 - elif state == PAREN: - if c == "(": - stack.append(PAREN) - elif c == ")": - stack.pop() - if stack[-1] == DOLLAR: - return (start, i + 1) - elif c == "'": - stack.append(SINGLE_QUOTE) - elif c == '"': - stack.append(DOUBLE_QUOTE) - elif state == BRACE: - if c == "{": - stack.append(BRACE) - elif c == "}": - stack.pop() - if stack[-1] == DOLLAR: - return (start, i + 1) - elif c == "'": - stack.append(SINGLE_QUOTE) - elif c == '"': - stack.append(DOUBLE_QUOTE) - elif state == SINGLE_QUOTE: - if c == "'": - stack.pop() - elif c == "\\": - stack.append(BACKSLASH) - elif state == DOUBLE_QUOTE: - if c == '"': - stack.pop() - elif c == "\\": - stack.append(BACKSLASH) - i += 1 - - if len(stack) > 1 and not (len(stack) == 2 and stack[1] in (BACKSLASH, DOLLAR)): - raise SubstitutionError( - "Substitution error, unfinished block starting at position {}: '{}' stack was {}".format( - start, scan[start:], stack - ) - ) - return None - - -def next_seg( - parsed_string: str, remaining_string: str, current_value: CWLOutputType -) -> CWLOutputType: - if remaining_string: - m = segment_re.match(remaining_string) - if not m: - return current_value - next_segment_str = m.group(1) - - key = None # type: Optional[Union[str, int]] - if next_segment_str[0] == ".": - key = next_segment_str[1:] - elif next_segment_str[1] in ("'", '"'): - key = next_segment_str[2:-2].replace("\\'", "'").replace('\\"', '"') - - if key is not None: - if ( - isinstance(current_value, MutableSequence) - and key == "length" - and not remaining_string[m.end(1) :] - ): - return len(current_value) - if not isinstance(current_value, MutableMapping): - raise WorkflowException( - "%s is a %s, cannot index on string '%s'" - % (parsed_string, type(current_value).__name__, key) - ) - if key not in current_value: - raise WorkflowException(f"{parsed_string} does not contain key '{key}'") - else: - try: - key = int(next_segment_str[1:-1]) - except ValueError as v: - raise WorkflowException(str(v)) from v - if not isinstance(current_value, MutableSequence): - raise WorkflowException( - "%s is a %s, cannot index on int '%s'" - % (parsed_string, type(current_value).__name__, key) - ) - if key and key >= len(current_value): - raise WorkflowException( - "%s list index %i out of range" % (parsed_string, key) - ) - - if isinstance(current_value, Mapping): - try: - return next_seg( - parsed_string + remaining_string, - remaining_string[m.end(1) :], - cast(CWLOutputType, current_value[cast(str, key)]), - ) - except KeyError: - raise WorkflowException(f"{parsed_string} doesn't have property {key}") - elif isinstance(current_value, list) and isinstance(key, int): - try: - return next_seg( - parsed_string + remaining_string, - remaining_string[m.end(1) :], - current_value[key], - ) - except KeyError: - raise WorkflowException(f"{parsed_string} doesn't have property {key}") - else: - raise WorkflowException(f"{parsed_string} doesn't have property {key}") - else: - return current_value - - -def evaluator( - ex: str, - jslib: str, - obj: CWLObjectType, - timeout: float, - fullJS: bool = False, - force_docker_pull: bool = False, - debug: bool = False, - js_console: bool = False, - container_engine: str = "docker", -) -> Optional[CWLOutputType]: - match = param_re.match(ex) - - expression_parse_exception = None - - if match is not None: - first_symbol = match.group(1) - first_symbol_end = match.end(1) - - if first_symbol_end + 1 == len(ex) and first_symbol == "null": - return None - try: - if first_symbol not in obj: - raise WorkflowException("%s is not defined" % first_symbol) - - return next_seg( - first_symbol, - ex[first_symbol_end:-1], - cast(CWLOutputType, obj[first_symbol]), - ) - except WorkflowException as werr: - expression_parse_exception = werr - - if fullJS: - return execjs( - ex, - jslib, - timeout, - force_docker_pull=force_docker_pull, - debug=debug, - js_console=js_console, - container_engine=container_engine, - ) - else: - if expression_parse_exception is not None: - raise JavascriptException( - "Syntax error in parameter reference '%s': %s. This could be " - "due to using Javascript code without specifying " - "InlineJavascriptRequirement." % (ex[1:-1], expression_parse_exception) - ) - else: - raise JavascriptException( - "Syntax error in parameter reference '%s'. This could be due " - "to using Javascript code without specifying " - "InlineJavascriptRequirement." % ex - ) - - -def _convert_dumper(string: str) -> str: - return f"{json.dumps(string)} + " - - -def interpolate( - scan: str, - rootvars: CWLObjectType, - timeout: float = default_timeout, - fullJS: bool = False, - jslib: str = "", - force_docker_pull: bool = False, - debug: bool = False, - js_console: bool = False, - strip_whitespace: bool = True, - escaping_behavior: int = 2, - convert_to_expression: bool = False, - container_engine: str = "docker", -) -> Optional[CWLOutputType]: - """ - Interpolate and evaluate. - - Note: only call with convert_to_expression=True on CWL Expressions in $() - form that need interpolation. - """ - if strip_whitespace: - scan = scan.strip() - parts = [] - if convert_to_expression: - dump = _convert_dumper - parts.append("${return ") - else: - dump = lambda x: x - w = scanner(scan) - while w: - if convert_to_expression: - parts.append(f'"{scan[0 : w[0]]}" + ') - else: - parts.append(scan[0 : w[0]]) - - if scan[w[0]] == "$": - if not convert_to_expression: - e = evaluator( - scan[w[0] + 1 : w[1]], - jslib, - rootvars, - timeout, - fullJS=fullJS, - force_docker_pull=force_docker_pull, - debug=debug, - js_console=js_console, - container_engine=container_engine, - ) - if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1: - return e - - leaf = json_dumps(e, sort_keys=True) - if leaf[0] == '"': - leaf = json.loads(leaf) - parts.append(leaf) - else: - parts.append( - "function(){var item =" - + scan[w[0] : w[1]][2:-1] - + '; if (typeof(item) === "string"){ return item; } else { return JSON.stringify(item); }}() + ' - ) - elif scan[w[0]] == "\\": - if escaping_behavior == 1: - # Old behavior. Just skip the next character. - e = scan[w[1] - 1] - parts.append(dump(e)) - elif escaping_behavior == 2: - # Backslash quoting requires a three character lookahead. - e = scan[w[0] : w[1] + 1] - if e in ("\\$(", "\\${"): - # Suppress start of a parameter reference, drop the - # backslash. - parts.append(dump(e[1:])) - w = (w[0], w[1] + 1) - elif e[1] == "\\": - # Double backslash, becomes a single backslash - parts.append(dump("\\")) - else: - # Some other text, add it as-is (including the - # backslash) and resume scanning. - parts.append(dump(e[:2])) - else: - raise Exception("Unknown escaping behavior %s" % escaping_behavior) - scan = scan[w[1] :] - w = scanner(scan) - if convert_to_expression: - parts.append(f'"{scan}"') - parts.append(";}") - else: - parts.append(scan) - return "".join(parts) - - -def needs_parsing(snippet: Any) -> bool: - return isinstance(snippet, str) and ("$(" in snippet or "${" in snippet) - - -def do_eval( - ex: Optional[CWLOutputType], - jobinput: CWLObjectType, - requirements: List[CWLObjectType], - outdir: Optional[str], - tmpdir: Optional[str], - resources: Dict[str, Union[float, int]], - context: Optional[CWLOutputType] = None, - timeout: float = default_timeout, - force_docker_pull: bool = False, - debug: bool = False, - js_console: bool = False, - strip_whitespace: bool = True, - cwlVersion: str = "", - container_engine: str = "docker", -) -> Optional[CWLOutputType]: - - runtime = cast(MutableMapping[str, Union[int, str, None]], copy.deepcopy(resources)) - runtime["tmpdir"] = tmpdir if tmpdir else None - runtime["outdir"] = outdir if outdir else None - - rootvars = cast( - CWLObjectType, - bytes2str_in_dicts({"inputs": jobinput, "self": context, "runtime": runtime}), - ) - - if isinstance(ex, str) and needs_parsing(ex): - fullJS = False - jslib = "" - for r in reversed(requirements): - if r["class"] == "InlineJavascriptRequirement": - fullJS = True - jslib = jshead(cast(List[str], r.get("expressionLib", [])), rootvars) - break - - try: - return interpolate( - ex, - rootvars, - timeout=timeout, - fullJS=fullJS, - jslib=jslib, - force_docker_pull=force_docker_pull, - debug=debug, - js_console=js_console, - strip_whitespace=strip_whitespace, - escaping_behavior=1 - if cwlVersion - in ( - "v1.0", - "v1.1.0-dev1", - "v1.1", - "v1.2.0-dev1", - "v1.2.0-dev2", - "v1.2.0-dev3", - ) - else 2, - container_engine=container_engine, - ) - - except Exception as e: - _logger.exception(e) - raise WorkflowException("Expression evaluation error:\n%s" % str(e)) from e - else: - return ex diff --git a/cwltool/process.py b/cwltool/process.py index a21b1f291..ff15d5945 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -10,7 +10,7 @@ import shutil import stat import textwrap -import urllib +import urllib.parse import uuid from os import scandir from typing import ( @@ -31,6 +31,7 @@ cast, ) +from cwl_utils import expression from pkg_resources import resource_stream from rdflib import Graph from ruamel.yaml.comments import CommentedMap, CommentedSeq @@ -48,7 +49,6 @@ from schema_salad.validate import avro_type_name, validate_ex from typing_extensions import TYPE_CHECKING -from . import expression from .builder import INPUT_OBJ_VOCAB, Builder from .context import LoadingContext, RuntimeContext, getdefault from .errors import UnsupportedRequirement, WorkflowException diff --git a/cwltool/sandboxjs.py b/cwltool/sandboxjs.py deleted file mode 100644 index 55f41b1f5..000000000 --- a/cwltool/sandboxjs.py +++ /dev/null @@ -1,402 +0,0 @@ -"""Evaluate CWL Javascript Expressions in a sandbox.""" - -import errno -import json -import os -import re -import select -import subprocess # nosec -import threading -from io import BytesIO -from typing import List, Optional, Tuple, cast - -from pkg_resources import resource_stream -from schema_salad.utils import json_dumps - -from .loghandler import _logger -from .singularity_utils import singularity_supports_userns -from .utils import CWLOutputType, processes_to_kill - - -class JavascriptException(Exception): - pass - - -localdata = threading.local() - -default_timeout = 20 -have_node_slim = False -# minimum acceptable version of nodejs engine -minimum_node_version_str = "0.10.26" - - -def check_js_threshold_version(working_alias: str) -> bool: - """ - Check if the nodeJS engine version on the system with the allowed minimum version. - - https://github.com/nodejs/node/blob/master/CHANGELOG.md#nodejs-changelog - """ - # parse nodejs version into int Tuple: 'v4.2.6\n' -> [4, 2, 6] - current_version_str = subprocess.check_output( # nosec - [working_alias, "-v"], universal_newlines=True - ) - - current_version = [ - int(v) for v in current_version_str.strip().strip("v").split(".") - ] - minimum_node_version = [int(v) for v in minimum_node_version_str.split(".")] - - return current_version >= minimum_node_version - - -def new_js_proc( - js_text: str, force_docker_pull: bool = False, container_engine: str = "docker" -) -> "subprocess.Popen[str]": - """Return a subprocess ready to submit javascript to.""" - required_node_version, docker = (False,) * 2 - nodejs = None # type: Optional[subprocess.Popen[str]] - trynodes = ("nodejs", "node") - for n in trynodes: - try: - if ( - subprocess.check_output( # nosec - [n, "--eval", "process.stdout.write('t')"], universal_newlines=True - ) - != "t" - ): - continue - else: - nodejs = subprocess.Popen( # nosec - [n, "--eval", js_text], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ) - processes_to_kill.append(nodejs) - required_node_version = check_js_threshold_version(n) - break - except (subprocess.CalledProcessError, OSError): - pass - - if nodejs is None or nodejs is not None and required_node_version is False: - try: - nodeimg = "docker.io/node:slim" - global have_node_slim - if container_engine == "singularity": - nodeimg = f"docker://{nodeimg}" - - if not have_node_slim: - if container_engine in ("docker", "podman"): - dockerimgs = subprocess.check_output( # nosec - [container_engine, "images", "-q", nodeimg], - universal_newlines=True, - ) - elif container_engine != "singularity": - raise Exception(f"Unknown container_engine: {container_engine}.") - # if output is an empty string - if ( - container_engine == "singularity" - or len(dockerimgs.split("\n")) <= 1 - or force_docker_pull - ): - # pull node:slim docker container - nodejs_pull_commands = [container_engine, "pull"] - if container_engine == "singularity": - nodejs_pull_commands.append("--force") - nodejs_pull_commands.append(nodeimg) - nodejsimg = subprocess.check_output( # nosec - nodejs_pull_commands, universal_newlines=True - ) - _logger.debug( - "Pulled Docker image %s %s using %s", - nodeimg, - nodejsimg, - container_engine, - ) - have_node_slim = True - nodejs_commands = [ - container_engine, - ] - if container_engine != "singularity": - nodejs_commands.extend( - [ - "run", - "--attach=STDIN", - "--attach=STDOUT", - "--attach=STDERR", - "--sig-proxy=true", - "--interactive", - "--rm", - ] - ) - else: - nodejs_commands.extend( - [ - "exec", - "--contain", - "--ipc", - "--cleanenv", - "--userns" if singularity_supports_userns() else "--pid", - ] - ) - nodejs_commands.extend( - [ - nodeimg, - "node", - "--eval", - js_text, - ], - ) - _logger.debug("Running nodejs via %s", nodejs_commands[:-1]) - nodejs = subprocess.Popen( # nosec - nodejs_commands, - universal_newlines=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - processes_to_kill.append(nodejs) - docker = True - except OSError as e: - if e.errno == errno.ENOENT: - pass - else: - raise - except subprocess.CalledProcessError: - pass - - # docker failed and nodejs not on system - if nodejs is None: - raise JavascriptException( - "cwltool requires Node.js engine to evaluate and validate " - "Javascript expressions, but couldn't find it. Tried {}, " - f"{container_engine} run node:slim".format(", ".join(trynodes)) - ) - - # docker failed, but nodejs is installed on system but the version is below the required version - if docker is False and required_node_version is False: - raise JavascriptException( - "cwltool requires minimum v{} version of Node.js engine.".format( - minimum_node_version_str - ), - "Try updating: https://docs.npmjs.com/getting-started/installing-node", - ) - - return nodejs - - -PROCESS_FINISHED_STR = "r1cepzbhUTxtykz5XTC4\n" - - -def exec_js_process( - js_text: str, - timeout: float = default_timeout, - js_console: bool = False, - context: Optional[str] = None, - force_docker_pull: bool = False, - container_engine: str = "docker", -) -> Tuple[int, str, str]: - - if not hasattr(localdata, "procs"): - localdata.procs = {} - - if js_console and context is not None: - raise NotImplementedError("js_console=True and context not implemented") - - if js_console: - js_engine = "cwlNodeEngineJSConsole.js" - _logger.warning( - "Running with support for javascript console in expressions (DO NOT USE IN PRODUCTION)" - ) - elif context is not None: - js_engine = "cwlNodeEngineWithContext.js" - else: - js_engine = "cwlNodeEngine.js" - - created_new_process = False - - if context is not None: - nodejs = localdata.procs.get((js_engine, context)) - else: - nodejs = localdata.procs.get(js_engine) - - if nodejs is None or nodejs.poll() is not None: - res = resource_stream(__name__, js_engine) - js_engine_code = res.read().decode("utf-8") - - created_new_process = True - - new_proc = new_js_proc( - js_engine_code, - force_docker_pull=force_docker_pull, - container_engine=container_engine, - ) - - if context is None: - localdata.procs[js_engine] = new_proc - nodejs = new_proc - else: - localdata.procs[(js_engine, context)] = new_proc - nodejs = new_proc - - killed = [] - - def terminate() -> None: - """Kill the node process if it exceeds timeout limit.""" - try: - killed.append(True) - nodejs.kill() - except OSError: - pass - - timer = threading.Timer(timeout, terminate) - timer.daemon = True - timer.start() - - stdin_text = "" - if created_new_process and context is not None: - stdin_text = json_dumps(context) + "\n" - stdin_text += json_dumps(js_text) + "\n" - - stdin_buf = BytesIO(stdin_text.encode("utf-8")) - stdout_buf = BytesIO() - stderr_buf = BytesIO() - - rselect = [nodejs.stdout, nodejs.stderr] # type: List[BytesIO] - wselect = [nodejs.stdin] # type: List[BytesIO] - - def process_finished() -> bool: - return stdout_buf.getvalue().decode("utf-8").endswith( - PROCESS_FINISHED_STR - ) and stderr_buf.getvalue().decode("utf-8").endswith(PROCESS_FINISHED_STR) - - while not process_finished() and timer.is_alive(): - rready, wready, _ = select.select(rselect, wselect, []) - try: - if nodejs.stdin in wready: - buf = stdin_buf.read(select.PIPE_BUF) - if buf: - os.write(nodejs.stdin.fileno(), buf) - for pipes in ((nodejs.stdout, stdout_buf), (nodejs.stderr, stderr_buf)): - if pipes[0] in rready: - buf = os.read(pipes[0].fileno(), select.PIPE_BUF) - if buf: - pipes[1].write(buf) - except OSError: - break - - stdin_buf.close() - - if not timer.is_alive(): - _logger.info("Expression Tool stopped because time limit has been exceeded.") - _logger.info( - "Time limit is {} seconds. This can be increased using the --eval-timeout flag.\n".format( - timeout - ) - ) - stdoutdata = stdout_buf.getvalue() - stderrdata = stderr_buf.getvalue() - else: - stdoutdata = stdout_buf.getvalue()[: -len(PROCESS_FINISHED_STR) - 1] - stderrdata = stderr_buf.getvalue()[: -len(PROCESS_FINISHED_STR) - 1] - - timer.cancel() - - nodejs.poll() - - if nodejs.poll() not in (None, 0): - if killed: - returncode = -1 - else: - returncode = nodejs.returncode - else: - returncode = 0 - - return returncode, stdoutdata.decode("utf-8"), stderrdata.decode("utf-8") - - -def code_fragment_to_js(jscript: str, jslib: str = "") -> str: - if isinstance(jscript, str) and len(jscript) > 1 and jscript[0] == "{": - inner_js = jscript - else: - inner_js = "{return (%s);}" % jscript - - return f'"use strict";\n{jslib}\n(function(){inner_js})()' - - -def execjs( - js: str, - jslib: str, - timeout: float, - force_docker_pull: bool = False, - debug: bool = False, - js_console: bool = False, - container_engine: str = "docker", -) -> CWLOutputType: - - fn = code_fragment_to_js(js, jslib) - - returncode, stdout, stderr = exec_js_process( - fn, - timeout, - js_console=js_console, - force_docker_pull=force_docker_pull, - container_engine=container_engine, - ) - - if js_console: - if stderr is not None: - _logger.info("Javascript console output:") - _logger.info("----------------------------------------") - _logger.info( - "\n".join( - re.findall(r"^[[](?:log|err)[]].*$", stderr, flags=re.MULTILINE) - ) - ) - _logger.info("----------------------------------------") - - def stdfmt(data: str) -> str: - if "\n" in data: - return "\n" + data.strip() - return data - - def fn_linenum() -> str: - lines = fn.splitlines() - ofs = 0 - maxlines = 99 - if len(lines) > maxlines: - ofs = len(lines) - maxlines - lines = lines[-maxlines:] - return "\n".join("%02i %s" % (i + ofs + 1, b) for i, b in enumerate(lines)) - - if returncode != 0: - if debug: - info = ( - "returncode was: %s\nscript was:\n%s\nstdout was: %s\nstderr was: %s\n" - % (returncode, fn_linenum(), stdfmt(stdout), stdfmt(stderr)) - ) - else: - info = ( - "Javascript expression was: {}\nstdout was: {}\nstderr was: {}".format( - js, - stdfmt(stdout), - stdfmt(stderr), - ) - ) - - if returncode == -1: - raise JavascriptException( - f"Long-running script killed after {timeout} seconds: {info}" - ) - else: - raise JavascriptException(info) - - try: - return cast(CWLOutputType, json.loads(stdout)) - except ValueError as err: - raise JavascriptException( - "{}\nscript was:\n{}\nstdout was: '{}'\nstderr was: '{}'\n".format( - err, fn_linenum(), stdout, stderr - ) - ) from err diff --git a/cwltool/validate_js.py b/cwltool/validate_js.py index 99e2d5eff..821cadbf2 100644 --- a/cwltool/validate_js.py +++ b/cwltool/validate_js.py @@ -15,6 +15,9 @@ cast, ) +from cwl_utils.errors import SubstitutionError +from cwl_utils.expression import scanner as scan_expression +from cwl_utils.sandboxjs import code_fragment_to_js, exec_js_process from pkg_resources import resource_stream from ruamel.yaml.comments import CommentedMap, CommentedSeq from schema_salad.avro.schema import ( @@ -29,10 +32,7 @@ from schema_salad.validate import validate_ex from .errors import WorkflowException -from .expression import SubstitutionError -from .expression import scanner as scan_expression from .loghandler import _logger -from .sandboxjs import code_fragment_to_js, exec_js_process def is_expression(tool, schema): diff --git a/cwltool/workflow_job.py b/cwltool/workflow_job.py index 26af81f58..f28dd08c0 100644 --- a/cwltool/workflow_job.py +++ b/cwltool/workflow_job.py @@ -14,11 +14,11 @@ cast, ) +from cwl_utils import expression from schema_salad.sourceline import SourceLine from schema_salad.utils import json_dumps from typing_extensions import TYPE_CHECKING -from . import expression from .builder import content_limit_respected_read from .checker import can_assign_src_to_sink from .context import RuntimeContext, getdefault diff --git a/mypy-requirements.txt b/mypy-requirements.txt index db5b67f4a..163975058 100644 --- a/mypy-requirements.txt +++ b/mypy-requirements.txt @@ -1,5 +1,7 @@ mypy==0.971 ruamel.yaml>=0.16.0,<0.17.22 +schema-salad>=8.2.20211104054942,<9 +cwl-utils>=0.14 types-requests types-setuptools types-psutil diff --git a/requirements.txt b/requirements.txt index 38e42d041..87fc9a652 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ pydot>=1.4.1 argcomplete>=1.12.0 pyparsing != 3.0.2 # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319 pyparsing < 3;python_version<='3.6' # breaks --print-dot +cwl-utils>=0.14 diff --git a/setup.py b/setup.py index 3c9dfa18c..50b5550a6 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,6 @@ "cwltool/udocker.py", "cwltool/errors.py", "cwltool/executors.py", - "cwltool/expression.py", "cwltool/factory.py", "cwltool/flatten.py", # "cwltool/__init__.py", @@ -69,7 +68,6 @@ "cwltool/procgenerator.py", # "cwltool/provenance.py", # WritableBag is having issues "cwltool/resolver.py", - # "cwltool/sandboxjs.py", # probably not speed critical, tests need to mock components "cwltool/secrets.py", "cwltool/singularity.py", "cwltool/software_requirements.py", @@ -123,6 +121,7 @@ "pyparsing != 3.0.2", # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319 "pyparsing < 3 ;python_version<='3.6'", # breaks --print-dot (pydot) "argcomplete", + "cwl-utils >= 0.14", ], extras_require={ "deps": ["galaxy-tool-util >= 21.1.0"], diff --git a/tests/test_examples.py b/tests/test_examples.py index c2c78e12d..ee899f9a3 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -9,15 +9,16 @@ from io import StringIO from pathlib import Path from typing import Any, Dict, List, Union, cast -from urllib.parse import urlparse +import cwl_utils.expression as expr import pydot import pytest +from cwl_utils.errors import JavascriptException +from cwl_utils.sandboxjs import param_re from ruamel.yaml.comments import CommentedMap, CommentedSeq from schema_salad.exceptions import ValidationException import cwltool.checker -import cwltool.expression as expr import cwltool.factory import cwltool.pathmapper import cwltool.process @@ -27,7 +28,6 @@ from cwltool.errors import WorkflowException from cwltool.main import main from cwltool.process import CWL_IANA -from cwltool.sandboxjs import JavascriptException from cwltool.utils import CWLObjectType, dedup from .util import get_data, get_main_output, needs_docker, working_directory @@ -64,7 +64,7 @@ @pytest.mark.parametrize("expression,expected", expression_match) def test_expression_match(expression: str, expected: bool) -> None: - match = expr.param_re.match(expression) + match = param_re.match(expression) assert (match is not None) == expected @@ -1077,19 +1077,20 @@ def test_print_dot() -> None: @pytest.mark.parametrize("factor", test_factors) -def test_js_console_cmd_line_tool(factor: str) -> None: +def test_js_console_cmd_line_tool( + factor: str, caplog: pytest.LogCaptureFixture +) -> None: for test_file in ("js_output.cwl", "js_output_workflow.cwl"): commands = factor.split() commands.extend( ["--js-console", "--no-container", get_data("tests/wf/" + test_file)] ) - error_code, _, stderr = get_main_output(commands) - - stderr = re.sub(r"\s\s+", " ", stderr) - assert "[log] Log message" in stderr - assert "[err] Error message" in stderr + error_code, _, _ = get_main_output(commands) + logging_output = "\n".join([record.message for record in caplog.records]) + assert "[log] Log message" in logging_output + assert "[err] Error message" in logging_output - assert error_code == 0, stderr + assert error_code == 0, logging_output @pytest.mark.parametrize("factor", test_factors) diff --git a/tests/test_js_sandbox.py b/tests/test_js_sandbox.py index 9facb91a6..f27a8ada0 100644 --- a/tests/test_js_sandbox.py +++ b/tests/test_js_sandbox.py @@ -7,8 +7,8 @@ from typing import Any, List import pytest +from cwl_utils import sandboxjs -from cwltool import sandboxjs from cwltool.factory import Factory from cwltool.loghandler import _logger, configure_logging @@ -28,15 +28,16 @@ @pytest.mark.parametrize("version,supported", node_versions) def test_node_version(version: str, supported: bool, mocker: Any) -> None: - mocked_subprocess = mocker.patch("cwltool.sandboxjs.subprocess") + mocked_subprocess = mocker.patch("cwl_utils.sandboxjs.subprocess") mocked_subprocess.check_output = mocker.Mock(return_value=version) assert sandboxjs.check_js_threshold_version("node") == supported def test_value_from_two_concatenated_expressions() -> None: - sandboxjs.have_node_slim = False - sandboxjs.localdata = threading.local() + js_engine = sandboxjs.get_js_engine() + js_engine.have_node_slim = False # type: ignore[attr-defined] + js_engine.localdata = threading.local() # type: ignore[attr-defined] factory = Factory() echo = factory.make(get_data("tests/wf/vf-concat.cwl")) file = {"class": "File", "location": get_data("tests/wf/whale.txt")} @@ -70,8 +71,9 @@ def test_value_from_two_concatenated_expressions_podman( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """Javascript test using podman.""" - sandboxjs.have_node_slim = False - sandboxjs.localdata = threading.local() + js_engine = sandboxjs.get_js_engine() + js_engine.have_node_slim = False # type: ignore[attr-defined] + js_engine.localdata = threading.local() # type: ignore[attr-defined] new_paths = hide_nodejs(tmp_path) factory = Factory() factory.loading_context.podman = True @@ -89,8 +91,9 @@ def test_value_from_two_concatenated_expressions_singularity( tmp_path: Path, monkeypatch: pytest.MonkeyPatch ) -> None: """Javascript test using Singularity.""" - sandboxjs.have_node_slim = False - sandboxjs.localdata = threading.local() + js_engine = sandboxjs.get_js_engine() + js_engine.have_node_slim = False # type: ignore[attr-defined] + js_engine.localdata = threading.local() # type: ignore[attr-defined] new_paths = hide_nodejs(tmp_path) factory = Factory() factory.loading_context.singularity = True @@ -106,7 +109,7 @@ def test_value_from_two_concatenated_expressions_singularity( def test_caches_js_processes(mocker: Any) -> None: sandboxjs.exec_js_process("7", context="{}") - mocked_new_js_proc = mocker.patch("cwltool.sandboxjs.new_js_proc") + mocked_new_js_proc = mocker.patch("cwl_utils.sandboxjs.new_js_proc") sandboxjs.exec_js_process("7", context="{}") mocked_new_js_proc.assert_not_called() diff --git a/tests/test_validate_js.py b/tests/test_validate_js.py index 7047d1dc3..d4520ee93 100644 --- a/tests/test_validate_js.py +++ b/tests/test_validate_js.py @@ -1,9 +1,9 @@ import pytest +from cwl_utils.sandboxjs import code_fragment_to_js from schema_salad.avro.schema import Names from schema_salad.utils import yaml_no_ts from cwltool import process, validate_js -from cwltool.sandboxjs import code_fragment_to_js TEST_CWL = """ cwlVersion: v1.0