From 5351eee69ae133e31325f58626f6e0738092c6d3 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" <1330696+mr-c@users.noreply.github.com> Date: Wed, 6 Oct 2021 12:00:04 +0200 Subject: [PATCH 01/14] Fewer containers (#129) also run the tests against cwltool --- .github/workflows/ci.yml | 2 +- tests/cat3-tool-docker.cwl | 19 +++++++++++++++++++ tests/wc-tool-shortcut.cwl | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100755 tests/cat3-tool-docker.cwl create mode 100755 tests/wc-tool-shortcut.cwl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0928beaf..f39f54cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main ] + branches: [ main, 1.2.1_proposed ] pull_request: branches: [ main, 1.2.1_proposed ] diff --git a/tests/cat3-tool-docker.cwl b/tests/cat3-tool-docker.cwl new file mode 100755 index 00000000..b486e8ce --- /dev/null +++ b/tests/cat3-tool-docker.cwl @@ -0,0 +1,19 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.2 +doc: "Print the contents of a file to stdout using 'cat' running in a docker container." 
+requirements: + DockerRequirement: + dockerPull: docker.io/debian:stable-slim +inputs: + file1: + type: File + label: Input File + doc: "The file that will be copied using 'cat'" + inputBinding: {position: 1} +outputs: + output_file: + type: File + outputBinding: {glob: output.txt} +baseCommand: cat +stdout: output.txt diff --git a/tests/wc-tool-shortcut.cwl b/tests/wc-tool-shortcut.cwl new file mode 100755 index 00000000..f157cca3 --- /dev/null +++ b/tests/wc-tool-shortcut.cwl @@ -0,0 +1,20 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool +cwlVersion: v1.2 + +requirements: + - class: DockerRequirement + dockerPull: docker.io/debian:stable-slim + +inputs: + file1: stdin + +outputs: + output: + type: File + outputBinding: { glob: output } + +baseCommand: [wc] + +stdout: output From 5519ef95be0d4555f71f7431398f1eb3c98466c1 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 6 Oct 2021 12:33:55 +0200 Subject: [PATCH 02/14] CI: less checks --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f39f54cb..0928beaf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main, 1.2.1_proposed ] + branches: [ main ] pull_request: branches: [ main, 1.2.1_proposed ] From 08573436bfb35817b19177b203d0c5407431af8d Mon Sep 17 00:00:00 2001 From: Tomoya Tanjo Date: Mon, 22 Nov 2021 13:46:23 +0000 Subject: [PATCH 03/14] Fix typo: `$schema` to `$schemas` --- concepts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/concepts.md b/concepts.md index 77a1aa2d..378c4a42 100644 --- a/concepts.md +++ b/concepts.md @@ -331,7 +331,7 @@ prefix listed in the `$namespaces` section of the document as described in the [Schema Salad specification](SchemaSalad.html#Explicit_context). 
It is recommended that concepts from schema.org are used whenever possible. -For the `$schemas` field we recommend their RDF encoding: https://schema.org/version/latest/schemaorg-current-https.rdf +For the `$schemas` field we recommend their RDF encoding: http://schema.org/version/latest/schema.rdf Implementation extensions which modify execution semantics must be [listed in the `requirements` field](#Requirements_and_hints). From 941b65e71196d70fc9364946bba049a7b6663e31 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 2 May 2022 18:40:06 +1200 Subject: [PATCH 04/14] Fix a few typos (automatic IDE spellchecker) --- salad/schema_salad/metaschema/salad.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/salad/schema_salad/metaschema/salad.md b/salad/schema_salad/metaschema/salad.md index d1aa8ddc..dca21120 100644 --- a/salad/schema_salad/metaschema/salad.md +++ b/salad/schema_salad/metaschema/salad.md @@ -173,7 +173,7 @@ enable or disable the behavior described. ## Data concepts An **object** is a data structure equivalent to the "object" type in JSON, -consisting of a unordered set of name/value pairs (referred to here as +consisting of an unordered set of name/value pairs (referred to here as **fields**) and where the name is a string and the value is a string, number, boolean, array, or object. From e631704f7280aced03505cc9963a5c83fafa6747 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Tue, 7 Feb 2023 09:47:28 +0100 Subject: [PATCH 05/14] Added CWLObjectType named union This commit introduces a `CWLObjectType` named union to represent a valid CWL object. Using a more precise union type instead of `Any` allows for a smarter parsing from SALAD auto-generated parsers. In particular, `CWLObjectType` is used to define the type of the `default` directive in CWL `InputParameter` and `WorkflowStepInput` fields. Plus, a `CWLInputFile` type is introduced to represent a CWL jobfiles as `map`. 
--- Base.yml | 425 ++++++++++++++++++ Process.yml | 397 +--------------- Workflow.yml | 2 +- salad/schema_salad/metaschema/metaschema.yml | 18 + .../metaschema/metaschema_base.yml | 88 ++++ 5 files changed, 535 insertions(+), 395 deletions(-) create mode 100644 Base.yml diff --git a/Base.yml b/Base.yml new file mode 100644 index 00000000..44a0573a --- /dev/null +++ b/Base.yml @@ -0,0 +1,425 @@ +$base: "https://w3id.org/cwl/cwl#" + +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + sld: "https://w3id.org/cwl/salad#" + +$graph: + +- name: CWLType + type: enum + extends: "sld:PrimitiveType" + symbols: + - cwl:File + - cwl:Directory + doc: + - "Extends primitive types with the concept of a file and directory as a builtin type." + - "File: A File object" + - "Directory: A Directory object" + +- name: File + type: record + docParent: "#CWLType" + doc: | + Represents a file (or group of files when `secondaryFiles` is provided) that + will be accessible by tools using standard POSIX file system call API such as + open(2) and read(2). + + Files are represented as objects with `class` of `File`. File objects have + a number of properties that provide metadata about the file. + + The `location` property of a File is an IRI that uniquely identifies the + file. Implementations must support the `file://` IRI scheme and may support + other schemes such as `http://` and `https://`. The value of `location` may also be a + relative reference, in which case it must be resolved relative to the IRI + of the document it appears in. Alternately to `location`, implementations + must also accept the `path` property on File, which must be a filesystem + path available on the same host as the CWL runner (for inputs) or the + runtime environment of a command line tool execution (for command line tool + outputs). + + If no `location` or `path` is specified, a file object must specify + `contents` with the UTF-8 text content of the file. This is a "file + literal". 
File literals do not correspond to external resources, but are + created on disk with `contents` when needed for executing a tool. + Where appropriate, expressions can return file literals to define new files + on a runtime. The maximum size of `contents` is 64 kilobytes. + + The `basename` property defines the filename on disk where the file is + staged. This may differ from the resource name. If not provided, + `basename` must be computed from the last path part of `location` and made + available to expressions. + + The `secondaryFiles` property is a list of File or Directory objects that + must be staged in the same directory as the primary file. It is an error + for file names to be duplicated in `secondaryFiles`. + + The `size` property is the size in bytes of the File. It must be computed + from the resource and made available to expressions. The `checksum` field + contains a cryptographic hash of the file content for use in verifying file + contents. Implementations may, at user option, enable or disable + computation of the `checksum` field for performance or other reasons. + However, the ability to compute output checksums is required to pass the + CWL conformance test suite. + + When executing a CommandLineTool, the files and secondary files may be + staged to an arbitrary directory, but must use the value of `basename` for + the filename. The `path` property must be a file path in the context of the + tool execution runtime (local to the compute node, or within the executing + container). All computed properties should be available to expressions. + File literals also must be staged and `path` must be set. + + When collecting CommandLineTool outputs, `glob` matching returns file paths + (with the `path` property) and the derived properties. This can all be + modified by `outputEval`. Alternately, if the file `cwl.output.json` is + present in the output, `outputBinding` is ignored. 
+ + File objects in the output must provide either a `location` IRI or a `path` + property in the context of the tool execution runtime (local to the compute + node, or within the executing container). + + When evaluating an ExpressionTool, file objects must be referenced via + `location` (the expression tool does not have access to files on disk so + `path` is meaningless) or as file literals. It is legal to return a file + object with an existing `location` but a different `basename`. The + `loadContents` field of ExpressionTool inputs behaves the same as on + CommandLineTool inputs, however it is not meaningful on the outputs. + + An ExpressionTool may forward file references from input to output by using + the same value for `location`. + + fields: + - name: class + type: + type: enum + name: File_class + symbols: + - cwl:File + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `File` to indicate this object describes a file. + - name: location + type: string? + doc: | + An IRI that identifies the file resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource; the + implementation must use the IRI to retrieve file content. If an + implementation is unable to retrieve the file content stored at a + remote resource (due to unsupported protocol, access denied, or other + issue) it must signal an error. + + If the `location` field is not provided, the `contents` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local host path where the File is available when a CommandLineTool is + executed. 
This field must be set by the implementation. The final + path component must match the value of `basename`. This field + must not be used in any other context. The command line tool being + executed must be able to access the file at `path` using the POSIX + `open(2)` syscall. + + As a special case, if the `path` field is provided but the `location` + field is not, an implementation may assign the value of the `path` + field to `location`, and remove the `path` field. + + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + `<space>`, `<tab>`, and `<newline>`) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + "_id": "cwl:path" + "_type": "@id" + - name: basename + type: string? + doc: | + The base name of the file, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + jsonldPredicate: "cwl:basename" + - name: dirname + type: string? + doc: | + The name of the directory containing the file, that is, the path leading up + to the final slash in the path such that `dirname + '/' + basename == + path`. 
+ + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameroot + type: string? + doc: | + The basename root such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. For the purposes of path splitting leading periods on the + basename are ignored; a basename of `.cshrc` will have a nameroot of + `.cshrc`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: nameext + type: string? + doc: | + The basename extension such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have an empty `nameext`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: checksum + type: string? + doc: | + Optional hash code for validating file integrity. Currently, must be in the form + "sha1$ + hexadecimal string" using the SHA-1 algorithm. + - name: size + type: + - "null" + - int + - long + doc: Optional file size (in bytes) + - name: "secondaryFiles" + type: + - "null" + - type: array + items: [File, Directory] + jsonldPredicate: + _id: "cwl:secondaryFiles" + secondaryFilesDSL: true + doc: | + A list of additional files or directories that are associated with the + primary file and must be transferred alongside the primary file. + Examples include indexes of the primary file, or external references + which must be included when loading primary document. A file object + listed in `secondaryFiles` may itself include `secondaryFiles` for + which the same rules apply. + - name: format + type: string? 
+ jsonldPredicate: + _id: cwl:format + _type: "@id" + identity: true + doc: | + The format of the file: this must be an IRI of a concept node that + represents the file format, preferably defined within an ontology. + If no ontology is available, file formats may be tested by exact match. + + Reasoning about format compatibility must be done by checking that an + input file format is the same, `owl:equivalentClass` or + `rdfs:subClassOf` the format required by the input parameter. + `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if + `<B> owl:equivalentClass <C>` and `<B> rdfs:subClassOf <A>` then infer + `<C> rdfs:subClassOf <A>`. + + File format ontologies may be provided in the "$schemas" metadata at the + root of the document. If no ontologies are specified in `$schemas`, the + runtime may perform exact file format matches. + - name: contents + type: string? + doc: | + File contents literal. + + If neither `location` nor `path` is provided, `contents` must be + non-null. The implementation must assign a unique identifier for the + `location` field. When the file is staged as input to CommandLineTool, + the value of `contents` must be written to a file. + + If `contents` is set as a result of a Javascript expression, + an `entry` in `InitialWorkDirRequirement`, or read in from + `cwl.output.json`, there is no specified upper limit on the + size of `contents`. Implementations may have practical limits + on the size of `contents` based on memory and storage + available to the workflow runner or other factors. + + If the `loadContents` field of an `InputParameter` or + `OutputParameter` is true, and the input or output File object + `location` is valid, the file must be a UTF-8 text file 64 KiB + or smaller, and the implementation must read the entire + contents of the file and place it in the `contents` field. If + the size of the file is greater than 64 KiB, the + implementation must raise a fatal error. 
+ + +- name: Directory + type: record + docAfter: "#File" + doc: | + Represents a directory to present to a command line tool. + + Directories are represented as objects with `class` of `Directory`. Directory objects have + a number of properties that provide metadata about the directory. + + The `location` property of a Directory is an IRI that uniquely identifies + the directory. Implementations must support the file:// IRI scheme and may + support other schemes such as http://. Alternately to `location`, + implementations must also accept the `path` property on Directory, which + must be a filesystem path available on the same host as the CWL runner (for + inputs) or the runtime environment of a command line tool execution (for + command line tool outputs). + + A Directory object may have a `listing` field. This is a list of File and + Directory objects that are contained in the Directory. For each entry in + `listing`, the `basename` property defines the name of the File or + Subdirectory when staged to disk. If `listing` is not provided, the + implementation must have some way of fetching the Directory listing at + runtime based on the `location` field. + + If a Directory does not have `location`, it is a Directory literal. A + Directory literal must provide `listing`. Directory literals must be + created on disk at runtime as needed. + + The resources in a Directory literal do not need to have any implied + relationship in their `location`. For example, a Directory listing may + contain two files located on different hosts. It is the responsibility of + the runtime to ensure that those files are staged to disk appropriately. + Secondary files associated with files in `listing` must also be staged to + the same Directory. + + When executing a CommandLineTool, Directories must be recursively staged + first and have local values of `path` assigned. 
+ + Directory objects in CommandLineTool output must provide either a + `location` IRI or a `path` property in the context of the tool execution + runtime (local to the compute node, or within the executing container). + + An ExpressionTool may forward file references from input to output by using + the same value for `location`. + + Name conflicts (the same `basename` appearing multiple times in `listing` + or in any entry in `secondaryFiles` in the listing) is a fatal error. + + fields: + - name: class + type: + type: enum + name: Directory_class + symbols: + - cwl:Directory + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `Directory` to indicate this object describes a Directory. + - name: location + type: string? + doc: | + An IRI that identifies the directory resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource. If + the `listing` field is not set, the implementation must use the + location IRI to retrieve directory listing. If an implementation is + unable to retrieve the directory listing stored at a remote resource (due to + unsupported protocol, access denied, or other issue) it must signal an + error. + + If the `location` field is not provided, the `listing` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local path where the Directory is made available prior to executing a + CommandLineTool. This must be set by the implementation. This field + must not be used in any other context. 
The command line tool being + executed must be able to access the directory at `path` using the POSIX + `opendir(2)` syscall. + + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + `<space>`, `<tab>`, and `<newline>`) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + _id: "cwl:path" + _type: "@id" + - name: basename + type: string? + doc: | + The base name of the directory, that is, the name of the directory without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this directory is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + jsonldPredicate: "cwl:basename" + - name: listing + type: + - "null" + - type: array + items: [File, Directory] + doc: | + List of files or subdirectories contained in this directory. The name + of each file or subdirectory is determined by the `basename` field of + each `File` or `Directory` object. It is an error if a `File` shares a + `basename` with any other entry in `listing`. If two or more + `Directory` objects share the same `basename`, this must be treated as + equivalent to a single subdirectory with the listings recursively + merged. 
+ jsonldPredicate: + _id: "cwl:listing" + + +- name: CWLObjectType + type: union + names: + - "null" + - boolean + - int + - long + - float + - double + - string + - File + - Directory + - type: array + items: CWLObjectType + - type: map + values: CWLObjectType + doc: | + Generic type representing a valid CWL object. It is used to represent + `default` values passed to CWL `InputParameter` and `WorkflowStepInput` + record fields. + + +- name: CWLInputFile + type: map + values: CWLObjectType + doc: | + Type representing a valid CWL input file as a `map<string, CWLObjectType>`. diff --git a/Process.yml b/Process.yml index 41d24a2d..fcd6af6d 100644 --- a/Process.yml +++ b/Process.yml @@ -14,6 +14,8 @@ $graph: - $import: "salad/schema_salad/metaschema/metaschema_base.yml" +- $import: Base.yml + - name: BaseTypesDoc type: documentation doc: | @@ -47,395 +49,6 @@ $graph: - cwl:v1.2.0-dev5 - cwl:v1.2 -- name: CWLType - type: enum - extends: "sld:PrimitiveType" - symbols: - - cwl:File - - cwl:Directory - doc: - - "Extends primitive types with the concept of a file and directory as a builtin type." - - "File: A File object" - - "Directory: A Directory object" - -- name: File - type: record - docParent: "#CWLType" - doc: | - Represents a file (or group of files when `secondaryFiles` is provided) that - will be accessible by tools using standard POSIX file system call API such as - open(2) and read(2). - - Files are represented as objects with `class` of `File`. File objects have - a number of properties that provide metadata about the file. - - The `location` property of a File is a IRI that uniquely identifies the - file. Implementations must support the `file://` IRI scheme and may support - other schemes such as `http://` and `https://`. The value of `location` may also be a - relative reference, in which case it must be resolved relative to the IRI - of the document it appears in. 
Alternately to `location`, implementations - must also accept the `path` property on File, which must be a filesystem - path available on the same host as the CWL runner (for inputs) or the - runtime environment of a command line tool execution (for command line tool - outputs). - - If no `location` or `path` is specified, a file object must specify - `contents` with the UTF-8 text content of the file. This is a "file - literal". File literals do not correspond to external resources, but are - created on disk with `contents` with when needed for executing a tool. - Where appropriate, expressions can return file literals to define new files - on a runtime. The maximum size of `contents` is 64 kilobytes. - - The `basename` property defines the filename on disk where the file is - staged. This may differ from the resource name. If not provided, - `basename` must be computed from the last path part of `location` and made - available to expressions. - - The `secondaryFiles` property is a list of File or Directory objects that - must be staged in the same directory as the primary file. It is an error - for file names to be duplicated in `secondaryFiles`. - - The `size` property is the size in bytes of the File. It must be computed - from the resource and made available to expressions. The `checksum` field - contains a cryptographic hash of the file content for use it verifying file - contents. Implementations may, at user option, enable or disable - computation of the `checksum` field for performance or other reasons. - However, the ability to compute output checksums is required to pass the - CWL conformance test suite. - - When executing a CommandLineTool, the files and secondary files may be - staged to an arbitrary directory, but must use the value of `basename` for - the filename. The `path` property must be file path in the context of the - tool execution runtime (local to the compute node, or within the executing - container). 
All computed properties should be available to expressions. - File literals also must be staged and `path` must be set. - - When collecting CommandLineTool outputs, `glob` matching returns file paths - (with the `path` property) and the derived properties. This can all be - modified by `outputEval`. Alternately, if the file `cwl.output.json` is - present in the output, `outputBinding` is ignored. - - File objects in the output must provide either a `location` IRI or a `path` - property in the context of the tool execution runtime (local to the compute - node, or within the executing container). - - When evaluating an ExpressionTool, file objects must be referenced via - `location` (the expression tool does not have access to files on disk so - `path` is meaningless) or as file literals. It is legal to return a file - object with an existing `location` but a different `basename`. The - `loadContents` field of ExpressionTool inputs behaves the same as on - CommandLineTool inputs, however it is not meaningful on the outputs. - - An ExpressionTool may forward file references from input to output by using - the same value for `location`. - - fields: - - name: class - type: - type: enum - name: File_class - symbols: - - cwl:File - jsonldPredicate: - _id: "@type" - _type: "@vocab" - doc: Must be `File` to indicate this object describes a file. - - name: location - type: string? - doc: | - An IRI that identifies the file resource. This may be a relative - reference, in which case it must be resolved using the base IRI of the - document. The location may refer to a local or remote resource; the - implementation must use the IRI to retrieve file content. If an - implementation is unable to retrieve the file content stored at a - remote resource (due to unsupported protocol, access denied, or other - issue) it must signal an error. - - If the `location` field is not provided, the `contents` field must be - provided. 
The implementation must assign a unique identifier for - the `location` field. - - If the `path` field is provided but the `location` field is not, an - implementation may assign the value of the `path` field to `location`, - then follow the rules above. - jsonldPredicate: - _id: "@id" - _type: "@id" - - name: path - type: string? - doc: | - The local host path where the File is available when a CommandLineTool is - executed. This field must be set by the implementation. The final - path component must match the value of `basename`. This field - must not be used in any other context. The command line tool being - executed must be able to access the file at `path` using the POSIX - `open(2)` syscall. - - As a special case, if the `path` field is provided but the `location` - field is not, an implementation may assign the value of the `path` - field to `location`, and remove the `path` field. - - If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) - (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, - ``, ``, and ``) or characters - [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) - for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) - then implementations may terminate the process with a - `permanentFailure`. - jsonldPredicate: - "_id": "cwl:path" - "_type": "@id" - - name: basename - type: string? - doc: | - The base name of the file, that is, the name of the file without any - leading directory path. The base name must not contain a slash `/`. - - If not provided, the implementation must set this field based on the - `location` field by taking the final path component after parsing - `location` as an IRI. If `basename` is provided, it is not required to - match the value from `location`. - - When this file is made available to a CommandLineTool, it must be named - with `basename`, i.e. 
the final component of the `path` field must match - `basename`. - jsonldPredicate: "cwl:basename" - - name: dirname - type: string? - doc: | - The name of the directory containing file, that is, the path leading up - to the final slash in the path such that `dirname + '/' + basename == - path`. - - The implementation must set this field based on the value of `path` - prior to evaluating parameter references or expressions in a - CommandLineTool document. This field must not be used in any other - context. - - name: nameroot - type: string? - doc: | - The basename root such that `nameroot + nameext == basename`, and - `nameext` is empty or begins with a period and contains at most one - period. For the purposes of path splitting leading periods on the - basename are ignored; a basename of `.cshrc` will have a nameroot of - `.cshrc`. - - The implementation must set this field automatically based on the value - of `basename` prior to evaluating parameter references or expressions. - - name: nameext - type: string? - doc: | - The basename extension such that `nameroot + nameext == basename`, and - `nameext` is empty or begins with a period and contains at most one - period. Leading periods on the basename are ignored; a basename of - `.cshrc` will have an empty `nameext`. - - The implementation must set this field automatically based on the value - of `basename` prior to evaluating parameter references or expressions. - - name: checksum - type: string? - doc: | - Optional hash code for validating file integrity. Currently, must be in the form - "sha1$ + hexadecimal string" using the SHA-1 algorithm. 
- - name: size - type: - - "null" - - int - - long - doc: Optional file size (in bytes) - - name: "secondaryFiles" - type: - - "null" - - type: array - items: [File, Directory] - jsonldPredicate: - _id: "cwl:secondaryFiles" - secondaryFilesDSL: true - doc: | - A list of additional files or directories that are associated with the - primary file and must be transferred alongside the primary file. - Examples include indexes of the primary file, or external references - which must be included when loading primary document. A file object - listed in `secondaryFiles` may itself include `secondaryFiles` for - which the same rules apply. - - name: format - type: string? - jsonldPredicate: - _id: cwl:format - _type: "@id" - identity: true - doc: | - The format of the file: this must be an IRI of a concept node that - represents the file format, preferably defined within an ontology. - If no ontology is available, file formats may be tested by exact match. - - Reasoning about format compatibility must be done by checking that an - input file format is the same, `owl:equivalentClass` or - `rdfs:subClassOf` the format required by the input parameter. - `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if - ` owl:equivalentClass ` and ` owl:subclassOf ` then infer - ` owl:subclassOf `. - - File format ontologies may be provided in the "$schemas" metadata at the - root of the document. If no ontologies are specified in `$schemas`, the - runtime may perform exact file format matches. - - name: contents - type: string? - doc: | - File contents literal. - - If neither `location` nor `path` is provided, `contents` must be - non-null. The implementation must assign a unique identifier for the - `location` field. When the file is staged as input to CommandLineTool, - the value of `contents` must be written to a file. 
- - If `contents` is set as a result of a Javascript expression, - an `entry` in `InitialWorkDirRequirement`, or read in from - `cwl.output.json`, there is no specified upper limit on the - size of `contents`. Implementations may have practical limits - on the size of `contents` based on memory and storage - available to the workflow runner or other factors. - - If the `loadContents` field of an `InputParameter` or - `OutputParameter` is true, and the input or output File object - `location` is valid, the file must be a UTF-8 text file 64 KiB - or smaller, and the implementation must read the entire - contents of the file and place it in the `contents` field. If - the size of the file is greater than 64 KiB, the - implementation must raise a fatal error. - - -- name: Directory - type: record - docAfter: "#File" - doc: | - Represents a directory to present to a command line tool. - - Directories are represented as objects with `class` of `Directory`. Directory objects have - a number of properties that provide metadata about the directory. - - The `location` property of a Directory is a IRI that uniquely identifies - the directory. Implementations must support the file:// IRI scheme and may - support other schemes such as http://. Alternately to `location`, - implementations must also accept the `path` property on Directory, which - must be a filesystem path available on the same host as the CWL runner (for - inputs) or the runtime environment of a command line tool execution (for - command line tool outputs). - - A Directory object may have a `listing` field. This is a list of File and - Directory objects that are contained in the Directory. For each entry in - `listing`, the `basename` property defines the name of the File or - Subdirectory when staged to disk. If `listing` is not provided, the - implementation must have some way of fetching the Directory listing at - runtime based on the `location` field. 
- - If a Directory does not have `location`, it is a Directory literal. A - Directory literal must provide `listing`. Directory literals must be - created on disk at runtime as needed. - - The resources in a Directory literal do not need to have any implied - relationship in their `location`. For example, a Directory listing may - contain two files located on different hosts. It is the responsibility of - the runtime to ensure that those files are staged to disk appropriately. - Secondary files associated with files in `listing` must also be staged to - the same Directory. - - When executing a CommandLineTool, Directories must be recursively staged - first and have local values of `path` assigned. - - Directory objects in CommandLineTool output must provide either a - `location` IRI or a `path` property in the context of the tool execution - runtime (local to the compute node, or within the executing container). - - An ExpressionTool may forward file references from input to output by using - the same value for `location`. - - Name conflicts (the same `basename` appearing multiple times in `listing` - or in any entry in `secondaryFiles` in the listing) is a fatal error. - - fields: - - name: class - type: - type: enum - name: Directory_class - symbols: - - cwl:Directory - jsonldPredicate: - _id: "@type" - _type: "@vocab" - doc: Must be `Directory` to indicate this object describes a Directory. - - name: location - type: string? - doc: | - An IRI that identifies the directory resource. This may be a relative - reference, in which case it must be resolved using the base IRI of the - document. The location may refer to a local or remote resource. If - the `listing` field is not set, the implementation must use the - location IRI to retrieve directory listing. If an implementation is - unable to retrieve the directory listing stored at a remote resource (due to - unsupported protocol, access denied, or other issue) it must signal an - error. 
- - If the `location` field is not provided, the `listing` field must be - provided. The implementation must assign a unique identifier for - the `location` field. - - If the `path` field is provided but the `location` field is not, an - implementation may assign the value of the `path` field to `location`, - then follow the rules above. - jsonldPredicate: - _id: "@id" - _type: "@id" - - name: path - type: string? - doc: | - The local path where the Directory is made available prior to executing a - CommandLineTool. This must be set by the implementation. This field - must not be used in any other context. The command line tool being - executed must be able to access the directory at `path` using the POSIX - `opendir(2)` syscall. - - If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) - (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, - `<space>`, `<tab>`, and `<newline>`) or characters - [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) - for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) - then implementations may terminate the process with a - `permanentFailure`. - jsonldPredicate: - _id: "cwl:path" - _type: "@id" - - name: basename - type: string? - doc: | - The base name of the directory, that is, the name of the file without any - leading directory path. The base name must not contain a slash `/`. - - If not provided, the implementation must set this field based on the - `location` field by taking the final path component after parsing - `location` as an IRI. If `basename` is provided, it is not required to - match the value from `location`. - - When this file is made available to a CommandLineTool, it must be named - with `basename`, i.e. the final component of the `path` field must match - `basename`.
- jsonldPredicate: "cwl:basename" - - name: listing - type: - - "null" - - type: array - items: [File, Directory] - doc: | - List of files or subdirectories contained in this directory. The name - of each file or subdirectory is determined by the `basename` field of - each `File` or `Directory` object. It is an error if a `File` shares a - `basename` with any other entry in `listing`. If two or more - `Directory` object share the same `basename`, this must be treated as - equivalent to a single subdirectory with the listings recursively - merged. - jsonldPredicate: - _id: "cwl:listing" - - name: Labeled type: record abstract: true @@ -759,11 +372,7 @@ $graph: extends: [Parameter, InputFormat, LoadContents] fields: - name: default - type: - - "null" - - File - - Directory - - Any + type: CWLObjectType jsonldPredicate: _id: sld:default noLinkCheck: true diff --git a/Workflow.yml b/Workflow.yml index 88a7df6f..3de0329a 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -586,7 +586,7 @@ $graph: fields: - name: default - type: ["null", File, Directory, Any] + type: CWLObjectType doc: | The default value for this parameter to use if either there is no `source` field, or the value produced by the `source` is `null`. The diff --git a/salad/schema_salad/metaschema/metaschema.yml b/salad/schema_salad/metaschema/metaschema.yml index de0b0931..15952c62 100644 --- a/salad/schema_salad/metaschema/metaschema.yml +++ b/salad/schema_salad/metaschema/metaschema.yml @@ -356,6 +356,24 @@ $graph: Indicates that this enum inherits symbols from a base enum. +- name: SaladMapSchema + docParent: "#Schema" + type: record + extends: [NamedType, MapSchema, SchemaDefinedType] + documentRoot: true + doc: | + Define a map type. + + +- name: SaladUnionSchema + docParent: "#Schema" + type: record + extends: [NamedType, UnionSchema, SchemaDefinedType] + documentRoot: true + doc: | + Define a union type. 
+ + - name: Documentation type: record docParent: "#Schema" diff --git a/salad/schema_salad/metaschema/metaschema_base.yml b/salad/schema_salad/metaschema/metaschema_base.yml index 5a37a33a..25607ce8 100644 --- a/salad/schema_salad/metaschema/metaschema_base.yml +++ b/salad/schema_salad/metaschema/metaschema_base.yml @@ -77,6 +77,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string - type: array items: @@ -84,6 +86,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string jsonldPredicate: _id: sld:type @@ -168,6 +172,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string - type: array items: @@ -175,9 +181,91 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string jsonldPredicate: _id: "sld:items" _type: "@vocab" refScope: 2 doc: "Defines the type of the array elements." + + +- name: MapSchema + type: record + fields: + type: + doc: "Must be `map`" + type: + type: enum + name: Map_name + symbols: + - "sld:map" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + values: + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + jsonldPredicate: + _id: "sld:values" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the map elements." 
+ + +- name: UnionSchema + type: record + fields: + type: + doc: "Must be `union`" + type: + type: enum + name: Union_name + symbols: + - "sld:union" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + names: + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + jsonldPredicate: + _id: "sld:names" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the union elements." From b635305f8628f6d7ce09fd855e40c1f4adad5883 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Sat, 11 Feb 2023 16:44:31 +0100 Subject: [PATCH 06/14] Moved CWLInputFile to Workflow.yml --- Base.yml | 7 ------- Workflow.yml | 10 ++++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Base.yml b/Base.yml index 44a0573a..2d5bf4c5 100644 --- a/Base.yml +++ b/Base.yml @@ -416,10 +416,3 @@ $graph: Generic type representing a valid CWL object. It is used to represent `default` values passed to CWL `InputParameter` and `WorkflowStepInput` record fields. - - -- name: CWLInputFile - type: map - values: CWLObjectType - doc: | - Type representing a valid CWL input file as a `map`. diff --git a/Workflow.yml b/Workflow.yml index 3de0329a..88c19934 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -976,3 +976,13 @@ $graph: "_type": "@vocab" - {$import: Operation.yml} + + +- name: CWLInputFile + type: map + values: + - type: array + items: ProcessRequirement + - CWLObjectType + doc: | + Type representing a valid CWL input file as a `map`. 
\ No newline at end of file From 4bbe62583f442dd270afd77655232a5620d34be0 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Sun, 12 Feb 2023 02:04:29 +0100 Subject: [PATCH 07/14] Added flatten field to CWLObjectType array --- Base.yml | 10 +++++++--- Process.yml | 2 +- Workflow.yml | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Base.yml b/Base.yml index 2d5bf4c5..0c4731ec 100644 --- a/Base.yml +++ b/Base.yml @@ -399,7 +399,6 @@ $graph: - name: CWLObjectType type: union names: - - "null" - boolean - int - long @@ -409,9 +408,14 @@ $graph: - File - Directory - type: array - items: CWLObjectType + items: + - "null" + - CWLObjectType + flatten: false - type: map - values: CWLObjectType + values: + - "null" + - CWLObjectType doc: | Generic type representing a valid CWL object. It is used to represent `default` values passed to CWL `InputParameter` and `WorkflowStepInput` diff --git a/Process.yml b/Process.yml index fcd6af6d..79f850a9 100644 --- a/Process.yml +++ b/Process.yml @@ -372,7 +372,7 @@ $graph: extends: [Parameter, InputFormat, LoadContents] fields: - name: default - type: CWLObjectType + type: CWLObjectType? jsonldPredicate: _id: sld:default noLinkCheck: true diff --git a/Workflow.yml b/Workflow.yml index 88c19934..57e8766f 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -586,7 +586,7 @@ $graph: fields: - name: default - type: CWLObjectType + type: CWLObjectType? doc: | The default value for this parameter to use if either there is no `source` field, or the value produced by the `source` is `null`. The @@ -981,8 +981,9 @@ $graph: - name: CWLInputFile type: map values: + - "null" - type: array items: ProcessRequirement - CWLObjectType doc: | - Type representing a valid CWL input file as a `map`. \ No newline at end of file + Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`.
\ No newline at end of file From aeb719c9a56b21f75ec047e43ff15f418e066041 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Fri, 17 Feb 2023 16:48:27 +0100 Subject: [PATCH 08/14] Limited support for MapSchema --- salad/schema_salad/metaschema/metaschema_base.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/salad/schema_salad/metaschema/metaschema_base.yml b/salad/schema_salad/metaschema/metaschema_base.yml index 25607ce8..459e28b1 100644 --- a/salad/schema_salad/metaschema/metaschema_base.yml +++ b/salad/schema_salad/metaschema/metaschema_base.yml @@ -77,7 +77,6 @@ $graph: - RecordSchema - EnumSchema - ArraySchema - - MapSchema - UnionSchema - string - type: array @@ -86,7 +85,6 @@ $graph: - RecordSchema - EnumSchema - ArraySchema - - MapSchema - UnionSchema - string jsonldPredicate: @@ -172,7 +170,6 @@ $graph: - RecordSchema - EnumSchema - ArraySchema - - MapSchema - UnionSchema - string - type: array @@ -181,7 +178,6 @@ $graph: - RecordSchema - EnumSchema - ArraySchema - - MapSchema - UnionSchema - string jsonldPredicate: @@ -189,6 +185,13 @@ $graph: _type: "@vocab" refScope: 2 doc: "Defines the type of the array elements." + flatten: + type: boolean? + jsonldPredicate: + _id: "sld:flatten" + _type: "@vocab" + refScope: 2 + doc: "Flatten inner array objects into a single sequence (default: true)."
- name: MapSchema From ba0dadb237e630971d346734ca9df113f973188e Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Tue, 18 Apr 2023 11:21:54 +0200 Subject: [PATCH 09/14] Added Base.yml to render --- render.bash | 4 ++++ requirements.txt | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/render.bash b/render.bash index 05bf4852..ee774192 100644 --- a/render.bash +++ b/render.bash @@ -10,6 +10,10 @@ if [[ -d $bn ]] ; then else git clone $repo && pushd $bn; git checkout main ; git show --no-patch ; popd fi +echo "- basename: Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml +echo " class: File" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml +echo " location: ../v1.2/Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml +cat $WORKSPACE/cwl-website/site/v1.2-deps.yaml CURDIR=$PWD pushd $WORKSPACE/cwl-website ; ln -s $CURDIR v1.2 ; popd cp cwlsite-v1.2-only-job.yaml $WORKSPACE/cwl-website/site/ diff --git a/requirements.txt b/requirements.txt index e1d38dee..0c390a3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ cwltool - - +schema_salad @ git+https://github.com/common-workflow-language/schema_salad@refs/pull/672/head From b8c3152a213d13c36b0b7101eec9b1187b34d046 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Tue, 18 Apr 2023 12:04:59 +0200 Subject: [PATCH 10/14] Removed DocType from Map and Union --- .github/config/wordlist.txt | 5 ++ .gitignore | 2 +- Base.yml | 101 ++++++++++++++++++++++++++++++++++++ Process.yml | 32 ++++++------ Workflow.yml | 11 ---- render.bash | 3 +- 6 files changed, 124 insertions(+), 30 deletions(-) diff --git a/.github/config/wordlist.txt b/.github/config/wordlist.txt index 039d4ab0..c8b96f43 100644 --- a/.github/config/wordlist.txt +++ b/.github/config/wordlist.txt @@ -54,6 +54,11 @@ cryptographic ctbrown curii cwl +cwlarrayschema +cwlinputfile +cwlobjecttype +cwlrecordfield +cwlrecordschema cwltest cwltool cwltype diff --git a/.gitignore b/.gitignore index 2a25de39..21f3238d 100644 --- 
a/.gitignore +++ b/.gitignore @@ -8,4 +8,4 @@ *.egg-info *egg[s] __pycache__ -.python_history +.python_history \ No newline at end of file diff --git a/Base.yml b/Base.yml index 0c4731ec..36a562d2 100644 --- a/Base.yml +++ b/Base.yml @@ -17,6 +17,97 @@ $graph: - "File: A File object" - "Directory: A Directory object" +- name: CWLArraySchema + type: record + fields: + type: + doc: "Must be `array`" + type: + type: enum + name: cwl_array_name + symbols: + - "sld:array" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + items: + type: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + - type: array + items: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + jsonldPredicate: + _id: "sld:items" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the array elements." + +- name: CWLRecordField + type: record + extends: "sld:Documented" + doc: A field of a record. + fields: + - name: name + type: string + jsonldPredicate: "@id" + doc: | + The name of the field + + - name: type + type: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + - type: array + items: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + jsonldPredicate: + _id: sld:type + _type: "@vocab" + typeDSL: true + refScope: 2 + doc: | + The field type + +- name: CWLRecordSchema + type: record + fields: + type: + doc: "Must be `record`" + type: + type: enum + name: cwl_record_name + symbols: + - "sld:record" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + fields: + type: CWLRecordField[]? + jsonldPredicate: + _id: sld:fields + mapSubject: name + mapPredicate: type + doc: "Defines the fields of the record." + - name: File type: record docParent: "#CWLType" @@ -420,3 +511,13 @@ $graph: Generic type representing a valid CWL object. 
It is used to represent `default` values passed to CWL `InputParameter` and `WorkflowStepInput` record fields. + +- name: CWLInputFile + type: map + values: + - "null" + - type: array + items: ProcessRequirement + - CWLObjectType + doc: | + Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`. \ No newline at end of file diff --git a/Process.yml b/Process.yml index 79f850a9..5e2c7a5c 100644 --- a/Process.yml +++ b/Process.yml @@ -283,13 +283,13 @@ $graph: - name: InputRecordField type: record - extends: [sld:RecordField, FieldBase, InputFormat, LoadContents] + extends: [CWLRecordField, FieldBase, InputFormat, LoadContents] specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: InputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: InputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: InputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -297,9 +297,9 @@ $graph: - name: InputRecordSchema type: record - extends: ["sld:RecordSchema", InputSchema] + extends: [CWLRecordSchema, InputSchema] specialize: - - specializeFrom: "sld:RecordField" + - specializeFrom: CWLRecordField specializeTo: InputRecordField @@ -310,13 +310,13 @@ $graph: - name: InputArraySchema type: record - extends: ["sld:ArraySchema", InputSchema] + extends: [CWLArraySchema, InputSchema] specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: InputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: InputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: InputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -324,13 +324,13 @@ $graph: - name: OutputRecordField type: record - extends: [sld:RecordField, FieldBase, OutputFormat] + extends: [CWLRecordField, FieldBase, OutputFormat] specialize: - - specializeFrom: "sld:RecordSchema" + -
specializeFrom: CWLRecordSchema specializeTo: OutputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: OutputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: OutputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -338,10 +338,10 @@ $graph: - name: OutputRecordSchema type: record - extends: ["sld:RecordSchema", "#OutputSchema"] + extends: [CWLRecordSchema, "#OutputSchema"] docParent: "#OutputParameter" specialize: - - specializeFrom: "sld:RecordField" + - specializeFrom: CWLRecordField specializeTo: OutputRecordField @@ -353,14 +353,14 @@ $graph: - name: OutputArraySchema type: record - extends: ["sld:ArraySchema", OutputSchema] + extends: [CWLArraySchema, OutputSchema] docParent: "#OutputParameter" specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: OutputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: OutputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: OutputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType diff --git a/Workflow.yml b/Workflow.yml index 57e8766f..af19b345 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -976,14 +976,3 @@ $graph: "_type": "@vocab" - {$import: Operation.yml} - - -- name: CWLInputFile - type: map - values: - - "null" - - type: array - items: ProcessRequirement - - CWLObjectType - doc: | - Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`.
\ No newline at end of file diff --git a/render.bash b/render.bash index ee774192..68541363 100644 --- a/render.bash +++ b/render.bash @@ -1,6 +1,6 @@ #!/usr/bin/env bash if [[ -z "$WORKSPACE" ]] ; then - WORKSPACE=$PWD/temp/ + WORKSPACE=$PWD fi mkdir -p $WORKSPACE repo=https://github.com/common-workflow-language/cwl-website \ @@ -13,7 +13,6 @@ fi echo "- basename: Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml echo " class: File" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml echo " location: ../v1.2/Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml -cat $WORKSPACE/cwl-website/site/v1.2-deps.yaml CURDIR=$PWD pushd $WORKSPACE/cwl-website ; ln -s $CURDIR v1.2 ; popd cp cwlsite-v1.2-only-job.yaml $WORKSPACE/cwl-website/site/ From d6bc230473045bc5ac999b180cca27afca480fc5 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Sat, 7 Oct 2023 14:56:33 +0200 Subject: [PATCH 11/14] Make CWLType extend original sld:Type --- Base.yml | 35 ++----------------- .../metaschema/metaschema_base.yml | 8 ++++- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/Base.yml b/Base.yml index 36a562d2..a3c82fc5 100644 --- a/Base.yml +++ b/Base.yml @@ -19,19 +19,8 @@ $graph: - name: CWLArraySchema type: record + extends: "sld:ArraySchema" fields: - type: - doc: "Must be `array`" - type: - type: enum - name: cwl_array_name - symbols: - - "sld:array" - jsonldPredicate: - _id: "sld:type" - _type: "@vocab" - typeDSL: true - refScope: 2 items: type: - PrimitiveType @@ -54,15 +43,8 @@ $graph: - name: CWLRecordField type: record - extends: "sld:Documented" - doc: A field of a record. 
+ extends: "sld:RecordField" fields: - - name: name - type: string - jsonldPredicate: "@id" - doc: | - The name of the field - - name: type type: - PrimitiveType @@ -87,19 +69,8 @@ $graph: - name: CWLRecordSchema type: record + extends: "sld:RecordSchema" fields: - type: - doc: "Must be `record`" - type: - type: enum - name: cwl_record_name - symbols: - - "sld:record" - jsonldPredicate: - _id: "sld:type" - _type: "@vocab" - typeDSL: true - refScope: 2 fields: type: CWLRecordField[]? jsonldPredicate: diff --git a/salad/schema_salad/metaschema/metaschema_base.yml b/salad/schema_salad/metaschema/metaschema_base.yml index 459e28b1..c29b6f99 100644 --- a/salad/schema_salad/metaschema/metaschema_base.yml +++ b/salad/schema_salad/metaschema/metaschema_base.yml @@ -77,6 +77,7 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema - UnionSchema - string - type: array @@ -85,6 +86,7 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema - UnionSchema - string jsonldPredicate: @@ -93,7 +95,9 @@ $graph: typeDSL: true refScope: 2 doc: | - The field type + The field type. If it is an array, it indicates + that the field type is a union type of its elements. + Its elements may be duplicated. - name: RecordSchema @@ -170,6 +174,7 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema - UnionSchema - string - type: array @@ -178,6 +183,7 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema - UnionSchema - string jsonldPredicate: From e9acf616a8cbcba943da82cbaf3002911734cf11 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Mon, 6 Nov 2023 16:03:58 +0100 Subject: [PATCH 12/14] Added `noLinkCheck` to `format` field CWL parsers should not follow `format` links. 
--- .github/config/wordlist.txt | 4 ++++ Base.yml | 8 ++++++-- Process.yml | 5 ++++- Workflow.yml | 1 + concepts.md | 2 +- render.bash | 6 +++--- salad/schema_salad/metaschema/metaschema.yml | 10 +++++++++- .../metaschema/metaschema_base.yml | 7 ------- salad/schema_salad/metaschema/salad.md | 13 +++++++++++- tests/cat3-tool-docker.cwl | 19 ------------------ tests/wc-tool-shortcut.cwl | 20 ------------------- 11 files changed, 40 insertions(+), 55 deletions(-) delete mode 100755 tests/cat3-tool-docker.cwl delete mode 100755 tests/wc-tool-shortcut.cwl diff --git a/.github/config/wordlist.txt b/.github/config/wordlist.txt index c8b96f43..1f9f8642 100644 --- a/.github/config/wordlist.txt +++ b/.github/config/wordlist.txt @@ -25,6 +25,7 @@ chilton cisphobia classname cmd +colonnelli commandinputarrayschema commandinputenumschema commandinputparameter @@ -113,6 +114,7 @@ gmail gunzip hervé https +iacopo iana ietf implementer @@ -127,6 +129,7 @@ inplaceupdaterequirement inputarrayschema inputenumschema inputbinding +inputfile inputformat inputrecordfield inputrecordschema @@ -275,6 +278,7 @@ tijanić timelimit toc tooltimelimit +torino transcode txt typedef diff --git a/Base.yml b/Base.yml index a3c82fc5..c2ca0d5b 100644 --- a/Base.yml +++ b/Base.yml @@ -285,6 +285,7 @@ $graph: _id: cwl:format _type: "@id" identity: true + noLinkCheck: true doc: | The format of the file: this must be an IRI of a concept node that represents the file format, preferably defined within an ontology. @@ -473,7 +474,6 @@ $graph: items: - "null" - CWLObjectType - flatten: false - type: map values: - "null" @@ -491,4 +491,8 @@ $graph: items: ProcessRequirement - CWLObjectType doc: | - Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`. \ No newline at end of file + Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`.
+ jsonldPredicate: + _id: "cwl:inputfile" + _container: "@list" + noLinkCheck: true diff --git a/Process.yml b/Process.yml index 5e2c7a5c..acc77c07 100644 --- a/Process.yml +++ b/Process.yml @@ -199,6 +199,7 @@ $graph: _id: cwl:format _type: "@id" identity: true + noLinkCheck: true doc: | Only valid when `type: File` or is an array of `items: File`. @@ -221,6 +222,7 @@ $graph: _id: cwl:format _type: "@id" identity: true + noLinkCheck: true doc: | Only valid when `type: File` or is an array of `items: File`. @@ -374,7 +376,8 @@ $graph: - name: default type: CWLObjectType? jsonldPredicate: - _id: sld:default + _id: "sld:default" + _container: "@list" noLinkCheck: true doc: | The default value to use for this parameter if the parameter is missing diff --git a/Workflow.yml b/Workflow.yml index af19b345..bafe5b46 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -593,6 +593,7 @@ $graph: default must be applied prior to scattering or evaluating `valueFrom`. jsonldPredicate: _id: "sld:default" + _container: "@list" noLinkCheck: true - name: valueFrom type: diff --git a/concepts.md b/concepts.md index 378c4a42..77a1aa2d 100644 --- a/concepts.md +++ b/concepts.md @@ -331,7 +331,7 @@ prefix listed in the `$namespaces` section of the document as described in the [Schema Salad specification](SchemaSalad.html#Explicit_context). It is recommended that concepts from schema.org are used whenever possible. -For the `$schemas` field we recommend their RDF encoding: http://schema.org/version/latest/schema.rdf +For the `$schemas` field we recommend their RDF encoding: https://schema.org/version/latest/schemaorg-current-https.rdf Implementation extensions which modify execution semantics must be [listed in the `requirements` field](#Requirements_and_hints). 
diff --git a/render.bash b/render.bash index 68541363..eda4758c 100644 --- a/render.bash +++ b/render.bash @@ -1,14 +1,14 @@ #!/usr/bin/env bash if [[ -z "$WORKSPACE" ]] ; then - WORKSPACE=$PWD + WORKSPACE=$PWD/temp/ fi mkdir -p $WORKSPACE repo=https://github.com/common-workflow-language/cwl-website \ -bn=$(basename $repo) +bn=$WORKSPACE/$(basename $repo) if [[ -d $bn ]] ; then (cd $bn && git fetch origin && git reset --hard origin/main) else - git clone $repo && pushd $bn; git checkout main ; git show --no-patch ; popd + git clone $repo $bn && pushd $bn; git checkout main ; git show --no-patch ; popd fi echo "- basename: Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml echo " class: File" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml diff --git a/salad/schema_salad/metaschema/metaschema.yml b/salad/schema_salad/metaschema/metaschema.yml index 15952c62..f696e0ae 100644 --- a/salad/schema_salad/metaschema/metaschema.yml +++ b/salad/schema_salad/metaschema/metaschema.yml @@ -274,6 +274,7 @@ $graph: doc: | If true, indicates that the type is a valid at the document root. At least one type in a schema must be tagged with `documentRoot: true`. + jsonldPredicate: sld:documentRoot - name: SaladRecordField @@ -368,10 +369,17 @@ $graph: - name: SaladUnionSchema docParent: "#Schema" type: record - extends: [NamedType, UnionSchema, SchemaDefinedType] + extends: [NamedType, UnionSchema, DocType] documentRoot: true doc: | Define a union type. + fields: + - name: documentRoot + type: boolean? + doc: | + If true, indicates that the type is a valid at the document root. At + least one type in a schema must be tagged with `documentRoot: true`. 
+ jsonldPredicate: sld:documentRoot - name: Documentation diff --git a/salad/schema_salad/metaschema/metaschema_base.yml b/salad/schema_salad/metaschema/metaschema_base.yml index c29b6f99..3bdf6390 100644 --- a/salad/schema_salad/metaschema/metaschema_base.yml +++ b/salad/schema_salad/metaschema/metaschema_base.yml @@ -191,13 +191,6 @@ $graph: _type: "@vocab" refScope: 2 doc: "Defines the type of the array elements." - flatten: - type: boolean? - jsonldPredicate: - _id: "sld:flatten" - _type: "@vocab" - refScope: 2 - doc: "Flatten inner array objects into a single sequence (default: true)." - name: MapSchema diff --git a/salad/schema_salad/metaschema/salad.md b/salad/schema_salad/metaschema/salad.md index dca21120..26d4a6b8 100644 --- a/salad/schema_salad/metaschema/salad.md +++ b/salad/schema_salad/metaschema/salad.md @@ -10,6 +10,7 @@ Contributors: * The developers of JSON-LD * Nebojša Tijanić , Seven Bridges Genomics * Michael R. Crusoe, ELIXIR-DE +* Iacopo Colonnelli, University of Torino # Abstract @@ -120,6 +121,16 @@ clarifications. resolved with [identifier resolution](#Identifier_resolution). Otherwise the field is resolved with [link resolution](#Link_resolution). +## Introduction to v1.3 + +This is the fifth version of the Schema Salad specification. It was created to +enhance code generation by representing CWL data types as specific Python objects +(instead of relying on the generic `Any` type). The following changes have been made: + +* Support for the Avro `map` schema +* Add named versions of the `map` and `union` Avro types +* Support for nested named `union` type definitions + ## References to Other Specifications **Javascript Object Notation (JSON)**: http://json.org @@ -173,7 +184,7 @@ enable or disable the behavior described. 
## Data concepts An **object** is a data structure equivalent to the "object" type in JSON, -consisting of an unordered set of name/value pairs (referred to here as +consisting of an unordered set of name/value pairs (referred to here as **fields**) and where the name is a string and the value is a string, number, boolean, array, or object. diff --git a/tests/cat3-tool-docker.cwl b/tests/cat3-tool-docker.cwl deleted file mode 100755 index b486e8ce..00000000 --- a/tests/cat3-tool-docker.cwl +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.2 -doc: "Print the contents of a file to stdout using 'cat' running in a docker container." -requirements: - DockerRequirement: - dockerPull: docker.io/debian:stable-slim -inputs: - file1: - type: File - label: Input File - doc: "The file that will be copied using 'cat'" - inputBinding: {position: 1} -outputs: - output_file: - type: File - outputBinding: {glob: output.txt} -baseCommand: cat -stdout: output.txt diff --git a/tests/wc-tool-shortcut.cwl b/tests/wc-tool-shortcut.cwl deleted file mode 100755 index f157cca3..00000000 --- a/tests/wc-tool-shortcut.cwl +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env cwl-runner - -class: CommandLineTool -cwlVersion: v1.2 - -requirements: - - class: DockerRequirement - dockerPull: docker.io/debian:stable-slim - -inputs: - file1: stdin - -outputs: - output: - type: File - outputBinding: { glob: output } - -baseCommand: [wc] - -stdout: output From 92c44c784ab62cc6f7ba1a2c87c1860263f405d8 Mon Sep 17 00:00:00 2001 From: "Michael R.
Crusoe" Date: Wed, 10 Apr 2024 15:34:54 +0300 Subject: [PATCH 13/14] set schema-salad minimum version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0c390a3c..db185255 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ cwltool -schema_salad @ git+https://github.com/common-workflow-language/schema_salad@refs/pull/672/head +schema_salad >= 8.5.20231201181309 From 61699f5dfd891fed2ebf2a820f2be3eafe4241a4 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 10 Apr 2024 15:37:27 +0300 Subject: [PATCH 14/14] ci: run on/for codegen branch --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0928beaf..8266c80b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,9 +6,9 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main ] + branches: [ main, codegen ] pull_request: - branches: [ main, 1.2.1_proposed ] + branches: [ main, codegen ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: