diff --git a/CHANGELOG b/CHANGELOG index ac62a57..3fa1fdc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +v1.3 +- Changed the docker container to be broad institute's official docker container, not wtsi-hgi own container + v1.2.2 - Resolving secondaryFiles to a relative path, to work with newer cwl-runner versions and the advice in "secondaryFiles" of http://www.commonwl.org/v1.0/Workflow.html#WorkflowOutputParameter diff --git a/README.md b/README.md index 192b403..8124302 100644 --- a/README.md +++ b/README.md @@ -13,29 +13,13 @@ python setup.py install You may also want to install [cwltool](https://github.com/common-workflow-language/cwltool) to run the generated CWL files -### Docker Requirements - -The generated CWL files have a dependency on a GATK docker container. These can be found at https://github.com/wtsi-hgi/arvados-pipelines/tree/master/docker/ - -As Oracle have discontinued public downloading of JDK 7, to install the docker image you need to install jdk-7u25-linux-x64.tar.gz from http://www.oracle.com/technetwork/java/javase/downloads/java-archive-downloads-javase7-521261.html and put it in the same directory as the Dockerfile (see https://github.com/wtsi-hgi/arvados-pipelines/blob/master/docker/gatk-3.5/Dockerfile#L44) -```bash -git clone https://github.com/wtsi-hgi/arvados-pipelines -cd arvados-pipelines/docker -... add jdk-7u25-linux-x64.tar to the directory ... -docker build gatk-3.5 -t gatk -``` - -To enter the container, run: -```bash -docker run --name='gatk' -it gatk /bin/bash -``` - ## Usage ``` usage: gatkcwlgenerator [-h] [--version GATKVERSION] [--out OUTPUTDIR] [--include INCLUDE_FILE] [--dev] [--docker_container_name DOCKER_CONTAINER_NAME] + [--gatk_location GATK_LOCATION] Generates CWL files from the GATK documentation @@ -54,14 +38,18 @@ optional arguments: generated files (for development purposes). 
Requires requests_cache to be installed --docker_container_name DOCKER_CONTAINER_NAME, -c DOCKER_CONTAINER_NAME - Enable network caching and overwriting of the - generated files (for development purposes). Default is - 'gatk' + Docker container name for generated cwl files. Default + is 'broadinstitute/gatk3:<version>-0' for version 3.x and + 'broadinstitute/gatk:<version>' for 4.x + --gatk_location GATK_LOCATION, -l GATK_LOCATION + Location of the gatk jar file. Default is + '/usr/GenomeAnalysisTK.jar' for gatk 3.x and + '/gatk/gatk.jar' for gatk 4.x ``` This has been tested on versions 3.5-3.8 and generates files for version 4 (though some parameters are unknown and default to outputting a string). -The input parameters are the same as in the documentation, with the addition of `refIndex` and `refDict` which are required parameters that specify the index and dict file of the reference genome. +The input parameters are the same as in the documentation, with the addition of `refIndex` and `refDict` which are required parameters that specify the index and dict file of the reference genome. To add tags to arguments that have a file type, add to the parameter `_tags`. e.g. 
to output the parameter `--variant:vcf path\to\file`, use the input: ```yml diff --git a/gatkcwlgenerator/json2cwl.py b/gatkcwlgenerator/json2cwl.py index 6f59699..a567d85 100644 --- a/gatkcwlgenerator/json2cwl.py +++ b/gatkcwlgenerator/json2cwl.py @@ -57,12 +57,17 @@ def json2cwl(GATK_json, cwl_dir, cmd_line_options): skeleton_cwl = { 'id': GATK_json['name'], 'cwlVersion': 'v1.0', - 'baseCommand': ['java', '-jar', '/gatk/GenomeAnalysisTK.jar'], + 'baseCommand': ['java', '-jar', cmd_line_options.gatk_location], 'class': 'CommandLineTool', + 'hints': [{ + "class": "DockerRequirement", + "dockerPull": cmd_line_options.docker_container_name + }], 'requirements': [ { "class": "ShellCommandRequirement" - }, { + }, + { "class": "InlineJavascriptRequirement", "expressionLib": [ # Allows you to add annotations @@ -75,10 +80,6 @@ def json2cwl(GATK_json, cwl_dir, cmd_line_options): } }""".replace(" ", "").replace("\n", "") ] - }, - { - "dockerPull": cmd_line_options.docker_container_name + ":latest", - "class": "DockerRequirement" } ] } diff --git a/gatkcwlgenerator/main.py b/gatkcwlgenerator/main.py index 66229c0..d4c1d5a 100644 --- a/gatkcwlgenerator/main.py +++ b/gatkcwlgenerator/main.py @@ -186,9 +186,11 @@ def main(): parser.add_argument("--dev", dest="dev", action="store_true", help="Enable network caching and overwriting of the generated files (for development purposes). " + "Requires requests_cache to be installed") - parser.add_argument("--docker_container_name", "-c", dest="docker_container_name", default="gatk", - help="Enable network caching and overwriting of the generated files (for development purposes). " + - "Default is 'gatk'") + parser.add_argument("--docker_container_name", "-c", dest="docker_container_name", + help="Docker container name for generated cwl files. 
Default is 'broadinstitute/gatk3:-0' " + + "for version 3.x and 'broadinstitute/gatk:' for 4.x") + parser.add_argument("--gatk_location", "-l", dest="gatk_location", + help="Location of the gatk jar file. Default is '/usr/GenomeAnalysisTK.jar' for gatk 3.x and '/gatk/gatk.jar' for gatk 4.x") cmd_line_options = parser.parse_args() @@ -197,7 +199,19 @@ def main(): requests_cache.install_cache() # Decreases the time to run dramatically if not cmd_line_options.outputdir: - cmd_line_options.outputdir = os.getcwd() + '/gatk_cmdline_tools/%s' % cmd_line_options.gatkversion + cmd_line_options.outputdir = os.getcwd() + '/gatk_cmdline_tools/' + cmd_line_options.gatkversion + + if not cmd_line_options.docker_container_name: + if is_version_3(cmd_line_options.gatkversion): + cmd_line_options.docker_container_name = "broadinstitute/gatk3:" + cmd_line_options.gatkversion + "-0" + else: + cmd_line_options.docker_container_name = "broadinstitute/gatk:" + cmd_line_options.gatkversion + + if not cmd_line_options.gatk_location: + if is_version_3(cmd_line_options.gatkversion): + cmd_line_options.gatk_location = "/usr/GenomeAnalysisTK.jar" + else: + cmd_line_options.gatk_location = "/gatk/gatk.jar" print("Your chosen directory is: %s" % cmd_line_options.outputdir) grouped_urls = get_json_links(cmd_line_options.gatkversion) diff --git a/setup.py b/setup.py index 6410f45..1e6334e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ def read_markdown(file): setup( name="gatk_cwl_generator", - version="1.2.2", + version="1.3.0", packages=find_packages(exclude=["tests"]), install_requires=open("requirements.txt", "r").readlines(), tests_require=open("test_requirements.txt", "r").readlines(),