nextflow_schema.json

{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/master/nextflow_schema.json",
    "title": "qbic-pipelines/rnadeseq pipeline parameters",
    "description": "Differential gene expression analysis and pathway analysis of RNAseq data",
    "type": "object",
    "definitions": {
        "main_arguments": {
            "title": "Main input arguments",
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "properties": {
                "input_type": {
                    "type": "string",
                    "default": "featurecounts",
                    "description": "Which type of input is provided, one of featurecounts, rsem, salmon, smrnaseq."
                },
                "gene_counts": {
                    "type": "string",
                    "description": "If input_type = featurecounts: Raw count table (TSV). Columns are samples and rows are genes. 1st column Ensembl_ID, 2nd column gene_name. If input_type = rsem or input_type = salmon: Path to rsem/salmon output folder. If input_type = smrnaseq: Path to smrnaseq output folder containing hairpin and non-hairpin mature .sorted.idxstats files."
                },
                "input": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Metadata table/samplesheet (TSV). Rows are samples and columns contain sample grouping."
                },
                "model": {
                    "type": "string",
                    "format": "file-path",
                    "exists": true,
                    "description": "Linear model function to calculate the contrasts (TXT). Variable names should be columns in metadata file/samplesheet."
                },
                "gtf": {
                    "type": "string",
                    "description": "Not necessary if genome is set or input_type is featurecounts. GTF file to be used for DESeq if input is rsem or salmon."
                }
            },
            "required": ["input_type", "gene_counts", "input", "model"]
        },
        "deseq2_arguments": {
            "title": "DESeq2 arguments",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "contrast_matrix": {
                    "type": "string",
                    "default": "DEFAULT",
                    "description": "Tsv indicating which contrasts to consider, one contrast per column. 1 or 0 for every coefficient of the linear model. Check contrasts docs."
                },
                "contrast_list": {
                    "type": "string",
                    "default": "DEFAULT1",
                    "description": "Tsv indicating list of the contrasts to calculate. 3 columns: factor name, contrast numerator and denominator. Check contrasts docs."
                },
                "contrast_pairs": {
                    "type": "string",
                    "default": "DEFAULT2",
                    "description": "Tsv indicating list of contrast pairs to calculate. 3 columns: contrast name, numerator and denominator. Check contrasts docs."
                },
                "genelist": {
                    "type": "string",
                    "default": "NO_FILE",
                    "description": "Txt file with list of genes (one per line) of which to plot heatmaps for normalized counts across all samples."
                },
                "relevel": {
                    "type": "string",
                    "default": "NO_FILE2",
                    "description": "Tsv indicating list of factors (conditions in the metadata table/samplesheet) and the new level on which to relevel the factor. Check contrasts docs."
                },
                "batch_effect": {
                    "type": "boolean",
                    "description": "Turn on this flag if you wish to consider batch effects. You need to add the batch effect to the linear model too!"
                },
                "logFC_threshold": {
                    "type": "integer",
                    "default": 0,
                    "description": "Threshold (int) to apply to Log 2 Fold Change to consider a gene as differentially expressed."
                },
                "adj_pval_threshold": {
                    "type": "number",
                    "default": 0.05,
                    "description": "p value (float) to consider a gene as differentially expressed. The default value is 0.05."
                },
                "norm_method": {
                    "type": "string",
                    "default": "rlog",
                    "enum": ["rlog", "vst", "vst-force"],
                    "description": "Which transformation to use during DE analysis; either rlog or vst or vst-force. If vst: the pipeline will override this and run rlog if it detects a strong variation in size factors in the input data (this will be stated in the report). To disable this behavior, set to vst-force."
                },
                "vst_genes_number": {
                    "type": "integer",
                    "default": 1000,
                    "description": "Integer indicating how many genes to subset to during vst transformation (when using rlog, will ignore this number). Default: 1000."
                },
                "round_DE": {
                    "type": "integer",
                    "default": -1,
                    "description": "Number of decimals to which to round result tables, default: -1 (this means no rounding)."
                }
            }
        },
        "pathway_arguments": {
            "title": "Pathway analysis arguments",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "organism": {
                    "type": "string",
                    "description": "Not necessary if genome is set. Which organism name to use for pathway analysis, e.g. `hsapiens`, not necessary if `--run_pathway_analysis = false`."
                },
                "species_library": {
                    "type": "string",
                    "description": "Not necessary if genome is set. Which bioconductor library to use for pathway analysis, e.g. org.Hs.eg.db, not necessary if --run_pathway_analysis = false."
                },
                "keytype": {
                    "type": "string",
                    "description": "Not necessary if genome is set. Which keytype to use for pathway analysis, e.g. ENSEMBL, not necessary if --run_pathway_analysis = false."
                },
                "run_pathway_analysis": {
                    "type": "boolean",
                    "description": "Turn on this flag if you wish to run pathway analysis."
                },
                "custom_gmt": {
                    "type": "string",
                    "default": "NO_FILE3",
                    "description": "Path to custom GMT file for gost query, this allows e.g. to query older versions of databases for pathway analysis, not necessary if --skip_pathway_analysis = true. If --datasources is set, the GMT file will be filtered for these datasources."
                },
                "set_background": {
                    "type": "boolean",
                    "default": true,
                    "description": "Whether to limit pathway analysis to a background list of genes (i.e. those genes expressed in at least one sample), default = true."
                },
                "custom_background": {
                    "type": "string",
                    "default": "NO_FILE7",
                    "description": "Path to custom background TXT file for gost query containing a gene ID in each line, not necessary if --skip_pathway_analysis = true or --set_background = false."
                },
                "min_DEG_pathway": {
                    "type": "integer",
                    "default": 1,
                    "description": "Integer indicating how many genes in a pathway must be differentially expressed to be considered as enriched, and report these pathways in tables and the final report. The default value is 1."
                },
                "datasources": {
                    "type": "string",
                    "description": "Which datasources to use for pathway analysis, comma-separated string like 'KEGG,REAC'. See param 'sources' on https://rdrr.io/cran/gprofiler2/man/gost.html for a list of available sources. If not set, will use all sources. If set while a --custom_gmt is provided, will filter the GMT for these datasources (will not filter for the GO subtypes like GO:BP, just for GO)."
                },
                "heatmaps_cluster_rows": {
                    "type": "boolean",
                    "default": true,
                    "description": "Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."
                },
                "heatmaps_cluster_cols": {
                    "type": "boolean",
                    "default": false,
                    "description": "Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."
                },
                "pathway_adj_pval_threshold": {
                    "type": "number",
                    "default": -1,
                    "description": "Adjusted p value (float) to use as threshold for pathway analysis. If omitted, will use the value of the parameter adj_pval_threshold (default 0.05)."
                }
            }
        },
        "report_arguments": {
            "title": "Report arguments",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "project_summary": {
                    "type": "string",
                    "description": "Project summary file downloaded from the qPortal."
                },
                "multiqc": {
                    "type": "string",
                    "default": "NO_FILE4",
                    "description": "multiqc.zip folder containing the multiQC plots and report."
                },
                "quote": {
                    "type": "string",
                    "default": "NO_FILE5",
                    "description": "Quote file to link in the report."
                },
                "software_versions": {
                    "type": "string",
                    "default": "NO_FILE6",
                    "description": "CSV/YML containing nf-core/rnaseq software versions."
                },
                "report_file": {
                    "type": "string",
                    "default": "${projectDir}/assets/RNAseq_report.Rmd",
                    "description": "Report Rmd file, should not be changed."
                },
                "references_file": {
                    "type": "string",
                    "default": "${projectDir}/assets/references.bibtex",
                    "description": "Reference bibtex file for the report, should not be changed."
                },
                "css": {
                    "type": "string",
                    "default": "${projectDir}/assets/corp-styles.css",
                    "description": "Stylesheet css file for the report, should not be changed."
                },
                "logo": {
                    "type": "string",
                    "default": "${projectDir}/assets/logo.png",
                    "description": "QBiC logo png file for the report, should not be changed."
                }
            }
        },
        "genome_options": {
            "title": "Genome options",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "genome": {
                    "type": "string",
                    "description": "Which genome to use for analysis, e.g. GRCh37; see /conf/igenomes.config for which genomes are available."
                },
                "igenomes_base": {
                    "type": "string",
                    "default": "s3://ngi-igenomes/igenomes",
                    "description": "Directory / URL base for iGenomes references."
                },
                "igenomes_ignore": {
                    "type": "boolean",
                    "description": "Do not load the iGenomes reference config."
                }
            }
        },
        "github_options": {
            "title": "Github options",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "citest": {
                    "type": "boolean",
                    "description": "Only enable for CI tests on the github servers",
                    "hidden": true
                }
            }
        },
        "nextflow_options": {
            "title": "Nextflow options",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "publish_dir_mode": {
                    "type": "string",
                    "description": "Method used to save pipeline results to output directory.",
                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                    "fa_icon": "fas fa-copy",
                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                    "hidden": true
                },
                "outdir": {
                    "type": "string",
                    "description": "The output directory where the results will be saved",
                    "default": "./results",
                    "fa_icon": "fas fa-folder-open"
                }
            }
        },
        "boilerplate_options": {
            "title": "Boilerplate options",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "email": {
                    "type": "string",
                    "description": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits"
                },
                "email_on_fail": {
                    "type": "string",
                    "description": "Email address for completion summary, only when pipeline fails.",
                    "fa_icon": "fas fa-exclamation-triangle",
                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
                    "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
                    "hidden": true
                },
                "plaintext_email": {
                    "type": "boolean",
                    "description": "Send plain-text email instead of HTML.",
                    "fa_icon": "fas fa-remove-format",
                    "hidden": true
                },
                "monochrome_logs": {
                    "type": "boolean",
                    "description": "Do not use coloured log outputs.",
                    "hidden": true
                },
                "help": {
                    "type": "boolean",
                    "description": "Display help text.",
                    "fa_icon": "fas fa-question-circle"
                },
                "tracedir": {
                    "type": "string",
                    "default": "./results/pipeline_info",
                    "description": "Directory to keep pipeline Nextflow logs and reports.",
                    "hidden": true
                },
                "custom_config_version": {
                    "type": "string",
                    "default": "master",
                    "description": "Git commit id for Institutional configs.",
                    "hidden": true
                },
                "custom_config_base": {
                    "type": "string",
                    "default": "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}",
                    "description": "Base directory for Institutional configs.",
                    "hidden": true
                },
                "config_profile_description": {
                    "type": "string",
                    "description": "Institutional config description.",
                    "hidden": true
                },
                "config_profile_contact": {
                    "type": "string",
                    "description": "Institutional config contact information.",
                    "hidden": true
                },
                "config_profile_url": {
                    "type": "string",
                    "description": "Institutional config URL link.",
                    "hidden": true
                },
                "hook_url": {
                    "type": "string",
                    "description": "Incoming hook URL for messaging service",
                    "fa_icon": "fas fa-people-group",
                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
                    "hidden": true
                },
                "validationShowHiddenParams": {
                    "type": "boolean",
                    "fa_icon": "far fa-eye-slash",
                    "description": "Show all params when using `--help`",
                    "hidden": true,
                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
                }
            }
        },
        "max_job_request_options": {
            "title": "Max job request options",
            "type": "object",
            "fa_icon": "fab fa-acquisitions-incorporated",
            "description": "Set the top limit for requested resources for any single job.",
            "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
            "properties": {
                "max_cpus": {
                    "type": "integer",
                    "description": "Maximum number of CPUs that can be requested for any single job.",
                    "default": 16,
                    "fa_icon": "fas fa-microchip",
                    "hidden": true,
                    "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
                },
                "max_memory": {
                    "type": "string",
                    "description": "Maximum amount of memory that can be requested for any single job.",
                    "default": "128.GB",
                    "fa_icon": "fas fa-memory",
                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
                    "hidden": true,
                    "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
                },
                "max_time": {
                    "type": "string",
                    "description": "Maximum amount of time that can be requested for any single job.",
                    "default": "240.h",
                    "fa_icon": "far fa-clock",
                    "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
                    "hidden": true,
                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/definitions/main_arguments"
        },
        {
            "$ref": "#/definitions/deseq2_arguments"
        },
        {
            "$ref": "#/definitions/pathway_arguments"
        },
        {
            "$ref": "#/definitions/report_arguments"
        },
        {
            "$ref": "#/definitions/github_options"
        },
        {
            "$ref": "#/definitions/genome_options"
        },
        {
            "$ref": "#/definitions/nextflow_options"
        },
        {
            "$ref": "#/definitions/boilerplate_options"
        },
        {
            "$ref": "#/definitions/max_job_request_options"
        }
    ]
}