From aff578e4d0fbe734e4c8b295c47242abaa2795a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 11:13:57 +0100 Subject: [PATCH 01/16] Add eddie_roslin --- README.md | 1 + conf/eddie_roslin.config | 46 +++++++++++++++++++ docs/eddie_roslin.md | 95 ++++++++++++++++++++++++++++++++++++++++ nfcore_custom.config | 1 + 4 files changed, 143 insertions(+) create mode 100644 conf/eddie_roslin.config create mode 100644 docs/eddie_roslin.md diff --git a/README.md b/README.md index 2a7f45a8..d9d24a2b 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Currently documentation is available for the following systems: - [EBC](docs/ebc.md) - [EBI_CODON](docs/ebi_codon.md) - [EBI_CODON_SLURM](docs/ebi_codon_slurm.md) +- [EDDIE_ROSLIN](docs/eddie_roslin.md) - [EINSTEIN](docs/einstein.md) - [EMBL HD](docs/embl_hd.md) - [Engaging](docs/engaging.md) diff --git a/conf/eddie_roslin.config b/conf/eddie_roslin.config new file mode 100644 index 00000000..6c39ef19 --- /dev/null +++ b/conf/eddie_roslin.config @@ -0,0 +1,46 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'University of Edinburgh (Eddie) cluster profile for Roslin Institute provided by nf-core/configs.' + config_profile_contact = 'Sebastien Guizard (@sguizard) and Donald Dunbar (@ddunbar)' + config_profile_url = 'https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing' +} + +executor { + name = "sge" +} + +process { + clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus}" : null } + stageInMode = 'symlink' + scratch = 'false' + penv = { task.cpus > 1 ? "sharedmem" : null } + + // common SGE error statuses + errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish'} + maxErrors = '-1' + maxRetries = 3 + + beforeScript = + """ + . 
/etc/profile.d/modules.sh + module load singularity + export SINGULARITY_TMPDIR="\$TMPDIR" + """ +} + +params { + // iGenomes reference base + igenomes_base = '/exports/igmm/eddie/BioinformaticsResources/igenomes' +} + +env { + MALLOC_ARENA_MAX=1 +} + +singularity { + envWhitelist = "SINGULARITY_TMPDIR,TMPDIR" + runOptions = '-p -B "$TMPDIR"' + enabled = true + autoMounts = true + cacheDir = /exports/cmvm/eddie/eb/groups/roslin_shared_reference_data/singularity_cache +} diff --git a/docs/eddie_roslin.md b/docs/eddie_roslin.md new file mode 100644 index 00000000..0140c948 --- /dev/null +++ b/docs/eddie_roslin.md @@ -0,0 +1,95 @@ +# nf-core/configs: Eddie Configuration + +nf-core pipelines sarek, rnaseq, atacseq, and viralrecon have all been tested on the University of Edinburgh Eddie HPC. All except atacseq have pipeline-specific config files; atacseq does not yet support this. + +## Getting help + +There is a Teams group dedicated to nextflow users: [Netxtflow Teams](https://teams.microsoft.com/l/team/19%3A7e957d32ce1345b8989af14564547690%40thread.tacv2/conversations?groupId=446c509d-b8fd-466c-a66f-52122f0a2fcc&tenantId=2e9f06b0-1669-4589-8789-10a06934dc61) + +## Using the Eddie config profile + +To use, run the pipeline with `-profile eddie_roslin` (one hyphen). +This will download and launch the [`eddie_roslin.config`](../conf/eddie_roslin.config) which has been pre-configured with a setup suitable for the [University of Edinburgh Eddie HPC](https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing). + +The configuration file supports running nf-core pipelines with Docker containers running under Singularity by default. Conda is not currently supported. + +```bash +nextflow run nf-core/PIPELINE -profile eddie_roslin # ...rest of pipeline flags +``` + +Before running the pipeline you will need to load Nextflow from the module system or activate youir Nextflow conda envronment. 
Generally the most recent version will be the one you want. + +To list versions: + +```bash +module avail|grep nextflow +``` + +To load the most recent version (08/08/2024): + +```bash +module load igmm/bac/nextflow/24.04.2 +``` + +This config enables Nextflow to manage the pipeline jobs via the SGE job scheduler and using Singularity for software management. + +## Singularity set-up + +The eddie profile is set to use `/exports/igmm/eddie/BioinformaticsResources/nfcore/singularity-images` as the Singularity cache directory. If some containers for your pipeline run are not present, please contact the [IGC Data Manager](data.manager@igc.ed.ac.uk) to have them added. You can add these lines to the file `$HOME/.bashrc`, or you can run these commands before you run an nf-core pipeline. + +If you do not have access to `/exports/igmm/eddie/BioinformaticsResources`, set the Singularity cache directory to somewhere sensible that is not in your `$HOME` area (which has limited space). It will take time to download all the Singularity containers, but you can use this again. + +Singularity will by default create a directory `.singularity` in your `$HOME` directory on eddie. Space on `$HOME` is very limited, so it is a good idea to create a directory somewhere else with more room and link the locations. + +```bash +cd $HOME +mkdir /exports/eddie/path/to/my/area/.singularity +ln -s /exports/eddie/path/to/my/area/.singularity .singularity +``` + +## Running Nextflow + +### On a login node + +You can use a qlogin to run Nextflow, if you request more than the default 2GB of memory. Unfortunately you can't submit the initial Nextflow run process as a job as you can't qsub within a qsub. +If your eddie terminal disconnects your Nextflow job will stop. You can run qlogin in a screen session to prevent this. + +Start a new screen session. + +```bash +screen -S +``` + +Start an interactive job with qlogin. 
+ +```bash +qlogin -l h_vmem=8G +``` + +You can leave your screen session by typing Ctrl + A, then d. + +To list existing screen sessions, use: + +```bash +screen -ls +``` + +To reconnect to an existing screen session, use: + +```bash +screen -r +``` + + +### On the wild west node + +Wild West node have relaxed restriction compared to regular nodes, which allows the execution of Nextflow. +The access to Wild West node must be requested to Andy Law (alaw3@ed.ac.uk) and IS. +Similarly to qlogin option, it is advised to run Nextflow within a screen session. + + +## Using iGenomes references + +A local copy of the iGenomes resource has been made available on the Eddie HPC for those with access to `/exports/igmm/eddie/BioinformaticsResources` so you should be able to run the pipeline against any reference available in the `igenomes.config`. +You can do this by simply using the `--genome ` parameter. + diff --git a/nfcore_custom.config b/nfcore_custom.config index a503e463..a9a046dc 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -45,6 +45,7 @@ profiles { dkfz { includeConfig "${params.custom_config_base}/conf/dkfz.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" } + eddie_roslin { includeConfig "${params.custom_config_base}/conf/eddie_roslin.config" } ebi_codon { includeConfig "${params.custom_config_base}/conf/ebi_codon.config" } ebi_codon_slurm { includeConfig "${params.custom_config_base}/conf/ebi_codon_slurm.config" } einstein { includeConfig "${params.custom_config_base}/conf/einstein.config" } From be30516911738822a947b0a6c1c2b2b390e8ecf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 11:17:07 +0100 Subject: [PATCH 02/16] Add eddie_roslin to github workflow --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index
fbd52a08..7477a482 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,6 +70,7 @@ jobs: - "ebi_codon" - "ebi_codon_slurm" - "eddie" + - "eddie_roslin" - "embl_hd" - "engaging" - "einstein" From fd710d75a75a2549d4c020af032518930dbf32d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 11:47:21 +0100 Subject: [PATCH 03/16] FIX: add quote around singularity-cache directory path --- conf/eddie_roslin.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/eddie_roslin.config b/conf/eddie_roslin.config index 6c39ef19..142e79d3 100644 --- a/conf/eddie_roslin.config +++ b/conf/eddie_roslin.config @@ -42,5 +42,5 @@ singularity { runOptions = '-p -B "$TMPDIR"' enabled = true autoMounts = true - cacheDir = /exports/cmvm/eddie/eb/groups/roslin_shared_reference_data/singularity_cache + cacheDir = '/exports/cmvm/eddie/eb/groups/roslin_shared_reference_data/singularity_cache' } From eb3bce129b29869dccce950ec0dfb26e2b191849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 15:14:20 +0100 Subject: [PATCH 04/16] Rename files --- conf/{eddie_roslin.config => roslin.config} | 0 docs/eddie_roslin.md => conf/roslin.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename conf/{eddie_roslin.config => roslin.config} (100%) rename docs/eddie_roslin.md => conf/roslin.md (100%) diff --git a/conf/eddie_roslin.config b/conf/roslin.config similarity index 100% rename from conf/eddie_roslin.config rename to conf/roslin.config diff --git a/docs/eddie_roslin.md b/conf/roslin.md similarity index 100% rename from docs/eddie_roslin.md rename to conf/roslin.md From 2dacca532470197d0d9aa4a352337add17d817ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 15:14:55 +0100 Subject: [PATCH 05/16] Update files to new name --- .github/workflows/main.yml | 2 +- README.md | 2 +- nfcore_custom.config | 2 +- 3 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7477a482..4522642b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,7 +70,6 @@ jobs: - "ebi_codon" - "ebi_codon_slurm" - "eddie" - - "eddie_roslin" - "embl_hd" - "engaging" - "einstein" @@ -124,6 +123,7 @@ jobs: - "qmul_apocrita" - "rosalind" - "rosalind_uge" + - "roslin" - "sage" - "sahmri" - "sanger" diff --git a/README.md b/README.md index d9d24a2b..27f398dc 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,6 @@ Currently documentation is available for the following systems: - [EBC](docs/ebc.md) - [EBI_CODON](docs/ebi_codon.md) - [EBI_CODON_SLURM](docs/ebi_codon_slurm.md) -- [EDDIE_ROSLIN](docs/eddie_roslin.md) - [EINSTEIN](docs/einstein.md) - [EMBL HD](docs/embl_hd.md) - [Engaging](docs/engaging.md) @@ -175,6 +174,7 @@ Currently documentation is available for the following systems: - [QMUL_APOCRITA](docs/qmul_apocrita.md) - [ROSALIND](docs/rosalind.md) - [ROSALIND_UGE](docs/rosalind_uge.md) +- [ROSLIN](docs/roslin.md) - [SAGE BIONETWORKS](docs/sage.md) - [SANGER](docs/sanger.md) - [SEATTLECHILDRENS](docs/seattlechildrens.md) diff --git a/nfcore_custom.config b/nfcore_custom.config index a9a046dc..cbcb6774 100644 --- a/nfcore_custom.config +++ b/nfcore_custom.config @@ -45,7 +45,6 @@ profiles { dkfz { includeConfig "${params.custom_config_base}/conf/dkfz.config" } ebc { includeConfig "${params.custom_config_base}/conf/ebc.config" } eddie { includeConfig "${params.custom_config_base}/conf/eddie.config" } - eddie_roslin { includeConfig "${params.custom_config_base}/conf/eddie_roslin.config" } ebi_codon { includeConfig "${params.custom_config_base}/conf/ebi_codon.config" } ebi_codon_slurm { includeConfig "${params.custom_config_base}/conf/ebi_codon_slurm.config" } einstein { includeConfig "${params.custom_config_base}/conf/einstein.config" } @@ -100,6 +99,7 @@ profiles { qmul_apocrita { includeConfig 
"${params.custom_config_base}/conf/qmul_apocrita.config" } rosalind { includeConfig "${params.custom_config_base}/conf/rosalind.config" } rosalind_uge { includeConfig "${params.custom_config_base}/conf/rosalind_uge.config" } + roslin { includeConfig "${params.custom_config_base}/conf/roslin.config" } sage { includeConfig "${params.custom_config_base}/conf/sage.config" } sahmri { includeConfig "${params.custom_config_base}/conf/sahmri.config" } sanger { includeConfig "${params.custom_config_base}/conf/sanger.config"} From 4a8ff847eab0d25a2075e0e95ca5692ef3489630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 15:19:19 +0100 Subject: [PATCH 06/16] FIX: relocating doc at in docs dir --- {conf => docs}/roslin.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {conf => docs}/roslin.md (100%) diff --git a/conf/roslin.md b/docs/roslin.md similarity index 100% rename from conf/roslin.md rename to docs/roslin.md From 666a3b5d00e20924aa4044e4dabc5d3bf32b2149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Thu, 8 Aug 2024 15:20:13 +0100 Subject: [PATCH 07/16] UPDATE: Systematically add 4GB to any job --- conf/roslin.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/roslin.config b/conf/roslin.config index 142e79d3..ddfcdc35 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -10,7 +10,7 @@ executor { } process { - clusterOptions = { task.memory ? "-l h_vmem=${task.memory.bytes/task.cpus}" : null } + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} stageInMode = 'symlink' scratch = 'false' penv = { task.cpus > 1 ? 
"sharedmem" : null } From 6c7d6e0de3cf0b9c8a341d0c4b758dc4e6e39cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 9 Aug 2024 09:34:28 +0100 Subject: [PATCH 08/16] UPDATE: Force clusterOptions to all jobs --- conf/roslin.config | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/conf/roslin.config b/conf/roslin.config index ddfcdc35..9b25580f 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -10,11 +10,21 @@ executor { } process { - clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} stageInMode = 'symlink' scratch = 'false' penv = { task.cpus > 1 ? "sharedmem" : null } + // This withName will override all jobs clusterOptions + // This is necessary to allow jobs to run on Eddie for many users + // For each job, we add an extra 4 Gb of memory. + // For example, the process asked 16 Gb of RAM (task.memory). The job will reserve 20 Gb of RAM. + // The process will still use 16 Gb (task.memory) leaving 4 Gb for other system processes. + // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine + // Also it leaves enough memory for singularity to unpack images. + withName: '.*' { + clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + } + // common SGE error statuses errorStrategy = {task.exitStatus in [143,137,104,134,139,140] ? 
'retry' : 'finish'} maxErrors = '-1' From ce6f41301f3d22b168b4a8d59e3b1e9bc4d30cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 16 Aug 2024 07:58:26 +0100 Subject: [PATCH 09/16] UPDATE: Use roslin bioinformatics grouip cache (open to all) --- conf/roslin.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/roslin.config b/conf/roslin.config index 9b25580f..74a6e4c4 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -52,5 +52,5 @@ singularity { runOptions = '-p -B "$TMPDIR"' enabled = true autoMounts = true - cacheDir = '/exports/cmvm/eddie/eb/groups/roslin_shared_reference_data/singularity_cache' + cacheDir = '/exports/cmvm/eddie/eb/groups/alaw3_eb_singularity_cache' } From 066a9c88e8602f0071f6ff83ddbabd83901b9ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 16 Aug 2024 11:09:02 +0100 Subject: [PATCH 10/16] FIX: fix FastQC memory overallocation, and request more memory for all jobs --- conf/roslin.config | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/conf/roslin.config b/conf/roslin.config index 74a6e4c4..76ff0287 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -13,16 +13,27 @@ process { stageInMode = 'symlink' scratch = 'false' penv = { task.cpus > 1 ? "sharedmem" : null } + + // To date (16/08/2024), the FastQC module is still broken. + // More details here: https://github.com/nf-core/modules/pull/6156 + // Until the Pull Request is accepted, and the new version of the module is integrated to pipelines, + // We force the amount of memory here. + withName: 'FASTQC' { + cpus = 5 + memory = '5.GB' + clusterOptions = "-l h_vmem=10G -pe sharedmem 5" + + } - // This withName will override all jobs clusterOptions + // This withName will override all jobs (except for FASTQC jobs, cf above) clusterOptions // This is necessary to allow jobs to run on Eddie for many users // For each job, we add an extra 4 Gb of memory. 
// For example, the process asked 16 Gb of RAM (task.memory). The job will reserve 20 Gb of RAM. // The process will still use 16 Gb (task.memory) leaving 4 Gb for other system processes. // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine // Also it leaves enough memory for singularity to unpack images. - withName: '.*' { - clusterOptions = {"-l h_vmem=${(task.memory + 4.GB).bytes/task.cpus}"} + withName: '!.*FASTQC' { + clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus}"} } // common SGE error statuses From c43429566c1bac16bef4bf535bdb97998e83abd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 16 Aug 2024 14:12:02 +0100 Subject: [PATCH 11/16] UPDATE: Change selector expressions to match FastQC and it\s aliases --- conf/roslin.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/roslin.config b/conf/roslin.config index 76ff0287..72cc0f31 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -18,7 +18,7 @@ process { // More details here: https://github.com/nf-core/modules/pull/6156 // Until the Pull Request is accepted, and the new version of the module is integrated to pipelines, // We force the amount of memory here. - withName: 'FASTQC' { + withName: 'FASTQC.*' { cpus = 5 memory = '5.GB' clusterOptions = "-l h_vmem=10G -pe sharedmem 5" @@ -32,7 +32,7 @@ process { // The process will still use 16 Gb (task.memory) leaving 4 Gb for other system processes. // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine // Also it leaves enough memory for singularity to unpack images. 
- withName: '!.*FASTQC' { + withName: '!.*FASTQC.*' { clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus}"} } From effbde8b13ab0b18a706d13d080ddf8ffba57e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 16 Aug 2024 18:40:37 +0100 Subject: [PATCH 12/16] UPDATE: Allows to defined an SGE project with the environment variable NFX_SGE_PROJECT --- conf/roslin.config | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/conf/roslin.config b/conf/roslin.config index 72cc0f31..e3e60e8d 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -21,8 +21,13 @@ process { withName: 'FASTQC.*' { cpus = 5 memory = '5.GB' - clusterOptions = "-l h_vmem=10G -pe sharedmem 5" - + // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option + // Otherwise set the project to uoe_baseline + if (System.getenv('NFX_SGE_PROJECT')) { + clusterOptions = {"-l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"} + } else { + clusterOptions = {"-l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"} + } } // This withName will override all jobs (except for FASTQC jobs, cf above) clusterOptions @@ -33,7 +38,13 @@ process { // This is very useful any JAVA programs which allocate task.memory RAM for its Virtual Machine // Also it leaves enough memory for singularity to unpack images. 
withName: '!.*FASTQC.*' { - clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus}"} + // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option + // Otherwise set the project to uoe_baseline + if (System.getenv('NFX_SGE_PROJECT')) { + clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"} + } else { + clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"} + } } // common SGE error statuses From 7c3a6ab05db713d8d2f48a0e5d9a3e046b1b0402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 19 Aug 2024 10:11:04 +0100 Subject: [PATCH 13/16] UPDATE: Add superdome exclusion to ClusterOptions --- conf/roslin.config | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/roslin.config b/conf/roslin.config index e3e60e8d..1e763042 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -13,20 +13,20 @@ process { stageInMode = 'symlink' scratch = 'false' penv = { task.cpus > 1 ? "sharedmem" : null } - - // To date (16/08/2024), the FastQC module is still broken. + + // To date (16/08/2024), the FastQC module is still broken. // More details here: https://github.com/nf-core/modules/pull/6156 - // Until the Pull Request is accepted, and the new version of the module is integrated to pipelines, - // We force the amount of memory here. + // Until the Pull Request is accepted, and the new version of the module is integrated to pipelines, + // We force the amount of memory here. 
withName: 'FASTQC.*' { cpus = 5 memory = '5.GB' // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option // Otherwise set the project to uoe_baseline if (System.getenv('NFX_SGE_PROJECT')) { - clusterOptions = {"-l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"} + clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"} } else { - clusterOptions = {"-l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"} + clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"} } } @@ -41,9 +41,9 @@ process { // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option // Otherwise set the project to uoe_baseline if (System.getenv('NFX_SGE_PROJECT')) { - clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"} + clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"} } else { - clusterOptions = {"-l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"} + clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"} } } From aff0faf2e9705f5194a78cf16caa79142d35e477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 26 Aug 2024 10:51:02 +0100 Subject: [PATCH 14/16] Update roslin.config doc --- docs/roslin.md | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/docs/roslin.md b/docs/roslin.md index 0140c948..13c23164 100644 --- a/docs/roslin.md +++ b/docs/roslin.md @@ -1,23 +1,24 @@ -# nf-core/configs: Eddie Configuration +# nf-core/configs: Roslin Configuration -nf-core pipelines sarek, rnaseq, atacseq, and viralrecon have all been tested on the University of Edinburgh Eddie HPC. All except atacseq have pipeline-specific config files; atacseq does not yet support this. 
+nf-core pipelines sarek, rnaseq, chipseq, mag, differentialabundance and isoseq have all been tested on the University of Edinburgh Eddie HPC with test profile. ## Getting help -There is a Teams group dedicated to nextflow users: [Netxtflow Teams](https://teams.microsoft.com/l/team/19%3A7e957d32ce1345b8989af14564547690%40thread.tacv2/conversations?groupId=446c509d-b8fd-466c-a66f-52122f0a2fcc&tenantId=2e9f06b0-1669-4589-8789-10a06934dc61) +There is a Teams group dedicated to Nextflow users: [Nextflow Teams](https://teams.microsoft.com/l/team/19%3A7e957d32ce1345b8989af14564547690%40thread.tacv2/conversations?groupId=446c509d-b8fd-466c-a66f-52122f0a2fcc&tenantId=2e9f06b0-1669-4589-8789-10a06934dc61) +Also, you can find at the coding club held each Wednesday: [Code Club Teams](https://teams.microsoft.com/l/channel/19%3A1bf9220112e445c382b6beb660ffb61a%40thread.tacv2/Coding%20Club?groupId=cc7a1113-38a1-48f6-9fc6-14700c8da27e&tenantId=2e9f06b0-1669-4589-8789-10a06934dc61) -## Using the Eddie config profile +## Using the Roslin config profile -To use, run the pipeline with `-profile eddie_roslin` (one hyphen). -This will download and launch the [`eddie_roslin.config`](../conf/eddie_roslin.config) which has been pre-configured with a setup suitable for the [University of Edinburgh Eddie HPC](https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing). +To use, run the pipeline with `-profile roslin` (one hyphen). +This will download and launch the [`roslin.config`](../conf/roslin.config) file which has been pre-configured with a setup suitable for the [University of Edinburgh Eddie HPC](https://www.ed.ac.uk/information-services/research-support/research-computing/ecdf/high-performance-computing). The configuration file supports running nf-core pipelines with Docker containers running under Singularity by default. Conda is not currently supported. 
```bash -nextflow run nf-core/PIPELINE -profile eddie_roslin # ...rest of pipeline flags +nextflow run nf-core/PIPELINE -profile roslin # ...rest of pipeline flags ``` -Before running the pipeline you will need to load Nextflow from the module system or activate youir Nextflow conda envronment. Generally the most recent version will be the one you want. +Before running the pipeline, you will need to load Nextflow from the module system or activate your Nextflow conda environment. Generally, the most recent version will be the one you want. To list versions: @@ -33,11 +34,9 @@ module load igmm/bac/nextflow/24.04.2 This config enables Nextflow to manage the pipeline jobs via the SGE job scheduler and using Singularity for software management. -## Singularity set-up +## Singularity set up -The eddie profile is set to use `/exports/igmm/eddie/BioinformaticsResources/nfcore/singularity-images` as the Singularity cache directory. If some containers for your pipeline run are not present, please contact the [IGC Data Manager](data.manager@igc.ed.ac.uk) to have them added. You can add these lines to the file `$HOME/.bashrc`, or you can run these commands before you run an nf-core pipeline. - -If you do not have access to `/exports/igmm/eddie/BioinformaticsResources`, set the Singularity cache directory to somewhere sensible that is not in your `$HOME` area (which has limited space). It will take time to download all the Singularity containers, but you can use this again. +The roslin profile is set to use `/exports/cmvm/eddie/eb/groups/alaw3_eb_singularity_cache` as the Singularity cache directory. This directory is made available to Roslin Institute Nextflow/nf-core users by the Roslin Bioinformatics group led by Andy Law. All new containers will be cached in this directory, which is writable by all.
If you face any problem with the Singularity cache, please contact [Sébastien Guizard](mailto:sguizard@ed.ac.uk), [Donald Dunbar](mailto:donald.dunbar@ed.ac.uk) and [Andy Law](mailto:andy.law@roslin.ed.ac.uk) with the [Roslin Bioinformatics](mailto:roslin.bioinformatics@roslin.ed.ac.uk) group in CC. Singularity will by default create a directory `.singularity` in your `$HOME` directory on eddie. Space on `$HOME` is very limited, so it is a good idea to create a directory somewhere else with more room and link the locations. @@ -47,11 +46,23 @@ mkdir /exports/eddie/path/to/my/area/.singularity ln -s /exports/eddie/path/to/my/area/.singularity .singularity ``` +## SGE project set up + +By default, users’ jobs are started with the `uoe_baseline` project that gives access to free nodes. If you have a project code that gives you access to paid nodes, it can be used by jobs submitted by Nextflow. To do so, you need to set up an environment variable called `NFX_SGE_PROJECT`: + +```bash +export NFX_SGE_PROJECT="<PROJECT_NAME_HERE>" +``` + +If you wish, you can place this variable declaration in your `.bashrc` file located in your home directory to automatically set it up each time you log on to Eddie. + +**NB:** This will work only with the roslin profile. + ## Running Nextflow ### On a login node -You can use a qlogin to run Nextflow, if you request more than the default 2GB of memory. Unfortunately you can't submit the initial Nextflow run process as a job as you can't qsub within a qsub. +You can use a qlogin to run Nextflow, if you request more than the default 2 GB of memory. Unfortunately, you can't submit the initial Nextflow run process as a job as you can't qsub within a qsub. If your eddie terminal disconnects your Nextflow job will stop. You can run qlogin in a screen session to prevent this. Start a new screen session. @@ -60,7 +71,7 @@ Start a new screen session. screen -S ``` -Start an interactive job with qlogin. +Start an interactive job with qlogin.
```bash qlogin -l h_vmem=8G @@ -83,9 +94,9 @@ screen -r ### On the wild west node -Wild West node have relaxed restriction compared to regular nodes, which allows the execution of Nextflow. -The access to Wild West node must be requested to Andy Law (alaw3@ed.ac.uk) and IS. -Similarly to qlogin option, it is advised to run Nextflow within a screen session. +The Wild West node has relaxed restrictions compared to regular nodes, which allows the execution of Nextflow. +Access to the Wild West node must be requested from [Andy Law](mailto:andy.law@roslin.ed.ac.uk) and IS. +Similarly to the qlogin option, it is advised to run Nextflow within a screen session. ## Using iGenomes references From 440538dee28d08b6354adda35c838622a6781cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 26 Aug 2024 11:06:35 +0100 Subject: [PATCH 15/16] prettier doc --- docs/roslin.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/roslin.md b/docs/roslin.md index 13c23164..a5641154 100644 --- a/docs/roslin.md +++ b/docs/roslin.md @@ -91,16 +91,13 @@ To reconnect to an existing screen session, use: screen -r ``` - ### On the wild west node The Wild West node has relaxed restrictions compared to regular nodes, which allows the execution of Nextflow. Access to the Wild West node must be requested from [Andy Law](mailto:andy.law@roslin.ed.ac.uk) and IS. Similarly to the qlogin option, it is advised to run Nextflow within a screen session. - ## Using iGenomes references A local copy of the iGenomes resource has been made available on the Eddie HPC for those with access to `/exports/igmm/eddie/BioinformaticsResources` so you should be able to run the pipeline against any reference available in the `igenomes.config`. You can do this by simply using the `--genome ` parameter.
- From 4f101fedc1a45b1c77e58876e5e9818cbc48b7d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Mon, 26 Aug 2024 11:10:35 +0100 Subject: [PATCH 16/16] Fix indentation --- conf/roslin.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/roslin.config b/conf/roslin.config index 1e763042..849f9695 100644 --- a/conf/roslin.config +++ b/conf/roslin.config @@ -23,11 +23,11 @@ process { memory = '5.GB' // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option // Otherwise set the project to uoe_baseline - if (System.getenv('NFX_SGE_PROJECT')) { + if (System.getenv('NFX_SGE_PROJECT')) { clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P $NFX_SGE_PROJECT"} - } else { + } else { clusterOptions = {"-l h=!node1d01 -l h_vmem=10G -pe sharedmem 5 -P uoe_baseline"} - } + } } // This withName will override all jobs (except for FASTQC jobs, cf above) clusterOptions @@ -40,11 +40,11 @@ process { withName: '!.*FASTQC.*' { // Check if an environment variable NFX_SGE_PROJECT exists, if yes, use the stored value for -P option // Otherwise set the project to uoe_baseline - if (System.getenv('NFX_SGE_PROJECT')) { + if (System.getenv('NFX_SGE_PROJECT')) { clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P $NFX_SGE_PROJECT"} - } else { + } else { clusterOptions = {"-l h=!node1d01 -l h_vmem=${(task.memory + 8.GB).bytes/task.cpus} -P uoe_baseline"} - } + } } // common SGE error statuses