From 38f6a76339c308dcd1481b719aa702ad2950ffd2 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Tue, 24 Sep 2024 15:14:56 +0200
Subject: [PATCH] update scripts and wf

---
Review notes (text in this section is not part of the commit message):
 * process_datasets.sh: the last added param_list entry repeated the
   "mouse_brain_combined/rep2" id and input; it now refers to rep3.
 * run_full_seqeracloud.sh: "temp_ist_preprocessing" was replaced by
   "task_ist_preprocessing" so that the repository URL, S3 prefixes and
   label agree with the other scripts touched by this patch.
 * NOTE(review): this patch was restored from a copy whose line breaks had
   been collapsed. Leading whitespace and blank context lines were
   reconstructed from the hunk line counts; run `git apply --check`
   (optionally with --ignore-whitespace) to confirm before applying.

 nextflow.config                               |  2 +-
 .../create_resources/process_10x_xenium.sh    |  7 +++---
 .../process_allen_brain_cell_atlas_brain.sh   |  3 +--
 scripts/create_resources/process_datasets.sh  | 11 ++++++---
 scripts/run_benchmark/run_full_local.sh       | 13 ++--------
 scripts/run_benchmark/run_full_seqeracloud.sh | 24 +++++++++----------
 scripts/run_benchmark/run_test_seqeracloud.sh |  6 +++++
 src/workflows/run_benchmark/main.nf           |  3 +++
 src/workflows/run_benchmark/test.sh           |  2 +-
 9 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 8fc6c4e3..6402ebf2 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1 +1 @@
-process.container = 'nextflow/bash:latest'
\ No newline at end of file
+process.container = 'nextflow/bash:latest'
diff --git a/scripts/create_resources/process_10x_xenium.sh b/scripts/create_resources/process_10x_xenium.sh
index 6eae8e4a..d17a4ade 100755
--- a/scripts/create_resources/process_10x_xenium.sh
+++ b/scripts/create_resources/process_10x_xenium.sh
@@ -23,7 +23,7 @@ param_list:
     segmentation_id: [cell, nucleus]
 
   - id: "10x_xenium/2023_10x_mouse_brain_xenium/rep2"
-    input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs.zip
+    input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_2/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs.zip
     dataset_name: "Xenium V1 Fresh Frozen Mouse Brain replicate 2"
     dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard"
     dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1)."
@@ -32,7 +32,7 @@ param_list:
     segmentation_id: [cell, nucleus]
 
   - id: "10x_xenium/2023_10x_mouse_brain_xenium/rep3"
-    input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs.zip
+    input: https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_3/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs.zip
     dataset_name: "Xenium V1 Fresh Frozen Mouse Brain replicate 3"
     dataset_url: "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard"
     dataset_summary: "Demonstration of gene expression profiling for fresh frozen mouse brain on the Xenium platform using the pre-designed Mouse Brain Gene Expression Panel (v1)."
@@ -45,13 +45,12 @@ output_state: "\$id/state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch openproblems-bio/task_ist_preprocessing \
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/datasets/workflows/process_tenx_xenium/main.nf \
   --workspace 53907369739130 \
   --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
   --params-file /tmp/params.yaml \
-  --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
   --labels datasets,10x_xenium
diff --git a/scripts/create_resources/process_allen_brain_cell_atlas_brain.sh b/scripts/create_resources/process_allen_brain_cell_atlas_brain.sh
index 8ebdf3dc..706d79e1 100755
--- a/scripts/create_resources/process_allen_brain_cell_atlas_brain.sh
+++ b/scripts/create_resources/process_allen_brain_cell_atlas_brain.sh
@@ -38,13 +38,12 @@ output_state: "\$id/state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch openproblems-bio/task_ist_preprocessing \
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/datasets/workflows/process_allen_brain_cell_atlas/main.nf \
   --workspace 53907369739130 \
   --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
   --params-file /tmp/params.yaml \
-  --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
   --labels datasets,allen_brain_cell_atlas
diff --git a/scripts/create_resources/process_datasets.sh b/scripts/create_resources/process_datasets.sh
index a7117493..ee7ae3f9 100755
--- a/scripts/create_resources/process_datasets.sh
+++ b/scripts/create_resources/process_datasets.sh
@@ -11,13 +11,18 @@ set -e
 
 input_dir="s3://openproblems-data/resources/datasets"
 publish_dir="s3://openproblems-data/resources/task_ist_preprocessing/datasets"
-
 
 cat > /tmp/params.yaml << HERE
 param_list:
   - id: "mouse_brain_combined/rep1"
     input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep1/dataset.zarr"
     input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
+  - id: "mouse_brain_combined/rep2"
+    input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep2/dataset.zarr"
+    input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
+  - id: "mouse_brain_combined/rep3"
+    input_sp: "$input_dir/10x_xenium/2023_10x_mouse_brain_xenium/rep3/dataset.zarr"
+    input_sc: "$input_dir/allen_brain_cell_atlas/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad"
 
 output_sc: "\$id/output_sc.h5ad"
 output_sp: "\$id/output_sp.zarr"
@@ -25,7 +30,7 @@ output_state: "\$id/state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch openproblems-bio/task_ist_preprocessing \
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/process_datasets/main.nf \
@@ -34,4 +39,4 @@ tw launch openproblems-bio/task_ist_preprocessing \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels datasets,10x_xenium
+  --labels task_ist_preprocessing,process_datasets
diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
index 8c63393b..5496a205 100755
--- a/scripts/run_benchmark/run_full_local.sh
+++ b/scripts/run_benchmark/run_full_local.sh
@@ -11,14 +11,6 @@ cd "$REPO_ROOT"
 # please refer to the nextflow information for more details:
 # https://www.nextflow.io/docs/latest/
 
-# remove this when you have implemented the script
-echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
-echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
-echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
-echo "  Step 4: remove this message"
-exit 1
-
 set -e
 
 echo "Running benchmark on test data"
@@ -31,14 +23,13 @@ publish_dir="resources/results/${RUN_ID}"
 # write the parameters to file
 cat > /tmp/params.yaml << HERE
 input_states: resources/datasets/**/state.yaml
-rename_keys: 'input_train:output_train;input_test:output_test;input_solution:output_solution'
+rename_keys: 'input_sc:output_sc;input_sp:output_sp'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
 # run the benchmark
-nextflow run openproblems-bio/task_template \
-  --revision build/main \
+nextflow run . \
   -main-script target/nextflow/workflows/run_benchmark/main.nf \
   -profile docker \
   -resume \
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
index 87d133c4..8466540d 100755
--- a/scripts/run_benchmark/run_full_seqeracloud.sh
+++ b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -6,29 +6,21 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
 # ensure that the command below is run from the root of the repository
 cd "$REPO_ROOT"
 
-# remove this when you have implemented the script
-echo "TODO: once the 'run_benchmark' workflow has been implemented, update this script to use it."
-echo "  Step 1: replace 'task_template' with the name of the task in the following command."
-echo "  Step 2: replace the rename keys parameters to fit your run_benchmark inputs"
-echo "  Step 3: replace the settings parameter to fit your run_benchmark outputs"
-echo "  Step 4: remove this message"
-exit 1
-
 set -e
 
 # generate a unique id
 RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
-publish_dir="s3://openproblems-data/resources/task_template/results/${RUN_ID}"
+publish_dir="s3://openproblems-data/resources/task_ist_preprocessing/results/${RUN_ID}"
 
 # write the parameters to file
 cat > /tmp/params.yaml << HERE
-input_states: s3://openproblems-data/resources/task_template/datasets/**/state.yaml
-rename_keys: 'input_train:output_train;input_test:output_test;input_solution:output_solution'
+input_states: s3://openproblems-data/resources/task_ist_preprocessing/datasets/**/state.yaml
+rename_keys: 'input_sc:output_sc;input_sp:output_sp'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
-tw launch https://github.com/openproblems-bio/task_template.git \
+tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --revision build/main \
   --pull-latest \
   --main-script target/nextflow/workflows/run_benchmark/main.nf \
@@ -37,4 +29,10 @@ tw launch https://github.com/openproblems-bio/task_template.git \
   --params-file /tmp/params.yaml \
   --entry-name auto \
   --config common/nextflow_helpers/labels_tw.config \
-  --labels task_template,full
\ No newline at end of file
+  --labels task_ist_preprocessing,full
+
+aws s3 sync \
+  s3://openproblems-data/resources/task_ist_preprocessing/results \
+  resources/task_ist_preprocessing/results \
+  --profile op \
+  --dryrun
diff --git a/scripts/run_benchmark/run_test_seqeracloud.sh b/scripts/run_benchmark/run_test_seqeracloud.sh
index 4f556898..72446616 100755
--- a/scripts/run_benchmark/run_test_seqeracloud.sh
+++ b/scripts/run_benchmark/run_test_seqeracloud.sh
@@ -29,3 +29,9 @@ tw launch https://github.com/openproblems-bio/task_ist_preprocessing.git \
   --params-file /tmp/params.yaml \
   --config common/nextflow_helpers/labels_tw.config \
   --labels task_template,test
+
+aws s3 sync \
+  s3://openproblems-nextflow/temp/results \
+  temp_results \
+  --profile op \
+  --dryrun
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 58116229..934a6cbb 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -361,6 +361,9 @@ workflow run_wf {
     | extract_uns_metadata.run(
       key: "extract_uns_scores",
       fromState: [input: "output_metric"],
+      args: [
+        uns_length_cutoff: 100
+      ],
       toState: { id, output, state ->
         state + [
           score_uns: readYaml(output.output).uns
diff --git a/src/workflows/run_benchmark/test.sh b/src/workflows/run_benchmark/test.sh
index b0dbc249..3ac912ac 100755
--- a/src/workflows/run_benchmark/test.sh
+++ b/src/workflows/run_benchmark/test.sh
@@ -10,7 +10,7 @@ set -e
 
 # export TOWER_WORKSPACE_ID=53907369739130
 
-DATASETS_DIR="resources_test/task_template"
+DATASETS_DIR="resources_test/task_ist_preprocessing"
 OUTPUT_DIR="output/temp"
 
 if [ ! -d "$OUTPUT_DIR" ]; then