Skip to content

Commit

Permalink
chore: release 1.1.0
Browse files: browse the repository at this point in the history
Release-as: 1.1.0
  • Loading branch information
ftabaro committed Mar 12, 2024
2 parents 90d3e04 + 74e2339 commit 0340bbc
Show file tree
Hide file tree
Showing 12 changed files with 326 additions and 113 deletions.
19 changes: 18 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,21 @@ jobs:
ls -R tests
- name: Run workflow
run: |
snakemake --directory tests --configfile tests/config.yaml --profile tests/profile --snakefile workflow/Snakefile
ROOT=$(realpath .)
snakemake \
--directory tests \
--configfile tests/config.yaml \
--profile tests/profile \
--snakefile workflow/Snakefile \
--singularity-args="--bind $ROOT --bind $HOME"
- name: Test reporting
run: |
ROOT=$(realpath .)
snakemake \
--directory tests \
--configfile tests/config.yaml \
--profile tests/profile \
--snakefile workflow/Snakefile \
--report report.zip \
--singularity-args="--bind $ROOT --bind $HOME"
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
**/.snakemake
**/results
**/references
**/.cache
**/.conda
**/.java
**/.config
**/.condarc
**/workdir
profile/slurm
tests4/
tests4/
tests/.wget-hsts
tests/*.log
tests/*.out
run-test.sh
test.sh
File renamed without changes.
1 change: 0 additions & 1 deletion tests/profile/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use-conda: True
use-singularity: True
singularity-args: ""
show-failed-logs: True
cores: 2
conda-cleanup-pkgs: cache
Expand Down
16 changes: 9 additions & 7 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ configfile: "config/config.yaml"
include: "rules/common.smk"


###########################
## DEFINE PIPELINE CONTAINER
###########################


containerized: "docker://ftabaro/rna-seq:1.0"


#######################
## DEFINE VARIABLES
#######################
Expand Down Expand Up @@ -84,13 +92,7 @@ gtf_path = references_folder.joinpath(
get_filename(config["genome"]["gtf_url"], decompress=True)
)

rmsk_path = references_folder.joinpath("rmsk.gtf")
rmsk_bed = Path(str(rmsk_path).replace("gtf", "bed"))

# gaf_path = references_folder.joinpath(
# get_filename(config["genome"]["gaf_url"], decompress=False)
# )

rmsk_folder = references_folder.joinpath("rmsk")
tRNA_annotation_dir = references_folder.joinpath("gtrnadb")

## Get samples
Expand Down
11 changes: 9 additions & 2 deletions workflow/rules/download-references.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ rule download_genome_fasta_file:
protected(fasta_path),
params:
url=config["genome"]["fasta_url"],
cache: True
conda:
"../env/wget.yml"
log:
Expand All @@ -18,6 +19,7 @@ rule download_genome_fasta_file:
rule download_genome_annotation_file:
output:
protected(gtf_path),
cache: True
params:
url=config["genome"]["gtf_url"],
conda:
Expand All @@ -34,8 +36,12 @@ rule download_genome_annotation_file:

rule download_repeatmasker_annotation_file:
output:
protected(rmsk_path),
protected(rmsk_bed),
protected(
multiext(
str(rmsk_folder.joinpath(config["genome"]["label"])), ".gtf", ".bed"
)
),
cache: True
params:
genome_id=config["genome"]["label"],
conda:
Expand Down Expand Up @@ -66,6 +72,7 @@ rule download_gtRNAdb:
"-tRNAs.fa",
)
),
cache: True
params:
url=config["genome"]["gtrnadb_url"],
output_dir=tRNA_annotation_dir,
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/filter_bam.smk
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
rule filter_bam:
input:
alignment=starTE_folder.joinpath("{serie}/{method}/{sample}.Aligned.out.bam"),
annotation=rmsk_bed,
annotation=rmsk_folder.joinpath(
"{0}.{1}".format(config["genome"]["label"], "bed")
),
output:
starTE_folder.joinpath("{serie}/filter/{method}/{sample}.TEonly.bam"),
log:
Expand Down
22 changes: 13 additions & 9 deletions workflow/rules/star.smk
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rule star_genome_preparation:
genome_annotation_file=gtf_path,
output:
directory(references_folder.joinpath("STAR")),
cache: True
conda:
"../env/alignment.yml"
threads: 8
Expand Down Expand Up @@ -52,15 +53,18 @@ rule star:
star_index_folder=references_folder.joinpath("STAR"),
genome_annotation_file=gtf_path,
output:
star_folder.joinpath("{serie}/{sample}.Aligned.sortedByCoord.out.bam"),
star_folder.joinpath("{serie}/{sample}.Aligned.toTranscriptome.out.bam"),
star_folder.joinpath("{serie}/{sample}.ReadsPerGene.out.tab"),
star_folder.joinpath("{serie}/{sample}.SJ.out.tab"),
star_folder.joinpath("{serie}/{sample}.Signal.Unique.str1.out.wig"),
star_folder.joinpath("{serie}/{sample}.Signal.Unique.str2.out.wig"),
star_folder.joinpath("{serie}/{sample}.Signal.UniqueMultiple.str1.out.wig"),
star_folder.joinpath("{serie}/{sample}.Signal.UniqueMultiple.str2.out.wig"),
star_folder.joinpath("{serie}/{sample}.Log.final.out"),
multiext(
str(star_folder.joinpath("{serie}", "{sample}")),
".Aligned.sortedByCoord.out.bam",
".Aligned.toTranscriptome.out.bam",
".ReadsPerGene.out.tab",
".SJ.out.tab",
".Signal.Unique.str1.out.wig",
".Signal.Unique.str2.out.wig",
".Signal.UniqueMultiple.str1.out.wig",
".Signal.UniqueMultiple.str2.out.wig",
".Log.final.out",
),
threads: 8
resources:
runtime=lambda wildcards, attempt: 1440 * attempt,
Expand Down
8 changes: 6 additions & 2 deletions workflow/rules/starTE.smk
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ rule featureCounts_random:
else samples["paired"][wildcards.serie]
),
),
annotation=rmsk_path,
annotation=rmsk_folder.joinpath(
"{0}.{1}".format(config["genome"]["label"], "gtf")
),
output:
starTE_folder.joinpath("{serie}/featureCount/random.txt"),
conda:
Expand Down Expand Up @@ -218,7 +220,9 @@ rule featureCounts_multihit:
else samples["paired"][wildcards.serie]
),
),
annotation=rmsk_path,
annotation=rmsk_folder.joinpath(
"{0}.{1}".format(config["genome"]["label"], "gtf")
),
output:
starTE_folder.joinpath("{serie}/featureCount/multihit.txt"),
conda:
Expand Down
40 changes: 22 additions & 18 deletions workflow/scripts/download-gtf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,35 @@ set -e

URL="${snakemake_params[url]}"

mkdir -p $(dirname ${snakemake_log})
touch ${snakemake_log}

if [[ $URL == *.gz ]]; then
TMP=$(mktemp -u --suffix .gz)
echo "$URL refers to gzipped file. Temp file: $TMP" >> ${snakemake_log}
else
TMP=$(mktemp -u)
echo "$URL does not refer to gzipped file. Temp file: $TMP" >> ${snakemake_log}
fi

echo "Downloading to $TMP" | tee -a ${snakemake_log}
echo "Downloading to $TMP" >> ${snakemake_log}

OUTPUT=${snakemake_output}
mkdir -pv $(dirname $OUTPUT)

# wget -O $TMP "$URL"
curl "$URL" \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' \
-H 'Accept-Language: en-US,en;q=0.5' \
-H 'Accept-Encoding: gzip, deflate' \
-H 'Connection: keep-alive' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'DNT: 1' \
-H 'Sec-GPC: 1' \
-H 'Pragma: no-cache' \
-H 'Cache-Control: no-cache' \
--silent \
--output $TMP
wget "$URL" \
--user-agent=' Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0' \
--header='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' \
--header='Accept-Language: en-US,en;q=0.5' \
--header='Accept-Encoding: gzip, deflate' \
--header='Connection: keep-alive' \
--header='Upgrade-Insecure-Requests: 1' \
--header='DNT: 1' \
--header='Sec-GPC: 1' \
--header='Pragma: no-cache' \
--header='Cache-Control: no-cache' \
--quiet \
--output-document="$TMP"

sleep $(( $RANDOM % 10 + 2 ))

Expand All @@ -37,10 +41,10 @@ if [[ $URL == *.gz ]] && [[ ! $OUTPUT == *.gz ]]; then
sleep $(( $RANDOM % 10 + 2 ))
fi

if grep -v '#' "${TMP%.gz}" | head -n 1 | grep -q '^chr' | tee -a ${snakemake_log}; then
echo "Mv'ing to $OUTPUT" | tee -a ${snakemake_log}
if grep -v '#' "${TMP%.gz}" | head -n 1 | grep -q '^chr' >> ${snakemake_log}; then
echo "Mv'ing to $OUTPUT" >> ${snakemake_log}
mv $TMP $OUTPUT
else
echo "Adding \"chr\" to first column, then move to $OUTPUT" | tee -a ${snakemake_log}
echo "Adding \"chr\" to first column, then move to $OUTPUT" >> ${snakemake_log}
awk -F "\t" -v OFS="\t" '!/^#/{print "chr"$0}/#/{print}' ${TMP%.gz} > $OUTPUT
fi
6 changes: 3 additions & 3 deletions workflow/scripts/edit_condition_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
joined = sample_sheet.join(salmon_condition_sheet)

joined["condition"] = joined.apply(
lambda row: "control"
if row[snakemake.params.variable] == reference_level
else "treatment",
lambda row: (
"control" if row[snakemake.params.variable] == reference_level else "treatment"
),
axis=1,
)
joined = joined.reset_index()
Expand Down
Loading

0 comments on commit 0340bbc

Please sign in to comment.