From fc38a77cfe35aef2f7b6bec2a0f17bcf650af4f3 Mon Sep 17 00:00:00 2001 From: chels0 Date: Thu, 14 Dec 2023 16:07:35 +0100 Subject: [PATCH 1/4] feat: html and excel depth table for relevant positions --- .tests/integration/config/config.yaml | 3 + .../integration/reference/html_template.html | 660 ++++++++++++++++++ .../reference/target_rsid.TPMT.bed | 8 +- config/config.yaml | 5 +- config/output_files.yaml | 14 + data/templates/html_template.html | 660 ++++++++++++++++++ .../report_template.txt | 0 workflow/Snakefile | 1 + workflow/rules/generate_depth_report.smk | 37 + workflow/scripts/generate_depth_report.py | 91 +++ 10 files changed, 1474 insertions(+), 5 deletions(-) create mode 100644 .tests/integration/reference/html_template.html create mode 100644 data/templates/html_template.html rename data/{guidelines => templates}/report_template.txt (100%) create mode 100644 workflow/rules/generate_depth_report.smk create mode 100644 workflow/scripts/generate_depth_report.py diff --git a/.tests/integration/config/config.yaml b/.tests/integration/config/config.yaml index 046c97e..25d198d 100644 --- a/.tests/integration/config/config.yaml +++ b/.tests/integration/config/config.yaml @@ -80,3 +80,6 @@ reference: generate_pgx_report: report_template: "reference/report_template.txt" + +generate_depth_report: + html_template: "reference/html_template.html" \ No newline at end of file diff --git a/.tests/integration/reference/html_template.html b/.tests/integration/reference/html_template.html new file mode 100644 index 0000000..f80cf74 --- /dev/null +++ b/.tests/integration/reference/html_template.html @@ -0,0 +1,660 @@ + + + + + + + + + Farmakogenomisk analys av {{sample}} + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

Farmakogenomisk analys av {{sample}}

+ + + +
+
+

Provdetaljer

+ + + + + + + + + + + + + + + + + + + + +
+Prov ID +{{sample}} +
+Sekvenserings ID + +{{ sample_id }} +
+dbSNP + +{{dbsnp}} +
+Referensgenomens sökväg + +{{ref_path}} +
+
+ +
+

Läsdjup för kliniska positioner

+ +{{ table }} + + + + + +
+
+

Läsdjup vid targets

+ + + + + diff --git a/.tests/integration/reference/target_rsid.TPMT.bed b/.tests/integration/reference/target_rsid.TPMT.bed index 16f68f7..d0dd867 100644 --- a/.tests/integration/reference/target_rsid.TPMT.bed +++ b/.tests/integration/reference/target_rsid.TPMT.bed @@ -1,4 +1,4 @@ -chr6 18131012 18131012 rs1800584 TPMT -chr6 18130918 18130918 rs1142345 TPMT -chr6 18143955 18143955 rs1800462 TPMT -chr6 18139228 18139228 rs1800460 TPMT +6 18131012 18131012 rs1800584 TPMT +6 18130918 18130918 rs1142345 TPMT +6 18143955 18143955 rs1800462 TPMT +6 18139228 18139228 rs1800460 TPMT diff --git a/config/config.yaml b/config/config.yaml index cdce1d2..122b020 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -82,4 +82,7 @@ get_interaction_guidelines: interaction_guidelines: "data/guidelines/interaction_guidelines.csv" generate_pgx_report: - report_template: "data/guidelines/report_template.txt" + report_template: "data/templates/report_template.txt" + +generate_depth_report: + html_template: "data/templates/html_template.html" diff --git a/config/output_files.yaml b/config/output_files.yaml index 0212fe4..e76c40c 100644 --- a/config/output_files.yaml +++ b/config/output_files.yaml @@ -26,3 +26,17 @@ files: types: - T - N + + - name: "_copy_pgx_depth_excel" + input: "pgx/generate_depth_report/{sample}_{type}_pgx_depth.xlsx" + output: "results/dna/pgx/{sample}_{type}.pgx_depth_table.xlsx" + types: + - T + - N + + - name: "_copy_pgx_depth_html" + input: "pgx/generate_depth_report/{sample}_{type}_pgx_depth.html" + output: "results/dna/pgx/{sample}_{type}.pgx_depth_table.html" + types: + - T + - N \ No newline at end of file diff --git a/data/templates/html_template.html b/data/templates/html_template.html new file mode 100644 index 0000000..f80cf74 --- /dev/null +++ b/data/templates/html_template.html @@ -0,0 +1,660 @@ + + + + + + + + + Farmakogenomisk analys av {{sample}} + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +

Farmakogenomisk analys av {{sample}}

+ + + +
+
+

Provdetaljer

+ + + + + + + + + + + + + + + + + + + + +
+Prov ID +{{sample}} +
+Sekvenserings ID + +{{ sample_id }} +
+dbSNP + +{{dbsnp}} +
+Referensgenomens sökväg + +{{ref_path}} +
+
+ +
+

Läsdjup för kliniska positioner

+ +{{ table }} + + + + + +
+
+

Läsdjup vid targets

+ + + + + diff --git a/data/guidelines/report_template.txt b/data/templates/report_template.txt similarity index 100% rename from data/guidelines/report_template.txt rename to data/templates/report_template.txt diff --git a/workflow/Snakefile b/workflow/Snakefile index 5b06195..dd23935 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -5,6 +5,7 @@ __license__ = "GPL-3" include: "rules/common.smk" +include: "rules/generate_depth_report.smk" include: "rules/generate_pgx_report.smk" include: "rules/reform_genomic_region.smk" include: "rules/depth_of_coverage.smk" diff --git a/workflow/rules/generate_depth_report.smk b/workflow/rules/generate_depth_report.smk new file mode 100644 index 0000000..f518c80 --- /dev/null +++ b/workflow/rules/generate_depth_report.smk @@ -0,0 +1,37 @@ +__author__ = "Chelsea Ramsin" +__copyright__ = "Copyright 2023, Chelsea Ramsin" +__email__ = "chelsea.ramsin@regionostergotland.se" +__license__ = "GPL-3" + + +rule generate_depth_report: + input: + missed_variants="pgx/append_id_to_gdf/{sample}_{type}.depth_at_missing_annotated.gdf", + output: + html_report=temp("pgx/generate_depth_report/{sample}_{type}_pgx_depth.html"), + excel_report=temp("pgx/generate_depth_report/{sample}_{type}_pgx_depth.xlsx"), + params: + haplotype_definitions=config.get("get_clinical_guidelines", {}).get("haplotype_definitions", ""), + html_template=config.get("generate_depth_report", {}).get("html_template", ""), + dbsnp=config.get("reference", {}).get("dbsnp", ""), + ref=config.get("reference", {}).get("fasta", ""), + log: + "pgx/generate_depth_report/{sample}_{type}.output.log", + benchmark: + repeat( + "pgx/generate_depth_report/{sample}_{type}.output.benchmark.tsv", + config.get("generate_depth_report", {}).get("benchmark_repeats", 1), + ) + threads: config.get("generate_depth_report", {}).get("threads", config["default_resources"]["threads"]) + resources: + mem_mb=config.get("generate_depth_report", {}).get("mem_mb", config["default_resources"]["mem_mb"]), + mem_per_cpu=config.get("generate_depth_report", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]), + partition=config.get("generate_depth_report", {}).get("partition", config["default_resources"]["partition"]), + threads=config.get("generate_depth_report", {}).get("threads", config["default_resources"]["threads"]), + time=config.get("generate_depth_report", {}).get("time", config["default_resources"]["time"]), + container: + config.get("generate_depth_report", {}).get("container", config["default_container"]) + message: + "{rule}: Generate read depth reports on pgx/{rule}/{wildcards.sample}_{wildcards.type}.input" + script: + "../scripts/generate_depth_report.py" diff --git a/workflow/scripts/generate_depth_report.py b/workflow/scripts/generate_depth_report.py new file mode 100644 index 0000000..1e2b80f --- /dev/null +++ b/workflow/scripts/generate_depth_report.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Dec 6 11:34:29 2023 + +""" + +import pandas as pd +import jinja2 + + +def highlight_greaterthan(s, threshold, column): + is_max = pd.Series(data=False, index=s.index) + is_max[column] = s.loc[column] < threshold + return [ + 'background-color: red ; font-weight: bold ; color: white' + if is_max.any() else '' for v in is_max + ] + + +def create_depth_table(missing, haplotype_definitions): + missing = missing[['ID', 'Locus', 'Total_Depth']] + + depth_table = missing.merge(haplotype_definitions, on=['ID']) + depth_table.rename(columns={ + 'ID': 'rsID', + 'HAPLOTYPE': 'Haplotyp', + 'Locus': 'Position', + 'Total_Depth': 'Läsdjup' + }, + inplace=True) + + new_col_order = ['rsID', 'Haplotyp', 'Position', 'Läsdjup'] + depth_table = depth_table[new_col_order] + + depth_table = depth_table.groupby(['Position', 'Läsdjup', + 'rsID'])['Haplotyp'].apply( + '/'.join).reset_index() + depth_table['sort'] = depth_table['Position'].str.extract( + r'(\d+)', expand=False).astype(int) + depth_table = depth_table.sort_values('sort') + depth_table = depth_table.drop('sort', axis=1) + + styled_depth_table = depth_table.style.apply(highlight_greaterthan, + threshold=100, + column=['Läsdjup'], + axis=1).hide(axis="index") + + return styled_depth_table + + +def generate_html_report(styled_depth_table, missing, template, dbsnp, + ref_path): + sample_column = missing.columns[3] + sample = sample_column.split('_')[2] + sample_id = sample_column.split('_')[3] + + with open(template, 'r') as f: + template = jinja2.Template(source=f.read()) + styled_depth_report = template.render( + sample=sample, + sample_id=sample_id, + dbsnp=dbsnp, + ref_path=ref_path, + table=styled_depth_table.to_html(index=False)) + + return styled_depth_report + + +if __name__ == "__main__": + missed_variants = snakemake.input.missed_variants + dbsnp = snakemake.params.dbsnp + ref = snakemake.params.ref + haplotype_definitions = snakemake.params.haplotype_definitions + html_template = snakemake.params.html_template + excel_report = snakemake.output.excel_report + html_report = snakemake.output.html_report + + haplotype_definitions = pd.read_csv(haplotype_definitions, sep='\t') + missed_variants = pd.read_csv(missed_variants, sep='\t') + + styled_depth_table = create_depth_table(missed_variants, + haplotype_definitions) + styled_depth_report = generate_html_report(styled_depth_table, + missed_variants, html_template, + dbsnp, ref) + + with open(html_report, 'w') as w: + w.write(styled_depth_report) + + styled_depth_table.to_excel(excel_report, index=False) From 35e9079d287b7e73b64655dd4c1b0cad13d4d62d Mon Sep 17 00:00:00 2001 From: chels0 Date: Fri, 19 Jan 2024 11:40:39 +0100 Subject: [PATCH 2/4] fix: change to UCSC annotation --- ..._pharmacogenomics_18_06_2019_ex_cyp2d6.bed | 82 +++++++++---------- data/genomic_regions/target_rsid.bed | 36 ++++---- workflow/scripts/get_target_variants.py | 2 +- workflow/scripts/reform_genomic_region.py | 4 +- 4 files changed, 62 insertions(+), 62 deletions(-) diff --git a/data/genomic_regions/exons_variants_pharmacogenomics_18_06_2019_ex_cyp2d6.bed b/data/genomic_regions/exons_variants_pharmacogenomics_18_06_2019_ex_cyp2d6.bed index 6a40499..405d44d 100755 --- a/data/genomic_regions/exons_variants_pharmacogenomics_18_06_2019_ex_cyp2d6.bed +++ b/data/genomic_regions/exons_variants_pharmacogenomics_18_06_2019_ex_cyp2d6.bed @@ -1,41 +1,41 @@ -1 98386420 98386599 exon1_DPYD -1 98348800 98348950 exon2_DPYD -1 98293650 98293772 exon3_DPYD -1 98205928 98206055 exon4_DPYD -1 98187046 98187247 exon5_DPYD -1 98164887 98165123 exon6_DPYD -1 98157253 98157374 exon7_DPYD -1 98144631 98144758 exon8_DPYD -1 98060595 98060742 exon9_DPYD -1 98058754 98058963 exon10_DPYD -1 98039296 98039546 exon11_DPYD -1 98015096 98015320 exon12_DPYD -1 97981262 97981517 exon13_DPYD -1 97915595 97915799 exon14_DPYD -1 97847929 97848037 exon15_DPYD -1 97839097 97839220 exon16_DPYD -1 97771713 97771873 exon17_DPYD -1 97770795 97770954 exon18_DPYD -1 97700388 97700570 exon19_DPYD -1 97658605 97658824 exon20_DPYD -1 97564025 97564208 exon21_DPYD -1 97547866 97548046 exon22_DPYD -1 97543279 97544722 exon23_DPYD -6 18155244 18155325 exon1_TPMT -6 18149199 18149422 exon2_TPMT -6 18148034 18148166 exon3_TPMT -6 18143807 18143979 exon4_TPMT -6 18139876 18139968 exon5_TPMT -6 18139174 18139288 exon6_TPMT -6 18134015 18134140 exon7_TPMT -6 18132344 18132428 exon8_TPMT -6 18128522 18131031 exon9_TPMT -2 234668874 234669817 exon1_UGT1A1 -2 234675660 234675831 exon2_UGT1A1 -2 234676475 234676602 exon3_UGT1A1 -2 234676846 234677105 exon4_UGT1A1 -2 234680888 234681965 exon5_UGT1A1 -13 48611683 48612060 exon1_NUDT15 -13 48615036 48615272 exon2_NUDT15 -13 48619776 48621378 exon3_NUDT15 -1 98045349 98045549 rs75017182_DPYD +chr1 98386420 98386599 exon1_DPYD +chr1 98348800 98348950 exon2_DPYD +chr1 98293650 98293772 exon3_DPYD +chr1 98205928 98206055 exon4_DPYD +chr1 98187046 98187247 exon5_DPYD +chr1 98164887 98165123 exon6_DPYD +chr1 98157253 98157374 exon7_DPYD +chr1 98144631 98144758 exon8_DPYD +chr1 98060595 98060742 exon9_DPYD +chr1 98058754 98058963 exon10_DPYD +chr1 98039296 98039546 exon11_DPYD +chr1 98015096 98015320 exon12_DPYD +chr1 97981262 97981517 exon13_DPYD +chr1 97915595 97915799 exon14_DPYD +chr1 97847929 97848037 exon15_DPYD +chr1 97839097 97839220 exon16_DPYD +chr1 97771713 97771873 exon17_DPYD +chr1 97770795 97770954 exon18_DPYD +chr1 97700388 97700570 exon19_DPYD +chr1 97658605 97658824 exon20_DPYD +chr1 97564025 97564208 exon21_DPYD +chr1 97547866 97548046 exon22_DPYD +chr1 97543279 97544722 exon23_DPYD +chr2 18155244 18155325 exon1_TPMT +chr2 18149199 18149422 exon2_TPMT +chr2 18148034 18148166 exon3_TPMT +chr2 18143807 18143979 exon4_TPMT +chr2 18139876 18139968 exon5_TPMT +chr2 18139174 18139288 exon6_TPMT +chr2 18134015 18134140 exon7_TPMT +chr2 18132344 18132428 exon8_TPMT +chr2 18128522 18131031 exon9_TPMT +chr2 234668874 234669817 exon1_UGT1A1 +chr2 234675660 234675831 exon2_UGT1A1 +chr2 234676475 234676602 exon3_UGT1A1 +chr2 234676846 234677105 exon4_UGT1A1 +chr2 234680888 234681965 exon5_UGT1A1 +chr13 48611683 48612060 exon1_NUDT15 +chr13 48615036 48615272 exon2_NUDT15 +chr13 48619776 48621378 exon3_NUDT15 +chr1 98045349 98045549 rs75017182_DPYD diff --git a/data/genomic_regions/target_rsid.bed b/data/genomic_regions/target_rsid.bed index 36b031d..e351d78 100644 --- a/data/genomic_regions/target_rsid.bed +++ b/data/genomic_regions/target_rsid.bed @@ -1,18 +1,18 @@ -6 18131012 18131012 rs1800584 TPMT -6 18130918 18130918 rs1142345 TPMT -6 18143955 18143955 rs1800462 TPMT -6 18139228 18139228 rs1800460 TPMT -1 98039499 98039499 rs78060119 DPYD -1 97915622 97915622 rs72549303 DPYD -1 97981343 97981343 rs55886062 DPYD -1 98165030 98165030 rs115232898 DPYD -1 97915614 97915614 rs3918290 DPYD -1 98045449 98045449 rs75017182 DPYD -1 97547947 97547947 rs67376798 DPYD -1 97544627 97544627 rs1801268 DPYD -1 98157332 98157332 rs1801266 DPYD -1 98205971 98205974 rs72549309 DPYD -13 48611934 48611934 rs186364861 NUDT15 -13 48619855 48619855 rs116855232 NUDT15 -13 48619856 48619856 rs147390019 NUDT15 -13 48611919 48611924 rs746071566 NUDT15 \ No newline at end of file +chr6 18131012 18131012 rs1800584 TPMT +chr6 18130918 18130918 rs1142345 TPMT +chr6 18143955 18143955 rs1800462 TPMT +chr6 18139228 18139228 rs1800460 TPMT +chr1 98039499 98039499 rs78060119 DPYD +chr1 97915622 97915622 rs72549303 DPYD +chr1 97981343 97981343 rs55886062 DPYD +chr1 98165030 98165030 rs115232898 DPYD +chr1 97915614 97915614 rs3918290 DPYD +chr1 98045449 98045449 rs75017182 DPYD +chr1 97547947 97547947 rs67376798 DPYD +chr1 97544627 97544627 rs1801268 DPYD +chr1 98157332 98157332 rs1801266 DPYD +chr1 98205971 98205974 rs72549309 DPYD +chr13 48611934 48611934 rs186364861 NUDT15 +chr13 48619855 48619855 rs116855232 NUDT15 +chr13 48619856 48619856 rs147390019 NUDT15 +chr13 48611919 48611924 rs746071566 NUDT15 diff --git a/workflow/scripts/get_target_variants.py b/workflow/scripts/get_target_variants.py index c39c510..8271fba 100644 --- a/workflow/scripts/get_target_variants.py +++ b/workflow/scripts/get_target_variants.py @@ -40,7 +40,7 @@ def _annotate(x, targets): idx_swap = targets.START > targets.END targets.loc[idx_swap, "START"] = targets.loc[idx_swap, "END"] targets.loc[idx_swap, "END"] = targets.loc[idx_swap, "save"] - targets["CHROM"] = targets.CHROM.apply(lambda x: f"chr{x}") + #targets["CHROM"] = targets.CHROM.apply(lambda x: f"chr{x}") self.data["ID"] = self.data.apply(lambda x: _annotate(x, targets), axis=1) diff --git a/workflow/scripts/reform_genomic_region.py b/workflow/scripts/reform_genomic_region.py index 11b4707..cac3ac8 100644 --- a/workflow/scripts/reform_genomic_region.py +++ b/workflow/scripts/reform_genomic_region.py @@ -21,8 +21,8 @@ def reform(target_bed, output_f, detected_variants, padding, file_format): end, start = start, end start -= padding end += padding - if not str(chrom).startswith('chr'): - chrom = f"chr{chrom}" + #if not str(chrom).startswith('chr'): + # chrom = f"chr{chrom}" if bed: # f.write(f"chr{chrom}\t{start}\t{end}\t{id}\n") f.write(f"{chrom}\t{start}\t{end}\t{id}\n") From 20d11772a9f55638e23a496b0f09e32946b08c7d Mon Sep 17 00:00:00 2001 From: chels0 Date: Fri, 19 Jan 2024 11:48:57 +0100 Subject: [PATCH 3/4] fix: change back to UCSC annotation --- .tests/integration/reference/target_rsid.TPMT.bed | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.tests/integration/reference/target_rsid.TPMT.bed b/.tests/integration/reference/target_rsid.TPMT.bed index d0dd867..16f68f7 100644 --- a/.tests/integration/reference/target_rsid.TPMT.bed +++ b/.tests/integration/reference/target_rsid.TPMT.bed @@ -1,4 +1,4 @@ -6 18131012 18131012 rs1800584 TPMT -6 18130918 18130918 rs1142345 TPMT -6 18143955 18143955 rs1800462 TPMT -6 18139228 18139228 rs1800460 TPMT +chr6 18131012 18131012 rs1800584 TPMT +chr6 18130918 18130918 rs1142345 TPMT +chr6 18143955 18143955 rs1800462 TPMT +chr6 18139228 18139228 rs1800460 TPMT From f92171a8013add9d727495631f35d5664d8e3662 Mon Sep 17 00:00:00 2001 From: chels0 Date: Fri, 19 Jan 2024 11:56:44 +0100 Subject: [PATCH 4/4] fix: pycodestyle --- workflow/scripts/get_target_variants.py | 1 - workflow/scripts/reform_genomic_region.py | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/workflow/scripts/get_target_variants.py b/workflow/scripts/get_target_variants.py index 8271fba..cd6629c 100644 --- a/workflow/scripts/get_target_variants.py +++ b/workflow/scripts/get_target_variants.py @@ -40,7 +40,6 @@ def _annotate(x, targets): idx_swap = targets.START > targets.END targets.loc[idx_swap, "START"] = targets.loc[idx_swap, "END"] targets.loc[idx_swap, "END"] = targets.loc[idx_swap, "save"] - #targets["CHROM"] = targets.CHROM.apply(lambda x: f"chr{x}") self.data["ID"] = self.data.apply(lambda x: _annotate(x, targets), axis=1) diff --git a/workflow/scripts/reform_genomic_region.py b/workflow/scripts/reform_genomic_region.py index cac3ac8..d5c13bf 100644 --- a/workflow/scripts/reform_genomic_region.py +++ b/workflow/scripts/reform_genomic_region.py @@ -21,14 +21,11 @@ def reform(target_bed, output_f, detected_variants, padding, file_format): end, start = start, end start -= padding end += padding - #if not str(chrom).startswith('chr'): - # chrom = f"chr{chrom}" + if bed: - # f.write(f"chr{chrom}\t{start}\t{end}\t{id}\n") f.write(f"{chrom}\t{start}\t{end}\t{id}\n") else: - # f.write(f"chr{chrom}:{start}-{end}\n") f.write(f"{chrom}:{start}-{end}\n")