From ab8b3c3921efc59bb252d392f0f5d1c27d4a4cd6 Mon Sep 17 00:00:00 2001 From: Steffen Heyne Date: Tue, 14 Nov 2017 13:18:15 +0100 Subject: [PATCH 1/4] fix: make sample_qc_report_{SE,PE}.py python3 compatible, make scripts executable as well --- shared/tools/sample_qc_report_PE.py | 8 ++++---- shared/tools/sample_qc_report_SE.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) mode change 100644 => 100755 shared/tools/sample_qc_report_PE.py mode change 100644 => 100755 shared/tools/sample_qc_report_SE.py diff --git a/shared/tools/sample_qc_report_PE.py b/shared/tools/sample_qc_report_PE.py old mode 100644 new mode 100755 index 25efe3dda..ad8bbeebd --- a/shared/tools/sample_qc_report_PE.py +++ b/shared/tools/sample_qc_report_PE.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # paired-end ONLY! @@ -28,7 +28,7 @@ with open(infile_AlignmentSummaryMetrics) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("PAIR"), lines)[0].strip().split("\t") + columns = list(filter(lambda x: x.startswith("PAIR"), lines))[0].strip().split("\t") # PF_READS: The number of PF reads where PF is defined as passing Illumina's filter. total_reads = int(columns[2]) @@ -57,7 +57,7 @@ try: with open(infile_MarkDuplicates) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("Unknown Library"), lines)[0].strip().split("\t") + columns = list(filter(lambda x: x.startswith("Unknown Library"), lines))[0].strip().split("\t") dup_mapped_pairs = int(columns[5]) fdup_mapped_pairs = 1.0 * dup_mapped_pairs / mapped_pairs @@ -79,7 +79,7 @@ elif os.path.isfile(infile_MACS2_xls): with open(infile_MACS2_xls) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("# d"), lines)[0].strip() + columns = list(filter(lambda x: x.startswith("# d"), lines))[0].strip() fragment_size = int(columns.split(" = ")[1]) else: fragment_size = "NA" diff --git a/shared/tools/sample_qc_report_SE.py b/shared/tools/sample_qc_report_SE.py old mode 100644 new mode 100755 index 82a0cb515..d1d5df7ce --- a/shared/tools/sample_qc_report_SE.py +++ b/shared/tools/sample_qc_report_SE.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # single-end ONLY! @@ -24,7 +24,7 @@ try: with open(infile_AlignmentSummaryMetrics) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("UNPAIRED"), lines)[0].strip().split("\t") + columns = list(filter(lambda x: x.startswith("UNPAIRED"), lines))[0].strip().split("\t") # PF_READS: The number of PF reads where PF is defined as passing Illumina's filter. total_reads = int(columns[2]) @@ -45,7 +45,7 @@ try: with open(infile_MarkDuplicates) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("Unknown Library"), lines)[0].strip().split("\t") + columns = list(filter(lambda x: x.startswith("Unknown Library"), lines))[0].strip().split("\t") # UNPAIRED_READ_DUPLICATES: The number of fragments that were marked as duplicates. dup_mapped_reads = int(columns[4]) @@ -66,7 +66,7 @@ try: with open(infile_MACS2_xls) as f: lines = f.readlines() - columns = filter(lambda x: x.startswith("# d"), lines)[0].strip() + columns = list(filter(lambda x: x.startswith("# d"), lines))[0].strip() fragment_size = int(columns.split(" = ")[1]) except: fragment_size = "NA" From 80ee8b78a97c69aa544e5b2c4f4cde1373eea9d3 Mon Sep 17 00:00:00 2001 From: Steffen Heyne Date: Tue, 14 Nov 2017 13:19:49 +0100 Subject: [PATCH 2/4] fix: call sample_qc_report directly in rule, ie. skip python --- shared/rules/ChIP_qc_report.snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/rules/ChIP_qc_report.snakefile b/shared/rules/ChIP_qc_report.snakefile index 195289174..01e589643 100644 --- a/shared/rules/ChIP_qc_report.snakefile +++ b/shared/rules/ChIP_qc_report.snakefile @@ -17,7 +17,7 @@ if paired: benchmark: "QC_report/.benchmark/qc_report.{sample}.benchmark" shell: - "python " + os.path.join(workflow_tools, "sample_qc_report_PE.py") + " " + os.path.join(workflow_tools, "sample_qc_report_PE.py") + " " "{input.alignment_summary_metrics} {input.mark_duplicates_metrics} {input.insert_size_metrics} {input.macs2_xls} {input.macs2_qc_txt} " ">{output} 2>{log} " else: @@ -36,7 +36,7 @@ else: benchmark: "QC_report/.benchmark/qc_report.{sample}.benchmark" shell: - "python " + os.path.join(workflow_tools, "sample_qc_report_SE.py") + " " + os.path.join(workflow_tools, "sample_qc_report_SE.py") + " " "{input.alignment_summary_metrics} {input.mark_duplicates_metrics} {input.macs2_xls} {input.macs2_qc_txt} " ">{output} 2>{log} " From 145975da6ddba5eaa4c817449677468ed16dfa66 Mon Sep 17 00:00:00 2001 From: Steffen Heyne Date: Tue, 14 Nov 2017 15:22:33 +0100 Subject: [PATCH 3/4] fix travis pep8 --- shared/tools/sample_qc_report_PE.py | 10 +++++----- shared/tools/sample_qc_report_SE.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/shared/tools/sample_qc_report_PE.py b/shared/tools/sample_qc_report_PE.py index ad8bbeebd..50fc8aa70 100755 --- a/shared/tools/sample_qc_report_PE.py +++ b/shared/tools/sample_qc_report_PE.py @@ -19,7 +19,7 @@ try: infile_MACS2_xls = sys.argv[4] # sample_name.filtered.BAM_peaks.xls (optional) infile_MACS2_qc_txt = sys.argv[5] # ample_name.filtered.BAM_peaks.qc.txt (optional) -except: +except IndexError: pass @@ -49,7 +49,7 @@ fmapped_pairs = 1.0 * mapped_pairs / read_pairs fmapped_singletons = 1.0 * (mapped_reads - mapped_reads_in_pairs) / total_reads -except: +except OSError: exit("ERROR! Unable to read: {}".format(infile_AlignmentSummaryMetrics)) @@ -63,7 +63,7 @@ fdup_mapped_pairs = 1.0 * dup_mapped_pairs / mapped_pairs dupfree_mapped_pairs = mapped_pairs - dup_mapped_pairs fdupfree_mapped_pairs = 1.0 * dupfree_mapped_pairs / read_pairs -except: +except OSError: exit("ERROR! Unable to read: {}".format(infile_MarkDuplicates)) @@ -83,7 +83,7 @@ fragment_size = int(columns.split(" = ")[1]) else: fragment_size = "NA" -except: +except OSError: fragment_size = "NA" @@ -94,7 +94,7 @@ peak_count = int(columns[0]) frip = round(float(columns[1]), 3) peak_genome_coverage = round(columns[2], 3) -except: +except OSError: peak_count = "NA" frip = "NA" peak_genome_coverage = "NA" diff --git a/shared/tools/sample_qc_report_SE.py b/shared/tools/sample_qc_report_SE.py index d1d5df7ce..e78ad46a0 100755 --- a/shared/tools/sample_qc_report_SE.py +++ b/shared/tools/sample_qc_report_SE.py @@ -16,7 +16,7 @@ try: infile_MACS2_xls = sys.argv[3] # sample_name.filtered.BAM_peaks.xls (optional) infile_MACS2_qc_txt = sys.argv[4] # ample_name.filtered.BAM_peaks.qc.txt (optional) -except: +except IndexError: pass @@ -37,7 +37,7 @@ # fraction of high-quality mapped reads (MAPQ >=20) fhq_mapped_reads = 1.0 * int(columns[8]) / total_reads -except: +except OSError: exit("ERROR! Unable to read: {}\n".format(infile_AlignmentSummaryMetrics)) @@ -58,7 +58,7 @@ # fraction of duplication free mapped reads fdupfree_mapped_reads = 1.0 * dupfree_mapped_reads / total_reads -except: +except OSError: exit("ERROR! Unable to read: {}".format(infile_MarkDuplicates)) @@ -68,7 +68,7 @@ lines = f.readlines() columns = list(filter(lambda x: x.startswith("# d"), lines))[0].strip() fragment_size = int(columns.split(" = ")[1]) -except: +except OSError: fragment_size = "NA" @@ -79,7 +79,7 @@ peak_count = int(columns[0]) frip = round(float(columns[1]), 3) peak_genome_coverage = round(columns[2], 3) -except: +except OSError: peak_count = "NA" frip = "NA" peak_genome_coverage = "NA" From f7b4d675edd518d00eb5173421e7f735b81b2df5 Mon Sep 17 00:00:00 2001 From: Steffen Heyne Date: Tue, 14 Nov 2017 15:25:44 +0100 Subject: [PATCH 4/4] fix pep8 --- shared/common_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/common_functions.py b/shared/common_functions.py index 93820121e..127ab57e1 100644 --- a/shared/common_functions.py +++ b/shared/common_functions.py @@ -103,7 +103,7 @@ def get_sample_names(infiles, ext, reads): x = os.path.basename(x).replace(ext, "") try: x = x.replace(reads[0], "").replace(reads[1], "") - except: + except IndexError: pass s.append(x) return sorted(list(set(s))) @@ -145,7 +145,7 @@ def get_fragment_length(infile): try: median = next(f).split()[0] return int(median) - except: + except TypeError: print("ERROR: File", infile, "is NOT a proper Picard CollectInsertSizeMetrics metrics file.\n") exit(1) # no match in infile