code simplification

Linnas · Jul 24, 2021 · 118a096 · 118a096
1 parent 45231d9
commit 118a096
Show file tree

Hide file tree

Showing 9 changed files with 63 additions and 98 deletions.
diff --git a/rnai/analysis/__pycache__/database_helpers.cpython-37.pyc b/rnai/analysis/__pycache__/database_helpers.cpython-37.pyc
diff --git a/rnai/analysis/__pycache__/free_energy.cpython-37.pyc b/rnai/analysis/__pycache__/free_energy.cpython-37.pyc
diff --git a/rnai/analysis/__pycache__/pipeline.cpython-37.pyc b/rnai/analysis/__pycache__/pipeline.cpython-37.pyc
diff --git a/rnai/analysis/__pycache__/views.cpython-37.pyc b/rnai/analysis/__pycache__/views.cpython-37.pyc
diff --git a/rnai/analysis/database_helpers.py b/rnai/analysis/database_helpers.py
@@ -37,7 +37,6 @@ def create_bowtie_database(db_name, database_file_location, bowtie_location):
     """Creates a Bowtie DB."""
 
     os.chdir(bowtie_location)
-    print(os.getcwd())
     process = subprocess.Popen(["bowtie-build-s", database_file_location, str(db_name)])
     process.wait()
 

diff --git a/rnai/analysis/free_energy.py b/rnai/analysis/free_energy.py
@@ -281,7 +281,6 @@ def calculate_free_energy(seq, check=True, strict=True, c_seq=None, shift=0, nn_
           dNTPs=0, saltcorr=5):
     """Return the delatG using nearest neighbor thermodynamics."""
     #print shift
-    print(seq)
     seq = str(seq)
     if not c_seq:
         # c_seq must be provided by user if dangling ends or mismatches should

diff --git a/rnai/analysis/pipeline.py b/rnai/analysis/pipeline.py
@@ -1,15 +1,16 @@
 import subprocess
 import tempfile
-from Bio import SeqIO
 import os
 import numpy as np
 import json
-from collections import Counter
-from Bio.Seq import Seq
 import re
-from analysis import free_energy
-from analysis import general_helpers
 import sys
+from Bio         import SeqIO, SeqUtils
+from Bio.Seq     import Seq
+from collections import Counter
+from analysis    import free_energy
+from analysis    import general_helpers
+
 
 
 class SifiPipeline(object):
@@ -19,7 +20,8 @@ def __init__(self, bowtie_db, query_sequences, sirna_size, mismatches, accessibi
                  no_efficience, min_gc_range, max_gc_range, right_end_type,remove_damaging_motifs,contiguous_num):
 
 
-        """Class for si-Fi pipeline:
+        """
+        Class for si-Fi pipeline:
 
             si-Fi pipeline for RNAi design.
            1. Save query sequence as fasta file.
@@ -34,22 +36,22 @@ def __init__(self, bowtie_db, query_sequences, sirna_size, mismatches, accessibi
            2. Split query sequence into xmers and store as fasta file.
            3. Run BOWTIE against DB and extract all information.
            4. For each hit, do strand selection for efficiency.
-           5. Store data into json file for plotting."""
-
-        self.bowtie_db = bowtie_db                                                                                      # Bowtie DB
-        self.query_sequences = query_sequences                                                                          # List of all query sequences in multi fasta format
-        self.sirna_size = sirna_size                                                                                    # siRNA size
-        self.mismatches = mismatches                                                                                    # Allowed mismatches                                                                                # DB path
-        self.rnaplfold_location = rnaplfold_location                                                                    # Rnaplfold path
-        self.bowtie_location = bowtie_location                                                                          # Bowtie path
-        self.mode = 0                                                                                                # Mode, either RNAi design or off-target prediction
-
-        self.strand_check = strand_check                                                                                # Strand selection is enabled or disabled
-        self.end_check = end_check                                                                                      # End stability selection is enabled or disabled
-        self.accessibility_check = accessibility_check                                                                  # Target site accessibility is enabled or disabled
-        self.accessibility_window = accessibility_window                                                                # Accessibility window
-        self.end_stability_treshold = end_stability_treshold                                                            # End stability treshold
-        self.ts_accessibility_treshold = target_site_accessibility_treshold                                             # Target site accessibility threshold
+           5. Store data into json file for plotting.
+        """
+
+        self.bowtie_db = bowtie_db                                              # Bowtie DB
+        self.query_sequences = query_sequences                                  # List of all query sequences in multi fasta format
+        self.sirna_size = sirna_size                                            # siRNA size
+        self.mismatches = mismatches                                            # Allowed mismatches                                                                                # DB path
+        self.rnaplfold_location = rnaplfold_location                            # Rnaplfold path
+        self.bowtie_location = bowtie_location                                  # Bowtie path
+
+        self.strand_check = strand_check                                        # Strand selection is enabled or disabled
+        self.end_check = end_check                                              # End stability selection is enabled or disabled
+        self.accessibility_check = accessibility_check                          # Target site accessibility is enabled or disabled
+        self.accessibility_window = accessibility_window                        # Accessibility window
+        self.end_stability_treshold = end_stability_treshold                    # End stability treshold
+        self.ts_accessibility_treshold = target_site_accessibility_treshold     # Target site accessibility threshold
         self.terminal_check = terminal_check
         self.no_efficience = no_efficience
         self.min_gc_range = min_gc_range
@@ -60,20 +62,19 @@ def __init__(self, bowtie_db, query_sequences, sirna_size, mismatches, accessibi
 
 
         # Some constants
-        self.winsize = 80                                                                                               # Average the pair probabil. over windows of given size
-        self.span = 40                                                                                                  # Set the maximum allowed separation of a base pair to span
-        self.temperature = 22                                                                                           # Temperature for calculation free energy
-        self.sirna_start_position = 0
-        self.overhang = 2                                                                                               # siRNA overhang
-        self.end_nucleotides = 3                                                                                        # siRNA end nucleotides
+        self.winsize = 80                                                        # Average the pair probabil. over windows of given size
+        self.span = 40                                                           # Set the maximum allowed separation of a base pair to span
+        self.temperature = 22                                                    # Temperature for calculation free energ
+        self.sirna_start_position = 0                                            
+        self.overhang = 2                                                        # siRNA overhang
+        self.end_nucleotides = 3                                                 # siRNA end nucleotides
         self.snp_location = bowtie_location + '\\snp.json' 
         with open(self.snp_location) as f:
             self.SNPs = json.load(f)                                                                              
 
     def run_pipeline(self):
         """Start the si-Fi pipeline either in off-target or design mode."""
 
-        # Iterate over all query sequences
         for seq_record in SeqIO.parse(self.query_sequences, "fasta"):
             # Store ID and sequence
             self.query_name = seq_record.id
@@ -98,7 +99,7 @@ def process_data(self, target):
         json_lst = self.data_to_json(self.query_name, self.bowtie_data, no_target, self.lunp_data, target)
         if self.remove_damaging_motifs:
              json_lst =  list(filter(lambda x: self.is_damaging(x['sirna_sequence']), json_lst))
-        json_lst = list(filter(lambda x: self.max_gc_range > x['gc_content']*100  > self.min_gc_range, json_lst))
+        json_lst = list(filter(lambda x: self.max_gc_range > x['gc_content']  > self.min_gc_range, json_lst))
         json_lst = list(filter(lambda x: self.gc_contiguous(x['sirna_sequence']), json_lst))
         json.dump(json_lst, fp, indent=4)
         os.close(fd)
@@ -134,43 +135,38 @@ def process_data(self, target):
 
     def run_bowtie(self, sequence):
         """Run BOWTIE alignment."""
-        temp_bowtie_file = tempfile.mkstemp()
         os.chdir(self.bowtie_location)
-
         # -a report all alignments per read;
         # -n max mismatches in seed
         # -y try hard to find valid alignments, at the expense of speed
         # -x index name
         # -f query input files are (multi-)FASTA .fa/.mfa
-        fd, path = tempfile.mkstemp(suffix=".fasta")
-        with open(path, 'w') as f:
+        fd, out_path    = tempfile.mkstemp()
+        fdd, input_path = tempfile.mkstemp(suffix=".fasta")
+
+        with os.fdopen(fdd, mode="r+") as fp:
             for i in range(0, len(sequence)-self.sirna_size+1):
-                f.write('> sirna'+str(i+1)+'\n')
-                f.write(sequence[i:i+self.sirna_size].upper() + '\n')
-        os.close(fd)
+                fp.write('> sirna'+str(i+1)+'\n')
+                fp.write(sequence[i:i+self.sirna_size].upper() + '\n')
 
         process = subprocess.Popen(["bowtie", "-a", "-v", str(self.mismatches),  "-y",
                                     self.bowtie_db, "-f",
-                                    path, temp_bowtie_file[1]])
+                                    input_path, out_path])
         process.wait()
 
-        os.remove(path)
-
-        if os.path.exists(temp_bowtie_file[1]):
-            bowtie_data = open(temp_bowtie_file[1], 'r').readlines()
-            bowtie_data_l = list(map(lambda x: x.strip().split('\t'), bowtie_data))
-
+        with os.fdopen(fd) as fp:
+            bowtie_data = fp.readlines()
+            bowtie_data = list(map(lambda x: x.strip().split('\t'), bowtie_data))
 
-            return bowtie_data_l
-        else:
-            return []
+        os.unlink(out_path)
+        os.unlink(input_path)
+        return bowtie_data
 
     def run_rnaplfold(self, query_name, sequence):
 
         os.chdir(self.rnaplfold_location)
 
         with tempfile.TemporaryDirectory() as fp:
-            print(fp)     
             prc_stdout = subprocess.PIPE
             prc = subprocess.Popen(['RNAplfold', '-W', '%d'%self.winsize,'-L', '%d'% self.span, '-u', '%d'%self.sirna_size, '-T', '%.2f'%self.temperature], stdin=subprocess.PIPE, stdout=prc_stdout, cwd=fp)
             prc.stdin.write(sequence.encode())
@@ -184,13 +180,13 @@ def run_rnaplfold(self, query_name, sequence):
 
         return lunp_data
 
-    def data_to_json(self, query_name, input_data, no_target, lunp_data, main_targets):
+    def data_to_json(self, query_name, align_data, no_target, lunp_data, main_targets):
         """Extracts the data from bowtie results and put everything into json format.
            Efficiency is calculated for each siRNA.
            If no target is found, for design mode the siRNA fasta file is used instead of Bowtie data."""
 
         json_lst = []
-        for entity in input_data:
+        for entity in align_data:
             if not no_target:
                 sirna_name = entity[0]
                 strand = entity[1]
@@ -200,13 +196,11 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target
                 sirna_sequence = entity[4]
                 missmatches = entity[7] if self.mismatches else 0
 
-                if self.mode == 0:
-                    if hit_name == main_targets:
-                        off_target = False
-                    else:
-                        off_target = True
+                if hit_name == main_targets:
+                    off_target = False
                 else:
-                    off_target = None
+                    off_target = True
+
             else:
                 # We use the siRNA fasta file to get the efficiency information for each siRNA
                 sirna_name = entity[0]
@@ -232,11 +226,13 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target
                     # We must ignore the first two siRNAs because we can not calculate free energy
                 if query_position == 1 or query_position == 2:
                     sirna_sequence_n2 = None
+                    sirna_complement = sirna_sequence
                 else:
                     sirna_sequence_n2 = self.sirna_l[query_position-3].strip()
+                    sirna_complement = str(Seq(sirna_sequence_n2).reverse_complement().strip())[::-1].upper()
+
 
                 lunp_data_xmer = lunp_data[query_position-1, :].astype(np.float).tolist()[self.accessibility_window]
-                SNP_exist = self.is_snp(main_targets, query_position-1)
 
                 is_efficient,\
                 strand_selection,\
@@ -248,13 +244,14 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target
                 self.calculate_efficiency(sirna_sequence, sirna_sequence_n2, lunp_data_xmer)
 
                 delta_MEF_enegery = anti_sense5_MFE_enegery - sense5_MFE_enegery
-                gc_content = self.calculate_gc_content(sirna_sequence)
-
+                gc_percentage = SeqUtils.GC(sirna_sequence)
+                SNP_exist = self.is_snp(main_targets, query_position-1)
                 json_dict = {
                     "query_name": query_name,
                     "sirna_name":sirna_name,
                     "sirna_position": query_position,
                     "sirna_sequence": sirna_sequence,
+                    "sirna_complement": sirna_complement,
                     "is_efficient": is_efficient,
                     "SNP_exist": SNP_exist,
                     "strand_selection": strand_selection,
@@ -266,22 +263,16 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target
                     "accessibility_value": lunp_data_xmer,
                     "is_off_target": off_target,
                     "hit_name": hit_name,
-                    "gc_content":gc_content,
+                    "gc_content":gc_percentage,
                     "reference_strand_pos":reference_strand_pos,
                     "strand": strand,
                     "mismatches": missmatches,
                     "thermo_effcicient": thermo_effcicient
                 }
-                json_lst.append(json_dict)
-
+                if(is_efficient):
+                    json_lst.append(json_dict)
         return json_lst
 
-
-    def calculate_gc_content(self, sequence):
-        seq_letter_list = [i for i in sequence]
-        gc_content = (Counter(seq_letter_list)['C'] + Counter(seq_letter_list)['G']) / len(seq_letter_list)
-        return round(gc_content, 2)
-
     def gc_contiguous(self, sequence):
         patterns = ['C'*self.contiguous_num, 'G'*self.contiguous_num]
         re_res = [re.findall(pattern, sequence) for pattern in patterns]
@@ -329,7 +320,6 @@ def free_energy_dangling_ends(self, sirna_sequence, sirna_sequence_n2):
             # Anitsense5_MFE for sifi siRNA not zhangbing siRNA
             antisense_five_seq = Seq(sirna_sequence_n2).reverse_complement().strip()[self.sirna_start_position:self.end_nucleotides]
             antisense_c_seq = sirna_sequence[self.sirna_size-5:self.sirna_size-1]
-            print(antisense_five_seq)
             anti_sense5_MFE_enegery = free_energy.calculate_free_energy(antisense_five_seq, check=True, strict=True, c_seq=antisense_c_seq[::-1], shift=1)
 
 
@@ -466,11 +456,6 @@ def is_snp(self, hit_name, start_position):
                 pass
         return 'Yes' if bool(snp_sum) else 'No' 
 
-
-
-
-
-
     def calculate_efficiency(self, sirna_sequence, sirna_sequence_n2, lunp_data_xmer):
         """"""
         is_efficient = None

diff --git a/rnai/analysis/views.py b/rnai/analysis/views.py
@@ -84,6 +84,7 @@ def process_data(request):
 		target = order['target']
 		table_data = sifi.process_data(target)
 		response['table_data']     = table_data
+		print(table_data)
 		# response['json_lst']       = json_lst
 		# response['eff_sirna_plot'] = eff_sirna_plot
 		# response['main_histo']     = main_histo

diff --git a/src/components/aligntable.vue b/src/components/aligntable.vue
@@ -147,45 +147,26 @@ export default {
       this.targets = [...new Set(hit_targets[2])].sort(collator.compare);
 
       var targets_counts = _.countBy(hit_targets[2])
-      var snp_counts = _.countBy(hit_targets[hit_targets.length - 1])
 
       var trace1 = {
          values: Object.values(targets_counts),
          labels: Object.keys(targets_counts),
          type:'pie',
          name:'hit Target',
-         domain:{row:0},
          hole:.4,
          textinfo: "label+percent",
          textposition: "outside",
          automargin: true,
-         textinfo: "label+percent",
-         textposition: "outside",
-         hoverinfo: 'label+percent',
-      };
-      var trace2 = {
-         values: Object.values(snp_counts),
-         labels: Object.keys(snp_counts),
-         type:'pie',
-         name:'SNP',
-         domain:{row:1},
-         hole:.4,
-         textinfo: "label+percent",
-         textposition: "inside",
-         automargin: true,
-         // textinfo: "label+percent",
-         // textposition: "outside",
          hoverinfo: 'label+percent',
       };
       var layout = {
          height:550,
          width:300,
-         ygap:0.2,
          margin: {"t": 0, "b": 0, "l": 0, "r": 0},
-         showlegend:false,
-         grid:{rows:2, columns:1}
+         showlegend:false
+
       };
-      Plotly.newPlot('pieDiv', [trace1, trace2], layout)
+      Plotly.newPlot('pieDiv', [trace1], layout)
       // Plotly.newPlot('pieDiv2', [trace2], layout)
 
       var lunp_data = this.$store.state.lunaData;