Skip to content

Commit

Permalink
numpy dependency removed
Browse files Browse the repository at this point in the history
  • Loading branch information
mikolmogorov committed Jan 15, 2019
1 parent 4a44441 commit c9db046
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
25 changes: 21 additions & 4 deletions flye/trestle/divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from itertools import izip
import multiprocessing
import signal
import numpy as np
import os.path

from flye.polishing.alignment import shift_gaps, SynchronizedSamReader
Expand Down Expand Up @@ -296,7 +295,7 @@ def _write_div_summary(div_sum_path, sum_header, positions,
curr_pos = p
position_gaps[-1] = seq_len-curr_pos

mean_position_gap = np.mean(position_gaps)
mean_position_gap = _mean(position_gaps)
max_position_gap = max(position_gaps)

window_len = 1000
Expand All @@ -321,8 +320,8 @@ def _write_div_summary(div_sum_path, sum_header, positions,
if curr_window_len != 0:
window_divs[i] = position_counts[i]/float(curr_window_len)

mean_window_div = np.mean(window_divs)
median_window_div = np.median(window_divs)
mean_window_div = _mean(window_divs)
median_window_div = _get_median(window_divs)
min_window_div = min(window_divs)

with open(div_sum_path, 'w') as f:
Expand Down Expand Up @@ -396,3 +395,21 @@ def read_positions(positions_file):
except IOError as e:
raise PositionIOError(e)
return headers, positions


def _get_median(lst):
if not lst:
raise ValueError("_get_median() arg is an empty sequence")
sorted_list = sorted(lst)
if len(lst) % 2 == 1:
return sorted_list[len(lst)/2]
else:
mid1 = sorted_list[(len(lst)/2) - 1]
mid2 = sorted_list[(len(lst)/2)]
return float(mid1 + mid2) / 2


def _mean(lst):
if not lst:
return 0
return float(sum(lst)) / len(lst)
24 changes: 14 additions & 10 deletions flye/trestle/trestle.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import os
import logging
import numpy as np
from itertools import combinations, product
import copy
import multiprocessing, signal
Expand Down Expand Up @@ -918,8 +917,8 @@ def find_coverage(frequency_file):
header, freqs = div.read_frequency_path(frequency_file)
cov_ind = header.index("Cov")
all_covs = [f[cov_ind] for f in freqs]
coverage = np.mean(all_covs)
#print min(all_covs), np.mean(all_covs), max(all_covs)
coverage = _mean(all_covs)
#print min(all_covs), _mean(all_covs), max(all_covs)
return coverage


Expand Down Expand Up @@ -1019,7 +1018,7 @@ def locate_consensus_cutpoint(side, read_endpoints, edge_read_file):
window_start = 0
if window_end > len(coverage):
window_end = len(coverage)
avg_cov = np.mean(coverage[window_start:window_end])
avg_cov = _mean(coverage[window_start:window_end])
if avg_cov >= MIN_EDGE_COV:
cutpoint = window_end
break
Expand All @@ -1030,7 +1029,7 @@ def locate_consensus_cutpoint(side, read_endpoints, edge_read_file):
window_start = 0
if window_end > len(coverage):
window_end = len(coverage)
avg_cov = np.mean(coverage[window_start:window_end])
avg_cov = _mean(coverage[window_start:window_end])
if avg_cov >= MIN_EDGE_COV:
cutpoint = window_start
break
Expand Down Expand Up @@ -2210,7 +2209,7 @@ def finalize_int_stats(rep, repeat_edges, side_it, cons_align_path, template,
position_gaps[i] = p - curr_pos
curr_pos = p
position_gaps[-1] = template_len - curr_pos
mean_position_gap = np.mean(position_gaps)
mean_position_gap = _mean(position_gaps)
max_position_gap = max(position_gaps)
f.write("{0:26}\t{1}\n".format("Template Length:", template_len))
f.write("{0:26}\t{1}\n".format("# Confirmed Positions:",
Expand Down Expand Up @@ -2362,10 +2361,10 @@ def finalize_int_stats(rep, repeat_edges, side_it, cons_align_path, template,
div_rates[side].append(edge_rate)
mean_in_div = 0.0
if div_rates["in"]:
mean_in_div = np.mean(div_rates["in"])
mean_in_div = _mean(div_rates["in"])
mean_out_div = 0.0
if div_rates["out"]:
mean_out_div = np.mean(div_rates["out"])
mean_out_div = _mean(div_rates["out"])
weighted_mean_div = 0.0
if side_lens["in"] + side_lens["out"] != 0:
weighted_mean_div = (mean_in_div*side_lens["in"] +
Expand Down Expand Up @@ -2634,7 +2633,7 @@ def int_stats_postscript(rep, repeat_edges, integrated_stats,
divs.append(div_rate)
f.write("{0:26}\t{1:.4f}\n".format("Divergence Rate:", div_rate))
f.write("\n")
return np.mean(divs)
return _mean(divs)


def _get_partitioning_info(part_list, edges):
Expand Down Expand Up @@ -2993,4 +2992,9 @@ def remove_unneeded_files(repeat_edges, rep, side_labels, side_it, orient_dir,
split_path = os.path.split(f)
new_file = os.path.join(split_path[0], add_dir_name, split_path[1])
os.rename(f, new_file)



def _mean(lst):
if not lst:
return 0
return float(sum(lst)) / len(lst)

0 comments on commit c9db046

Please sign in to comment.