diff --git a/workflow/scripts/weighted_distances.py b/workflow/scripts/weighted_distances.py index 33538df..3c3113a 100644 --- a/workflow/scripts/weighted_distances.py +++ b/workflow/scripts/weighted_distances.py @@ -107,11 +107,10 @@ def build_cache(df: pd.DataFrame, reference: Seq): return cache -def buildm(df: pd.DataFrame, sample_names: list, cache: dict) -> pd.DataFrame: +def buildm(positions: List[int], sample_names: list, cache: dict) -> pd.DataFrame: # Compute matrix logging.debug(f"Filling distance matrix") nsamples = len(sample_names) - positions = df["POS"].astype("Int64").unique().tolist() m = np.zeros((nsamples, nsamples), np.float64) for i, sample1 in enumerate(sample_names): for j, sample2 in enumerate(sample_names): @@ -140,10 +139,11 @@ def main(): # Pre-compute one-sample measurements logging.info(f"Caching computations") - cache = build_cache(df, ancestor.seq) + cache = build_cache(variant_table, ancestor.seq) logging.info(f"Calculating matrix") - df = buildm(variant_table, sample_names, cache) + positions = variant_table["POS"].astype("Int64").unique().tolist() + df = buildm(positions, sample_names, cache) logging.info("Writing results") df.to_csv(snakemake.output.distances)