diff --git a/.gitignore b/.gitignore
index 38c4422..3b10f54 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ output
*egg*
dist
__pycache__
+*/.ipynb_checkpoints/*
idealg_v
build
venv3
diff --git a/notebooks/squig_stats.ipynb b/notebooks/squig_stats.ipynb
new file mode 100644
index 0000000..10f1c2c
--- /dev/null
+++ b/notebooks/squig_stats.ipynb
@@ -0,0 +1,559 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e12bb0ec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from IPython.core.display import display, HTML\n",
+ "display(HTML(\n",
+ " ''\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7cda617d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from io import StringIO\n",
+ "\n",
+ "POS_DWELL = 0\n",
+ "POS_MEAN = 1\n",
+ "POS_MEDIAN = 2\n",
+ "POS_STD = 3\n",
+ "POS_BOXPLOT = 4\n",
+ "POS_DWELL_STD = 5\n",
+ "\n",
+ "def get_boxplot_stats(data):\n",
+ " return [round(np.min(data), 4), round(np.percentile(data, 25), 4), round(np.median(data), 4), round(np.percentile(data, 75), 4), round(np.max(data), 4)]\n",
+ "\n",
+ "def get_stats(file_path, base_shift=0):\n",
+ " df = pd.read_csv(file_path, sep='\\t', header=None)\n",
+ "\n",
+ " # Per-column summary lists; each gets one entry appended per processed column\n",
+ " pos_dwell = []\n",
+ " pos_mean = []\n",
+ " pos_median = []\n",
+ " pos_std = []\n",
+ " pos_boxplot = []\n",
+ " pos_dwell_std = []\n",
+ " \n",
+ " # Iterate over columns, skipping the first abs(base_shift) columns\n",
+ " for col in df.columns[abs(base_shift):]:\n",
+ " col_means = []\n",
+ " col_dwells = []\n",
+ " col_medians = []\n",
+ " col_stds = []\n",
+ " col_values = []\n",
+ "\n",
+ " for cell in df[col]:\n",
+ " if pd.isna(cell):\n",
+ " continue\n",
+ "\n",
+ " cell_values = str(cell).split(',')\n",
+ " col_dwells.append(len(cell_values))\n",
+ "\n",
+ " numeric_values = pd.to_numeric(cell_values, errors='coerce')\n",
+ " col_values.extend(numeric_values)\n",
+ " col_means.append(np.nanmean(numeric_values))\n",
+ " col_medians.append(np.nanmedian(numeric_values))\n",
+ " col_stds.append(np.std(numeric_values))\n",
+ " \n",
+ "# print(col_dwells)\n",
+ "# print(col_values)\n",
+ " \n",
+ " pos_dwell.append(get_boxplot_stats(col_dwells))\n",
+ " pos_mean.append(get_boxplot_stats(col_means))\n",
+ " pos_median.append(get_boxplot_stats(col_medians))\n",
+ " pos_std.append(get_boxplot_stats(col_stds))\n",
+ " pos_boxplot.append(get_boxplot_stats(col_values))\n",
+ " \n",
+ " pos_dwell_std.append(np.std(col_dwells))\n",
+ " \n",
+ "\n",
+ "# print(pos_dwell)\n",
+ "# print(pos_mean)\n",
+ "# print(pos_median)\n",
+ "# print(pos_std)\n",
+ " return [pos_dwell, pos_mean, pos_median, pos_std, pos_boxplot, pos_dwell_std]\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d7e27dde",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %matplotlib notebook\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "from matplotlib.lines import Line2D\n",
+ "\n",
+ "colors = ['lightblue','lightcoral','lightcyan','lightgoldenrodyellow','lightgreen','lightpink','lightsalmon','lightseagreen','lightskyblue','lightslategray']\n",
+ "medianprops = dict(color='red', linewidth=2)\n",
+ "\n",
+ "def draw_boxplots(stats, ref, ref_index, title, labels):\n",
+ "\n",
+ " plt.figure(figsize=(30,6))\n",
+ "\n",
+ " num_stats = len(stats)\n",
+ " \n",
+ " all_stats = []\n",
+ " x_ticks = []\n",
+ " for i in range(len(stats[0])):\n",
+ " for j in range(num_stats):\n",
+ " all_stats.append(stats[j][i])\n",
+ "\n",
+ " for i in range(len(stats[0])):\n",
+ " x_ticks.append(ref[i])\n",
+ " x_ticks.append(ref_index[i])\n",
+ " for j in range(num_stats-2):\n",
+ " x_ticks.append('')\n",
+ "\n",
+ " distance_within_group = 1\n",
+ " distance_between_groups = 3\n",
+ " positions = [0]\n",
+ " positions.append(positions[-1]+distance_within_group)\n",
+ " for i in range(len(stats[0])):\n",
+ " for j in range(num_stats-1):\n",
+ " positions.append(positions[-1]+distance_within_group)\n",
+ " positions.append(positions[-1]+distance_between_groups)\n",
+ "\n",
+ " positions = positions[1:-1]\n",
+ "\n",
+ " # Set box colors\n",
+ " box_colors = colors[:num_stats] * (len(all_stats) // 2)\n",
+ "\n",
+ " # Draw grouped box plots: one box per entry of stats at each position, colored per entry\n",
+ " bplot = plt.boxplot(all_stats, medianprops=medianprops, positions=positions, vert=True, patch_artist=True, flierprops=dict(marker='.', markerfacecolor='black', markersize=5))\n",
+ "\n",
+ " for patch, color in zip(bplot['boxes'], box_colors):\n",
+ " patch.set_facecolor(color)\n",
+ " \n",
+ " legend_elements = []\n",
+ " for j in range(num_stats):\n",
+ " legend_elements.append(Line2D([0], [0], marker='s', color='w', markerfacecolor=box_colors[j], markersize=10, label=labels[j]))\n",
+ " \n",
+ " plt.legend(handles=legend_elements, loc='upper right')\n",
+ "\n",
+ " # Set x tick labels and title\n",
+ " plt.xticks(positions, x_ticks)\n",
+ " plt.title(title)\n",
+ "\n",
+ " # Show the plot\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "dddfdc9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ref = \"AGGTTGCAGTGAACCAACGTCGCCACTGCACTCCAGTCTGGCGACAGAGCGAGACTCCCTGTCA\"\n",
+ "ref_index = list(range(7,100))\n",
+ "\n",
+ "file_path = '../test/data/raw/metric/eventalign.tsv'\n",
+ "f5c = get_stats(file_path, base_shift=-6)\n",
+ "\n",
+ "file_path = '../test/data/raw/metric/nanopolish.tsv'\n",
+ "nanopolish = get_stats(file_path, base_shift=-6)\n",
+ "\n",
+ "file_path = '../test/data/raw/metric/realign.tsv'\n",
+ "realign = get_stats(file_path, base_shift=-6)\n",
+ "\n",
+ "file_path = '../test/data/raw/metric/sigfish.tsv'\n",
+ "sigfish = get_stats(file_path, base_shift=-6)\n",
+ "\n",
+ "file_path = '../test/data/raw/metric/squigualator.tsv'\n",
+ "squigulator = get_stats(file_path, base_shift=-6)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a1d54a40",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "