Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solving issues in the onion examples #46

Merged
merged 8 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .coverage.cantoalto.polito.it.13481.XDNgkqFx
Binary file not shown.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ __pycache__
/build
docs/build
docs/source/_autosummary
examples/onion_example_files
tests/systems/coex/.*
67 changes: 29 additions & 38 deletions examples/onion_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,91 +3,82 @@
import matplotlib.pyplot as plt
import numpy as np

from dynsight.onion import onion_multi
from dynsight.onion.plot import (
plot_medoids_multi,
plot_one_trj_multi,
plot_output_multi,
plot_pop_fractions,
plot_sankey,
plot_state_populations,
plot_time_res_analysis,
)
from dynsight import onion


def main() -> None:
"""Run the example.

The data clustered are the MD trajectories of two molecules moving in
a 2-dimensional free energy landscape, with four Gaussian minima, under
Langevin dynsmics.

Use git clone git@github.com:matteobecchi/onion_example_files.git
to download example datasets.
"""
### Set the path to where the example files are located
path_to_input_data = (
"onion_example_files/data/multivariate_time-series.npy"
)

### Load the input data -
### it's an array of shape (n_dims, n_particles, n_frames)
input_data = np.load(path_to_input_data)
n_particles = input_data.shape[1]
n_frames = input_data.shape[2]

### CLUSTERING WITH A SINGLE TIME RESOLUTION ###
### Chose the time resolution --> the length of the windows in which the
### time-series will be divided
""" STEP 1: CLUSTERING WITH A SINGLE TIME RESOLUTION
Chose the time resolution --> the length of the windows in which the
time-series will be divided. This is the minimum lifetime required for
a state to be considered stable."""
tau_window = 10
bins = 25 # For mutlivariate clustering, setting bins is often important
n_windows = int(n_frames / tau_window) # Number of windows

### The input array has to be (n_parrticles * n_windows,
### tau_window * n_dims)
### because each window is trerated as a single data-point
reshaped_data = np.reshape(input_data, (n_particles * n_windows, -1))
reshaped_data = onion.helpers.reshape_from_dnt(input_data, tau_window)

### onion_multi() returns the list of states and the label for each
### signal window
state_list, labels = onion_multi(reshaped_data, bins=bins)
state_list, labels = onion.onion_multi(reshaped_data, bins=bins)

### These functions are examples of how to visualize the results
plot_output_multi("Fig1.png", input_data, state_list, labels, tau_window)
plot_one_trj_multi("Fig2.png", 0, tau_window, input_data, labels)
plot_medoids_multi("Fig3.png", tau_window, input_data, labels)
plot_state_populations("Fig4.png", n_windows, labels)
plot_sankey("Fig5.png", labels, n_windows, [100, 200, 300, 400])
onion.plot.plot_output_multi(
"Fig1.png", input_data, state_list, labels, tau_window
)
onion.plot.plot_one_trj_multi(
"Fig2.png", 0, tau_window, input_data, labels
)
onion.plot.plot_medoids_multi("Fig3.png", tau_window, input_data, labels)
onion.plot.plot_state_populations("Fig4.png", n_windows, labels)
onion.plot.plot_sankey("Fig5.png", labels, n_windows, [100, 200, 300, 400])

### CLUSTERING THE WHOLE RANGE OF TIME RESOLUTIONS ###
""" STEP 2: CLUSTERING THE WHOLE RANGE OF TIME RESOLUTIONS
This allows to select the optimal time resolution for the analysis,
avoiding an a priori choice."""
tau_window_list = np.geomspace(3, 10000, 20, dtype=int)

tra = np.zeros((len(tau_window_list), 3)) # List of number of states and
# ENV0 population for each tau_window
pop_list = [] # List of the states' population for each tau_window

for i, tau_window in enumerate(tau_window_list):
n_windows = int(n_frames / tau_window)
excess_frames = n_frames - n_windows * tau_window

if excess_frames > 0:
reshaped_data = np.reshape(
input_data[:, :, :-excess_frames],
(n_particles * n_windows, -1),
)
else:
reshaped_data = np.reshape(
input_data, (n_particles * n_windows, -1)
)
reshaped_data = onion.helpers.reshape_from_dnt(input_data, tau_window)

state_list, labels = onion_multi(reshaped_data, bins=bins)
state_list, labels = onion.onion_multi(reshaped_data, bins=bins)

list_pop = [state.perc for state in state_list]
list_pop.insert(0, 1 - np.sum(np.array(list_pop)))
list_pop.insert(0, 1 - np.sum(np.array(list_pop))) # Add ENV0 fraction

tra[i][0] = tau_window
tra[i][1] = len(state_list)
tra[i][2] = list_pop[0]
pop_list.append(list_pop)

### These functions are examples of how to visualize the results
plot_time_res_analysis("Fig6.png", tra)
plot_pop_fractions("Fig7.png", pop_list)
onion.plot.plot_time_res_analysis("Fig6.png", tra)
onion.plot.plot_pop_fractions("Fig7.png", pop_list, tra)

plt.show()

Expand Down
101 changes: 57 additions & 44 deletions examples/onion_uni.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,89 +3,102 @@
import matplotlib.pyplot as plt
import numpy as np

from dynsight.onion import onion_uni
from dynsight.onion.plot import (
plot_medoids_uni,
plot_one_trj_uni,
plot_output_uni,
plot_pop_fractions,
plot_sankey,
plot_state_populations,
plot_time_res_analysis,
)
from dynsight import onion
from dynsight.utilities import find_extrema_points


def main() -> None:
"""Run the example.

Use git clone git@github.com:matteobecchi/onion_example_files.git
The data clustered are the LENS values computed over a TIP4P/ICE
MD trajectory at the solid/liquid coexistence point. The trajectory
is sampled every 0.1 ns, and there are 2048 water molecules.
For more information about this dataset or the LENS descriptor, see
https://doi.org/10.1073/pnas.2300565120.

Use `git clone git@github.com:matteobecchi/onion_example_files.git`
to download example datasets.
"""
### Set the path to where the example files are located
path_to_input_data = "onion_example_files/data/univariate_time-series.npy"

### Load the input data - it's an array of shape (n_particles, n_frames)
### The first LENS frame has to be removed because it's always zero
input_data = np.load(path_to_input_data)[:, 1:]
n_particles = input_data.shape[0]
n_frames = input_data.shape[1]

### CLUSTERING WITH A SINGLE TIME RESOLUTION ###
### Chose the time resolution --> the length of the windows in which the
### time-series will be divided
""" STEP 0: STATIC CLUSTERING
Before using Onion Clustering, a simple pattern recognition analysis can
be performed with dynsight, identifying the maxima of the data
distribution. This analysis ignores the time correlations withing the
data. The value of 'prominance' tunes the sensibility to the data
histogram roughness. Plot the histogram to set the best value.

The results is an array which lists the (x, y) value of each peak.
"""
counts, bins = np.histogram(
input_data.flatten(),
bins=50,
density=True,
)
_ = find_extrema_points(
x_axis=bins[1:],
y_axis=counts,
extrema_type="max",
prominence=0.2,
)

""" STEP 1: CLUSTERING WITH A SINGLE TIME RESOLUTION
Chose the time resolution --> the length of the windows in which the
time-series will be divided. This is the minimum lifetime required for
a state to be considered stable."""
tau_window = 5
n_windows = int(n_frames / tau_window) # Number of windows
frames_in_excess = n_frames - n_windows * tau_window

### The input array needs to be (n_parrticles * n_windows, tau_window)
### because each window is trerated as a single data-point
reshaped_data = np.reshape(
input_data[:, :-frames_in_excess],
(n_particles * n_windows, tau_window),
)
reshaped_data = onion.helpers.reshape_from_nt(input_data, tau_window)

### onion_uni() returns the list of states and the label for each
### signal window
state_list, labels = onion_uni(reshaped_data)
state_list, labels = onion.onion_uni(reshaped_data)

### These functions are examples of how to visualize the results
plot_output_uni("Fig1.png", reshaped_data, n_windows, state_list)
plot_one_trj_uni("Fig2.png", 1234, reshaped_data, labels, n_windows)
plot_medoids_uni("Fig3.png", reshaped_data, labels)
plot_state_populations("Fig4.png", n_windows, labels)
plot_sankey("Fig5.png", labels, n_windows, [10, 20, 30, 40])
onion.plot.plot_output_uni(
"Fig1.png", reshaped_data, n_windows, state_list
)
onion.plot.plot_one_trj_uni(
"Fig2.png", 1234, reshaped_data, labels, n_windows
)
onion.plot.plot_medoids_uni("Fig3.png", reshaped_data, labels)
onion.plot.plot_state_populations("Fig4.png", n_windows, labels)
onion.plot.plot_sankey("Fig5.png", labels, n_windows, [10, 20, 30, 40])

### CLUSTERING THE WHOLE RANGE OF TIME RESOLUTIONS ###
tmp_list = np.geomspace(2, 499, num=20, dtype=int)
tau_windows = [x for i, x in enumerate(tmp_list) if x not in tmp_list[:i]]
""" STEP 2: CLUSTERING THE WHOLE RANGE OF TIME RESOLUTIONS
This allows to select the optimal time resolution for the analysis,
avoiding an a priori choice."""
tau_windows = np.unique(np.geomspace(2, 499, num=20, dtype=int))

tra = np.zeros((len(tau_windows), 3)) # List of number of states and
# ENV0 population for each tau_window
list_of_pop = [] # List of the states' population for each tau_window

for i, tau_window in enumerate(tau_windows):
n_windows = int(n_frames / tau_window)
frames_in_excess = n_frames - n_windows * tau_window
if frames_in_excess > 0:
tmp_input_data = input_data[:, :-frames_in_excess]
else:
tmp_input_data = input_data
reshaped_data = np.reshape(
tmp_input_data,
(n_particles * n_windows, tau_window),
)

state_list, labels = onion_uni(reshaped_data)
reshaped_data = onion.helpers.reshape_from_nt(input_data, tau_window)

state_list, labels = onion.onion_uni(reshaped_data)

pop_list = [state.perc for state in state_list]
pop_list.insert(0, 1 - np.sum(np.array(pop_list)))
pop_list.insert(0, 1 - np.sum(np.array(pop_list))) # Add ENV0 fraction
list_of_pop.append(pop_list)

tra[i][0] = tau_window
tra[i][1] = len(state_list)
tra[i][2] = pop_list[0]

### These functions are examples of how to visualize the results
plot_time_res_analysis("Fig6.png", tra)
plot_pop_fractions("Fig7.png", list_of_pop)
onion.plot.plot_time_res_analysis("Fig6.png", tra)
onion.plot.plot_pop_fractions("Fig7.png", list_of_pop, tra)

plt.show()

Expand Down
2 changes: 1 addition & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ check:
# Auto-fix code issues.
fix:
ruff format .
ruff check .
ruff check --fix .

# Build a release.
build:
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ line-length = 79
[tool.ruff.lint]
select = ["ALL"]
ignore = [
"ANN101",
"ANN401",
"COM812",
"ISC001",
Expand Down
12 changes: 6 additions & 6 deletions src/dynsight/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
)

__all__ = [
"soapify",
"lens",
"analysis",
"data_processing",
"hdf5er",
"utilities",
"time_soap",
"lens",
"onion",
"data_processing",
"analysis",
"soapify",
"time_soap",
"utilities",
]
6 changes: 3 additions & 3 deletions src/dynsight/_internal/onion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
)

__all__ = [
"OnionUni",
"OnionMulti",
"onion_uni",
"onion_multi",
"OnionUni",
"helpers",
"onion_multi",
"onion_uni",
"plot",
]
14 changes: 7 additions & 7 deletions src/dynsight/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@
)

__all__ = [
"simplekernelsoap",
"simplesoapdistance",
"soapdistance",
"kernelsoap",
"soapdistancenormalized",
"applyclassification",
"createreferencesfromtrajectory",
"getdistancebetween",
"getdistancesfromref",
"getdistancesfromrefnormalized",
"getreferencesfromdataset",
"kernelsoap",
"mergereferences",
"savereferences",
"getreferencesfromdataset",
"applyclassification",
"simplekernelsoap",
"simplesoapdistance",
"soapdistance",
"soapdistancenormalized",
"spatialaverage",
]
2 changes: 1 addition & 1 deletion src/dynsight/hdf5er.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dynsight._internal.hdf5er.to_hdf5 import mda_to_hdf5, universe_to_hdf5

__all__ = [
"create_universe_from_slice",
"mda_to_hdf5",
"universe_to_hdf5",
"create_universe_from_slice",
]
6 changes: 3 additions & 3 deletions src/dynsight/onion.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
)

__all__ = [
"OnionUni",
"OnionMulti",
"onion_uni",
"onion_multi",
"OnionUni",
"helpers",
"onion_multi",
"onion_uni",
"plot",
]
6 changes: 3 additions & 3 deletions src/dynsight/soapify.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
)

__all__ = [
"saponify_trajectory",
"saponify_multiple_trajectories",
"get_soap_settings",
"fill_soap_vector_from_dscribe",
"get_soap_settings",
"saponify_multiple_trajectories",
"saponify_trajectory",
]
4 changes: 2 additions & 2 deletions src/dynsight/time_soap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
)

__all__ = [
"timesoapsimple",
"timesoap",
"gettimesoapsimple",
"timesoap",
"timesoapsimple",
]
Loading
Loading