Skip to content

Commit

Permalink
change water clustering with ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
DomFijan committed Apr 9, 2024
1 parent 152c0b3 commit 333c6be
Showing 1 changed file with 62 additions and 46 deletions.
108 changes: 62 additions & 46 deletions ConservedWaterSearch/water_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class WaterClustering:
orientations are spread into different orientations with angle
of 104.5
- WCW (Weakly Conserved Water): several orientation combinations
exsist with satisfying water angles
exist with satisfying water angles
To run the calculation use either :py:meth:`multi_stage_reclustering`
function to start Multi Stage ReClustering (MSRC) procedure or
Expand Down Expand Up @@ -96,7 +96,7 @@ def __init__(
"""Initialise :py:class:`WaterClustering` class.
The input parameters determine the options for oxygen clustering and
hydrogen orienataion analysis if applicable.
hydrogen orientation analysis if applicable.
Args:
nsnaps (int): Number of trajectory snapshots related to
Expand Down Expand Up @@ -173,11 +173,12 @@ def __init__(
njobs (int, optional): how many cpu cores to use for clustering.
Defaults to 1.
verbose (int, optional): verbosity of output. Defaults to 0.
debugO (int, optional): debug level for oxygen clustering.
debugH (int, optional): debug level for orientations. Defaults to 0.
plotend (bool, optional): whether to plot everything at end
of run. Defaults to False.
plotreach (bool, optional): whether to plot the reachability
plot for OPTICS when debuging. Defaults to False.
plot for OPTICS when debugging. Defaults to False.
restart_data_file (str, optional): Restart data file. If
``None`` restarting is not possible and no restart file
is generated. Both ``restart_data_file`` and
Expand Down Expand Up @@ -242,7 +243,11 @@ def __init__(
if not (self.debugH < 2 or self.debugO < 2):
self.plotend = False
warnings.warn(
"plotend set to True while debugH or debugO are >1; setting back to False"
(
"plotend set to True while debugH or debugO are >1;"
" setting back to False"
),
stacklevel=2,
)
self._waterO: list[np.ndarray] = []
self._waterH1: list[np.ndarray] = []
Expand Down Expand Up @@ -280,8 +285,7 @@ def multi_stage_reclustering(
xis: list[float] | None = None,
whichH: list[str] | None = None,
) -> None:
"""Multi Stage ReClustering (MSRC) procedure for obtaining conserved
water molecules.
"""Multi Stage ReClustering (MSRC) procedure.
Main loop - loops over water clustering parameter space
(minsamp and xi) and clusters oxygens first - if a clustering
Expand Down Expand Up @@ -339,8 +343,7 @@ def quick_multi_stage_reclustering(
xis: list[float] | None = None,
whichH: list[str] | None = None,
) -> None:
"""Quick Multi Stage ReClustering (QMSRC) procedure for
obtaining conserved water molecules.
"""Quick Multi Stage ReClustering (QMSRC) procedure.
Main loop - loops over water clustering parameter space
(minsamp and xi) and clusters oxygens first - clusters with
Expand Down Expand Up @@ -435,13 +438,13 @@ def single_clustering(
self.run(Odata, H1, H2)

def save_results(self, file_name: str) -> None:
"""Saves clustering results and paramters to a file.
"""Saves clustering results and parameters to a file.
Top of the results file contains clustering parametrs after
Top of the results file contains clustering parameters after
which results are saved in the same file.
Args:
file_name str: File name of the file that will contain results.
file_name (str): File name to save results to.
"""
self._save_clustering_options(file_name)
for i in range(len(self._waterO)):
Expand All @@ -463,14 +466,13 @@ def restart_cluster(
partial_results_file: str,
partial_data_file: str,
) -> None:
"""Read the clustering options and intermediate results from a
file and restart the clustering procedure.
"""Read the options and results and restart the clustering procedure.
Args:
partial_data_file str: File name of the file containing
partial_data_file (str): File name of the file containing
intermediate set of data of hydrogen and oxygen
coordinates.
partial_results_file str: File name containing partial
partial_results_file (str): File name containing partial
results with determined water coordinates.
"""
if os.path.isfile(partial_data_file):
Expand All @@ -497,12 +499,14 @@ def restart_cluster(
self.run(Odata, H1, H2)

def read_and_set_water_clust_options(self, file_name: str) -> None:
"""Reads all class clustering options from save file and sets
"""Reads clustering options from file.
Reads all class clustering options from save file and sets
the parameters. Reads all parameters except clustering protocol
and protocol parameters.
Args:
file_name str: Results or partial results file from which
file_name (str): Results or partial results file from which
procedure parameters will be read.
"""
if os.path.isfile(file_name):
Expand Down Expand Up @@ -536,19 +540,18 @@ def read_and_set_water_clust_options(self, file_name: str) -> None:
self.plotreach = lines[25] == "True"
self.plotend = lines[26] == "True"
else:
msg = "output file not found"
raise Exception(msg)
error_msg = "output file not found"
raise FileNotFoundError(error_msg)

@classmethod
def create_from_file(
cls,
file_name: str,
) -> WaterClustering:
"""Create a WaterClustering class from saved clustering options
file or full or partial results file.
"""Create a :py:class:`WaterClustering` class from a file.
Args:
file_name str: Results or partial results file from which
file_name (str): Results or partial results file from which
procedure parameters will be read.
Returns:
Expand All @@ -563,13 +566,12 @@ class by reading options from a file.
def create_from_files_and_restart(
cls, partial_output: str, partial_data_file: str
) -> WaterClustering:
"""Create a WaterClustering class from saved clustering restart
and partial results files and restart clustering.
"""Create a :py:class:`WaterClustering` from file and restart the procedure.
Args:
partial_file_name str: Partial results file from which
partial_output (str): Partial results file from which
procedure parameters will be read.
partial_data_file str: Partial data file from which
partial_data_file (str): Partial data file from which
data will be read.
Returns:
Expand Down Expand Up @@ -649,7 +651,12 @@ def visualise_nglview(
crystal_waters: str | None = None,
density_map: str | None = None,
) -> NGLWidget:
"""Visualise the results using `nglview <https://github.com/nglviewer/nglview>`__.
"""Visualise the results using nglview.
`nglview <https://github.com/nglviewer/nglview>`__ can be used to
visualise the results of the clustering procedure. We recommend using
pymol visualisation as it is more informative and provides more
options.
Args:
aligned_protein (str, optional): File containing protein
Expand Down Expand Up @@ -706,8 +713,7 @@ def water_type(self) -> list[str]:

@property
def waterO(self) -> list[np.ndarray]:
"""Contains coordiantes of Oxygens of water molecules classified
with water clustering.
"""Oxygen coordinates of water molecules classified using clustering.
Returns:
list[np.ndarray]: Returns a list of 3D xyz
Expand All @@ -717,8 +723,7 @@ def waterO(self) -> list[np.ndarray]:

@property
def waterH1(self) -> list[np.ndarray]:
"""Contains coordinates of first Hydrogen atom of water
molecules classified with water clustering.
"""Coordinates of first Hydrogen atom of water molecules from clustering.
Returns:
list[np.ndarray]: Returns a list of 3D xyz
Expand All @@ -728,8 +733,7 @@ def waterH1(self) -> list[np.ndarray]:

@property
def waterH2(self) -> list[np.ndarray]:
"""Contains coordinates of second Hydrogen atom of water
molecules classified with water clustering.
"""Coordinates of second Hydrogen atom of water molecules from clustering.
Returns:
list[np.ndarray]: Returns a list of 3D xyz
Expand Down Expand Up @@ -788,7 +792,8 @@ def _scan_clustering_params(
clust.fit(Odata)
# loop over xi
for j in self.xis:
# recalculate reachability - OPTICS reachability has to be recaculated when changing minsamp
# recalculate reachability - OPTICS reachability has
# to be recalculated when changing minsamp
if self.clustering_algorithm == "HDBSCAN":
clust = HDBSCAN(
min_cluster_size=int(self.nsnaps * self.numbpct_oxygen),
Expand Down Expand Up @@ -818,8 +823,15 @@ def _scan_clustering_params(
if self.verbose > 0:
(aa, bb) = np.unique(clusters, return_counts=True)
dbgt = (
f"Oxygen clustering {type(clust)} minsamp={i}, xi={j}, {len(np.unique(clusters[clusters!=-1]))} clusters \n"
f"Required N(elem) range:{self.nsnaps*self.numbpct_oxygen:.2f} to {(2-self.numbpct_oxygen)*self.nsnaps}; (tar cls size={self.nsnaps} and numbpct={self.numbpct_oxygen:.2f})\n"
f"Oxygen clustering {type(clust)} "
f"minsamp={i}, xi={j}, "
f"{len(np.unique(clusters[clusters!=-1]))} "
f"clusters \n"
f"Required N(elem) range:"
f"{self.nsnaps*self.numbpct_oxygen:.2f} to "
f"{(2-self.numbpct_oxygen)*self.nsnaps}; "
f"(tar cls size={self.nsnaps} and numbpct="
f"{self.numbpct_oxygen:.2f})\n"
f"N(elements) for each cluster: {bb}\n"
)
print(dbgt)
Expand Down Expand Up @@ -849,7 +861,8 @@ def _scan_clustering_params(
break
if (found and self.restart_after_find) or len(Odata) < self.nsnaps:
break
# check if size of remaining data set is bigger then number of snapshots
# check if size of remaining data set is bigger
# than number of snapshots
if len(Odata) < self.nsnaps or self.restart_after_find is False:
break
if (self.debugH == 1 or self.debugO == 1) and self.plotend:
Expand All @@ -864,8 +877,7 @@ def _analyze_oxygen_clustering(
clusters: np.ndarray,
whichH: list[str],
) -> tuple[list[np.ndarray], list[int]]:
"""Helper function for analysing oxygen clustering and invoking
hydrogen orientation clustering.
"""Helper function for analysing oxygen and hydrogen clustering.
Analyzes clusters for oxygen clustering. For oxygen clusters
which have the size around number of samples, the hydrogen
Expand All @@ -880,6 +892,7 @@ def _analyze_oxygen_clustering(
must be "onlyO".
clusters (np.ndarray): Output of clustering
results from OPTICS or HDBSCAN.
whichH (list[str]): Defines which water types to search for.
Returns:
tuple[list[np.ndarray], list[int]]:
Expand All @@ -902,15 +915,17 @@ def _analyze_oxygen_clustering(
mask = clusters == k
# Number of elements in oxygen cluster
neioc = np.count_nonzero(mask)
# If number of elements in oxygen cluster is Nsnap*0.85<Nelem<Nsnap*1.15 then ignore
# Only process the oxygen cluster if its number of elements is
# within Nsnap*0.85<Nelem<Nsnap*1.15; otherwise ignore it
if min_neioc < neioc < max_neioc:
if self.verbose > 0:
print(f"O clust {k}, size {len(clusters[clusters==k])}\n")
O_center = np.mean(Odata[mask], axis=0)
if "onlyO" not in self.water_types_to_find:
# Construct array of hydrogen orientations
orientations = np.vstack([H1[mask], H2[mask]])
# Analyse clustering with hydrogen orientation analysis and more debug stuff
# Analyse clustering with hydrogen orientation analysis
# and more debug stuff
hyd = hydrogen_orientation_analysis(
orientations,
self.numbpct_hyd_orient_analysis,
Expand Down Expand Up @@ -1012,8 +1027,7 @@ def _delete_data(
H1: None | np.ndarray = None,
H2: None | np.ndarray = None,
) -> tuple[np.ndarray, np.ndarray | None, np.ndarray | None]:
"""A helper function for deleting data from the dataset during
MSRC procedure.
"""A helper function for deleting data from the dataset during MSRC procedure.
Args:
elements (np.ndarray): Indices to delete.
Expand Down Expand Up @@ -1104,7 +1118,10 @@ def _check_and_setup_MSRC(self, lower_minsamp_pct, every_minsamp):

def _check_data(self, Odata, H1, H2):
if (H1 is None or H2 is None) and "onlyO" not in self.water_types_to_find:
msg = f"H1 and H2 have to be provided for non oxygen only search. Run type {self.water_types_to_find}"
msg = (
f"H1 and H2 have to be provided for non oxygen only"
f" search. Run type {self.water_types_to_find}"
)
raise Exception(msg)
if H1 is not None and H2 is not None:
if len(Odata) != len(H1) or len(Odata) != len(H2) or len(H1) != len(H2):
Expand All @@ -1122,8 +1139,7 @@ def _add_water_solutions(
self,
waters: list,
) -> None:
"""A helper function which extends the solutions obtained from
analysing hydrogen orientations.
"""A helper function which adds new water clusters found.
Args:
waters (list): List containing results - coordinates of
Expand Down

0 comments on commit 333c6be

Please sign in to comment.