Skip to content

Commit

Permalink
Merge pull request #65 from JecaTosovic/fix_water_clustering_defaults
Browse files Browse the repository at this point in the history
Make WaterClustering defaults consistent with hydrogen clustering
  • Loading branch information
JecaTosovic authored Apr 22, 2024
2 parents 2170536 + 595269f commit b3ef0bc
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 48 deletions.
18 changes: 9 additions & 9 deletions ConservedWaterSearch/hydrogen_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ def hydrogen_orientation_analysis(
HCW_angstd_cutoff: float = 17,
WCW_angstd_cutoff: float = 20,
weakly_explained: float = 0.7,
xiFCW: tuple[float]|list[float] = (0.03,),
xiHCW: tuple[float]|list[float] = (0.05, 0.01),
xiWCW: tuple[float]|list[float] = (0.05, 0.001),
xiFCW: tuple[float] | list[float] = (0.03,),
xiHCW: tuple[float] | list[float] = (0.05, 0.01),
xiWCW: tuple[float] | list[float] = (0.05, 0.001),
njobs: int = 1,
verbose: int = 0,
debugH: int = 0,
plotreach: bool = False,
which: tuple[str]|list[str] = ("FCW", "HCW", "WCW"),
which: tuple[str] | list[str] = ("FCW", "HCW", "WCW"),
normalize_orientations: bool = True,
) -> list:
"""Determines if the water cluster is conserved and of what type.
Expand Down Expand Up @@ -66,13 +66,13 @@ def hydrogen_orientation_analysis(
kmeans inertia (measure of spread of data in a cluster).
Defaults to 0.4.
FCW_angdiff_cutoff (float, optional): Maximum value of angle (in
deg) allowed for FCW in OPTICS/HDBSCAN clustering to be
deg) allowed for FCW in OPTICS clustering to be
considered correct water angle. Defaults to 5.
FCW_angstd_cutoff (float, optional): Maximal standard deviation
of angle distribution of orientations of two hydrogens
allowed for water to be considered FCW. Defaults to 17.
min_samp_data_size_pct (float, optional): Minimum samples to
choose for OPTICS or HDBSCAN clustering as percentage of
choose for OPTICS clustering as percentage of
number of water molecules considered for HCW and WCW.
Defaults to 0.15.
nonFCW_angdiff_cutoff (float, optional): Maximum standard
Expand Down Expand Up @@ -229,7 +229,7 @@ def find_fully_conserved_orientations(
kmeans inertia (measure of spread of data in a cluster).
Defaults to 0.4.
angdiff_cutoff (float, optional): Maximum value of angle (in
deg) allowed for FCW in OPTICS/HDBSCAN clustering to be
deg) allowed for FCW in OPTICS clustering to be
considered correct water angle. Defaults to 5.
angstd_cutoff (float, optional): Maximal standard deviation
of angle distribution of orientations of two hydrogens
Expand Down Expand Up @@ -431,7 +431,7 @@ def find_half_conserved_orientations(
pct_size_buffer (float, optional): Minimum allowed size of the
hydrogen orientation cluster. Defaults to 0.85.
min_samp_data_size_pct (float, optional): Minimum samples to
choose for OPTICS or HDBSCAN clustering as percentage of
choose for OPTICS clustering as percentage of
number of water molecules considered for HCW and WCW.
Defaults to 0.15.
angdiff_cutoff (float, optional): Maximum standard
Expand Down Expand Up @@ -585,7 +585,7 @@ def find_weakly_conserved_orientations(
lower_bound_pct_buffer (float, optional): Minimum allowed size of the
hydrogen orientation cluster. Defaults to 0.35.
min_samp_data_size_pct (float, optional): Minimum samples to
choose for OPTICS or HDBSCAN clustering as percentage of
choose for OPTICS clustering as percentage of
number of water molecules considered for HCW and WCW.
Defaults to 0.15.
pct_explained (float, optional): percentage of explained
Expand Down
91 changes: 54 additions & 37 deletions ConservedWaterSearch/water_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,19 @@ def __init__(
self,
nsnaps: int,
clustering_algorithm: str = "OPTICS",
water_types_to_find: list[str] | None = None,
water_types_to_find: tuple[str] | list[str] = ("FCW", "HCW", "WCW"),
restart_after_found: bool = False,
min_samples: list[int] | None = None,
xis: list[float] | None = None,
xis: tuple[float] | list[float] = (
0.1,
0.05,
0.01,
0.005,
0.001,
0.0005,
0.0001,
1e-05,
),
numbpct_oxygen: float = 0.8,
normalize_orientations: bool = True,
numbpct_hyd_orient_analysis: float = 0.85,
Expand All @@ -81,9 +90,9 @@ def __init__(
HCW_angstd_cutoff: float = 17,
WCW_angstd_cutoff: float = 20,
weakly_explained: float = 0.7,
xiFCW: list[float] | None = None,
xiHCW: list[float] | None = None,
xiWCW: list[float] | None = None,
xiFCW: tuple[float] | list[float] = (0.03,),
xiHCW: tuple[float] | list[float] = (0.05, 0.01),
xiWCW: tuple[float] | list[float] = (0.05, 0.001),
njobs: int = 1,
verbose: int = 0,
debugO: int = 0,
Expand All @@ -104,24 +113,24 @@ def __init__(
clustering_algorithm (str, optional): Options are "OPTICS"
or "HDBSCAN". OPTICS provides slightly better results,
but is also slightly slower. Defaults to "OPTICS".
water_types_to_find (list[str], optional): Defines which
water_types_to_find (tuple[str], optional): Defines which
water types to search for. Any combination of "FCW",
"HCW" and "WCW" is allowed, or "onlyO" for oxygen
clustering only. Defaults to ["FCW", "HCW", "WCW"].
clustering only. Defaults to ("FCW", "HCW", "WCW").
restart_after_found (bool, optional): If ``True`` restarts
clustering after each water is found. ``False`` will
give the quick version of multi-stage reculstering
give the quick version of multi-stage reclustering
approach. Defaults to False.
min_samples (list[int], optional): List of minimum samples
for OPTICS or HDBSCAN. If ``None``, the range
``[int(0.25 * nsnaps), nsnaps]`` is used. For single
clustering users should provide a single integer between
0 and ``nsnaps`` in a list. Defaults to None.
xis (list[float], optional): List of xis for OPTICS
xis (tuple[float], optional): List or tuple of xis for OPTICS
clustering. This is ignored for HDBSCAN. Defaults to
[ 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001,
0.00001]. For single clustering users should provide a
single float between 0 and 1 in a list.
(0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001,
0.00001). For single clustering, users should provide a
single float between 0 and 1 in a list/tuple.
numbpct_oxygen (float, optional): Percentage of
``nsnaps`` required for oxygen cluster to be considered
valid and water conserved. The check is enforced on
Expand Down Expand Up @@ -161,15 +170,15 @@ def __init__(
weakly_explained (float, optional): percentage of explained
hydrogen orientations for water to be considered WCW.
Defaults to 0.7.
xiFCW (list, optional): Xi value for OPTICS clustering for
FCW. Don't touch this unless you know what you are
doing. Defaults to [0.03].
xiHCW (list, optional): Xi value for OPTICS clustering for
HCW. Don't touch this unless you know what you are doing.
Defaults to [0.05, 0.01].
xiWCW (list, optional): Xi value for OPTICS clustering for
WCW. Don't touch this unless you know what you are doing.
Defaults to [0.05, 0.001].
xiFCW (tuple[float], optional): Xi value for hydrogen clustering of
FCWs for OPTICS algorithm. Avoid changing the defaults if
possible. Defaults to (0.03,).
xiHCW (tuple[float], optional): Xi value for OPTICS clustering for
HCW. Avoid changing the defaults if possible.
Defaults to (0.05, 0.01).
xiWCW (tuple[float], optional): Xi value for OPTICS clustering for
WCW. Avoid changing the defaults if possible.
Defaults to (0.05, 0.001).
njobs (int, optional): how many cpu cores to use for clustering.
Defaults to 1.
verbose (int, optional): verbosity of output. Defaults to 0.
Expand All @@ -191,16 +200,21 @@ def __init__(
``output_file`` have to be provided for clustering
restarting. Defaults to None.
"""
if xiWCW is None:
xiWCW = [0.05, 0.001]
if xiHCW is None:
xiHCW = [0.05, 0.01]
if xiFCW is None:
xiFCW = [0.03]
if water_types_to_find is None:
water_types_to_find = ["FCW", "HCW", "WCW"]
if xis is None:
xis = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, 1e-05]
if not isinstance(water_types_to_find, (tuple, list)):
if isinstance(water_types_to_find, str):
water_types_to_find = (water_types_to_find,)
if not isinstance(xis, (tuple, list)):
if isinstance(xis, float):
xis = tuple(xis)
if not isinstance(xiFCW, (tuple, list)):
if isinstance(xiFCW, float):
xiFCW = tuple(xiFCW)
if not isinstance(xiHCW, (tuple, list)):
if isinstance(xiHCW, float):
xiHCW = tuple(xiHCW)
if not isinstance(xiWCW, (tuple, list)):
if isinstance(xiWCW, float):
xiWCW = tuple(xiWCW)
if nsnaps <= 0:
msg = f"nsnaps must be positive {nsnaps}"
raise Exception(msg)
Expand Down Expand Up @@ -514,10 +528,10 @@ def read_and_set_water_clust_options(self, file_name: str) -> None:
lines: list[str] = f.read().splitlines()
self.nsnaps = int(lines[0].strip())
self.clustering_algorithm = lines[1].strip(" ")
self.water_types_to_find = [i for i in lines[2].split(" ")]
self.water_types_to_find = tuple([i for i in lines[2].split(" ")])
self.restart_after_find = lines[3] == "True"
self.min_samples = [int(i) for i in lines[4].split(" ")]
self.xis = [float(i) for i in lines[5].split(" ")]
self.xis = tuple([float(i) for i in lines[5].split(" ")])
self.numbpct_oxygen = float(lines[6])
self.normalize_orientations = lines[7] == "True"
self.numbpct_hyd_orient_analysis = float(lines[8])
Expand All @@ -530,9 +544,9 @@ def read_and_set_water_clust_options(self, file_name: str) -> None:
self.halfcon_angstd_cutoff = float(lines[15])
self.weakly_angstd_cutoff = float(lines[16])
self.weakly_explained = float(lines[17])
self.xiFCW = [float(i) for i in lines[18].split(" ")]
self.xiHCW = [float(i) for i in lines[19].split(" ")]
self.xiWCW = [float(i) for i in lines[20].split(" ")]
self.xiFCW = tuple([float(i) for i in lines[18].split(" ")])
self.xiHCW = tuple([float(i) for i in lines[19].split(" ")])
self.xiWCW = tuple([float(i) for i in lines[20].split(" ")])
self.njobs = int(lines[21])
self.verbose = int(lines[22])
self.debugO = int(lines[23])
Expand Down Expand Up @@ -1056,7 +1070,10 @@ def _check_cls_alg_and_whichH(self):
raise Exception(msg)
for i in self.water_types_to_find:
if i not in ["FCW", "HCW", "WCW", "onlyO"]:
msg = "whichH supports onlyO or any combination of FCW, HCW and WCW"
msg = (
"whichH supports onlyO or any combination of FCW, HCW and WCW"
f" given option is invalid {i}"
)
raise Exception(msg)
if "onlyO" in self.water_types_to_find and len(self.water_types_to_find) > 1:
msg = "onlyO cannot be used with other water types"
Expand Down
1 change: 0 additions & 1 deletion docs/source/citing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,3 @@ For citations, the following BibTeX entry can be used:
doi = {10.1021/acs.jcim.2c00801},
URL = {https://doi.org/10.1021/acs.jcim.2c00801},
}
2 changes: 1 addition & 1 deletion tests/test_water_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def test_save_clustering_options():

def test_create_from_file():
ca = "OPTICS"
whichH = ["onlyO"]
whichH = ("onlyO")
wc = WaterClustering(10, clustering_algorithm=ca, water_types_to_find=whichH)
with tempfile.NamedTemporaryFile(mode="w+", delete=True) as f:
wc._save_clustering_options(f.name)
Expand Down

0 comments on commit b3ef0bc

Please sign in to comment.