diff --git a/ConservedWaterSearch/hydrogen_orientation.py b/ConservedWaterSearch/hydrogen_orientation.py index a4bfd97..0fad4e4 100644 --- a/ConservedWaterSearch/hydrogen_orientation.py +++ b/ConservedWaterSearch/hydrogen_orientation.py @@ -25,14 +25,14 @@ def hydrogen_orientation_analysis( HCW_angstd_cutoff: float = 17, WCW_angstd_cutoff: float = 20, weakly_explained: float = 0.7, - xiFCW: tuple[float]|list[float] = (0.03,), - xiHCW: tuple[float]|list[float] = (0.05, 0.01), - xiWCW: tuple[float]|list[float] = (0.05, 0.001), + xiFCW: tuple[float] | list[float] = (0.03,), + xiHCW: tuple[float] | list[float] = (0.05, 0.01), + xiWCW: tuple[float] | list[float] = (0.05, 0.001), njobs: int = 1, verbose: int = 0, debugH: int = 0, plotreach: bool = False, - which: tuple[str]|list[str] = ("FCW", "HCW", "WCW"), + which: tuple[str] | list[str] = ("FCW", "HCW", "WCW"), normalize_orientations: bool = True, ) -> list: """Determines if the water cluster is conserved and of what type. @@ -66,13 +66,13 @@ def hydrogen_orientation_analysis( kmeans inertia (measure of spread of data in a cluster). Defaults to 0.4. FCW_angdiff_cutoff (float, optional): Maximum value of angle (in - deg) allowed for FCW in OPTICS/HDBSCAN clustering to be + deg) allowed for FCW in OPTICS clustering to be considered correct water angle. Defaults to 5. FCW_angstd_cutoff (float, optional): Maximal standard deviation of angle distribution of orientations of two hydrogens allowed for water to be considered FCW. Defaults to 17. min_samp_data_size_pct (float, optional): Minimum samples to - choose for OPTICS or HDBSCAN clustering as percentage of + choose for OPTICS clustering as percentage of number of water molecules considered for HCW and WCW. Defaults to 0.15. nonFCW_angdiff_cutoff (float, optional): Maximum standard @@ -229,7 +229,7 @@ def find_fully_conserved_orientations( kmeans inertia (measure of spread of data in a cluster). Defaults to 0.4. 
angdiff_cutoff (float, optional): Maximum value of angle (in - deg) allowed for FCW in OPTICS/HDBSCAN clustering to be + deg) allowed for FCW in OPTICS clustering to be considered correct water angle. Defaults to 5. angstd_cutoff (float, optional): Maximal standard deviation of angle distribution of orientations of two hydrogens @@ -431,7 +431,7 @@ def find_half_conserved_orientations( pct_size_buffer (float, optional): Minimum allowed size of the hydrogen orientation cluster. Defaults to 0.85. min_samp_data_size_pct (float, optional): Minimum samples to - choose for OPTICS or HDBSCAN clustering as percentage of + choose for OPTICS clustering as percentage of number of water molecules considered for HCW and WCW. Defaults to 0.15. angdiff_cutoff (float, optional): Maximum standard @@ -585,7 +585,7 @@ def find_weakly_conserved_orientations( lower_bound_pct_buffer (float, optional): Minimum allowed size of the hydrogen orientation cluster. Defaults to 0.35. min_samp_data_size_pct (float, optional): Minimum samples to - choose for OPTICS or HDBSCAN clustering as percentage of + choose for OPTICS clustering as percentage of number of water molecules considered for HCW and WCW. Defaults to 0.15. 
pct_explained (float, optional): percentage of explained diff --git a/ConservedWaterSearch/water_clustering.py b/ConservedWaterSearch/water_clustering.py index 785f875..9c9f10d 100644 --- a/ConservedWaterSearch/water_clustering.py +++ b/ConservedWaterSearch/water_clustering.py @@ -65,10 +65,19 @@ def __init__( self, nsnaps: int, clustering_algorithm: str = "OPTICS", - water_types_to_find: list[str] | None = None, + water_types_to_find: tuple[str] | list[str] = ("FCW", "HCW", "WCW"), restart_after_found: bool = False, min_samples: list[int] | None = None, - xis: list[float] | None = None, + xis: tuple[float] | list[float] = ( + 0.1, + 0.05, + 0.01, + 0.005, + 0.001, + 0.0005, + 0.0001, + 1e-05, + ), numbpct_oxygen: float = 0.8, normalize_orientations: bool = True, numbpct_hyd_orient_analysis: float = 0.85, @@ -81,9 +90,9 @@ def __init__( HCW_angstd_cutoff: float = 17, WCW_angstd_cutoff: float = 20, weakly_explained: float = 0.7, - xiFCW: list[float] | None = None, - xiHCW: list[float] | None = None, - xiWCW: list[float] | None = None, + xiFCW: tuple[float] | list[float] = (0.03,), + xiHCW: tuple[float] | list[float] = (0.05, 0.01), + xiWCW: tuple[float] | list[float] = (0.05, 0.001), njobs: int = 1, verbose: int = 0, debugO: int = 0, @@ -104,24 +113,24 @@ def __init__( clustering_algorithm (str, optional): Options are "OPTICS" or "HDBSCAN". OPTICS provides slightly better results, but is also slightly slower. Defaults to "OPTICS". - water_types_to_find (list[str], optional): Defines which + water_types_to_find (tuple[str], optional): Defines which water types to search for. Any combination of "FCW", "HWC" and "WCW" is allowed, or "onlyO" for oxygen - clustering only. Defaults to ["FCW", "HCW", "WCW"]. + clustering only. Defaults to ("FCW", "HCW", "WCW"). restart_after_found (bool, optional): If ``True`` restarts clustering after each water is found. 
``False`` will - give the quick version of multi-stage reculstering + give the quick version of multi-stage reclustering approach. Defaults to False. min_samples (list[int], optional): List of minimum samples for OPTICS or HDBSCAN. If ``None`` following range is used ``[int(0.25 * nsnaps), nsnaps]`` is used. For single clustering users should provide a single integer between 0 and ``nsnaps`` in a list. Defaults to None. - xis (list[float], optional): List of xis for OPTICS + xis (tuple[float], optional): List or tuple of xis for OPTICS clustering. This is ignored for HDBSCAN. Defaults to - [ 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, - 0.00001]. For single clustering users should provide a - single float between 0 and 1 in a list. + (0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, + 0.00001). For single clustering, users should provide a + single float between 0 and 1 in a list/tuple. numbpct_oxygen (float, optional): Percentage of ``nsnaps`` required for oxygen cluster to be considered valid and water conserved. The check is enforced on @@ -161,15 +170,15 @@ def __init__( weakly_explained (float, optional): percentage of explained hydrogen orientations for water to be considered WCW. Defaults to 0.7. - xiFCW (list, optional): Xi value for OPTICS clustering for - FCW. Don't touch this unless you know what you are - doing. Defaults to [0.03]. - xiHCW (list, optional): Xi value for OPTICS clustering for - HCW. Don't touch this unless you know what you are doing. - Defaults to [0.05, 0.01]. - xiWCW (list, optional): Xi value for OPTICS clustering for - WCW. Don't touch this unless you know what you are doing. - Defaults to [0.05, 0.001]. + xiFCW (tuple[float], optional): Xi value for hydrogen clustering of + FCWs for OPTICS algorithm. Avoid changing the defaults if + possible. Defaults to (0.03,). + xiHCW (tuple[float], optional): Xi value for OPTICS clustering for + HCW. Avoid changing the defaults if possible. + Defaults to (0.05, 0.01). 
+ xiWCW (tuple[float], optional): Xi value for OPTICS clustering for + WCW. Avoid changing the defaults if possible. + Defaults to (0.05, 0.001). njobs (int, optional): how many cpu cores to use for clustering. Defaults to 1. verbose (int, optional): verbosity of output. Defaults to 0. @@ -191,16 +200,21 @@ def __init__( ``output_file`` have to be provided for clustering restarting. Defaults to None. """ - if xiWCW is None: - xiWCW = [0.05, 0.001] - if xiHCW is None: - xiHCW = [0.05, 0.01] - if xiFCW is None: - xiFCW = [0.03] - if water_types_to_find is None: - water_types_to_find = ["FCW", "HCW", "WCW"] - if xis is None: - xis = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, 1e-05] + if not isinstance(water_types_to_find, (tuple, list)): + if isinstance(water_types_to_find, str): + water_types_to_find = (water_types_to_find,) + if not isinstance(xis, (tuple, list)): + if isinstance(xis, float): + xis = (xis,) + if not isinstance(xiFCW, (tuple, list)): + if isinstance(xiFCW, float): + xiFCW = (xiFCW,) + if not isinstance(xiHCW, (tuple, list)): + if isinstance(xiHCW, float): + xiHCW = (xiHCW,) + if not isinstance(xiWCW, (tuple, list)): + if isinstance(xiWCW, float): + xiWCW = (xiWCW,) if nsnaps <= 0: msg = f"nsnaps must be positive {nsnaps}" raise Exception(msg) @@ -514,10 +528,10 @@ def read_and_set_water_clust_options(self, file_name: str) -> None: lines: list[str] = f.read().splitlines() self.nsnaps = int(lines[0].strip()) self.clustering_algorithm = lines[1].strip(" ") - self.water_types_to_find = [i for i in lines[2].split(" ")] + self.water_types_to_find = tuple([i for i in lines[2].split(" ")]) self.restart_after_find = lines[3] == "True" self.min_samples = [int(i) for i in lines[4].split(" ")] - self.xis = [float(i) for i in lines[5].split(" ")] + self.xis = tuple([float(i) for i in lines[5].split(" ")]) self.numbpct_oxygen = float(lines[6]) self.normalize_orientations = lines[7] == "True" self.numbpct_hyd_orient_analysis = float(lines[8]) 
@@ -530,9 +544,9 @@ def read_and_set_water_clust_options(self, file_name: str) -> None: self.halfcon_angstd_cutoff = float(lines[15]) self.weakly_angstd_cutoff = float(lines[16]) self.weakly_explained = float(lines[17]) - self.xiFCW = [float(i) for i in lines[18].split(" ")] - self.xiHCW = [float(i) for i in lines[19].split(" ")] - self.xiWCW = [float(i) for i in lines[20].split(" ")] + self.xiFCW = tuple([float(i) for i in lines[18].split(" ")]) + self.xiHCW = tuple([float(i) for i in lines[19].split(" ")]) + self.xiWCW = tuple([float(i) for i in lines[20].split(" ")]) self.njobs = int(lines[21]) self.verbose = int(lines[22]) self.debugO = int(lines[23]) @@ -1056,7 +1070,10 @@ def _check_cls_alg_and_whichH(self): raise Exception(msg) for i in self.water_types_to_find: if i not in ["FCW", "HCW", "WCW", "onlyO"]: - msg = "whichH supports onlyO or any combination of FCW, HCW and WCW" + msg = ( + "whichH supports onlyO or any combination of FCW, HCW and WCW" + f" given option is invalid {i}" + ) raise Exception(msg) if "onlyO" in self.water_types_to_find and len(self.water_types_to_find) > 1: msg = "onlyO cannot be used with other water types" diff --git a/docs/source/citing.rst b/docs/source/citing.rst index f024568..1fe7df6 100644 --- a/docs/source/citing.rst +++ b/docs/source/citing.rst @@ -23,4 +23,3 @@ For citations, the following BibTeX entry can be used: doi = {10.1021/acs.jcim.2c00801}, URL = {https://doi.org/10.1021/acs.jcim.2c00801}, } - diff --git a/tests/test_water_clustering.py b/tests/test_water_clustering.py index fec6d40..e422b0b 100644 --- a/tests/test_water_clustering.py +++ b/tests/test_water_clustering.py @@ -128,7 +128,7 @@ def test_save_clustering_options(): def test_create_from_file(): ca = "OPTICS" - whichH = ["onlyO"] + whichH = ("onlyO",) wc = WaterClustering(10, clustering_algorithm=ca, water_types_to_find=whichH) with tempfile.NamedTemporaryFile(mode="w+", delete=True) as f: wc._save_clustering_options(f.name)