Merge branch 'master' of https://github.com/tkchafin/resistnet

tkchafin · May 7, 2024 · befe6c6 · befe6c6
2 parents 4d16427 + 5542fb3
commit befe6c6
Show file tree

Hide file tree

Showing 16 changed files with 1,534 additions and 149 deletions.
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
@@ -31,13 +31,13 @@ requirements:
   - momepy
   - networkx>3
   - pyogrio
-  - deap
+  - deap>=1.4
   - nlopt
   - r-base
   - r-tidyverse
   - rpy2
   - r-lme4
-  - r-matrix
+  - r-matrix >=1.6.2
   - r-mumin
   - mantel
 

diff --git a/environment.yml b/environment.yml
@@ -21,12 +21,13 @@ dependencies:
   - momepy
   - networkx>3
   - pyogrio
-  - deap
+  - deap>=1.4
   - nlopt
   - r-base
   - r-tidyverse
   - rpy2
-  - r-lme4
-  - r-matrix
+  - r-lme4>=1.1
+  - r-matrix>=1.6
   - r-mumin
   - mantel
+  - r-samc
diff --git a/scripts/ensembleResistnet.py b/scripts/ensembleResistnet.py
@@ -15,9 +15,6 @@
 
 
 def main():
-    """
-    Main function to run the resistnet application.
-    """
     # Step 0: Read/format input args
     params = ParseArgs()
 
@@ -40,6 +37,10 @@ def main():
         params.paths, ".HallOfFame.tsv", local_rows=local_rows
     )
 
+    hofs.replace('-', '0.0', inplace=True)
+    for col in hofs.columns:
+        hofs[col] = pd.to_numeric(hofs[col], errors='coerce')
+
     if (hofs['fitness'] == hofs['aic']).all():
         hofs["fitness"] = hofs["fitness"] * -1
     hofs["aic"] = hofs["aic"] * -1
@@ -48,7 +49,7 @@ def main():
     )
 
     if params.only_keep:
-        hofs = hofs[hofs['keep'] is True]
+        hofs = hofs[hofs['keep']]
     if params.hof_max is not None:
         hofs = hofs.head(params.hof_max)
 

diff --git a/scripts/runResistnet.py b/scripts/runResistnet.py
@@ -6,6 +6,7 @@
 from resistnet.params import parseArgs
 from resistnet.resistance_network import ResistanceNetwork
 from resistnet.model_optimisation import ModelRunner
+# from resistnet.samc_network import ResistanceNetworkSAMC
 
 
 def main():
@@ -21,6 +22,26 @@ def main():
     random.seed(params.seed)
 
     # Step 1: Initialise network data
+    # network = ResistanceNetworkSAMC(
+    #     network=params.network,
+    #     shapefile=params.shapefile,
+    #     sizes=params.sizefile,
+    #     coords=params.coords,
+    #     variables=params.variables,
+    #     agg_opts=params.agg_opts,
+    #     pop_agg=params.pop_agg,
+    #     inmat=params.inmat,
+    #     reachid_col=params.reachid_col,
+    #     length_col=params.length_col,
+    #     infer_origin=params.infer_origin,
+    #     origin=params.origin,
+    #     rtol=params.rtol,
+    #     solver=params.solver,
+    #     max_iter=params.max_iter,
+    #     max_fail=params.max_fail,
+    #     out=params.out,
+    #     verbose=True
+    # )
     network = ResistanceNetwork(
         network=params.network,
         shapefile=params.shapefile,
@@ -35,6 +56,9 @@ def main():
         verbose=True
     )
 
+    # write new output geodatabase
+    network.write_geodataframe(params.out, params.output_driver)
+
     # Step 2: Initialise ModelRunner
     runner = ModelRunner(
         resistance_network=network,

diff --git a/src/resistnet/CFPT.py b/src/resistnet/CFPT.py
@@ -0,0 +1,135 @@
+import warnings
+import numpy as np
+from scipy.sparse import diags, SparseEfficiencyWarning
+from scipy.sparse.linalg import spsolve, lgmres
+
+warnings.simplefilter('ignore', SparseEfficiencyWarning)
+
+
+def CFPT(Q, R, edge_site_indices, rtol=1e-5, max_iter=1000,
+         max_fail=1, solver="iterative", ):
+    N = len(edge_site_indices)
+    cfpt_matrix = np.zeros((N, N))
+    failure_count = 0
+
+    for i, dest in enumerate(edge_site_indices):
+        Q_temp = Q.copy().tolil()
+        absorption_factor = R[dest]
+
+        # old method that could potentially lead to negative values
+        # for j in range(Q.shape[1]):
+        #     if j != dest:  # Avoid altering the diagonal element
+        #         Q_temp[dest, j] *= (1 - absorption_factor)
+
+        # Proportionally reduce non-destination transitions to adjust for abs
+        non_dest_transitions = [j for j in range(Q.shape[1]) if j != dest]
+        total_non_dest_transition = sum(
+            Q_temp[dest, j] for j in non_dest_transitions)
+        if total_non_dest_transition > 0:  # Avoid division by zero
+            scale_factor = (
+                1 - absorption_factor * Q_temp[dest, dest]
+            ) / total_non_dest_transition
+            for j in non_dest_transitions:
+                Q_temp[dest, j] *= scale_factor
+
+        Q_temp = Q_temp.tocsr()
+
+        # Ensure diagonals make row sums to 1
+        Q_temp = _set_diags(Q_temp)
+
+        for j, orig in enumerate(edge_site_indices):
+            if orig != dest:
+                mask = np.ones(Q.shape[0], dtype=bool)
+                mask[dest] = False
+                Qj = Q_temp[mask, :][:, mask]
+                qj = Q_temp[mask, dest]
+                Qj.data *= -1
+                Qj.setdiag(Qj.diagonal() + 1)
+
+                # lgmres for iterative and sparselu/sparseqr if direct
+                try:
+                    if solver == "iterative":
+                        # trying with pre-conditioning to see if this improves
+                        # the non-convergence issue when Q is asymmetric
+                        # ilu = spilu(Qj.tocsc())
+                        # M_x = LinearOperator(
+                        # Qj.shape, lambda x: ilu.solve(x))
+                        solution, info = lgmres(Qj,
+                                                qj.toarray().flatten(),
+                                                maxiter=max_iter,
+                                                rtol=rtol,
+                                                )  # M=M_x)
+                        if info == 0:
+                            adjusted_index = np.where(mask)[0].tolist().index(
+                                orig)
+                            cfpt_matrix[j, i] = solution[adjusted_index]
+                        else:
+                            raise ValueError(
+                                f"Convergence failed {dest}-{orig}: {info}"
+                            )
+                    elif solver == "direct":
+                        solution = spsolve(Qj, qj.toarray().flatten())
+                        adjusted_index = np.where(mask)[0].tolist().index(orig)
+                        cfpt_matrix[j, i] = solution[adjusted_index]
+                except Exception:
+                    cfpt_matrix[j, i] = np.nan
+                    failure_count += 1
+                    if failure_count >= max_fail:
+                        return None
+    return cfpt_matrix
+
+
+def _set_diags(sm, offset=None):
+    """
+    Adjusts the diagonal elements of a sparse matrix to ensure that each
+    row sums to 1, optionally incorporating an offset subtraction from each
+    diagonal element.
+
+    This function modifies the input sparse matrix in-place.
+
+    Parameters:
+    - sm (csr_matrix): The sparse matrix whose diagonals are to be adjusted.
+    - offset (numpy.ndarray, optional): An array of values to subtract from
+    each diagonal. If `None`, no offset is subtracted.
+
+    Returns:
+    - None: The matrix `sm` is modified in-place.
+    """
+    row_sums = sm.sum(axis=1).A1
+    d = 1 - row_sums
+    if offset is not None:
+        d -= offset
+    dm = diags(d, 0, shape=sm.shape, format='csr')
+    sm += dm
+    _validate_row_sums(sm)
+    return sm
+
+
+def _validate_row_sums(sm):
+    """
+    Validates that each row of a sparse matrix sums to 1.
+
+    Parameters:
+    - sm (csr_matrix): The sparse matrix to validate.
+
+    Raises:
+    - ValueError: If any row sum does not equal 1.
+    """
+    row_sums_after = sm.sum(axis=1).A1
+    if not np.allclose(row_sums_after, 1):
+        raise ValueError("Row sums do not sum to 1.")
+
+
+def _opt_prep(sm):
+    """
+    Prepares a scipy.sparse matrix for optimization by negating non-zero
+    elements and incrementing diagonal elements by 1, in-place.
+
+    Parameters:
+    - sm (scipy.sparse matrix): The sparse matrix to be modified in-place.
+
+    Returns:
+    - None
+    """
+    sm.data = -sm.data
+    sm.setdiag(sm.diagonal() + 1)
diff --git a/src/resistnet/data/test.gpkg b/src/resistnet/data/test.gpkg
diff --git a/src/resistnet/data/test.popSizes.txt b/src/resistnet/data/test.popSizes.txt
@@ -0,0 +1,38 @@
+sample	size
+burk	100
+cdikc	100
+dakp	100
+digl	100
+dikc	100
+dima	100
+dolk	100
+dort	100
+dusa	100
+dusb	100
+hara	100
+kali	100
+kame	100
+karo	100
+kiri	100
+labr	100
+mapt	100
+mart	100
+mori	100
+nyrb	100
+nyrc	100
+pach	100
+pipp	100
+raid	100
+rich	100
+rind	100
+ring	100
+rong	100
+sher	100
+sing	100
+thew	100
+thri	100
+tsri	100
+unar	100
+yong	100
+yued	100
+zarp	100
diff --git a/src/resistnet/hall_of_fame.py b/src/resistnet/hall_of_fame.py
@@ -43,7 +43,9 @@ def __init__(self, variables, max_size, init_pop=None):
         """
         cols = ["fitness"]
         for v in variables:
-            cols.extend([str(v), f"{v}_weight", f"{v}_trans", f"{v}_shape"])
+            cols.extend([
+                str(v), f"{v}_weight", f"{v}_trans", f"{v}_shape", f"{v}_asym"
+            ])
         cols.extend(["loglik", "r2m", "aic", "delta_aic_null"])
         self.data = pd.DataFrame(columns=cols)
         self.variables = variables
@@ -70,9 +72,9 @@ def check_population(self, pop):
             pop (list): A list of models to be considered for inclusion in the
                         hall of fame.
         """
+        # only consider models w/ non neg-inf fitnesses
         popDF = pd.DataFrame(pop, columns=self.data.columns)
         popDF = popDF[popDF.fitness > float("-inf")]
-
         if popDF.shape[0] < 1:
             return
 
@@ -130,6 +132,9 @@ def custom_drop(self):
             self.data[f"{v_str}_shape"] = (
                 self.data[v_str] * self.data[f"{v_str}_shape"]
             )
+            self.data[f"{v_str}_asym"] = (
+                self.data[v_str] * self.data[f"{v_str}_asym"]
+            )
             temp = self.data[f"{v_str}_trans"]
             temp[temp > 1] = 1
             self.data[f"{v_str}_shape"] = self.data[v_str] * temp
@@ -369,6 +374,7 @@ def get_best_model(self):
                 "Weight": best_model[f"{var}_weight"],
                 "Shape": best_model[f"{var}_shape"],
                 "Transformation": best_model[f"{var}_trans"],
+                "Asymmetry": best_model[f"{var}_asym"],
             }
             for var in self.variables
         ]
@@ -426,8 +432,8 @@ def cleanHOF(self):
 
         for v in self.variables:
             mask = ret[v] == 0
-            for attr in ["_weight", "_trans", "_shape"]:
-                ret[f"{v}{attr}"][mask] = np.nan
+            for attr in ["_weight", "_trans", "_shape", "_asym"]:
+                ret.loc[mask, f"{v}{attr}"] = np.nan
 
         if ret.iloc[0]["fitness"] == (ret.iloc[0]["aic"] * -1):
             ret["fitness"] = ret["fitness"] * -1
@@ -694,7 +700,7 @@ def from_dataframe(cls, df, max_size=None):
                         the DataFrame.
         """
         # Extract variable names from the DataFrame column names
-        variable_prefixes = ["_weight", "_trans", "_shape"]
+        variable_prefixes = ["_weight", "_trans", "_shape", "_asym"]
         variables = set()
         for col in df.columns:
             for prefix in variable_prefixes: