Added multiple species support and documentation

smrfeld · Jun 20, 2019 · 23c6f48 · 23c6f48
1 parent 1c9a50c
commit 23c6f48
Show file tree

Hide file tree

Showing 15 changed files with 568 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -44,6 +44,12 @@ A key way this library reduces computational cost is by introducing a cutoff for
 
 In general, we avoid all use of for loops, and rely extensively on array operations using numpy.
 
+### Multiple species
+
+Multiple species are also supported, where we have multiple species but want to draw two particles of the **same species** (two particles of any species can be done by simply ignoring the species labels).
+
+Specifically, the classes `ProbCalculatorMultiSpecies` and `SamplerMultiSpecies` implement this.
+
 ## Examples
 
 See the [examples](examples) folder.
diff --git a/examples/README.md b/examples/README.md
@@ -67,3 +67,17 @@ The module supports points in any spatial dimension, although samples are more d
 For each particle, the average distance to the neighbor it was drawn with. At the outer edge, particles are not drawn because the chosen std. dev. (=1.0) is too small (for these particle, no other particle is close enough to be drawn). At the center, where many draws occur (see previous figure), the length scale is related to the chosen std. dev. of the Gaussian. As we move closer to the edge, the average distance between sampled pairs increases (although the count of draws for these particles decreases as shown in the previous figure) because particles become more sparse.
 
 <img src="figures/sample_2d_ave_dist.png" width="300">
+
+## Multiple species
+
+Drawing two particles of the **same species** from a collection of multiple species is also supported.
+
+A simple example is `simple_multispecies.py`, which should be the self-explanatory generalization of `simple.py`.
+
+A further example is `multiple_1d_multispecies.py`, where two populations `A`,`B` exist.
+
+<img src="figures/multiple_1d_multispecies_A.png" width="300">
+<img src="figures/multiple_1d_multispecies_B.png" width="300">
+<img src="figures/multiple_1d_multispecies.png" width="300">
+
+where `A` is in red and `B` in blue.
diff --git a/examples/figures/multiple_1d_multispecies.png b/examples/figures/multiple_1d_multispecies.png
diff --git a/examples/figures/multiple_1d_multispecies_A.png b/examples/figures/multiple_1d_multispecies_A.png
diff --git a/examples/figures/multiple_1d_multispecies_B.png b/examples/figures/multiple_1d_multispecies_B.png
diff --git a/examples/histograms_1d.py b/examples/histograms_1d.py
@@ -46,16 +46,16 @@ def get_samples(no_samples, std_dev, std_dev_clip_mult):
         prob_calculator.std_dev_clip_mult = std_dev_clip_mult
 
         # For efficiency, just compute the first particle probability now
-        prob_calculator.compute_probs_first_particle()
-        compute_probs_first_particle = False
+        prob_calculator.compute_un_probs_first_particle()
+        compute_un_probs_first_particle = False
 
         no_tries_max = 100
         idxs_1 = []
         idxs_2 = []
         for i in range(0,no_samples):
 
             # Sample using rejection sampling
-            success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
+            success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
             if not success:
                 handle_fail()
 

diff --git a/examples/multiple_1d.py b/examples/multiple_1d.py
@@ -47,8 +47,8 @@ def handle_fail():
         sys.exit(0)
 
     # For efficiency, just compute the first particle probability now
-    prob_calculator.compute_probs_first_particle()
-    compute_probs_first_particle = False
+    prob_calculator.compute_un_probs_first_particle()
+    compute_un_probs_first_particle = False
 
     no_samples = 1000
     no_tries_max = 100
@@ -57,7 +57,7 @@ def handle_fail():
     for i in range(0,no_samples):
 
         # Sample using rejection sampling
-        success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
+        success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
         if not success:
             handle_fail()
 

diff --git a/examples/multiple_1d_multispecies.py b/examples/multiple_1d_multispecies.py
@@ -0,0 +1,111 @@
+# If installed:
+from samplePairsGaussian import *
+
+# Else: Add the path to the module
+# import sys
+# sys.path.append('../samplePairsGaussian/')
+# from sampler import *
+
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+
+    # Uniformly spread particles from -L to L
+    L = 100
+
+    # Dimensionality (1D,2D,3D, etc. - all dims are supported, but only 1D will be possible to plot pairs and probability)
+    dim = 1
+
+    # Number of particles
+    N = 100
+
+    # Positions of two species
+    posns_A = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
+    posns_B = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
+
+    # Setup the sampler
+
+    # Cutoff counting probabilities for particles that are more than:
+    # std_dev_clip_mult * std_dev
+    # away from each-other
+    std_dev = 10.0
+    std_dev_clip_mult = 3.0
+
+    # Make the probability calculator
+    prob_calculator_A = ProbCalculator(posns_A,std_dev,std_dev_clip_mult)
+    prob_calculator_B = ProbCalculator(posns_B,std_dev,std_dev_clip_mult)
+    species_arr = ["A","B"]
+    prob_calculator = ProbCalculatorMultiSpecies([prob_calculator_A,prob_calculator_B],species_arr)
+
+    # Make the sampler
+    sampler = SamplerMultiSpecies(prob_calculator)
+    sampler.set_logging_level(logging.INFO)
+
+    # Sample many particle pairs
+
+    # Handle failure
+    def handle_fail():
+        print("Could not draw particle: try adjusting the std. dev. for the probability cutoff.")
+        sys.exit(0)
+
+    # For efficiency, just compute the first particle probability now
+    prob_calculator_A.compute_un_probs_first_particle()
+    prob_calculator_B.compute_un_probs_first_particle()
+    compute_un_probs_first_particle = False
+
+    # Also compute the species probs
+    prob_calculator.compute_species_probs()
+    compute_species_probs = False
+
+    no_samples = 1000
+    no_tries_max = 100
+    idxs_1 = {"A": [], "B": []}
+    idxs_2 = {"A": [], "B": []}
+    for i in range(0,no_samples):
+
+        # Sample using rejection sampling
+        success = sampler.rejection_sample_pair(no_tries_max,compute_species_probs,compute_un_probs_first_particle)
+        if not success:
+            handle_fail()
+
+        # Alternatively sample using CDF
+        # sampler.cdf_sample_first_particle()
+        # sampler.cdf_sample_second_particle()
+
+        # Append
+        idxs_1[sampler.species_particles].append(sampler.idx_first_particle)
+        idxs_2[sampler.species_particles].append(sampler.idx_second_particle)
+
+    idxs_1["A"] = np.array(idxs_1["A"]).astype(int)
+    idxs_1["B"] = np.array(idxs_1["B"]).astype(int)
+    idxs_2["A"] = np.array(idxs_2["A"]).astype(int)
+    idxs_2["B"] = np.array(idxs_2["B"]).astype(int)
+
+    # Plot
+
+    plt.figure()
+    plt.hist(posns_A)
+    plt.xlabel("particle position")
+    plt.title("Distribution of " + str(N) + " particle positions of species A")
+
+    plt.figure()
+    plt.hist(posns_B)
+    plt.xlabel("particle position")
+    plt.title("Distribution of " + str(N) + " particle positions of species B")
+
+    plt.figure()
+    # Make symmetric
+    idxs_1_symmetric = np.concatenate((idxs_1["A"],idxs_2["A"]))
+    idxs_2_symmetric = np.concatenate((idxs_2["A"],idxs_1["A"]))
+    plt.plot(posns_A[idxs_1_symmetric][:,0],posns_A[idxs_2_symmetric][:,0],'ro')
+    idxs_1_symmetric = np.concatenate((idxs_1["B"],idxs_2["B"]))
+    idxs_2_symmetric = np.concatenate((idxs_2["B"],idxs_1["B"]))
+    plt.plot(posns_B[idxs_1_symmetric][:,0],posns_B[idxs_2_symmetric][:,0],'bo')
+    plt.xlabel("position of particle #1")
+    plt.ylabel("position of particle #2")
+    plt.title(str(no_samples)+ " sampled pairs of particles")
+
+    # Show
+    plt.show()
diff --git a/examples/sample_2d.py b/examples/sample_2d.py
@@ -44,8 +44,8 @@ def handle_fail(ith_particle):
         sys.exit(0)
 
     # For efficiency, just compute the first particle probability now
-    prob_calculator.compute_probs_first_particle()
-    compute_probs_first_particle = False
+    prob_calculator.compute_un_probs_first_particle()
+    compute_un_probs_first_particle = False
 
     no_samples = 10000
     no_tries_max = 100
@@ -54,7 +54,7 @@ def handle_fail(ith_particle):
     for i in range(0,no_samples):
 
         # Sample using rejection sampling
-        success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
+        success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
         if not success:
             handle_fail(i)
 

diff --git a/examples/simple.py b/examples/simple.py
@@ -35,7 +35,6 @@
 
     # Make the probability calculator
     prob_calculator = ProbCalculator(posns,std_dev,std_dev_clip_mult)
-    # Distances have already been computed for us between all particles
 
     # Make the sampler
     sampler = Sampler(prob_calculator)

diff --git a/examples/simple_multispecies.py b/examples/simple_multispecies.py
@@ -0,0 +1,61 @@
+# If installed:
+from samplePairsGaussian import *
+
+# Else: Add the path to the module
+# import sys
+# sys.path.append('../samplePairsGaussian/')
+# from sampler import *
+
+import sys
+import numpy as np
+
+if __name__ == "__main__":
+
+    # Generate random positions of particles
+
+    # Uniformly spread particles from -L to L
+    L = 100
+
+    # Dimensionality (1D,2D,3D, etc. - all dims are supported, but only 1D will be possible to plot pairs and probability)
+    dim = 1
+
+    # Number of particles
+    N = 100
+
+    # Positions for two species
+    posns_A = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
+    posns_B = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
+
+    # Setup the sampler
+
+    # Cutoff counting probabilities for particles that are more than:
+    # std_dev_clip_mult * std_dev
+    # away from each-other
+    std_dev = 10.0
+    std_dev_clip_mult = 3.0
+
+    # Make the probability calculator
+    prob_calculator_A = ProbCalculator(posns_A,std_dev,std_dev_clip_mult)
+    prob_calculator_B = ProbCalculator(posns_B,std_dev,std_dev_clip_mult)
+    species_arr = ["A","B"]
+    prob_calculator = ProbCalculatorMultiSpecies([prob_calculator_A,prob_calculator_B],species_arr)
+
+    # Make the sampler
+    sampler = SamplerMultiSpecies(prob_calculator)
+    sampler.set_logging_level(logging.INFO)
+
+    # Handle failure
+    def handle_fail():
+        print("Could not draw particle: try adjusting the std. dev. for the probability cutoff.")
+        sys.exit(0)
+
+    # Sample pair using rejection sampling
+    no_tries_max = 100
+    success = sampler.rejection_sample_pair(no_tries_max)
+    if not success:
+        handle_fail()
+
+    # Sample pair using CDF
+    success = sampler.cdf_sample_pair()
+    if not success:
+        handle_fail()
diff --git a/samplePairsGaussian/__init__.py b/samplePairsGaussian/__init__.py
@@ -1,2 +1,4 @@
 from .prob_calculator import *
 from .sampler import *
+from .prob_calculator_multispecies import *
+from .sampler_multispecies import *
diff --git a/samplePairsGaussian/prob_calculator_multispecies.py b/samplePairsGaussian/prob_calculator_multispecies.py
@@ -0,0 +1,53 @@
+from .prob_calculator import *
+
+class ProbCalculatorMultiSpecies:
+
+    def __init__(self, prob_calculator_arr, species_arr):
+        """Constructor.
+
+        Args:
+        posns (np.array([[float]])): particle positions. First axis are particles, seconds are coordinates in n-dimensional space
+        std_dev (float): standard deviation for cutting off probabilities
+        std_dev_clip_mult (float): multiplier for the standard deviation cutoff
+        """
+
+        # Setup the logger
+        self._logger = logging.getLogger(__name__)
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        ch = logging.StreamHandler()
+        ch.setLevel(logging.DEBUG)
+        ch.setFormatter(formatter)
+        self._logger.addHandler(ch)
+
+        # Level of logging to display
+        self._logger.setLevel(logging.ERROR)
+
+        # Length
+        if len(species_arr) != len(prob_calculator_arr):
+            self._logger.error("Error: length of species array = " + str(len(species_arr)) + " does not match the probabilities: " + str(len(prob_calculator_arr)) + "; Quitting.")
+            sys.exit(0)
+
+        # vars
+        self.no_species = len(species_arr)
+        self.prob_calculator_arr = prob_calculator_arr
+        self.species_arr = species_arr
+
+        # compute species probabilities
+        self.compute_species_probs()
+
+    def set_logging_level(self, level):
+        """Sets the logging level
+        Args:
+        level (logging): logging level
+        """
+        self._logger.setLevel(level)
+
+    def compute_species_probs(self):
+        """compute probabilities for the different species.
+        """
+
+        self.probs_species = np.zeros(self.no_species)
+        for i in range(0,self.no_species):
+            self.probs_species[i] = self.prob_calculator_arr[i].n
+        self.n_total = np.sum(self.probs_species)
+        self.probs_species /= self.n_total
diff --git a/samplePairsGaussian/sampler.py b/samplePairsGaussian/sampler.py
@@ -58,15 +58,15 @@ def rejection_sample_first_particle(self, no_tries_max=100, compute_probs=True):
 
         Args:
         no_tries_max (int): max. no. of tries for rejection sampling
-        compute_probs (bool): whether to first call compute_probs_first_particle for the ProbCalculator
+        compute_probs (bool): whether to first call compute_un_probs_first_particle for the ProbCalculator
 
         Returns:
         bool: True for success, False for failure
         """
 
         # Form probabilities
         if compute_probs:
-            self.prob_calculator.compute_probs_first_particle()
+            self.prob_calculator.compute_un_probs_first_particle()
 
         i_try = 0
         self.idx_first_particle = None
@@ -103,7 +103,7 @@ def rejection_sample_second_particle(self, no_tries_max=100, compute_probs=True)
 
         # Form probabilities
         if compute_probs:
-            self.prob_calculator.compute_probs_second_particle(self.idx_first_particle)
+            self.prob_calculator.compute_un_probs_second_particle(self.idx_first_particle)
 
         i_try = 0
         self.idx_second_particle = None
@@ -141,7 +141,7 @@ def rejection_sample_pair(self, no_tries_max=100, compute_probs_first_particle=T
         """
 
         if compute_probs_first_particle:
-            self.prob_calculator.compute_probs_first_particle()
+            self.prob_calculator.compute_un_probs_first_particle()
 
         # Turn off logging temp
         level = self._logger.level
@@ -186,7 +186,7 @@ def cdf_sample_first_particle(self,compute_probs=True):
 
         # Form probabilities
         if compute_probs:
-            self.prob_calculator.compute_probs_first_particle()
+            self.prob_calculator.compute_un_probs_first_particle()
 
         # Ensure normalized
         if self.prob_calculator.are_probs_first_particle_normalized == False:
@@ -214,7 +214,7 @@ def cdf_sample_second_particle(self,compute_probs=True):
 
         # Form probabilities
         if compute_probs:
-            self.prob_calculator.compute_probs_second_particle(self.idx_first_particle)
+            self.prob_calculator.compute_un_probs_second_particle(self.idx_first_particle)
 
         # Ensure normalized
         if self.prob_calculator.are_probs_second_particle_normalized == False:
@@ -240,7 +240,7 @@ def cdf_sample_pair(self, compute_probs_first_particle=True):
         """
 
         if compute_probs_first_particle:
-            self.prob_calculator.compute_probs_first_particle()
+            self.prob_calculator.compute_un_probs_first_particle()
 
         # Ensure normalized
         if self.prob_calculator.are_probs_first_particle_normalized == False: