Skip to content

Commit

Permalink
Added multiple species support and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Oliver K. Ernst committed Jun 20, 2019
1 parent 1c9a50c commit 23c6f48
Show file tree
Hide file tree
Showing 15 changed files with 568 additions and 17 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ A key way this library reduces computational cost is by introducing a cutoff for

In general, we avoid all use of for loops, and rely extensively on array operations using numpy.

### Multiple species

Multiple species are also supported, where we have multiple species but want to draw two particles of the **same species** (two particles of any species can be done by simply ignoring the species labels).

Specifically, the classes `ProbCalculatorMultiSpecies` and `SamplerMultiSpecies` implement this.

## Examples

See the [examples](examples) folder.
14 changes: 14 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,17 @@ The module supports points in any spatial dimension, although samples are more d
For each particle, the average distance to the neighbor it was drawn with. At the outer edge, particles are not drawn because the chosen std. dev. (=1.0) is too small (for these particle, no other particle is close enough to be drawn). At the center, where many draws occur (see previous figure), the length scale is related to the chosen std. dev. of the Gaussian. As we move closer to the edge, the average distance between sampled pairs increases (although the count of draws for these particles decreases as shown in the previous figure) because particles become more sparse.

<img src="figures/sample_2d_ave_dist.png" width="300">

## Multiple species

Drawing two particles of the **same species** from a collection of multiple species is also supported.

A simple example is `simple_multispecies.py`, which should be the self-explanatory generalization of `simple.py`.

A further example is `multiple_1d_multispecies.py`, where two populations `A`,`B` exist.

<img src="figures/multiple_1d_multispecies_A.png" width="300">
<img src="figures/multiple_1d_multispecies_B.png" width="300">
<img src="figures/multiple_1d_multispecies.png" width="300">

where `A` is in red and `B` in blue.
Binary file added examples/figures/multiple_1d_multispecies.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/figures/multiple_1d_multispecies_A.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/figures/multiple_1d_multispecies_B.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 3 additions & 3 deletions examples/histograms_1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@ def get_samples(no_samples, std_dev, std_dev_clip_mult):
prob_calculator.std_dev_clip_mult = std_dev_clip_mult

# For efficiency, just compute the first particle probability now
prob_calculator.compute_probs_first_particle()
compute_probs_first_particle = False
prob_calculator.compute_un_probs_first_particle()
compute_un_probs_first_particle = False

no_tries_max = 100
idxs_1 = []
idxs_2 = []
for i in range(0,no_samples):

# Sample using rejection sampling
success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
if not success:
handle_fail()

Expand Down
6 changes: 3 additions & 3 deletions examples/multiple_1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def handle_fail():
sys.exit(0)

# For efficiency, just compute the first particle probability now
prob_calculator.compute_probs_first_particle()
compute_probs_first_particle = False
prob_calculator.compute_un_probs_first_particle()
compute_un_probs_first_particle = False

no_samples = 1000
no_tries_max = 100
Expand All @@ -57,7 +57,7 @@ def handle_fail():
for i in range(0,no_samples):

# Sample using rejection sampling
success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
if not success:
handle_fail()

Expand Down
111 changes: 111 additions & 0 deletions examples/multiple_1d_multispecies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# If installed:
from samplePairsGaussian import *

# Else: Add the path to the module
# import sys
# sys.path.append('../samplePairsGaussian/')
# from sampler import *

import sys
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":

# Uniformly spread particles from -L to L
L = 100

# Dimensionality (1D,2D,3D, etc. - all dims are supported, but only 1D will be possible to plot pairs and probability)
dim = 1

# Number of particles
N = 100

# Positions of two species
posns_A = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
posns_B = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)

# Setup the sampler

# Cutoff counting probabilities for particles that are more than:
# std_dev_clip_mult * std_dev
# away from each-other
std_dev = 10.0
std_dev_clip_mult = 3.0

# Make the probability calculator
prob_calculator_A = ProbCalculator(posns_A,std_dev,std_dev_clip_mult)
prob_calculator_B = ProbCalculator(posns_B,std_dev,std_dev_clip_mult)
species_arr = ["A","B"]
prob_calculator = ProbCalculatorMultiSpecies([prob_calculator_A,prob_calculator_B],species_arr)

# Make the sampler
sampler = SamplerMultiSpecies(prob_calculator)
sampler.set_logging_level(logging.INFO)

# Sample many particle pairs

# Handle failure
def handle_fail():
print("Could not draw particle: try adjusting the std. dev. for the probability cutoff.")
sys.exit(0)

# For efficiency, just compute the first particle probability now
prob_calculator_A.compute_un_probs_first_particle()
prob_calculator_B.compute_un_probs_first_particle()
compute_un_probs_first_particle = False

# Also compute the species probs
prob_calculator.compute_species_probs()
compute_species_probs = False

no_samples = 1000
no_tries_max = 100
idxs_1 = {"A": [], "B": []}
idxs_2 = {"A": [], "B": []}
for i in range(0,no_samples):

# Sample using rejection sampling
success = sampler.rejection_sample_pair(no_tries_max,compute_species_probs,compute_un_probs_first_particle)
if not success:
handle_fail()

# Alternatively sample using CDF
# sampler.cdf_sample_first_particle()
# sampler.cdf_sample_second_particle()

# Append
idxs_1[sampler.species_particles].append(sampler.idx_first_particle)
idxs_2[sampler.species_particles].append(sampler.idx_second_particle)

idxs_1["A"] = np.array(idxs_1["A"]).astype(int)
idxs_1["B"] = np.array(idxs_1["B"]).astype(int)
idxs_2["A"] = np.array(idxs_2["A"]).astype(int)
idxs_2["B"] = np.array(idxs_2["B"]).astype(int)

# Plot

plt.figure()
plt.hist(posns_A)
plt.xlabel("particle position")
plt.title("Distribution of " + str(N) + " particle positions of species A")

plt.figure()
plt.hist(posns_B)
plt.xlabel("particle position")
plt.title("Distribution of " + str(N) + " particle positions of species B")

plt.figure()
# Make symmetric
idxs_1_symmetric = np.concatenate((idxs_1["A"],idxs_2["A"]))
idxs_2_symmetric = np.concatenate((idxs_2["A"],idxs_1["A"]))
plt.plot(posns_A[idxs_1_symmetric][:,0],posns_A[idxs_2_symmetric][:,0],'ro')
idxs_1_symmetric = np.concatenate((idxs_1["B"],idxs_2["B"]))
idxs_2_symmetric = np.concatenate((idxs_2["B"],idxs_1["B"]))
plt.plot(posns_B[idxs_1_symmetric][:,0],posns_B[idxs_2_symmetric][:,0],'bo')
plt.xlabel("position of particle #1")
plt.ylabel("position of particle #2")
plt.title(str(no_samples)+ " sampled pairs of particles")

# Show
plt.show()
6 changes: 3 additions & 3 deletions examples/sample_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def handle_fail(ith_particle):
sys.exit(0)

# For efficiency, just compute the first particle probability now
prob_calculator.compute_probs_first_particle()
compute_probs_first_particle = False
prob_calculator.compute_un_probs_first_particle()
compute_un_probs_first_particle = False

no_samples = 10000
no_tries_max = 100
Expand All @@ -54,7 +54,7 @@ def handle_fail(ith_particle):
for i in range(0,no_samples):

# Sample using rejection sampling
success = sampler.rejection_sample_pair(no_tries_max,compute_probs_first_particle)
success = sampler.rejection_sample_pair(no_tries_max,compute_un_probs_first_particle)
if not success:
handle_fail(i)

Expand Down
1 change: 0 additions & 1 deletion examples/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

# Make the probability calculator
prob_calculator = ProbCalculator(posns,std_dev,std_dev_clip_mult)
# Distances have already been computed for us between all particles

# Make the sampler
sampler = Sampler(prob_calculator)
Expand Down
61 changes: 61 additions & 0 deletions examples/simple_multispecies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# If installed:
from samplePairsGaussian import *

# Else: Add the path to the module
# import sys
# sys.path.append('../samplePairsGaussian/')
# from sampler import *

import sys
import numpy as np

if __name__ == "__main__":

# Generate random positions of particles

# Uniformly spread particles from -L to L
L = 100

# Dimensionality (1D,2D,3D, etc. - all dims are supported, but only 1D will be possible to plot pairs and probability)
dim = 1

# Number of particles
N = 100

# Positions for two species
posns_A = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)
posns_B = (np.random.random(size=(N,dim))-0.5) * (2.0 * L)

# Setup the sampler

# Cutoff counting probabilities for particles that are more than:
# std_dev_clip_mult * std_dev
# away from each-other
std_dev = 10.0
std_dev_clip_mult = 3.0

# Make the probability calculator
prob_calculator_A = ProbCalculator(posns_A,std_dev,std_dev_clip_mult)
prob_calculator_B = ProbCalculator(posns_B,std_dev,std_dev_clip_mult)
species_arr = ["A","B"]
prob_calculator = ProbCalculatorMultiSpecies([prob_calculator_A,prob_calculator_B],species_arr)

# Make the sampler
sampler = SamplerMultiSpecies(prob_calculator)
sampler.set_logging_level(logging.INFO)

# Handle failure
def handle_fail():
print("Could not draw particle: try adjusting the std. dev. for the probability cutoff.")
sys.exit(0)

# Sample pair using rejection sampling
no_tries_max = 100
success = sampler.rejection_sample_pair(no_tries_max)
if not success:
handle_fail()

# Sample pair using CDF
success = sampler.cdf_sample_pair()
if not success:
handle_fail()
2 changes: 2 additions & 0 deletions samplePairsGaussian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .prob_calculator import *
from .sampler import *
from .prob_calculator_multispecies import *
from .sampler_multispecies import *
53 changes: 53 additions & 0 deletions samplePairsGaussian/prob_calculator_multispecies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from .prob_calculator import *

class ProbCalculatorMultiSpecies:

def __init__(self, prob_calculator_arr, species_arr):
"""Constructor.
Args:
posns (np.array([[float]])): particle positions. First axis are particles, seconds are coordinates in n-dimensional space
std_dev (float): standard deviation for cutting off probabilities
std_dev_clip_mult (float): multiplier for the standard deviation cutoff
"""

# Setup the logger
self._logger = logging.getLogger(__name__)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
self._logger.addHandler(ch)

# Level of logging to display
self._logger.setLevel(logging.ERROR)

# Length
if len(species_arr) != len(prob_calculator_arr):
self._logger.error("Error: length of species array = " + str(len(species_arr)) + " does not match the probabilities: " + str(len(prob_calculator_arr)) + "; Quitting.")
sys.exit(0)

# vars
self.no_species = len(species_arr)
self.prob_calculator_arr = prob_calculator_arr
self.species_arr = species_arr

# compute species probabilities
self.compute_species_probs()

def set_logging_level(self, level):
"""Sets the logging level
Args:
level (logging): logging level
"""
self._logger.setLevel(level)

def compute_species_probs(self):
"""compute probabilities for the different species.
"""

self.probs_species = np.zeros(self.no_species)
for i in range(0,self.no_species):
self.probs_species[i] = self.prob_calculator_arr[i].n
self.n_total = np.sum(self.probs_species)
self.probs_species /= self.n_total
14 changes: 7 additions & 7 deletions samplePairsGaussian/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ def rejection_sample_first_particle(self, no_tries_max=100, compute_probs=True):
Args:
no_tries_max (int): max. no. of tries for rejection sampling
compute_probs (bool): whether to first call compute_probs_first_particle for the ProbCalculator
compute_probs (bool): whether to first call compute_un_probs_first_particle for the ProbCalculator
Returns:
bool: True for success, False for failure
"""

# Form probabilities
if compute_probs:
self.prob_calculator.compute_probs_first_particle()
self.prob_calculator.compute_un_probs_first_particle()

i_try = 0
self.idx_first_particle = None
Expand Down Expand Up @@ -103,7 +103,7 @@ def rejection_sample_second_particle(self, no_tries_max=100, compute_probs=True)

# Form probabilities
if compute_probs:
self.prob_calculator.compute_probs_second_particle(self.idx_first_particle)
self.prob_calculator.compute_un_probs_second_particle(self.idx_first_particle)

i_try = 0
self.idx_second_particle = None
Expand Down Expand Up @@ -141,7 +141,7 @@ def rejection_sample_pair(self, no_tries_max=100, compute_probs_first_particle=T
"""

if compute_probs_first_particle:
self.prob_calculator.compute_probs_first_particle()
self.prob_calculator.compute_un_probs_first_particle()

# Turn off logging temp
level = self._logger.level
Expand Down Expand Up @@ -186,7 +186,7 @@ def cdf_sample_first_particle(self,compute_probs=True):

# Form probabilities
if compute_probs:
self.prob_calculator.compute_probs_first_particle()
self.prob_calculator.compute_un_probs_first_particle()

# Ensure normalized
if self.prob_calculator.are_probs_first_particle_normalized == False:
Expand Down Expand Up @@ -214,7 +214,7 @@ def cdf_sample_second_particle(self,compute_probs=True):

# Form probabilities
if compute_probs:
self.prob_calculator.compute_probs_second_particle(self.idx_first_particle)
self.prob_calculator.compute_un_probs_second_particle(self.idx_first_particle)

# Ensure normalized
if self.prob_calculator.are_probs_second_particle_normalized == False:
Expand All @@ -240,7 +240,7 @@ def cdf_sample_pair(self, compute_probs_first_particle=True):
"""

if compute_probs_first_particle:
self.prob_calculator.compute_probs_first_particle()
self.prob_calculator.compute_un_probs_first_particle()

# Ensure normalized
if self.prob_calculator.are_probs_first_particle_normalized == False:
Expand Down
Loading

0 comments on commit 23c6f48

Please sign in to comment.