diff --git a/bin/run_vega_mpi.py b/bin/run_vega_mpi.py index eba3855b..3b43202f 100644 --- a/bin/run_vega_mpi.py +++ b/bin/run_vega_mpi.py @@ -1,10 +1,10 @@ #!/usr/bin/env python -from vega import VegaInterface -from vega.sampler_interface import Sampler -from mpi4py import MPI import argparse import sys +from mpi4py import MPI + +from vega import VegaInterface if __name__ == '__main__': pars = argparse.ArgumentParser( @@ -56,12 +56,27 @@ def print_func(message): ' but no "[monte carlo]" section provided.') # Run sampler - if vega.has_sampler: - print_func('Running the sampler') - sampler = Sampler(vega.main_config['Polychord'], - sampling_params, vega.log_lik) + if not vega.run_sampler: + raise ValueError('Warning: You called "run_vega_mpi.py" without asking' + ' for the sampler. Add "run_sampler = True" to the "[control]" section.') + if vega.sampler == 'Polychord': + from vega.samplers.polychord import Polychord + + print_func('Running Polychord') + sampler = Polychord(vega.main_config['Polychord'], sampling_params, vega.log_lik) sampler.run() - else: - raise ValueError('Warning: You called "run_vega_mpi.py" without asking' - ' for the sampler. Add "sampler = True" to the "[control]" section.') + elif vega.sampler == 'PocoMC': + from vega.samplers.pocomc import PocoMC + + print_func('Running PocoMC') + sampler = PocoMC(vega.main_config['PocoMC'], sampling_params, vega.log_lik) + mpi_comm.barrier() + sampler.run() + mpi_comm.barrier() + + if cpu_rank == 0: + sampler.write_chain() + mpi_comm.barrier() + + print_func('Finished running sampler') diff --git a/vega/sampler_interface.py b/vega/sampler_interface.py deleted file mode 100644 index 5cc1f559..00000000 --- a/vega/sampler_interface.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np -import os.path -import sys -import pypolychord -from pypolychord.settings import PolyChordSettings -from pypolychord.priors import UniformPrior -from pathlib import Path -from vega.parameters.param_utils import build_names -from mpi4py import MPI - - -class Sampler: - ''' Interface between Vega and the nested sampler PolyChord ''' - - def __init__(self, polychord_setup, limits, log_lik_func): - """ - - Parameters - ---------- - polychord_setup : ConfigParser - Polychord section from the main config - limits : dict - Dictionary with the prior limits of the sampled parameters - log_lik_func : f(params) - Log Likelihood function to be passed to Polychord - """ - self.limits = limits - self.names = limits.keys() - self.num_params = len(limits) - self.num_derived = 0 - self.log_lik = log_lik_func - self.getdist_latex = polychord_setup.getboolean('getdist_latex', True) - - # Check limits are well defined - for lims in self.limits.values(): - if None in lims: - raise ValueError('Sampler needs well defined prior limits.' - ' You passed a None. Please give numbers, or' - ' just say par_name = True to use defaults.') - - # Initialize the Polychord settings - self.settings, self.parnames_path = self.get_polychord_settings( - polychord_setup, self.num_params, self.num_derived) - - @staticmethod - def get_polychord_settings(polychord_setup, num_params, num_derived): - """Extract polychord settings and create the settings object. - - Parameters - ---------- - polychord_setup : ConfigParser - Polychord section from the main config - num_params : int - Number of sampled parameters - num_derived : int - Number of derived parameters - - Returns - ------- - PolyChordSettings - Settings object for running Polychord - """ - # Seed and path/name - seed = polychord_setup.getint('seed', int(0)) - path = os.path.expandvars(polychord_setup.get('path')) - name = polychord_setup.get('name') - - # The key config parameters - num_live = polychord_setup.getint('num_live', int(25*num_params)) - num_repeats = polychord_setup.getint('num_repeats', int(5*num_params)) - precision = polychord_setup.getfloat('precision', float(0.001)) - - # Resume should almost always be true - resume = polychord_setup.getboolean('resume', True) - write_dead = polychord_setup.getboolean('write_dead', True) - - # Useful for plotting as it gives you more posterior samples - boost_posterior = polychord_setup.getfloat('boost_posterior', - float(0.0)) - - # Do we do clustering, useful for multimodal distributions - do_clustering = polychord_setup.getboolean('do_clustering', False) - cluster_posteriors = polychord_setup.getboolean('cluster_posteriors', - False) - - # Perform maximisation at the end of the chain - maximise = polychord_setup.getboolean('maximise', False) - - # These control different sampling speeds - # grade_frac : List[float] - # (Default: [1]) - # The amount of time to spend in each speed. - # If any of grade_frac are <= 1, then polychord will time each - # sub-speed, and then choose num_repeats for the number of slowest - # repeats, and spend the proportion of time indicated by grade_frac. - # Otherwise this indicates the number of repeats to spend in - # each speed. - # grade_dims : List[int] - # (Default: nDims) - # The number of parameters within each speed. - - # Initialize the settings - settings = PolyChordSettings(num_params, num_derived, base_dir=path, - file_root=name, seed=seed, nlive=num_live, - num_repeats=num_repeats, - precision_criterion=precision, - write_resume=resume, read_resume=resume, - boost_posterior=boost_posterior, - do_clustering=do_clustering, - cluster_posteriors=cluster_posteriors, - equals=False, write_dead=write_dead, - maximise=maximise, - write_live=False, write_prior=False) - - # Check the path and get the paramnames path - output_path = Path(path) - err_msg = ("The PolyChord 'path' does not correspond to an existing" - " folder. Create the output folder before running.") - assert output_path.exists(), err_msg - parnames_path = output_path / (name + '.paramnames') - - return settings, parnames_path - - def write_parnames(self): - mpi_comm = MPI.COMM_WORLD - cpu_rank = mpi_comm.Get_rank() - - if cpu_rank == 0: - print('Writing parameter names') - sys.stdout.flush() - latex_names = build_names(list(self.names)) - with open(self.parnames_path, 'w') as f: - for name, latex in latex_names.items(): - if self.getdist_latex: - f.write('%s %s\n' % (name, latex)) - else: - f.write('%s $%s$\n' % (name, latex)) - print('Finished writing parameter names') - sys.stdout.flush() - - mpi_comm.barrier() - - def run(self): - """Run Polychord. We need to pass three functions: - - log_lik: takes a list of parameter values and - returns tuple: (log_lik, list of derived) - - prior: takes a unit hypercube and converts it to the - physical parameters - - dumper: Optional function if we want to get some output while - the chain is running. For now it's empty - """ - # Write parameter names - self.write_parnames() - - def log_lik(theta): - """ Wrapper for likelihood. No derived for now """ - params = {} - for i, name in enumerate(self.names): - params[name] = theta[i] - - log_lik = self.log_lik(params) - return log_lik, [] - - def prior(hypercube): - """ Uniform prior """ - prior = [] - for i, limits in enumerate(self.limits.values()): - prior.append(UniformPrior(limits[0], limits[1])(hypercube[i])) - return prior - - def dumper(live, dead, logweights, logZ, logZ_err): - """ Dumper empty for now""" - pass - - pypolychord.run_polychord(log_lik, self.num_params, self.num_derived, - self.settings, prior, dumper) diff --git a/vega/samplers/__init__.py b/vega/samplers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/vega/samplers/pocomc.py b/vega/samplers/pocomc.py new file mode 100644 index 00000000..b3edb1fa --- /dev/null +++ b/vega/samplers/pocomc.py @@ -0,0 +1,63 @@ +from pathlib import Path + +import numpy as np +import pocomc as pc +from mpi4py import MPI +from mpi4py.futures import MPIPoolExecutor +from scipy.stats import uniform + +from vega.samplers.sampler_interface import Sampler + + +class PocoMC(Sampler): + """ Interface between Vega and the PocoMC sampler """ + + def __init__(self, pocomc_setup, limits, log_lik_func): + super().__init__(pocomc_setup, limits, log_lik_func) + + def get_sampler_settings(self, sampler_config, num_params, num_derived): + # Initialize the pocomc settings + self.precondition = sampler_config.getboolean('precondition', True) + self.dynamic = sampler_config.getboolean('dynamic', True) + self.n_effective = sampler_config.getint('n_effective', 512) + self.n_active = sampler_config.getint('n_active', 256) + self.n_total = sampler_config.getint('n_total', 1024) + self.n_evidence = sampler_config.getint('n_evidence', 0) + self.save_every = sampler_config.getint('save_every', 3) + + self.prior = pc.Prior( + [uniform(self.limits[par][0], self.limits[par][1]-self.limits[par][0]) + for par in self.limits] + ) + + def run(self): + """ Run the PocoMC sampler """ + mpi_comm = MPI.COMM_WORLD + num_mpi_threads = mpi_comm.Get_size() + with MPIPoolExecutor(num_mpi_threads) as pool: + self.pocomc_sampler = pc.Sampler( + self.prior, self.log_lik, pool=pool, + output_dir=self.path, save_every=self.save_every, + dynamic=self.dynamic, precondition=self.precondition, + n_effective=self.n_effective, n_active=self.n_active, + ) + self.pocomc_sampler.run(self.n_total, self.n_evidence) + + def write_chain(self): + # Get the weighted posterior samples + samples, weights, logl, logp = self.pocomc_sampler.posterior() + + # Write the chain + chain_path = Path(self.path) / (self.name + '.txt') + chain = np.column_stack((weights, logl, samples)) + print(f'Writing chain to {chain_path}') + np.savetxt(chain_path, chain, header='Weights, Log Likelihood, ' + ', '.join(self.names)) + + # Write stats + stats_path = Path(self.path) / (self.name + '.stats') + stats = np.column_stack((weights, logl, logp)) + np.savetxt(stats_path, stats, header='Weights, Log Likelihood, Log Prior') + + # Print Evidence + logZ, logZerr = self.pocomc_sampler.evidence() + print(f'log(Z) = {logZ} +/- {logZerr}') diff --git a/vega/samplers/polychord.py b/vega/samplers/polychord.py new file mode 100644 index 00000000..0160f99d --- /dev/null +++ b/vega/samplers/polychord.py @@ -0,0 +1,114 @@ +import pypolychord +from pypolychord.priors import UniformPrior +from pypolychord.settings import PolyChordSettings + +from vega.samplers.sampler_interface import Sampler + + +class Polychord(Sampler): + ''' Interface between Vega and the nested sampler PolyChord ''' + + def __init__(self, polychord_setup, limits, log_lik_func): + super().__init__(polychord_setup, limits, log_lik_func) + + def get_sampler_settings(self, sampler_config, num_params, num_derived): + """Extract polychord settings and create the settings object. + + Parameters + ---------- + polychord_setup : ConfigParser + Polychord section from the main config + num_params : int + Number of sampled parameters + num_derived : int + Number of derived parameters + + Returns + ------- + PolyChordSettings + Settings object for running Polychord + """ + # Seed and path/name + seed = sampler_config.getint('seed', int(0)) + + # The key config parameters + num_live = sampler_config.getint('num_live', int(25*num_params)) + num_repeats = sampler_config.getint('num_repeats', int(5*num_params)) + precision = sampler_config.getfloat('precision', float(0.001)) + + # Resume should almost always be true + resume = sampler_config.getboolean('resume', True) + write_dead = sampler_config.getboolean('write_dead', True) + + # Useful for plotting as it gives you more posterior samples + boost_posterior = sampler_config.getfloat('boost_posterior', float(0.0)) + + # Do we do clustering, useful for multimodal distributions + do_clustering = sampler_config.getboolean('do_clustering', False) + cluster_posteriors = sampler_config.getboolean('cluster_posteriors', False) + + # Perform maximisation at the end of the chain + maximise = sampler_config.getboolean('maximise', False) + + # These control different sampling speeds + # grade_frac : List[float] + # (Default: [1]) + # The amount of time to spend in each speed. + # If any of grade_frac are <= 1, then polychord will time each + # sub-speed, and then choose num_repeats for the number of slowest + # repeats, and spend the proportion of time indicated by grade_frac. + # Otherwise this indicates the number of repeats to spend in + # each speed. + # grade_dims : List[int] + # (Default: nDims) + # The number of parameters within each speed. + + # Initialize the settings + self.settings = PolyChordSettings( + num_params, num_derived, base_dir=self.path, + file_root=self.name, seed=seed, nlive=num_live, + num_repeats=num_repeats, + precision_criterion=precision, + write_resume=resume, read_resume=resume, + boost_posterior=boost_posterior, + do_clustering=do_clustering, + cluster_posteriors=cluster_posteriors, + equals=False, write_dead=write_dead, + maximise=maximise, + write_live=False, write_prior=False + ) + + def run(self): + """Run Polychord. We need to pass three functions: + + log_lik: takes a list of parameter values and + returns tuple: (log_lik, list of derived) + + prior: takes a unit hypercube and converts it to the + physical parameters + + dumper: Optional function if we want to get some output while + the chain is running. For now it's empty + """ + def log_lik(theta): + """ Wrapper for likelihood. No derived for now """ + params = {} + for i, name in enumerate(self.names): + params[name] = theta[i] + + log_lik = self.log_lik(params) + return log_lik, [] + + def prior(hypercube): + """ Uniform prior """ + prior = [] + for i, limits in enumerate(self.limits.values()): + prior.append(UniformPrior(limits[0], limits[1])(hypercube[i])) + return prior + + def dumper(live, dead, logweights, logZ, logZ_err): + """ Dumper empty for now""" + pass + + pypolychord.run_polychord( + log_lik, self.num_params, self.num_derived, self.settings, prior, dumper) diff --git a/vega/samplers/sampler_interface.py b/vega/samplers/sampler_interface.py new file mode 100644 index 00000000..6208450f --- /dev/null +++ b/vega/samplers/sampler_interface.py @@ -0,0 +1,78 @@ +import os.path +import sys +from pathlib import Path + +from mpi4py import MPI + +from vega.parameters.param_utils import build_names + + +class Sampler: + ''' Interface between Vega and the nested sampler PolyChord ''' + + def __init__(self, sampler_config, limits, log_lik_func): + """ + + Parameters + ---------- + polychord_setup : ConfigParser + Polychord section from the main config + limits : dict + Dictionary with the prior limits of the sampled parameters + log_lik_func : f(params) + Log Likelihood function to be passed to Polychord + """ + self.limits = limits + self.names = limits.keys() + self.num_params = len(limits) + self.num_derived = 0 + self.log_lik = log_lik_func + self.getdist_latex = sampler_config.getboolean('getdist_latex', True) + + # Check limits are well defined + for lims in self.limits.values(): + if None in lims: + raise ValueError('Sampler needs well defined prior limits.' + ' You passed a None. Please give numbers, or' + ' just say par_name = True to use defaults.') + + # Check the path and get the paramnames path + self.path = os.path.expandvars(sampler_config.get('path')) + self.name = sampler_config.get('name') + + output_path = Path(self.path) + err_msg = ("The sampler 'path' does not correspond to an existing" + " folder. Create the output folder before running.") + assert output_path.exists(), err_msg + parnames_path = output_path / (self.name + '.paramnames') + + # Write parameter names + self.write_parnames(parnames_path) + + # Initialize the sampler settings + self.get_sampler_settings(sampler_config, self.num_params, self.num_derived) + + def write_parnames(self, parnames_path): + mpi_comm = MPI.COMM_WORLD + cpu_rank = mpi_comm.Get_rank() + + if cpu_rank == 0: + print('Writing parameter names') + sys.stdout.flush() + latex_names = build_names(list(self.names)) + with open(parnames_path, 'w') as f: + for name, latex in latex_names.items(): + if self.getdist_latex: + f.write('%s %s\n' % (name, latex)) + else: + f.write('%s $%s$\n' % (name, latex)) + print('Finished writing parameter names') + sys.stdout.flush() + + mpi_comm.barrier() + + def get_sampler_settings(self, sampler_config, num_params, num_derived): + raise NotImplementedError('This method should be implemented in the child class') + + def run(self): + raise NotImplementedError('This method should be implemented in the child class') diff --git a/vega/vega_interface.py b/vega/vega_interface.py index 8852af45..7a1db28e 100644 --- a/vega/vega_interface.py +++ b/vega/vega_interface.py @@ -187,12 +187,15 @@ def __init__(self, main_path): ) # Check for sampler - self.has_sampler = False + self.run_sampler = False if 'control' in self.main_config: - self.has_sampler = self.main_config['control'].getboolean('sampler', False) - if self.has_sampler: - if 'Polychord' not in self.main_config: - raise RuntimeError('run_sampler called, but no sampler initialized') + self.run_sampler = self.main_config['control'].getboolean('run_sampler', False) + self.sampler = self.main_config['control'].get('sampler', None) + if self.run_sampler: + if self.sampler not in ['Polychord', 'Pocomc']: + raise ValueError('Sampler not recognized. Please use Polychord or PocoMC.') + if self.sampler not in self.main_config: + raise RuntimeError('run_sampler called, but no sampler config found') # Initialize the output object self.output = Output(self.main_config['output'], self.data, self.corr_items, self.analysis)