Source code for nested_sampling.ns_gaussian

'''Python implementation of Nested Sampling (Skilling 2004) in order to compute the integral of a N-dim gaussian'''

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import argparse
from tqdm import tqdm
import logging
import time
from nested_sampling.functions import log_likelihood, autocorrelation, proposal

[docs]def nested_samplig(live_points, dim, boundary, proposal_distribution, verbose=False): '''Nested Sampling by Skilling (2004) Parameters ---------- live_points : numpy array Numpy array of dimension (number of live points, dimension + 2). From column 0 to d(=dimension) the vectors of parameter sampled from the parameter space are placed, column d+1 corresponds to the priors value and the last to the likelihood values dim : int Dimension of the parameter space. resample_function : str Choose between uniform or normal resample distribution verbose : bool Print some information. The default is False Returns ------- area : numpy array Array of the area elements accumulated during iterations Zlog : list List of two list: logZ + error and logZ - error. They are used for plots logL_worst : list List of the worst likelihoods obtained during iterations. They are the values that define the profile of the likelihood used in the actual computation of the Riemann sum prior_mass : list List of the value of the prior mass accumulated during the iteration T : list List of times required for sample the new objects with the proposal function steps : int Number of steps required accepted, rejected : int Accepted and rejected points over the whole NS run. They are the sum of all the accepted/rejected points during the sampling of the new object in the proposal function logH_list : list List of information values accumulated during iterations error : int Estimated error on the evidence. It is a first approximation based on the standard deviation of the prior mass values (the one found statistically, see Nested Sampling (Skilling 2004)) ''' N = live_points.shape[0] f = np.log(0.01) area = []; Zlog = [[],[]]; logL_worst = []; T = []; prior_mass = []; logH_list = [] logZ = -np.inf logH = -np.inf parameters = np.random.uniform(-boundary, boundary, size=(N, dim)) live_points[:, :dim] = parameters live_points[:, dim] = log_likelihood(parameters, dim, init=True) logwidth = np.log(1.0 - np.exp(-1.0/N)) steps = 0 rejected = 0 accepted = 0 while True: steps += 1 prior_mass.append(logwidth) Lw_idx = np.argmin(live_points[:, dim]) logLw = live_points[Lw_idx, dim] logL_worst.append(logLw) logZnew = np.logaddexp(logZ, logwidth+logLw) logZ = logZnew logH = np.logaddexp(logH, logwidth + logLw - logZ + np.log(logLw - logZ)) logH_list.append(logH) error = np.sqrt(np.exp(logH)/N) Zlog[0].append(logZ + error) Zlog[1].append(logZ - error) survivors = np.delete(live_points, Lw_idx, axis=0) std = np.mean(np.std(survivors[:dim], axis = 0)) area.append(logwidth+logLw) if verbose: print("i:{0} d={1} log(Lw)={2:.2f} term={3:.2f} log(Z)={4:.2f} std={5:.2f} e={6:.2f} H={7:.2f} prop={8}".format(steps, dim, logLw, (max(live_points[:,dim]) - steps/N - f - logZ), logZ, std, error, np.exp(logH), proposal_distribution)) new_sample, t, acc, rej = proposal(live_points[Lw_idx], dim, boundary, std, proposal_distribution) accepted += acc rejected += rej T.append(t) live_points[Lw_idx] = new_sample logwidth -= 1.0/N if max(live_points[:,dim]) - (steps+steps**0.5)/N < f + logZ: break final_term = np.log(np.exp(live_points[:,dim]).sum()*np.exp(-steps/N)/N) logZ = np.logaddexp(logZ, final_term) area = np.exp(area) return area, Zlog, logL_worst, prior_mass, logZ, T, steps, accepted, rejected, logH_list, error
if __name__ == "__main__": parser = argparse.ArgumentParser(description='Nested sampling') parser.add_argument('--dim', '-d', type=int, help='Max dimension of the parameter spaces. Iterations are from 2 to dim') parser.add_argument('--step', '-s', type=int, help='Step fro the dimension range') parser.add_argument('--num_live_points', '-n', type=int, help='Number of live points') parser.add_argument('--boundary', '-b', type=int, default=5, help='Boundaries for the prior (centered at zero). The default is 5 ') parser.add_argument('--proposal', '-pr', type=int, help='Proposal for the new object from the prior. 0 for uniform, 1 for normal') parser.add_argument('--plot', '-p', action='store_true', help='Plot the plots') parser.add_argument('--summary_plot', '-sp', action='store_true', help='Plot some summary information') parser.add_argument('--verbose', '-v', action='store_true', help='Print some info during iterations. the default is False') parser.add_argument("-log", "--log", default="info", help=("Provide logging level. Example --log debug', default='info")) args = parser.parse_args() levels = {'critical': logging.CRITICAL, 'error': logging.ERROR, 'warning': logging.WARNING, 'info': logging.INFO, 'debug': logging.DEBUG} logging.basicConfig(level=levels[args.log]) np.random.seed(95) n = args.num_live_points dim = args.dim boundary = args.boundary if args.proposal == 0: prop = 'uniform' if args.proposal == 1: prop = 'normal' time_tot = []; log_evidence_values = []; error_values = [] range_dim = np.arange(2,dim+1, args.step) true_values = [-d*np.log(2*boundary) for d in range_dim] for d in tqdm(range_dim): t_start = time.time() live_points = np.zeros((n, d+1), dtype=np.float64) area_plot, evidence_plot, likelihood_worst, prior_mass, log_evidence, t_resample, steps, acc, rej, logH, error = nested_samplig(live_points, d, boundary, proposal_distribution=prop, verbose=args.verbose) t_end = time.time() t_total = t_end - t_start time_tot.append(t_total) log_evidence_values.append(log_evidence) error_values.append(error) if args.plot: fig, ax1 = plt.subplots() ax1.plot(area_plot, color = 'k', label = 'Li*wi') ax2 = ax1.twinx() ax2.plot(evidence_plot[0], color = 'r', linewidth=0.6, label = 'logZ +- e') ax2.plot(evidence_plot[1], color = 'r', linewidth=0.6) plt.fill_between(np.arange(len(evidence_plot[0])),evidence_plot[0],evidence_plot[1], color='r', alpha=0.3) ax1.set_xlabel('Iterations') plt.grid() fig.legend() plt.savefig(f'results/images/{args.proposal}/area_dynamics_{d}.png') plt.figure() plt.scatter(np.arange(len(t_resample)),t_resample, s=0.5, c='k') plt.yscale('log') plt.xlabel('Iterations') plt.ylabel('Resampling time') plt.title('Time for resampling') plt.savefig(f'results/images/{args.proposal}/resampling_time_{d}.png') plt.figure() plt.plot(prior_mass, logH, 'k') plt.xlabel('Prior mass (log)') plt.ylabel('logH') plt.title('Information') plt.savefig(f'results/images/{args.proposal}/information_{d}.png') plt.close('all') with open(f'results/summaries/{args.proposal}/Summary_{d}.txt', 'w', encoding='utf-8') as file: file.write(f'''============ SUMMARY ============ \n Dimension of the integral = {d} \n Number of steps required = {steps} \n Evidence = {log_evidence:.2f} +- {error:.2f} \n Theoretical value = -{d*np.log(2*boundary)} \n Information = {np.exp(logH)[-1]:.2f} \n Maximum of the likelihood = {-d*np.log(np.sqrt(2*np.pi)):.2f} \n Proposal chosen: {prop} \n Last area value = {area_plot[-1]:.2f} \n Last worst Likelihood = {likelihood_worst[-1]:.2f} \n Accepted and rejected points: {acc}, {rej} \n Mass prior sum = {np.exp(prior_mass).sum():.2f} \n Total time: {t_total:.2f} s \n=================================''') file.close() compared_results = pd.DataFrame({'log_evidence_values': log_evidence_values, 'error': error_values, 'time': time_tot}) compared_results.to_csv(f'results/results_{args.proposal}.csv', index=False) if args.summary_plot: plt.figure() plt.plot(range_dim, time_tot, 'k--') plt.ylabel('Time (s)') plt.xlabel('Dimension') plt.grid() plt.scatter(range_dim, time_tot, c='black') plt.savefig(f'results/images/Total_time_per_dim_{args.proposal}.png') plt.figure() plt.plot(range_dim, true_values, 'k--', linewidth=0.6, alpha=0.4) plt.scatter(range_dim, true_values, s=3, c='black', label='True Values') plt.errorbar(range_dim, log_evidence_values, yerr=error_values, linewidth=0.6, elinewidth=1, linestyle='-.', label='Computed Values') plt.xlabel('Dimension') plt.ylabel('logZ') plt.legend() plt.savefig(f'results/images/Results_{args.proposal}.png') plt.close('all')