'''Python implementation of Nested Sampling (Skilling 2004) in order to compute the integral of a N-dim gaussian'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import argparse
from tqdm import tqdm
import logging
import time
from nested_sampling.functions import log_likelihood, autocorrelation, proposal
[docs]def nested_samplig(live_points, dim, boundary, proposal_distribution, verbose=False):
'''Nested Sampling by Skilling (2004)
Parameters
----------
live_points : numpy array
Numpy array of dimension (number of live points, dimension + 2). From column 0
to d(=dimension) the vectors of parameter sampled from the parameter space are
placed, column d+1 corresponds to the priors value and the last to the likelihood values
dim : int
Dimension of the parameter space.
resample_function : str
Choose between uniform or normal resample distribution
verbose : bool
Print some information. The default is False
Returns
-------
area : numpy array
Array of the area elements accumulated during iterations
Zlog : list
List of two list: logZ + error and logZ - error. They are used for plots
logL_worst : list
List of the worst likelihoods obtained during iterations. They are the values that
define the profile of the likelihood used in the actual computation of the Riemann sum
prior_mass : list
List of the value of the prior mass accumulated during the iteration
T : list
List of times required for sample the new objects with the proposal function
steps : int
Number of steps required
accepted, rejected : int
Accepted and rejected points over the whole NS run. They are the sum of all the accepted/rejected
points during the sampling of the new object in the proposal function
logH_list : list
List of information values accumulated during iterations
error : int
Estimated error on the evidence. It is a first approximation based on the standard deviation
of the prior mass values (the one found statistically, see Nested Sampling (Skilling 2004))
'''
N = live_points.shape[0]
f = np.log(0.01)
area = []; Zlog = [[],[]]; logL_worst = []; T = []; prior_mass = []; logH_list = []
logZ = -np.inf
logH = -np.inf
parameters = np.random.uniform(-boundary, boundary, size=(N, dim))
live_points[:, :dim] = parameters
live_points[:, dim] = log_likelihood(parameters, dim, init=True)
logwidth = np.log(1.0 - np.exp(-1.0/N))
steps = 0
rejected = 0
accepted = 0
while True:
steps += 1
prior_mass.append(logwidth)
Lw_idx = np.argmin(live_points[:, dim])
logLw = live_points[Lw_idx, dim]
logL_worst.append(logLw)
logZnew = np.logaddexp(logZ, logwidth+logLw)
logZ = logZnew
logH = np.logaddexp(logH, logwidth + logLw - logZ + np.log(logLw - logZ))
logH_list.append(logH)
error = np.sqrt(np.exp(logH)/N)
Zlog[0].append(logZ + error)
Zlog[1].append(logZ - error)
survivors = np.delete(live_points, Lw_idx, axis=0)
std = np.mean(np.std(survivors[:dim], axis = 0))
area.append(logwidth+logLw)
if verbose:
print("i:{0} d={1} log(Lw)={2:.2f} term={3:.2f} log(Z)={4:.2f} std={5:.2f} e={6:.2f} H={7:.2f} prop={8}".format(steps, dim, logLw, (max(live_points[:,dim]) - steps/N - f - logZ), logZ, std, error, np.exp(logH), proposal_distribution))
new_sample, t, acc, rej = proposal(live_points[Lw_idx], dim, boundary, std, proposal_distribution)
accepted += acc
rejected += rej
T.append(t)
live_points[Lw_idx] = new_sample
logwidth -= 1.0/N
if max(live_points[:,dim]) - (steps+steps**0.5)/N < f + logZ:
break
final_term = np.log(np.exp(live_points[:,dim]).sum()*np.exp(-steps/N)/N)
logZ = np.logaddexp(logZ, final_term)
area = np.exp(area)
return area, Zlog, logL_worst, prior_mass, logZ, T, steps, accepted, rejected, logH_list, error
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Nested sampling')
parser.add_argument('--dim', '-d', type=int, help='Max dimension of the parameter spaces. Iterations are from 2 to dim')
parser.add_argument('--step', '-s', type=int, help='Step fro the dimension range')
parser.add_argument('--num_live_points', '-n', type=int, help='Number of live points')
parser.add_argument('--boundary', '-b', type=int, default=5, help='Boundaries for the prior (centered at zero). The default is 5 ')
parser.add_argument('--proposal', '-pr', type=int, help='Proposal for the new object from the prior. 0 for uniform, 1 for normal')
parser.add_argument('--plot', '-p', action='store_true', help='Plot the plots')
parser.add_argument('--summary_plot', '-sp', action='store_true', help='Plot some summary information')
parser.add_argument('--verbose', '-v', action='store_true', help='Print some info during iterations. the default is False')
parser.add_argument("-log", "--log", default="info",
help=("Provide logging level. Example --log debug', default='info"))
args = parser.parse_args()
levels = {'critical': logging.CRITICAL,
'error': logging.ERROR,
'warning': logging.WARNING,
'info': logging.INFO,
'debug': logging.DEBUG}
logging.basicConfig(level=levels[args.log])
np.random.seed(95)
n = args.num_live_points
dim = args.dim
boundary = args.boundary
if args.proposal == 0: prop = 'uniform'
if args.proposal == 1: prop = 'normal'
time_tot = []; log_evidence_values = []; error_values = []
range_dim = np.arange(2,dim+1, args.step)
true_values = [-d*np.log(2*boundary) for d in range_dim]
for d in tqdm(range_dim):
t_start = time.time()
live_points = np.zeros((n, d+1), dtype=np.float64)
area_plot, evidence_plot, likelihood_worst, prior_mass, log_evidence, t_resample, steps, acc, rej, logH, error = nested_samplig(live_points, d, boundary, proposal_distribution=prop, verbose=args.verbose)
t_end = time.time()
t_total = t_end - t_start
time_tot.append(t_total)
log_evidence_values.append(log_evidence)
error_values.append(error)
if args.plot:
fig, ax1 = plt.subplots()
ax1.plot(area_plot, color = 'k', label = 'Li*wi')
ax2 = ax1.twinx()
ax2.plot(evidence_plot[0], color = 'r', linewidth=0.6, label = 'logZ +- e')
ax2.plot(evidence_plot[1], color = 'r', linewidth=0.6)
plt.fill_between(np.arange(len(evidence_plot[0])),evidence_plot[0],evidence_plot[1], color='r', alpha=0.3)
ax1.set_xlabel('Iterations')
plt.grid()
fig.legend()
plt.savefig(f'results/images/{args.proposal}/area_dynamics_{d}.png')
plt.figure()
plt.scatter(np.arange(len(t_resample)),t_resample, s=0.5, c='k')
plt.yscale('log')
plt.xlabel('Iterations')
plt.ylabel('Resampling time')
plt.title('Time for resampling')
plt.savefig(f'results/images/{args.proposal}/resampling_time_{d}.png')
plt.figure()
plt.plot(prior_mass, logH, 'k')
plt.xlabel('Prior mass (log)')
plt.ylabel('logH')
plt.title('Information')
plt.savefig(f'results/images/{args.proposal}/information_{d}.png')
plt.close('all')
with open(f'results/summaries/{args.proposal}/Summary_{d}.txt', 'w', encoding='utf-8') as file:
file.write(f'''============ SUMMARY ============
\n Dimension of the integral = {d}
\n Number of steps required = {steps}
\n Evidence = {log_evidence:.2f} +- {error:.2f}
\n Theoretical value = -{d*np.log(2*boundary)}
\n Information = {np.exp(logH)[-1]:.2f}
\n Maximum of the likelihood = {-d*np.log(np.sqrt(2*np.pi)):.2f}
\n Proposal chosen: {prop}
\n Last area value = {area_plot[-1]:.2f}
\n Last worst Likelihood = {likelihood_worst[-1]:.2f}
\n Accepted and rejected points: {acc}, {rej}
\n Mass prior sum = {np.exp(prior_mass).sum():.2f}
\n Total time: {t_total:.2f} s
\n=================================''')
file.close()
compared_results = pd.DataFrame({'log_evidence_values': log_evidence_values, 'error': error_values, 'time': time_tot})
compared_results.to_csv(f'results/results_{args.proposal}.csv', index=False)
if args.summary_plot:
plt.figure()
plt.plot(range_dim, time_tot, 'k--')
plt.ylabel('Time (s)')
plt.xlabel('Dimension')
plt.grid()
plt.scatter(range_dim, time_tot, c='black')
plt.savefig(f'results/images/Total_time_per_dim_{args.proposal}.png')
plt.figure()
plt.plot(range_dim, true_values, 'k--', linewidth=0.6, alpha=0.4)
plt.scatter(range_dim, true_values, s=3, c='black', label='True Values')
plt.errorbar(range_dim, log_evidence_values, yerr=error_values, linewidth=0.6, elinewidth=1, linestyle='-.', label='Computed Values')
plt.xlabel('Dimension')
plt.ylabel('logZ')
plt.legend()
plt.savefig(f'results/images/Results_{args.proposal}.png')
plt.close('all')