Source code for zdm.analyze_cube

""" Codes to analyze cube outputs """
import os
import numpy as np
import pickle
import glob

import math
import scipy

import pandas

from matplotlib import pyplot as plt

from zdm import io
from zdm import iteration

from IPython import embed

[docs] def slurp_cube(input_file:str, prefix:str, outfile:str, nsurveys, debug:bool=False, suffix:str='.csv'): """ Slurp the cube ASCII output files and write lC and ll into a numpy savez file Args: input_file (str): parameter file used to generate the cube prefix (str): prefix on the output files outfile (str): output file name. Should have .npz extension nsurveys (int): Number of surveys in the analysis. debug (int, optional): Debug? suffix (str, optional): File ending. Allows for old .out files """ # Grab em. The order doesn't matter files = glob.glob(prefix + "*" + suffix) # Init input_dict = io.process_jfile(input_file) _, cube_dict, vparam_dict = iteration.parse_input_dict(input_dict) PARAMS = list(vparam_dict.keys()) # Prep order, iorder = iteration.set_orders(cube_dict["parameter_order"], PARAMS) cube_shape = iteration.set_cube_shape(vparam_dict, order) param_shape = np.array([0] + cube_shape)[iorder].tolist()[:-1] # Outputs ll_cube = np.zeros(param_shape, dtype=np.float32) ll_cube[:] = -9e9 lC_cube = np.zeros(param_shape, dtype=np.float32) pzDM_cube = np.zeros(param_shape, dtype=np.float32) pDM_cube = np.zeros(param_shape, dtype=np.float32) pDMz_cube = np.zeros(param_shape, dtype=np.float32) pz_cube = np.zeros(param_shape, dtype=np.float32) survey_items = ["lls", "P_zDM", "P_n", "P_s", "N"] names = ["icube"] + PARAMS for ss in range(nsurveys): names += [item + f"_{ss}" for item in survey_items] names += ["ll"] # Loop on cube output files survey_arrays = {} nsurvey = 0 for ss, dfile in enumerate(files): print(f"Loading: {dfile}") df = pandas.read_csv(dfile) # Generate survey arrays if ss == 0: # Count the surveys for key in df.keys(): if "P_zDM" in key and len(key) > len("P_zDM"): # Generate them for item in survey_items: survey_arrays[item + key[-1]] = np.zeros(param_shape) nsurvey += 1 # Get indices indices = [] ns = df.n for n in ns: r_current = np.array( [0] + list(np.unravel_index(int(n), cube_shape, order="F")) ) current = r_current[iorder][:-1] # Truncate lC # Ravel me back idx = np.ravel_multi_index(current, ll_cube.shape) indices.append(idx) # Set ll_cube.flat[indices] = df.lls lC_cube.flat[indices] = df.lC # New ones pzDM_cube.flat[indices] = df.p_zgDM pDM_cube.flat[indices] = df.p_DM pDMz_cube.flat[indices] = df.p_DMgz pz_cube.flat[indices] = df.p_z # Survey items for key in survey_arrays.keys(): survey_arrays[key].flat[indices] = getattr(df, key) # Check if debug: embed(header="69 of analyze") # Grids out_dict = dict( ll=ll_cube, lC=lC_cube, params=PARAMS[:-1], pzDM=pzDM_cube, pDM=pDM_cube, pDMz=pDMz_cube, pz=pz_cube, ) # Save the parameter values too for name in PARAMS[:-1]: out_dict[name] = np.linspace( vparam_dict[name]["min"], vparam_dict[name]["max"], vparam_dict[name]["n"] ) # Survey items for key in survey_arrays.keys(): out_dict[key] = survey_arrays[key] # Write np.savez(outfile, **out_dict) #ll=ll_cube, lC=lC_cube, params=PARAMS[-1]) print(f"Wrote: {outfile}")
[docs] def apply_gaussian_prior(lls:np.ndarray, iparam:int, values:np.ndarray, mean:float, sigma:float): """ Applies a prior to parameter iparam with mean mean and deviation sigma. Returns a vector of length lls modified by that prior. """ NDIMS = len(lls.shape) if iparam < 0 or iparam >= NDIMS: raise ValueError( "Data only has ", NDIMS, " dimensions.", "Please select iparam between 0 and ", NDIMS - 1, " not ", iparam, ) wlls = np.copy(lls) for iv, val in enumerate(values): # select ivth value from iparam dimension big_slice = [slice(None, None, None)] * NDIMS big_slice[iparam] = iv # calculate weights. Yes I know this is silly. weight = np.exp(-0.5 * ((val - mean) / sigma) ** 2) weight = np.log10(weight) wlls[tuple(big_slice)] += weight return wlls
[docs] def apply_H0_prior( lls: np.ndarray, H0dim: int, H0values: np.ndarray, cmbH0: float, cmb_sigma: float, sn1aH0: float, sn1a_sigma: float, ): """ Applies a prior as a function of H0 This is flat between two systematic values Args: lls (np.ndarray): values of likelihoods H0dim (int): dimension of H0 in the data H0values (float): vector specifying values of H0 cmbH0 (float): value of H0 from the CMB cmb_sigma (float): 1 sigma uncertainty on H0 from CMB sn1aH0 (float): value of H0 from SN1a (distance ladder) measurements sn1a_sigma (float): 1 sigma uncertainty on H0 from SN1A Returns a vector of length lls modified by that prior. """ # orders limits appropriately if cmbH0 < sn1aH0: lowH0 = cmbH0 lowsigma = cmb_sigma highH0 = sn1aH0 highsigma = sn1a_sigma else: lowH0 = sn1aH0 lowsigma = sn1a_sigma highH0 = cmbH0 highsigma = cmb_sigma NDIMS = len(lls.shape) if H0dim < 0 or H0dim >= NDIMS: raise ValueError( "Data only has ", NDIMS, " dimensions.", "Please select H0dim between 0 and ", NDIMS - 1, " not ", H0dim, ) wlls = np.copy(lls) for iv, val in enumerate(H0values): # select ivth value from iparam dimension big_slice = [slice(None, None, None)] * NDIMS big_slice[H0dim] = iv # calculate weights. if val < lowH0: weight = -0.5 * ((val - lowH0) / lowsigma) ** 2 * np.log10(np.exp(1)) elif val > highH0: weight = -0.5 * ((val - highH0) / highsigma) ** 2 * np.log10(np.exp(1)) else: weight = 0.0 wlls[tuple(big_slice)] += weight return wlls
[docs] def apply_F_prior( lls: np.ndarray, Fdim: int, Fvalues: np.ndarray, F_0: float, F_sigma: float, ): """ Applies a prior as a function of F This is a Gaussian prior. Args: lls (np.ndarray): values of likelihoods Fdim (int): dimension of F0 in the data Fvalues (float): vector specifying values of H0 F_0 (float): value of F F_sigma (float): 1 sigma uncertainty on F Returns a vector of length lls modified by that prior. """ NDIMS = len(lls.shape) if Fdim < 0 or Fdim >= NDIMS: raise ValueError( "Data only has ", NDIMS, " dimensions.", "Please select F dim between 0 and ", NDIMS - 1, " not ", Fdim, ) wlls = np.copy(lls) for iv, val in enumerate(Fvalues): # select ivth value from iparam dimension big_slice = [slice(None, None, None)] * NDIMS big_slice[Fdim] = iv weight = -0.5 * ((val - F_0) / F_sigma) ** 2 * np.log10(np.exp(1)) wlls[tuple(big_slice)] += weight return wlls
[docs] def get_slice_from_parameters(data, plist, mcvals, verbose=False, wanted="ll"): """ Selects from data according to parameters which are as close to those in the mcvals as possible. Args: data: .npz structure generated by slurp_cube plist (list of strings): is a list of strings giving parameters mcvals (list of floats): is a float list of values one wants to approximate verbose (bool): toggle output level wanted: which column from the data to extract Returns: Array of NDIM=dim(data)-dim(plist) likelihood values """ NDIMS = len(data["params"]) if verbose: print("We have ", NDIMS, " dimensions to this cube") # sets up array of which values to keep which = np.full([NDIMS], -1) # finds the order of these parameters in the multidimensional cube iplist = [] for param in plist: iplist.append(np.where(data["params"] == param)[0][0]) # identifies the closest parameter values iclosest = [] for i, param in enumerate(plist): vals = data[param] vals = np.array(vals) diffs = (mcvals[i] - vals) ** 2 imindiff = np.argmin(diffs) iclosest.append(imindiff) if verbose: print( "For ", param, " MC truth ", mcvals[i], " closest cube value is the ", imindiff, "th, with value ", vals[imindiff], ) which[iplist[i]] = imindiff # selects the slice while keeping all MC parameters constant big_slice = [slice(None, None, None)] * NDIMS for i in np.arange(NDIMS): if which[i] >= 0: # insert code of -1 for "leave free" big_slice[i] = which[i] if isinstance(wanted, list): for i, item in enumerate(wanted): if i == 0: selection = data[item][tuple(big_slice)] else: selection += data[item][tuple(big_slice)] else: selection = data[wanted][tuple(big_slice)] selection = np.array(selection) return selection
[docs] def get_bayesian_data(lls: np.ndarray, plls: np.ndarray = None, pklfile=None): """ Method to perform simple Bayesian analysis on the Log-likelihood cube Args: lls (np.ndarray): Log-likelood cube plls (np.ndarray, optional): Log-likelihood cube corrected for priors (e.g. alpha). Defaults to None. pklfile (str, optional): If given, write the output to this pickle file. Defaults to None. Returns: tuple: uvals,vectors,wvectors lists of np.ndarray's of LL analysis One item per parameter in the cube """ NDIMS = len(lls.shape) # multiplies all log-likelihoods by the maximum value # ensures no floating point problems global_max = np.nanmax(lls) lls -= global_max # eventually remove this line if plls is None: plls = lls origlls = lls if plls is not None: w_global_max = np.nanmax(plls) plls = plls - w_global_max uvals = [] for i in np.arange(NDIMS): unique = np.arange(lls.shape[i]) uvals.append(unique) # we now have a list of unique values for each dimension vectors = [] # this will contain the best values for 1d plots wvectors = ( [] ) # holds same as above, but including spectral penalty factor from ASKAP obs # loop over the DIMS for i in np.arange(NDIMS): # does 1D values vector = np.zeros([len(uvals[i])]) wvector = np.zeros([len(uvals[i])]) # selects for lls a subset corresponding only to that particular value of a variables for iv, ivv in enumerate(uvals[i]): big_slice = [slice(None, None, None)] * NDIMS # Construct the slice big_slice[i] = ivv # set1=np.where(data[:,idim]==ivv) #selects for a set of values # lls=data[set1,llindex] lls = origlls[tuple(big_slice)].flatten() # ignores all values of 0, which is what missing data is # ignore = np.where(lls == 0.0)[0] # lls[ignore] = -99999 lls[np.isnan(lls)] = -99999 # selects all fits that are close to the peak (i.e. percentage within 0.1%) try: themax = np.nanmax(lls) except: # all nans, probability =0. Easy! vector[iv] = 0.0 wvector[iv] = 0.0 continue OKlls = np.isfinite(lls) & (lls > themax - 3) vector[iv] = np.sum(10 ** lls[OKlls]) if plls is not None: wlls = plls[tuple(big_slice)].flatten() wthemax = np.nanmax(wlls) OKwlls = np.isfinite(wlls) & (wlls > wthemax - 3) wvector[iv] = np.sum(10 ** wlls[OKwlls]) # import pdb; pdb.set_trace() # Check vector *= 1.0 / np.sum(vector) vector *= 1.0 / np.abs(uvals[i][1] - uvals[i][0]) vectors.append(vector) if plls is not None: wvector *= 1.0 / np.sum(wvector) wvector *= 1.0 / np.abs(uvals[i][1] - uvals[i][0]) wvectors.append(wvector) # now makes correction lls += global_max if plls is not None: plls += w_global_max # Pickle? if pklfile is not None: with open(pklfile, "wb") as output: pickle.dump(uvals, output, pickle.HIGHEST_PROTOCOL) pickle.dump(vectors, output, pickle.HIGHEST_PROTOCOL) pickle.dump(wvectors, output, pickle.HIGHEST_PROTOCOL) # result is just the total probability, normalised to unity, when summed over the parameter space # technically needs to be divided by the x-increment in bins. return uvals, vectors, wvectors
[docs] def get_2D_bayesian_data(lls: np.ndarray, plls: np.ndarray = None, pklfile=None): """ Method to perform simple Bayesian analysis on the Log-likelihood cube Args: lls (np.ndarray): Log-likelood cube plls (np.ndarray, optional): Log-likelihood cube corrected for priors (e.g. alpha). Defaults to None. pklfile (str, optional): If given, write the output to this pickle file. Defaults to None. Returns: tuple: uvals,ijs,arrays,warrays, uvals: values of each array, in form ijs: order of which parameters are combined in the arrays (e.g. [2,4]) arrays: lists of 2D np.ndarray's of LL analysis warrays: weighted with prior on alpha ijs, arrays, and warrays have Nitems = Nparams*(Nparams-1)/2 """ NDIMS = len(lls.shape) # multiplies all log-likelihoods by the maximum value global_max = np.nanmax(lls) lls -= global_max # eventually remove this line if plls is None: plls = lls if plls is not None: w_global_max = np.nanmax(plls) plls = plls - w_global_max origlls = lls uvals = [] for i in np.arange(NDIMS): unique = np.arange(lls.shape[i]) uvals.append(unique) # we now have a list of unique values for each dimension arrays = [] # this will contain the best values for 1d plots warrays = ( [] ) # holds same as above, but including spectral penalty factor from ASKAP obs ijs = [] # loop over the first dimensional combination for i in np.arange(NDIMS): # loops over the second dimension for j in np.arange(NDIMS - i - 1) + i + 1: # does 1D values array = np.zeros([len(uvals[i]), len(uvals[j])]) warray = np.zeros([len(uvals[i]), len(uvals[j])]) # selects for lls a subset corresponding only to that particular value of a variables for iv, ivv in enumerate(uvals[i]): big_slice = [slice(None, None, None)] * NDIMS # Construct the slice big_slice[i] = ivv for jv, jvv in enumerate(uvals[j]): # Construct the slice big_slice[j] = jvv # lls=data[set1,llindex] lls = origlls[tuple(big_slice)].flatten() # ignores all values of 0, which is what missing data is # ignore = np.where(lls == 0.0)[0] # lls[ignore] = -99999 lls[np.isnan(lls)] = -99999 try: themax = np.nanmax(lls) except: # all nans, probability =0. Easy! arrays[iv, jv] = 0.0 warrays[iv, jv] = 0.0 continue OKlls = np.isfinite(lls) & (lls > themax - 3) array[iv, jv] = np.sum(10 ** lls[OKlls]) if plls is not None: wlls = plls[tuple(big_slice)].flatten() wthemax = np.nanmax(wlls) OKwlls = np.isfinite(wlls) & (wlls > wthemax - 3) warray[iv, jv] = np.sum(10 ** wlls[OKwlls]) # normalisation over the parameter space to unity array *= 1.0 / np.sum(array) arrays.append(array) if plls is not None: warray *= 1.0 / np.sum(warray) warrays.append(warray) ijs.append([i, j]) lls += global_max if plls is not None: plls += w_global_max # Pickle? if pklfile is not None: with open(pklfile, "wb") as output: pickle.dump(uvals, output, pickle.HIGHEST_PROTOCOL) pickle.dump(vectors, output, pickle.HIGHEST_PROTOCOL) pickle.dump(wvectors, output, pickle.HIGHEST_PROTOCOL) # result is just the total probability, normalised to unity, when summed over the parameter space # technically needs to be divided by the x-increment in bins. return uvals,ijs,arrays,warrays
[docs] def get_maxl_data(lls:np.ndarray, plls:np.ndarray=None, pklfile=None): """ Method to perform simple Bayesian analysis on the Log-likelihood cube Args: lls (np.ndarray): Log-likelood cube plls (np.ndarray, optional): Log-likelihood cube corrected for priors (e.g. alpha). Defaults to None. pklfile (str, optional): If given, write the output to this pickle file. Defaults to None. Returns: tuple: uvals,vectors,wvectors lists of np.ndarray's of LL analysis One item per parameter in the cube """ NDIMS = len(lls.shape) # multiplies all log-likelihoods by the maximum value # ensures no floating point problems global_max = np.nanmax(lls) lls -= global_max # eventually remove this line if plls is None: plls = lls origlls = lls if plls is not None: w_global_max = np.nanmax(plls) plls = plls - w_global_max uvals = [] for i in np.arange(NDIMS): unique = np.arange(lls.shape[i]) uvals.append(unique) # we now have a list of unique values for each dimension vectors = [] # this will contain the best values for 1d plots wvectors = ( [] ) # holds same as above, but including spectral penalty factor from ASKAP obs # loop over the DIMS for i in np.arange(NDIMS): # does 1D values vector = np.zeros([len(uvals[i])]) wvector = np.zeros([len(uvals[i])]) # selects for lls a subset corresponding only to that particular value of a variables for iv, ivv in enumerate(uvals[i]): big_slice = [slice(None, None, None)] * NDIMS # Construct the slice big_slice[i] = ivv # set1=np.where(data[:,idim]==ivv) #selects for a set of values # lls=data[set1,llindex] lls = origlls[tuple(big_slice)].flatten() # ignores all values of 0, which is what missing data is ignore = np.where(lls == 0.0)[0] lls[ignore] = -99999 # selects all fits that are close to the peak (i.e. percentage within 0.1%) try: themax = np.nanmax(lls) except: # all nans, probability =0. Easy! vector[iv] = 0.0 wvector[iv] = 0.0 continue vector[iv] = themax if plls is not None: wlls = plls[tuple(big_slice)].flatten() wthemax = np.nanmax(wlls) wvector[iv] = wthemax # import pdb; pdb.set_trace() # Check vectors.append(vector) if plls is not None: wvectors.append(wvector) # now makes correction lls += global_max if plls is not None: plls += w_global_max # Pickle? if pklfile is not None: with open(pklfile, "wb") as output: pickle.dump(uvals, output, pickle.HIGHEST_PROTOCOL) pickle.dump(vectors, output, pickle.HIGHEST_PROTOCOL) pickle.dump(wvectors, output, pickle.HIGHEST_PROTOCOL) # result is just the total probability, normalised to unit, when summed over the parameter space # technically needs to be divided by the x-increment in bins. return uvals, vectors, wvectors
[docs] def get_2D_maxl_data(lls: np.ndarray, plls: np.ndarray = None, pklfile=None): """ Method to perform simple Bayesian analysis on the Log-likelihood cube Args: lls (np.ndarray): Log-likelood cube plls (np.ndarray, optional): Log-likelihood cube corrected for priors (e.g. alpha). Defaults to None. pklfile (str, optional): If given, write the output to this pickle file. Defaults to None. Returns: tuple: uvals,ijs,arrays,warrays, uvals: values of each array, in form ijs: order of which parameters are combined in the arrays (e.g. [2,4]) arrays: lists of 2D np.ndarray's of LL analysis warrays: weighted with prior on alpha ijs, arrays, and warrays have Nitems = Nparams*(Nparams-1)/2 """ NDIMS = len(lls.shape) # multiplies all log-likelihoods by the maximum value global_max = np.nanmax(lls) lls -= global_max # eventually remove this line if plls is None: plls = lls if plls is not None: w_global_max = np.nanmax(plls) plls = plls - w_global_max origlls = lls uvals = [] for i in np.arange(NDIMS): unique = np.arange(lls.shape[i]) uvals.append(unique) # we now have a list of unique values for each dimension arrays = [] # this will contain the best values for 1d plots warrays = ( [] ) # holds same as above, but including spectral penalty factor from ASKAP obs ijs = [] # loop over the first dimensional combination for i in np.arange(NDIMS): # loops over the second dimension for j in np.arange(NDIMS - i - 1) + i + 1: # does 1D values array = np.zeros([len(uvals[i]), len(uvals[j])]) warray = np.zeros([len(uvals[i]), len(uvals[j])]) # selects for lls a subset corresponding only to that particular value of a variables for iv, ivv in enumerate(uvals[i]): big_slice = [slice(None, None, None)] * NDIMS # Construct the slice big_slice[i] = ivv for jv, jvv in enumerate(uvals[j]): # Construct the slice big_slice[j] = jvv lls = origlls[tuple(big_slice)].flatten() # ignores all values of 0, which is what missing data is ignore = np.where(lls == 0.0)[0] lls[ignore] = -99999 try: themax = np.nanmax(lls) except: # all nans, probability =0. Easy! arrays[iv, jv] = 0.0 warrays[iv, jv] = 0.0 continue array[iv, jv] = themax if plls is not None: wlls = plls[tuple(big_slice)].flatten() wthemax = np.nanmax(wlls) warray[iv, jv] = wthemax # normalisation over the parameter space to unity arrays.append(array) if plls is not None: warray *= 1.0 / np.sum(warray) warrays.append(warray) ijs.append([i, j]) lls += global_max if plls is not None: plls += w_global_max # Pickle? if pklfile is not None: with open(pklfile, "wb") as output: pickle.dump(uvals, output, pickle.HIGHEST_PROTOCOL) pickle.dump(vectors, output, pickle.HIGHEST_PROTOCOL) pickle.dump(wvectors, output, pickle.HIGHEST_PROTOCOL) # result is just the total probability, normalised to unity, when summed over the parameter space # technically needs to be divided by the x-increment in bins. return uvals, ijs, arrays, warrays
[docs] def interpolate_points(oldx, oldy, logspline=False, kind="cubic"): """ performs simle spline interpolation of the data args: oldx: np array of original data on x axis oldy: np array of original data on y axis logspline (bool): perform interpolation in log space kind: type of spline to fit returns: x,y: interpolated points """ #### does unweighted plotting #### x = np.linspace(oldx[0], oldx[-1], 400) if logspline: # does interpolation in log-space f = scipy.interpolate.interp1d(oldx, np.log(oldy), kind=kind) y = np.exp(f(x)) else: f = scipy.interpolate.interp1d(oldx, oldy, kind=kind) y = f(x) # check to ensure the splines have not done anything too dumb f2 = scipy.interpolate.interp1d(oldx, np.log(oldy), kind=kind) y2 = np.exp(f2(x)) # replaces linear interpolation in small-value regime where splines can be dumb if np.min(y2) < 1.0e-2: very_small = np.where(y2 < 1e-2)[0] y[very_small]=y2[very_small] y[np.where(y < 0.)]=0. return x,y
[docs] def extract_limits(x,y,p,method=1,dumb=False,interp=False): """ args: x (np.ndarray): xvalues of data (independent variable on which we place limits) y (np.ndarray): yvalues of data (probability distribution) p: probability at each tail of the distribution (e.g. 0.16 for ~1 sigma, 0.025 for 2 sigma) Use "dumb = True" to interpet this as the C.I. (e.g. 0.68 for 1 sigma, 0.95 for 2 sigma) p: probability of confidence interval (e.g. (1-0.68)/2 for ~1 sigma) method: 1: includes most-likely points in C.I. first 2: sets equal probability to either side interp: if True, interpolates between points returns: v0,v1: lower and upper bounds of range """ if dumb: p = (1.-p)/2. # sets intervals according to highest likelihood if method == 1: # this sorts from lowest to highest sy = np.sort(y) # highest to lowest sy = sy[::-1] # now 0 to 1 csy = np.cumsum(sy) csy /= csy[-1] # this is the likelihood we cut on # this creates small intervals: where csy < this value cut=np.where(csy < 1.-2.*p)[0] # allowed values in interval cut=cut[-1] # last allowed value cut2=sy[cut] OK=np.where(y > cut2)[0] ik1=OK[0] ik2=OK[-1] v0=x[ik1] v1=x[ik2] # we now extend the range assigning excess probability to either side # in theory, this should be done according to the slope if interp: XS = 1.-2.*p - csy[cut] # checks if either is at the limit if ik1==0: # all excess at ik2 XS2=XS elif ik2 == x.size-1: XS2=XS else: XS2 = XS/2. if ik1 > 0: dx1 = x[ik1]-x[ik1-1] fraction1 = (XS2/y[ik1-1])*dx1 v0 -= fraction1 if ik2< x.size-1: dx2 = x[ik2+1]-x[ik2] fraction2 = (XS2/y[ik2+1])*dx2 v1 += fraction2 if v1 < v0: temp=v0 v0=v1 v1=temp elif method==2: cy=np.cumsum(y) cy /= cy[-1] # ignores normalisation in dx direction # gets lower value inside=np.where(cy > p)[0] ik1=inside[0] v0=x[ik1] # value above if interp: print("WARNING: interp currently untested with method=2") if ik1>0: dx = x[ik1]-x[ik1-1] else: dx = x[ik1+1]-x[ik1] dp = cy[ik1]-p # excess p fraction = (dp/y[ik1])*dx v0 -= fraction # gets upper value inside=np.where(cy > 1.-p)[0] ik2=inside[0] v1=x[ik2] if interp: dx = x[ik2]-x[ik2-1] dp = cy[ik2]-(1.-p) # excess p,to reduce fraction = (dp/y[ik2])*dx v1 -= fraction return v0,v1,ik1,ik2
[docs] def do_single_plots(uvals,vectors,wvectors,names,tag=None, fig_exten='.png', dolevels=False,log=True,outdir='SingleFigs/', vparams_dict=None, prefix='',truth=None,latexnames=None, units=None,logspline=True, others=None, compact=False, others_labels=None): """ Generate a series of 1D plots of the cube parameters Args: uvals (list): List, each element containing a np.ndarray giving the parameter values for each parameter. Total length nparams. vectors (list): [For each parameter, contains an unweighted vector giving 1D probabilities for that value of the parameter] wvectors ([list]): [For each parameter, contains a weighted (with prior) vector giving 1D probabilities for that value of the parameter] names ([type]): [description] tag ([type], optional): [description]. Defaults to None. fig_exten (str, optional): [description]. Defaults to '.png'. dolevels (bool, optional): [description]. Defaults to False. log (bool, optional): [description]. Defaults to True. outdir (str, optional): [description]. Defaults to 'SingleFigs/'. vparams_dict (dict, optional): parameter dict -- used to set x-values. Defaults to None. prefix (str, optional): [description]. Defaults to ''. latexnames: latex of axis names units: latex of units to attach to x-axis only logspline(bool): do spline fitting in logspace? others(list of arrays): list of other plots to add to data compact:??? others_labels(list of strings): labels for others """ if tag is not None: outdir = tag + outdir if not os.path.isdir(outdir): os.makedirs(outdir) if log: logfile = outdir + "limits.dat" logfile = open(logfile, "w") if dolevels: results=np.zeros([len(uvals), 9]) # holds mean and error info for each parameter prior_results=np.zeros([len(uvals), 9]) # does the same with alpha priors for i, vals in enumerate(uvals): # Convert vals? if vparams_dict is not None: # Check assert vparams_dict[names[i]]["n"] == len(vals) vals = np.linspace( vparams_dict[names[i]]["min"], vparams_dict[names[i]]["max"], len(vals) ) plt.figure(dpi=300) lw = 3 if compact: plt.gcf().set_figheight(3) plt.gcf().set_figwidth(3.5) # get raw ylimits # removes zeroes, could lead to strange behaviour in theory ymax = np.max(vectors[i]) temp = np.where((vectors[i] > 0.0) & (np.isfinite(vectors[i]))) # set to integers and get range ymax = math.ceil(ymax) ymin = 0.0 kind = None if len(vals[temp]) == 1: continue if len(vals[temp]) < 4: kind = "linear" else: kind = "cubic" # does the for alpha x, y = interpolate_points(vals[temp], vectors[i][temp], logspline, kind=kind) norm = np.sum(y) * (x[1] - x[0]) # integral y dx ~ sum y delta x norm = np.abs(norm) y /= norm vectors[i][temp] /= norm plt.plot(x, y, label="Uniform", color="blue", linewidth=lw, linestyle="-") if compact: plt.plot(vals[temp], vectors[i][temp], color="blue", linestyle="", marker="s", ms=5) else: plt.plot(vals[temp], vectors[i][temp], color="blue", linestyle="", marker="s") # weighted plotting if wvectors is not None: wx, wy = interpolate_points( vals[temp], wvectors[i][temp], logspline, kind=kind ) wnorm = np.sum(wy) * (x[1] - x[0]) wnorm = np.abs(wnorm) wvectors[i][temp] /= wnorm wy /= wnorm plt.plot(x, wy, label="Gauss", color="orange", linewidth=lw, linestyle="--") ax = plt.gca() ax.xaxis.set_ticks_position("both") # ax.Xaxis.set_ticks_position('both') if wvectors is not None: ymax = np.max([np.max(wy), np.max(y)]) else: ymax = np.max(y) # ymax=(np.ceil(ymax*5.))/5. if dolevels == True: # and i != 1: limvals = np.array([0.00135, 0.0228, 0.05, 0.15866]) labels = ["99.7%", "95%", "90%", "68%"] styles = ["--", ":", "-.", "-"] upper = np.max(vectors[i]) besty = np.max(y) imax = np.argmax(y) xmax = x[imax] results[i, 0] = xmax string = names[i] + " & {0:4.2f}".format(xmax) for iav, av in enumerate(limvals): # need to integrate from min to some point # gets cumulative distribution # sets intervals according to highest likelihood v0, v1, ik1, ik2 = extract_limits(x, y, av, method=1) string += " & $_{" string += "{0:4.2f}".format(v0 - xmax) string += "}^{+" string += "{0:4.2f}".format(v1 - xmax) string += "}$ " results[i, 2 * iav + 1] = v0 - xmax results[i, 2 * iav + 2] = v1 - xmax hl = 0.03 doff = (x[-1] - x[0]) / 100.0 ybar = (av + ymax) / 2.0 xbar = (v0 + v1) / 2.0 # need to separate the plots if wvectors is not None: if ik1 != 0: # if iav==3 and i==4: # ybar -= 0.8 plt.plot( [x[ik1], x[ik1]], [ymax, y[ik1]], color="blue", linestyle=styles[iav], alpha=0.5, ) if i == 1: t = plt.text( x[ik1] + doff * 0.5, (ymax) + (-3.6 + iav) * 0.2 * ymax, labels[iav], rotation=90, fontsize=12, ) t.set_bbox( dict( facecolor="white", alpha=0.7, edgecolor="white", pad=-1, ) ) if ik2 != wy.size - 1: plt.plot( [x[ik2], x[ik2]], [ymax, y[ik2]], color="blue", linestyle=styles[iav], alpha=0.5, ) if i != 1: t = plt.text( x[ik2] - doff * 3, (ymax) + (-3.6 + iav) * 0.2 * ymax, labels[iav], rotation=90, fontsize=12, ) t.set_bbox( dict( facecolor="white", alpha=0.7, edgecolor="white", pad=-1, ) ) else: plt.plot( [x[ik1], x[ik1]], [0, y[ik1]], color="red", linestyle=styles[iav], ) plt.plot( [x[ik2], x[ik2]], [0, y[ik2]], color="red", linestyle=styles[iav], ) Dx = x[-1] - x[0] if Dx < 0.0: plt.text( x[ik1], y[ik1] + ymax * 0.03, labels[iav], color="red", rotation=90, ) plt.text( x[ik2] + 0.02 * Dx, y[ik2] + ymax * 0.03, labels[iav], color="red", rotation=90, ) else: plt.text( x[ik1] - 0.02 * Dx, y[ik1] + ymax * 0.03, labels[iav], color="red", rotation=90, ) plt.text( x[ik2], y[ik2] + ymax * 0.03, labels[iav], color="red", rotation=90, ) # print("For parameter ",i," CI ",iav, " is ",x[ik1]," to ",x[ik2]) string += " & " # could just ignore the weightings if wvectors is not None: plt.plot( vals[temp], wvectors[i][temp], color="orange", linestyle="", marker="o" ) if dolevels == True: limvals = np.array([0.0015, 0.025, 0.05, 0.16]) labels = ["99.7%", "95%", "90%", "68%"] styles = ["--", ":", "-.", "-"] upper = np.max(wvectors[i]) besty = np.max(wy) imax = np.argmax(wy) xmax = x[imax] prior_results[i, 0] = xmax string += " {0:4.2f}".format(xmax) for iav, av in enumerate(limvals): # sets intervals according to highest likelihood v0, v1, ik1, ik2 = extract_limits(x, wy, av, method=1) string += " & $_{" string += "{0:4.2f}".format(v0 - xmax) string += "}^{+" string += "{0:4.2f}".format(v1 - xmax) string += "}$ " prior_results[i, 2 * iav + 1] = v0 - xmax prior_results[i, 2 * iav + 2] = v1 - xmax # version 2 hl = 0.03 doff = (x[-1] - x[0]) / 100.0 if i == 1: doff = 0.001 ybar = (av + ymin) / 2.0 xbar = (v0 + v1) / 2.0 if ik1 != 0: plt.plot( [x[ik1], x[ik1]], [ymin, wy[ik1]], color="orange", linestyle=styles[iav], ) if i == 1: t = plt.text( x[ik1] + doff * 0.5, wy[ik1] / 2.2, labels[iav], rotation=90, fontsize=12, ) t.set_bbox( dict( facecolor="white", alpha=0.7, edgecolor="white", pad=-1, ) ) if ik2 != wy.size - 1: plt.plot( [x[ik2], x[ik2]], [ymin, wy[ik2]], color="orange", linestyle=styles[iav], ) if i != 1: t = plt.text( x[ik2] - doff * 3, wy[ik2] / 2.2, labels[iav], rotation=90, fontsize=12, ) t.set_bbox( dict( facecolor="white", alpha=0.7, edgecolor="white", pad=-1, ) ) other_styles = [":", "--", "-."] # other_colors = ["orange", "green", "red"] # plot any other plots if others is not None: if others[i] is not None: for io, data in enumerate(others[i]): x, y = interpolate_points(vals, data, logspline, kind=kind) norm = np.sum(y) * (x[1] - x[0]) # integral y dx ~ sum y delta x norm = np.abs(norm) y /= norm imax = np.argmax(y) xmax = x[imax] if dolevels: logfile_others = outdir + f"limits_others_{others_labels[io]}.dat" logfile_others = open(logfile_others, "w") others_string = "" # print(f"Confidence intervals for alternative distribution {names[i]}, {others_labels[io]}") # limvals = np.array([0.00135, 0.0228, 0.05, 0.15866]) # labels = ["99.7%", "95%", "90%", "68%"] limvals = np.array([0.15866]) labels = ["68%"] for iav, av in enumerate(limvals): v0, v1, ik1, ik2 = extract_limits(x, y, av, method=1) others_string += "{} : ${}_{{{}}}^{{+{}}}$".format(labels[iav], format(xmax, '.2f'), format(v0 - xmax, '.2f'), format(v1 - xmax, '.2f')) logfile_others.write(string + "\n") logfile_others.close() # data_norm = np.sum(data) * (vals[1] - vals[0]) # data_norm = np.abs(norm) # plt.plot(vals, data/data_norm, color=other_colors[io % 3], marker="s") plt.plot( x, y, color="grey", linewidth=1, linestyle=other_styles[io % 3], label=others_labels[io % 3] ) plt.legend() if dolevels: string += "\\\\" if log: logfile.write(string + "\n") else: print(string) # plt.ylim(0.,ymax) plt.gca().set_ylim(bottom=0) if truth is not None: plt.plot( [truth[i], truth[i]], plt.gca().get_ylim(), color="black", linestyle=":" ) Dx = x[-1] - x[0] plt.text(truth[i] + 0.01 * Dx, ymax * 0.4, "simulated truth", rotation=90) if latexnames is not None: if units is not None: plt.xlabel(latexnames[i] + " " + units[i]) else: plt.xlabel(latexnames[i]) plt.ylabel("$p($" + latexnames[i] + "$)$") else: plt.xlabel(names[i]) plt.ylabel("p(" + names[i] + ")") if i == 4 and wvectors is not None: plt.legend(loc="upper left", title="Prior on $\\alpha$") plt.tight_layout() plt.savefig(os.path.join(outdir, prefix + names[i] + fig_exten), dpi=300) plt.close() if log: logfile.close() if dolevels: return results, prior_results else: return
[docs] def make_1d_plots_by_contribution( data, contributions, labels, prefix="", fig_exten=".png", log=False, splines=True, latexnames=None, units=None, linestyles=None, colors=None, ): """ contributions: list of vectors giving various likelihood terms args: splines (bool): draw cubic splines Labels: lists labels stating what these are latexnames: latex for x and p(X) units: appends units to x axis but not p(X) """ ######################### 1D plots, split by terms ################ all_uvals = [] all_vectors = [] all_wvectors = [] combined = data["pzDM"] + data["pDM"] # gets 1D Bayesian curves for each contribution for datatype in contributions: uvals, vectors, wvectors = get_bayesian_data(datatype) all_uvals.append(uvals) all_vectors.append(vectors) all_wvectors.append(wvectors) params = data["params"] # gets unique values for each axis param_vals = [] param_list = [ data["lEmax"], data["H0"], data["alpha"], data["gamma"], data["sfr_n"], data["lmean"], data["lsigma"], ] xlatexnames = [ "\\log_{10} E_{\\rm max} {\\rm [erg]}", "H_0 {\\rm [km\,s^{-1}\,Mpc^{-1}]}", "\\alpha", "\\gamma", "n_{\\rm sfr}", "\\mu_{\\rm host} {\\rm [pc\,cm^{-3}]}", "\\sigma_{\\rm host}", ] ylatexnames = [ "\\log_{10} E_{\\rm max}", "H_0", "\\alpha", "\\gamma", "n_{\\rm sfr}", "\\mu_{\\rm host}", "\\sigma_{\\rm host}", ] for col in param_list: unique = np.unique(col) param_vals.append(unique) # assigns different plotting styles to help distinguish curves if linestyles is None: linestyles = ["-", "--", "-.", ":", "-", "--", "-.", ":", "-", "--", "-.", ":"] if colors is None: colors = plt.rcParams["axes.prop_cycle"].by_key()["color"] for which in np.arange(len(param_list)): plt.figure() plt.xlabel("$" + xlatexnames[which] + "$") plt.ylabel("$p(" + ylatexnames[which] + ")$") xvals = param_vals[which] for idata, vectors in enumerate(all_vectors): if splines: xdata = np.linspace(xvals[0], xvals[-1], 100) f = scipy.interpolate.interp1d( xvals, np.log(vectors[which]), kind="cubic" ) ydata = np.exp(f(xdata)) plt.plot( xdata, ydata, label=labels[idata], linestyle=linestyles[idata], color=colors[idata], ) plt.scatter( xvals, vectors[which], color=plt.gca().lines[-1].get_color() ) else: ydata = vectors[which] xdata = xvals # print(labels[idata]," has values ",vector) plt.plot( xdata, ydata, label=labels[idata], linestyle=linestyles[idata], color=colors[idata], ) if log: plt.yscale("log") # plt.ylim(np.max(vector)*1e-3,np.max(vector)) #improve this plt.legend() plt.savefig(prefix + params[which] + fig_exten) plt.close()
[docs] def gen_vparams(indices: tuple, vparam_dict: dict): new_dict = {} for ss, key in enumerate(vparam_dict.keys()): if vparam_dict[key]["n"] <= 0: continue # vals = np.linspace( vparam_dict[key]["min"], vparam_dict[key]["max"], vparam_dict[key]["n"] ) # new_dict[key] = vals[indices[ss]] # Return return new_dict
[docs] def make_2d_plot(array, xlabel, ylabel, xvals, yvals, savename=None, norm=None): """ Makes 2D plot given an array of probabilities array: 2D grid of data to plot xlabel: string to label on x axis ylabel: string to label y axis xvals: np array, values of x data yvals: np array, values of y data savename (optional): string to save data under """ plt.figure(dpi=300) plt.xlabel(xlabel) plt.ylabel(ylabel) # calculates increment in values Dx = xvals[-1] - xvals[0] Dy = yvals[-1] - yvals[0] nx = xvals.size ny = yvals.size dx = np.abs(Dx / nx) dy = np.abs(Dy / ny) aspect = np.abs(dx / dy * nx / ny) # the dx/2 etc is so that parameter values line up with the centres of each pixel extent = [ np.min(xvals) - dx / 2.0, np.max(xvals) + dx / 2.0, np.min(yvals) - dy / 2.0, np.max(yvals) + dy / 2.0, ] if norm == 0: array = array / np.max(array, axis=0) clabel = "$p($" + ylabel + "|" + xlabel + "$)$" elif norm == 1: array = array.T / np.max(array, axis=1) array = array.T clabel = "$p($" + xlabel + "|" + ylabel + "$)$" else: clabel = "$p($" + xlabel + "," + ylabel + "$)$" plt.imshow(array.T, origin="lower", extent=extent, aspect=aspect) plt.xlabel(xlabel, fontsize=18) plt.ylabel(ylabel, fontsize=18) plt.xticks(rotation=90) cbar = plt.colorbar() cbar.set_label(clabel, fontsize=18) if savename is None: savename = xlabel + "_" + ylabel + ".pdf" plt.tight_layout() plt.savefig(savename) plt.close()