Source code for OCD_modeling.mcmc.history_analysis

### Analyze history of optimizations
##  Author: Sebastien Naze
#   QIMR 2023

import argparse
from datetime import datetime 
import itertools
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import os 
import pandas as pd
import pickle
import pingouin as pg
import pyabc
import scipy
import seaborn as sbn
import sklearn

import OCD_modeling
# import most relevant environment and project variable
from OCD_modeling.utils import cohen_d, get_working_dir, today

working_dir = get_working_dir()
proj_dir = os.path.join(working_dir, 'lab_lucac/sebastiN/projects/OCD_modeling')


[docs] def import_results(args): """ Read optimization results from DB """ histories = dict() for i,db_name in enumerate(args.histories): db_path = os.path.join(proj_dir, 'traces', db_name+'.db') name = args.history_names[i] try: histories[name] = pyabc.History("sqlite:///"+db_path) except: print(f'Optimization {db_name} does not exist, check name.') return histories
def load_empirical_FC(args): """ Loads functional connectivity from patients and controls (data, not simulation) """ with open(os.path.join(proj_dir, 'postprocessing', 'df_roi_corr_avg_2023.pkl'), 'rb') as f: df_data = pickle.load(f) return df_data def get_ax_inds(nrows, ncols, row_offset=2, col_offset=2): """ create list of axes to iterate from when plotting """ for row_ind, col_ind in itertools.product(np.arange(nrows), np.arange(ncols)): if ((col_ind<col_offset) & (row_ind<row_offset)): continue else: yield (row_ind, col_ind)
[docs] def plot_epsilons(histories, ax=None, args=None): """ Plot evolution of epsilons across generations Parameters ---------- histories: list Optimization pyABC history objects (e.g. controls and patients) ax: matplotlib.Axes (optional) Axis to draw the figure. args: argparse.Namespace (optional) Extra arguments (for saving, etc). """ n_hist = len(histories) if ax==None: fig = plt.figure(figsize=[5, 5]) ax = plt.subplot(1,1,1) for i,(name,history) in enumerate(histories.items()): if name=='controls': pyabc.visualization.plot_epsilons([history], ax=ax) plt.legend(list(histories.keys()))
[docs] def plot_weights(histories, gs=None, nrows=None, ncols=None, row_offset=None, col_offset=None, args=None): """ Plot evolution of weights across generations Parameters ---------- histories: list Optimization pyABC history objects (e.g. controls and patients) gs: matplotlib.GridSpec (optional) GridSpec object to draw the figure into. nrows, ncols: int number of rows/columns to make the grid. row_offset, col_offset: int offsets to account for when some (top left) entries of the grid should be left empty. args: argparse.Namespace (optional) Extra arguments (e.g. for saving, etc). """ n_hist = len(histories) n_gen = np.max([h.max_t for h in histories.values()]) if gs==None: fig = plt.figure(figsize=[15, 4]) nrows,ncols, row_offset, col_offset = 0,0,0,0 gs = plt.GridSpec(nrows=nrows, ncols=ncols) for i,(name,history) in enumerate(histories.items()): ax_inds = get_ax_inds(nrows, ncols, row_offset=row_offset, col_offset=col_offset) #for j in np.arange(history.max_t+1): for j in np.arange(1,10): x,y = next(ax_inds) ax = plt.subplot(gs[x,y]) df,w = history.get_distribution(t=j) plt.scatter(np.arange(len(w)), w, s=10, alpha=0.1) plt.title(f"t={j}", fontsize=10) plt.yscale('log') #if x==nrows-1: plt.xlabel('particle', fontsize=10) if (((y==col_offset) and (x<row_offset)) or (y==0)): plt.ylabel('$\omega$', fontsize=10) #plt.legend(args.history_names) ax.spines.top.set_visible(False) ax.spines.right.set_visible(False) #xtl = ax.get_xticklabels() #ax.set_xticklabels(xtl, fontsize=12) #ytl = ax.get_yticklabels() #ax.set_yticklabels(ytl, fontsize=12) plt.tight_layout()
[docs] def plot_param_distrib(histories, args): """ plot posterior distribution at end of optimization """ colors = {'controls':'lightskyblue', 'patients':'orange'} n_hist = len(histories) n_gen = np.max([h.max_t for h in histories.values()]) fig = plt.figure(figsize=[40, 20]) xs = dict() # xlims for i,(name,history) in enumerate(histories.items()): for j in range(history.max_t+1): df,w = history.get_distribution(t=j) n_cols = len(list(df.columns)) for k,col in enumerate(df.columns): plt.subplot(n_gen+1,n_cols,j*n_cols+k+1) plt.hist(df[col], alpha=0.2, color=colors[args.history_names[i]]) plt.xlabel(col) # use first generation (prior) to set xlims if j==0: xs[col] = [df[col].min(), df[col].max()] plt.xlim(xs[col]) plt.tight_layout()
def plot_kde_matrix(histories, args): for (name,history) in histories.items(): df,w = history.get_distribution(m=0) pyabc.visualization.plot_kde_matrix(df, w)
[docs] def compute_stats(histories, args=None): """ Computes the statistics of the optimization outcome, i.e. tests the posterior distributions (parameters) between controls and patients. Parameters --------- histories: list Controls and patient pyABC history objects. args: argparse.Namespace Extra options Returns ------- df_stats: pandas DataFrame Statistics. """ df_post_con,w_con = histories['controls'].get_distribution(t=args.gens[0]) df_post_pat,w_pat = histories['patients'].get_distribution(t=args.gens[1]) cols = df_post_con.columns mc = len(cols) stats = [] for col in cols: x,y = df_post_con[col], df_post_pat[col] stat_norm_con,p_norm_con = scipy.stats.normaltest(x) stat_norm_pat,p_norm_pat = scipy.stats.normaltest(y) t,p_t = scipy.stats.ttest_ind(x,y, permutations=10000) u,p_u = scipy.stats.mannwhitneyu(x,y) h,p_h = scipy.stats.kruskal(x,y) d = OCD_modeling.utils.cohen_d(x,y) ks_res = scipy.stats.ks_2samp(x,y) line = "{:15} OCD={:.2f} controls={:.2f} t={:8.2f} p={:.4f} p_bf={:.4f} normality(con/pat)={}/{} \n\ U={:8d} p={:.4f} p_bf={:.4f} H={:8.3f} p={:.4f} p_bf={:.4f} \ d={:.4f} ks={:.2f} p_ks={:.4f} p_bf={:.4f}".format( col,y.median(),x.median(),t,p_t,p_t*mc,p_norm_con>0.05,p_norm_pat>0.05,int(u),p_u, p_u*mc,h,p_h,p_h*mc,d,ks_res.statistic, ks_res.pvalue, ks_res.pvalue*mc) if args.plot_stats: print(line) df_line = {'param':col, 't':t, 'p_t':p_t, 'p_t_bf':p_t*len(cols), 'normality':(p_norm_con>0.05)&(p_norm_pat>0.05), \ 'u':u, 'p_u':p_u, 'p_u_bf':p_u*len(cols), \ 'h':h, 'p_h':p_h, 'p_h_bf':p_h*len(cols), 'd':d, 'ks':ks_res.statistic, 'p_ks':ks_res.pvalue} stats.append(df_line) df_stats = pd.DataFrame(stats) return df_stats
[docs] def compute_kdes(histories, n_pts = 100, args=None): """ Computes Kernel Density Estimates (KDEs) of the posterior distributions of parameters. Parameters ---------- histories: dict Nested dictionnary of SQL alchemy history objects. n_pts: int Number of points used to estimate the probability density functions (PDFs). args: argparse.Namespace Extra options. Returns ------- kdes: dict Nested dictiorany of KDEs and associated PDFs. cols: list List of parameters for which the KDEs were estimated. """ kdes = dict() cols = [] for i,(cohort,history) in enumerate(histories.items()): kdes[cohort] = dict() df,w = history.get_distribution(t=args.gens[i]) for col in df.columns: cols.append(col) vmin, vmax = df[col].min(), df[col].max() bw = (vmax-vmin)/10 kde = sklearn.neighbors.KernelDensity(kernel='gaussian', bandwidth=bw).fit(np.array(df[col]).reshape(-1,1)) X = np.linspace(vmin-0.5*bw, vmax+0.5*bw, 100).reshape(-1,1) pdf = kde.score_samples(X) kdes[cohort][col] = {'kde':kde, 'pdf':pdf, 'X':X, 'vals': df[col]} cols = np.unique(cols) if args.save_kdes: fname = os.path.join(proj_dir, 'postprocessing', 'kdes_'+'_'.join(args.histories)+today()+'.pkl') with open(fname, 'wb') as f: pickle.dump(kdes, f) return kdes, cols
def custom_plot_epsilons(histories, ax=None, n_gens=11): colors = ['lightblue', 'orange'] labels = ['controls', 'OCD']#list(histories.keys()) if ax==None: fig = plt.figure() ax = plt.subplot(1,1,1) for i,(name,history) in enumerate(histories.items()): # extract epsilons # note: first entry is from calibration and thus translates to inf, # thus must be discarded eps = np.array(history.get_all_populations()['epsilon'][:n_gens]) # plot ax.plot(eps, 'x--', label=labels[i], color=colors[i], alpha=1) plt.xticks(np.arange(0,n_gens,2), labels=np.arange(0,n_gens,2)) plt.yscale('log') plt.ylabel('$\epsilon$', fontsize=10) plt.xlabel('$t$', fontsize=10) ax.spines.top.set_visible(False) ax.spines.right.set_visible(False) plt.legend(labels, fontsize=10) #xtl = ax.get_xticklabels() #ax.set_xticklabels(xtl, fontsize=12) #ax.set_yticklabels(ax.get_yticklabels(), fontsize=12) # ------------------------------------- # Observed vs simulated stats and plots # ------------------------------------- def stats_obs_vs_sim(df_data, df_base): """ T-tests within and across cohorts, observed and simulated traces """ # prep sim dataframe df_sim = df_base.melt(id_vars=['subj', 'base_cohort'], value_vars=['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC'], var_name='pathway', value_name='corr') df_sim = df_sim.rename(columns={'base_cohort':'cohort'}) df_data['type'] = 'observed' df_sim['type'] = 'simulated' df_fc = pd.concat([df_data, df_sim], join='inner', ignore_index=True) pathways = ['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC'] stats = dict((pathway,dict()) for pathway in pathways) print("================") print("TWO-FACTOR ANOVA") print("================") for pathway in df_fc.pathway.unique(): print(pathway) aov = pg.anova(data=df_fc[df_fc.pathway==pathway], between=['cohort', 'type'], dv='corr') pg.print_table(aov) stats[pathway]['aov'] = aov print("=============") print("WELCH T-TESTS") print("=============") for pathway in pathways: print(pathway) df_tmp = df_fc[df_fc.pathway==pathway] cc = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='simulated')]['corr']) print("con-con -- obs-sim T={:.2f} p={:.4f} d={:.2f}".format(cc['T'][0], cc['p-val'][0], cc['cohen-d'][0])) pp = pg.ttest(df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='simulated')]['corr']) print("pat-pat -- obs-sim T={:.2f} p={:.4f} d={:.2f}".format(pp['T'][0], pp['p-val'][0], pp['cohen-d'][0])) cpo = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='observed')]['corr']) print("con-pat -- obs-obs T={:.2f} p={:.4f} d={:.2f}".format(cpo['T'][0], cpo['p-val'][0], cpo['cohen-d'][0])) cps = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='simulated')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='simulated')]['corr']) print("con-pat -- sim-sim T={:.2f} p={:.4f} d={:.2f}".format(cps['T'][0], cps['p-val'][0], cps['cohen-d'][0])) os = pg.ttest(df_tmp[(df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.type=='simulated')]['corr']) print("obs-sim T={:.2f} p={:.4f} d={:.2f}".format(cps['T'][0], cps['p-val'][0], cps['cohen-d'][0])) print("\n") stats[pathway]['cc'], stats[pathway]['pp'], stats[pathway]['cpo'], stats[pathway]['cps'], stats[pathway]['os'] = cc, pp, cpo, cps, os return stats
[docs] def plot_fc_sim_vs_data(df_data, df_base, stats, axes=None, args=None): """ Plot functional connectivity (Pearson correlation) across the frontostriatal regions of interests in empirical and simulated data. Parameters ---------- df_data: pandas.DataFrame Empirical data extracted from fMRI in OCD subjects and healthy controls. df_base: pandas.DataFrame Simulated data using parameters infered from posterior distributions (either only OCD parameters or only controls, no permutation to model virtual intervention). stats: dict (deprecated) Statistics within and between cohorts for both empirical and simulated data (deprecated since now stats are directly computed within plotting function). axes: list of matplotlib.Axes (optional) List of axes to plot data (if embedded in other figure, otherwise create new figure). args: argparse.Namespace (optional) Extra arguments (e.g. for saving, etc). """ palette = {'controls': 'lightblue', 'patients': 'orange'} pathways = ['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC'] pathway_names = ['NAcc-OFC', 'NAcc-LPFC', 'NAcc-dPut', 'OFC-LPFC', 'dPut-OFC', 'dPut-LPFC'] df_tmp = df_base.iloc[:384].melt(id_vars=['subj', 'base_cohort'], value_vars=pathways, var_name='pathway', value_name='corr') df_tmp = df_tmp.rename(columns={'base_cohort':'cohort'}) # select n random simulations for each cohort n = 50 cons = df_tmp[df_tmp.cohort=='controls'].subj.unique() cons = cons[np.random.permutation(len(cons))][:n] pats = df_tmp[df_tmp.cohort=='patients'].subj.unique() pats = pats[np.random.permutation(len(pats))][:n] subjs = np.concatenate([cons, pats]) df_tmp = df_tmp[df_tmp.subj.isin(subjs)] if axes==None: plt.figure(figsize=[10,3]) ax_data = plt.subplot(1,2,1) ax_sim = plt.subplot(1,2,2) plt.rcParams.update({'figure.autolayout':True}) plt.tight_layout() else: ax_data = axes['data'] ax_sim = axes['sim'] def plot_stats_stars(df, ax, y_level=0.5): """ Add stars on top of plots for stats significance """ for i,pathway in enumerate(pathways): ttest = scipy.stats.ttest_ind(np.array(df[(df.pathway==pathway) & (df.cohort=='controls')]['corr']), np.array(df[(df.pathway==pathway) & (df.cohort=='patients')]['corr'])) if ttest.pvalue<(0.05/len(pathways)): ax.plot(i,y_level, '*', color='black') # DATA # ---- sbn.swarmplot(data=df_data, x='pathway', y='corr', hue='cohort', order=pathways, ax=ax_data, marker='o', size=2.8, dodge=True, palette=palette, hue_order=['controls', 'patients']) #plt.xticks(rotation=30, ha='right') for label in ax_data.get_xticklabels(): label.set_rotation(30) label.set_ha('center') label.set_va('top') label.set_rotation_mode('default') #'anchor' ax_data.legend().set_visible(False) ax_data.spines.top.set_visible(False) ax_data.spines.right.set_visible(False) plot_stats_stars(df_data, ax=ax_data, y_level=0.45) ax_data.set_title('Observed') ax_data.set_ylim([-0.35, 0.5]) ax_data.set_ylabel('R', fontsize=12) #ax_data.set_xlabel('Pathway', fontsize=12) ax_data.set_xticklabels(pathway_names) # SIMULATION # ---------- #sbn.swarmplot(data=df_tmp, x='pathway', y='corr', hue='base_cohort', # ax=ax_sim, size=1.5, dodge=True, palette=palette, hue_order=['controls', 'patients']) sbn.swarmplot(data=df_tmp, x='pathway', y='corr', hue='cohort', ax=ax_sim, size=2.8, dodge=True, palette=palette, hue_order=['controls', 'patients']) #plt.xticks(rotation=30, ha='right', rotation_mode='anchor') for label in ax_sim.get_xticklabels(): label.set_rotation(30) label.set_ha('center') label.set_va('top') label.set_rotation_mode('default') #'anchor' ax_sim.legend().set_visible(False) ax_sim.spines.top.set_visible(False) ax_sim.spines.right.set_visible(False) ax_sim.spines.left.set_visible(False) ax_sim.set_yticks([]) plot_stats_stars(df_tmp, ax=ax_sim, y_level=0.45) ax_sim.set_title('Simulated') ax_sim.set_ylim([-0.35, 0.5]) ax_sim.set_ylabel('') ax_sim.set_yticklabels([]) #ax_sim.set_xlabel('Pathway', fontsize=12) ax_sim.set_xticklabels(pathway_names) if args.save_figs: today = datetime.now().strftime('_%Y%m%d') fname = os.path.join(proj_dir, 'img', 'fc_sim_vs_data'+today+'.svg') plt.savefig(fname)
[docs] def plot_kdes(kdes, cols, df_stats, df_real=[], df_pred=[], plot_args={'nrows':4, 'ncols':5, 'row_offset':2, 'col_offset':3, 'figsize':[10,7], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1}, args=None): """ Plot Kernel Density Estimates of posteriors (controls vs OCD) Parameters ---------- kdes: dict Kernel Density Estimates of parameters cols: list Model parameters df_stats: pandas.DataFrame Stastics for each parameter (healthy controls vs OCD patients) df_real: pandas.DataFrame (Optional) Synthetic data (observed) df_pred: pandas.DataFrame (Optional) Synthetic data (predicted) plot_args: dict Default options for plotting. nrows, ncols: number of rows and columns of the GridSpec object (grid of axes). row_offset, col_offset: shifts in rows and colums to let space for another plot. figsize: figure size. show_stars: show stars for statistical significant between controls and OCD. hist_alpha, kde_alpha: opacity of the histograms and kernel density estimates. args: argparse.Namespace (Optional) Extra arguments """ plt.rcParams.update({'mathtext.default': 'regular', 'font.size':10}) plt.rcParams.update({'text.usetex': False}) plt.rcParams.update({'figure.constrained_layout.use': False}) nrows, ncols = plot_args['nrows'], plot_args['ncols'] row_offset, col_offset = plot_args['row_offset'], plot_args['col_offset'] fig = plt.figure(figsize=plot_args['figsize']) gs = plt.GridSpec(nrows=nrows, ncols=ncols, hspace=0.7, wspace=0.25) # EPSILONS #ax = fig.add_subplot(gs[0:row_offset, 0:col_offset]) #custom_plot_epsilons(histories, ax=ax) # subject vertical lines colors: subjs_colors = ['yellowgreen', 'plum'] # KDES ax_inds = get_ax_inds(nrows, ncols, row_offset, col_offset) for _,col in enumerate(cols): #ax = plt.subplot(3,5,i+2) i,j = next(ax_inds) ax = fig.add_subplot(gs[i,j]) vals = np.concatenate([kdes['controls'][col]['vals'], kdes['patients'][col]['vals']]) bins = np.linspace(vals.min(), vals.max(), 13) plt.hist(kdes['controls'][col]['vals'], color='lightblue', bins=bins, alpha=plot_args['hist_alpha'], density=True, log=False, lw=0.3, ec='gray') plt.hist(kdes['patients'][col]['vals'], color='orange', bins=bins, alpha=plot_args['hist_alpha'], density=True, log=False, lw=0.3, ec='gray') plt.plot(kdes['controls'][col]['X'], np.exp(kdes['controls'][col]['pdf']), 'lightblue', lw=1, alpha=plot_args['kde_alpha']) plt.plot(kdes['patients'][col]['X'], np.exp(kdes['patients'][col]['pdf']), 'orange', lw=1, alpha=plot_args['kde_alpha']) xlabl = OCD_modeling.mcmc.inference_analysis.format_labels([matplotlib.text.Text(text=col)], args.N)[0] plt.xlabel(xlabl, fontsize=12) if j==0: plt.ylabel("$\\rho$", fontsize=12) else: plt.ylabel('') #ax.xaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.1f')) mn,mx = ax.get_ylim() if mn<0.01: ax.set_ylim(bottom=0.01) ax.spines.top.set_visible(False) ax.spines.right.set_visible(False) ttl='' if plot_args['show_stars']: if df_stats[df_stats.param==col]['p_u_bf'].iloc[0]<0.05: #if df_stats[df_stats.param==col]['p_t_bf'].iloc[0]<0.05: ttl+='*' if np.abs(df_stats[df_stats.param==col]['d'].iloc[0])>0.2: ttl+='*' if np.abs(df_stats[df_stats.param==col]['d'].iloc[0])>0.5: ttl+='*' plt.title(ttl, fontsize=14) if len(df_real)!=0: for i,subj_df in enumerate(df_real): plt.vlines(subj_df[col], ymin=mn, ymax=mx, color=subjs_colors[i], lw=2) if args.annotate_restore: ctl_mean = kdes['controls'][col]['vals'].mean() arrow_height = mx - (i+1)*5*(mx-mn)/100 # 5% below top and by step of 5% for each subjs plt.vlines(ctl_mean, ymin=mn, ymax=mx, color='black', linewidth=3, alpha=0.5) arrow = matplotlib.patches.FancyArrowPatch((float(subj_df[col]),arrow_height), (ctl_mean,arrow_height), arrowstyle='->,head_width=0.1,head_length=0.1', mutation_scale=20, edgecolor=subjs_colors[i], linewidth=2) ax.add_patch(arrow) if len(df_pred)!=0: plt.vlines(df_pred[col], ymin=mn, ymax=mx, color='red') # FC #axes = {'data': fig.add_subplot(gs[3:, 0:3]), # 'sim': fig.add_subplot(gs[3:, 3:])} #plot_fc_sim_vs_data() if args.annotate_restore: ax = fig.add_subplot(gs[0,0]) for i,subj_df in enumerate(df_real): ax.text(0.5,-i*0.5, subj_df.iloc[0]['subj'], backgroundcolor=matplotlib.colors.colorConverter.to_rgba(subjs_colors[i], 0.3), horizontalalignment='center') ax.text(0.5,-i*0.5-0.25, 'vs', horizontalalignment='center') ax.text(0.5,-len(df_real)*0.5, 'Healthy Average', fontweight='bold', horizontalalignment='center') ax.set_ylim([-1,0.2]) ax.set_xticks([]) ax.set_yticks([]) ax.spines['bottom'].set_visible(False) ax.spines['left'].set_visible(False) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) plt.tight_layout(pad=0) if args.save_figs: today = datetime.now().strftime('_%Y%m%d') fname = os.path.join(proj_dir, 'img', 'kdes'+today+'.svg') plt.savefig(fname) plt.show() return fig
# ------------------------------------------ # # Simulate at posterior distributions' modes # # ------------------------------------------ # def get_kde_peaks(kdes): """ Extract peaks of posterior distribution from kernel density estimates """ peaks = dict() for cohort in kdes.keys(): peaks[cohort]= dict() for param in kdes[cohort].keys(): i_max = np.argmax(kdes[cohort][param]['pdf']) X_max = kdes[cohort][param]['X'][i_max] peaks[cohort][param] = X_max return peaks def simulate_posterior_modes(peaks, args): """ Run simulations for posterior distribution's modes """ sim_outputs = dict() for cohort, params in peaks.items(): sim_args = argparse.Namespace() sim_args.n_sims = args.n_sims sim_args.n_jobs= args.n_jobs sim_args.t_tot = 15000 sim_args.t_range = [5000, 15000] sim_args.model_pars, sim_args.sim_pars, sim_args.control_pars, sim_args.bold_pars = OCD_modeling.mcmc.unpack_params(params) sim_objs = OCD_modeling.hpc.launch_simulations(sim_args) sim_outputs[cohort] = sim_objs return sim_outputs def plot_epsilons_weights(histories, args): """ Combine the epsilons tolerances and the optimization weights """ fig = plt.figure(figsize=[12,4]) nrows, ncols = 2,5 row_offset, col_offset = 1,1 gs = plt.GridSpec(nrows=nrows, ncols=ncols) ax = fig.add_subplot(gs[:row_offset,:col_offset]) custom_plot_epsilons(histories, ax=ax, n_gens=11) plot_weights(histories, gs=gs, nrows=nrows, ncols=ncols, row_offset=row_offset, col_offset=col_offset, args=args) if args.save_figs: fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.svg') plt.savefig(fname) fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.png') plt.savefig(fname) fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.pdf') plt.savefig(fname) plt.show()
[docs] def get_history_parser(): " Script arguments when ran as main " parser = argparse.ArgumentParser() parser.add_argument('--n_jobs', type=int, default=10, action='store', help="number of parallel processes launched") parser.add_argument('--n_sims', type=int, default=50, action='store', help="number of simulations ran with the same parameters (e.g. to get distribution that can be campared to clinical observations)") parser.add_argument('--gens', nargs='+', default=[], action='store', help="generation of the optimization (list, must be same length as histories)") parser.add_argument('--histories', nargs='+', default=None, action='store', help="optimizations to analyse and compare") parser.add_argument('--history_names', type=list, default=['controls', 'patients'], action='store', help="names given to each otpimization loaded") parser.add_argument('--compute_kdes', default=False, action='store_true', help='compute KDEs of parameter estimations') parser.add_argument('--save_kdes', default=False, action='store_true', help='save KDEs') parser.add_argument('--save_kdes_suffix', type=str, default=today(), help="identifier of the KDE in the saving folder") parser.add_argument('--save_figs', default=False, action='store_true', help='save figures') parser.add_argument('--save_outputs', default=False, action='store_true', help='save outputs') parser.add_argument('--plot_figs', default=False, action='store_true', help='plot figures') parser.add_argument('--plot_epsilons', default=False, action='store_true', help='plot optimization errors') parser.add_argument('--plot_weights', default=False, action='store_true', help='plot optimization weights for each gen') parser.add_argument('--plot_epsilons_weights', default=False, action='store_true', help='plot optimization errors') parser.add_argument('--plot_param_distrib', default=False, action='store_true', help='plot parameter distribution for each gen') parser.add_argument('--plot_kde_matrix', default=False, action='store_true', help='plot KDE matrix of optimization') parser.add_argument('--plot_stats', default=False, action='store_true', help='show statisics') parser.add_argument('--plot_kdes', default=False, action='store_true', help='plot KDEs of optimzed params') parser.add_argument('--plot_fc_sim_vs_data', default=False, action='store_true', help='plot FC of inference from optimization vs real data') parser.add_argument('--simulate_posterior_modes', default=False, action='store_true', help='run simulations with at posterior distributions modes ') parser.add_argument('--N', type=int, default=4, action='store', help="number of node in the soumilations (default N=4, i.e. no thalamus, other option is N=6 (with thalamus)") parser.add_argument('--annotate_restore', default=False, action='store_true', help='Annotate the plots with arrow between subjects params and the avg controls') return parser
if __name__=='__main__': args = get_history_parser().parse_args() histories = import_results(args) if args.plot_epsilons: #plot_epsilons(histories, args) custom_plot_epsilons(histories, n_gens=np.array(args.gens, dtype=int).max()) if args.plot_weights: plot_weights(histories, args) if args.plot_epsilons_weights: plot_epsilons_weights(histories, args) if args.plot_param_distrib: plot_param_distrib(histories, args) if args.plot_kde_matrix: plot_kde_matrix(histories, args) # posterior distributions stats df_stats = compute_stats(histories, args) if args.compute_kdes: kdes,cols = compute_kdes(histories, args=args) if args.plot_kdes: plot_args = {4:{'nrows':4, 'ncols':5, 'row_offset':2, 'col_offset':3, 'figsize':[10,7], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1}, 6:{'nrows':6, 'ncols':5, 'row_offset':0, 'col_offset':0, 'figsize':[10,12], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1}} plot_kdes(kdes, cols, df_stats, plot_args=plot_args[args.N], args=args) if args.plot_fc_sim_vs_data: # load simulated and observed data df_base = OCD_modeling.mcmc.get_df_base(args) #df_base = OCD_modeling.mcmc.fix_df_base(df_base) df_data = OCD_modeling.mcmc.load_df_data(args) # observed vs simulated stats stats = stats_obs_vs_sim(df_data, df_base) plot_fc_sim_vs_data(df_data, df_base, stats, args=args) if args.simulate_posterior_modes: kde_peaks = get_kde_peaks(kdes) sim_outputs = simulate_posterior_modes(kde_peaks, args) if args.save_outputs: with open(os.path.join(proj_dir, 'postprocessing', 'sims_posterior_modes'+today()+'.pkl'), 'wb') as f: pickle.dump(sim_outputs, f)