Source code for OCD_modeling.mcmc.history_analysis

### Analyze history of optimizations
##  Author: Sebastien Naze
#   QIMR 2023

import argparse
from datetime import datetime 
import itertools
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
import os 
import pandas as pd
import pickle
import pingouin as pg
import pyabc
import scipy
import seaborn as sbn
import sklearn

import OCD_modeling
# import most relevant environment and project variable
from OCD_modeling.utils import cohen_d, get_working_dir, today

working_dir = get_working_dir()
proj_dir = os.path.join(working_dir, 'lab_lucac/sebastiN/projects/OCD_modeling')



[docs]
def import_results(args):
    """ Read optimization results from DB """ 
    histories = dict()
    for i,db_name in enumerate(args.histories):
        db_path = os.path.join(proj_dir, 'traces', db_name+'.db')
        name = args.history_names[i]
        try:
            histories[name] = pyabc.History("sqlite:///"+db_path)
        except:
            print(f'Optimization {db_name} does not exist, check name.')
    return histories

    

def load_empirical_FC(args):
    """ Loads functional connectivity from patients and controls (data, not simulation) """
    with open(os.path.join(proj_dir, 'postprocessing', 'df_roi_corr_avg_2023.pkl'), 'rb') as f:
        df_data = pickle.load(f)
    return df_data       


def get_ax_inds(nrows, ncols, row_offset=2, col_offset=2):
    """ create list of axes to iterate from when plotting """
    for row_ind, col_ind in itertools.product(np.arange(nrows), np.arange(ncols)):
        if ((col_ind<col_offset) & (row_ind<row_offset)):
            continue
        else:
            yield (row_ind, col_ind)



[docs]
def plot_epsilons(histories, ax=None, args=None):
    """ Plot evolution of epsilons across generations 
    
    Parameters
    ----------
        histories: list
            Optimization pyABC history objects (e.g. controls and patients)
        ax: matplotlib.Axes
            (optional) Axis to draw the figure.
        args: argparse.Namespace
            (optional) Extra arguments (for saving, etc).

    """ 
    n_hist = len(histories)
    if ax==None:
        fig = plt.figure(figsize=[5, 5])
        ax = plt.subplot(1,1,1)

    for i,(name,history) in enumerate(histories.items()):
        if name=='controls':
            pyabc.visualization.plot_epsilons([history], ax=ax)
        plt.legend(list(histories.keys()))




[docs]
def plot_weights(histories, gs=None, nrows=None, ncols=None, row_offset=None, col_offset=None, args=None):
    """ Plot evolution of weights across generations
    
    Parameters
    ----------
        histories: list
            Optimization pyABC history objects (e.g. controls and patients)
        gs: matplotlib.GridSpec
            (optional) GridSpec object to draw the figure into.
        nrows, ncols: int
            number of rows/columns to make the grid.
        row_offset, col_offset: int
            offsets to account for when some (top left) entries of the grid should be left empty.
        args: argparse.Namespace
            (optional) Extra arguments (e.g. for saving, etc).

    """ 
    n_hist = len(histories)
    n_gen = np.max([h.max_t for h in histories.values()])
    
    if gs==None:
        fig = plt.figure(figsize=[15, 4])
        nrows,ncols, row_offset, col_offset = 0,0,0,0
        gs = plt.GridSpec(nrows=nrows, ncols=ncols)
        
    
    for i,(name,history) in enumerate(histories.items()):
        ax_inds = get_ax_inds(nrows, ncols, row_offset=row_offset, col_offset=col_offset)
        #for j in np.arange(history.max_t+1):
        for j in np.arange(1,10):
            x,y = next(ax_inds)
            ax = plt.subplot(gs[x,y])
            df,w  = history.get_distribution(t=j)
            
            plt.scatter(np.arange(len(w)), w, s=10, alpha=0.1)
            plt.title(f"t={j}", fontsize=10)
            plt.yscale('log')
            #if x==nrows-1:
            plt.xlabel('particle', fontsize=10)
            if (((y==col_offset) and (x<row_offset)) or (y==0)):
                plt.ylabel('$\omega$', fontsize=10)
            #plt.legend(args.history_names)
            ax.spines.top.set_visible(False)
            ax.spines.right.set_visible(False)
            #xtl = ax.get_xticklabels()
            #ax.set_xticklabels(xtl, fontsize=12)
            #ytl = ax.get_yticklabels()
            #ax.set_yticklabels(ytl, fontsize=12)
    plt.tight_layout()




[docs]
def plot_param_distrib(histories, args):
    """ plot posterior distribution at end of optimization """
    colors = {'controls':'lightskyblue', 'patients':'orange'}
    n_hist = len(histories)
    n_gen = np.max([h.max_t for h in histories.values()])
    fig = plt.figure(figsize=[40, 20])
    xs = dict() # xlims 
    for i,(name,history) in enumerate(histories.items()):
        for j in range(history.max_t+1):
            df,w  = history.get_distribution(t=j)
            n_cols = len(list(df.columns))
            for k,col in enumerate(df.columns):
                plt.subplot(n_gen+1,n_cols,j*n_cols+k+1)
                plt.hist(df[col], alpha=0.2, color=colors[args.history_names[i]])
                plt.xlabel(col)

                # use first generation (prior) to set xlims
                if j==0:
                    xs[col] = [df[col].min(), df[col].max()]
                plt.xlim(xs[col])
    plt.tight_layout()



def plot_kde_matrix(histories, args):
    for (name,history) in histories.items():
        df,w = history.get_distribution(m=0)
        pyabc.visualization.plot_kde_matrix(df, w)



[docs]
def compute_stats(histories, args=None):
    """ Computes the statistics of the optimization outcome, i.e. tests the posterior distributions (parameters) 
    between controls and patients.

    Parameters
    ---------
        histories: list
            Controls and patient pyABC history objects. 
        args: argparse.Namespace
            Extra options

    Returns
    -------
        df_stats: pandas DataFrame 
            Statistics.  

    """
    df_post_con,w_con = histories['controls'].get_distribution(t=args.gens[0])
    df_post_pat,w_pat = histories['patients'].get_distribution(t=args.gens[1])
    cols = df_post_con.columns
    mc = len(cols)
    stats = []
    for col in cols:
        x,y = df_post_con[col], df_post_pat[col]
        
        stat_norm_con,p_norm_con = scipy.stats.normaltest(x)
        stat_norm_pat,p_norm_pat = scipy.stats.normaltest(y)

        t,p_t = scipy.stats.ttest_ind(x,y, permutations=10000)
    
        u,p_u = scipy.stats.mannwhitneyu(x,y)
        
        h,p_h = scipy.stats.kruskal(x,y)

        d = OCD_modeling.utils.cohen_d(x,y)

        ks_res = scipy.stats.ks_2samp(x,y)
        
        line = "{:15}    OCD={:.2f}     controls={:.2f}     t={:8.2f}    p={:.4f}    p_bf={:.4f}    normality(con/pat)={}/{}    \n\
                U={:8d}    p={:.4f}    p_bf={:.4f}    H={:8.3f}    p={:.4f}    p_bf={:.4f}    \
                d={:.4f}    ks={:.2f}    p_ks={:.4f}    p_bf={:.4f}".format(
            col,y.median(),x.median(),t,p_t,p_t*mc,p_norm_con>0.05,p_norm_pat>0.05,int(u),p_u, p_u*mc,h,p_h,p_h*mc,d,ks_res.statistic, ks_res.pvalue, ks_res.pvalue*mc)
        if args.plot_stats:
            print(line)
        
        df_line = {'param':col, 't':t, 'p_t':p_t, 'p_t_bf':p_t*len(cols), 'normality':(p_norm_con>0.05)&(p_norm_pat>0.05), \
                'u':u, 'p_u':p_u, 'p_u_bf':p_u*len(cols), \
                'h':h, 'p_h':p_h, 'p_h_bf':p_h*len(cols), 'd':d, 'ks':ks_res.statistic, 'p_ks':ks_res.pvalue}
        stats.append(df_line)
    df_stats = pd.DataFrame(stats)
    return df_stats




[docs]
def compute_kdes(histories, n_pts = 100, args=None):
    """ 
    Computes Kernel Density Estimates (KDEs) of the posterior distributions of parameters.
    
    Parameters
    ----------
        histories: dict
            Nested dictionnary of SQL alchemy history objects.
        n_pts: int 
            Number of points used to estimate the probability density functions (PDFs).
        args: argparse.Namespace
            Extra options.

    Returns
    -------
        kdes: dict 
            Nested dictiorany of KDEs and associated PDFs.
        cols: list 
            List of parameters for which the KDEs were estimated.
    
    """

    kdes = dict()

    cols = []
    for i,(cohort,history) in enumerate(histories.items()):
        kdes[cohort] = dict()
        df,w = history.get_distribution(t=args.gens[i])
        for col in df.columns:
            cols.append(col)
            vmin, vmax = df[col].min(), df[col].max()
            bw = (vmax-vmin)/10
            kde = sklearn.neighbors.KernelDensity(kernel='gaussian', bandwidth=bw).fit(np.array(df[col]).reshape(-1,1))
            X = np.linspace(vmin-0.5*bw, vmax+0.5*bw, 100).reshape(-1,1)
            pdf = kde.score_samples(X)
            kdes[cohort][col] = {'kde':kde, 'pdf':pdf, 'X':X, 'vals': df[col]}
    cols = np.unique(cols)

    if args.save_kdes:
        fname = os.path.join(proj_dir, 'postprocessing', 'kdes_'+'_'.join(args.histories)+today()+'.pkl')
        with open(fname, 'wb') as f:
            pickle.dump(kdes, f)

    return kdes, cols


def custom_plot_epsilons(histories, ax=None, n_gens=11):
    colors = ['lightblue', 'orange']
    labels = ['controls', 'OCD']#list(histories.keys())

    if ax==None:
        fig = plt.figure()
        ax = plt.subplot(1,1,1)

    for i,(name,history) in enumerate(histories.items()):
        # extract epsilons
        # note: first entry is from calibration and thus translates to inf,
        # thus must be discarded
        eps = np.array(history.get_all_populations()['epsilon'][:n_gens])

        # plot
        ax.plot(eps, 'x--', label=labels[i], color=colors[i], alpha=1)
        
    plt.xticks(np.arange(0,n_gens,2), labels=np.arange(0,n_gens,2))
    plt.yscale('log')
    plt.ylabel('$\epsilon$', fontsize=10)
    plt.xlabel('$t$', fontsize=10)
    ax.spines.top.set_visible(False)
    ax.spines.right.set_visible(False)
    plt.legend(labels, fontsize=10)
    #xtl = ax.get_xticklabels()
    #ax.set_xticklabels(xtl, fontsize=12)
    #ax.set_yticklabels(ax.get_yticklabels(), fontsize=12)

# -------------------------------------
# Observed vs simulated stats and plots
# -------------------------------------

def stats_obs_vs_sim(df_data, df_base):
    """ T-tests within and across cohorts, observed and simulated traces  """ 
    # prep sim dataframe
    df_sim = df_base.melt(id_vars=['subj', 'base_cohort'], value_vars=['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC'], var_name='pathway', value_name='corr')
    df_sim = df_sim.rename(columns={'base_cohort':'cohort'})

    df_data['type'] = 'observed'
    df_sim['type'] = 'simulated'
    df_fc = pd.concat([df_data, df_sim], join='inner', ignore_index=True)

    pathways = ['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC']
    stats = dict((pathway,dict()) for pathway in pathways)
    print("================")
    print("TWO-FACTOR ANOVA")
    print("================")
    for pathway in df_fc.pathway.unique():
        print(pathway)
        aov = pg.anova(data=df_fc[df_fc.pathway==pathway], between=['cohort', 'type'], dv='corr')
        pg.print_table(aov)
        stats[pathway]['aov'] = aov

    print("=============")
    print("WELCH T-TESTS")
    print("=============")
    for pathway in pathways:
        print(pathway)
        df_tmp = df_fc[df_fc.pathway==pathway]
        cc = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='simulated')]['corr'])
        print("con-con -- obs-sim   T={:.2f}   p={:.4f}   d={:.2f}".format(cc['T'][0], cc['p-val'][0], cc['cohen-d'][0]))
        pp = pg.ttest(df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='simulated')]['corr'])
        print("pat-pat -- obs-sim   T={:.2f}   p={:.4f}   d={:.2f}".format(pp['T'][0], pp['p-val'][0], pp['cohen-d'][0]))
        cpo = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='observed')]['corr'])
        print("con-pat -- obs-obs   T={:.2f}   p={:.4f}   d={:.2f}".format(cpo['T'][0], cpo['p-val'][0], cpo['cohen-d'][0]))
        cps = pg.ttest(df_tmp[(df_tmp.cohort=='controls') & (df_tmp.type=='simulated')]['corr'], df_tmp[(df_tmp.cohort=='patients') & (df_tmp.type=='simulated')]['corr'])
        print("con-pat -- sim-sim   T={:.2f}   p={:.4f}   d={:.2f}".format(cps['T'][0], cps['p-val'][0], cps['cohen-d'][0]))
        os = pg.ttest(df_tmp[(df_tmp.type=='observed')]['corr'], df_tmp[(df_tmp.type=='simulated')]['corr'])
        print("obs-sim   T={:.2f}   p={:.4f}   d={:.2f}".format(cps['T'][0], cps['p-val'][0], cps['cohen-d'][0]))
        print("\n")

        stats[pathway]['cc'], stats[pathway]['pp'], stats[pathway]['cpo'], stats[pathway]['cps'], stats[pathway]['os'] = cc, pp, cpo, cps, os

    return stats


[docs]
def plot_fc_sim_vs_data(df_data, df_base, stats, axes=None, args=None):
    """ Plot functional connectivity (Pearson correlation) across the frontostriatal regions of interests 
    in empirical and simulated data. 
    
    Parameters
    ----------
        df_data: pandas.DataFrame
            Empirical data extracted from fMRI in OCD subjects and healthy controls.
        df_base: pandas.DataFrame
            Simulated data using parameters infered from posterior distributions (either only OCD parameters  
            or only controls, no permutation to model virtual intervention).
        stats: dict
            (deprecated) Statistics within and between cohorts for both empirical and simulated data (deprecated 
            since now stats are directly computed within plotting function).
        axes: list of matplotlib.Axes
            (optional) List of axes to plot data (if embedded in other figure, otherwise create new figure).
        args: argparse.Namespace
            (optional) Extra arguments (e.g. for saving, etc).
    
    """ 
    
    palette = {'controls': 'lightblue', 'patients': 'orange'}
    pathways = ['Acc_OFC', 'Acc_PFC', 'Acc_dPut', 'OFC_PFC', 'dPut_OFC', 'dPut_PFC']
    pathway_names = ['NAcc-OFC', 'NAcc-LPFC', 'NAcc-dPut', 'OFC-LPFC', 'dPut-OFC', 'dPut-LPFC']
    df_tmp = df_base.iloc[:384].melt(id_vars=['subj', 'base_cohort'], value_vars=pathways, var_name='pathway', value_name='corr')
    df_tmp = df_tmp.rename(columns={'base_cohort':'cohort'})

    # select n random simulations for each cohort
    n = 50 
    cons = df_tmp[df_tmp.cohort=='controls'].subj.unique()
    cons = cons[np.random.permutation(len(cons))][:n]
    pats = df_tmp[df_tmp.cohort=='patients'].subj.unique()
    pats = pats[np.random.permutation(len(pats))][:n]
    subjs = np.concatenate([cons, pats])
    df_tmp = df_tmp[df_tmp.subj.isin(subjs)]
    
    if axes==None:
        plt.figure(figsize=[10,3])
        ax_data = plt.subplot(1,2,1)
        ax_sim = plt.subplot(1,2,2)
        plt.rcParams.update({'figure.autolayout':True})
        plt.tight_layout()
    else:
        ax_data = axes['data']
        ax_sim = axes['sim']


    def plot_stats_stars(df, ax, y_level=0.5):
        """ Add stars on top of plots for stats significance """ 
        for i,pathway in enumerate(pathways):
            ttest = scipy.stats.ttest_ind(np.array(df[(df.pathway==pathway) & (df.cohort=='controls')]['corr']), 
                                np.array(df[(df.pathway==pathway) & (df.cohort=='patients')]['corr']))
            if ttest.pvalue<(0.05/len(pathways)):
                ax.plot(i,y_level, '*', color='black')

    # DATA
    # ----
    sbn.swarmplot(data=df_data, x='pathway', y='corr', hue='cohort', order=pathways, 
                            ax=ax_data, marker='o', size=2.8, dodge=True, palette=palette, 
                            hue_order=['controls', 'patients'])
    #plt.xticks(rotation=30, ha='right')
    for label in ax_data.get_xticklabels():
        label.set_rotation(30)
        label.set_ha('center')
        label.set_va('top')
        label.set_rotation_mode('default') #'anchor'
    ax_data.legend().set_visible(False)
    ax_data.spines.top.set_visible(False)
    ax_data.spines.right.set_visible(False)
    plot_stats_stars(df_data, ax=ax_data, y_level=0.45)
    ax_data.set_title('Observed')
    ax_data.set_ylim([-0.35, 0.5])
    ax_data.set_ylabel('R', fontsize=12)
    #ax_data.set_xlabel('Pathway', fontsize=12)
    ax_data.set_xticklabels(pathway_names)


    # SIMULATION
    # ----------
    #sbn.swarmplot(data=df_tmp, x='pathway', y='corr', hue='base_cohort', 
    #                       ax=ax_sim, size=1.5, dodge=True, palette=palette, hue_order=['controls', 'patients'])
    sbn.swarmplot(data=df_tmp, x='pathway', y='corr', hue='cohort', 
                           ax=ax_sim, size=2.8, dodge=True, palette=palette, hue_order=['controls', 'patients'])
    #plt.xticks(rotation=30, ha='right', rotation_mode='anchor')
    for label in ax_sim.get_xticklabels():
        label.set_rotation(30)
        label.set_ha('center')
        label.set_va('top')
        label.set_rotation_mode('default') #'anchor'
    ax_sim.legend().set_visible(False)
    ax_sim.spines.top.set_visible(False)
    ax_sim.spines.right.set_visible(False)
    ax_sim.spines.left.set_visible(False)
    ax_sim.set_yticks([])
    plot_stats_stars(df_tmp, ax=ax_sim, y_level=0.45)
    ax_sim.set_title('Simulated')
    ax_sim.set_ylim([-0.35, 0.5])
    ax_sim.set_ylabel('')
    ax_sim.set_yticklabels([])
    #ax_sim.set_xlabel('Pathway', fontsize=12)
    ax_sim.set_xticklabels(pathway_names)

    if args.save_figs:
        today = datetime.now().strftime('_%Y%m%d')
        fname = os.path.join(proj_dir, 'img', 'fc_sim_vs_data'+today+'.svg')
        plt.savefig(fname)

    
    


[docs]
def plot_kdes(kdes, cols, df_stats, df_real=[], df_pred=[], plot_args={'nrows':4, 'ncols':5, 'row_offset':2, 'col_offset':3, 'figsize':[10,7], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1}, args=None):
    """  Plot Kernel Density Estimates of posteriors (controls vs OCD)
    
    Parameters
    ----------
        kdes: dict
            Kernel Density Estimates of parameters
        cols: list
            Model parameters
        df_stats: pandas.DataFrame
            Stastics for each parameter (healthy controls vs OCD patients)
        df_real: pandas.DataFrame
            (Optional) Synthetic data (observed)
        df_pred: pandas.DataFrame
            (Optional) Synthetic data (predicted)
        plot_args: dict
            Default options for plotting.
                nrows, ncols: number of rows and columns of the GridSpec object (grid of axes).
                row_offset, col_offset: shifts in rows and colums to let space for another plot.
                figsize: figure size.
                show_stars: show stars for statistical significant between controls and OCD.
                hist_alpha, kde_alpha: opacity of the histograms and kernel density estimates.
        args: argparse.Namespace
            (Optional) Extra arguments

    """

    plt.rcParams.update({'mathtext.default': 'regular', 'font.size':10})
    plt.rcParams.update({'text.usetex': False})
    plt.rcParams.update({'figure.constrained_layout.use': False})

    nrows, ncols = plot_args['nrows'], plot_args['ncols']
    row_offset, col_offset = plot_args['row_offset'], plot_args['col_offset']

    fig = plt.figure(figsize=plot_args['figsize'])
    gs = plt.GridSpec(nrows=nrows, ncols=ncols, hspace=0.7, wspace=0.25)
    
    # EPSILONS
    #ax = fig.add_subplot(gs[0:row_offset, 0:col_offset])
    #custom_plot_epsilons(histories, ax=ax)

    # subject vertical lines colors:
    subjs_colors = ['yellowgreen', 'plum']
    
    # KDES
    ax_inds = get_ax_inds(nrows, ncols, row_offset, col_offset)
    for _,col in enumerate(cols):
        #ax = plt.subplot(3,5,i+2)
        i,j = next(ax_inds)
        ax = fig.add_subplot(gs[i,j])
        vals = np.concatenate([kdes['controls'][col]['vals'], kdes['patients'][col]['vals']])
        bins = np.linspace(vals.min(), vals.max(), 13)
        plt.hist(kdes['controls'][col]['vals'], color='lightblue', bins=bins, alpha=plot_args['hist_alpha'], density=True, log=False, lw=0.3, ec='gray')
        plt.hist(kdes['patients'][col]['vals'], color='orange', bins=bins, alpha=plot_args['hist_alpha'], density=True, log=False, lw=0.3, ec='gray')
        plt.plot(kdes['controls'][col]['X'], np.exp(kdes['controls'][col]['pdf']), 'lightblue', lw=1, alpha=plot_args['kde_alpha'])
        plt.plot(kdes['patients'][col]['X'], np.exp(kdes['patients'][col]['pdf']), 'orange', lw=1, alpha=plot_args['kde_alpha'])
        xlabl = OCD_modeling.mcmc.inference_analysis.format_labels([matplotlib.text.Text(text=col)], args.N)[0]
        plt.xlabel(xlabl, fontsize=12)
        if j==0:
            plt.ylabel("$\\rho$", fontsize=12)
        else:
            plt.ylabel('')
        #ax.xaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.1f'))
        mn,mx = ax.get_ylim()
        if mn<0.01:
            ax.set_ylim(bottom=0.01)
        ax.spines.top.set_visible(False)
        ax.spines.right.set_visible(False)
        
        ttl=''
        if plot_args['show_stars']:
            if df_stats[df_stats.param==col]['p_u_bf'].iloc[0]<0.05:
            #if df_stats[df_stats.param==col]['p_t_bf'].iloc[0]<0.05:
                ttl+='*'
                if np.abs(df_stats[df_stats.param==col]['d'].iloc[0])>0.2:
                    ttl+='*'
                    if np.abs(df_stats[df_stats.param==col]['d'].iloc[0])>0.5:
                        ttl+='*'
        plt.title(ttl, fontsize=14)

        if len(df_real)!=0:
            for i,subj_df in enumerate(df_real):
                plt.vlines(subj_df[col], ymin=mn, ymax=mx, color=subjs_colors[i], lw=2)
                if args.annotate_restore:
                    ctl_mean = kdes['controls'][col]['vals'].mean()
                    arrow_height = mx - (i+1)*5*(mx-mn)/100  # 5% below top and by step of 5% for each subjs
                    plt.vlines(ctl_mean, ymin=mn, ymax=mx, color='black', linewidth=3, alpha=0.5)
                    arrow = matplotlib.patches.FancyArrowPatch((float(subj_df[col]),arrow_height), (ctl_mean,arrow_height), 
                                                    arrowstyle='->,head_width=0.1,head_length=0.1', mutation_scale=20, edgecolor=subjs_colors[i], linewidth=2)
                    ax.add_patch(arrow)
        
        if len(df_pred)!=0:
            plt.vlines(df_pred[col], ymin=mn, ymax=mx, color='red')


    # FC
    #axes = {'data': fig.add_subplot(gs[3:, 0:3]),
    #        'sim': fig.add_subplot(gs[3:, 3:])}
    #plot_fc_sim_vs_data()

    if args.annotate_restore:
        ax = fig.add_subplot(gs[0,0])
        for i,subj_df in enumerate(df_real):
            ax.text(0.5,-i*0.5, subj_df.iloc[0]['subj'], backgroundcolor=matplotlib.colors.colorConverter.to_rgba(subjs_colors[i], 0.3), horizontalalignment='center')
            ax.text(0.5,-i*0.5-0.25, 'vs', horizontalalignment='center')
        ax.text(0.5,-len(df_real)*0.5, 'Healthy Average', fontweight='bold', horizontalalignment='center')
        ax.set_ylim([-1,0.2])
        ax.set_xticks([])
        ax.set_yticks([])
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        
    plt.tight_layout(pad=0)
    
    if args.save_figs:
        today = datetime.now().strftime('_%Y%m%d')
        fname = os.path.join(proj_dir, 'img', 'kdes'+today+'.svg')
        plt.savefig(fname)
    plt.show()
    return fig

    

# ------------------------------------------ #
# Simulate at posterior distributions' modes #
# ------------------------------------------ #

def get_kde_peaks(kdes):
    """ Extract peaks of posterior distribution from kernel density estimates """
    peaks = dict()
    for cohort in kdes.keys():
        peaks[cohort]= dict()
        for param in kdes[cohort].keys():
            i_max = np.argmax(kdes[cohort][param]['pdf'])
            X_max = kdes[cohort][param]['X'][i_max]
            peaks[cohort][param] = X_max
    return peaks


def simulate_posterior_modes(peaks, args):
    """ Run simulations for posterior distribution's modes  """
    sim_outputs = dict()
    for cohort, params in peaks.items():
        sim_args = argparse.Namespace()
        sim_args.n_sims = args.n_sims
        sim_args.n_jobs= args.n_jobs
        sim_args.t_tot = 15000
        sim_args.t_range = [5000, 15000]
        sim_args.model_pars, sim_args.sim_pars, sim_args.control_pars, sim_args.bold_pars = OCD_modeling.mcmc.unpack_params(params)

        sim_objs = OCD_modeling.hpc.launch_simulations(sim_args)
        sim_outputs[cohort] = sim_objs
    return sim_outputs


def plot_epsilons_weights(histories, args):
    """ Combine the epsilons tolerances and the optimization weights """
    fig = plt.figure(figsize=[12,4])
    nrows, ncols = 2,5
    row_offset, col_offset = 1,1
    gs = plt.GridSpec(nrows=nrows, ncols=ncols)
    ax = fig.add_subplot(gs[:row_offset,:col_offset])
    custom_plot_epsilons(histories, ax=ax, n_gens=11)
    plot_weights(histories, gs=gs, nrows=nrows, ncols=ncols, row_offset=row_offset, col_offset=col_offset, args=args)
    
    if args.save_figs:
        fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.svg')
        plt.savefig(fname)
        fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.png')
        plt.savefig(fname)
        fname = os.path.join(proj_dir, 'img', 'epsilons_weights_log'+today()+'.pdf')
        plt.savefig(fname)
    plt.show()



[docs]
def get_history_parser():
    " Script arguments when ran as main " 
    parser = argparse.ArgumentParser()
    
    parser.add_argument('--n_jobs', type=int, default=10, action='store', help="number of parallel processes launched")
    parser.add_argument('--n_sims', type=int, default=50, action='store', help="number of simulations ran with the same parameters (e.g. to get distribution that can be campared to clinical observations)")
    parser.add_argument('--gens', nargs='+', default=[], action='store', help="generation of the optimization (list, must be same length as histories)")
    parser.add_argument('--histories', nargs='+', default=None, action='store', help="optimizations to analyse and compare")
    parser.add_argument('--history_names', type=list, default=['controls', 'patients'], action='store', help="names given to each otpimization loaded")
    parser.add_argument('--compute_kdes', default=False, action='store_true', help='compute KDEs of parameter estimations')
    
    parser.add_argument('--save_kdes', default=False, action='store_true', help='save KDEs')
    parser.add_argument('--save_kdes_suffix', type=str, default=today(), help="identifier of the KDE in the saving folder")
    parser.add_argument('--save_figs', default=False, action='store_true', help='save figures')
    parser.add_argument('--save_outputs', default=False, action='store_true', help='save outputs')

    parser.add_argument('--plot_figs', default=False, action='store_true', help='plot figures')
    parser.add_argument('--plot_epsilons', default=False, action='store_true', help='plot optimization errors')
    parser.add_argument('--plot_weights', default=False, action='store_true', help='plot optimization weights for each gen')
    parser.add_argument('--plot_epsilons_weights', default=False, action='store_true', help='plot optimization errors')
    parser.add_argument('--plot_param_distrib', default=False, action='store_true', help='plot parameter distribution for each gen')
    parser.add_argument('--plot_kde_matrix', default=False, action='store_true', help='plot KDE matrix of optimization')
    parser.add_argument('--plot_stats', default=False, action='store_true', help='show statisics')
    parser.add_argument('--plot_kdes', default=False, action='store_true', help='plot KDEs of optimzed params')
    parser.add_argument('--plot_fc_sim_vs_data', default=False, action='store_true', help='plot FC of inference from optimization vs real data')
    parser.add_argument('--simulate_posterior_modes', default=False, action='store_true', help='run simulations with at posterior distributions modes ')

    parser.add_argument('--N', type=int, default=4, action='store', help="number of node in the soumilations (default N=4, i.e. no thalamus, other option is N=6 (with thalamus)")
    
    parser.add_argument('--annotate_restore', default=False, action='store_true', help='Annotate the plots with arrow between subjects params and the avg controls')
    return parser



if __name__=='__main__':
    args = get_history_parser().parse_args()
    
    histories = import_results(args)
    
    if args.plot_epsilons:
        #plot_epsilons(histories, args)
        custom_plot_epsilons(histories, n_gens=np.array(args.gens, dtype=int).max())
    if args.plot_weights:
        plot_weights(histories, args)
    if args.plot_epsilons_weights:
        plot_epsilons_weights(histories, args)
    if args.plot_param_distrib:
        plot_param_distrib(histories, args)
    if args.plot_kde_matrix:
        plot_kde_matrix(histories, args)


    # posterior distributions stats
    df_stats = compute_stats(histories, args)

    if args.compute_kdes:
        kdes,cols = compute_kdes(histories, args=args)
    if args.plot_kdes:
        plot_args = {4:{'nrows':4, 'ncols':5, 'row_offset':2, 'col_offset':3, 'figsize':[10,7], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1},
                     6:{'nrows':6, 'ncols':5, 'row_offset':0, 'col_offset':0, 'figsize':[10,12], 'show_stars':True, 'hist_alpha':0.3, 'kde_alpha':1}}
        plot_kdes(kdes, cols, df_stats, plot_args=plot_args[args.N], args=args)

    if args.plot_fc_sim_vs_data:
        # load simulated and observed data
        df_base = OCD_modeling.mcmc.get_df_base(args)
        #df_base = OCD_modeling.mcmc.fix_df_base(df_base)
        df_data = OCD_modeling.mcmc.load_df_data(args)
    
        # observed vs simulated stats
        stats = stats_obs_vs_sim(df_data, df_base)

        plot_fc_sim_vs_data(df_data, df_base, stats, args=args)

    if args.simulate_posterior_modes:
        kde_peaks = get_kde_peaks(kdes)
        sim_outputs = simulate_posterior_modes(kde_peaks, args)
        if args.save_outputs:
            with open(os.path.join(proj_dir, 'postprocessing', 'sims_posterior_modes'+today()+'.pkl'), 'wb') as f:
                pickle.dump(sim_outputs, f)