Source code for uq_physicell.model_analysis.sensitivity_analysis

import numpy as np
import pandas as pd
from SALib.analyze import fast as fast_analyze, rbd_fast as rbd_fast_analyze, ff as ff_analyze, pawn as pawn_analyze, dgsm as dgsm_analyze, enhanced_hdmr as hdmr_analyze, rsa as rsa_analyze, discrepancy as discrepancy_analyze, delta as delta_analyze, sobol as sobol_analyze

from ..database.ma_db import load_parameter_space, load_samples, load_metadata

# Compatibility of samplers with methods following the SALib library
# https://salib.readthedocs.io/en/latest/index.html
# https://salib.readthedocs.io/en/latest/user-guide/analysis.html
# https://salib.readthedocs.io/en/latest/user-guide/sampling.html

# Issues with methods:
# Discrepancy Sensitivity Indices: Error Bounds are not consistent 'l_bounds' < 'u_bounds'
# Delta Moment-Independent Measure: Error module 'numpy' has no attribute 'trapezoid' (outdated numpy version)
samplers_to_method = {
    "OAT": [
        "OAT - One-At-A-Time",
    ],
    "Fast": [
        "FAST - Fourier Amplitude Sensitivity Test",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Fractional Factorial": [
        "Fractional Factorial",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Finite Difference": [
        "Derivative-based Global Sensitivity Measure (DGSM)",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Latin hypercube sampling (LHS)": [
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Sobol": [
        "Sobol Sensitivity Analysis",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ]
}

def _set_time_labels(df_qois: pd.DataFrame) -> dict:
    """Set and validate time labels for QoI values.
    
    Args:
        df_qois (pd.DataFrame): DataFrame containing the QoI values with time columns.
    
    Returns:
        dict: Dictionary with time labels as keys and their corresponding time values
            as values, sorted by time values.
    
    Raises:
        ValueError: If more than one unique value is found for any time label.
    """
    # Check if qoi_time_values is empty
    qoi_time_values = {}
    all_times_label = [col for col in df_qois.columns if col.startswith("time")]

    # Ensure all times are present in the qoi_time_values
    for time_label in all_times_label:
        unique_values = df_qois[time_label].unique()
        if len(unique_values) == 1:
            qoi_time_values[time_label] = unique_values[0]
        else:
            raise ValueError(f"Error: More than one unique value for time label '{time_label}': {unique_values}.")

    # Sort the time values
    qoi_time_values = dict(sorted(qoi_time_values.items(), key=lambda item: item[1]))
    return qoi_time_values

def _get_SA_problem(params_dict: dict) -> dict:
    """Create a SALib problem dictionary from parameter definitions.
    
    This function converts parameter definitions to the format required by
    the SALib (Sensitivity Analysis Library) for conducting sensitivity analysis.

    Args:
        params_dict (dict): Dictionary containing parameter names as keys and
            parameter properties as values. Each parameter should have
            'lower_bound' and 'upper_bound' keys. The special key 'samples'
            is excluded from the problem definition.

    Returns:
        dict: SALib-formatted problem dictionary containing:
            - num_vars (int): Number of parameters
            - names (list): List of parameter names
            - bounds (list): List of tuples with (lower, upper) bounds for each parameter
    """
    param_names = [key for key in params_dict.keys() if key != "samples"]
    problem = {
        'num_vars': len(param_names),
        'names': param_names,
        'bounds': [(params_dict[key]['lower_bound'], params_dict[key]['upper_bound']) for key in param_names]
    }
    return problem


[docs]
def get_global_SA_parameters(db_file):
    df_parameter_space = load_parameter_space(db_file)
    global_SA_parameters = {'samples': load_samples(db_file)}
    for id, param in enumerate(df_parameter_space['ParamName']):
        global_SA_parameters[param] = {"lower_bound": df_parameter_space['lower_bound'].iloc[id],
                                                            "upper_bound": df_parameter_space['upper_bound'].iloc[id],
                                                            "ref_value": df_parameter_space['ref_value'].iloc[id]}
        perturbation = df_parameter_space['perturbation'].iloc[id]
        try:
            global_SA_parameters[param]["perturbation"] = float(perturbation)
        except Exception as e:
            print(f"Warning: Could not convert perturbation ({perturbation}) to float for parameter {param}.")
            # Calculate perturbation as percentage based on bounds and ref_value
            global_SA_parameters[param]["perturbation"] = 100.0 * (df_parameter_space['upper_bound'].iloc[id]/df_parameter_space['ref_value'].iloc[id] - 1.0)
    return global_SA_parameters



[docs]
def get_local_SA_parameters(db_file):
    df_parameter_space = load_parameter_space(db_file)
    local_SA_parameters = {'samples': load_samples(db_file)}
    for id, param in enumerate(df_parameter_space['ParamName']):
        perturbation = df_parameter_space['perturbation'].iloc[id]
        if isinstance(perturbation, list):
            perturbation = [float(x) for x in perturbation]
        local_SA_parameters[param] = {"ref_value": df_parameter_space['ref_value'].iloc[id], 
                                                    "perturbation": perturbation}
    return local_SA_parameters



[docs]
def run_global_sa(params_dict: dict, qoi_names: list, df_qois: pd.DataFrame, method: str, qoi_time_values: dict = None) -> tuple:
    """Run global sensitivity analysis using the specified method.
    
    Args:
        params_dict (dict): Dictionary containing parameter names, properties, and sample values.
            Must include a 'samples' key with parameter sample dictionaries.
        qoi_names (list): List of QoI names to analyze.
        df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as
            '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index.
        method (str): Name of the sensitivity analysis method to use. Supported methods
            include 'FAST - Fourier Amplitude Sensitivity Test', 'Sobol Sensitivity Analysis',
            'PAWN Sensitivity Analysis', etc.
        qoi_time_values (dict, optional): Dictionary mapping time labels to their values.
            If None, time labels will be extracted from df_qois. Defaults to None.
    
    Returns:
        tuple: A tuple containing:
            - sa_results_dict (dict): Nested dictionary with sensitivity analysis results.
              Structure: {qoi_name: {time_label: analysis_results}}
            - qoi_time_values (dict): Dictionary mapping time labels to their values,
              sorted by time.
    
    Raises:
        ValueError: If there's a mismatch between number of samples and QoI results,
            or if the specified method fails during analysis.
    """
    # Get the problem definition for SALib
    problem = _get_SA_problem(params_dict)
    # Generate params_np - it is sorted by sample ID
    params_np = np.array([[param_sample_dic[param] for param in problem['names']] for param_sample_dic in params_dict["samples"].values()])
    # Set the times labels sorted by time values
    if qoi_time_values is None:
        if 'time' in df_qois.index.names:
            qoi_time_values = df_qois.index.get_level_values('time').unique().to_series().sort_values().to_dict()
        else:
            qoi_time_values = _set_time_labels(df_qois)
    # SA results dictionary
    sa_results_dict = { qoi: {} for qoi in qoi_names }
    for qoi in qoi_names: 
        for time_id, time_label in enumerate(qoi_time_values.keys()):
            # Generate qoi_result_np - it is sorted by sample ID
            if 'time' in df_qois.index.names:
                df_filtered = df_qois.reset_index()
                qoi_result_np = (
                    df_filtered[df_filtered['time'] == qoi_time_values[time_label]]
                    .sort_values(by='SampleID')[qoi]
                    .to_numpy()
                )
                if np.isnan(qoi_result_np).all(): continue # Skip if QoI at this time does not exist
            elif qoi_time_values is None:
                if f"{qoi}_{time_label}" not in df_qois.columns: continue # Skip if QoI at this time does not exist
                if 'SampleID' in df_qois.columns:
                    qoi_result_np = df_qois[f"{qoi}_{time_label}"].sort_values(by='SampleID').to_numpy()
                elif 'SampleID' in df_qois.index.names:
                    qoi_result_np = df_qois[f"{qoi}_{time_label}"].sort_index(level='SampleID').to_numpy()
                else:
                    raise ValueError(f"Error: 'SampleID' must be present as a column or index in df_qois to sort QoI results for method '{method}'.")
            else:
                if f"{qoi}_{time_id}" not in df_qois.columns: continue # Skip if QoI at this time does not exist
                if 'SampleID' in df_qois.columns:
                    qoi_result_np = df_qois[f"{qoi}_{time_id}"].sort_values(by='SampleID').to_numpy()
                elif 'SampleID' in df_qois.index.names:
                    qoi_result_np = df_qois[f"{qoi}_{time_id}"].sort_index(level='SampleID').to_numpy()
                else:
                    raise ValueError(f"Error: 'SampleID' must be present as a column or index in df_qois to sort QoI results for method '{method}'.")
            if len(qoi_result_np) != len(params_np):
                raise ValueError(f"Error: Mismatch between number of samples ({len(params_np)}) and QoI results ({len(qoi_result_np)})!")
            print(f"Running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]}") 

            try:
                # Run the analysis based on the selected method
                if method == "FAST - Fourier Amplitude Sensitivity Test":
                    sa_results_dict[qoi][time_label] = fast_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test":
                    sa_results_dict[qoi][time_label] = rbd_fast_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Fractional Factorial":
                    sa_results_dict[qoi][time_label] = ff_analyze.analyze(problem, params_np, qoi_result_np, second_order=True)
                elif method == "PAWN Sensitivity Analysis":
                    sa_results_dict[qoi][time_label] = pawn_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Derivative-based Global Sensitivity Measure (DGSM)":
                    sa_results_dict[qoi][time_label] = dgsm_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "High-Dimensional Model Representation":
                    sa_results_dict[qoi][time_label] = hdmr_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Regional Sensitivity Analysis":
                    sa_results_dict[qoi][time_label] = rsa_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Discrepancy Sensitivity Indices":
                    sa_results_dict[qoi][time_label] = discrepancy_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Delta Moment-Independent Measure":
                    sa_results_dict[qoi][time_label] = delta_analyze.analyze(problem, params_np, qoi_result_np)
                elif method == "Sobol Sensitivity Analysis":
                    sa_results_dict[qoi][time_label] = sobol_analyze.analyze(problem, qoi_result_np)
            except Exception as e:
                raise ValueError(f"Error running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]} - {e}")

    return sa_results_dict, qoi_time_values



[docs]
def OAT_analyze(dic_samples: dict, dic_qoi: dict, sample_ref: int = 0) -> dict:
    """Perform One-At-a-Time (OAT) analysis on the simulation results.
    
    Args:
        dic_samples (dict): Dictionary of parameter sample dictionaries, where each key
            is a sample ID and each value is a dictionary of parameter names and values.
        dic_qoi (dict): Dictionary of Quantities of Interest (QoI) values, where each
            key is a sample ID and each value is the corresponding QoI result.
        sample_ref (int, optional): Sample ID to use as the reference for OAT analysis.
            Defaults to 0.
    
    Returns:
        dict: Dictionary containing sensitivity indices for each parameter. Keys are 
            parameter names and values are arrays of sensitivity indices for each
            perturbation.
    
    Note:
        The specified sample is treated as the reference sample. All other samples are compared
        against this reference to compute sensitivity indices.
    """
    # Extract parameter samples and QoI samples
    par_samples = np.array([list(sample.values()) for sample in dic_samples.values()])
    # Normalize the parameter samples
    par_samples = (par_samples - par_samples.min(axis=0)) / (par_samples.max(axis=0) - par_samples.min(axis=0))
    # sample_ref is the reference sample
    ref_pars = par_samples[sample_ref]; par_samples = np.delete(par_samples, sample_ref, axis=0)
    qoi_ref = dic_qoi[sample_ref]
    # Extract QoI samples, excluding the reference sample (SampleID different of 0)
    qoi_samples = np.array([qoi for sample_id, qoi in dic_qoi.items() if sample_id != sample_ref])
    # Initialize the results dictionary
    dic_results = {}
    for id, par in enumerate(dic_samples[sample_ref].keys()):
        # Calculate the mean and std deviation of the QoIs for each parameter
        par_var = np.abs(par_samples[:, id] - ref_pars[id])
        non_zero_indices = np.where(par_var != 0)[0] # As one-at-a-time, only one parameter is perturbed at a time, so we can skip the zero variance cases
        dic_results[par] = np.abs(qoi_samples[non_zero_indices] - qoi_ref) / par_var[non_zero_indices]  # Compute SI without skipping

    return dic_results



[docs]
def run_local_sa(params_dict: dict, qoi_names: list, df_qois: pd.DataFrame, method: str = "OAT", sample_ref: int = 0) -> tuple:
    """Run local sensitivity analysis using the One-At-a-Time (OAT) method.
    
    Args:
        params_dict (dict): Dictionary containing parameter names, properties, and sample values.
            Must include a 'samples' key with parameter sample dictionaries.
        qoi_names (list): List of QoI names to analyze.
        df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as
            '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index.
        method (str, optional): Local sensitivity analysis method. Currently only 'OAT'
            (One-At-a-Time) is supported. Defaults to "OAT".
        sample_ref (int, optional): Sample ID to use as the reference for OAT analysis.
            Defaults to 0.
    
    Returns:
        tuple: A tuple containing:
            - sa_results_dict (dict): Nested dictionary with sensitivity analysis results.
              Structure: {qoi_name: {time_label: {param_name: sensitivity_index}}}
            - qoi_time_values (dict): Dictionary mapping time labels to their values,
              sorted by time.
    
    Note:
        The OAT method computes sensitivity indices by comparing parameter perturbations
        against a reference sample (sample 0). Results are summed across all perturbations
        for each parameter.
    """
    # Get parameter names
    param_names = [key for key in params_dict.keys() if key != "samples"]
    # Set the times labels sorted by time values
    if 'time' in df_qois.index.names:
        qoi_time_values = df_qois.index.get_level_values('time').unique().to_series().sort_values().to_dict()
    else:
        qoi_time_values = _set_time_labels(df_qois)
    # SA results dictionary
    sa_results_dict = { qoi: {} for qoi in qoi_names }
    for qoi in qoi_names:
        for time_id, time_label in enumerate(qoi_time_values.keys()):
            if 'time' in df_qois.index.names:
                df_filtered = df_qois.reset_index()
                qoi_result_dict = (
                    df_filtered[df_filtered['time'] == qoi_time_values[time_label]]
                    .set_index('SampleID')[qoi]
                    .to_dict()
                )
                if np.isnan(list(qoi_result_dict.values())).all(): continue # Skip if QoI at this time does not exist
            else:
                if f"{qoi}_{time_id}" not in df_qois.columns: continue # Skip if QoI at this time does not exist
                qoi_result_dict = df_qois[f"{qoi}_{time_id}"].to_dict()
            if len(qoi_result_dict) != len(params_dict["samples"]):
                raise ValueError(f"Error: Mismatch between number of samples ({len(params_dict['samples'])}) and QoI results ({len(qoi_result_dict)})!")
            print(f"Running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]}")
            # Return a dictionary of sensitivity indices for each perturbation
            sa_results_dict[qoi][time_label] = OAT_analyze(params_dict["samples"], qoi_result_dict, sample_ref=sample_ref)
            # Overwrite the results for perturbations by summing them up
            for key in sa_results_dict[qoi][time_label]: sa_results_dict[qoi][time_label][key] = np.sum(sa_results_dict[qoi][time_label][key])

    return sa_results_dict, qoi_time_values



[docs]
def get_sa_results(dbfile: str, qoi_names: list, df_qois: pd.DataFrame, method: str, sample_ref: int = 0, qoi_time_values: dict = None) -> tuple:
    """Get sensitivity analysis results for Global and Local methods.
    
    Args:
        dbfile (str): Path to the database file containing simulation results.
        qoi_names (list): List of QoI names to analyze.
        df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as
            '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index.
        method (str): Name of the sensitivity analysis method to use. Supported methods
            include 'FAST - Fourier Amplitude Sensitivity Test', 'Sobol Sensitivity Analysis',
            'PAWN Sensitivity Analysis', etc. For local sensitivity analysis, use "OAT".
        sample_ref (int, optional): Sample ID to use as the reference for OAT analysis.
            Defaults to 0. Only used for local sensitivity analysis with method "OAT".
        qoi_time_values (dict, optional): Dictionary mapping time labels to their values.
            If None, time labels will be extracted from df_qois. Defaults to None.
    Returns:
        tuple: A tuple containing:
            - sa_results_dict (dict): Nested dictionary with sensitivity analysis results.
              Structure: {qoi_name: {time_label: analysis_results}}
            - qoi_time_values (dict): Dictionary mapping time labels to their values,
              sorted by time.
    """
 

    Sampler = load_metadata(dbfile)['Sampler'].values[0]
    # Extract parameter samples from the database file
    if Sampler in samplers_to_method and method in samplers_to_method[Sampler]:
        if Sampler == "OAT":
            params_dict = get_local_SA_parameters(dbfile)
        else:
            params_dict = get_global_SA_parameters(dbfile)
    else:
        raise ValueError(f"Error: The specified method '{method}' is not compatible with the sampler '{Sampler}' used in the database. Please choose a compatible method for this sampler: {samplers_to_method.get(Sampler, 'No compatible methods found for this sampler.')}")
    # Run the appropriate sensitivity analysis method
    print(f"Running sensitivity analysis with method: {method}")
    if method == "OAT":
        sa_results_dict, qoi_time_values = run_local_sa(params_dict, qoi_names, df_qois, method=method, sample_ref=sample_ref)
    else:
        sa_results_dict, qoi_time_values = run_global_sa(params_dict, qoi_names, df_qois, method=method, qoi_time_values=qoi_time_values)

    return sa_results_dict, qoi_time_values