Source code for uq_physicell.model_analysis.sensitivity_analysis

import numpy as np
import pandas as pd
from SALib.analyze import fast as fast_analyze, rbd_fast as rbd_fast_analyze, ff as ff_analyze, pawn as pawn_analyze, dgsm as dgsm_analyze, enhanced_hdmr as hdmr_analyze, rsa as rsa_analyze, discrepancy as discrepancy_analyze, delta as delta_analyze, sobol as sobol_analyze

from ..database.ma_db import load_parameter_space, load_samples, load_metadata

# Compatibility of samplers with methods following the SALib library
# https://salib.readthedocs.io/en/latest/index.html
# https://salib.readthedocs.io/en/latest/user-guide/analysis.html
# https://salib.readthedocs.io/en/latest/user-guide/sampling.html

# Issues with methods:
# Discrepancy Sensitivity Indices: Error Bounds are not consistent 'l_bounds' < 'u_bounds'
# Delta Moment-Independent Measure: Error module 'numpy' has no attribute 'trapezoid' (outdated numpy version)
samplers_to_method = {
    "OAT": [
        "OAT - One-At-A-Time",
    ],
    "Fast": [
        "FAST - Fourier Amplitude Sensitivity Test",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Fractional Factorial": [
        "Fractional Factorial",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Finite Difference": [
        "Derivative-based Global Sensitivity Measure (DGSM)",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Latin hypercube sampling (LHS)": [
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ],
    "Sobol": [
        "Sobol Sensitivity Analysis",
        "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test",
        "Delta Moment-Independent Measure",
        "PAWN Sensitivity Analysis",
        "High-Dimensional Model Representation",
        "Regional Sensitivity Analysis",
        "Discrepancy Sensitivity Indices"
    ]
}

def _set_time_labels(df_qois: pd.DataFrame) -> dict:
    """Set and validate time labels for QoI values.
    
    Args:
        df_qois (pd.DataFrame): DataFrame containing the QoI values with time columns.
    
    Returns:
        dict: Dictionary with time labels as keys and their corresponding time values
            as values, sorted by time values.
    
    Raises:
        ValueError: If more than one unique value is found for any time label.
    """
    # Check if qoi_time_values is empty
    qoi_time_values = {}
    all_times_label = [col for col in df_qois.columns if col.startswith("time")]

    # Ensure all times are present in the qoi_time_values
    for time_label in all_times_label:
        unique_values = df_qois[time_label].unique()
        if len(unique_values) == 1:
            qoi_time_values[time_label] = unique_values[0]
        else:
            raise ValueError(f"Error: More than one unique value for time label '{time_label}': {unique_values}.")

    # Sort the time values
    qoi_time_values = dict(sorted(qoi_time_values.items(), key=lambda item: item[1]))
    return qoi_time_values

def _get_SA_problem(params_dict: dict) -> dict:
    """Create a SALib problem dictionary from parameter definitions.
    
    This function converts parameter definitions to the format required by
    the SALib (Sensitivity Analysis Library) for conducting sensitivity analysis.

    Args:
        params_dict (dict): Dictionary containing parameter names as keys and
            parameter properties as values. Each parameter should have
            'lower_bound' and 'upper_bound' keys. The special key 'samples'
            is excluded from the problem definition.

    Returns:
        dict: SALib-formatted problem dictionary containing:
            - num_vars (int): Number of parameters
            - names (list): List of parameter names
            - bounds (list): List of tuples with (lower, upper) bounds for each parameter
    """
    param_names = [key for key in params_dict.keys() if key != "samples"]
    problem = {
        'num_vars': len(param_names),
        'names': param_names,
        'bounds': [(params_dict[key]['lower_bound'], params_dict[key]['upper_bound']) for key in param_names]
    }
    return problem

[docs] def get_global_SA_parameters(db_file): df_parameter_space = load_parameter_space(db_file) global_SA_parameters = {'samples': load_samples(db_file)} for id, param in enumerate(df_parameter_space['ParamName']): global_SA_parameters[param] = {"lower_bound": df_parameter_space['lower_bound'].iloc[id], "upper_bound": df_parameter_space['upper_bound'].iloc[id], "ref_value": df_parameter_space['ref_value'].iloc[id]} perturbation = df_parameter_space['perturbation'].iloc[id] try: global_SA_parameters[param]["perturbation"] = float(perturbation) except Exception as e: print(f"Warning: Could not convert perturbation ({perturbation}) to float for parameter {param}.") # Calculate perturbation as percentage based on bounds and ref_value global_SA_parameters[param]["perturbation"] = 100.0 * (df_parameter_space['upper_bound'].iloc[id]/df_parameter_space['ref_value'].iloc[id] - 1.0) return global_SA_parameters
[docs] def get_local_SA_parameters(db_file): df_parameter_space = load_parameter_space(db_file) local_SA_parameters = {'samples': load_samples(db_file)} for id, param in enumerate(df_parameter_space['ParamName']): perturbation = df_parameter_space['perturbation'].iloc[id] if isinstance(perturbation, list): perturbation = [float(x) for x in perturbation] local_SA_parameters[param] = {"ref_value": df_parameter_space['ref_value'].iloc[id], "perturbation": perturbation} return local_SA_parameters
[docs] def run_global_sa(params_dict: dict, qoi_names: list, df_qois: pd.DataFrame, method: str, qoi_time_values: dict = None) -> tuple: """Run global sensitivity analysis using the specified method. Args: params_dict (dict): Dictionary containing parameter names, properties, and sample values. Must include a 'samples' key with parameter sample dictionaries. qoi_names (list): List of QoI names to analyze. df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index. method (str): Name of the sensitivity analysis method to use. Supported methods include 'FAST - Fourier Amplitude Sensitivity Test', 'Sobol Sensitivity Analysis', 'PAWN Sensitivity Analysis', etc. qoi_time_values (dict, optional): Dictionary mapping time labels to their values. If None, time labels will be extracted from df_qois. Defaults to None. Returns: tuple: A tuple containing: - sa_results_dict (dict): Nested dictionary with sensitivity analysis results. Structure: {qoi_name: {time_label: analysis_results}} - qoi_time_values (dict): Dictionary mapping time labels to their values, sorted by time. Raises: ValueError: If there's a mismatch between number of samples and QoI results, or if the specified method fails during analysis. """ # Get the problem definition for SALib problem = _get_SA_problem(params_dict) # Generate params_np - it is sorted by sample ID params_np = np.array([[param_sample_dic[param] for param in problem['names']] for param_sample_dic in params_dict["samples"].values()]) # Set the times labels sorted by time values if qoi_time_values is None: if 'time' in df_qois.index.names: qoi_time_values = df_qois.index.get_level_values('time').unique().to_series().sort_values().to_dict() else: qoi_time_values = _set_time_labels(df_qois) # SA results dictionary sa_results_dict = { qoi: {} for qoi in qoi_names } for qoi in qoi_names: for time_id, time_label in enumerate(qoi_time_values.keys()): # Generate qoi_result_np - it is sorted by sample ID if 'time' in df_qois.index.names: df_filtered = df_qois.reset_index() qoi_result_np = ( df_filtered[df_filtered['time'] == qoi_time_values[time_label]] .sort_values(by='SampleID')[qoi] .to_numpy() ) if np.isnan(qoi_result_np).all(): continue # Skip if QoI at this time does not exist elif qoi_time_values is None: if f"{qoi}_{time_label}" not in df_qois.columns: continue # Skip if QoI at this time does not exist if 'SampleID' in df_qois.columns: qoi_result_np = df_qois[f"{qoi}_{time_label}"].sort_values(by='SampleID').to_numpy() elif 'SampleID' in df_qois.index.names: qoi_result_np = df_qois[f"{qoi}_{time_label}"].sort_index(level='SampleID').to_numpy() else: raise ValueError(f"Error: 'SampleID' must be present as a column or index in df_qois to sort QoI results for method '{method}'.") else: if f"{qoi}_{time_id}" not in df_qois.columns: continue # Skip if QoI at this time does not exist if 'SampleID' in df_qois.columns: qoi_result_np = df_qois[f"{qoi}_{time_id}"].sort_values(by='SampleID').to_numpy() elif 'SampleID' in df_qois.index.names: qoi_result_np = df_qois[f"{qoi}_{time_id}"].sort_index(level='SampleID').to_numpy() else: raise ValueError(f"Error: 'SampleID' must be present as a column or index in df_qois to sort QoI results for method '{method}'.") if len(qoi_result_np) != len(params_np): raise ValueError(f"Error: Mismatch between number of samples ({len(params_np)}) and QoI results ({len(qoi_result_np)})!") print(f"Running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]}") try: # Run the analysis based on the selected method if method == "FAST - Fourier Amplitude Sensitivity Test": sa_results_dict[qoi][time_label] = fast_analyze.analyze(problem, params_np, qoi_result_np) elif method == "RBD-FAST - Random Balance Designs Fourier Amplitude Sensitivity Test": sa_results_dict[qoi][time_label] = rbd_fast_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Fractional Factorial": sa_results_dict[qoi][time_label] = ff_analyze.analyze(problem, params_np, qoi_result_np, second_order=True) elif method == "PAWN Sensitivity Analysis": sa_results_dict[qoi][time_label] = pawn_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Derivative-based Global Sensitivity Measure (DGSM)": sa_results_dict[qoi][time_label] = dgsm_analyze.analyze(problem, params_np, qoi_result_np) elif method == "High-Dimensional Model Representation": sa_results_dict[qoi][time_label] = hdmr_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Regional Sensitivity Analysis": sa_results_dict[qoi][time_label] = rsa_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Discrepancy Sensitivity Indices": sa_results_dict[qoi][time_label] = discrepancy_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Delta Moment-Independent Measure": sa_results_dict[qoi][time_label] = delta_analyze.analyze(problem, params_np, qoi_result_np) elif method == "Sobol Sensitivity Analysis": sa_results_dict[qoi][time_label] = sobol_analyze.analyze(problem, qoi_result_np) except Exception as e: raise ValueError(f"Error running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]} - {e}") return sa_results_dict, qoi_time_values
[docs] def OAT_analyze(dic_samples: dict, dic_qoi: dict, sample_ref: int = 0) -> dict: """Perform One-At-a-Time (OAT) analysis on the simulation results. Args: dic_samples (dict): Dictionary of parameter sample dictionaries, where each key is a sample ID and each value is a dictionary of parameter names and values. dic_qoi (dict): Dictionary of Quantities of Interest (QoI) values, where each key is a sample ID and each value is the corresponding QoI result. sample_ref (int, optional): Sample ID to use as the reference for OAT analysis. Defaults to 0. Returns: dict: Dictionary containing sensitivity indices for each parameter. Keys are parameter names and values are arrays of sensitivity indices for each perturbation. Note: The specified sample is treated as the reference sample. All other samples are compared against this reference to compute sensitivity indices. """ # Extract parameter samples and QoI samples par_samples = np.array([list(sample.values()) for sample in dic_samples.values()]) # Normalize the parameter samples par_samples = (par_samples - par_samples.min(axis=0)) / (par_samples.max(axis=0) - par_samples.min(axis=0)) # sample_ref is the reference sample ref_pars = par_samples[sample_ref]; par_samples = np.delete(par_samples, sample_ref, axis=0) qoi_ref = dic_qoi[sample_ref] # Extract QoI samples, excluding the reference sample (SampleID different of 0) qoi_samples = np.array([qoi for sample_id, qoi in dic_qoi.items() if sample_id != sample_ref]) # Initialize the results dictionary dic_results = {} for id, par in enumerate(dic_samples[sample_ref].keys()): # Calculate the mean and std deviation of the QoIs for each parameter par_var = np.abs(par_samples[:, id] - ref_pars[id]) non_zero_indices = np.where(par_var != 0)[0] # As one-at-a-time, only one parameter is perturbed at a time, so we can skip the zero variance cases dic_results[par] = np.abs(qoi_samples[non_zero_indices] - qoi_ref) / par_var[non_zero_indices] # Compute SI without skipping return dic_results
[docs] def run_local_sa(params_dict: dict, qoi_names: list, df_qois: pd.DataFrame, method: str = "OAT", sample_ref: int = 0) -> tuple: """Run local sensitivity analysis using the One-At-a-Time (OAT) method. Args: params_dict (dict): Dictionary containing parameter names, properties, and sample values. Must include a 'samples' key with parameter sample dictionaries. qoi_names (list): List of QoI names to analyze. df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index. method (str, optional): Local sensitivity analysis method. Currently only 'OAT' (One-At-a-Time) is supported. Defaults to "OAT". sample_ref (int, optional): Sample ID to use as the reference for OAT analysis. Defaults to 0. Returns: tuple: A tuple containing: - sa_results_dict (dict): Nested dictionary with sensitivity analysis results. Structure: {qoi_name: {time_label: {param_name: sensitivity_index}}} - qoi_time_values (dict): Dictionary mapping time labels to their values, sorted by time. Note: The OAT method computes sensitivity indices by comparing parameter perturbations against a reference sample (sample 0). Results are summed across all perturbations for each parameter. """ # Get parameter names param_names = [key for key in params_dict.keys() if key != "samples"] # Set the times labels sorted by time values if 'time' in df_qois.index.names: qoi_time_values = df_qois.index.get_level_values('time').unique().to_series().sort_values().to_dict() else: qoi_time_values = _set_time_labels(df_qois) # SA results dictionary sa_results_dict = { qoi: {} for qoi in qoi_names } for qoi in qoi_names: for time_id, time_label in enumerate(qoi_time_values.keys()): if 'time' in df_qois.index.names: df_filtered = df_qois.reset_index() qoi_result_dict = ( df_filtered[df_filtered['time'] == qoi_time_values[time_label]] .set_index('SampleID')[qoi] .to_dict() ) if np.isnan(list(qoi_result_dict.values())).all(): continue # Skip if QoI at this time does not exist else: if f"{qoi}_{time_id}" not in df_qois.columns: continue # Skip if QoI at this time does not exist qoi_result_dict = df_qois[f"{qoi}_{time_id}"].to_dict() if len(qoi_result_dict) != len(params_dict["samples"]): raise ValueError(f"Error: Mismatch between number of samples ({len(params_dict['samples'])}) and QoI results ({len(qoi_result_dict)})!") print(f"Running {method} for QoI: {qoi} and time: {qoi_time_values[time_label]}") # Return a dictionary of sensitivity indices for each perturbation sa_results_dict[qoi][time_label] = OAT_analyze(params_dict["samples"], qoi_result_dict, sample_ref=sample_ref) # Overwrite the results for perturbations by summing them up for key in sa_results_dict[qoi][time_label]: sa_results_dict[qoi][time_label][key] = np.sum(sa_results_dict[qoi][time_label][key]) return sa_results_dict, qoi_time_values
[docs] def get_sa_results(dbfile: str, qoi_names: list, df_qois: pd.DataFrame, method: str, sample_ref: int = 0, qoi_time_values: dict = None) -> tuple: """Get sensitivity analysis results for Global and Local methods. Args: dbfile (str): Path to the database file containing simulation results. qoi_names (list): List of QoI names to analyze. df_qois (pd.DataFrame): DataFrame containing QoI values with columns formatted as '{qoi_name}_{time_index}' or '{qoi_name}' and time as an index. method (str): Name of the sensitivity analysis method to use. Supported methods include 'FAST - Fourier Amplitude Sensitivity Test', 'Sobol Sensitivity Analysis', 'PAWN Sensitivity Analysis', etc. For local sensitivity analysis, use "OAT". sample_ref (int, optional): Sample ID to use as the reference for OAT analysis. Defaults to 0. Only used for local sensitivity analysis with method "OAT". qoi_time_values (dict, optional): Dictionary mapping time labels to their values. If None, time labels will be extracted from df_qois. Defaults to None. Returns: tuple: A tuple containing: - sa_results_dict (dict): Nested dictionary with sensitivity analysis results. Structure: {qoi_name: {time_label: analysis_results}} - qoi_time_values (dict): Dictionary mapping time labels to their values, sorted by time. """ Sampler = load_metadata(dbfile)['Sampler'].values[0] # Extract parameter samples from the database file if Sampler in samplers_to_method and method in samplers_to_method[Sampler]: if Sampler == "OAT": params_dict = get_local_SA_parameters(dbfile) else: params_dict = get_global_SA_parameters(dbfile) else: raise ValueError(f"Error: The specified method '{method}' is not compatible with the sampler '{Sampler}' used in the database. Please choose a compatible method for this sampler: {samplers_to_method.get(Sampler, 'No compatible methods found for this sampler.')}") # Run the appropriate sensitivity analysis method print(f"Running sensitivity analysis with method: {method}") if method == "OAT": sa_results_dict, qoi_time_values = run_local_sa(params_dict, qoi_names, df_qois, method=method, sample_ref=sample_ref) else: sa_results_dict, qoi_time_values = run_global_sa(params_dict, qoi_names, df_qois, method=method, qoi_time_values=qoi_time_values) return sa_results_dict, qoi_time_values