|
|
25a0fb6 |
from __future__ import division
|
|
|
25a0fb6 |
from __future__ import print_function
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
from scipy.stats import norm, gaussian_kde, rankdata
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
import numpy as np
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
from . import common_args
|
|
|
25a0fb6 |
from ..util import read_param_file, ResultDict
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def analyze(problem, X, Y, num_resamples=10,
|
|
|
25a0fb6 |
conf_level=0.95, print_to_console=False, seed=None):
|
|
|
25a0fb6 |
"""Perform Delta Moment-Independent Analysis on model outputs.
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
Returns a dictionary with keys 'delta', 'delta_conf', 'S1', and 'S1_conf',
|
|
|
25a0fb6 |
where each entry is a list of size D (the number of parameters) containing
|
|
|
25a0fb6 |
the indices in the same order as the parameter file.
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
Parameters
|
|
|
25a0fb6 |
----------
|
|
|
25a0fb6 |
problem : dict
|
|
|
25a0fb6 |
The problem definition
|
|
|
25a0fb6 |
X: numpy.matrix
|
|
|
25a0fb6 |
A NumPy matrix containing the model inputs
|
|
|
25a0fb6 |
Y : numpy.array
|
|
|
25a0fb6 |
A NumPy array containing the model outputs
|
|
|
25a0fb6 |
num_resamples : int
|
|
|
25a0fb6 |
The number of resamples when computing confidence intervals (default 10)
|
|
|
25a0fb6 |
conf_level : float
|
|
|
25a0fb6 |
The confidence interval level (default 0.95)
|
|
|
25a0fb6 |
print_to_console : bool
|
|
|
25a0fb6 |
Print results directly to console (default False)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
References
|
|
|
25a0fb6 |
----------
|
|
|
25a0fb6 |
.. [1] Borgonovo, E. (2007). "A new uncertainty importance measure."
|
|
|
25a0fb6 |
Reliability Engineering & System Safety, 92(6):771-784,
|
|
|
25a0fb6 |
doi:10.1016/j.ress.2006.04.015.
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
.. [2] Plischke, E., E. Borgonovo, and C. L. Smith (2013). "Global
|
|
|
25a0fb6 |
sensitivity measures from given data." European Journal of
|
|
|
25a0fb6 |
Operational Research, 226(3):536-550, doi:10.1016/j.ejor.2012.11.047.
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
Examples
|
|
|
25a0fb6 |
--------
|
|
|
25a0fb6 |
>>> X = latin.sample(problem, 1000)
|
|
|
25a0fb6 |
>>> Y = Ishigami.evaluate(X)
|
|
|
25a0fb6 |
>>> Si = delta.analyze(problem, X, Y, print_to_console=True)
|
|
|
25a0fb6 |
"""
|
|
|
25a0fb6 |
if seed:
|
|
|
25a0fb6 |
np.random.seed(seed)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
D = problem['num_vars']
|
|
|
25a0fb6 |
N = Y.size
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
if not 0 < conf_level < 1:
|
|
|
25a0fb6 |
raise RuntimeError("Confidence level must be between 0-1.")
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
# equal frequency partition
|
|
|
25a0fb6 |
exp = (2 / (7 + np.tanh((1500 - N) / 500)))
|
|
|
25a0fb6 |
M = int(np.round( min(int(np.ceil(N**exp)), 48) ))
|
|
|
25a0fb6 |
m = np.linspace(0, N, M + 1)
|
|
|
25a0fb6 |
Ygrid = np.linspace(np.min(Y), np.max(Y), 100)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
keys = ('delta', 'delta_conf', 'S1', 'S1_conf')
|
|
|
25a0fb6 |
S = ResultDict((k, np.zeros(D)) for k in keys)
|
|
|
25a0fb6 |
S['names'] = problem['names']
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
if print_to_console:
|
|
|
25a0fb6 |
print("Parameter %s %s %s %s" % keys)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
try:
|
|
|
25a0fb6 |
for i in range(D):
|
|
|
25a0fb6 |
X_i = X[:, i]
|
|
|
25a0fb6 |
S['delta'][i], S['delta_conf'][i] = bias_reduced_delta(
|
|
|
25a0fb6 |
Y, Ygrid, X_i, m, num_resamples, conf_level)
|
|
|
25a0fb6 |
S['S1'][i] = sobol_first(Y, X_i, m)
|
|
|
25a0fb6 |
S['S1_conf'][i] = sobol_first_conf(
|
|
|
25a0fb6 |
Y, X_i, m, num_resamples, conf_level)
|
|
|
25a0fb6 |
if print_to_console:
|
|
|
25a0fb6 |
print("%s %f %f %f %f" % (S['names'][i], S['delta'][
|
|
|
25a0fb6 |
i], S['delta_conf'][i], S['S1'][i], S['S1_conf'][i]))
|
|
|
25a0fb6 |
except np.linalg.LinAlgError as e:
|
|
|
25a0fb6 |
msg = "Singular matrix detected\n"
|
|
|
25a0fb6 |
msg += "This may be due to the sample size ({}) being too small\n".format(Y.size)
|
|
|
25a0fb6 |
msg += "If this is not the case, check Y values or raise an issue with the\n"
|
|
|
25a0fb6 |
msg += "SALib team"
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
raise np.linalg.LinAlgError(msg)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
return S
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
# Plischke et al. 2013 estimator (eqn 26) for d_hat
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def calc_delta(Y, Ygrid, X, m):
|
|
|
25a0fb6 |
N = len(Y)
|
|
|
25a0fb6 |
fy = gaussian_kde(Y, bw_method='silverman')(Ygrid)
|
|
|
25a0fb6 |
abs_fy = np.abs(fy)
|
|
|
25a0fb6 |
xr = rankdata(X, method='ordinal')
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
d_hat = 0
|
|
|
25a0fb6 |
for j in range(len(m) - 1):
|
|
|
25a0fb6 |
ix = np.where((xr > m[j]) & (xr <= m[j + 1]))[0]
|
|
|
25a0fb6 |
nm = len(ix)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
Y_ix = Y[ix]
|
|
|
25a0fb6 |
if not np.all(np.equal(Y_ix, Y_ix[0])):
|
|
|
25a0fb6 |
fyc = gaussian_kde(Y_ix, bw_method='silverman')(Ygrid)
|
|
|
25a0fb6 |
fy_ = np.abs(fy - fyc)
|
|
|
25a0fb6 |
else:
|
|
|
25a0fb6 |
fy_ = abs_fy
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
d_hat += (nm / (2 * N)) * np.trapz(fy_, Ygrid)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
return d_hat
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def bias_reduced_delta(Y, Ygrid, X, m, num_resamples, conf_level):
|
|
|
25a0fb6 |
"""Plischke et al. 2013 bias reduction technique (eqn 30)"""
|
|
|
25a0fb6 |
d = np.zeros(num_resamples)
|
|
|
25a0fb6 |
d_hat = calc_delta(Y, Ygrid, X, m)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
N = len(Y)
|
|
|
25a0fb6 |
r = np.random.randint(N, size=(num_resamples, N))
|
|
|
25a0fb6 |
for i in range(num_resamples):
|
|
|
25a0fb6 |
r_i = r[i, :]
|
|
|
25a0fb6 |
d[i] = calc_delta(Y[r_i], Ygrid, X[r_i], m)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
d = 2 * d_hat - d
|
|
|
25a0fb6 |
return (d.mean(), norm.ppf(0.5 + conf_level / 2) * d.std(ddof=1))
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def sobol_first(Y, X, m):
|
|
|
25a0fb6 |
xr = rankdata(X, method='ordinal')
|
|
|
25a0fb6 |
Vi = 0
|
|
|
25a0fb6 |
N = len(Y)
|
|
|
25a0fb6 |
Y_mean = Y.mean()
|
|
|
25a0fb6 |
for j in range(len(m) - 1):
|
|
|
25a0fb6 |
ix = np.where((xr > m[j]) & (xr <= m[j + 1]))[0]
|
|
|
25a0fb6 |
nm = len(ix)
|
|
|
25a0fb6 |
Vi += (nm / N) * ((Y[ix].mean() - Y_mean)**2)
|
|
|
25a0fb6 |
return Vi / np.var(Y)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def sobol_first_conf(Y, X, m, num_resamples, conf_level):
|
|
|
25a0fb6 |
s = np.zeros(num_resamples)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
N = len(Y)
|
|
|
25a0fb6 |
r = np.random.randint(N, size=(num_resamples, N))
|
|
|
25a0fb6 |
for i in range(num_resamples):
|
|
|
25a0fb6 |
r_i = r[i, :]
|
|
|
25a0fb6 |
s[i] = sobol_first(Y[r_i], X[r_i], m)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
return norm.ppf(0.5 + conf_level / 2) * s.std(ddof=1)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def cli_parse(parser):
|
|
|
25a0fb6 |
parser.add_argument('-X', '--model-input-file', type=str, required=True,
|
|
|
25a0fb6 |
default=None,
|
|
|
25a0fb6 |
help='Model input file')
|
|
|
25a0fb6 |
parser.add_argument('-r', '--resamples', type=int, required=False,
|
|
|
25a0fb6 |
default=10,
|
|
|
25a0fb6 |
help='Number of bootstrap resamples for \
|
|
|
25a0fb6 |
Sobol confidence intervals')
|
|
|
25a0fb6 |
return parser
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
def cli_action(args):
|
|
|
25a0fb6 |
problem = read_param_file(args.paramfile)
|
|
|
25a0fb6 |
Y = np.loadtxt(args.model_output_file,
|
|
|
25a0fb6 |
delimiter=args.delimiter, usecols=(args.column,))
|
|
|
25a0fb6 |
X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
|
|
|
25a0fb6 |
if len(X.shape) == 1:
|
|
|
25a0fb6 |
X = X.reshape((len(X), 1))
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
analyze(problem, X, Y, num_resamples=args.resamples, print_to_console=True,
|
|
|
25a0fb6 |
seed=args.seed)
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
|
|
|
25a0fb6 |
if __name__ == "__main__":
|
|
|
25a0fb6 |
common_args.run_cli(cli_parse, cli_action)
|