Skip to content

Commit f60410b

Browse files
committed
Minor improvements to the bootlm
Various improvements relating to the 'auto' prior option of the 'bayesian' method - Performance enhancement in cases where sample sizes (and therefore the automatically calculated prior) are equal; only one simulation is necessary, rather than a separate simulation for each sample. - Rather than using a single prior for bayesian bootstrap of two samples in the posthoc tests, `bootlm` now uses a separate prior for each sample. This is a better approach when sample sizes are unequal. - bumped version number
1 parent 84d0fa4 commit f60410b

File tree

3 files changed

+113
-59
lines changed

3 files changed

+113
-59
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: statistics-resampling
2-
version: 5.5.17
3-
date: 2024-05-25
2+
version: 5.5.18
3+
date: 2024-06-03
44
author: Andrew Penn <[email protected]>
55
maintainer: Andrew Penn <[email protected]>
66
title: A statistics package with a variety of resampling tools

inst/bootlm.m

Lines changed: 110 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@
377377
% calculated exclude variability attributed to other predictors in
378378
% the model. To avoid small sample bias inflating effect sizes for
379379
% posthoc comparisons when use the 'bayesian' method, use the 'auto'
380-
% setting for the prior.
380+
% setting for the prior.
381381
%
382382
% '[...] = bootlm (Y, GROUP, ..., 'seed', SEED)' initialises the Mersenne
383383
% Twister random number generator using an integer SEED value so that
@@ -393,8 +393,11 @@
393393
% - 'CI_upper': The upper bound(s) of the confidence/credible interval(s)
394394
% - 'pval': The p-value(s) for the hypothesis that the estimate(s) == 0
395395
% - 'fpr': The minimum false positive risk (FPR) for each p-value
396-
% - 'N': The number of independnet sampling units used to compute CIs
397-
% - 'prior': The prior used for Bayesian bootstrap
396+
% - 'N': The number of independent sampling units used to compute CIs
397+
% - 'prior': The prior used for Bayesian bootstrap. This will return a
398+
% scalar for regression coeefficients, or a P x 1 or P x 2
399+
% matrix for estimated marginal means or posthoc tests
400+
% respectively, where P is the number of parameter estimates.
398401
% - 'levels': A cell array with the levels of each predictor.
399402
% - 'contrasts': A cell array of contrasts associated with each predictor.
400403
%
@@ -680,7 +683,7 @@
680683
GROUP(excl,:) = [];
681684
end
682685
Y(excl) = [];
683-
if ((~ isempty(DEP)) && (~ isscalar(DEP)))
686+
if ((~ isempty (DEP)) && (~ isscalar (DEP)))
684687
DEP(excl) = [];
685688
end
686689
n = numel (Y); % Recalculate total number of observations
@@ -1111,34 +1114,43 @@
11111114
switch (lower (PRIOR))
11121115
case 'auto'
11131116
PRIOR = 1 - 2 ./ N_dim;
1114-
% Use parallel processing if it is available to accelerate
1115-
% bootstrap computation for each column of the hypothesis
1116-
% matrix.
1117-
if (PARALLEL)
1118-
if (ISOCTAVE)
1119-
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
1120-
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
1121-
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
1122-
'UniformOutput', false);
1123-
else
1124-
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
1125-
parfor i = 1:Np
1126-
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
1127-
NBOOT, fliplr (1 - ALPHA), ...
1128-
PRIOR(i), SEED, L(:, i), ISOCTAVE);
1117+
if (all (PRIOR == PRIOR(1)))
1118+
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
1119+
fliplr (1 - ALPHA), PRIOR(1), SEED, L, ISOCTAVE);
1120+
STATS.prior = PRIOR;
1121+
else
1122+
% If the prior is not the same across all the samples then
1123+
% we need to perform a separate simulation for each sample.
1124+
% Use parallel processing if it is available to accelerate
1125+
% bootstrap computation for each column of the hypothesis
1126+
% matrix.
1127+
if (PARALLEL)
1128+
if (ISOCTAVE)
1129+
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
1130+
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
1131+
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
1132+
'UniformOutput', false);
1133+
else
1134+
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
1135+
parfor i = 1:Np
1136+
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
1137+
NBOOT, fliplr (1 - ALPHA), ...
1138+
PRIOR(i), SEED, L(:, i), ISOCTAVE);
1139+
end
11291140
end
1141+
else
1142+
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
1143+
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
1144+
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
11301145
end
1131-
else
1132-
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
1133-
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
1134-
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
1146+
STATS = flatten_struct (cell2mat (STATS));
1147+
BOOTSTAT = cell2mat (BOOTSTAT);
11351148
end
1136-
STATS = flatten_struct (cell2mat (STATS));
1137-
BOOTSTAT = cell2mat (BOOTSTAT);
11381149
otherwise
11391150
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
11401151
fliplr (1 - ALPHA), PRIOR, SEED, ...
11411152
L, ISOCTAVE);
1153+
STATS.prior = PRIOR * ones (Np, 1);
11421154
end
11431155
% Create empty fields in STATS structure
11441156
STATS.pval = [];
@@ -1166,7 +1178,6 @@
11661178
' control group for ''trt_vs_ctrl'''))
11671179
end
11681180
pairs = feval (POSTHOC{1}, L, POSTHOC{2:end});
1169-
L = make_test_matrix (L, pairs);
11701181
POSTHOC = POSTHOC{1};
11711182
case 'char'
11721183
if (strcmp (POSTHOC, 'trt.vs.ctrl'))
@@ -1176,18 +1187,18 @@
11761187
' ''pairwise'' and ''trt_vs_ctrl'''))
11771188
end
11781189
pairs = feval (POSTHOC, L);
1179-
L = make_test_matrix (L, pairs);
11801190
otherwise
11811191
pairs = unique_stable (POSTHOC, 'rows');
11821192
if (size (pairs, 2) > 2)
11831193
error (cat (2, 'bootlm: pairs matrix defining posthoc', ...
11841194
' comparisons must have exactly two columns'))
11851195
end
1186-
L = make_test_matrix (L, pairs);
11871196
end
1188-
Np = size (pairs, 1); % Update the number of parameters
11891197
switch (lower (METHOD))
11901198
case 'wild'
1199+
% Modifying the hypothesis matrix (L) to perform the desired tests
1200+
L = make_test_matrix (L, pairs);
1201+
Np = size (pairs, 1); % Update the number of parameters
11911202
[STATS, BOOTSTAT] = bootwild (Y, X, DEP, NBOOT, ALPHA, SEED, ...
11921203
L, ISOCTAVE);
11931204
% Control the type 1 error rate across multiple comparisons
@@ -1199,40 +1210,70 @@
11991210
case {'bayes', 'bayesian'}
12001211
switch (lower (PRIOR))
12011212
case 'auto'
1202-
wgt = bsxfun (@rdivide, N_dim(pairs')', ...
1203-
sum (N_dim(pairs')', 2));
1204-
PRIOR = sum ((1 - wgt) .* (1 - 2 ./ N_dim(pairs')'), 2);
1205-
% Use parallel processing if it is available to accelerate
1206-
% bootstrap computation for each column of the hypothesis
1207-
% matrix.
1208-
if (PARALLEL)
1209-
if (ISOCTAVE)
1210-
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
1211-
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
1212-
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
1213-
'UniformOutput', false);
1214-
else
1215-
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
1216-
parfor i = 1:Np
1217-
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
1218-
NBOOT, fliplr (1 - ALPHA), ...
1219-
PRIOR(i), SEED, L(:, i), ISOCTAVE);
1213+
% To enable the 'auto' prior to be set independently on each
1214+
% sample, we use the hypothesis matrix (L) already generated
1215+
% to compute the posterior distributions for the estimated
1216+
% marginal means each with their own prior. Later, we will
1217+
% perform elementwise subtraction for each pair of posterior
1218+
% distributions to find the differences
1219+
PRIOR = 1 - 2 ./ N_dim;
1220+
if (all (PRIOR == PRIOR(1)))
1221+
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
1222+
fliplr (1 - ALPHA), PRIOR(1), SEED, L, ISOCTAVE);
1223+
else
1224+
% If the prior is not the same across all the samples then
1225+
% we need to perform a separate simulation for each sample.
1226+
% Use parallel processing if it is available to accelerate
1227+
% bootstrap computation for each column of the hypothesis
1228+
% matrix.
1229+
if (PARALLEL)
1230+
if (ISOCTAVE)
1231+
[STATS, BOOTSTAT] = pararrayfun (inf, @(i) bootbayes ( ...
1232+
Y, X, DEP, NBOOT, fliplr (1 - ALPHA), ...
1233+
PRIOR(i), SEED, L(:, i), ISOCTAVE), (1:Np)', ...
1234+
'UniformOutput', false);
1235+
else
1236+
STATS = cell (Np, 1); BOOTSTAT = cell (Np, 1);
1237+
parfor i = 1:Np
1238+
[STATS{i}, BOOTSTAT{i}] = bootbayes (Y, X, DEP, ...
1239+
NBOOT, fliplr (1 - ALPHA), ...
1240+
PRIOR(i), SEED, L(:, i), ISOCTAVE);
1241+
end
12201242
end
1243+
else
1244+
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
1245+
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
1246+
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
12211247
end
1222-
else
1223-
[STATS, BOOTSTAT] = arrayfun (@(i) bootbayes (Y, X, ...
1224-
DEP, NBOOT, fliplr (1 - ALPHA), PRIOR(i), SEED, ...
1225-
L(:, i), ISOCTAVE), (1:Np)', 'UniformOutput', false);
1248+
STATS = flatten_struct (cell2mat (STATS));
1249+
BOOTSTAT = cell2mat (BOOTSTAT);
12261250
end
1227-
STATS = flatten_struct (cell2mat (STATS));
1228-
BOOTSTAT = cell2mat (BOOTSTAT);
12291251
otherwise
1252+
% Compute the posterior distributions for the estimated
1253+
% marginal means each with the PRIOR. Later, we will perform
1254+
% elementwise subtraction for each pair of posterior
1255+
% distributions to find the differences
12301256
[STATS, BOOTSTAT] = bootbayes (Y, X, DEP, NBOOT, ...
1231-
fliplr (1 - ALPHA), PRIOR, SEED, L, ISOCTAVE);
1257+
fliplr (1 - ALPHA), PRIOR, SEED, ...
1258+
L, ISOCTAVE);
12321259
end
12331260
% Create empty fields in STATS structure
12341261
STATS.pval = [];
12351262
STATS.fpr = [];
1263+
% Calculate estimates and credible intervals from the estimated
1264+
% marginal means and their associated posterior distributions
1265+
Np = size (pairs, 1); % Update the number of parameters
1266+
STATS.original = bsxfun (@minus, STATS.original(pairs(:, 1), :), ...
1267+
STATS.original(pairs(:, 2), :));
1268+
BOOTSTAT = bsxfun (@minus, BOOTSTAT(pairs(:, 1),:), ...
1269+
BOOTSTAT(pairs(:, 2),:));
1270+
CI = credint (BOOTSTAT, fliplr (1 - ALPHA));
1271+
STATS.CI_lower = CI(:,1); STATS.CI_upper = CI(:,2);
1272+
if (isscalar (PRIOR))
1273+
STATS.prior = PRIOR * ones (Np, 2);
1274+
else
1275+
STATS.prior = cat (2, PRIOR(pairs(:, 1)), PRIOR(pairs(:, 2)));
1276+
end
12361277
otherwise
12371278
error (cat (2, 'bootlm: unrecignised bootstrap method.', ...
12381279
' Use ''wild'' or ''bayesian''.'))
@@ -1347,9 +1388,16 @@
13471388
% If applicable, print parameter estimates (a.k.a contrasts) for fixed
13481389
% effects. Parameter estimates correspond to the contrasts we set.
13491390
if (isempty (DIM))
1391+
switch (lower (METHOD))
1392+
case 'wild'
1393+
p_str = 'p-val';
1394+
case {'bayes', 'bayesian'}
1395+
p_str = ' ';
1396+
end
13501397
fprintf ('\nMODEL COEFFICIENTS\n\n');
1398+
13511399
fprintf (cat (2, 'name coeff', ...
1352-
' CI_lower CI_upper p-val\n'));
1400+
' CI_lower CI_upper %s\n'), p_str);
13531401
fprintf (cat (2, '--------------------------------------------', ...
13541402
'------------------------------------\n'));
13551403
else
@@ -1361,9 +1409,15 @@
13611409
fprintf (cat (2, '---------------------------------------', ...
13621410
'-----------------------------------------\n'));
13631411
otherwise
1412+
switch (lower (METHOD))
1413+
case 'wild'
1414+
p_str = 'p-adj';
1415+
case {'bayes', 'bayesian'}
1416+
p_str = ' ';
1417+
end
13641418
fprintf ('\nMODEL POSTHOC COMPARISONS\n\n');
13651419
fprintf (cat (2, 'name ', ...
1366-
'mean CI_lower CI_upper p-adj\n'));
1420+
'mean CI_lower CI_upper %s\n'), p_str);
13671421
fprintf (cat (2, '---------------------------------------', ...
13681422
'-----------------------------------------\n'));
13691423
end

inst/bootwild.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@
450450
% Calculate the false-positive risk from the minumum Bayes Factor
451451
L10 = 1 ./ minBF; % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0))
452452
fpr = max (0, 1 ./ (1 + L10)); % Calculate minimum false positive risk
453-
fpr(isnan(p)) = NaN;
453+
fpr(isnan (p)) = NaN;
454454

455455
end
456456

0 commit comments

Comments
 (0)