library(tidyverse)
library(flipr)
<- 20
ngrid_in <- 100
ngrid_out <- 100000
B <- 30
n1 <- 30
n2 set.seed(1301)
<- rnorm(n1, mean = 0, sd = 1)
x1 <- rnorm(n2, mean = 3, sd = 1)
x2 <- rnorm(n1, mean = 0, sd = 1)
y1 <- rnorm(n2, mean = 0, sd = 2)
y2 <- rnorm(n1, mean = 0, sd = 1)
z1 <- rnorm(n2, mean = 3, sd = 2)
z2
<- function(center_value, min_value, max_value, n) {
generate_grid stopifnot(center_value > min_value && center_value < max_value)
c(
seq(min_value, center_value, len = n / 2 + 1)[1:(n / 2)],
center_value, seq(center_value, max_value, len = n / 2 + 1)[-1]
) }
The concept of \(p\)-value functions pertains to assessing the \(p\)-value of a set of null hypotheses and to plot this \(p\)-value surface on the domain defined by the set of null hypotheses. The idea behind is that if such a \(p\)-value function is available, you can deduce from it point estimates or confidence interval estimates for parameters used to define the nulls or extract a single \(p\)-value for a specific null of interest (Martin 2017; Fraser 2019; Infanger and Schmidt-Trucksäss 2019). In particular, there is another R package dedicated to \(p\)-value functions called pvaluefunctions.
<- stats::t.test(x2, x1, var.equal = TRUE)
test_param <- test_param$conf.int[1] - 1
delta_min <- test_param$conf.int[2] + 1
delta_max <- mean(x2) - mean(x1)
delta_pe <- generate_grid(delta_pe, delta_min, delta_max, ngrid_in)
delta_in
<- function(y, parameters) {
null_spec_mean <- parameters[1]
delta - delta
y
}
<- tibble(
df delta = delta_in,
two_tail = two_sample_pf(
parameters = delta,
null_specification = null_spec_mean,
x = x1,
y = x2,
statistic = stat_t,
B = B,
seed = 1234,
alternative = "two_tail"
),left_tail = two_sample_pf(
parameters = delta,
null_specification = null_spec_mean,
x = x1,
y = x2,
statistic = stat_t,
B = B,
seed = 1234,
alternative = "left_tail"
),right_tail = two_sample_pf(
parameters = delta,
null_specification = null_spec_mean,
x = x1,
y = x2,
statistic = stat_t,
B = B,
seed = 1234,
alternative = "right_tail"
)
)
<- generate_grid(delta_pe, delta_min, delta_max, ngrid_out)
delta_out
<- tibble(
df_mean delta = delta_out,
two_tail = approx(df$delta, df$two_tail, delta)$y,
left_tail = approx(df$delta, df$left_tail, delta)$y,
right_tail = approx(df$delta, df$right_tail, delta)$y,
%>%
) pivot_longer(-delta)
%>%
df_mean ggplot(aes(delta, value, color = name)) +
geom_line() +
labs(
title = "P-value function for the mean",
subtitle = "t-statistic",
x = expression(delta),
y = "p-value",
color = "Type"
+
) geom_hline(
yintercept = 0.05,
color = "black",
linetype = "dashed"
+
) geom_vline(
xintercept = mean(x2) - mean(x1),
color = "black"
+
) geom_vline(
xintercept = stats::t.test(x2, x1, var.equal = TRUE)$conf.int,
color = "black",
linetype = "dashed"
+
) scale_y_continuous(breaks = seq(0, 1, by = 0.05), limits = c(0, 1))
<- stats::var.test(y2, y1)
test_param <- 1e-3
rho_min <- sqrt(test_param$conf.int[2]) * 1.2
rho_max <- sd(y2) / sd(y1)
rho_pe <- generate_grid(rho_pe, rho_min, rho_max, ngrid_in)
rho_in
<- function(y, parameters) {
null_spec_sd <- parameters[1]
rho / rho
y
}
<- tibble(
df rho = rho_in,
two_tail = two_sample_pf(
parameters = rho,
null_specification = null_spec_sd,
x = y1,
y = y2,
statistic = stat_f,
B = B,
seed = 1234,
alternative = "two_tail"
),left_tail = two_sample_pf(
parameters = rho,
null_specification = null_spec_sd,
x = y1,
y = y2,
statistic = stat_f,
B = B,
seed = 1234,
alternative = "left_tail"
),right_tail = two_sample_pf(
parameters = rho,
null_specification = null_spec_sd,
x = y1,
y = y2,
statistic = stat_f,
B = B,
seed = 1234,
alternative = "right_tail"
)
)
<- generate_grid(rho_pe, rho_min, rho_max, ngrid_out)
rho_out
<- tibble(
df_sd rho = rho_out,
two_tail = approx(df$rho, df$two_tail, rho)$y,
left_tail = approx(df$rho, df$left_tail, rho)$y,
right_tail = approx(df$rho, df$right_tail, rho)$y,
%>%
) pivot_longer(-rho)
%>%
df_sd ggplot(aes(rho, value, color = name)) +
geom_line() +
labs(
title = "P-value function for the standard deviation",
subtitle = "F-statistic",
x = expression(rho),
y = "p-value",
color = "Type"
+
) geom_hline(
yintercept = 0.05,
color = "black",
linetype = "dashed"
+
) geom_vline(
xintercept = sqrt(stats::var.test(y2, y1)$statistic),
color = "black"
+
) geom_vline(
xintercept = sqrt(stats::var.test(y2, y1)$conf.int),
color = "black",
linetype = "dashed"
+
) scale_y_continuous(breaks = seq(0, 1, by = 0.05), limits = c(0, 1))
Assume that we have two r.v. \(X\) and \(Y\) that differ in distribution only in their first two moments. Let \(\mu_X\) and \(\mu_Y\) be the means of \(X\) and \(Y\) respectively and \(\sigma_X\) and \(\sigma_Y\) be the standard deviations. We can therefore write
\[ Y = \delta + \rho X. \]
In this case, we have
\[ \begin{cases} \mu_Y = \delta + \rho \mu_X \\ \sigma_Y^2 = \rho^2 \sigma_X^2 \end{cases} \Longleftrightarrow \begin{cases} \delta = \mu_Y - \frac{\sigma_Y}{\sigma_X} \mu_X \\ \rho = \frac{\sigma_Y}{\sigma_X} \end{cases} \]
In the following example, we have \(\delta = 3\) and \(\rho = 2\).
<- mean(z2) - sd(z2) / sd(z1) * mean(z1)
delta_pe <- sd(z2) / sd(z1)
rho_pe <- stats::t.test(z2, z1, var.equal = FALSE)
test_param_mean <- stats::var.test(z2, z1)
test_param_var <- test_param_mean$conf.int[1] - 1
delta_min <- test_param_mean$conf.int[2] + 1
delta_max <- 1e-3
rho_min <- sqrt(test_param_var$conf.int[2]) * 1.2
rho_max
<- function(y, parameters) {
null_spec_both <- parameters[1]
delta <- parameters[2]
rho - delta) / rho
(y
}
<- function(delta, rho, x, y, B, combine_with) {
process two_sample_pf(
parameters = map2(delta, rho, c),
null_specification = null_spec_both,
x = x,
y = y,
statistic = c("stat_mean", "stat_f"),
B = B,
seed = 1234,
combine_with = combine_with,
alternative = "two_tail"
)
}
<- compiler::cmpfun(process)
process_cmp
<- generate_grid(delta_pe, delta_min, delta_max, ngrid_in)
delta_in <- generate_grid(rho_pe, rho_min, rho_max, ngrid_in)
rho_in <- generate_grid(delta_pe, delta_min, delta_max, ngrid_out)
delta_out <- generate_grid(rho_pe, rho_min, rho_max, ngrid_out) rho_out
# Fisher method to combine test statistics
<- t(outer(
Z_fisher X = delta_in,
Y = rho_in,
FUN = process_cmp,
x = z1,
y = z2,
B = B,
combine_with = "fisher"
))
<- crossing(delta = delta_out, rho = rho_out) %>%
df_fisher mutate(pvalue = map2_dbl(delta, rho, ~ {
::interp2(
pracmax = delta_in,
y = rho_in,
Z = Z_fisher,
xp = .x,
yp = .y
)
}))
<- t(matrix(
pvalue_fisher data = df_fisher$pvalue,
nrow = ngrid_out + 1,
ncol = ngrid_out + 1,
byrow = TRUE
))
# Tippett method to combine test statistics
<- t(outer(
Z_tippett X = delta_in,
Y = rho_in,
FUN = process_cmp,
x = z1,
y = z2,
B = B,
combine_with = "tippett"
))
<- crossing(delta = delta_out, rho = rho_out) %>%
df_tippett mutate(pvalue = map2_dbl(delta, rho, ~ {
::interp2(
pracmax = delta_in,
y = rho_in,
Z = Z_tippett,
xp = .x,
yp = .y
)
}))
<- t(matrix(
pvalue_tippett data = df_tippett$pvalue,
nrow = ngrid_out + 1,
ncol = ngrid_out + 1,
byrow = TRUE
))
<- plotly::plot_ly(
fig x = delta_out,
y = rho_out,
z = pvalue_fisher
%>%
) ::add_surface()
plotly%>%
fig ::layout(scene = list(
plotlyxaxis = list(title = "delta"),
yaxis = list(title = "rho"),
zaxis = list(title = "p-value")
))
<- plotly::plot_ly(
fig2_fisher type = "contour",
x = delta_out,
y = rho_out,
z = pvalue_fisher
)%>%
fig2_fisher ::layout(
plotlyxaxis = list(title = plotly::TeX("\\delta")),
yaxis = list(title = plotly::TeX("\\rho"))
%>%
) ::config(mathjax = "cdn") plotly
<- plotly::plot_ly(
fig2_tippett type = "contour",
x = delta_out,
y = rho_out,
z = pvalue_tippett
)%>%
fig2_tippett ::layout(
plotlyxaxis = list(title = plotly::TeX("\\delta")),
yaxis = list(title = plotly::TeX("\\rho"))
%>%
) ::config(mathjax = "cdn") plotly
<- plotly::plot_ly(
fig3_fisher type = "contour",
x = delta_out,
y = rho_out,
z = pvalue_fisher,
autocontour = FALSE,
contours = list(
start = 0.05,
end = 1,
size = 0.05
)
)%>%
fig3_fisher ::layout(
plotlyxaxis = list(title = plotly::TeX("\\delta")),
yaxis = list(title = plotly::TeX("\\rho"))
%>%
) ::config(mathjax = "cdn") plotly
<- plotly::plot_ly(
fig3_tippett type = "contour",
x = delta_out,
y = rho_out,
z = pvalue_tippett,
autocontour = FALSE,
contours = list(
start = 0.05,
end = 1,
size = 0.05
)
)%>%
fig3_tippett ::layout(
plotlyxaxis = list(title = plotly::TeX("\\delta")),
yaxis = list(title = plotly::TeX("\\rho"))
%>%
) ::config(mathjax = "cdn") plotly