The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Let \(\{X_t\}_{t=1}^n\) be the \(0/1\) hit sequence.
Under the null \(H_0 : X_t
\stackrel{\text{i.i.d.}}{\sim}
\mathrm{Bernoulli}(\alpha)\),
define the cell counts
\[ T_{00}=\sum_{t=2}^{n}\! \mathbf 1\{X_{t-1}=0,\;X_t=0\},\qquad T_{01},\;T_{10},\;T_{11}\hspace{2pt}\text{analogously},\qquad T_0=T_{00}+T_{10},\;T_1=T_{01}+T_{11}. \]
The likelihood-ratio statistic for independence is
\[ \mathrm{LR}_{\text{ind}} = -2\!\left[ T_0\log(1-\hat p)+T_1\log\hat p \;-\; T_{00}\log(1-\pi_{01})-T_{01}\log\pi_{01} \;-\; T_{10}\log(1-\pi_{11})-T_{11}\log\pi_{11} \right], \]
where \(\displaystyle \hat p = \frac{T_1}{n-1},\qquad \pi_{01} = \frac{T_{01}}{T_{00}+T_{01}},\qquad \pi_{11} = \frac{T_{11}}{T_{10}+T_{11}}.\)
Note.
All logarithms are evaluated with the safe function
\[ \operatorname{safe\_log}(x)=\log\bigl(\max\{x,10^{-15}\}\bigr), \]
which prevents floating-point underflow when \(x\) is extremely small.
At time \(k\;(1\le k\le n)\) we compress every partial path into the state
\[ s = \bigl(\,\ell,\;c_{00},c_{10},c_{01},c_{11}\bigr),\qquad \ell\in\{0,1\}, \]
where \(\ell\) is the last hit and
\(c_{xy}\) are the running counts
of
\((X_{t-1}=x,\;X_t=y)\) up to time
\(k\).
The forward probability attached to \(s\) is the summed mass of all
paths that lead to this state.
With transition probabilities
\[ P(X_k=0\mid H_0)=1-\alpha,\qquad P(X_k=1\mid H_0)=\alpha, \]
each state produces two offspring:
\[ \begin{aligned} \textbf{0-step}:&\; (\ell,c_{00},c_{10},c_{01},c_{11};\,p) \;\longrightarrow\; (0,c_{00}\!+\!\mathbf1_{\{\ell=0\}},c_{10}\!+\!\mathbf1_{\{\ell=1\}}, c_{01},c_{11};\,p(1-\alpha)),\\[6pt] \textbf{1-step}:&\; (\ell,c_{00},c_{10},c_{01},c_{11};\,p) \;\longrightarrow\; (1,c_{00},c_{10}, c_{01}\!+\!\mathbf1_{\{\ell=0\}},c_{11}\!+\!\mathbf1_{\{\ell=1\}};\,p\alpha). \end{aligned} \]
If multiple paths arrive at the same offspring state, their probabilities are summed (state aggregation).
States with probability mass below a fixed threshold \(\tau\;(=10^{-15}\text{ by default})\) are discarded at each step:
\[ \text{keep } s \iff p_s \ge \tau. \]
Empirically this leaves the exact distribution unchanged while reducing the state space by several orders of magnitude.
Christoffersen’s conditional-coverage test adds an unconditional-coverage term
\[ \mathrm{LR}_{\text{uc}} = -2\!\Bigl[ c_1\log\alpha + (n-c_1)\log(1-\alpha) -c_1\log\hat\alpha - (n-c_1)\log(1-\hat\alpha) \Bigr], \qquad \hat\alpha=\frac{c_1}{n}, \]
to the independence part, so that
\[ \mathrm{LR}_{\text{cc}} = \mathrm{LR}_{\text{uc}} + \mathrm{LR}_{\text{ind}}. \]
To adapt the algorithm, we augment each state with the running total of exceptions \(c_1=\sum_{t=1}^{k} X_t\):
\[ s = (\ell,\,c_1,\,c_{00},c_{10},c_{01},c_{11}). \]
A 1-step transition increments \(c_1\); a 0-step leaves it unchanged. All
other mechanics (expansion, aggregation, pruning) are identical to the
independence algorithm.
At termination we compute \(\mathrm{LR}_{\text{uc}}\) and \(\mathrm{LR}_{\text{ind}}\) for every state,
sum them to obtain \(\mathrm{LR}_{\text{cc}}\), and aggregate
identical values as before.
The table below benchmarks how long the package needs to produce the exact finite-sample distribution for a range of \(n\) and \(\alpha\).
library(bench)
library(dplyr)
library(tidyr)
library(purrr)
library(knitr)
n_vec <- c(50, 100, 250, 500, 750, 1000)
alpha_vec <- c(0.01, 0.025, 0.05)
grid <- expand.grid(n = n_vec, alpha = alpha_vec, KEEP.OUT.ATTRS = FALSE)
bench_one <- function(n, alpha, fun) {
bm <- bench::mark(
fun(n = n, alpha = alpha),
iterations = 3,
check = FALSE
)
tibble(
n = n,
alpha = alpha,
median_s = as.numeric(bm$median)
)
}
timings_ind <- pmap_dfr(grid, bench_one, fun = lr_ind_dist)
timings_cc <- pmap_dfr(grid, bench_one, fun = lr_cc_dist)
fmt_wide <- function(df) {
df %>%
mutate(alpha = sprintf("alpha = %.3f", alpha)) %>%
pivot_wider(names_from = alpha, values_from = median_s) %>%
arrange(n)
}
table_ind <- fmt_wide(timings_ind)
table_cc <- fmt_wide(timings_cc)
kable(
table_ind,
digits = 3,
col.names = c("n", "α = 0.010", "α = 0.025", "α = 0.050"),
caption = "Median run-time (seconds) for lr_ind_dist()"
)
kable(
table_cc,
digits = 3,
col.names = c("n", "α = 0.010", "α = 0.025", "α = 0.050"),
caption = "Median run-time (seconds) for lr_cc_dist()"
)
Here it measures the end-to-end cost of a single
backtest_lr()
call on a synthetic 0/1 series.
library(xts)
alpha <- 0.01
window <- 250
local_file <- "inst/extdata/GSPC_close.rds"
file_path <- if (file.exists(local_file)) local_file else
system.file("extdata", "GSPC_close.rds", package = "ExactVaRTest")
px <- readRDS(file_path)
ret <- diff(log(px), lag = 1)
VaR <- rollapply(
ret, window,
function(r) quantile(r, alpha, na.rm = TRUE),
by.column = FALSE, align = "right"
)
keep <- complete.cases(ret, VaR)
ret <- ret[keep]
VaR <- coredata(VaR[keep])
x <- ifelse(coredata(ret) < VaR, 1L, 0L)
cat("Series length :", length(x), "\n")
cat("Exception rate:", mean(x), "\n\n")
bench_res <- bench::mark(
LR_ind = backtest_lr(x, alpha, "ind"),
LR_cc = backtest_lr(x, alpha, "cc"),
iterations = 10,
check = FALSE
)
suppressWarnings(
print(bench_res[, c("expression", "median")])
)
res_ind <- backtest_lr(x, alpha, "ind")
res_cc <- backtest_lr(x, alpha, "cc")
cat("\n--- Independence test ---\n")
print(res_ind)
cat("\n--- Conditional-coverage test ---\n")
print(res_cc)
The following figure shows the exact finite-sample CDF with the usual \(\chi^2\) approximation for \(n\) = 250, \(\alpha\) = 1%.
n <- 250
alpha <- 0.01
d_ind <- lr_ind_dist(n, alpha)
d_cc <- lr_cc_dist(n, alpha)
d_cc$LR <- d_cc$LR_cc
d_cc$prob <- d_cc$prob_cc
oldpar <- par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
# LR_ind vs χ²(1)
curve(pchisq(x, df = 1), 0, 20, lty = 2, ylab = "CDF",
xlab = "LR_ind statistic", main = "LR_ind")
lines(stepfun(d_ind$LR, c(0, cumsum(d_ind$prob))), pch = 19)
legend("bottomright", c("Chi-sq(1)", "Exact"), lty = c(2, 1), bty = "n")
# LR_cc vs χ²(2)
curve(pchisq(x, df = 2), 0, 30, lty = 2, ylab = "CDF",
xlab = "LR_cc statistic", main = "LR_cc")
lines(stepfun(d_cc$LR, c(0, cumsum(d_cc$prob))), pch = 19)
legend("bottomright", c("Chi-sq(2)", "Exact"), lty = c(2, 1), bty = "n")
par(oldpar)
A quick Monte-Carlo shows how often each method rejects under the null when \(n\) = 250 and \(\alpha\) = 1%.
n <- 250
alpha <- 0.01
set.seed(1)
# LR_cc
p.chi.cc <- replicate(
1000,
ExactVaRTest::lr_cc_stat(rbinom(n, 1, alpha), alpha) > qchisq(.95, 2)
)
p.exact.cc <- replicate(
1000,
{
x <- rbinom(n, 1, alpha)
ExactVaRTest::backtest_lr(x, alpha, "cc")$pval < 0.05
}
)
# LR_ind
p.chi.ind <- replicate(
1000,
ExactVaRTest::lr_ind_stat(rbinom(n, 1, alpha), alpha) > qchisq(.95, 1)
)
p.exact.ind <- replicate(
1000,
{
x <- rbinom(n, 1, alpha)
ExactVaRTest::backtest_lr(x, alpha, "ind")$pval < 0.05
}
)
data.frame(
Test = c("LR_cc Chi^2", "LR_cc Exact", "LR_ind Chi^2", "LR_ind Exact"),
Size = c(mean(p.chi.cc), mean(p.exact.cc), mean(p.chi.ind), mean(p.exact.ind))
)
We plot 250-day rolling p-values (\(\alpha\) = 1%) for LRind and LRcc.
library(ggplot2)
library(dplyr)
library(tidyr)
alpha <- 0.01
win <- 250
local_file <- "inst/extdata/GSPC_close.rds"
file_path <- if (file.exists(local_file)) local_file else
system.file("extdata", "GSPC_close.rds", package = "ExactVaRTest")
px <- readRDS(file_path)
px <- px[index(px) >= "2012-01-01"]
ret <- diff(log(px))
VaR <- rollapply(
ret, win,
function(r) quantile(r, alpha, na.rm = TRUE),
by.column = FALSE, align = "right"
)
keep <- complete.cases(ret, VaR)
r <- coredata(ret[keep])
v <- coredata(VaR[keep])
exc <- ifelse(r < v, 1L, 0L)
n_seg <- length(exc) - win + 1
ind_exact <- ind_chi <- cc_exact <- cc_chi <- numeric(n_seg)
for (i in seq_len(n_seg)) {
seg <- exc[i:(i + win - 1)]
ind_stat <- ExactVaRTest::lr_ind_stat(seg, alpha)
cc_stat <- ExactVaRTest::lr_cc_stat(seg, alpha)
ind_exact[i] <- ExactVaRTest::pval_lr_ind(ind_stat, win, alpha)
cc_exact[i] <- ExactVaRTest::pval_lr_cc(cc_stat, win, alpha)
ind_chi[i] <- pchisq(ind_stat, df = 1, lower.tail = FALSE)
cc_chi[i] <- pchisq(cc_stat, df = 2, lower.tail = FALSE)
}
df <- tibble(
idx = seq_len(n_seg),
ind_exact, ind_chi, cc_exact, cc_chi
) %>%
pivot_longer(cols = -idx,
names_to = c("test", "method"),
names_pattern = "(ind|cc)_(.*)",
values_to = "pval") %>%
mutate(test = ifelse(test == "ind", "LRind", "LRcc"),
method = ifelse(method == "exact", "exact", "chi-sq"))
ggplot(df, aes(idx, pval, colour = method)) +
geom_step() +
geom_hline(yintercept = 0.05, linetype = 2, colour = "red") +
facet_wrap(~ test, ncol = 1, scales = "free_x") +
scale_colour_manual(values = c("chi-sq" = "red", "exact" = "cyan4")) +
labs(x = "window start index", y = "p-value",
title = "Rolling 250-day p-values (α = 1%)") +
theme_bw()
library(ExactVaRTest)
n_set <- c(250, 500, 750, 1000)
alpha_set <- c(0.005, 0.01, 0.025, 0.05)
gamma_set <- c(0.90, 0.95, 0.99)
q_lr <- function(d, g) d$LR[which(cumsum(d$prob) >= g)[1L]]
tbl <- expand.grid(n = n_set,
alpha = alpha_set,
gamma = gamma_set,
KEEP.OUT.ATTRS = FALSE,
stringsAsFactors = FALSE)
tbl$crit_ind <- mapply(function(n, a, g)
q_lr(lr_ind_dist(n, a, prune_threshold = 1e-15), g),
tbl$n, tbl$alpha, tbl$gamma, SIMPLIFY = TRUE)
tbl$crit_cc <- mapply(function(n, a, g) {
d <- lr_cc_dist(n, a, prune_threshold = 1e-15)
d$LR <- d$LR_cc
d$prob <- d$prob_cc
q_lr(d, g)
}, tbl$n, tbl$alpha, tbl$gamma, SIMPLIFY = TRUE)
print(tbl, digits = 6)
\(p = \sum_{k=0}^{P} \Pr\!\bigl(P' = k\bigr)\,\Pr\!\bigl(LR_{\text{uc/ind}} \ge \text{obs}\mid P' = k\bigr)\)
library(ExactVaRTest)
set.seed(123)
P <- 250
alpha <- 0.05
alpha_prime <- 0.10
inst_flag <- rbinom(P, 1, alpha_prime)
sys_flag <- if (sum(inst_flag)) rbinom(sum(inst_flag), 1, alpha) else integer(0)
lr_uc <- lr_uc_stat(sys_flag, alpha)
lr_ind <- lr_ind_stat(sys_flag, alpha)
mix_tail <- function(lr_obs, P, alpha, alpha_prime,
type = c("uc", "ind"), prune = 1e-15) {
type <- match.arg(type)
w <- dbinom(0:P, P, alpha_prime)
tail_prob <- function(k) {
if (type == "uc") {
if (!k) return(as.numeric(lr_obs <= 0))
d <- lr_uc_dist(k, alpha)
sum(d$prob[d$LR >= lr_obs])
} else {
if (k < 2) return(as.numeric(lr_obs <= 0))
d <- lr_ind_dist(k, alpha, prune)
sum(d$prob[d$LR >= lr_obs])
}
}
sum(vapply(0:P, tail_prob, numeric(1)) * w)
}
p_uc <- mix_tail(lr_uc, P, alpha, alpha_prime, "uc")
p_ind <- mix_tail(lr_ind, P, alpha, alpha_prime, "ind")
data.frame(
test = c("UC", "IND"),
stat = c(lr_uc, lr_ind),
p = c(p_uc, p_ind)
)
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.