The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Full R-Code for:
Maier, M. J. (2014). DirichletReg: Dirichlet Regression for Compositional Data in R. Research Report Series/Department of Statistics and Mathematics, 125. WU Vienna University of Economics and Business, Vienna. https://epub.wu.ac.at/4077/
library("DirichletReg")
head(ArcticLake)
##    sand  silt  clay depth
## 1 0.775 0.195 0.030  10.4
## 2 0.719 0.249 0.032  11.7
## 3 0.507 0.361 0.132  12.8
## 4 0.522 0.409 0.066  13.0
## 5 0.700 0.265 0.035  15.7
## 6 0.665 0.322 0.013  16.3
AL <- DR_data(ArcticLake[, 1:3])
## Warning in DR_data(ArcticLake[, 1:3]): not all rows sum up to 1 =>
## normalization forced
AL[1:6, ]
##        sand      silt      clay
## 1 0.7750000 0.1950000 0.0300000
## 2 0.7190000 0.2490000 0.0320000
## 3 0.5070000 0.3610000 0.1320000
## 4 0.5235707 0.4102307 0.0661986
## 5 0.7000000 0.2650000 0.0350000
## 6 0.6650000 0.3220000 0.0130000Code for Fig. 1 (left):
Code for Fig. 1 (right):
plot(rep(ArcticLake$depth, 3), as.numeric(AL), pch = 21, bg = rep(c("#E495A5", "#86B875",
    "#7DB0DD"), each = 39), xlab = "Depth (m)", ylab = "Proportion", ylim = 0:1)lake1 <- DirichReg(AL ~ depth, ArcticLake)
lake1
## Call:
## DirichReg(formula = AL ~ depth, data = ArcticLake)
## using the common parametrization
## 
## Log-likelihood: 101.4 on 6 df (100 BFGS + 1 NR Iterations)
## 
## -----------------------------------------
## Coefficients for variable no. 1: sand
## (Intercept)        depth  
##     0.11662      0.02335  
## -----------------------------------------
## Coefficients for variable no. 2: silt
## (Intercept)        depth  
##    -0.31060      0.05557  
## -----------------------------------------
## Coefficients for variable no. 3: clay
## (Intercept)        depth  
##     -1.1520       0.0643  
## -----------------------------------------
coef(lake1)
## $sand
## (Intercept)       depth 
##  0.11662480  0.02335114 
## 
## $silt
## (Intercept)       depth 
## -0.31059591  0.05556745 
## 
## $clay
## (Intercept)       depth 
## -1.15195642  0.06430175
lake2 <- update(lake1, . ~ . + I(depth^2) | . + I(depth^2) | . + I(depth^2))
anova(lake1, lake2)
## Analysis of Deviance Table
## 
## Model 1: DirichReg(formula = AL ~ depth, data = ArcticLake)
## Model 2: DirichReg(formula = AL ~ depth + I(depth^2) | depth + I(depth^2) |
##   depth + I(depth^2), data = ArcticLake)
## 
##         Deviance N. par Difference df Pr(>Chi)   
## Model 1  -202.74      6                          
## Model 2  -217.99      9     15.254  3 0.001612 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lake2)
## Call:
## DirichReg(formula = AL ~ depth + I(depth^2) | depth + I(depth^2) | depth +
## I(depth^2), data = ArcticLake)
## 
## Standardized Residuals:
##           Min       1Q   Median      3Q     Max
## sand  -1.7647  -0.7080  -0.1786  0.9598  3.0460
## silt  -1.1379  -0.5330  -0.1546  0.2788  1.5604
## clay  -1.7661  -0.6583  -0.0454  0.6584  2.0152
## 
## ------------------------------------------------------------------
## Beta-Coefficients for variable no. 1: sand
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  1.4361967  0.8026814   1.789   0.0736 .
## depth       -0.0072382  0.0329433  -0.220   0.8261  
## I(depth^2)   0.0001324  0.0002761   0.480   0.6315  
## ------------------------------------------------------------------
## Beta-Coefficients for variable no. 2: silt
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.0259705  0.7598827  -0.034   0.9727  
## depth        0.0717450  0.0343089   2.091   0.0365 *
## I(depth^2)  -0.0002679  0.0003088  -0.867   0.3857  
## ------------------------------------------------------------------
## Beta-Coefficients for variable no. 3: clay
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -1.7931487  0.7362293  -2.436  0.01487 * 
## depth        0.1107906  0.0357705   3.097  0.00195 **
## I(depth^2)  -0.0004872  0.0003308  -1.473  0.14079   
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 109 on 9 df (162 BFGS + 2 NR Iterations)
## AIC: -200, BIC: -185
## Number of Observations: 39
## Link: Log
## Parametrization: commonCode for Fig. 2:
par(mar = c(4, 4, 4, 4) + 0.1)
plot(rep(ArcticLake$depth, 3), as.numeric(AL), pch = 21, bg = rep(c("#E495A5", "#86B875",
    "#7DB0DD"), each = 39), xlab = "Depth (m)", ylab = "Proportion", ylim = 0:1,
    main = "Sediment Composition in an Arctic Lake")
Xnew <- data.frame(depth = seq(min(ArcticLake$depth), max(ArcticLake$depth), length.out = 100))
for (i in 1:3) lines(cbind(Xnew, predict(lake2, Xnew)[, i]), col = c("#E495A5", "#86B875",
    "#7DB0DD")[i], lwd = 2)
legend("topleft", legend = c("Sand", "Silt", "Clay"), lwd = 2, col = c("#E495A5",
    "#86B875", "#7DB0DD"), pt.bg = c("#E495A5", "#86B875", "#7DB0DD"), pch = 21,
    bty = "n")
par(new = TRUE)
plot(cbind(Xnew, predict(lake2, Xnew, F, F, T)), lty = "24", type = "l", ylim = c(0,
    max(predict(lake2, Xnew, F, F, T))), axes = F, ann = F, lwd = 2)
axis(4)
mtext(expression(paste("Precision (", phi, ")", sep = "")), 4, line = 3)
legend("top", legend = c(expression(hat(mu[c] == hat(alpha)[c]/hat(alpha)[0])), expression(hat(phi) ==
    hat(alpha)[0])), lty = c(1, 2), lwd = c(3, 2), bty = "n")AL <- ArcticLake
AL$AL <- DR_data(ArcticLake[, 1:3])
## Warning in DR_data(ArcticLake[, 1:3]): not all rows sum up to 1 =>
## normalization forced
dd <- range(ArcticLake$depth)
X <- data.frame(depth = seq(dd[1], dd[2], length.out = 200))
pp <- predict(DirichReg(AL ~ depth + I(depth^2), AL), X)Code for Fig. 3:
plot(AL$AL, cex = 0.1, reset_par = FALSE)
points(toSimplex(AL$AL), pch = 16, cex = 0.5, col = gray(0.5))
lines(toSimplex(pp), lwd = 3, col = c("#6E1D34", "#004E42")[2])
Dols <- log(cbind(ArcticLake[, 2]/ArcticLake[, 1], ArcticLake[, 3]/ArcticLake[, 1]))
ols <- lm(Dols ~ depth + I(depth^2), ArcticLake)
p2 <- predict(ols, X)
p2m <- exp(cbind(0, p2[, 1], p2[, 2]))/rowSums(exp(cbind(0, p2[, 1], p2[, 2])))
lines(toSimplex(p2m), lwd = 3, col = c("#6E1D34", "#004E42")[1], lty = "21")Bld <- BloodSamples
Bld$Smp <- DR_data(Bld[, 1:4])
## Warning in DR_data(Bld[, 1:4]): not all rows sum up to 1 => normalization
## forced
blood1 <- DirichReg(Smp ~ Disease | 1, Bld, model = "alternative", base = 3)
blood2 <- DirichReg(Smp ~ Disease | Disease, Bld, model = "alternative", base = 3)
anova(blood1, blood2)
## Analysis of Deviance Table
## 
## Model 1: DirichReg(formula = Smp ~ Disease | 1, data = Bld, model =
##   "alternative", base = 3)
## Model 2: DirichReg(formula = Smp ~ Disease | Disease, data = Bld, model =
##   "alternative", base = 3)
## 
##         Deviance N. par Difference df Pr(>Chi)
## Model 1  -303.86      7                       
## Model 2  -304.61      8     0.7587  1   0.3837
summary(blood1)
## Call:
## DirichReg(formula = Smp ~ Disease | 1, data = Bld, model = "alternative", base
## = 3)
## 
## Standardized Residuals:
##                  Min       1Q   Median      3Q     Max
## Albumin      -2.1310  -0.9307  -0.1234  0.8149  2.8429
## Pre.Albumin  -1.0687  -0.4054  -0.0789  0.1947  1.5691
## Globulin.A   -2.0503  -1.0392   0.1938  0.7927  2.2393
## Globulin.B   -1.8176  -0.5347   0.1488  0.5115  1.3284
## 
## MEAN MODELS:
## ------------------------------------------------------------------
## Coefficients for variable no. 1: Albumin
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.11639    0.09935  11.237   <2e-16 ***
## DiseaseB    -0.07002    0.13604  -0.515    0.607    
## ------------------------------------------------------------------
## Coefficients for variable no. 2: Pre.Albumin
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.5490     0.1082   5.076 3.86e-07 ***
## DiseaseB     -0.1276     0.1493  -0.855    0.393    
## ------------------------------------------------------------------
## Coefficients for variable no. 3: Globulin.A
## - variable omitted (reference category) -
## ------------------------------------------------------------------
## Coefficients for variable no. 4: Globulin.B
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.4863     0.1094   4.445  8.8e-06 ***
## DiseaseB      0.1819     0.1472   1.236    0.216    
## ------------------------------------------------------------------
## 
## PRECISION MODEL:
## ------------------------------------------------------------------
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.2227     0.1475   28.64   <2e-16 ***
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 151.9 on 7 df (43 BFGS + 1 NR Iterations)
## AIC: -289.9, BIC: -280
## Number of Observations: 30
## Links: Logit (Means) and Log (Precision)
## Parametrization: alternativeCode for Fig.~\(\ref{fig:blood:boxfitted}\):
par(mfrow = c(1, 4), mar = c(4, 4, 4, 2) + 0.25)
for (i in 1:4) {
    boxplot(Bld$Smp[, i] ~ Bld$Disease, ylim = range(Bld$Smp[, 1:4]), main = paste(names(Bld)[i]),
        xlab = "Disease Type", ylab = "Proportion")
    segments(c(-5, 1.5), unique(fitted(blood2)[, i]), c(1.5, 5), unique(fitted(blood2)[,
        i]), lwd = 2, lty = 2)
}alpha <- predict(blood2, data.frame(Disease = factor(c("A", "B"))), F, T, F)
L <- sapply(1:2, function(i) ddirichlet(DR_data(Bld[31:36, 1:4]), unlist(alpha[i,
    ])))
## Warning in DR_data(Bld[31:36, 1:4]): not all rows sum up to 1 => normalization
## forced
## Warning in DR_data(Bld[31:36, 1:4]): not all rows sum up to 1 => normalization
## forced
LP <- L/rowSums(L)
dimnames(LP) <- list(paste("C", 1:6), c("A", "B"))
print(data.frame(round(LP * 100, 1), pred. = as.factor(ifelse(LP[, 1] > LP[, 2],
    "==> A", "==> B"))), print.gap = 2)
##         A     B  pred.
## C 1  59.4  40.6  ==> A
## C 2  43.2  56.8  ==> B
## C 3  38.4  61.6  ==> B
## C 4  43.8  56.2  ==> B
## C 5  36.6  63.4  ==> B
## C 6  70.2  29.8  ==> ACode for Fig.~\(\ref{fig:blood:predictions}\):
B2 <- DR_data(BloodSamples[, c(1, 2, 4)])
## Warning in DR_data(BloodSamples[, c(1, 2, 4)]): not all rows sum up to 1 =>
## normalization forced
plot(B2, cex = 0.001, reset_par = FALSE)
div.col <- colorRampPalette(c("#023FA5", "#c0c0c0", "#8E063B"))(100)
# expected values
temp <- (alpha/rowSums(alpha))[, c(1, 2, 4)]
points(toSimplex(temp/rowSums(temp)), pch = 22, bg = div.col[c(1, 100)], cex = 2,
    lwd = 0.25)
# known values
temp <- B2[1:30, ]
points(toSimplex(temp/rowSums(temp)), pch = 21, bg = (div.col[c(1, 100)])[BloodSamples$Disease[1:30]],
    cex = 0.5, lwd = 0.25)
# unclassified
temp <- B2[31:36, ]
points(toSimplex(temp/rowSums(temp)), pch = 21, bg = div.col[round(100 * LP[, 2],
    0)], cex = 1, lwd = 0.5)
legend("topright", bty = "n", legend = c("Disease A", "Disease B", NA, "Expected Values"),
    pch = c(21, 21, NA, 22), pt.bg = c(div.col[c(1, 100)], NA, "white"))RS <- ReadingSkills
RS$acc <- DR_data(RS$accuracy)
## only one variable in [0, 1] supplied - beta-distribution assumed.
## check this assumption.
RS$dyslexia <- C(RS$dyslexia, treatment)
rs1 <- DirichReg(acc ~ dyslexia * iq | dyslexia * iq, RS, model = "alternative")
rs2 <- DirichReg(acc ~ dyslexia * iq | dyslexia + iq, RS, model = "alternative")
anova(rs1, rs2)
## Analysis of Deviance Table
## 
## Model 1: DirichReg(formula = acc ~ dyslexia * iq | dyslexia * iq, data = RS,
##   model = "alternative")
## Model 2: DirichReg(formula = acc ~ dyslexia * iq | dyslexia + iq, data = RS,
##   model = "alternative")
## 
##         Deviance N. par Difference df Pr(>Chi)
## Model 1  -133.47      8                       
## Model 2  -131.80      7     1.6645  1    0.197Code for Fig.~\(\ref{fig:ReadingSkills}\):
g.ind <- as.numeric(RS$dyslexia)
g1 <- g.ind == 1  # normal
g2 <- g.ind != 1  # dyslexia
par(mar = c(4, 4, 4, 4) + 0.25)
plot(accuracy ~ iq, RS, pch = 21, bg = c("#E495A5", "#39BEB1")[3 - g.ind], cex = 1.5,
    main = "Dyslexic (Red) vs. Control (Green) Group", xlab = "IQ Score", ylab = "Reading Accuracy",
    xlim = range(ReadingSkills$iq))
x1 <- seq(min(RS$iq[g1]), max(RS$iq[g1]), length.out = 200)
x2 <- seq(min(RS$iq[g2]), max(RS$iq[g2]), length.out = 200)
n <- length(x1)
X <- data.frame(dyslexia = factor(rep(0:1, each = n), levels = 0:1, labels = c("no",
    "yes")), iq = c(x1, x2))
pv <- predict(rs2, X, TRUE, TRUE, TRUE)
lines(x1, pv$mu[1:n, 2], col = c("#E495A5", "#39BEB1")[2], lwd = 3)
lines(x2, pv$mu[(n + 1):(2 * n), 2], col = c("#E495A5", "#39BEB1")[1], lwd = 3)
a <- RS$accuracy
logRa_a <- log(a/(1 - a))
rlr <- lm(logRa_a ~ dyslexia * iq, RS)
ols <- 1/(1 + exp(-predict(rlr, X)))
lines(x1, ols[1:n], col = c("#AD6071", "#00897D")[2], lwd = 3, lty = 2)
lines(x2, ols[(n + 1):(2 * n)], col = c("#AD6071", "#00897D")[1], lwd = 3, lty = 2)
### precision plot
par(new = TRUE)
plot(x1, pv$phi[1:n], col = c("#6E1D34", "#004E42")[2], lty = "11", type = "l", ylim = c(0,
    max(pv$phi)), axes = F, ann = F, lwd = 2, xlim = range(RS$iq))
lines(x2, pv$phi[(n + 1):(2 * n)], col = c("#6E1D34", "#004E42")[1], lty = "11",
    type = "l", lwd = 2)
axis(4)
mtext(expression(paste("Precision (", phi, ")", sep = "")), 4, line = 3)
legend("topleft", legend = c(expression(hat(mu)), expression(hat(phi)), "OLS"), lty = c(1,
    3, 2), lwd = c(3, 2, 3), bty = "n")a <- RS$accuracy
logRa_a <- log(a/(1 - a))
rlr <- lm(logRa_a ~ dyslexia * iq, RS)
summary(rlr)
## 
## Call:
## lm(formula = logRa_a ~ dyslexia * iq, data = RS)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.66405 -0.37966  0.03687  0.40887  2.50345 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.8067     0.2822   9.944 2.27e-12 ***
## dyslexiayes     -2.4113     0.4517  -5.338 4.01e-06 ***
## iq               0.7823     0.2992   2.615   0.0125 *  
## dyslexiayes:iq  -0.8457     0.4510  -1.875   0.0681 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.2 on 40 degrees of freedom
## Multiple R-squared:  0.6151, Adjusted R-squared:  0.5862 
## F-statistic: 21.31 on 3 and 40 DF,  p-value: 2.083e-08
summary(rs2)
## Call:
## DirichReg(formula = acc ~ dyslexia * iq | dyslexia + iq, data = RS, model =
## "alternative")
## 
## Standardized Residuals:
##                   Min       1Q   Median      3Q     Max
## 1 - accuracy  -1.5661  -0.8204  -0.5112  0.5211  3.4334
## accuracy      -3.4334  -0.5211   0.5112  0.8204  1.5661
## 
## MEAN MODELS:
## ------------------------------------------------------------------
## Coefficients for variable no. 1: 1 - accuracy
## - variable omitted (reference category) -
## ------------------------------------------------------------------
## Coefficients for variable no. 2: accuracy
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      1.8649     0.2991   6.235 4.52e-10 ***
## dyslexiayes     -1.4833     0.3029  -4.897 9.74e-07 ***
## iq               1.0676     0.3359   3.178 0.001482 ** 
## dyslexiayes:iq  -1.1625     0.3452  -3.368 0.000757 ***
## ------------------------------------------------------------------
## 
## PRECISION MODEL:
## ------------------------------------------------------------------
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.5579     0.3336   4.670 3.01e-06 ***
## dyslexiayes   3.4931     0.5880   5.941 2.83e-09 ***
## iq            1.2291     0.4596   2.674  0.00749 ** 
## ------------------------------------------------------------------
## Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-likelihood: 65.9 on 7 df (56 BFGS + 2 NR Iterations)
## AIC: -117.8, BIC: -105.3
## Number of Observations: 44
## Links: Logit (Means) and Log (Precision)
## Parametrization: alternative
confint(rs2)
## 
## 95% Confidence Intervals (original form)
## 
## - Beta-Parameters:
## Variable: 1 - accuracy
##   variable omitted
## 
## Variable: accuracy
##                   2.5%   Est.   97.5%
## (Intercept)      1.279   1.86   2.451
## dyslexiayes     -2.077  -1.48  -0.890
## iq               0.409   1.07   1.726
## dyslexiayes:iq  -1.839  -1.16  -0.486
## 
## - Gamma-Parameters
##               2.5%  Est.  97.5%
## (Intercept)  0.904  1.56   2.21
## dyslexiayes  2.341  3.49   4.65
## iq           0.328  1.23   2.13
confint(rs2, exp = TRUE)
## 
## 95% Confidence Intervals (exponentiated)
## 
## - Beta-Parameters:
## Variable: 1 - accuracy
##   variable omitted
## 
## Variable: accuracy
##                  2.5%  exp(Est.)   97.5%
## (Intercept)     3.592      6.455  11.601
## dyslexiayes     0.125      0.227   0.411
## iq              1.506      2.908   5.618
## dyslexiayes:iq  0.159      0.313   0.615
## 
## - Gamma-Parameters
##               2.5%  exp(Est.)   97.5%
## (Intercept)   2.47       4.75    9.13
## dyslexiayes  10.39      32.89  104.12
## iq            1.39       3.42    8.41Code for Fig.~\(\ref{fig:readingskills.residplot}\):
gcol <- c("#E495A5", "#39BEB1")[3 - as.numeric(RS$dyslexia)]
tmt <- c(-3, 3)
par(mfrow = c(3, 2), cex = 0.8)
qqnorm(residuals(rlr, "pearson"), ylim = tmt, xlim = tmt, pch = 21, bg = gcol, main = "Normal Q-Q-Plot: OLS Residuals",
    cex = 0.75, lwd = 0.5)
abline(0, 1, lwd = 2)
qqline(residuals(rlr, "pearson"), lty = 2)
qqnorm(residuals(rs2, "standardized")[, 2], ylim = tmt, xlim = tmt, pch = 21, bg = gcol,
    main = "Normal Q-Q-Plot: DirichReg Residuals", cex = 0.75, lwd = 0.5)
abline(0, 1, lwd = 2)
qqline(residuals(rs2, "standardized")[, 2], lty = 2)
plot(ReadingSkills$iq, residuals(rlr, "pearson"), pch = 21, bg = gcol, ylim = c(-3,
    3), main = "OLS Residuals", xlab = "IQ", ylab = "Pearson Residuals", cex = 0.75,
    lwd = 0.5)
abline(h = 0, lty = 2)
plot(ReadingSkills$iq, residuals(rs2, "standardized")[, 2], pch = 21, bg = gcol,
    ylim = c(-3, 3), main = "DirichReg Residuals", xlab = "IQ", ylab = "Standardized Residuals",
    cex = 0.75, lwd = 0.5)
abline(h = 0, lty = 2)
plot(fitted(rlr), residuals(rlr, "pearson"), pch = 21, bg = gcol, ylim = c(-3, 3),
    main = "OLS Residuals", xlab = "Fitted", ylab = "Pearson Residuals", cex = 0.75,
    lwd = 0.5)
abline(h = 0, lty = 2)
plot(fitted(rs2)[, 2], residuals(rs2, "standardized")[, 2], pch = 21, bg = gcol,
    ylim = c(-3, 3), main = "DirichReg Residuals", xlab = "Fitted", ylab = "Standardized Residuals",
    cex = 0.75, lwd = 0.5)
abline(h = 0, lty = 2)These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.