The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This file contains some examples of the functions related to the Wrap routine. More specifically the functions wrap
and estLocScale
.
library("cellWise")
Simple example: Stars data
# load the data
X <- robustbase::starsCYG; plot(X)
cutoff <- sqrt(qchisq(0.975, ncol(X)))
X.mean <- colMeans(X)
X.cov <- cov(X)
cor(X)
## log.Te log.light
## log.Te 1.0000000 -0.2104133
## log.light -0.2104133 1.0000000
locScaleX <- estLocScale(X)
Xw <- wrap(X, locScaleX$loc, locScaleX$scale)$Xw
Xw.mean <- colMeans(Xw)
Xw.cov <- cov(Xw)
cor(Xw)
## [,1] [,2]
## [1,] 1.0000000 0.5732006
## [2,] 0.5732006 1.0000000
# classical Mahalanobis distances
#
md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
ylab = "Mahalanobis distance",
xlab = "Index",
main = "Mahalanobis distances",
xlim = c(0, nrow(X)), ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
# wrapping-based robust distances
#
rd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
plot(1:nrow(X), rd, pch = 16, cex = 0.9,
ylab = "robust distance",
xlab = "Index",
main = "Wrapping-based robust distances",
xlim = c(0, nrow(X)), ylim = c(0, max(rd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
# classical and wrapped tolerance ellipse
#
ell1 <- ellipse::ellipse(X.cov, centre = X.mean, t = cutoff, npoints = 120)
ell2 <- ellipse::ellipse(Xw.cov, centre = Xw.mean, t = cutoff, npoints = 120)
xmin <- min(min(X[, 1]), min(ell1[, 1]), min(ell2[, 1]))
xmax <- max(max(X[, 1]), max(ell1[, 1]), max(ell2[, 1]))
ymin <- min(min(X[, 2]), min(ell1[, 2]), min(ell2[, 2]))
ymax <- max(max(X[, 2]), max(ell1[, 2]), max(ell2[, 2]))
myxlim <- c(xmin, xmax); myylim <- c(ymin, ymax)
plot(X, xlab = "X1", ylab = "X2", main = "Stars: classical and wrapping ellipse",
pch = 16, col = "black", xlim = myxlim, ylim = myylim)
points(ell1, type = "l", lwd = 1.5, col = "red")
points(ell2, type = "l", lwd = 1.5, col = "blue")
Bushfire dataset: a 5-dimensional example
X <- as.matrix(robustbase::bushfire); pairs(X)
cutoff <- sqrt(qchisq(0.975, ncol(X))); cutoff
## [1] 3.582248
X.mean <- colMeans(X); X.cov <- cov(X)
round(cor(X), 2)
## V1 V2 V3 V4 V5
## V1 1.00 0.80 -0.59 -0.49 -0.49
## V2 0.80 1.00 -0.53 -0.53 -0.52
## V3 -0.59 -0.53 1.00 0.97 0.98
## V4 -0.49 -0.53 0.97 1.00 1.00
## V5 -0.49 -0.52 0.98 1.00 1.00
estX <- cellWise::estLocScale(X)
Xw <- cellWise::wrap(X, estX$loc, estX$scale)$Xw
Xw.mean <- colMeans(Xw); Xw.cov <- cov(Xw)
round(cor(Xw), 2)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.00 0.78 -0.62 -0.52 -0.53
## [2,] 0.78 1.00 -0.25 -0.14 -0.15
## [3,] -0.62 -0.25 1.00 0.98 0.99
## [4,] -0.52 -0.14 0.98 1.00 1.00
## [5,] -0.53 -0.15 0.99 1.00 1.00
wd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
weights <- (wd < cutoff) + 0
# Reweighting functions:
wmean <- function(X, w) { # weighted mean
wmean1 <- function(y, w) { sum(y * w) / sum(w) }
apply(X, 2, FUN = wmean1, w = w)
}
wcov <- function(X, w) { # weighted cov
Xc <- sweep(X, 2, wmean(X, w), "-")
Xc <- sweep(Xc, 1, sqrt(w), "*")
t(Xc) %*% Xc / sum(w)
}
X.wmean <- wmean(X, weights)
X.wcov <- wcov(X, weights)
round(cov2cor(X.wcov), 2)
## V1 V2 V3 V4 V5
## V1 1.00 0.86 -0.74 -0.62 -0.63
## V2 0.86 1.00 -0.34 -0.19 -0.20
## V3 -0.74 -0.34 1.00 0.97 0.98
## V4 -0.62 -0.19 0.97 1.00 1.00
## V5 -0.63 -0.20 0.98 1.00 1.00
# Classical Mahalanobis distances
#
md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
ylab = "Mahalanobis distance", xlab = "Index",
main = "Mahalanobis distances", xlim = c(0, nrow(X)),
ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
# No outliers stand out here.
# Wrapping-based robust distances
#
rwd <- sqrt(mahalanobis(X, center = X.wmean, cov = X.wcov))
plot(1:nrow(X),rwd,pch = 16, cex = 0.9,
ylab = "robust distance", xlab = "Index",
main = "Wrapping-based robust distances", xlim = c(0, nrow(X)),
ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
# Many outliers are visible, the same as in Maronna-Yohai 1995.
# Distance-distance plot:
#
plot(md, rwd, pch = 16, cex = 0.9, ylab = "robust distance",
xlab = "classical distance", main = "distance-distance plot",
xlim = c(0, max(md, cutoff) + 0.2),
ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(v = cutoff, lwd = 1.5, col = "red")
abline(h = cutoff, lwd = 1.5, col = "red")
abline(0, 1, lwd = 1.5, lty = 2)
# only the robust distances detect the outliers
# Classical and wrapped tolerance ellipses:
#
coordgrid <- expand.grid(1:dim(X)[2], 1:dim(X)[2])
coordgrid <- coordgrid[-(1 + (1:dim(X)[2] - 1) * (dim(X)[2] + 1)), ]
i = 1
pairs(X, panel = function(x, y) {
points(x, y)
inds <- as.numeric(coordgrid[i, ])
ell1 <- ellipse::ellipse(X.cov[inds, inds], centre = X.mean[inds],
t = cutoff, npoints = 120)
ell2 <- ellipse::ellipse(X.wcov[inds, inds], centre = X.wmean[inds],
t = cutoff, npoints = 120)
points(ell1, type = "l", lwd = 1.5, col = "red")
points(ell2, type = "l", lwd = 1.5, col = "blue")
i <<- i + 1
})
data(data_dogWalker)
# keep a copy of the data for later:
X <- data_dogWalker
# Combine all pixels of an image in a row:
dims <- dim(X); dims # 54 144 180 3
## [1] 54 144 180 3
dim(X) <- c(dims[1], prod(dims[2:4]))
dim(X) # 54 77760
## [1] 54 77760
# Estimate location and scale using estLocScale:
locScaleX <- estLocScale(X, precScale = 1e-12)
## Warning in estLocScale(X, precScale = 1e-12): 85 out of 77760 variables have an estimated scale <=
## "precScale" = 1e-12 .
# Preprocess the data to deal with zeroscales:
zeropix <- which(locScaleX$scale < 1e-12)
nzero <- length(zeropix)
filler <- 2 * ((1:dims[1]) - (dims[1] + 1) / 2) / (dims[1] - 1)
filler <- 0.9 / 255 * matrix(rep(filler, nzero), ncol = nzero, byrow = F)
centers <- apply(X[, zeropix], 2, FUN = median)
centers <- pmax(0, pmin(1, centers));
filler <- sweep(filler, 2, centers, "+")
X[, zeropix] <- filler
locScaleX$scale[zeropix] <- estLocScale(X[, zeropix])$scale
# Wrap the data
Xw <- wrap(X, locScaleX$loc, locScaleX$scale)$Xw
# center the wrapped data
aveXw <- colMeans(Xw)
XwC <- sweep(Xw, 2, aveXw)
# Compute singular value decomposition of centered wrapped data
prcXwz <- svd::propack.svd(XwC, neig = 3)
rloadings <- prcXwz$v; dim(rloadings)
## [1] 77760 3
Xwscores <- XwC %*% rloadings; dim(Xwscores)
## [1] 54 3
## Project original data on robust loadings:
XC <- sweep(X, 2, aveXw)
robscores <- XC %*% rloadings
# Compute the fitted frames and residuals:
Xznew.rfitted <- robscores %*% t(rloadings[, 1:3])
X.rfitted <- sweep(Xznew.rfitted, 2, aveXw, "+")
X.rresidual <- X - X.rfitted
# Plot the first loading
rim1 <- pmax(0, pmin(rloadings[, 1] + locScaleX$loc, 1))
dim(rim1) <- dims[2:4]; grid::grid.newpage()
grid::grid.raster(rim1, interpolate = FALSE)
# Compute location and scale of the residuals
res.med <- median(X.rresidual)
res.mad <- mad(X.rresidual)
res <- (X.rresidual - res.med) / res.mad
# Make plots
frameinds <- c(10, 20, 30, 40)
cutoff <- 200
for (i in 1:4) {
index <- frameinds[i]
restemp <- res[index,];
dim(restemp) <- c(prod(dims[2:3]), dims[4]);
res2 <- restemp[, 1]^2 + restemp[, 2]^2 + restemp[, 3]^2
mask <- rep(0, prod(dims[2:3])); mask[res2 > cutoff] <- 1
mask <- cbind(mask,mask,mask); dim(mask) <- dims[2:4]
realim <- data_dogWalker[index, , , ];
realim <- pmax(0, pmin(realim, 1)); dim(realim) <- dims[2:4]
masked <- realim * mask; dim(masked)
masked <- pmax(0, pmin(masked, 1)); dim(masked) <- dims[2:4]
masked[masked == 0] <- 0.5
finalim <- array(0, dim = c(dims[2], 2*dims[3], dims[4]))
finalim[, 1:dims[3], ] <- realim
finalim[, (dims[3] + 1):(2 * dims[3]), ] <- masked
grid::grid.newpage()
grid::grid.raster(finalim,interpolate = FALSE)
}
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.