The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This file contains examples of the use of the weightedEM, unpack, and cwLocScat functions. It reproduces all the figures of the report “Analyzing cellwise weighted data” by P.J. Rousseeuw.
library("cellWise")
n = 10
d = 3
A = matrix(0.7, d, d); diag(A) = 1
A
## [,1] [,2] [,3]
## [1,] 1.0 0.7 0.7
## [2,] 0.7 1.0 0.7
## [3,] 0.7 0.7 1.0
set.seed(12345)
library("MASS")
X = mvrnorm(n, rep(0,d), A)
colnames(X) = c("X1","X2","X3")
X[1,3] = X[2,2] = X[3,1] = X[4,1] = X[6,2] = NA
X # rows 1, 2, 3, 4, 6 have NAs
## X1 X2 X3
## [1,] 0.81494704 0.5501005 NA
## [2,] 1.57453449 NA 0.5526973
## [3,] NA -0.2420284 0.2337319
## [4,] NA -0.5869239 0.2996664
## [5,] -0.24745384 0.9288529 0.9443676
## [6,] -0.74023654 NA -2.0885170
## [7,] 0.19707169 0.9746399 0.5190202
## [8,] -0.06183274 -0.1193971 -0.5598499
## [9,] 0.21050943 -0.7740109 -0.1989791
## [10,] -0.82944245 -0.9502024 -0.6871550
w = c(1,2,1,1,2,2,1,1,1,1) # rowwise weights
out = weightedEM(X,w,crit=1e-12,computeloglik=T)
out$niter # number of iteration steps taken
## [1] 131
out$mu
## X1 X2 X3
## 0.21182926 -0.28077406 -0.06154232
round(out$Sigma,6)
## X1 X2 X3
## X1 0.660288 0.331107 0.474936
## X2 0.331107 1.088924 0.943985
## X3 0.474936 0.943985 1.002466
plot(1:out$niter, out$loglikhd[1:out$niter], type='l',
lty=1, col=4, xlab='step', ylab='log(likelihood)',
main='log(likelihood) of weighted EM iterations')
tail(out$loglikhd) # would have NAs for computeloglik=F
## [1] -34.07189 -34.07189 -34.07189 -34.07189 -34.07189 -34.07189
out$impX # imputed data, has no NA's
## X1 X2 X3
## [1,] 0.81494704 0.55010045 0.7764194
## [2,] 1.57453449 0.01172782 0.5526973
## [3,] 0.49065646 -0.24202836 0.2337319
## [4,] 0.75818277 -0.58692386 0.2996664
## [5,] -0.24745384 0.92885285 0.9443676
## [6,] -0.74023654 -2.19170090 -2.0885170
## [7,] 0.19707169 0.97463990 0.5190202
## [8,] -0.06183274 -0.11939713 -0.5598499
## [9,] 0.21050943 -0.77401094 -0.1989791
## [10,] -0.82944245 -0.95020238 -0.6871550
# The weights may be multiplied by a constant:
#
(w = c(1,2,1,1,2,2,1,1,1,1)/3) # divide weights by 3
## [1] 0.3333333 0.6666667 0.3333333 0.3333333 0.6666667 0.6666667 0.3333333
## [8] 0.3333333 0.3333333 0.3333333
out = weightedEM(X,w,crit=1e-12,computeloglik=T)
out$niter # OK, same results:
## [1] 131
out$mu # same
## X1 X2 X3
## 0.21182926 -0.28077406 -0.06154232
round(out$Sigma,6) # same
## X1 X2 X3
## X1 0.660288 0.331107 0.474936
## X2 0.331107 1.088924 0.943985
## X3 0.474936 0.943985 1.002466
tail(out$loglikhd)
## [1] -11.3573 -11.3573 -11.3573 -11.3573 -11.3573 -11.3573
# converges to -11.3573 = -34.07189 / 3
# Create an equivalent matrix y without weights, by repeating
# some rows according to their integer weights:
#
Y = X[c(1,2,2,3,4,5,5,6,6,7,8,9,10),]
dim(Y)
## [1] 13 3
Y # This gives the same results:
## X1 X2 X3
## [1,] 0.81494704 0.5501005 NA
## [2,] 1.57453449 NA 0.5526973
## [3,] 1.57453449 NA 0.5526973
## [4,] NA -0.2420284 0.2337319
## [5,] NA -0.5869239 0.2996664
## [6,] -0.24745384 0.9288529 0.9443676
## [7,] -0.24745384 0.9288529 0.9443676
## [8,] -0.74023654 NA -2.0885170
## [9,] -0.74023654 NA -2.0885170
## [10,] 0.19707169 0.9746399 0.5190202
## [11,] -0.06183274 -0.1193971 -0.5598499
## [12,] 0.21050943 -0.7740109 -0.1989791
## [13,] -0.82944245 -0.9502024 -0.6871550
out = weightedEM(Y,crit=1e-12,computeloglik=T) # OK, same
out$niter
## [1] 131
out$mu
## X1 X2 X3
## 0.21182926 -0.28077406 -0.06154232
round(out$Sigma,6)
## X1 X2 X3
## X1 0.660288 0.331107 0.474936
## X2 0.331107 1.088924 0.943985
## X3 0.474936 0.943985 1.002466
tail(out$loglikhd)
## [1] -34.07189 -34.07189 -34.07189 -34.07189 -34.07189 -34.07189
# converges to -34.07189 like before.
X = matrix(c(2.8,5.3,4.9,7.4,
2.3,5.7,4.3,7.2,
2.5,5.1,4.4,7.6),nrow=3,byrow=T)
W = matrix(c(0.8,1.0,0.3,0.4,
0.3,0.5,0.9,0.5,
1.0,0.6,0,0.7),nrow=3,byrow=T)
rownames(X) = rownames(W) = c("A","B","C")
colnames(X) = colnames(W) = c("V1","V2","V3","V4")
n = nrow(X); d = ncol(X)
X
## V1 V2 V3 V4
## A 2.8 5.3 4.9 7.4
## B 2.3 5.7 4.3 7.2
## C 2.5 5.1 4.4 7.6
W
## V1 V2 V3 V4
## A 0.8 1.0 0.3 0.4
## B 0.3 0.5 0.9 0.5
## C 1.0 0.6 0.0 0.7
out = unpack(X,W)
cbind(out$U,out$v) # OK
## V1 V2 V3 V4
## A NA 5.3 NA NA 0.2
## A 2.8 5.3 NA NA 0.4
## A 2.8 5.3 NA 7.4 0.1
## A 2.8 5.3 4.9 7.4 0.3
## B NA NA 4.3 NA 0.4
## B NA 5.7 4.3 7.2 0.2
## B 2.3 5.7 4.3 7.2 0.3
## C 2.5 NA NA NA 0.3
## C 2.5 NA NA 7.6 0.1
## C 2.5 5.1 NA 7.6 0.6
dim(out$U)
## [1] 10 4
set.seed(12345)
n = 1000; d = 2
A = matrix(0.7, d, d); diag(A) = 1
A
## [,1] [,2]
## [1,] 1.0 0.7
## [2,] 0.7 1.0
X = mvrnorm(n, rep(0,d), A)
head(X)
## [,1] [,2]
## [1,] -0.1098666 1.1895284
## [2,] 0.6233152 0.6848755
## [3,] 0.2309203 -0.4324656
## [4,] -0.1164846 -0.7197229
## [5,] 0.7061365 0.4110647
## [6,] -0.9412289 -2.4109163
W = abs(mvrnorm(n, rep(0,d), diag(rep(1,2))))
W = W/max(as.vector(W))
W[2,1] = 0
W[5,2] = 0
head(W)
## [,1] [,2]
## [1,] 0.151856434 0.18102767
## [2,] 0.000000000 0.32052154
## [3,] 0.120772713 0.17167154
## [4,] 0.003729069 0.32719369
## [5,] 0.327865177 0.00000000
## [6,] 0.285126341 0.01091696
fit = cwLocScat(X,W)
fit$cwMLEiter # number of iteration steps
## [1] 47
fit$cwMLEmu
## 1 2
## 0.05972004 0.04231900
fit$cwMean
## 1 2
## 0.06657723 0.03736100
fit$cwMLEsigma
## 1 2
## 1 0.9717504 0.6735339
## 2 0.6735339 1.0075136
fit$cwCov # similar to cwMLEsigma:
## 1 2
## 1 0.9759485 0.7398607
## 2 0.7398607 1.0299615
fit$sqrtCov # same diagonal:
## 1 2
## 1 0.9759485 0.718826
## 2 0.7188260 1.029961
data("data_personality_traits")
X <- data_personality_traits$X
W <- data_personality_traits$W
cbind(X,W) # as in table in the paper
## t1 t2 t3 t4 t5 t6 t1 t2 t3 t4 t5 t6
## [1,] 7.0 5 7.0 5.0 5.0 5.0 0.50 0.29 0.50 0.29 0.29 0.29
## [2,] 10.0 10 10.0 7.0 8.5 7.0 1.00 1.00 1.00 0.50 0.58 0.50
## [3,] 5.0 5 10.0 5.0 5.0 5.0 0.29 0.29 1.00 0.29 0.29 0.29
## [4,] 10.0 10 10.0 5.0 5.0 5.0 1.00 1.00 1.00 0.29 0.29 0.29
## [5,] 7.0 7 8.5 5.0 5.0 5.0 0.50 0.50 0.58 0.29 0.29 0.29
## [6,] 10.0 5 5.0 8.5 8.5 5.0 1.00 0.29 0.29 0.58 0.58 0.29
## [7,] 5.0 7 7.0 5.0 5.0 8.5 0.29 0.50 0.50 0.29 0.29 0.58
## [8,] 10.0 10 10.0 10.0 10.0 10.0 1.00 1.00 1.00 1.00 1.00 1.00
## [9,] 8.5 7 8.5 5.0 5.0 5.0 0.58 0.50 0.58 0.29 0.29 0.29
## [10,] 5.0 10 5.0 7.0 5.0 7.0 0.29 1.00 0.29 0.50 0.29 0.50
out = unpack(X,W)
cbind(out$U,out$v)
## t1 t2 t3 t4 t5 t6
## 1 7.0 NA 7.0 NA NA NA 0.21
## 1 7.0 5 7.0 5.0 5.0 5.0 0.29
## 2 10.0 10 10.0 NA NA NA 0.42
## 2 10.0 10 10.0 NA 8.5 NA 0.08
## 2 10.0 10 10.0 7.0 8.5 7.0 0.50
## 3 NA NA 10.0 NA NA NA 0.71
## 3 5.0 5 10.0 5.0 5.0 5.0 0.29
## 4 10.0 10 10.0 NA NA NA 0.71
## 4 10.0 10 10.0 5.0 5.0 5.0 0.29
## 5 NA NA 8.5 NA NA NA 0.08
## 5 7.0 7 8.5 NA NA NA 0.21
## 5 7.0 7 8.5 5.0 5.0 5.0 0.29
## 6 10.0 NA NA NA NA NA 0.42
## 6 10.0 NA NA 8.5 8.5 NA 0.29
## 6 10.0 5 5.0 8.5 8.5 5.0 0.29
## 7 NA NA NA NA NA 8.5 0.08
## 7 NA 7 7.0 NA NA 8.5 0.21
## 7 5.0 7 7.0 5.0 5.0 8.5 0.29
## 8 10.0 10 10.0 10.0 10.0 10.0 1.00
## 9 8.5 NA 8.5 NA NA NA 0.08
## 9 8.5 7 8.5 NA NA NA 0.21
## 9 8.5 7 8.5 5.0 5.0 5.0 0.29
## 10 NA 10 NA NA NA NA 0.50
## 10 NA 10 NA 7.0 NA 7.0 0.21
## 10 5.0 10 5.0 7.0 5.0 7.0 0.29
fit = cwLocScat(X,W)
fit$cwMLEiter
## [1] 49
round(fit$cwMLEmu,2)
## t1 t2 t3 t4 t5 t6
## 8.82 8.72 8.98 7.40 7.53 7.37
round(fit$cwMean,2)
## t1 t2 t3 t4 t5 t6
## 8.73 8.61 8.87 7.09 7.16 7.09
round(fit$cwMLEsigma, 2)
## t1 t2 t3 t4 t5 t6
## t1 3.22 1.83 1.49 1.91 2.52 1.02
## t2 1.83 3.49 1.55 1.69 1.82 2.00
## t3 1.49 1.55 2.50 0.63 1.22 0.92
## t4 1.91 1.69 0.63 3.54 3.48 2.84
## t5 2.52 1.82 1.22 3.48 3.90 2.82
## t6 1.02 2.00 0.92 2.84 2.82 3.58
round(eigen(fit$cwMLEsigma)$values, 2)
## [1] 13.13 3.24 2.18 1.25 0.31 0.11
round(fit$cwCov, 2)
## t1 t2 t3 t4 t5 t6
## t1 3.36 2.10 1.72 2.63 3.33 1.37
## t2 2.10 3.61 1.78 2.20 2.44 2.46
## t3 1.72 1.78 2.59 0.81 1.64 1.14
## t4 2.63 2.20 0.81 3.99 4.17 3.26
## t5 3.33 2.44 1.64 4.17 4.76 3.40
## t6 1.37 2.46 1.14 3.26 3.40 4.00
round(eigen(fit$cwCov)$values,5)
## [1] 15.91356 2.99031 2.26943 1.08606 0.05570 0.00416
round(cov(X), 2) # unweighted
## t1 t2 t3 t4 t5 t6
## t1 4.96 1.61 1.44 2.01 3.00 0.07
## t2 1.61 4.93 1.38 1.39 1.26 2.17
## t3 1.44 1.38 4.04 -0.42 0.59 0.36
## t4 2.01 1.39 -0.42 3.29 3.25 1.93
## t5 3.00 1.26 0.59 3.25 3.90 1.89
## t6 0.07 2.17 0.36 1.93 1.89 3.29
round(eigen(cov(X))$values, 2)
## [1] 11.97 4.95 4.64 2.28 0.47 0.10
Now we reproduce the figure in the paper
ellips = function(covmat, mu, quant=0.95, npoints = 120)
{ # computes points of the ellipse t(X-mu)%*%covmat%*%(X-mu) = c
# with c = qchisq(quant,df=2)
if (!all(dim(covmat) == c(2, 2))) stop("covmat is not 2 by 2")
eig = eigen(covmat)
U = eig$vectors
R = U %*% diag(sqrt(eig$values)) %*% t(U) # square root of covmat
angles = seq(0, 2*pi, length = npoints+1)
xy = cbind(cos(angles),sin(angles)) # points on the unit circle
fac = sqrt(qchisq(quant, df=2))
scale(fac*xy%*%R, center = -mu, scale=FALSE)
}
n = nrow(X)
j = 3; k = 6 # to plot variables t3 and t6
xy = X[,c(j,k)]
cov2cor(cov(X)[c(j,k),c(j,k)]) # unweighted correlation is 0.10
## t3 t6
## t3 1.00000000 0.09896998
## t6 0.09896998 1.00000000
cov2cor(fit$cwMLEsigma[c(j,k),c(j,k)]) # now correlation is 0.31
## t3 t6
## t3 1.0000000 0.3077506
## t6 0.3077506 1.0000000
(wxy = W[,c(j,k)])
## t3 t6
## [1,] 0.50 0.29
## [2,] 1.00 0.50
## [3,] 1.00 0.29
## [4,] 1.00 0.29
## [5,] 0.58 0.29
## [6,] 0.29 0.29
## [7,] 0.50 0.58
## [8,] 1.00 1.00
## [9,] 0.58 0.29
## [10,] 0.29 0.50
duplicated(xy) # ties: row 4 equals row 3, and row 9 equals row 5
## [1] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
wxy[3,] = wxy[3,] + wxy[4,] # add cell weights of rows 3 and 4
wxy[5,] = wxy[5,] + wxy[9,] # add cell weights of rows 5 and 9
(wxy = wxy[-c(4,9),]) # remove duplicate rows
## t3 t6
## [1,] 0.50 0.29
## [2,] 1.00 0.50
## [3,] 2.00 0.58
## [4,] 1.16 0.58
## [5,] 0.29 0.29
## [6,] 0.50 0.58
## [7,] 1.00 1.00
## [8,] 0.29 0.50
(xy = xy[-c(4,9),]) # remove duplicate rows
## t3 t6
## [1,] 7.0 5.0
## [2,] 10.0 7.0
## [3,] 10.0 5.0
## [4,] 8.5 5.0
## [5,] 5.0 5.0
## [6,] 7.0 8.5
## [7,] 10.0 10.0
## [8,] 5.0 7.0
# pdf("personality_cwMLE_cwCov.pdf",width=5.5,height=5.5)
myxlim = c(2,14); myylim = c(1,13)
plot(xy, pch=16, col="white", xlim=myxlim, ylim=myylim,
xlab="",ylab="")
fac = 0.3 # for the size of the lines representing the cell weights
for(i in seq_len(nrow(xy))){
WY = c(xy[i,1] - fac*wxy[i,1],xy[i,2]) # (WestX, Y)
EY = c(xy[i,1] + fac*wxy[i,1],xy[i,2]) # (EastX, Y)
XS = c(xy[i,1],xy[i,2] - fac*wxy[i,2]) # (X, SouthY)
XN = c(xy[i,1],xy[i,2] + fac*wxy[i,2]) # (X, NorthY)
lines(rbind(WY,EY),lwd=3)
lines(rbind(XS,XN),lwd=3)
}
title(main="tolerance ellipses with and without cell weights",
line=0.8,cex.main=1) # 1.2)
title(xlab="trait 3",line=2.1,cex.lab=1.0)
title(ylab="trait 6",line=2.1,cex.lab=1.0)
center1 = colMeans(X[,c(j,k)])
covmat1 = (n-1)*cov(X[,c(j,k)])/n
ell1 = ellips(covmat1, center1)
lines(ell1,lwd=1.5,col="red") # ellipse from unweighted covariance
fit2 = cwLocScat(xy,wxy)
center2 = fit2$cwMLEmu
covmat2 = fit2$cwMLEsigma
ell2 = ellips(covmat2, center2) # ellipse from cwMLE estimates
lines(ell2,lwd=1.5,col="blue")
center3 = fit2$cwMean
covmat3 = fit2$cwCov
ell3 = ellips(covmat3, center3) # ellipse from cwMean and cwCov
lines(ell3,lwd=1.5,lty=2,col="blue")
legend("topleft",c("cwMLE","cwCov","MLE"),
col=c("blue","blue","red"), lty=c(1,2,1), lwd=1.5, cex=1)
# dev.off()
# The blue ellipses, that use the cell weights, are a bit
# higher and more slanted than the red ellipse that doesn't,
# mainly due to the high cell weight of the y-coordinate of
# the point in the top right corner.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.