The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Predictor Identifier: Nonparametric PREDiction

Ashish Sharma, Raj Mehrotra, Sanjeev Jha, Jingwan Li and Ze Jiang

10:30:56 25 July, 2024

1 Synthetic data generation

# Other statistical models for generating synthetic data see Package - synthesis
require(synthesis)
#> Loading required package: synthesis
#> 
#> Attaching package: 'synthesis'
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9

set.seed(2020) # set seed for reproducible results
# AR1 model from paper with 9 dummy variables
data.ar1 <- synthesis::data.gen.ar1(500)

# AR4 model from paper with total 9 dimensions
data.ar4 <- synthesis::data.gen.ar4(500)

# AR9 model from paper with total 9 dimensions
data.ar9 <- synthesis::data.gen.ar9(500)

# plot.zoo(cbind(data.ar1$x, data.ar4$x, data.ar9$x),
#   xlab = NA, main = "Example of AR models",
#   ylab = c("AR1", "AR4", "AR9"))

2 Partial informational correlation (PIC)

# calculate PIC
pic.calc(data.ar1$x, data.ar1$dp)
#> $pmi
#> [1] 0.69319346 0.41783003 0.26170451 0.17923658 0.12102203 0.09080493 0.05591261
#> [8] 0.02951112 0.01654911
#> 
#> $pic
#> [1] 0.8660388 0.7526034 0.6383594 0.5488694 0.4636577 0.4075210 0.3252683
#> [8] 0.2394038 0.1804341

pic.calc(data.ar4$x, data.ar4$dp)
#> $pmi
#> [1]  0.273343774  0.031705618  0.001377018  0.151620432  0.235788221
#> [6]  0.126943971  0.021324429 -0.008302517  0.030173933
#> 
#> $pic
#> [1] 0.6489498 0.2478761 0.0524428 0.5114477 0.6131739 0.4735201 0.2043335
#> [8] 0.0000000 0.2419980

pic.calc(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1]  0.034701149 -0.008256610  0.073552041  0.353385264 -0.004119804
#> [6]  0.027551717  0.088435930  0.091603445  0.049201990
#> 
#> $pic
#> [1] 0.2589377 0.0000000 0.3698593 0.7118746 0.0000000 0.2315443 0.4026324
#> [8] 0.4091505 0.3061328

3 Predictor identifier: stepwise.PIC

pic1 <- stepwise.PIC(data.ar1$x, data.ar1$dp)

pic4 <- stepwise.PIC(data.ar4$x, data.ar4$dp)

pic9 <- stepwise.PIC(data.ar9$x, data.ar9$dp)

4 Partial weights (PW)

# calculate partial weights
pw.calc(data.ar1$x, data.ar1$dp, pic1$cpy, pic1$cpyPIC)
#> $pw
#> [1] 1

pw.calc(data.ar4$x, data.ar4$dp, pic4$cpy, pic4$cpyPIC)
#> $pw
#> [1] 0.5405296 0.4594704

pw.calc(data.ar9$x, data.ar9$dp, pic9$cpy, pic9$cpyPIC)
#> $pw
#> [1] 0.46352053 0.32817626 0.16792648 0.04037673

5 Nonparameteric prediction: knn

data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

for (i in 1:500) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = F)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = T)
}

if (TRUE) {
  par(mfrow = c(1, 1),
      mar=c(3,2,1,1), mgp=c(1.5,0.5,0),
      pty = c("m"))

  ts.plot(x, xhat1, xhat2, col = c("black","red","blue"), ylim = c(-10, 10), lwd = c(1, 1, 1))
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50), horiz = T,
    col = c("black","red","blue"))

  par(mfrow = c(1, 1), pty = c("s"))
  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)

}

Figure 5.1: Example of KNN implemented in NPRED

Figure 5.2: Example of KNN implemented in NPRED

6 Illustration of the usage of partial weights

sample <- 500
k <- 0
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

u1 <- cbind(u, runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi))
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k))

sel <- stepwise.PIC(x = z, py = u1)
sel
#> $cpy
#> [1] 1
#> 
#> $cpyPIC
#> [1] 0.1606175
#> 
#> $wt
#> [1] 1
#> 
#> $lstwet
#>   Intercept           X 
#> 0.141643996 0.002144423 
#> 
#> $icpy
#> [1] 1
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k))

if (TRUE) {
  par(mfrow = c(1, 1), pty = c("m"))
  
  plot(u, z, pch = 16)
  lines(sort(u), zhat1[order(u)], col = "green")
  lines(sort(u), zhat2[order(u)], col = "red")
  abline(a = 0, b = 0)
}

Figure 6.1: Illustration of the usage of partial weights

7 Application to predicting drought anomalies

# An demo example used in Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2020)
# Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2021). A wavelet-based tool to modulate variance in predictors: An application to predicting drought anomalies. Environmental Modelling and Software, 135, 104907.

require(WASP)
#> Loading required package: WASP
#> 
#> Attaching package: 'WASP'
#> The following objects are masked from 'package:synthesis':
#> 
#>     data.gen.HL, data.gen.Rossler, data.gen.SW, data.gen.ar1,
#>     data.gen.ar4, data.gen.ar9, data.gen.tar1, data.gen.tar2
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9, knn, knnregl1cv, pic.calc
#load response and predictor variables
data("SPI.12", package="WASP"); 
data("data.CI", package="WASP")
data("Ind_AWAP.2.5", package="WASP")

#study grids and period
Grid <- sample(Ind_AWAP.2.5,1)
Grid = 149 #Grid used in the SI
SPI.12.ts <- window(SPI.12, start=c(1910,1),end=c(2009,12))
data.CI.ts <- window(data.CI, start=c(1910,1),end=c(2009,12))
#partition into two folds
folds <- cut(seq(1,nrow(SPI.12.ts)),breaks=2,labels=FALSE)
sub.cali <- which(folds==1, arr.ind=TRUE); sub.vali <- which(folds==2, arr.ind=TRUE)
#-------------------------------------------------------------------------------------
###calibration and selection
data <- list(x=SPI.12.ts[sub.cali,Grid],dp=data.CI.ts[sub.cali,])

#variance transformation - calibration
modwt <- modwt.vt(data, wf="haar", J=8, boundary="periodic")

#stepwise PIC selection
sel <- NPRED::stepwise.PIC(modwt$x, modwt$dp.n)
#-------------------------------------------------------------------------------------
###validation and prediction
data.val <- list(x=SPI.12.ts[sub.vali,Grid],dp=data.CI.ts[sub.vali,])

#variance transformation - validation
modwt.val <- modwt.vt.val(data.val, J=8, modwt)

#knn prediction
cpy <- sel$cpy; wt <- sel$wt
x=data$x; z=modwt$dp.n[,cpy]; zout=modwt.val$dp.n[,cpy]
mod <- knn(x, z, zout, k=5, pw=wt, extrap=T)

#-------------------------------------------------------------------------------------
###plot
start.cal <- c(1910,1); start.val <- c(1960,1)
ndim = ncol(data.CI.ts); CI.names = colnames(data.CI.ts)
par(mfcol=c(ndim+1,2),mar=c(2,4,2,2),mgp=c(1.5,0.5,0),
    bg = "white",pty="m",cex.lab=1.5,ps=8)
#----------------------------------------------
#plot before and after vt - calibration
if(TRUE){

  x <- ts(modwt$x, start=start.cal, frequency = 12)
  dp <- ts(modwt$dp, start=start.cal, frequency = 12)
  dp.n <- ts(modwt$dp.n, start=start.cal, frequency = 12)

  ts.plot(x,xlab=NA, main=paste0("Sampled Grid: ", Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
#----------------------------------------------
#plot before and after vt - validation
if(TRUE){

  x <- ts(modwt.val$x, start=start.val, frequency = 12)
  dp <- ts(modwt.val$dp, start=start.val, frequency = 12)
  dp.n <- ts(modwt.val$dp.n, start=start.val, frequency = 12)

  ts.plot(x, xlab=NA, main=paste0("Sampled Grid: ",Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}

Figure 7.1: Application to predicting drought anomalies

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.