The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Predictor Identifier: Nonparametric PREDiction

Ashish Sharma, Raj Mehrotra, Sanjeev Jha, Jingwan Li and Ze Jiang

14:23:19 21 June, 2023

library(NPRED)

op <- par()
require(zoo)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
require(ggplot2)
#> Loading required package: ggplot2
#> Warning: package 'ggplot2' was built under R version 4.2.3

1 Synthetic data generation

# Other statistical models for generating synthetic data see Package - synthesis
# require(synthesis)

set.seed(2020) # set seed for reproducible results
# AR1 model from paper with 9 dummy variables
data.ar1 <- data.gen.ar1(500)

# AR4 model from paper with total 9 dimensions
data.ar4 <- data.gen.ar4(500)

# AR9 model from paper with total 9 dimensions
data.ar9 <- data.gen.ar9(500)

plot.zoo(cbind(data.ar1$x, data.ar4$x, data.ar9$x),
  xlab = NA, main = "Example of AR models",
  ylab = c("AR1", "AR4", "AR9"))
Example of AR models

Figure 1.1: Example of AR models

2 Partial informational correlation (PIC)

# R version of pic
pic.calc(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1]  0.01667864 -0.01345925  0.03862148  0.33933764 -0.01281137  0.01703449
#> [7]  0.04359516  0.07521345  0.07754585
#> 
#> $pic
#> [1] 0.1811272 0.0000000 0.2726446 0.7019341 0.0000000 0.1830168 0.2889591
#> [8] 0.3737103 0.3790295

# fortran version of pic
calc.PIC(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1] -0.003200178 -0.030318462  0.030197035  0.343560072 -0.019053916
#> [6]  0.010659037  0.046381753  0.068540976  0.059103240
#> 
#> $pic
#> [1] 0.0000000 0.0000000 0.2420878 0.7049662 0.0000000 0.1452324 0.2976424
#> [8] 0.3579123 0.3338973

3 Predictor identifier: stepwise.PIC

# pic <- stepwise.PIC(data.ar1$x, data.ar1$dp)
# pic <- stepwise.PIC(data.ar4$x, data.ar4$dp)
pic <- stepwise.PIC(data.ar9$x, data.ar9$dp)
pic
#> $cpy
#> [1] 4 9 1
#> 
#> $cpyPIC
#> [1] 0.7019341 0.5358898 0.3475098
#> 
#> $wt
#> [1] 0.4708906 0.3340259 0.1950835
#> 
#> $lstwet
#>  Intercept         X1         X2         X3 
#> 0.02092053 0.61892085 0.49902045 0.26565225 
#> 
#> $icpy
#> [1] 3

4 Partial weights (PW)

# R version of pw
pw.calc(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.4708906 0.3340259 0.1950835

# fortran version of pw
calc.PW(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.6371889 0.4519895 0.2639787

5 Nonparameteric prediction: knn

data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

for (i in 1:500) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = F)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = T)
}

if (TRUE) {
  par(mfrow = c(1, 2), pty = c("s"))

  ts.plot(x, xhat1, xhat2, col = c("black","red","blue"), ylim = c(-10, 10), lwd = c(1, 1, 1))
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50), horiz = T,
    col = c("black","red","blue")
  )

  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)
  par(op)
}
#> Warning in par(op): graphical parameter "cin" cannot be set
#> Warning in par(op): graphical parameter "cra" cannot be set
#> Warning in par(op): graphical parameter "csi" cannot be set
#> Warning in par(op): graphical parameter "cxy" cannot be set
#> Warning in par(op): graphical parameter "din" cannot be set
#> Warning in par(op): graphical parameter "page" cannot be set
Example of KNN implemented in NPRED

Figure 5.1: Example of KNN implemented in NPRED

6 Illustration of the usage of partial weights

sample <- 500
k <- 0
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

u1 <- cbind(u, runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi))
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k))

sel <- stepwise.PIC(x = z, py = u1)
sel
#> $cpy
#> [1] 1
#> 
#> $cpyPIC
#> [1] 0.2559138
#> 
#> $wt
#> [1] 1
#> 
#> $lstwet
#>  Intercept          X 
#> 0.17620490 0.00959311 
#> 
#> $icpy
#> [1] 1
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k))

if (TRUE) {
  plot(u, z, pch = 16)
  lines(sort(u), zhat1[order(u)], col = "green")
  lines(sort(u), zhat2[order(u)], col = "red")
  abline(a = 0, b = 0)
}
Illustration of the usage of partial weights

Figure 6.1: Illustration of the usage of partial weights

7 Application to predicting drought anomalies

# An demo example used in Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2020)
# Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2021). A wavelet-based tool to modulate variance in predictors: An application to predicting drought anomalies. Environmental Modelling and Software, 135, 104907. https://doi.org/10.1016/j.envsoft.2020.104907 
require(WASP) 
#> Loading required package: WASP
#> 
#> Attaching package: 'WASP'
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9, knn, knnregl1cv

#-------------------------------------------------------------------------------------
#load response and predictor variables
data(SPI.12); data(data.CI); data(Ind_AWAP.2.5)
#study grids and period
Grid <- sample(Ind_AWAP.2.5,1)
Grid = 149 #Grid used in the SI
SPI.12.ts <- window(SPI.12, start=c(1910,1),end=c(2009,12))
data.CI.ts <- window(data.CI, start=c(1910,1),end=c(2009,12))
#partition into two folds
folds <- cut(seq(1,nrow(SPI.12.ts)),breaks=2,labels=FALSE)
sub.cali <- which(folds==1, arr.ind=TRUE); sub.vali <- which(folds==2, arr.ind=TRUE)
#-------------------------------------------------------------------------------------
###calibration and selection
data <- list(x=SPI.12.ts[sub.cali,Grid],dp=data.CI.ts[sub.cali,])

#variance transformation - calibration
modwt <- modwt.vt(data, wf="haar", J=8, boundary="periodic")

#stepwise PIC selection
sel <- NPRED::stepwise.PIC(modwt$x, modwt$dp.n)
#-------------------------------------------------------------------------------------
###validation and prediction
data.val <- list(x=SPI.12.ts[sub.vali,Grid],dp=data.CI.ts[sub.vali,])

#variance transformation - validation
modwt.val <- modwt.vt.val(data.val, J=8, modwt)

#knn prediction
cpy <- sel$cpy; wt <- sel$wt
x=data$x; z=modwt$dp.n[,cpy]; zout=modwt.val$dp.n[,cpy]
mod <- knn(x, z, zout, k=5, pw=wt, extrap=T)

#-------------------------------------------------------------------------------------
###plot
start.cal <- c(1910,1); start.val <- c(1960,1)
ndim = ncol(data.CI.ts); CI.names = colnames(data.CI.ts)
par(mfcol=c(ndim+1,2),mar=c(2,4,2,2),mgp=c(1.5,0.5,0),
    bg = "white",pty="m", cex.lab=1.5, ps=8)
#----------------------------------------------
#plot before and after vt - calibration
if(TRUE){

  x <- ts(modwt$x, start=start.cal, frequency = 12)
  dp <- ts(modwt$dp, start=start.cal, frequency = 12)
  dp.n <- ts(modwt$dp.n, start=start.cal, frequency = 12)

  ts.plot(x,xlab=NA, main=paste0("Sampled Grid: ", Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
#----------------------------------------------
#plot before and after vt - validation
if(TRUE){

  x <- ts(modwt.val$x, start=start.val, frequency = 12)
  dp <- ts(modwt.val$dp, start=start.val, frequency = 12)
  dp.n <- ts(modwt.val$dp.n, start=start.val, frequency = 12)

  ts.plot(x, xlab=NA, main=paste0("Sampled Grid: ",Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
Application to predicting drought anomalies

Figure 7.1: Application to predicting drought anomalies


par(op)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.