In this vignette, we will demonstrate the main capabilities of the GPvecchia package. These include estimating parameters and making spatial predictions. We also show how to use the package for processing non-linear, non-Gaussian data by combining the Vecchia approximation with a Laplace approximation.

We start by importing the GPvecchia library.

library(GPvecchia)
library(Matrix)
library(fields)
#> Loading required package: spam
#> Loading required package: dotCall64
#> Loading required package: grid
#> Spam version 2.3-0 (2019-09-13) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction 
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#> 
#> Attaching package: 'spam'
#> The following object is masked from 'package:Matrix':
#> 
#>     det
#> The following objects are masked from 'package:base':
#> 
#>     backsolve, forwardsolve
#> Loading required package: maps
#> See https://github.com/NCAR/Fields for
#>  an extensive vignette, other supplements and source code

Simulating data for illustration

To illustrate our method, we simulate a small dataset. First, consider a unit square and randomly select observation locations. (Set spatial.dim=1 to consider data on the one-dimensional unit interval.)

# Fix the RNG seed so that the simulated example is reproducible.
set.seed(1988)
spatial.dim <- 2  # 1 gives the unit interval, anything else the unit square
n <- 50           # number of observation locations

# Draw n locations uniformly at random; one column per spatial coordinate.
locs <- if (spatial.dim == 1) {
  matrix(runif(n), ncol = 1)
} else {
  cbind(runif(n), runif(n))
}

Next, we define the covariance function of the field as well as the scale of the measurement error

# True parameters used to simulate the example data.
beta <- 2                              # constant mean added to the field
sig2 <- 1; range <- .1; smooth <- 1.5  # Matern variance, range and smoothness
covparms <- c(sig2, range, smooth)

# Covariance matrix of the field at the rows of `locs`.
# The variance is already passed to MaternFun as covparms[1], so the result is
# not multiplied by sig2 again (that would scale the covariance by sig2^2).
covfun <- function(locs) MaternFun(fields::rdist(locs), covparms)

# Nugget added to the diagonal of the covariance, one value per observation.
nuggets <- rep(.1, n)

We are now ready to simulate the field and visualize it as a sanity check.

# Covariance of the noisy observations: field covariance plus the nuggets.
Om0 <- covfun(locs) + diag(nuggets)

# Zero-mean sample with covariance Om0 (lower Cholesky factor times white
# noise), then shifted by the constant mean.
z <- as.numeric(t(chol(Om0)) %*% rnorm(n))
data <- z + beta

# plot simulated data
if (spatial.dim != 1) {
  fields::quilt.plot(locs, data, nx = n, ny = n)
} else {
  plot(locs, data)
}

plot of chunk fig4

We also create a grid of \(n.p\) locations at which we would like to make predictions.

# Requested number of prediction locations.
n.p <- 100
if (spatial.dim == 1) {
  # 1-D case: n.p equally spaced points on [0, 1]
  locs.pred <- matrix(seq(0, 1, length.out = n.p), ncol = 1)
} else {
  # 2-D case: regular grid with round(sqrt(n.p)) points per side
  grid.oneside <- seq(0, 1, length.out = round(sqrt(n.p)))
  locs.pred <- as.matrix(expand.grid(grid.oneside, grid.oneside))
}
# Number of prediction locations that were actually generated.
n.p <- nrow(locs.pred)

Basic functions for parameter estimation and prediction

Let us now estimate the mean and covariance parameters using the default settings, which assume a spatially constant mean or trend, and a Matern covariance structure. Note that the following code might take a minute or so to run.

# Estimate the parameters from the simulated data using the default settings.
vecchia.est <- vecchia_estimate(data, locs)
#>   Nelder-Mead direct search function minimizer
#> function value for initial parameters = 51.864852
#>   Scaled convergence tolerance is 7.72847e-07
#> Stepsize computed as 0.254500
#> BUILD              5 52.244175 51.668224
#> HI-REDUCTION       7 52.158724 51.668224
#> LO-REDUCTION       9 51.973580 51.625294
#> LO-REDUCTION      11 51.864852 51.580405
#> REFLECTION        13 51.767217 51.495128
#> HI-REDUCTION      15 51.668224 51.495128
#> LO-REDUCTION      17 51.625294 51.487196
#> LO-REDUCTION      19 51.580405 51.482044
#> REFLECTION        21 51.538056 51.442751
#> REFLECTION        23 51.495128 51.418028
#> HI-REDUCTION      25 51.487196 51.418028
#> LO-REDUCTION      27 51.482044 51.410465
#> HI-REDUCTION      29 51.442751 51.403626
#> EXTENSION         31 51.425544 51.357102
#> EXTENSION         33 51.418028 51.340615
#> LO-REDUCTION      35 51.410465 51.340615
#> LO-REDUCTION      37 51.403626 51.340615
#> LO-REDUCTION      39 51.363585 51.340615
#> LO-REDUCTION      41 51.358799 51.340615
#> LO-REDUCTION      43 51.357102 51.340615
#> LO-REDUCTION      45 51.344313 51.339414
#> LO-REDUCTION      47 51.342780 51.339414
#> HI-REDUCTION      49 51.342159 51.339414
#> HI-REDUCTION      51 51.340615 51.339414
#> HI-REDUCTION      53 51.340366 51.339414
#> LO-REDUCTION      55 51.340342 51.339414
#> REFLECTION        57 51.340099 51.339137
#> LO-REDUCTION      59 51.339696 51.339137
#> HI-REDUCTION      61 51.339677 51.339137
#> REFLECTION        63 51.339466 51.339021
#> REFLECTION        65 51.339414 51.338938
#> REFLECTION        67 51.339364 51.338931
#> EXTENSION         69 51.339137 51.338125
#> LO-REDUCTION      71 51.339021 51.338125
#> LO-REDUCTION      73 51.338938 51.338125
#> EXTENSION         75 51.338931 51.337755
#> EXTENSION         77 51.338417 51.336844
#> LO-REDUCTION      79 51.338411 51.336844
#> LO-REDUCTION      81 51.338125 51.336844
#> EXTENSION         83 51.337755 51.335398
#> LO-REDUCTION      85 51.337196 51.335398
#> LO-REDUCTION      87 51.336933 51.335398
#> EXTENSION         89 51.336844 51.334961
#> EXTENSION         91 51.336206 51.334558
#> EXTENSION         93 51.335805 51.333068
#> LO-REDUCTION      95 51.335398 51.333068
#> LO-REDUCTION      97 51.334961 51.333068
#> LO-REDUCTION      99 51.334558 51.333068
#> REFLECTION       101 51.333843 51.333026
#> LO-REDUCTION     103 51.333782 51.333026
#> EXTENSION        105 51.333716 51.332510
#> REFLECTION       107 51.333214 51.332375
#> LO-REDUCTION     109 51.333068 51.332375
#> LO-REDUCTION     111 51.333026 51.332375
#> REFLECTION       113 51.332510 51.332261
#> LO-REDUCTION     115 51.332402 51.332261
#> REFLECTION       117 51.332385 51.332235
#> REFLECTION       119 51.332375 51.332197
#> HI-REDUCTION     121 51.332278 51.332197
#> REFLECTION       123 51.332261 51.332081
#> HI-REDUCTION     125 51.332235 51.332081
#> HI-REDUCTION     127 51.332201 51.332081
#> LO-REDUCTION     129 51.332197 51.332081
#> HI-REDUCTION     131 51.332137 51.332081
#> REFLECTION       133 51.332131 51.332074
#> LO-REDUCTION     135 51.332112 51.332074
#> LO-REDUCTION     137 51.332106 51.332074
#> REFLECTION       139 51.332084 51.332061
#> HI-REDUCTION     141 51.332081 51.332061
#> LO-REDUCTION     143 51.332081 51.332061
#> LO-REDUCTION     145 51.332074 51.332061
#> REFLECTION       147 51.332068 51.332057
#> LO-REDUCTION     149 51.332064 51.332057
#> REFLECTION       151 51.332061 51.332054
#> EXTENSION        153 51.332061 51.332049
#> LO-REDUCTION     155 51.332059 51.332049
#> LO-REDUCTION     157 51.332057 51.332049
#> REFLECTION       159 51.332054 51.332045
#> LO-REDUCTION     161 51.332051 51.332045
#> LO-REDUCTION     163 51.332049 51.332045
#> REFLECTION       165 51.332049 51.332045
#> REFLECTION       167 51.332046 51.332044
#> LO-REDUCTION     169 51.332045 51.332043
#> HI-REDUCTION     171 51.332045 51.332043
#> EXTENSION        173 51.332045 51.332041
#> HI-REDUCTION     175 51.332044 51.332041
#> HI-REDUCTION     177 51.332044 51.332041
#> EXTENSION        179 51.332043 51.332039
#> LO-REDUCTION     181 51.332043 51.332039
#> LO-REDUCTION     183 51.332042 51.332039
#> LO-REDUCTION     185 51.332041 51.332039
#> EXTENSION        187 51.332040 51.332037
#> EXTENSION        189 51.332039 51.332036
#> LO-REDUCTION     191 51.332039 51.332036
#> HI-REDUCTION     193 51.332039 51.332036
#> REFLECTION       195 51.332038 51.332036
#> EXTENSION        197 51.332038 51.332035
#> LO-REDUCTION     199 51.332037 51.332035
#> LO-REDUCTION     201 51.332036 51.332035
#> EXTENSION        203 51.332036 51.332035
#> REFLECTION       205 51.332036 51.332035
#> Exiting from Nelder Mead minimizer
#>     207 function evaluations used
#> estimated trend coefficients: beta.hat=1.77735109323067
#> estimated covariance parameters: theta.hat=0.8275680112135430.101426062937611.45815510100940.0909799904041399
#> ,

Based on these parameter estimates, we can then make predictions at the grid of locations we had specified above.

# Predict at the grid locations using the fitted object from vecchia_estimate().
preds <- vecchia_pred(vecchia.est, locs.pred)

Finally, we compare the approximate predictions with the best possible ones (i.e. those obtained using analytic expressions for conditional mean in the Gaussian distribution).

##  exact prediction
# Covariance of the field at all observed and prediction locations. It is
# evaluated once and reused; its first n columns are the cross-covariances
# with the observed locations.
cov.all <- covfun(rbind(locs, locs.pred))
cov.all.obs <- cov.all[, 1:n]
pred.idx <- n + seq_len(n.p)  # positions of the prediction locations

# Gaussian conditional mean and covariance given the data, using the true
# mean beta and the true observation covariance Om0.
mu.exact <- as.numeric(cov.all.obs %*% solve(Om0, data - beta)) + beta
cov.exact <- cov.all - cov.all.obs %*% solve(Om0, t(cov.all.obs))
var.exact <- diag(cov.exact)
cov.exact.pred <- cov.exact[pred.idx, pred.idx]


### plot Vecchia and exact predictions
# The observations are shown as `data` rather than the zero-mean `z`:
# mu.exact has beta added back and the Vecchia fit was made to `data`, so the
# predictions live on the scale of `data`.
if (spatial.dim == 1) {
  plot(locs, data)
  # Vecchia predictions with pointwise 95% bands (blue)
  lines(locs.pred, preds$mean.pred, col = 'blue')
  lines(locs.pred, preds$mean.pred - 1.96 * sqrt(preds$var.pred), col = 'blue', lty = 2)
  lines(locs.pred, preds$mean.pred + 1.96 * sqrt(preds$var.pred), col = 'blue', lty = 2)
  # exact predictions with pointwise 95% bands (red)
  lines(locs.pred, mu.exact[pred.idx], col = 'red')
  lines(locs.pred, mu.exact[pred.idx] - 1.96 * sqrt(var.exact[pred.idx]), col = 'red', lty = 2)
  lines(locs.pred, mu.exact[pred.idx] + 1.96 * sqrt(var.exact[pred.idx]), col = 'red', lty = 2)
} else {
  # common colour scale for the two standard-deviation maps
  sdrange <- range(sqrt(c(preds$var.pred, var.exact[pred.idx])))
  n.side <- sqrt(n.p)  # grid points per side of the prediction grid
  defpar <- par(mfrow = c(2, 3))
  # top row: data, Vecchia mean, Vecchia standard deviation
  fields::quilt.plot(locs, data, nx = n.side, ny = n.side)
  fields::quilt.plot(locs.pred, preds$mean.pred, nx = n.side, ny = n.side)
  fields::quilt.plot(locs.pred, sqrt(preds$var.pred), zlim = sdrange, nx = n.side, ny = n.side)
  # bottom row: data, exact mean, exact standard deviation
  fields::quilt.plot(locs, data, nx = n.side, ny = n.side)
  fields::quilt.plot(locs.pred, mu.exact[pred.idx], nx = n.side, ny = n.side)
  fields::quilt.plot(locs.pred, sqrt(var.exact[pred.idx]), zlim = sdrange, nx = n.side, ny = n.side)
  par(defpar)  # restore the previous plot layout
}

plot of chunk unnamed-chunk-6

More details on likelihood evaluation

Let's take a closer look at how the likelihood is evaluated using Vecchia. Most importantly, we can specify a parameter, \(m\). Its value determines the number of “neighbours” of each point, or, in other words, how many other points a given point conditions on. The larger this parameter, the more accurate and expensive the approximation will be.

# Number of conditioning points ("neighbours") per location.
m <- 20
# The specification depends only on the locations and m, not on the data.
vecchia.approx <- vecchia_specify(locs, m)
# Evaluate the approximation at the true covariance parameters and nuggets.
vecchia_likelihood(z, vecchia.approx, covparms, nuggets)
#> [1] -51.3744

Note that the function vecchia_specify determines the general properties of the approximation, but it does not depend on the data or the specific parameter values. Hence, it does not have to be re-run when searching over different parameter values in an estimation procedure.

We can also compare the results to the exact likelihood:

library(mvtnorm)
# Exact zero-mean Gaussian log-density of z under the true covariance Om0.
dmvnorm(z, mean = numeric(n), sigma = Om0, log = TRUE)
#> [1] -51.441

In this case the approximation is very good. In general, \(m=20\) is a good value, and \(m\) should usually be between 10 and 40. For one-dimensional space, we can get good approximations even with \(m=5\) or smaller.

More details on spatial prediction

Similar to the previous section we next specify the approximation and indicate at which locations prediction is desired.

m <- 30
# Same specification as for the likelihood, now including the prediction grid.
vecchia.approx <- vecchia_specify(locs, m, locs.pred = locs.pred)
preds <- vecchia_prediction(z, vecchia.approx, covparms, nuggets)
# returns a list with elements mu.pred,mu.obs,var.pred,var.obs,V.ord

It is also possible to compute the entire predictive covariance matrix and plot it. We do it here only for the purpose of illustration. If \(n.p\) is very large, this matrix might use up a lot of memory and we generally do not recommend computing or plotting it directly.

# Predictive covariance matrix implied by the Vecchia approximation.
Sigma <- V2covmat(preds)$Sigma.pred
# Shared colour limits: 1% and 99% quantiles over both matrices.
cov.range <- quantile(rbind(Sigma, cov.exact.pred), probs = c(.01, .99))
# Exact (left) and Vecchia (right) covariance side by side; the previous
# layout is kept in defpar.
defpar <- par(mfrow = c(1, 2))
fields::image.plot(cov.exact.pred, zlim = cov.range)
fields::image.plot(Sigma, zlim = cov.range)

plot of chunk unnamed-chunk-10

# Restore the graphical parameters saved in defpar. The saved list is passed
# directly, not under the name mfrow.
par(defpar)

Linear combinations

We might sometimes be interested in a linear combination of the predicted values. In particular, we can limit our attention to only a subset of our predictions. This can be accomplished by specifying the linear combination coefficients as a matrix. As an example, we assume we are only interested in predictions at the unobserved prediction locations (not at the first n observed locations):

# H consists of rows n+1, ..., n+n.p of the (n+n.p) x (n+n.p) identity matrix,
# so Hy keeps only the entries at the prediction locations.
H <- Matrix::sparseMatrix(
  i = seq_len(n + n.p),
  j = seq_len(n + n.p),
  x = 1
)[n + seq_len(n.p), ]

# compute variances of Hy
lincomb.vars <- vecchia_lincomb(H, preds$U.obj, preds$V.ord)
# compare with the prediction variances returned by vecchia_prediction()
plot(preds$var.pred, lincomb.vars)

plot of chunk unnamed-chunk-11

As another example, we consider the overall mean of the process at all prediction locations. Using the vecchia_lincomb() function enables us to get the variance estimates easily.

# Average of the predictive means over all prediction locations.
mean(preds$mu.pred)
#> [1] -0.1875505

# compute entire covariance matrix of Hy (here, 1x1)
# H is a single row with weight 1/n.p in the columns of the prediction
# locations, so Hy is their average.
H <- Matrix::sparseMatrix(
  i = rep(1, n.p),
  j = n + (1:n.p),
  x = 1 / n.p
)
lincomb.cov <- vecchia_lincomb(H, preds$U.obj, preds$V.ord, cov.mat = TRUE)

Other GP approximations as special cases

By specifying appropriate options in vecchia_specify, we can do everything described above for several other GP approximations: Modified predictive process, FSA, MRA, latent, standard Vecchia

Setting \(M=1\) results in block full-scale approximation, specifically one with \(r_0 = \frac{m}{2}\) knots spread over the entire domain and the remaining locations being partitioned into blocks of size \(<\frac{m}{2}+1\).

m <- 20
# Only M is set; verbose = TRUE prints the MRA parameters that are used.
mra.options.fulls <- list(M = 1)
blockFS <- vecchia_specify(
  locs, m, 'maxmin',
  conditioning = 'mra',
  mra.options = mra.options.fulls,
  verbose = TRUE
)
#> MRA params: m=19; J=4; r=10,10; M=1

Another popular existing approximation method, modified predictive process (MPP), can also be obtained by specifying appropriate parameter settings:

# Only r is set, as c(m, 1); verbose = TRUE prints the resulting MRA parameters.
mra.options.mpproc <- list(r = c(m, 1))
MPP <- vecchia_specify(
  locs, m, 'maxmin',
  conditioning = 'mra',
  mra.options = mra.options.mpproc,
  verbose = TRUE
)
#> MRA params: m=20; J=32; r=20; M=0

As we can see, MPP can be viewed as a special case of the multi-resolution approximation (MRA).

A general MRA is obtained by specifying all three of its parameters:

# All three MRA parameters (r, M and J) are supplied explicitly.
mra.options.mra <- list(r = c(10, 5, 5), M = 2, J = 2)
MRA_rJM <- vecchia_specify(
  locs, m, 'maxmin',
  conditioning = 'mra',
  mra.options = mra.options.mra,
  verbose = TRUE
)
#> Warning in get.mra.params(n, mra.options, m): M, r set for MRA. If
#> parameter m was given, it will be overridden
#> MRA params: m=22; J=2,2; r=10,5,7; M=2

There are two things to note about this full specification of an MRA. First, providing all three parameters \(r\), \(J\) and \(M\) overrides whatever value of \(m\) was provided. Second, in order to be able to place a knot at each point of the grid, the provided parameters might need to be adjusted.

Finally, we can also use the GPvecchia package to specify a Nearest Neighbour Gaussian Process (NNGP) approximation. This can be accomplished as shown below.

# NNGP specification: the same call as before with cond.yz set to 'y'.
NNGP <- vecchia_specify(locs, m, cond.yz = 'y')

We can now easily compare the different approximation methods with each other, with SGV and with the exact likelihood.

# Same data and parameters under each specification: block full-scale, MPP,
# general MRA, NNGP, the default specification, and finally the exact value.
vecchia_likelihood(z, blockFS, covparms, nuggets)
#> [1] -53.88739
vecchia_likelihood(z, MPP, covparms, nuggets)
#> [1] -50.34473
vecchia_likelihood(z, MRA_rJM, covparms, nuggets)
#> [1] -52.25845
vecchia_likelihood(z, NNGP, covparms, nuggets)
#> [1] -51.41795
vecchia_likelihood(z, vecchia_specify(locs, m), covparms, nuggets)
#> [1] -51.3744
dmvnorm(z, mean = numeric(n), sigma = Om0, log = TRUE)
#> [1] -51.441

Non-Gaussian data

Here we demonstrate how GPvecchia can fit a latent model to non-Gaussian data using the Vecchia-Laplace method. We simulate data by first generating a correlated latent field without noise, assuming the same covariance and locations generated earlier:

# simulate latent process
# The latent field is noise-free, so it is drawn from the covariance
# covfun(locs) alone. Om0 additionally contains the nuggets and is not used
# here.
y <- as.numeric(t(chol(covfun(locs))) %*% rnorm(n))

Then we sample a single non-Gaussian value for each latent value. The variability introduced by the sampling induces heteroskedasticity, in contrast to the constant noise added in the Gaussian case. Below we use a logistic model for binary data, but there are implementations for count and continuous positive (right-skewed) data as well.

# Likelihood used to generate the observations from the latent field y.
data.model <- "logistic"

# simulate data
# switch() evaluates only the branch that matches data.model. An unsupported
# model raises an error instead of printing a message and silently leaving the
# previous (Gaussian) z in place.
# NOTE(review): default_lh_params is not defined in this part of the vignette;
# the gamma branch needs it to exist -- confirm where it comes from.
z <- switch(data.model,
  poisson = rpois(n, exp(y)),
  logistic = rbinom(n, 1, prob = exp(y) / (1 + exp(y))),
  gamma = rgamma(n, shape = default_lh_params$alpha,
                 rate = default_lh_params$alpha * exp(-y)),
  stop("Distribution not implemented yet: ", data.model, call. = FALSE)
)

# plot simulated data, 1 or 2D
# Latent field on the left, observations on the right; the previous graphical
# parameters are kept in defpar.
defpar <- par(mfrow = c(1, 2))
if (spatial.dim != 1) {
  fields::quilt.plot(locs, y, main = "Latent")
  fields::quilt.plot(locs, z, main = "Observed")
} else {
  plot(locs, y, main = "latent")
  plot(locs, z, main = "observed")
}

plot of chunk unnamed-chunk-19

# Restore the graphical parameters saved in defpar.
par(defpar)

Given the simulated data, we now can efficiently estimate the latent field by specifying the number of conditioning points \(m\) described earlier. Interweaved ordering is best for 1D data while response-first ('zy') ordering is best for higher dimensions.

m <- 10
# Default specification in 1-D, cond.yz = 'zy' otherwise.
vecchia.approx <- if (spatial.dim == 1) {
  vecchia_specify(locs, m) #IW ordering
} else {
  vecchia_specify(locs, m, cond.yz = 'zy') #RF ordering
}

With the approximated covariance structure, we can calculate the posterior estimate for the latent field using the Vecchia-Laplace method and plot the result. Pure Laplace approximation is included for comparison; even with a small value for \(m\), we can get a result similar to Laplace but with much lower cost.

# Posterior of the latent field from the Vecchia-Laplace approximation.
posterior <- calculate_posterior_VL(z, vecchia.approx, likelihood_model = data.model,
                                    covparms = covparms)
if (spatial.dim == 1) {
  par(mfrow = c(1, 1))
  ord <- order(locs) # order so that lines appear correctly
  # y-axis limits that cover both the latent field and the posterior mean
  y_limits <- c(min(y, posterior$mean[ord]), max(y, posterior$mean[ord]))
  plot(locs[ord], y[ord], type = "l", ylim = y_limits)
  lines(locs[ord], posterior$mean[ord], type = "l", col = 3, lwd = 3)
  legend("bottomright", legend = c("Latent", "VL"), col = c(1, 3), lwd = c(1, 3))
} else if (spatial.dim == 2) {
  # Save the layout under the same name that is restored below, so that this
  # chunk does not depend on a defpar left over from an earlier chunk.
  defpar <- par(mfrow = c(1, 2))
  # ordering unnecessary; we are using a scatter plot rather than lines
  quilt.plot(locs, y, main = "Truth")
  # the title reports the m that was used to build vecchia.approx
  quilt.plot(locs, posterior$mean, main = paste0("VL m=", m))
  par(defpar)
}

plot of chunk unnamed-chunk-21

Non-Gaussian predictions

Predictions are computed as before, using Vecchia-Laplace methods where needed

######  specify prediction locations   #######
# Requested number of prediction locations (a 30 x 30 grid in 2-D).
n.p <- 30^2
if (spatial.dim == 1) {
  # 1-D case: n.p equally spaced points on [0, 1]
  locs.pred <- matrix(seq(0, 1, length.out = n.p), ncol = 1)
} else {
  # 2-D case: regular grid with round(sqrt(n.p)) points per side
  grid.oneside <- seq(0, 1, length.out = round(sqrt(n.p)))
  locs.pred <- as.matrix(expand.grid(grid.oneside, grid.oneside))
}
# Number of prediction locations that were actually generated.
n.p <- nrow(locs.pred)

######  specify Vecchia approximation   #######
# Specification that includes the prediction grid, with m = 10.
vecchia.approx.pred <- vecchia_specify(locs, m = 10, locs.pred = locs.pred)
###  carry out prediction
# Uses the Vecchia-Laplace posterior computed earlier.
preds <- vecchia_laplace_prediction(posterior, vecchia.approx.pred, covparms)

# plotting predictions
if (spatial.dim == 2) {
  defpar <- par(mfrow = c(1, 2))
  # ordering unnecessary; we are using a scatter plot rather than lines
  quilt.plot(
    locs, y,
    main = "True Latent",
    xlim = c(0, 1), ylim = c(0, 1),
    nx = 64, ny = 64
  )
  quilt.plot(locs.pred, preds$mu.pred, main = "VL Prediction", nx = 30, ny = 30)
  par(defpar)
} else if (spatial.dim == 1) {
  defpar <- par(mfrow = c(1, 1))
  ord <- order(locs) # order so that lines appear correctly
  # latent field as a line, posterior means at the observed locations as points
  plot(locs[ord], y[ord], type = "l", xlim = c(0, 1.2), ylim = c(-1, 3))
  lines(locs, posterior$mean, type = "p", col = 4, lwd = 3, lty = 1)
  # predictive mean with bands of plus/minus one standard deviation
  lines(locs.pred, preds$mu.pred, type = "l", col = 3, lwd = 3, lty = 1)
  lines(locs.pred, preds$mu.pred + sqrt(preds$var.pred), type = "l", lty = 3, col = 3)
  lines(locs.pred, preds$mu.pred - sqrt(preds$var.pred), type = "l", lty = 3, col = 3)
  legend(
    "topleft",
    legend = c("Latent", "VL: Pred", "VL: 1 stdev"),
    col = c(1, 3, 3), lwd = c(1, 2, 1), lty = c(1, 1, 3)
  )
  par(defpar)
}

plot of chunk unnamed-chunk-22

Parameter estimation

The likelihood of the data for a set of parameters can be computed efficiently using the command below.

# Evaluate the Vecchia-Laplace approximation at the true covariance parameters.
vecchia_laplace_likelihood(
  z, vecchia.approx,
  likelihood_model = data.model,
  covparms = covparms
)
#> [1] -34.90571

This can be used for parameter estimation by evaluating the likelihood over a grid of parameter values or in an iterative optimization method such as Nelder-Mead. Reparameterizing the parameters improves performance.

# currently set up for covariance estimation
vecchia.approx <- vecchia_specify(locs, m = 10, cond.yz = "zy") # for posterior
vecchia.approx.IW <- vecchia_specify(locs, m = 10) # for integrated likelihood
# in one dimension the same specification is used for both purposes
if (spatial.dim == 1) {
  vecchia.approx <- vecchia.approx.IW
}

# Objective for optim(): the negative of vecchia_laplace_likelihood(), taken
# as a function of the log-transformed covariance parameters.
#   x0: numeric vector; the exponentials of its first three entries are used
#       as the covariance parameters (sigma, range, smoothness).
# Reads z, vecchia.approx, vecchia.approx.IW, data.model and
# default_lh_params from the enclosing environment.
vl_likelihood <- function(x0) {
  # back-transform from the log scale, which keeps the parameters positive
  cov.pars <- exp(x0)[1:3]
  # Perform inference on latent mean with Vecchia Laplace approximation
  -vecchia_laplace_likelihood(
    z, vecchia.approx,
    likelihood_model = data.model,
    cov.pars,
    return_all = FALSE,
    likparms = default_lh_params,
    prior_mean = 0, # can be a parameter as well
    vecchia.approx.IW = vecchia.approx.IW
  )
}
# Starting values on the log scale (vl_likelihood exponentiates its argument).
x0 <- log(c(.07, 1.88, 1.9))
vl_likelihood(x0)  # objective at the starting values
# Issues with R aborting, maxit set to 1
res <- optim(x0, vl_likelihood, method = "Nelder-Mead", control = list("trace" = 1, "maxit" = 1))
exp(res$par[1:3])  # estimates transformed back from the log scale
# objective at the optimiser's result, for comparison with the starting value
vl_likelihood(res$par)