The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

spatialfusion: short demo

Craig Wang (craig.wang@math.uzh.ch)

June 21, 2019

This brief demo provides code and output for fitting spatial fusion models using R package spatialfusion. The first section analyze the built-in synthetic dataset with INLA implementation while the second section analyze a simulated dataset with Stan implementation. The method argument in fusionData() function decides on which implementation to use.

1. Spatial fusion modelling with INLA on built-in synthetic data

Load libraries

library(spatialfusion)
## Loading spatialfusion (version 0.7):
## - The compilation time for a Stan model can be up to 20s.
## - INLA method is recommended for larger datasets (> 1000 observations).
## - It is good practice to test the model specification on sub-sampled dataset first before running it on the full dataset.
library(tmap, quietly = TRUE)
library(sp, quietly = TRUE)
library(sf, quietly = TRUE)
## Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE

Load and view built-in synthetic data

summary(dataGeo)
##   lungfunction        covariate                 geometry  
##  Min.   :-14.1384   Min.   :-2.76510   POINT        :200  
##  1st Qu.: -2.6764   1st Qu.:-0.68487   epsg:NA      :  0  
##  Median :  1.1040   Median :-0.04320   +proj=long...:  0  
##  Mean   :  0.9074   Mean   :-0.05798                      
##  3rd Qu.:  4.7340   3rd Qu.: 0.73442                      
##  Max.   : 17.9710   Max.   : 3.20051
summary(dataLattice)
##    mortality        covariate             pop               mr         
##  Min.   :  0.00   Min.   :-2.86427   Min.   :   362   Min.   : 0.0000  
##  1st Qu.:  0.00   1st Qu.:-0.67110   1st Qu.:  1880   1st Qu.: 0.0000  
##  Median :  1.00   Median :-0.05943   Median :  4431   Median : 0.2741  
##  Mean   : 14.90   Mean   :-0.05240   Mean   :  9357   Mean   : 1.7206  
##  3rd Qu.:  6.75   3rd Qu.: 0.52046   3rd Qu.:  7880   3rd Qu.: 1.4923  
##  Max.   :540.00   Max.   : 2.07776   Max.   :413912   Max.   :26.3228  
##           geometry  
##  POLYGON      :162  
##  epsg:NA      :  0  
##  +proj=long...:  0  
##                     
##                     
## 
summary(dataPP)
##           geometry  
##  POINT        :116  
##  epsg:NA      :  0  
##  +proj=long...:  0

Plot data

tm_shape(dataLattice) + tm_polygons(col = "white") +
  tm_shape(dataGeo) + tm_dots(size = 0.1) +
  tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
  tm_shape(dataPP) + tm_symbols(col = "red", shape = 4, size = 0.02) +  
  tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
  tm_layout(main.title = "dataGeo, dataPP", main.title.size = 1, 
            frame = F, fontface = 2, legend.outside = T)

tm_shape(dataLattice) +
  tm_fill(col = "mr", style = "fixed", breaks = c(0, 0.5, 2, 10, 30),
          title = "Mortality rate \n(per thousand)", legend.reverse = T) + tm_borders() +
  tm_layout(main.title = "dataLattice", main.title.size = 1, frame = F, fontface = 2,
            legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)

Data preparation

dat <- fusionData(geo.data = dataGeo, geo.formula = lungfunction ~ covariate,
                  lattice.data = dataLattice,
                  lattice.formula = mortality ~ covariate + log(pop),
                  pp.data = dataPP, distributions = c("normal", "poisson"),
                  method = "INLA")

dat
## data object for spatial fusion modeling with INLA consisting of: 
## - 1 geostatistical variable(s)
## - 1 lattice variable(s)
## - 1 point pattern variable(s)
## 
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.

Fit a spatial fusion model

if (require(INLA)){
mod <- fusion(data = dat, n.latent = 1, bans = matrix(c(0,0,0), ncol = 1),
              pp.offset = 400, prior.range = c(0.1, 0.5),
              prior.sigma = c(1, 0.5),  mesh.locs = dat$locs_point,
              mesh.max.edge = c(0.05, 0.5))
}
## Loading required package: INLA
## Loading required package: Matrix
## This is INLA_24.12.11 built 2024-12-11 19:58:26 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - List available models/likelihoods/etc with inla.list.models()
##  - Use inla.doc(<NAME>) to access documentation

Inspect the fit

if (require(INLA)){
mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dataGeo$lungfunction, mod_fit$point1,
     xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dataLattice$mortality), mod_fit$area1,
     xlab = "Observed", ylab = "Fitted")
abline(0,1)
par(oldpar)
}

Check parameter estimates

if (require(INLA)){
summary(mod, digits = 3)
}
## Model:
## geostatistical formula: lungfunction ~ covariate 
## lattice formula: mortality ~ covariate + log(pop) 
## point pattern variables: 1 
## latent process(es): 1 
## --------------
## Fixed effect coefficients:
##                       mean     sd 0.025quant 0.5quant 0.975quant  mode
## intercept (beta_p11)  1.02 0.1320      0.737     1.02       1.26  1.04
## covariate (beta_p12)  4.98 0.0554      4.870     4.98       5.09  4.98
## intercept (beta_a11) -8.22 0.2350     -8.690    -8.22      -7.77 -8.22
## covariate (beta_a12)  2.03 0.0484      1.940     2.03       2.13  2.03
## log(pop) (beta_a13)   1.04 0.0219      0.997     1.04       1.08  1.04
## 
## Latent parameters:
##                     mean     sd 0.025quant 0.5quant 0.975quant  mode
## Gaussian precision 1.750 0.2280      1.350    1.740      2.240 1.720
## Range for s11      0.313 0.1160      0.151    0.292      0.601 0.253
## Stdev for s11      0.824 0.1770      0.537    0.804      1.230 0.761
## Beta for s12       0.439 0.0802      0.283    0.438      0.599 0.434
## Beta for s13       0.914 0.1660      0.593    0.911      1.250 0.903

Diagnostic plots

if (require(INLA)){
plot(mod, interactive = FALSE)
}

if (require(INLA)){
plot(mod, posterior = FALSE)
}

Predict latent surface

if (require(INLA)){
pred.locs <- st_as_sf(spsample(as_Spatial(dataDomain), 20000, type = "regular"))
mod.pred <- predict(mod, pred.locs)
mod.pred <- st_set_crs(mod.pred, "+proj=longlat +ellps=WGS84")
tm_shape(mod.pred) +
  tm_symbols(col = "latent.s11", shape = 15, size = 0.05, style = "cont",
             midpoint = NA, legend.col.reverse = TRUE, palette = "Oranges",
             title.col = "Latent process") +
  tm_shape(dataLattice) + tm_borders() +
  tm_layout(frame = FALSE, legend.outside = TRUE)
}
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `symbols()`: instead of `style = "cont"`, use fill.scale =
## `tm_scale_continuous()`.
## ℹ Migrate the argument(s) 'midpoint', 'palette' (rename to 'values') to
##   'tm_scale_continuous(<HERE>)'
## For small multiples, specify a 'tm_scale_' for each multiple, and put them in a
## list: 'fill'.scale = list(<scale1>, <scale2>, ...)'
## [v3->v4] `symbols()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title.col' (rename to 'title') to 'fill.legend =
## tm_legend(<HERE>)'
## [v3->v4] `tm_symbols()`: migrate the argument(s) related to the scale of the
## visual variable `shape` namely 'midpoint' to shape.scale =
## tm_scale_intervals(<HERE>).
## ℹ For small multiples, specify a 'tm_scale_' for each multiple, and put them in
##   a list: 'shape.scale = list(<scale1>, <scale2>, ...)'
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Oranges" is named
## "brewer.oranges"
## Multiple palettes called "oranges" found: "brewer.oranges", "matplotlib.oranges". The first one, "brewer.oranges", is returned.
## 
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

2. Spatial fusion modelling with Stan on simulated data

Simulate data

dat <- fusionSimulate(n.point = 200, n.area = 30, n.grid = 5, n.pred = 100,
                      psill = 1.5, phi = 1, nugget = 0, tau.sq = 0.2,
                      dimension = 10, domain = NULL, point.beta = list(rbind(1,5)),
                      area.beta = list(rbind(1.5, 1.5)), nvar.pp = 1,
                      distributions = c("normal","poisson"),
                      design.mat = matrix(c(2, 0.5, 1), ncol = 1), 
                      pp.offset = 0.5, seed = 1)

geo.data <- st_as_sf(data.frame(y = dat$mrf[dat$sample.ind,"x"],
                       x = dat$mrf[dat$sample.ind,"y"],
                       cov.point = dat$data$X_point[,2],
                       outcome =dat$dat$Y_point[[1]]), 
                     coords = c("x","y"),
                     crs = st_crs("+proj=longlat +ellps=WGS84"))
            
                               
lattice.data <- cbind(dat$poly,
                     (data.frame(outcome = dat$data$Y_area[[1]],
                                 cov.area = dat$data$X_area[,2])))
lattice.data <- st_set_crs(lattice.data, "+proj=longlat +ellps=WGS84")

pp.data <- dat$data$lgcp.coords[[1]]
pp.data <- st_set_crs(pp.data, "+proj=longlat +ellps=WGS84")

Plot data

tm_shape(lattice.data) + tm_polygons(col = "white") + 
  tm_shape(geo.data) + tm_dots(size = 0.1) +
  tm_add_legend(type = "symbol", shape = 16, size = 0.3, col = "black", label = "geostatistical") +
  tm_shape(pp.data) + tm_symbols(col = "red", shape = 4, size = 0.02) +
  tm_add_legend(type = "symbol", shape = 4, size = 0.2, col = "red", label = "point pattern") +
  tm_layout(main.title = "geo.data, pp.data", main.title.size = 1, 
            frame = FALSE, fontface = 2, legend.outside = TRUE)
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_add_legend()`: use `type = "symbols"` instead of `type =
## "symbol"`.
## [v3->v4] `tm_add_legend()`: use `fill` instead of `col` for the fill color of
## symbols.
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`

tm_shape(lattice.data) + 
  tm_fill(col="outcome", style="fixed", breaks=c(0, 10, 50, 200, 1200),
          title = "Outcome", legend.reverse = T) + tm_borders() +
  tm_layout(main.title="lattice.data", main.title.size = 1, frame = FALSE, fontface = 2,
            legend.position = c(0.77,0.8), legend.text.size = 0.5, legend.title.size = 0.5)
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_polygons()`: instead of `style = "fixed"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'breaks' to 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title', 'legend.reverse' (rename to 'reverse')
## to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use text.fontface instead of fontface
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`

Data preparation

dat2 <- fusionData(geo.data = geo.data, geo.formula = outcome ~ cov.point,
                   lattice.data = lattice.data, lattice.formula = outcome ~ cov.area,
                   pp.data = pp.data, distributions = c("normal","poisson"), 
                   method = "Stan")
dat2
## data object for spatial fusion modeling with Stan consisting of: 
## - 1 geostatistical variable(s), with 200 locations 
## - 1 lattice variable(s), with 150 sampling point locations 
## - 1 point pattern variable(s), with 100 gridded locations 
## 
## Provide this object as 'data' argument in fusion() to fit a spatial fusion model.

Fit a spatial fusion model

mod <- fusion(data = dat2, n.latent = 1, bans = matrix(c(0,0,0), ncol = 1),
              pp.offset = 0.5, prior.phi = list(distr = "normal", pars = c(1, 1)),
              nsamples = 1000, nburnin = 500, nchain = 1, ncore = 1)

Inspect the fit

mod_fit <- fitted(mod, type = "link")
oldpar <- par(mfrow = c(1,2))
plot(dat$data$Y_point[[1]], mod_fit$point1, xlab = "Observed", ylab = "Fitted")
abline(0,1)
plot(log(dat$data$Y_area[[1]]), mod_fit$area1, xlab = "Observed", ylab = "Fitted")
abline(0,1)

par(oldpar)

Check parameter estimates

summary(mod, digits = 2) 
## Model:
## geostatistical formula: outcome ~ cov.point 
## lattice formula: outcome ~ cov.area 
## point pattern variables: 1 
## latent process(es): 1 
## --------------
## Fixed effect coefficients:
##                         mean se_mean   sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## intercept (beta_p[1,1])  1.4  0.0190 0.23 0.93 1.2 1.4 1.6   1.8   140    1
## cov.point (beta_p[1,2])  4.9  0.0170 0.14 4.60 4.8 4.9 5.0   5.2    74    1
## intercept (beta_a[1,1])  1.5  0.0069 0.13 1.20 1.4 1.5 1.5   1.7   360    1
## cov.area (beta_a[1,2])   1.6  0.0074 0.13 1.40 1.6 1.6 1.7   1.9   290    1
## 
## Latent parameters:
##           mean se_mean   sd  2.5%  25%  50%  75% 97.5% n_eff Rhat
## tau_sq[1] 0.42   0.061 0.24 0.170 0.26 0.35 0.49  1.10    16  1.1
## phi[1]    0.51   0.011 0.11 0.330 0.43 0.49 0.55  0.78   100  1.0
## Z_1[1]    2.10   0.021 0.15 1.800 2.00 2.10 2.20  2.40    48  1.1
## Z_2[1]    0.40   0.018 0.23 0.009 0.26 0.39 0.53  0.88   160  1.0
## Z_3[1]    1.40   0.015 0.20 1.100 1.30 1.40 1.50  1.80   170  1.0

Diagnostic plots

plot(mod, interactive = FALSE)

plot(mod, posterior = FALSE) 

## Enter <return> to proceed ...

Predict latent surface and compare with simulated truth

mod.pred <- predict(mod, new.locs = dat$pred.loc, type = "summary")
oldpar <- par(mfrow = c(1,1))
plot(dat$mrf[dat$pred.ind, c("sim1")], mod.pred$latent1, 
     xlab = "Truth", ylab = "Predicted")
abline(0,1)

par(oldpar)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.