- 1 Code to reproduce Figure6.1
- 2 Code to reproduce Table 6.1
- 3 Code for fitting models with spBayes
- 4 Code to reproduce Table 6.2
- 5 Stan code from Section 6.6.2 to fit the marginal model
- 6 Code to necessary to fit spatial models using INLA
- 7 Code for Section 6.7 on Model Choice
- 8 Code for model validation
- 9 Code to choose the phi parameter in the spatial model
- 10 Code to reproduce Table 6.7 and Figure 6.4
<-proc.time()[3]
start.time<- "figures/chap6figures/"
figurepath <- "mapfiles"
mappath <- "txttables/"
filepath <- FALSE
longrun if (!longrun) {
.3 <- dget(file=paste0(filepath, "table6.3.txt"))
table6.5 <- dget(file=paste0(filepath, "table6.5.txt"))
table6.6 <- dget(file=paste0(filepath, "table6.6.txt"))
table6
}library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(ggplot2)
library(akima)
library(tidyr)
library(RColorBrewer)
library(ggpubr)
library(spdep)
library(GGally)
library(geoR)
library(fields)
library(doBy)
1 Code to reproduce Figure6.1
head(nyspatial)
## s.index Longitude Latitude utmx utmy yo3 xmaxtemp xwdsp
## 1 1 -73.757 42.681 601838.1 4726140 45.08065 27.53184 4.142083
## 2 2 -73.881 40.866 594300.8 4524485 48.34758 29.60294 5.666202
## 3 3 -79.587 42.291 121812.7 4692285 54.65677 26.28780 5.742751
## 4 4 -76.802 42.111 351019.8 4663672 44.95774 27.44725 4.842872
## 5 5 -73.743 41.782 604457.3 4626336 43.92903 28.27555 4.427053
## 6 6 -78.771 42.993 192581.3 4766941 55.37726 26.61813 6.146428
## xrh
## 1 3.686737
## 2 3.478130
## 3 3.950145
## 4 3.699996
## 5 3.698446
## 6 3.869971
<- Bspatial(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial)
M1 ## ##
## # Total time taken:: 0.01 - Sec.
print(M1)
##
## The lm model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial)
##
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
##
##
## Coefficients:
## [1] -52.776 1.526 3.252 11.240 10.998
##
## ##
## # Total time taken:: 0.01 - Sec.
plot(M1)
##
## No other plots implemented for this model fitting method.
##
## Summary of the residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.264164 -1.888167 0.234218 -0.000188 2.175525 8.134383
residuals(M1)
##
## Summary of the residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.264164 -1.888167 0.234218 -0.000188 2.175525 8.134383
## [1] 0.93423105 -1.57103871 4.24211590 -1.48774171 -2.41060320 4.04689047
## [7] 8.13438342 1.19708052 0.08478264 -4.55991285 -2.01951831 -0.96188897
## [13] -1.84438370 -1.39188272 3.31235485 0.38365423 1.86240427 -4.38346529
## [19] 1.56620807 3.71451313 3.11488849 -2.65240154 3.45918976 0.88766396
## [25] -5.69143315 -8.26416410 -1.05410468 1.34690057
summary(M1)
##
## The lm model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial)
## ##
## # Total time taken:: 0.01 - Sec.
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
##
##
## Parameter Estimates:
## mean sd 2.5% 97.5%
## (Intercept) -52.776 52.909 -157.126 51.573
## xmaxtemp 1.526 0.989 -0.425 3.477
## xwdsp 3.252 1.185 0.914 5.590
## xrh 11.240 8.462 -5.449 27.930
## sigma2 10.998 2.939 6.668 18.038
## Spatial model fitting
M2 <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
coordtype="utm", coords=4:5, phi=0.4)
## mean sd low up
## (Intercept) -58.17 53.84 -164.35 48.01
## xmaxtemp 1.65 1.00 -0.33 3.62
## xwdsp 3.28 1.19 0.94 5.62
## xrh 11.74 8.61 -5.25 28.73
## sigma2 10.90 2.91 6.61 17.87
## ##
## # Total time taken:: 0.01 - Sec.
names(M2)
## [1] "params" "fit" "max.d" "fitteds"
## [5] "phi" "residuals" "sn" "tn"
## [9] "formula" "scale.transform" "package" "model"
## [13] "call" "computation.time"
print(M2)
##
## The spat model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial,
## model = "spat", coordtype = "utm", coords = 4:5, phi = 0.4)
##
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
##
##
## Coefficients:
## [1] -58.168 1.648 3.281 11.741 10.897
##
## ##
## # Total time taken:: 0.01 - Sec.
plot(M2)
##
## No other plots implemented for this model fitting method.
##
## Summary of the residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.17500 -1.81981 0.26879 0.02192 2.26535 8.37285
ggsave(filename = paste0(figurepath, "M2_fitv_resid.png"))
## Saving 7 x 5 in image
residuals(M2)
##
## Summary of the residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.17500 -1.81981 0.26879 0.02192 2.26535 8.37285
## [1] 1.0126866 -1.6845595 4.2933407 -1.4260899 -2.4367621 4.0862733
## [7] 8.3728457 1.4339752 0.2256680 -4.4204139 -1.9068074 -0.8676949
## [13] -1.7908124 -1.3168398 3.2676868 0.3119068 1.9634197 -4.2975435
## [19] 1.3987367 3.5257330 3.1711345 -2.5857731 3.3522672 0.9462576
## [25] -5.8067424 -8.1750019 -1.2457093 1.2125946
ggsave(filename = paste0(figurepath, "M2_resid_v_obsn.png"))
## Saving 7 x 5 in image
summary(M2)
##
## The spat model has been fitted using bmstdr code in R.
## Call:
## Bspatial(formula = yo3 ~ xmaxtemp + xwdsp + xrh, data = nyspatial,
## model = "spat", coordtype = "utm", coords = 4:5, phi = 0.4)
## ##
## # Total time taken:: 0.01 - Sec.
## Model formula
## yo3 ~ xmaxtemp + xwdsp + xrh
##
##
## Parameter Estimates:
## mean sd 2.5% 97.5%
## (Intercept) -58.168 53.836 -164.345 48.010
## xmaxtemp 1.648 1.002 -0.329 3.624
## xwdsp 3.281 1.187 0.941 5.622
## xrh 11.741 8.613 -5.247 28.728
## sigma2 10.897 2.912 6.607 17.872
2 Code to reproduce Table 6.1
.1 <- cbind(M1$params, M2$params)
table6round(table6.1, 2)
## mean sd 2.5% 97.5% mean sd 2.5% 97.5%
## (Intercept) -52.78 52.91 -157.13 51.57 -58.17 53.84 -164.35 48.01
## xmaxtemp 1.53 0.99 -0.42 3.48 1.65 1.00 -0.33 3.62
## xwdsp 3.25 1.19 0.91 5.59 3.28 1.19 0.94 5.62
## xrh 11.24 8.46 -5.45 27.93 11.74 8.61 -5.25 28.73
## sigma2 11.00 2.94 6.67 18.04 10.90 2.91 6.61 17.87
# xtable(table6.1, digits=2)
dput(table6.1, file=paste0(filepath, "table6.1.txt"))
3 Code for fitting models with spBayes
library(spBayes)
library(coda)
set.seed(44)
<- 20000
N <- 4
p <- nyspatial
fdat <- list("phi"=1, "sigma.sq"=10, "tau.sq"=1)
starting <- list("phi"=0.3, "sigma.sq"=0.3, "tau.sq"=0.3)
tuning .1 <- list("beta.Norm"=list(rep(0,p), diag(1000,p)),
priors"phi.Unif"=c(0.05, 2), "sigma.sq.IG"=c(2, 2),
"tau.sq.IG"=c(2, 0.1))
<- "exponential"
cov.model <- FALSE
verbose <- cbind(fdat$utmx, fdat$utmy)
coords <- spLM(sqrt(yo3)~xmaxtemp+xwdsp+xrh, data=fdat, coords=coords, starting=starting,
mod3 tuning=tuning, priors=priors.1, cov.model=cov.model,
n.samples=N, verbose=verbose, n.report=5)
<- 0.5* N + 1
burn.in .3 <- spRecover(mod3, start=burn.in, verbose=FALSE)
mround(summary(m.3$p.theta.recover.samples)$quantiles[,c(3,1,5)],2)
## 50% 2.5% 97.5%
## sigma.sq 0.22 0.13 0.40
## tau.sq 0.03 0.01 0.09
## phi 0.99 0.07 1.94
round(summary(m.3$p.beta.recover.samples)$quantiles[,c(3,1,5)],2)
## 50% 2.5% 97.5%
## (Intercept) -0.33 -16.09 15.82
## xmaxtemp 0.11 -0.19 0.41
## xwdsp 0.23 -0.13 0.59
## xrh 0.81 -1.79 3.31
# names(m.3)
<- mcmc(data=cbind(m.3$p.beta.recover.samples, m.3$p.theta.recover.samples), thin=100)
samps # samps <- as.mcmc(cbind(m.4$p.theta.recover.samples))
head(samps)
## Markov Chain Monte Carlo (MCMC) output:
## Start = 1
## End = 601
## Thinning interval = 100
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## [1,] -2.5753937 0.12428318 0.33685313 1.1498303 0.1977640 0.01913157
## [2,] 12.5734909 -0.04935851 0.43433537 -1.7303863 0.1977640 0.01913157
## [3,] 7.7296075 -0.04375927 0.24290350 -0.2196203 0.3256541 0.02434937
## [4,] 7.0140414 0.03878646 0.24547142 -0.6880487 0.3256541 0.02434937
## [5,] 8.0222950 -0.11055973 0.01093527 0.4825042 0.3256541 0.02434937
## [6,] 0.6392681 0.04496527 0.39199269 0.7834181 0.3256541 0.02434937
## [7,] 3.3188478 0.03734955 0.15802698 0.4525282 0.1989606 0.02165145
## phi
## [1,] 1.956952
## [2,] 1.956952
## [3,] 1.911967
## [4,] 1.911967
## [5,] 1.911967
## [6,] 1.911967
## [7,] 1.884529
dim(samps)
## [1] 10000 7
dev.off()
## null device
## 1
plot(samps)
# autocorr.plot(samps, lag.max=75)
rejectionRate(samps)
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## 0.0000000 0.0000000 0.0000000 0.0000000 0.6256626 0.6256626
## phi
## 0.6256626
# autocorr(samps, lags = c(0, 1, 5, 10, 50), relative=TRUE)
autocorr.plot(samps)
effectiveSize(samps)
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## 10534.4089 10905.7348 10485.4410 10702.3782 1608.0239 802.6024
## phi
## 110.2447
batchSE(samps, batchSize=100)
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## 0.077162667 0.001428252 0.001783563 0.012311022 0.001663686 0.000672879
## phi
## 0.044493143
raftery.diag(samps, q=0.025, r=0.005, s=0.95, converge.eps=0.001)
##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## (Intercept) 200 380200 3746 101.0
## xmaxtemp 200 377100 3746 101.0
## xwdsp 200 374100 3746 99.9
## xrh 200 374100 3746 99.9
## sigma.sq 1700 1796100 3746 479.0
## tau.sq 1900 2079700 3746 555.0
## phi 15000 16369300 3746 4370.0
geweke.diag(samps, frac1=0.1, frac2=0.5)
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## 0.04128 -0.06384 0.11126 -0.04710 0.18335 -1.20123
## phi
## 4.02084
crosscorr(samps)
## (Intercept) xmaxtemp xwdsp xrh sigma.sq
## (Intercept) 1.0000000000 -0.936920895 0.5173241877 -0.960589167 0.0010916228
## xmaxtemp -0.9369208954 1.000000000 -0.6028366414 0.818465028 0.0017644462
## xwdsp 0.5173241877 -0.602836641 1.0000000000 -0.535934737 -0.0001728054
## xrh -0.9605891667 0.818465028 -0.5359347370 1.000000000 -0.0034203620
## sigma.sq 0.0010916228 0.001764446 -0.0001728054 -0.003420362 1.0000000000
## tau.sq -0.0002154989 -0.002218384 -0.0067863042 0.003684749 0.1163776973
## phi 0.0131530848 -0.018115634 0.0112277381 -0.008590094 -0.0144629564
## tau.sq phi
## (Intercept) -0.0002154989 0.013153085
## xmaxtemp -0.0022183838 -0.018115634
## xwdsp -0.0067863042 0.011227738
## xrh 0.0036847493 -0.008590094
## sigma.sq 0.1163776973 -0.014462956
## tau.sq 1.0000000000 0.037640622
## phi 0.0376406222 1.000000000
densplot(samps)
rejectionRate(samps)
## (Intercept) xmaxtemp xwdsp xrh sigma.sq tau.sq
## 0.0000000 0.0000000 0.0000000 0.0000000 0.6256626 0.6256626
## phi
## 0.6256626
###
## Now we obtain parameter estimates
<- summary(samps)
a <- HPDinterval(samps,prob=0.95)
b <- cbind(a$statistics[, 1:2], b)
estimates round(estimates, 2)
## Mean SD lower upper
## (Intercept) -0.27 8.11 -16.47 15.32
## xmaxtemp 0.11 0.15 -0.18 0.42
## xwdsp 0.23 0.18 -0.12 0.59
## xrh 0.79 1.30 -1.74 3.35
## sigma.sq 0.23 0.07 0.12 0.36
## tau.sq 0.04 0.02 0.01 0.08
## phi 0.99 0.56 0.05 1.88
# xtable(estimates, digits=2)
## Model diagnostics using spBayes
## Calculates DIC and PMCC
spDiag(m.3)
## -------------------------------------------------
## Calculating scores
## -------------------------------------------------
## $DIC
## value
## bar.D -69.46052
## D.bar.Omega -90.69995
## pD 21.23943
## DIC -48.22109
##
## $GP
## value
## G 0.03723636
## P 1.96268848
## D 1.99992484
##
## $GRS
## [1] 73.89896
4 Code to reproduce Table 6.2
<- Bspatial(package="spBayes", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M3 coordtype="utm", coords=4:5, prior.phi=c(0.005, 2), n.report=2)
## ----------------------------------------
## General model description
## ----------------------------------------
## Model fit with 28 observations.
##
## Number of covariates 4 (including intercept if specified).
##
## Using the exponential spatial correlation model.
##
## Number of MCMC samples 5000.
##
## Priors and hyperpriors:
## beta normal:
## mu: 0.000 0.000 0.000 0.000
## cov:
## 10000.000 0.000 0.000 0.000
## 0.000 10000.000 0.000 0.000
## 0.000 0.000 10000.000 0.000
## 0.000 0.000 0.000 10000.000
##
## sigma.sq IG hyperpriors shape=2.00000 and scale=1.00000
## tau.sq IG hyperpriors shape=2.00000 and scale=0.10000
## phi Unif hyperpriors a=0.00500 and b=2.00000
## -------------------------------------------------
## Sampling
## -------------------------------------------------
## Sampled: 2500 of 5000, 50.00%
## Report interval Metrop. Acceptance rate: 41.96%
## Overall Metrop. Acceptance rate: 41.96%
## -------------------------------------------------
## Sampled: 5000 of 5000, 100.00%
## Report interval Metrop. Acceptance rate: 41.24%
## Overall Metrop. Acceptance rate: 41.60%
## -------------------------------------------------
## ##
## # Total time taken:: 0.32 - Sec.
.2 <- M3$params
table6round(table6.2, 2)
## mean sd 2.5% 97.5%
## X.Intercept. -41.15 49.46 -138.32 58.49
## xmaxtemp 1.32 0.94 -0.53 3.14
## xwdsp 3.38 1.22 1.00 5.79
## xrh 9.48 7.94 -6.52 24.72
## sigma.sq 12.52 3.65 7.46 21.31
## tau.sq 0.09 0.23 0.02 0.31
## phi 1.09 0.62 0.06 1.96
dput(table6.2, file=paste0(filepath, "table6.2.txt"))
# xtable(table6.2, digits=2)
5 Stan code from Section 6.6.2 to fit the marginal model
Save the following code in the file spatial_model.stan
in the current working directory. This code does not run in R
. See the next set of code chunk to run this code using the rstan
package.
// data block. This must contain the same variables as data list
// that will come from R
data {<lower=0> n; // number of sites
int<lower=0> p; // number of covariates
int
vector[n] y;
matrix[n, p] X;// to hold n by n distance matrix
matrix[n, n] dist; <lower=0.00001> phi;
real<lower=0>[2] priorsigma2;
vector<lower=0>[2] priortau2;
vector
}
// These will never change during the MCMC computation.
transformed data {=1e-5;
real delta= rep_vector(0, n);
vector[n] mu_0
}
// Declare all the parameters to be sampled here
parameters {
vector[p] beta;<lower=0> sigma_sq;
real
vector[n] eta;<lower=0> tau_sq;
real
}
// Transformations of parameters that help to specify
// the model.
transformed parameters {
real xbmodel[n];
real dats[n];
matrix[n, n] L;
matrix[n, n] Sigma;
real u;
for (i in 1:n) {
for (j in 1:n) {
= sigma_sq * exp((-1)*phi*dist[i,j]);
Sigma[i, j]
}= Sigma[i, i] + delta;
Sigma[i, i]
}
= cholesky_decompose(Sigma);
L
for ( i in 1:n) {
=0.0;
u for (j in 1:p) {
+= beta[j] * X[i, j];
u
}= u;
xbmodel[i] = y[i] - eta[i];
dats[i]
}
}
// Model specification
model {~ multi_normal_cholesky(mu_0, L);
eta ~ inv_gamma(priorsigma2[1], priorsigma2[2]);
sigma_sq ~ inv_gamma(priortau2[1], priortau2[2]);
tau_sq ~ normal(xbmodel, sqrt(tau_sq));
dats
}
Here is the code block to fit the marginal model using the rstan
package.
rm(list=ls())
library(bmstdr)
head(nyspatial)
<- nrow(nyspatial)
n <- nyspatial$yo3
y <- cbind(1, nyspatial[, 7:9])
X <- ncol(X)
p head(X)
<- 0.4
phi <- as.matrix(dist(as.matrix(nyspatial[, 4:5]))/1000)
distmat dim(distmat)
1:5, 1:5]
distmat[max(distmat)
<- list(n=n, p=p, y = y, X=as.matrix(X),
datatostan priorsigma2 = c(2, 1), priortau2 = c(2, 1),
phi=phi, dist=distmat)
<- lm(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial)
M0 coef(M0)
<- function() {
initfun # starting values near the lm estimates
# variations will work as well
list(sigma_sq = 1, tau_sq=1, beta=coef(M0))
}library(rstan)
<- stan(data=datatostan, file = "spatial_model.stan", seed = 44, chains = 1, iter = 1000, warmup = 500, init=initfun)
stanfit
<- rstan::summary(stanfit, pars =c("beta", "tau_sq", "sigma_sq"), probs = c(.025, .975))
stanestimates names(stanestimates)
<- data.frame(stanestimates$summary)
params print(params)
Here is the bmstdr
command to fit the marginal model using rstan
. This command does everything that has been noted in the previous two code blocks.
if (longrun) {
<- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M4 coordtype="utm", coords=4:5,phi=0.4)
.3 <- M4$params
table6dput(table6.3, file=paste0(filepath, "table6.3.txt"))
else {
} cat ("Table 6.3 has been read from the disk")
}## Table 6.3 has been read from the disk
round(table6.3, 2)
## mean sd 2.5% 97.5%
## (Intercept) -55.13 58.87 -168.17 58.38
## xmaxtemp 1.63 1.13 -0.52 3.84
## xwdsp 3.22 1.30 0.63 5.77
## xrh 11.15 9.24 -6.75 29.16
## tausq 0.10 0.13 0.02 0.43
## sigmasq 12.43 3.70 7.21 21.06
# xtable(table6.3, digits=2)
6 Code to necessary to fit spatial models using INLA
library(INLA)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:spam':
##
## det
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: foreach
## Loading required package: parallel
## This is INLA_21.02.23 built 2021-07-22 12:47:15 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - To enable PARDISO sparse library; see inla.pardiso()
## - Save 273.9Mb of storage running 'inla.prune()'
##
## Attaching package: 'INLA'
## The following object is masked from 'package:spam':
##
## Oral
<- nyspatial[, c("utmx", "utmy")]/1000
coords <- diff(range(coords[,1]))/15
max.edge <- diff(range(coords[,2]))/3
bound.outer <- inla.mesh.2d(loc = coords, max.edge = c(1,5)*max.edge,
mesh offset = c(max.edge, bound.outer), cutoff = max.edge/5)
plot(mesh)
if (longrun) ggsave(filename=paste0(figurepath, "ny_triangulation.png"))
##
<- inla.spde2.pcmatern(mesh = mesh, alpha = 1.5, prior.range = c(1, 0.5), prior.sigma = c(1, 0.05))
spde ##
<- inla.spde.make.A(mesh = mesh, loc = as.matrix(coords))
A ##
<- inla.stack(tag = 'est', data = list(yo3 = nyspatial$yo3), A = list(A, 1),
stack effects = list(se = 1:spde$n.spde,
Xcov = cbind(intercept=1, nyspatial[, 7:9])))
##
<- list(prec = list(prior = "loggamma", param = c(2,0.1)))
hyper ##
<- yo3~ - 1 + xmaxtemp+xwdsp+xrh + f(se, model = spde)
f1 <- inla(formula=f1, data = inla.stack.data(stack), family = "gaussian",
ifit control.family = list(hyper = hyper),
control.predictor = list(A = inla.stack.A(stack), compute = T),
control.compute = list(config = T, dic = T, waic = T), verbose = F)
names(ifit$marginals.hyperpar)
## [1] "Precision for the Gaussian observations"
## [2] "Range for se"
## [3] "Stdev for se"
##
<- 5000
N
<- inla.rmarginal(N, ifit$marginals.fixed[[1]])
beta1.samp <- inla.rmarginal(N, ifit$marginals.fixed[[2]])
beta2.samp <- inla.rmarginal(N, ifit$marginals.fixed[[3]])
beta3.samp names(ifit$marginals.hyperpar)
## [1] "Precision for the Gaussian observations"
## [2] "Range for se"
## [3] "Stdev for se"
<- inla.rmarginal(N, ifit$marginals.hyperpar[[1]])
prec.samp <- 1/prec.samp
sig2e.samp <- inla.rmarginal(N, ifit$marginals.hyperpar[[2]])
range.samp <- 3/range.samp
phi.samp <- inla.rmarginal(N, ifit$marginals.hyperpar[[3]])
sd.samp <- sd.samp^2
sig2w.samp <- cbind(beta1.samp, beta2.samp, beta3.samp, phi.samp, sig2w.samp, sig2e.samp)
psamps print(get_parameter_estimates(psamps))
## mean sd low up
## beta1.samp 0.6023813 0.3506826 -0.0925198926 1.279406
## beta2.samp 3.8846246 1.0678077 1.7675689764 5.988309
## beta3.samp 3.1298227 2.4168513 -1.6998730083 7.850223
## phi.samp 8.4328492 32.2571675 0.3291781029 40.729637
## sig2w.samp 0.3009525 0.9435805 0.0009784489 2.035566
## sig2e.samp 12.5268316 3.3875180 7.4082205505 20.509760
Here is the bmstdr
command to fit the marginal model using INLA
. This command does everything that has been noted in the preceding code block.
<- Bspatial(package="inla",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M5 coordtype="utm", coords=4:5)
## INLA will be used.
## ##
## # Total time taken:: 1.8 - Sec.
.4 <- M5$params
table6round(table6.4, 2)
## mean sd 2.5% 97.5%
## X.Intercept. -12.42 27.34 -65.77 41.77
## xmaxtemp 0.83 0.60 -0.38 2.00
## xwdsp 3.67 1.10 1.54 5.91
## xrh 5.13 4.78 -4.38 14.47
## phi 8.64 30.19 0.40 40.55
## sigmasq 0.13 0.40 0.00 0.82
## tausq 12.58 3.42 7.35 20.71
# xtable(table6.4, digits=2)
dput(table6.4, file=paste0(filepath, "table6.4.txt"))
7 Code for Section 6.7 on Model Choice
if (longrun) {
<- Bspatial(formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial, mchoice=T)
M1.c <- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M2.c coordtype="utm", coords=4:5, phi=0.4, mchoice=T)
<- Bspatial(package="spBayes", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M3.c coordtype="utm", coords=4:5, prior.phi=c(0.005, 2), mchoice=T, n.report=2)
<- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M4.c coordtype="utm", coords=4:5,phi=0.4, mchoice=T)
<-Bspatial(package="inla",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M5.c coordtype="utm", coords=4:5, mchoice=T)
## Reproduce the estimates
<- Bmchoice(case="MC.sigma2.unknown", y=ydata)
a3 <- rep(NA, 11)
a5 c(1, 3, 5, 7, 9:11)] <- unlist(M5.c$mchoice)
a5[.5 <- cbind.data.frame(unlist(a3), M1.c$mchoice, M2.c$mchoice, M3.c$mchoice, M4.c$mchoice, a5)
table6dput(table6.5, file = paste0(filepath, "table6.5.txt"))
else {
} cat("Table 6.5 has been read from the disk")
}## Table 6.5 has been read from the disk
colnames(table6.5) <- paste("M", 0:5, sep="")
round(table6.5, 2)
## M0 M1 M2 M3 M4 M5
## pdic 2.07 4.99 4.98 5.17 4.85 4.17
## pdicalt 13.58 5.17 5.16 7.83 5.13 NA
## dic 169.20 158.36 158.06 158.68 157.84 157.23
## dicalt 192.22 158.72 158.41 163.99 158.39 NA
## pwaic1 1.82 5.20 4.93 4.88 4.42 4.73
## pwaic2 2.52 6.32 5.91 6.77 5.29 NA
## waic1 168.95 158.57 157.51 158.70 156.99 158.46
## waic2 170.35 160.82 159.47 162.48 158.72 NA
## gof 593.54 327.98 330.08 323.56 320.49 334.03
## penalty 575.80 351.52 346.73 396.63 393.92 39.17
## pmcc 1169.34 679.50 676.82 720.18 714.41 373.19
# xtable(table6.5, digits=2)
8 Code for model validation
8.1 Code for Figure 6.3
<- map_data(database="state",regions="new york")
nymap head(nymap)
## long lat group order region subregion
## 1 -73.92874 40.80605 1 1 new york manhattan
## 2 -73.93448 40.78886 1 2 new york manhattan
## 3 -73.95166 40.77741 1 3 new york manhattan
## 4 -73.96312 40.75449 1 4 new york manhattan
## 5 -73.96885 40.73730 1 5 new york manhattan
## 6 -73.97458 40.72584 1 6 new york manhattan
<- c(8,11,12,14,18,21,24,28)
s <- nyspatial[-s, c("Longitude", "Latitude")]
fcoords <- nyspatial[s, c("Longitude", "Latitude")]
vcoords library(tidyr)
<- tibble(
label long = -76.1,
lat = 41.5,
label = "20 fitted (circles) & 8 \n validation (numbered) sites"
)
<- ggplot() +
vsites8 geom_polygon(data=nymap, aes(x=long, y=lat, group=group),
color="black", size = 0.6, fill=NA) +
geom_point(data =fcoords, aes(x=Longitude,y=Latitude)) +
geom_text(data=vcoords, aes(x=Longitude,y=Latitude, label=s), col=4) +
labs( title= "28 air pollution monitoring sites in New York", x="Longitude", y = "Latitude") +
geom_text(aes(label=label, x=long, y=lat), data = label, vjust = "top", hjust = "right") +
# geom_rect(mapping=aes(xmin=-80.2, xmax=-77.3, ymin=41, ymax=41.6), color="black", fill=NA) +
geom_rect(mapping=aes(xmin=-78.7, xmax=-75.8, ymin=41, ymax=41.6), color="black", fill=NA) +
::scalebar(data =nymap, dist = 100, location = "bottomleft", transform=T, dist_unit = "km",
ggsnst.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
::north(data=nymap, location="topleft", symbol=12)
ggsn vsites8
ggsave(filename=paste0(figurepath, "8_validation_sites_nydata.png"))
## Saving 7 x 5 in image
if (longrun) {
<- c(8,11,12,14,18,21,24,28)
s <- Bspatial(model="lm", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M1.v coordtype="utm", coords=4:5,validrows=s, mchoice=F)
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M2.v coordtype="utm", coords=4:5,validrows=s, mchoice=F, phi=0.4)
<- Bspatial(package="spBayes",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M3.v coordtype="utm", coords=4:5, validrows=s, mchoice=F,
n.report=2, prior.phi=c(0.005, 2))
<- Bspatial(package="stan", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M4.v coordtype="utm", coords=4:5,validrows=s, phi=0.4)
<- Bspatial(package="inla", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
M5.v coordtype="utm", coords=4:5, validrows=s, mchoice=F)
.6 <- cbind(unlist(M1.v$stats), unlist(M2.v$stats), unlist(M3.v$stats), unlist(M4.v$stats), unlist(M5.v$stats))
table6colnames(table6.6) <- paste("M", 1:5, sep="")
dput(table6.6, file=paste0(filepath, "table6.6.txt"))
else {
} cat("Table 6.6 has been read from the disk")
}## Table 6.6 has been read from the disk
round(table6.6, 3)
## M1 M2 M3 M4 M5
## rmse 2.447 2.400 2.428 2.423 2.386
## mae 2.135 2.015 2.043 2.035 1.983
## crps 2.891 2.885 2.891 2.199 1.944
## cvg 100.000 100.000 100.000 100.000 100.000
# xtable(table6.6[-4, ], digits=3)
9 Code to choose the phi parameter in the spatial model
<- phichoice_sp()
asave ## rmse mae crps cvg
## 2.447 2.135 2.891 100.000
## ##
## # Total time taken:: 0.84 - Sec.
## mean sd low up
## (Intercept) -45.68 121.94 -286.63 195.28
## xmaxtemp 0.65 2.95 -5.19 6.48
## xwdsp 1.23 2.93 -4.56 7.01
## xrh 18.94 15.31 -11.31 49.19
## sigma2 43.73 13.83 24.44 77.57
## rmse mae crps cvg
## 4.436 4.105 2.263 75.000
## ##
## # Total time taken:: 0.85 - Sec.
## mean sd low up
## (Intercept) -56.11 67.65 -189.78 77.56
## xmaxtemp 1.54 1.22 -0.86 3.95
## xwdsp 3.09 1.52 0.08 6.09
## xrh 12.30 11.37 -10.17 34.77
## sigma2 13.28 4.20 7.42 23.57
## rmse mae crps cvg
## 2.913 2.593 2.742 100.000
## ##
## # Total time taken:: 0.83 - Sec.
## mean sd low up
## (Intercept) -58.34 65.42 -187.61 70.93
## xmaxtemp 1.55 1.19 -0.79 3.90
## xwdsp 3.05 1.46 0.15 5.94
## xrh 12.86 10.84 -8.56 34.28
## sigma2 13.01 4.11 7.27 23.08
## rmse mae crps cvg
## 2.528 2.261 2.845 100.000
## ##
## # Total time taken:: 0.86 - Sec.
## mean sd low up
## (Intercept) -60.88 64.83 -188.98 67.22
## xmaxtemp 1.57 1.19 -0.77 3.91
## xwdsp 2.99 1.45 0.12 5.85
## xrh 13.48 10.65 -7.56 34.52
## sigma2 13.00 4.11 7.26 23.06
## rmse mae crps cvg
## 2.419 2.101 2.875 100.000
## ##
## # Total time taken:: 0.81 - Sec.
## mean sd low up
## (Intercept) -62.56 64.54 -190.09 64.97
## xmaxtemp 1.58 1.18 -0.76 3.93
## xwdsp 2.95 1.44 0.10 5.80
## xrh 13.88 10.55 -6.96 34.72
## sigma2 13.01 4.11 7.27 23.08
## rmse mae crps cvg
## 2.400 2.015 2.885 100.000
## ##
## # Total time taken:: 0.81 - Sec.
## mean sd low up
## (Intercept) -63.56 64.38 -190.77 63.66
## xmaxtemp 1.59 1.18 -0.75 3.93
## xwdsp 2.92 1.44 0.08 5.77
## xrh 14.12 10.49 -6.60 34.84
## sigma2 13.02 4.12 7.28 23.10
## rmse mae crps cvg
## 2.407 2.018 2.888 100.000
## ##
## # Total time taken:: 0.82 - Sec.
## mean sd low up
## (Intercept) -64.14 64.29 -191.18 62.91
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.91 1.44 0.07 5.75
## xrh 14.26 10.45 -6.40 34.92
## sigma2 13.03 4.12 7.28 23.11
## rmse mae crps cvg
## 2.419 2.062 2.890 100.000
## ##
## # Total time taken:: 0.83 - Sec.
## mean sd low up
## (Intercept) -64.46 64.24 -191.41 62.48
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.90 1.43 0.07 5.74
## xrh 14.34 10.43 -6.28 34.96
## sigma2 13.03 4.12 7.28 23.12
## rmse mae crps cvg
## 2.428 2.089 2.890 100.000
## ##
## # Total time taken:: 0.83 - Sec.
## mean sd low up
## (Intercept) -64.65 64.21 -191.54 62.24
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.90 1.43 0.06 5.73
## xrh 14.38 10.42 -6.21 34.98
## sigma2 13.03 4.12 7.28 23.12
## rmse mae crps cvg
## 2.435 2.106 2.891 100.000
## ##
## # Total time taken:: 0.79 - Sec.
## mean sd low up
## (Intercept) -64.75 64.20 -191.61 62.11
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.90 1.43 0.06 5.73
## xrh 14.41 10.42 -6.18 34.99
## sigma2 13.04 4.12 7.29 23.13
## rmse mae crps cvg
## 2.439 2.117 2.891 100.000
## ##
## # Total time taken:: 0.82 - Sec.
## mean sd low up
## (Intercept) -64.81 64.19 -191.65 62.03
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.89 1.43 0.06 5.73
## xrh 14.42 10.41 -6.16 35.00
## sigma2 13.04 4.12 7.29 23.13
## rmse mae crps cvg
## 2.442 2.124 2.891 100.000
## ##
## # Total time taken:: 0.79 - Sec.
<- Bspatial(model="lm",formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
lmresults coordtype="utm", coords=4:5, validrows= c(8,11,12,14,18,21,24,28))
## rmse mae crps cvg
## 2.447 2.135 2.891 100.000
## ##
## # Total time taken:: 0.81 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
phi1res coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28))
## mean sd low up
## (Intercept) -45.68 121.94 -286.63 195.28
## xmaxtemp 0.65 2.95 -5.19 6.48
## xwdsp 1.23 2.93 -4.56 7.01
## xrh 18.94 15.31 -11.31 49.19
## sigma2 43.73 13.83 24.44 77.57
## rmse mae crps cvg
## 4.436 4.105 2.263 75.000
## ##
## # Total time taken:: 0.82 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
phi2res coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28), phi=2)
## mean sd low up
## (Intercept) -64.88 64.18 -191.70 61.93
## xmaxtemp 1.60 1.18 -0.74 3.94
## xwdsp 2.89 1.43 0.06 5.72
## xrh 14.44 10.41 -6.13 35.01
## sigma2 13.04 4.12 7.29 23.13
## rmse mae crps cvg
## 2.447 2.135 2.891 100.000
## ##
## # Total time taken:: 0.79 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
phihalfres coordtype="utm", coords=4:5,validrows= c(8,11,12,14,18,21,24,28), phi=0.5)
## mean sd low up
## (Intercept) -63.56 64.38 -190.77 63.66
## xmaxtemp 1.59 1.18 -0.75 3.93
## xwdsp 2.92 1.44 0.08 5.77
## xrh 14.12 10.49 -6.60 34.84
## sigma2 13.02 4.12 7.28 23.10
## rmse mae crps cvg
## 2.407 2.018 2.888 100.000
## ##
## # Total time taken:: 0.8 - Sec.
<- cbind(unlist(lmresults$stats), unlist(phihalfres$stats),
validation_results unlist(phi1res$stats), unlist(phi2res$stats))
dimnames(validation_results)[[2]] <- c("lm", "phi=0.5", "phi=1", "phi=2")
round(validation_results, 3)
## lm phi=0.5 phi=1 phi=2
## rmse 2.447 2.407 4.436 2.447
## mae 2.135 2.018 4.105 2.135
## crps 2.891 2.888 2.263 2.891
## cvg 100.000 100.000 75.000 100.000
# xtable(validation_results, digits=3)
10 Code to reproduce Table 6.7 and Figure 6.4
## K fold cross-validation for M2 only
set.seed(44)
<- runif(n=28)
x <- order(x)
u # Here are the four folds
<- u[1:7]
s1 <- u[8:14]
s2 <- u[15:21]
s3 <- u[22:28]
s4 summary((1:28) - sort(c(s1, s2, s3, s4))) ## check
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
v1 coordtype="utm", coords=4:5,validrows= s1, phi=0.4, mchoice=F)
## mean sd low up
## (Intercept) -82.15 65.60 -211.74 47.43
## xmaxtemp 2.23 1.28 -0.31 4.76
## xwdsp 2.43 1.59 -0.71 5.56
## xrh 15.04 10.14 -5.00 35.08
## sigma2 12.69 3.92 7.18 22.24
## rmse mae crps cvg
## 2.441 1.789 2.085 100.000
## ##
## # Total time taken:: 0.72 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
v2 coordtype="utm", coords=4:5,validrows= s2, phi=0.4, mchoice=F)
## mean sd low up
## (Intercept) 58.16 82.58 -104.97 221.29
## xmaxtemp 0.11 1.43 -2.72 2.94
## xwdsp 4.67 1.34 2.02 7.31
## xrh -9.87 13.26 -36.06 16.31
## sigma2 10.42 3.22 5.90 18.27
## rmse mae crps cvg
## 5.005 3.545 2.077 85.714
## ##
## # Total time taken:: 0.74 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
v4 coordtype="utm", coords=4:5,validrows= s4, phi=0.4, mchoice=F)
## mean sd low up
## (Intercept) -52.56 60.81 -172.69 67.58
## xmaxtemp 1.56 1.14 -0.70 3.83
## xwdsp 3.43 1.57 0.33 6.53
## xrh 10.55 10.03 -9.25 30.36
## sigma2 12.46 3.85 7.05 21.85
## rmse mae crps cvg
## 2.508 2.145 2.072 100.000
## ##
## # Total time taken:: 0.74 - Sec.
<- Bspatial(model="spat", formula=yo3~xmaxtemp+xwdsp+xrh, data=nyspatial,
v3 coordtype="utm", coords=4:5, validrows= s3, phi=0.4, mchoice=F)
## mean sd low up
## (Intercept) -87.61 38.21 -163.10 -12.12
## xmaxtemp 1.76 0.73 0.32 3.21
## xwdsp 3.10 0.84 1.44 4.76
## xrh 19.09 5.99 7.25 30.92
## sigma2 4.65 1.43 2.63 8.15
## rmse mae crps cvg
## 5.865 5.462 1.228 57.143
## ##
## # Total time taken:: 0.72 - Sec.
ggsave(file=paste0(figurepath, "fold3_obs_v_pred_nydata.png"))
## Saving 7 x 5 in image
.7 <- cbind(unlist(v1$stats), unlist(v2$stats), unlist(v3$stats), unlist(v4$stats))
table6
dimnames(table6.7)[[2]] <- paste("Fold", 1:4, sep="")
round(table6.7, 3)
## Fold1 Fold2 Fold3 Fold4
## rmse 2.441 5.005 5.865 2.508
## mae 1.789 3.545 5.462 2.145
## crps 2.085 2.077 1.228 2.072
## cvg 100.000 85.714 57.143 100.000
# xtable(table6.7, digits=3)
dput(table6.7, file=paste0(filepath, "table6.7.txt"))
## Use v3 to demonstrate posterior predictive checks
library(bayesplot)
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
color_scheme_set("brightblue")
<- v2$yobs_preds$yo3
y1 dim(v2$valpreds)
## [1] 5000 7
<- v2$valpreds
ypred
ppc_dens_overlay(y1, ypred)
if (longrun) ggsave(filename = paste0(figurepath, "ppc_sp.png"))
ppc_stat(y1, ypred, stat = "max", binwidth = 0.5)
# ppc_scatter(y1, ypred)
# available_ppc()
if (longrun) ggsave(filename = paste0(figurepath, "ppc_sp_max.png"))
# All done
<- proc.time()[3]
end.time <- (end.time-start.time)/60
comp.time# comp.time<-fancy.time(comp.time)
print(comp.time)
## elapsed
## 1.293167