Code to reproduce Figure3.1
summary(nyspatial)
## s.index Longitude Latitude utmx
## Min. : 1.00 Min. :-79.59 Min. :40.74 Min. :121813
## 1st Qu.: 7.75 1st Qu.:-75.99 1st Qu.:41.34 1st Qu.:420017
## Median :14.50 Median :-74.08 Median :42.54 Median :576087
## Mean :14.50 Mean :-75.01 Mean :42.38 Mean :500016
## 3rd Qu.:21.25 3rd Qu.:-73.76 3rd Qu.:43.23 3rd Qu.:602346
## Max. :28.00 Max. :-72.71 Max. :44.39 Max. :692461
## utmy yo3 xmaxtemp xwdsp
## Min. :4510117 Min. :39.18 Min. :26.05 Min. :4.030
## 1st Qu.:4577789 1st Qu.:44.92 1st Qu.:26.53 1st Qu.:4.475
## Median :4710883 Median :47.24 Median :27.31 Median :5.000
## Mean :4693783 Mean :47.88 Mean :27.54 Mean :5.095
## 3rd Qu.:4789464 3rd Qu.:51.41 3rd Qu.:28.57 3rd Qu.:5.743
## Max. :4916157 Max. :57.60 Max. :29.70 Max. :6.146
## xrh
## Min. :3.455
## 1st Qu.:3.687
## Median :3.725
## Mean :3.742
## 3rd Qu.:3.868
## Max. :3.950
p <- ggplot(nyspatial, aes(x=yo3)) +
geom_histogram(binwidth=4.5, color="black", fill="white") +
labs(x="Average daily ozone concentration", y = "count")
p
ggsave(filename=paste0(figurepath, "hist_nyspatial.png"))
## Saving 7 x 5 in image
p <- ggpairs(nyspatial, columns=6:9)
p
ggsave(filename=paste0(figurepath, "pairs_nyspatial.png"))
## Saving 7 x 5 in image
a <- bmstdr_variogram(data=nyspatial, formula = yo3~utmx + utmy, coordtype="utm", nb=50)
## `geom_smooth()` using formula 'y ~ x'
names(a)
## [1] "cloud" "variogram" "cloudplot" "variogramplot"
ggarrange(a$cloudplot, a$variogramplot, legend = "none", nrow = 1, ncol = 2)
## `geom_smooth()` using formula 'y ~ x'
ggsave(file=paste0(figurepath, "variogram_nyspatial.png"))
## Saving 7 x 5 in image
## Check variogram using the geoR package
head(nyspatial)
## s.index Longitude Latitude utmx utmy yo3 xmaxtemp xwdsp
## 1 1 -73.757 42.681 601838.1 4726140 45.08065 27.53184 4.142083
## 2 2 -73.881 40.866 594300.8 4524485 48.34758 29.60294 5.666202
## 3 3 -79.587 42.291 121812.7 4692285 54.65677 26.28780 5.742751
## 4 4 -76.802 42.111 351019.8 4663672 44.95774 27.44725 4.842872
## 5 5 -73.743 41.782 604457.3 4626336 43.92903 28.27555 4.427053
## 6 6 -78.771 42.993 192581.3 4766941 55.37726 26.61813 6.146428
## xrh
## 1 3.686737
## 2 3.478130
## 3 3.950145
## 4 3.699996
## 5 3.698446
## 6 3.869971
y <- as.geodata(nyspatial, coords.col=4:5, data.col=6, covar.col=7:9)
vgram <- variog(y, estimator.type="classical")
## variog: computing omnidirectional variogram
vgram.robust <- variog(y, estimator.type="modulus")
## variog: computing omnidirectional variogram
plot(vgram)
krig.fit1<- Krig(x=nyspatial[, c("utmx", "utmy")], Y=nyspatial$yo3, Z=as.matrix(nyspatial[, 7:9]))
summary(krig.fit1)
## CALL:
## Krig(x = nyspatial[, c("utmx", "utmy")], Y = nyspatial$yo3, Z = as.matrix(nyspatial[,
## 7:9]))
##
## Number of Observations: 28
## Number of unique points: 28
## Number of parameters in the null space 6
## Parameters for fixed spatial drift 3
## Effective degrees of freedom: 13.6
## Residual degrees of freedom: 14.4
## MLE tau 2.649
## GCV tau 2.989
## MLE sigma 3.672
## Scale passed for covariance (sigma) <NA>
## Scale passed for nugget (tau^2) <NA>
## Smoothing parameter lambda 1.911
##
## Residual Summary:
## min 1st Q median 3rd Q max
## -5.28400 -1.10700 0.09712 1.29800 4.71800
##
## Covariance Model: stationary.cov
## Covariance function is
## Names of non-default covariance arguments:
##
##
## DETAILS ON SMOOTHING PARAMETER:
## Method used: REML Cost: 1
## lambda trA GCV GCV.one GCV.model tauHat
## 1.911 13.557 17.318 17.318 NA 2.989
##
## Summary of all estimates found for lambda
## lambda trA GCV tauHat -lnLike Prof converge
## GCV 0.06797 26.60 17.32 0.9306 59.93 NA
## GCV.model NA NA NA NA NA NA
## GCV.one 0.06797 26.60 17.32 0.9306 NA NA
## RMSE NA NA NA NA NA NA
## pure error NA NA NA NA NA NA
## REML 1.91130 13.56 17.32 2.9888 59.93 1
predsurface <- predict(krig.fit1, x=gridnyspatial[, c("utmx", "utmy")], Z=as.matrix(gridnyspatial[, 6:8]))
ksurf <- gridnyspatial[, c("Longitude","Latitude")]
ksurf$krig <- predsurface ## u has the x, y, z
coord <- nyspatial[, c("Longitude","Latitude")]
xo <- seq(from=min(coord$Longitude)-0.5, to = max(coord$Longitude)+0.8, length=200)
yo <- seq(from=min(coord$Latitude)-0.25, to = max(coord$Latitude)+0.8, length=200)
surf <- interp(ksurf$Longitude, ksurf$Latitude, ksurf$krig, xo=xo, yo=yo)
names(surf)
## [1] "x" "y" "z"
surf <- list(x=surf$x, y=surf$y, z=surf$z)
v <- fnc.delete.map.XYZ(xyz=surf)
interp1 <- data.frame(long = v$x, v$z )
names(interp1)[1:length(v$y)+1] <- v$y
interp1 <- gather(interp1,key = lat,value =Kriged,-long,convert = TRUE)
nymap <- map_data(database="state",regions="new york")
maplonglat <- cbind(nymap$long, nymap$lat)
com <- rev(c("firebrick4","firebrick2","white","dodgerblue2","dodgerblue4"))#colour palette
zr <- range(interp1$Kriged, na.rm=T)
P <- ggplot() +
geom_raster(data=interp1, aes(x = long, y = lat,fill = Kriged)) +
geom_polygon(data=nymap, aes(x=long, y=lat, group=group), color="black", size = 0.6, fill=NA) +
geom_point(data=coord, aes(x=Longitude,y=Latitude)) +
stat_contour(data=na.omit(interp1), aes(x = long, y = lat,z = Kriged), colour = "black", binwidth =0.4) +
scale_fill_gradientn(colours=com, na.value="gray95", limits=zr) +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
ggsn::scalebar(data =interp1, dist = 100, location = "bottomleft", transform=T, dist_unit = "km",
st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
ggsn::north(data=interp1, location="topleft", symbol=12) +
labs(title= "Kriged map of ozone air pollution in New York", x="Longitude", y = "Latitude", size=2.5)
P
ggsave(file=paste(figurepath, "kriged_ny.png", sep=""))
## Saving 7 x 5 in image
Code for exploring spatio-temporal data
fun_mean_var <- function(x) {
c(mean = mean(x, na.rm=T), s2 = var(x, na.rm=T))
}
a <- nysptime
a$s.index <- as.factor(a$s.index)
a$sqrty <- sqrt(a$y8hrmax)
sum_orig <- summaryBy(y8hrmax ~ s.index, data = a, FUN = fun_mean_var)
sum_sqrt <- summaryBy(sqrty ~ s.index, data = a, FUN = fun_mean_var)
sum_orig$s.index <- as.factor(sum_orig$s.index)
sum_sqrt$s.index <- as.factor(sum_sqrt$s.index)
p1 <- ggplot(sum_orig, aes(x=y8hrmax.mean, y=y8hrmax.s2))+
geom_smooth(method="lm", se=F) +
geom_point(size=1) +
theme(legend.position = NULL, legend.key.size = unit(0.5, "cm")) +
labs( title= "Original scale", x="mean", y="variance")
p1
## `geom_smooth()` using formula 'y ~ x'
p2 <- ggplot(sum_sqrt, aes(x=sqrty.mean, y=sqrty.s2))+
geom_point(size=1) +
geom_smooth(method="lm", se=F) +
theme(legend.position = NULL, legend.key.size = unit(0.5, "cm")) +
labs( title= "Square root scale", x="mean", y="variance")
p2
## `geom_smooth()` using formula 'y ~ x'
ggarrange(p1, p2, nrow = 1)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
ggsave(file=paste0(figurepath, "mean_variance_nysptime.png"), width=12, height = 6, units="cm")
a <- nysptime
a$time <- rep(1:62, 28)
a$s.index <- as.factor(a$s.index)
head(a)
sp <- ggplot(data=a, aes(x=time, y=y8hrmax, group=s.index, col=s.index)) +
geom_line() +
theme(legend.position="none")
sp
ggsave(file=paste0(figurepath, "timeseriesplot_nysptime.png"))
p <- ggplot(data=na.omit(nysptime), aes(x=as.factor(s.index), y=y8hrmax)) +
geom_boxplot(outlier.colour="red", outlier.shape=8,
outlier.size=2) +
labs(title= "Site wise boxplots", x="Site", y = "Daily ozone", size=2.5)
p
ggsave(filename=paste0(figurepath, "boxplots_sptime.png"))
s <- c(8,11,12,14,18,21,24,28)
a <- nysptime
a$time <- rep(1:62, 28)
a$s.index <- as.factor(a$s.index)
vdat <- a[which(nysptime$s.index%in%s), ]
np <- ggplot(data=vdat, aes(x=time, y=y8hrmax,shape=s.index, col=s.index)) +
geom_point() +
geom_line() +
# facet_wrap(~ s.index, ncol=4) +
labs(x="Day", y = "Daily ozone", size=2.5)
np
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 127 rows containing missing values (geom_point).
## Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave(filename=paste0(figurepath, "validation_time_series.png"))
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 127 rows containing missing values (geom_point).
## Warning: Removed 5 row(s) containing missing values (geom_path).
phist <- ggplot(nysptime, aes(x=sqrt(y8hrmax))) +
geom_histogram(binwidth=0.5, color="black", fill="white") +
labs(x="Square-root ozone", y = "count")
phist
## Warning: Removed 24 rows containing non-finite values (stat_bin).
a <- nysptime
a$Month <- as.factor(a$Month)
## max temperature
ptmp <- ggplot(a, aes(x=xmaxtemp, y=sqrt(y8hrmax), color=Month)) +
geom_point(shape=16, size=0.5) +
stat_smooth(method = "lm", col = "black")+
labs(x = "Maximum temperature", y = "Square root ozone")+
# geom_text(aes(label=s.index), hjust = -0.7, show.legend = F)+
theme(legend.position = c(0.1, 0.85), legend.key.size = unit(0.2, "cm"))
ptmp
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
pwdsp <- ggplot(a, aes(x=xwdsp, y=sqrt(y8hrmax), color=Month)) +
geom_point(shape=16, size=0.5) +
stat_smooth(method = "lm", col = "black")+
labs(x = "Wind speed", y = "Square root ozone")+
# geom_text(aes(label=s.index), hjust = -0.7, show.legend = F)+
theme(legend.position =c(0.1, 0.85), legend.key.size = unit(0.2, "cm"))
pwdsp
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
prh <- ggplot(a, aes(x=xrh, y=sqrt(y8hrmax), color=Month)) +
geom_point(shape=16, size=0.5) +
stat_smooth(method = "lm", col = "black")+
labs(x = "Relative humidity", y = "Square root ozone")+
# geom_text(aes(label=s.index), hjust = -0.7, show.legend = F)+
theme(legend.position =c(0.85, 0.85), legend.key.size = unit(0.2, "cm"))
prh
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
ggarrange(phist, ptmp, pwdsp, prh, nrow = 2, ncol=2)
## Warning: Removed 24 rows containing non-finite values (stat_bin).
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing non-finite values (stat_smooth).
## Warning: Removed 24 rows containing missing values (geom_point).
ggsave(file=paste0(figurepath, "nysptime_edaplots.png"), width=12, height = 8, units="cm")
EDA for areal data
colnames(engtotals)
## [1] "Areacode" "mapid" "spaceid" "Region"
## [5] "popn" "jsa" "houseprice" "popdensity"
## [9] "no2" "covid" "allcause" "noofcases"
## [13] "Edeaths" "Ecases" "logEdeaths" "logEcases"
## [17] "casesmr" "nweek" "noofhighweeks"
sum(engtotals$covid)
## [1] 49292
## Moran's I
set.seed(44)
summary(engtotals$covid)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 77.0 118.0 157.5 200.0 1223.0
covidrate <- engtotals$covid/engtotals$popn*100000
load(file=paste0(mappath, "/englad.rda"))
nbhood <- poly2nb(englad)
Wlist <- nb2listw(nbhood, style = "B", zero.policy = T)
moran.mc(engtotals$covid, Wlist, nsim=1000, zero.policy=T)
##
## Monte-Carlo simulation of Moran I
##
## data: engtotals$covid
## weights: Wlist
## number of simulations + 1: 1001
##
## statistic = 0.33891, observed rank = 1001, p-value = 0.000999
## alternative hypothesis: greater
geary.mc(engtotals$covid, Wlist, nsim=1000, zero.policy=T)
##
## Monte-Carlo simulation of Geary C
##
## data: engtotals$covid
## weights: Wlist
## number of simulations + 1: 1001
##
## statistic = 0.80762, observed rank = 29, p-value = 0.02897
## alternative hypothesis: greater
## For the covid rate per 100k
moran.mc(covidrate, Wlist, nsim=1000, zero.policy=T)
##
## Monte-Carlo simulation of Moran I
##
## data: covidrate
## weights: Wlist
## number of simulations + 1: 1001
##
## statistic = 0.44945, observed rank = 1001, p-value = 0.000999
## alternative hypothesis: greater
geary.mc(covidrate, Wlist, nsim=1000, zero.policy=T)
##
## Monte-Carlo simulation of Geary C
##
## data: covidrate
## weights: Wlist
## number of simulations + 1: 1001
##
## statistic = 0.52418, observed rank = 1, p-value = 0.000999
## alternative hypothesis: greater
head(engtotals)
## Areacode mapid spaceid Region popn jsa houseprice popdensity no2
## 1 E06000001 0 1 NorthEast 93663 0.6 127000 999 6.092857
## 2 E06000002 1 2 NorthEast 140980 1.1 135000 2616 9.437047
## 3 E06000003 2 3 NorthEast 137150 1.0 132000 560 8.686981
## 4 E06000004 3 4 NorthEast 197348 0.8 148000 963 9.301667
## 5 E06000005 4 5 NorthEast 106803 0.8 141000 541 8.686981
## 6 E06000006 5 6 NorthWest 129410 0.2 146000 1636 18.064286
## covid allcause noofcases Edeaths Ecases logEdeaths logEcases casesmr
## 1 110 477 616 101.1573 425.0841 4.616676 6.052287 1.449125
## 2 208 765 971 162.2333 639.8295 5.089036 6.461202 1.517592
## 3 134 710 711 150.5695 622.4473 5.014425 6.433659 1.142265
## 4 153 807 980 171.1403 895.6524 5.142483 6.797552 1.094174
## 5 90 522 611 110.7004 484.7192 4.706827 6.183570 1.260524
## 6 129 583 699 123.6366 587.3197 4.817347 6.375569 1.190152
## nweek noofhighweeks
## 1 20 9
## 2 20 8
## 3 20 8
## 4 20 7
## 5 20 6
## 6 20 9
dim(engtotals)
## [1] 313 19
head(englamap)
## id long lat order hole piece group Areacode
## 1 0 447097.0 537152.0 1 FALSE 1 0.1 E06000001
## 2 0 447228.8 537033.4 2 FALSE 1 0.1 E06000001
## 3 0 447280.7 537120.1 3 FALSE 1 0.1 E06000001
## 4 0 447378.1 537095.1 4 FALSE 1 0.1 E06000001
## 5 0 447455.3 537023.7 5 FALSE 1 0.1 E06000001
## 6 0 447550.5 537078.1 6 FALSE 1 0.1 E06000001
bdf <- merge(englamap, engtotals, by.x="id", by.y="mapid", all.y=T, all.x=F)
head(bdf)
## id long lat order hole piece group Areacode.x Areacode.y spaceid
## 1 0 447097.0 537152.0 1 FALSE 1 0.1 E06000001 E06000001 1
## 2 0 447228.8 537033.4 2 FALSE 1 0.1 E06000001 E06000001 1
## 3 0 447280.7 537120.1 3 FALSE 1 0.1 E06000001 E06000001 1
## 4 0 447378.1 537095.1 4 FALSE 1 0.1 E06000001 E06000001 1
## 5 0 447455.3 537023.7 5 FALSE 1 0.1 E06000001 E06000001 1
## 6 0 447550.5 537078.1 6 FALSE 1 0.1 E06000001 E06000001 1
## Region popn jsa houseprice popdensity no2 covid allcause noofcases
## 1 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## 2 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## 3 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## 4 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## 5 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## 6 NorthEast 93663 0.6 127000 999 6.092857 110 477 616
## Edeaths Ecases logEdeaths logEcases casesmr nweek noofhighweeks
## 1 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
## 2 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
## 3 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
## 4 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
## 5 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
## 6 101.1573 425.0841 4.616676 6.052287 1.449125 20 9
bdf$smr <- bdf$covid/bdf$Edeaths
summary(bdf$smr)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1264 0.5625 0.8216 0.8040 1.0217 2.0711
psmr <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=smr)) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=c(0.1, 2.1)) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="SMR")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "SMR of covid deaths during Mar 13 to July 31, 2020", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
psmr
ggsave(filename=paste0(figurepath, "smr_covid_death.png"))
summary(bdf$popdensity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.0 142.0 235.0 739.5 575.0 16427.0
a <- log(range(bdf$popdensity))
ppop <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=log(popdensity))) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="Log pop")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Log population density in 2019", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
ppop
ggsave(filename=paste0(figurepath, "log_pop_density.png"))
summary(bdf$jsa)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.100 0.200 0.200 0.298 0.300 1.500
a <- range(bdf$jsa)
pjsa <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=jsa)) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="JSA %")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Percentage of people receiving job seeker's allowance", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
pjsa
ggsave(filename=paste0(figurepath, "england_jsa_dec_2019.png"))
summary(log10(bdf$houseprice))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.987 5.301 5.380 5.384 5.462 6.114
a <- range(log10(bdf$houseprice))
pprice <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=log10(houseprice))) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="log10 price")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Log (base 10) median house price in March 2020", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
pprice
ggsave(filename=paste0(figurepath, "england_house_price_mar_2020.png"))
# ggarrange(psmr, ppop, pjsa, pprice, nrow = 2, ncol=2)
# ggsave(file=paste0(figurepath, "endeaths_socio_eco.png"), width=12, height =12, units="cm")
summary(bdf$noofcases)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 54 330 547 788 892 5260
bdf$caserate <- 100000 * bdf$noofcases/(bdf$popn*20) # 20 weeks
summary(bdf$caserate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.955 14.312 18.249 19.722 24.656 70.591
a <- range(bdf$caserate)
p32 <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=caserate)) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="Case rate")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Weekly cases per 100k during Mar 13 to Jul 31, 2020", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
p32
ggsave(filename=paste0(figurepath, "england_case_rate.png"))
engdeaths$caserate <- 100000*engdeaths$noofcases/engdeaths$popn
summary(engdeaths$caserate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 4.503 13.847 21.711 33.030 277.409
ctime <- ggplot(data=engdeaths, aes(x=factor(Weeknumber), y=caserate)) +
geom_boxplot() +
labs(#title="Boxplot of square-root precipitation against months by year",
x = "Week", y = "No of cases per 100,000") +
stat_summary(fun=median, geom="line", aes(group=1, col="red")) +
theme(legend.position = "none")
ctime
ggsave(filename =paste0(figurepath, "eng_boxplot_case_rate.png"))
colnames(engdeaths)
## [1] "Areacode" "mapid" "spaceid" "Region"
## [5] "popn" "jsa" "houseprice" "popdensity"
## [9] "startdate" "Weeknumber" "no2" "covid"
## [13] "allcause" "noofcases" "lag1noofcases" "lag2noofcases"
## [17] "lag3noofcases" "lag4noofcases" "n0" "n1"
## [21] "n2" "n3" "n4" "Edeaths"
## [25] "Ecases" "logEdeaths" "logEcases" "highdeathsmr"
## [29] "caserate"
engdeaths$deathrate <- engdeaths$covid * 100000/engdeaths$popn
engdeaths$caserate <- engdeaths$noofcases * 100000/engdeaths$popn
u <- engdeaths[, c("Weeknumber", "deathrate", "caserate", "no2")]
head(u)
## Weeknumber deathrate caserate no2
## 1 11 0.000000 2.135315 8.285714
## 2 12 1.067657 4.270630 12.000000
## 3 13 1.067657 20.285492 6.142857
## 4 14 5.338287 53.382873 7.142857
## 5 15 14.947204 48.044585 8.571429
## 6 16 10.676575 105.698088 2.428571
wkmeans <- aggregate.data.frame(u[, -1], by=list(Week=u[,1]), FUN=mean)
head(wkmeans)
## Week deathrate caserate no2
## 1 11 0.06751427 6.801892 18.89352
## 2 12 0.57516468 19.377759 20.83553
## 3 13 2.82722575 37.082191 12.63644
## 4 14 7.97997210 45.369661 18.88982
## 5 15 12.90445264 41.083810 17.69912
## 6 16 13.30915947 47.370480 12.46674
deathcolor <- "#FF0000"
casecolor <- rgb(0.2, 0.6, 0.9, 1)
twoplots <- ggplot(data=wkmeans, aes(x=Week)) +
geom_bar( aes(y=caserate), stat="identity", size=.1, fill=casecolor, color="blue", alpha=.4) +
geom_line(aes(y=deathrate), size=1, color=deathcolor) +
scale_y_continuous(
# Features of the first axis
name = "Deathrate per 100,000",
# Add a second axis and specify its features
sec.axis = sec_axis(~.*1, name="Caserate per 100,000")
) +
theme(
axis.title.y = element_text(color = deathcolor, size=13),
axis.title.y.right = element_text(color = casecolor, size=13)
) +
ggtitle("Weekly death (left axis) and case (right axis) rate per 100,000")
twoplots
pdf(file=paste0(figurepath, "death_case_by_week.pdf"))
twoplots
dev.off()
## png
## 2
summary(bdf$noofcases/bdf$Ecases)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1743 0.6307 0.8042 0.8691 1.0865 3.1108
bdf$casesmr <- bdf$noofcases/bdf$Ecases
summary(bdf$casesmr)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1743 0.6307 0.8042 0.8691 1.0865 3.1108
a <- range(bdf$casesmr)
p31 <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=casesmr)) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="Case SMR")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Standardised case morbidity rate", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T)
p31
ggsave(filename=paste0(figurepath, "england_case_smr.png"))
no2eobs <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=no2)) +
scale_fill_gradientn(colours=colpalette, na.value="black",limits=range(bdf$no2)) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="Mean NO2")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Average NO2 levels", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
ggsn::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
st.dist = .05, st.size =4, height = .06, st.bottom=T) +
theme(legend.position =c(0.2, 0.5)) +
geom_point(data=aurnsites, aes(x=Easting, y=Northing), inherit.aes = FALSE, size = 1, colour = "black")
no2eobs
ggsave(filename=paste0(figurepath, "mean_no2_by_LA.png"))
pno2time <- ggplot(data=engdeaths, aes(x=factor(Weeknumber), y=no2)) + geom_boxplot() +
# facet_wrap(~year, ncol=5) +
theme(legend.position = "none") +
labs(#title="Boxplot of square-root precipitation against months by year",
x = "Week", y = "NO2 levels") +
stat_summary(fun=median, geom="line", aes(group=1, col="0"))
pno2time
ggsave(filename=paste0(figurepath, "no2_boxplots.png"))
p51 <- ggplot(data=wkmeans, aes(x=Week)) +
geom_bar( aes(y=caserate/2.5), stat="identity", size=.1, fill=casecolor, color="blue", alpha=.4) +
geom_line(aes(y=no2), size=1, color=deathcolor) +
scale_y_continuous(
# Features of the first axis
name = "Average NO2 levels",
# Add a second axis and specify its features
sec.axis = sec_axis(~.*2.5, name="Caserate per 100,000")
) +
# theme_ipsum() +
theme(
axis.title.y = element_text(color = deathcolor, size=13),
axis.title.y.right = element_text(color = casecolor, size=13)
) +
ggtitle("Weekly NO2 levels (left) and case rate per 100,000 (right)")
p51
pdf(file=paste0(figurepath, "no2_case_by_week.pdf"))
p51
dev.off()
## png
## 2
head(engdeaths)
## Areacode mapid spaceid Region popn jsa houseprice popdensity startdate
## 1 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-13
## 2 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-20
## 3 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-27
## 4 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-03
## 5 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-10
## 6 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-17
## Weeknumber no2 covid allcause noofcases lag1noofcases lag2noofcases
## 1 11 8.285714 0 18 2 0 0
## 2 12 12.000000 1 24 4 2 0
## 3 13 6.142857 1 18 19 4 2
## 4 14 7.142857 5 31 50 19 4
## 5 15 8.571429 14 33 45 50 19
## 6 16 2.428571 10 35 99 45 50
## lag3noofcases lag4noofcases n0 n1 n2 n3
## 1 0 0 -2.3634075 -7.6009025 -7.6009025 -7.6009025
## 2 0 0 -1.6702603 -2.3634075 -7.6009025 -7.6009025
## 3 0 0 -0.1121157 -1.6702603 -2.3634075 -7.6009025
## 4 2 0 0.8554683 -0.1121157 -1.6702603 -2.3634075
## 5 4 2 0.7501078 0.8554683 -0.1121157 -1.6702603
## 6 19 4 1.5385652 0.7501078 0.8554683 -0.1121157
## n4 Edeaths Ecases logEdeaths logEcases highdeathsmr caserate
## 1 -7.600902 3.817255 21.2542 1.339532 3.056555 0 2.135315
## 2 -7.600902 5.089673 21.2542 1.627214 3.056555 0 4.270630
## 3 -7.600902 3.817255 21.2542 1.339532 3.056555 0 20.285492
## 4 -7.600902 6.574161 21.2542 1.883147 3.056555 0 53.382873
## 5 -2.363408 6.998301 21.2542 1.945667 3.056555 1 48.044585
## 6 -1.670260 7.422440 21.2542 2.004508 3.056555 1 105.698088
## deathrate
## 1 0.000000
## 2 1.067657
## 3 1.067657
## 4 5.338287
## 5 14.947204
## 6 10.676575
engdeaths$casesmr <- engdeaths$noofcases/engdeaths$Ecases
engdeaths$smr <- engdeaths$covid/engdeaths$Edeaths
dp1 <- engdeaths[, c("jsa", "houseprice", "popdensity", "no2", "casesmr", "smr")]
head(dp1)
## jsa houseprice popdensity no2 casesmr smr
## 1 0.6 127000 999 8.285714 0.09409903 0.0000000
## 2 0.6 127000 999 12.000000 0.18819807 0.1964763
## 3 0.6 127000 999 6.142857 0.89394082 0.2619684
## 4 0.6 127000 999 7.142857 2.35247583 0.7605533
## 5 0.6 127000 999 8.571429 2.11722825 2.0004857
## 6 0.6 127000 999 2.428571 4.65790215 1.3472659
dp1$no2 <- sqrt(dp1$no2)
dp1$houseprice <- log10(dp1$houseprice)
dp1$popdensity <- log(dp1$popdensity)
head(dp1)
## jsa houseprice popdensity no2 casesmr smr
## 1 0.6 5.103804 6.906755 2.878492 0.09409903 0.0000000
## 2 0.6 5.103804 6.906755 3.464102 0.18819807 0.1964763
## 3 0.6 5.103804 6.906755 2.478479 0.89394082 0.2619684
## 4 0.6 5.103804 6.906755 2.672612 2.35247583 0.7605533
## 5 0.6 5.103804 6.906755 2.927700 2.11722825 2.0004857
## 6 0.6 5.103804 6.906755 1.558387 4.65790215 1.3472659
colnames(dp1) <- c("jsa", "price", "popden", "no2", "casesmr", "deathsmr")
p3 <- dp1 %>% ggpairs(., # mapping = ggplot2::aes(colour="blue"),
lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1),
discrete = "blank", colpalettebo = wrap("box_no_facet", alpha=0.5)),
diag = list(discrete="barDiag",
continuous = wrap("densityDiag", alpha=0.5 )),
upper = list(colpalettebo = "blank", discrete = "blank", continuous = wrap("cor", family="sans", size=4, align_percent=0.5))) +
theme(panel.grid.major = element_blank())
p3
ggsave(filename=paste0(figurepath, "covd_and_socio.png"))
# All done
end.time <- proc.time()[3]
comp.time<- (end.time-start.time)/60
# comp.time<-fancy.time(comp.time)
print(comp.time)
## elapsed
## 1.398633