- 1 Code to reproduce Figure1.1
- 2 Code to reproduce Figure1.2
- 3 Code to reproduce Figure1.3
- 4 Code to reproduce Figure 1.4
- 5 Code to reproduce Figure 1.6
- 6 Code to reproduce Figure 1.7
- 7 Code to reproduce Figure 1.8
- 8 Code to reproduce Figure 1.9
- 9 Code to reproduce Figure 1.10
- 10 Code to reproduce Figure 1.11
- 11 Code to reproduce Figures 1.12 and 1.13
- 12 Code to reproduce Figure 1.14
- 13 Code to reproduce Figures 1.15 and 1.16
## Note the start time
<-proc.time()[3]
start.time# Download mapfiles and data files from https://github.com/sujit-sahu/bookbmstdr
# figurepath <- "Your path to save figures" # Folder to save the figures
# dpath <- "your data path/" # Folder containing the data files
<- "mapfiles/" # Folder containing the required map files
mappath <- "figures/chap1figures/"
figurepath <- "datafiles/"
dpath load(file=paste0(dpath, "C8ewpollution.RData"))
load(file=paste0(dpath, "C11us48cancerdata.RData"))
load(file=paste0(dpath, "C8euso3.RData"))
load(file=paste0(dpath, "C11England_LA_map_and_Respiratory.RData"))
load(file=paste0(dpath, "C11childpovertylondon.RData"))
load(file=paste0(mappath, "englamap.rda"))
load(file=paste0(mappath, "englainfo.rda"))
load(file=paste0(mappath, "aurnsites.rda"))
load(file=paste0(mappath, "engregmap.rda"))
load(file=paste0(mappath, "colpalette.rda"))
<- read.csv(file=paste0(dpath, "C11Kenya_vaccine.csv")) kdat
# Load all the libraries
library(bmstdr)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
library(ggplot2)
require(ggsn)
library(tidyr)
library(RColorBrewer)
library(akima)
library(rgdal)
library(broom)
library(doBy)
library(directlabels)
library(usmap)
1 Code to reproduce Figure1.1
<- map_data(database="state",regions="new york")
nymap head(nymap)
## long lat group order region subregion
## 1 -73.92874 40.80605 1 1 new york manhattan
## 2 -73.93448 40.78886 1 2 new york manhattan
## 3 -73.95166 40.77741 1 3 new york manhattan
## 4 -73.96312 40.75449 1 4 new york manhattan
## 5 -73.96885 40.73730 1 5 new york manhattan
## 6 -73.97458 40.72584 1 6 new york manhattan
<- ggplot() +
p geom_polygon(data=nymap, aes(x=long, y=lat, group=group),
color="black", size = 0.6, fill=NA) +
geom_point(data =nyspatial, aes(x=Longitude,y=Latitude)) +
labs( title= "28 air pollution monitoring sites in New York", x="Longitude", y = "Latitude") +
scalebar(data =nymap, dist = 100, location = "bottomleft", transform=T, dist_unit = "km",
st.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84") +
north(data=nymap, location="topleft", symbol=12)
p
ggsave(filename=paste0(figurepath, "nysites.png"))
## Saving 7 x 5 in image
2 Code to reproduce Figure1.2
<- readOGR(dsn=path.expand(mappath), layer="infuse_ctry_2011", verbose = FALSE)
wmap ## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum OSGB_1936 in Proj4 definition: +proj=tmerc +lat_0=49
## +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +units=m +no_defs
<- wmap[-grep(pattern="N92000002", wmap$label), ]
ewmap <- ewmap[-grep(pattern="S92000003", ewmap$label), ]
ewmap <- tidy(ewmap)
engw head(engw)
# load(file=paste0(dpath, "C8ewpollution.RData"))
head(p2011data)
## index lon lat easting northing type validation year month day
## 1462 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 1
## 1463 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 2
## 1464 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 3
## 1465 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 4
## 1466 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 5
## 1467 4 -3.034167 52.50361 329899 290027 Rural 1 2011 1 6
## aqum_no2 aqum_ozone aqum_pm10 aqum_pm2p5 obs_no2 obs_ozone obs_pm10
## 1462 25.328662 72.91099 4.701968 4.699310 9.9 57.75 NA
## 1463 17.231038 42.87047 4.801431 4.794895 7.4 70.25 NA
## 1464 20.505962 32.46326 5.794126 5.788368 2.5 71.25 NA
## 1465 5.899658 58.21732 3.547942 3.544742 6.9 61.75 NA
## 1466 7.551908 60.83800 1.651302 1.647939 2.5 70.25 NA
## 1467 4.496976 60.15438 1.067562 1.066221 7.4 66.75 NA
## obs_pm2p5
## 1462 NA
## 1463 NA
## 1464 NA
## 1465 NA
## 1466 NA
## 1467 NA
<- unique(p2011data[, 1:6])
grid dim(grid)
## [1] 144 6
# head(grid)
# table(grid$type)
<- ggplot(data=engw, aes(x=long, y=lat, group = group)) +
pew geom_polygon(colour='black',size=0.8, fill=NA) +
geom_point(data=grid, aes(x=easting, y = northing, shape=type, col=type), inherit.aes = FALSE, size=1.5) +
labs( title= "144 air pollution monitoring sites in England and Wales", x="", y = "") +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
theme(legend.title = element_blank()) +
theme(legend.position =c(0.1, 0.6)) +
::scalebar(data =engw, dist =100, location = "topleft", transform=F, dist_unit = "km", st.dist = .05, st.size = 5, height = .06, st.bottom=T) +
ggsn::north(data=engw, location="topright", symbol=12)
ggsn pew
ggsave(filename=paste0(figurepath, "engwsites.png"))
## Saving 7 x 5 in image
3 Code to reproduce Figure1.3
# load(file=paste0(dpath, "C8euso3.RData"))
head(euso3)
## s.index Longitude Latitude Year Month Day o8hrmax cMAX torder sqcmax
## 1 1 -85.802 33.281 1997 5 1 59.75 25.59053 1 5.058708
## 2 1 -85.802 33.281 1997 5 2 52.75 27.03593 2 5.199608
## 3 1 -85.802 33.281 1997 5 3 51.75 24.16542 3 4.915833
## 4 1 -85.802 33.281 1997 5 4 59.38 20.42035 4 4.518888
## 5 1 -85.802 33.281 1997 5 5 66.17 23.00181 5 4.796020
## 6 1 -85.802 33.281 1997 5 6 75.50 25.79922 6 5.079294
<- unique(euso3[, 1:3])
grid head(grid)
## s.index Longitude Latitude
## 1 1 -85.802 33.281
## 1531 2 -86.137 32.498
## 3061 3 -86.915 33.486
## 4591 4 -87.004 33.331
## 6121 5 -86.817 33.386
## 7651 6 -86.669 33.705
<- map_data(database="state",regions=eaststates)
useastmap head(useastmap)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
<- ggplot(data=useastmap, aes(x=long, y=lat, group = group)) +
p geom_polygon(colour='black',size=0.8, fill=NA) +
geom_point(data=grid, aes(x=Longitude, y = Latitude), inherit.aes = FALSE, col="Red", size=0.6) +
labs( title= "691 sites in the Eastern US", x="", y = "") +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank()) +
::north(data=useastmap, location="bottomright", symbol=12) +
ggsn::scalebar(data =useastmap, dist =500, location ="bottomleft", transform=T, dist_unit = "km",
ggsnst.dist = .05, st.size = 5, height = .06, st.bottom=T, model="WGS84")
p
ggsave(file=paste0(figurepath, "uso3sites.png"), width=12, height =10, units="cm")
4 Code to reproduce Figure 1.4
## Function to find 4th highest value
<-function(x){
oz4th<-sort(x,na.last=F)
y<-y[length(y)-3]
an4th
}##
## Function to calculate 3 year rolling averages
<- function(x) {
three_year_rolling_av <- length(x)
n <- rep(NA, n-2)
y for (k in 3:n) {
-2] <- mean(x[(k-2):k])
y[k
}
y
}
## Evaluate the 4th highest
<- aggregate.data.frame(euso3$o8hrmax,
ob4th by=list(s.index=euso3$s.index, Year=euso3$Year), oz4th)
head(ob4th)
## s.index Year x
## 1 1 1997 79.000
## 2 2 1997 70.130
## 3 3 1997 86.250
## 4 4 1997 79.880
## 5 5 1997 83.125
## 6 6 1997 78.500
dim(ob4th)
## [1] 6910 3
<- summaryBy(x ~ s.index, data = ob4th, FUN = three_year_rolling_av)
a dim(a)
## [1] 691 9
head(a)
## s.index x.FUN1 x.FUN2 x.FUN3 x.FUN4 x.FUN5 x.FUN6 x.FUN7
## 1 1 88.00000 87.41667 83.83333 81.16667 79.27333 75.40000 71.61000
## 2 2 79.88000 84.58667 79.71000 80.70667 76.08000 74.66333 71.00000
## 3 3 92.66667 92.87667 85.00333 83.04667 79.33667 76.83667 75.79333
## 4 4 88.92000 93.87667 89.79333 86.91667 79.54333 76.00000 77.29333
## 5 5 95.06333 99.29333 94.83500 90.54500 84.39833 82.06500 80.83500
## 6 6 86.04000 89.62333 85.66667 82.92000 77.75333 73.75333 71.62667
## x.FUN8
## 1 72.46333
## 2 71.83667
## 3 78.79333
## 4 81.20667
## 5 84.35667
## 6 73.29333
<- as.vector(t(a[,-1]))
b <- data.frame(s.index=rep(a$s.index, each=8), Year=rep(1999:2006, 691), ave3=b)
rolling head(rolling)
## s.index Year ave3
## 1 1 1999 88.00000
## 2 1 2000 87.41667
## 3 1 2001 83.83333
## 4 1 2002 81.16667
## 5 1 2003 79.27333
## 6 1 2004 75.40000
head(ob4th)
## s.index Year x
## 1 1 1997 79.000
## 2 2 1997 70.130
## 3 3 1997 86.250
## 4 4 1997 79.880
## 5 5 1997 83.125
## 6 6 1997 78.500
<- merge(ob4th, rolling, all.x=T)
u <- u[order(u$s.index, u$Year), ]
udat head(udat)
## s.index Year x ave3
## 1 1 1997 79.00 NA
## 2 1 1998 94.00 NA
## 3 1 1999 91.00 88.00000
## 4 1 2000 77.25 87.41667
## 5 1 2001 83.25 83.83333
## 6 1 2002 83.00 81.16667
$s.index <- as.factor(udat$s.index)
udat<- ggplot(data=udat, aes(x=Year, y=x, color=s.index)) +
p1 geom_line() +
geom_abline(slope=0, intercept=85, col="black", size=1)+
labs(x="Year", y="Annual 4th highest ozone") +
scale_colour_discrete(guide="none")
p1## Warning: Removed 564 row(s) containing missing values (geom_path).
ggsave(filename = paste0(figurepath, "an4thmax_97_06_base85.png"))
## Saving 7 x 5 in image
## Warning: Removed 564 row(s) containing missing values (geom_path).
<- na.omit(udat)
udat <- ggplot(data=udat, aes(x=Year, y=ave3, color=s.index)) +
p2 geom_line() +
geom_abline(slope=0, intercept=85, col="black", size=1)+
labs(x="Year", y="Three year rolling average") +
scale_colour_discrete(guide="none")
p2
ggsave(filename = paste0(figurepath, "3yave_99_06_base85.png"))
## Saving 7 x 5 in image
5 Code to reproduce Figure 1.6
<- readOGR(dsn=path.expand(mappath), layer="Longhurst_world_v4_2010", verbose = FALSE)
long # head(long@data)
# dim(long@data)
<- tidy(long)
long_df # head(long_df)
$area <- as.numeric(long_df$id)
long_df$id <- as.numeric(long_df$id)
long_dfsummary(long_df$id)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 15.00 20.97 43.00 53.00
## Just the outline map of the Longhurst regions
<- c(NA,NA,NA,13,11,15,4,10,1,8,NA,NA,NA,NA,NA,NA,NA,14,NA,NA,NA,2,3,NA,NA,NA,NA,NA,NA,NA,18,19,12,16,17,23,20,6,5,7,9,NA,NA,NA,NA,NA,NA,NA,NA,NA,21,22,NA,NA)#slightly different due to null region
provsorder $truid <- NA
long_df
for(i in 1:54){
$truid[which(long_df$id==i)] <- provsorder[i]
long_df
}<- as.data.frame(coordinates(long))
centroids_df head(centroids_df)
## V1 V2
## 0 -12.32365 79.01508
## 1 -19.88359 66.41352
## 2 11.21437 67.31430
## 3 -25.60820 49.66515
## 4 -57.87931 40.08621
## 5 -52.85529 32.19891
5,] <- c(-55.3,40.7) #small number of regions centrepoints not optimal
centroids_df[33,] <- c(149.3,39) #small number of regions centrepoints not optimal
centroids_df[36,] <- c(160.6,-36.3)
centroids_df[51,] <- c(75,-36.2)
centroids_df[
$lonc <- NA
long_df$latc <- NA
long_dfhead(long_df)
## # A tibble: 6 × 11
## long lat order hole piece group id area truid lonc latc
## <dbl> <dbl> <int> <lgl> <fct> <fct> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 180 89.1 1 FALSE 1 0.1 0 0 NA NA NA
## 2 180 71.5 2 FALSE 1 0.1 0 0 NA NA NA
## 3 180. 71.5 3 FALSE 1 0.1 0 0 NA NA NA
## 4 180. 71.5 4 FALSE 1 0.1 0 0 NA NA NA
## 5 180. 71.5 5 FALSE 1 0.1 0 0 NA NA NA
## 6 180. 71.5 6 FALSE 1 0.1 0 0 NA NA NA
for(i in c(4,5,6,7,8,9,10,18,22,23,31,32,33,34,35,36,37,38,39,40,41,51,52)){
$lonc[which(long_df$area==i)] <- centroids_df$V1[i]
long_df$latc[which(long_df$area==i)] <- centroids_df$V2[i]
long_df
}<- ggplot(data=long_df, aes(x=long, y=lat, group = group,fill=4)) +
p geom_polygon(colour='black',size=0.25)+ #longhurst outlines with half width size
geom_text(data=na.omit(long_df),aes(label = truid, x = lonc, y = latc,family="Times", col="red"),size=5) +
ylab("Latitude")+xlab("Longitude")+
coord_cartesian(ylim = c(-75, 75),expand=F)+
scale_y_continuous(breaks=seq(-60,60,30)) +
scale_x_continuous(breaks=seq(-180,180,60)) +
#annotate("text",x=-133,y=-38.1,label="21",size=4,colour='black',family="Times")+ #extra label for region 21 to show that it wraps
theme_bw()+theme(text=element_text(family="Times"))+
theme(legend.title = element_blank()) +
theme(legend.position="none")
plot(p)
ggsave(paste0(figurepath, "blank_longhurst.png"), width=8.27, height=3.44, dpi=300)
6 Code to reproduce Figure 1.7
## Argofloat Example
<- map_data("world", xlim=c(-70, 10), ylim=c(15, 65))
atlmap head(atlmap)
## long lat group order region subregion
## 1 -63.00122 18.22178 1 1 Anguilla <NA>
## 2 -63.16001 18.17139 1 2 Anguilla <NA>
## 3 -63.15332 18.20029 1 3 Anguilla <NA>
## 4 -63.02603 18.26973 1 4 Anguilla <NA>
## 5 -62.97959 18.26480 1 5 Anguilla <NA>
## 6 -63.00122 18.22178 1 6 Anguilla <NA>
<- atlmap[atlmap$long < 5, ]
atlmap <- atlmap[atlmap$long > -70, ]
atlmap
<- atlmap[atlmap$lat < 65, ]
atlmap <- atlmap[atlmap$lat > 10, ]
atlmap
<- argo_floats_atlantic_2003
argo
<- argo[argo$depth==3, ]
deep $month <- factor(deep$month)
deep
<- ggplot() +
p geom_polygon(data=atlmap, aes(x=long, y=lat, group=group),
color="black", size = 0.6, fill=NA) +
geom_point(data =deep, aes(x=lon, y=lat, colour=month), size=1) +
labs( title= "Argo float locations in deep ocean in 2003", x="Longitude", y = "Latitude") +
::scalebar(data =atlmap, dist = 1000, location = "bottomleft", transform=T, dist_unit = "km",
ggsnst.dist = .05, st.size =5, height = .05, st.bottom=T, model="WGS84") +
::north(data=atlmap, location="topright", symbol=12)
ggsn p
ggsave(paste0(figurepath, "argo_float_locations_deep.png"), width=8.27, height=5, dpi=300)
7 Code to reproduce Figure 1.8
<- merge(englamap, engtotals, by.x="id", by.y="mapid", all.y=T, all.x=F)
bdf # head(bdf)
<- englainfo[, c(1, 3, 7:8)]
u colnames(u)
## [1] "Areacode" "Areaname" "centreEasting" "centreNorthing"
#head(u)
<- c("Birmingham", "City of London", "Manchester", "Newcastle upon Tyne")
cities <- u[match(cities, u$Areaname), ]
v $aname <- c("Birmingham", "London","Manchester", "Newcastle")
vhead(v)
## Areacode Areaname centreEasting centreNorthing aname
## 268 E08000025 Birmingham 408600.1 287857.5 Birmingham
## 281 E09000001 City of London 532479.9 181273.7 London
## 247 E08000003 Manchester 384654.7 394994.0 Manchester
## 264 E08000021 Newcastle upon Tyne 421956.1 568241.6 Newcastle
$xend <- v$centreEasting
v$yend <- v$centreNorthing
v$x <- v$centreEasting + c(-150000, 0, -150000, 80000)
v$y <- v$centreNorthing + c(0, -150000, 0, 50000)
v$tx <- v$centreEasting + 100000 * c(-1.5, 0, -1.5, 1)
v$ty <- v$y + 50000* c(0.5, -0.1, 0.5, 0.5)
v#v
<- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill="White")) +
spatframe # scale_fill_gradientn(colours=com, na.value="black",limits=range(bdf$no2)) +
geom_path(colour='red',size=0.5) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "A map of local authorities and regions in England", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
ggsnst.dist = .05, st.size =4, height = .06, st.bottom=T) +
annotate("text", x = v$tx, y = v$ty, label = v$aname) +
annotate("segment", x=v$x, xend=v$xend, y=v$y, yend=v$yend, arrow=arrow(), color = "purple", size=1) +
geom_point(data=aurnsites, aes(x=Easting, y=Northing), inherit.aes = FALSE, size = 1, colour = "blue")
spatframe
ggsave(filename=paste0(figurepath, "empty_frame_LA_regions.png"))
## Saving 7 x 5 in image
8 Code to reproduce Figure 1.9
<- range(bdf$covid)
plimits
plimits## [1] 4 1223
<- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=covid)) +
praw scale_fill_gradientn(colours=colpalette, na.value="black",limits=plimits) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="No of deaths")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Observed number of Covid deaths during Mar 13 to Jul 31, 2020", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
ggsnst.dist = .05, st.size =4, height = .06, st.bottom=T)
plot(praw)
ggsave(filename=paste0(figurepath, "raw_covid_death_numbers.png"))
## Saving 7 x 5 in image
$covidrate <- bdf$covid/bdf$popn*100000
bdf<- range(bdf$covidrate)
plimits <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=covidrate)) +
prate scale_fill_gradientn(colours=colpalette, na.value="black",limits=plimits) +
geom_polygon(colour='black',size=0.25) +
geom_polygon(data=engregmap, aes(x=long, y=lat, group = group), fill=NA, colour='black',size=0.6) +
coord_equal() + guides(fill=guide_colorbar(title="Death rate")) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Covid death rate per 100,000 during Mar 13 to Jul 31, 2020", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
::scalebar(data=bdf, dist =50, location = "topleft", transform=F, dist_unit = "km",
ggsnst.dist = .05, st.size =4, height = .06, st.bottom=T)
prate
ggsave(filename=paste0(figurepath, "covid_death_rate.png"))
## Saving 7 x 5 in image
9 Code to reproduce Figure 1.10
head(engdeaths)
## Areacode mapid spaceid Region popn jsa houseprice popdensity startdate
## 1 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-13
## 2 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-20
## 3 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-03-27
## 4 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-03
## 5 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-10
## 6 E06000001 0 1 NorthEast 93663 0.6 127000 999 2020-04-17
## Weeknumber no2 covid allcause noofcases lag1noofcases lag2noofcases
## 1 11 8.285714 0 18 2 0 0
## 2 12 12.000000 1 24 4 2 0
## 3 13 6.142857 1 18 19 4 2
## 4 14 7.142857 5 31 50 19 4
## 5 15 8.571429 14 33 45 50 19
## 6 16 2.428571 10 35 99 45 50
## lag3noofcases lag4noofcases n0 n1 n2 n3
## 1 0 0 -2.3634075 -7.6009025 -7.6009025 -7.6009025
## 2 0 0 -1.6702603 -2.3634075 -7.6009025 -7.6009025
## 3 0 0 -0.1121157 -1.6702603 -2.3634075 -7.6009025
## 4 2 0 0.8554683 -0.1121157 -1.6702603 -2.3634075
## 5 4 2 0.7501078 0.8554683 -0.1121157 -1.6702603
## 6 19 4 1.5385652 0.7501078 0.8554683 -0.1121157
## n4 Edeaths Ecases logEdeaths logEcases highdeathsmr
## 1 -7.600902 3.817255 21.2542 1.339532 3.056555 0
## 2 -7.600902 5.089673 21.2542 1.627214 3.056555 0
## 3 -7.600902 3.817255 21.2542 1.339532 3.056555 0
## 4 -7.600902 6.574161 21.2542 1.883147 3.056555 0
## 5 -2.363408 6.998301 21.2542 1.945667 3.056555 1
## 6 -1.670260 7.422440 21.2542 2.004508 3.056555 1
$covidrate <- 100000*engdeaths$covid/engdeaths$popn
engdeathssummary(engdeaths$covidrate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.2832 1.9303 4.2369 6.3483 40.0309
<- ggplot(data=engdeaths, aes(x=factor(Weeknumber), y=covidrate)) +
ptime geom_boxplot() +
labs(x = "Week", y = "Death rate per 100,000") +
stat_summary(fun=median, geom="line", aes(group=1, col="red")) +
theme(legend.position = "none")
ptime
ggsave(filename=paste0(figurepath, "deathrate_weekly_boxplots.png"))
## Saving 7 x 5 in image
10 Code to reproduce Figure 1.11
<- readOGR(dsn=path.expand(mappath), layer="sdr_subnational_boundaries2", verbose = FALSE)
Kmap names(Kmap)
## [1] "ISO" "FIPS" "DHSCC" "SVYTYPE" "SVYYEAR"
## [6] "CNTRYNAMEE" "CNTRYNAMEF" "CNTRYNAMES" "DHSREGEN" "DHSREGFR"
## [11] "DHSREGSP" "SVYID" "REG_ID" "Svy_Map" "MULTLEVEL"
## [16] "LEVELRNK" "REGVAR" "REGCODE" "REGNAME" "OTHREGVAR"
## [21] "OTHREGCO" "OTHREGNA" "LEVELCO" "LEVELNA" "REPALLIND"
## [26] "REGNOTES" "SVYNOTES"
# head(Kmap@data)
<- tidy(Kmap)
adf #head(adf)
# table(adf$id)
# head(adf)
# head(kdat)
$vacprop <- kdat$yvac/kdat$n
kdat<- merge(kdat, adf)
udf # head(udf)
<- range(udf$vacprop)
a <- ggplot(data=udf, aes(x=long, y=lat, group = group, fill=vacprop)) +
vmap scale_fill_gradientn(colours=colpalette, na.value="black",limits=a) +
# geom_path(colour='black',size=0.5) +
geom_polygon(colour='black',size=0.25) +
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Observed vaccination map of 47 Counties in Kenya", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())
vmap
ggsave(file=paste0(figurepath, "Observed_vaccination_rate.png"))
## Saving 7 x 5 in image
11 Code to reproduce Figures 1.12 and 1.13
# load(file=paste0(dpath, "C11us48cancerdata.RData"))
head(us48cancertot)
## Name state fips totcount totpop totrate AIAN.HL AIAN.NHL
## 1 Alabama AL 1 25611.33 4736261 540.1333 0.1272515 0.5892589
## 2 Arizona AZ 4 29450.87 6363790 462.0467 1.3033103 4.2843893
## 3 Arkansas AR 5 15938.33 2894066 550.0467 0.1995402 0.7781714
## 4 California CA 6 165372.00 37325590 442.7733 1.2801805 0.5671331
## 5 Colorado CO 8 22118.60 5047750 437.6800 0.9988665 0.7593700
## 6 Connecticut CT 9 21617.73 3555387 607.9467 0.2868551 0.2620113
## API.HL API.NHL BAA.HL BAA.NHL White.HL White.NHL
## 1 0.08855458 1.264443 0.2878583 26.567691 3.073277 68.00167
## 2 0.37435044 3.157346 0.6966057 4.198725 27.147177 58.83810
## 3 0.08576009 1.550466 0.2291451 15.805733 5.679722 75.67146
## 4 0.81832412 14.032092 0.8891200 6.373004 34.193313 41.84683
## 5 0.25679949 3.199021 0.5706648 4.324408 18.605895 71.28497
## 6 0.18256169 4.045786 1.5652350 10.071039 11.344880 72.24163
plot_usmap(data = us48cancertot, values = "totrate", color = "red", exclude=c("AK", "HI")) +
# scale_fill_continuous(name = "Cancer rate", label = scales::comma) +
scale_fill_gradientn(colours=colpalette,na.value="black",limits=range(us48cancertot$totrate),
name = "Cancer rate") +
theme(legend.position = "right")
ggsave(paste0(figurepath, "uscancer_rate.png"), width=8.27, height=3.44, dpi=300)
head(us48cancer0317)
## fips state Year totcount totpop totrate malescount malespop malesrate
## 1 1 AL 2003 21472 4503491 476.8 11228 2179422 515.2
## 2 1 AL 2004 22951 4530729 506.6 12024 2192872 548.3
## 3 1 AL 2005 23342 4565917 511.2 12172 2211403 550.4
## 4 1 AL 2006 24113 4628981 520.9 12798 2243501 570.4
## 5 1 AL 2007 25086 4672840 536.8 13236 2265565 584.2
## 6 1 AL 2008 26093 4718206 553.0 13708 2287949 599.1
## femalescount femalespop femalesrate AIAN.HL AIAN.NHL API.HL API.NHL
## 1 10244 2324069 440.8 0.06708129 0.5561019 0.05142677 0.9124255
## 2 10927 2337857 467.4 0.07808898 0.5614108 0.06045385 0.9714331
## 3 11170 2354514 474.4 0.08941301 0.5661292 0.06757400 1.0243107
## 4 11315 2385480 474.3 0.10133980 0.5719185 0.07634510 1.0712509
## 5 11850 2407275 492.3 0.11241558 0.5782351 0.08480924 1.1150392
## 6 12385 2430257 509.6 0.12699742 0.5833997 0.09126350 1.1586819
## BAA.HL BAA.NHL White.HL White.NHL unemployment
## 1 0.2035754 26.22443 1.975512 70.00944 6.0
## 2 0.2141819 26.25379 2.147623 69.71302 5.7
## 3 0.2288719 26.26904 2.345592 69.40907 4.5
## 4 0.2426236 26.34064 2.557496 69.03839 4.0
## 5 0.2551981 26.36388 2.771569 68.71885 4.0
## 6 0.2730275 26.41459 2.978441 68.37359 5.7
<- sum(us48cancer0317$totcount)
a1 <- sum(us48cancer0317$totpop)
a2 <- a1/a2
rr $exptot <- rr * us48cancer0317$totpop
us48cancer0317summary(us48cancer0317$exptot)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2658 10061 23959 33637 38976 208034
$obs_smr <- us48cancer0317$totcount/us48cancer0317$exptot
us48cancer0317
colnames(us48cancer0317)
## [1] "fips" "state" "Year" "totcount" "totpop"
## [6] "totrate" "malescount" "malespop" "malesrate" "femalescount"
## [11] "femalespop" "femalesrate" "AIAN.HL" "AIAN.NHL" "API.HL"
## [16] "API.NHL" "BAA.HL" "BAA.NHL" "White.HL" "White.NHL"
## [21] "unemployment" "exptot" "obs_smr"
<- us48cancer0317[, c("state", "Year", "obs_smr")]
u <- u[u$Year==2003, ]
u <- u[order(u$obs_smr), ]
u # u
<- data.frame(state=c("PA", "WV", "ME", "UT", "TX","TN", "FL", "CA", "ND", "NY"))
pstate
<- merge(pstate, us48cancer0317)
dp <- merge(pstate, us48cancer0317)
dp2 dim(dp)
## [1] 150 23
# head(dp)
<- ggplot(data=dp, aes(x=Year, y=obs_smr, color=state, group=state)) +
p1 # geom_boxplot() + facet_wrap(~year, ncol=5) +
geom_line() +
labs(x="Year", y="Annual observed SMR") +
scale_colour_discrete(guide="none") +
geom_dl(data=dp, aes(x=Year, y=obs_smr, label =state), method = list(dl.combine("last.bumpup")), cex = 0.8)
p1
ggsave(filename=paste0(figurepath, "obsd_SMR_10.png"))
## Saving 7 x 5 in image
12 Code to reproduce Figure 1.14
# load(file=paste0(dpath, "C11England_LA_map_and_Respiratory.RData"))
head(eng323_la_map)
## # A tibble: 6 × 7
## long lat order hole piece group id
## <dbl> <dbl> <int> <lgl> <fct> <fct> <chr>
## 1 548881. 191088. 1 FALSE 1 E09000002.1 E09000002
## 2 548852. 190846. 2 FALSE 1 E09000002.1 E09000002
## 3 548886. 190844. 3 FALSE 1 E09000002.1 E09000002
## 4 548881. 190797. 4 FALSE 1 E09000002.1 E09000002
## 5 549000. 190881. 5 FALSE 1 E09000002.1 E09000002
## 6 549097. 190702. 6 FALSE 1 E09000002.1 E09000002
<- summaryBy(smr ~ Areacode, data = Engrespriratory, FUN =mean)
la_mean head(la_mean)
## Areacode smr.mean
## 1 E06000001 0.7508640
## 2 E06000002 1.1941685
## 3 E06000003 0.9379811
## 4 E06000004 1.0992236
## 5 E06000005 0.7233094
## 6 E06000006 0.8726364
summary(la_mean)
## Areacode smr.mean
## E06000001: 1 Min. :0.1682
## E06000002: 1 1st Qu.:0.4481
## E06000003: 1 Median :0.5635
## E06000004: 1 Mean :0.6729
## E06000005: 1 3rd Qu.:0.8291
## E06000006: 1 Max. :2.2239
## (Other) :317
<- merge(eng323_la_map, la_mean, by.x="id", by.y="Areacode", all.y=T, all.x=F)
bdf <- range(bdf$smr.mean)
mr <- ggplot( data=bdf, aes(x=long, y=lat, group = group,fill=smr.mean)) +
psmr scale_fill_gradientn(colours=colpalette,na.value="black",limits=mr) +
geom_polygon(colour='black',size=0.25) + #longhurst outlines with half width size
coord_equal() + guides(fill=guide_colorbar(title="SMR"))+
theme_bw()+theme(text=element_text(family="Times")) +
labs(title= "Mean standardised mortality rate, 2007-11", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank()) +
theme(legend.position =c(0.2, 0.5)) +
::scalebar(data=bdf, dist =50, location = "bottomright", transform=F, dist_unit = "km",
ggsnst.dist = .05, st.size =4, height = .06, st.bottom=T)
psmr
png(file=paste0(figurepath, "smr_england_07-11.png"))
::north2(psmr, x=0.65, y=0.80, symbol=12)
ggsndev.off()
## png
## 2
<- ggplot(data=Engrespriratory, aes(x=month, y=smr, color=month)) + geom_boxplot() +
ptime facet_wrap(~year, ncol=5) +
theme(legend.position = "none") +
labs(#title="Boxplot of square-root precipitation against months by year",
x = "Month", y = "SMR")
ptime
ggsave(filename=paste0(figurepath, "smr_eng_boxplot.png"))
## Saving 7 x 5 in image
13 Code to reproduce Figures 1.15 and 1.16
# load(file=paste0(dpath, "C11childpovertylondon.RData"))
dim(childpoverty)
## [1] 330 19
colnames(childpoverty)
## [1] "id" "Areacode" "LA" "Areaname"
## [5] "Areahectares" "landareaSqKm" "imdavscore07" "year"
## [9] "population" "medianincome" "econinactive" "houseprice"
## [13] "nGPregistration" "punder16" "pall" "mincome"
## [17] "log10price" "logitpall" "logitinactive"
# summary(childpoverty)
## Get the average fix data
head(lainfo)
## id Areacode Areaname hectares Abbrevname centerx centery
## 1 0 E09000021 Kingston upon Thames 3726.117 Kingston 519297.6 166820.0
## 2 1 E09000008 Croydon 8649.441 Croydon 533290.2 163541.2
## 3 2 E09000006 Bromley 15013.487 Bromley 542895.5 165655.5
## 4 3 E09000018 Hounslow 5658.541 Hounslow 513515.5 175643.2
## 5 4 E09000009 Ealing 5554.428 Ealing 515887.9 181715.5
## 6 5 E09000016 Havering 11445.735 Havering 554049.0 187392.0
<- aggregate.data.frame(childpoverty[, c(7, 9:15)], by=list(id=childpoverty$id), FUN = mean)
u <- merge(lainfo, u)
lonfixdata <- lonfixdata [order(as.numeric(lonfixdata$id)), ]
lonfixdata
colnames(lonfixdata)
## [1] "id" "Areacode" "Areaname" "hectares"
## [5] "Abbrevname" "centerx" "centery" "imdavscore07"
## [9] "population" "medianincome" "econinactive" "houseprice"
## [13] "nGPregistration" "punder16" "pall"
<- c(0, 20, 25, 30, 40, 100)
b <- c("< 20", "20-25", "25-30", "30-40", "> 40")
labss $cutpall <- factor(cut(lonfixdata$pall, breaks=b, labels=labss))
lonfixdata$cutpunder16 <- factor(cut(lonfixdata$punder16, breaks=b, labels=labss))
lonfixdata
<- merge(lonadf, lonfixdata, by="id")
bdf # head(bdf)
# 1 = City of London, 2 = Tower Hamlets,
# 3 = Islington, 4 = Camden, 5 = Westminster, 6 = Kensington and Chelsea,
# 7 = Hammersmith and Fulham.
colnames(bdf)
## [1] "id" "long" "lat" "order"
## [5] "hole" "piece" "group" "Areacode"
## [9] "Areaname" "hectares" "Abbrevname" "centerx"
## [13] "centery" "imdavscore07" "population" "medianincome"
## [17] "econinactive" "houseprice" "nGPregistration" "punder16"
## [21] "pall" "cutpall" "cutpunder16"
<- c("lightyellow2", "lightyellow3" , "yellow2", "firebrick2", "purple")
com <- ggplot(data=bdf, aes(x=long, y=lat, group = group, fill=cutpall)) +
lonpmap geom_polygon(colour='black',size=0.25) +
geom_text(data=lainfo, inherit.aes=F, fontface="bold", aes(label =Abbrevname, x=centerx, y=centery), size = 3) +
scale_fill_manual(values =com, guides("Percentage"), guide = guide_legend(reverse=TRUE)) +
theme_bw()+theme(text=element_text(family="Times")) +
theme(legend.position = c(0.9, 0.25)) +
coord_equal() +
labs(title= "Percentage of children living in poverty, 2006-15", x="", y = "", size=2.5) +
theme(axis.text.x = element_blank(), axis.text.y = element_blank(),axis.ticks = element_blank())
lonpmap
ggsave(filename=paste(figurepath, "london_poverty.png", sep=""))
## Saving 7 x 5 in image
head(childpoverty)
## id Areacode LA Areaname Areahectares landareaSqKm
## 201 0 E09000021 00AX Kingston upon Thames 3726 37.25
## 202 0 E09000021 00AX Kingston upon Thames 3726 37.25
## 203 0 E09000021 00AX Kingston upon Thames 3726 37.25
## 204 0 E09000021 00AX Kingston upon Thames 3726 37.25
## 205 0 E09000021 00AX Kingston upon Thames 3726 37.25
## 206 0 E09000021 00AX Kingston upon Thames 3726 37.25
## imdavscore07 year population medianincome econinactive houseprice
## 201 13.1 2006 153700 22400 22.2 249999
## 202 13.1 2007 154500 22900 25.4 290000
## 203 13.1 2008 156000 24050 23.4 285000
## 204 13.1 2009 157300 25200 21.6 270000
## 205 13.1 2010 158600 25600 22.4 300000
## 206 13.1 2011 160400 24800 25.9 300000
## nGPregistration punder16 pall mincome log10price logitpall
## 201 3622 16.1 15.2 -0.330229493 5.397938 -1.719000
## 202 3721 16.8 16.1 -0.259822636 5.462398 -1.650806
## 203 3700 16.2 15.7 -0.097886866 5.454845 -1.680721
## 204 3576 16.1 15.8 0.064048904 5.431364 -1.673185
## 205 3632 15.0 14.9 0.120374389 5.477121 -1.742466
## 206 3950 13.6 13.8 0.007723419 5.477121 -1.832002
## logitinactive
## 201 -1.254049
## 202 -1.077391
## 203 -1.185861
## 204 -1.289131
## 205 -1.242506
## 206 -1.051173
<- ggplot(data=childpoverty, aes(x=year, y=pall, color=Areaname)) +
pall geom_line() +
theme(legend.position = "none") +
labs(title="Percentage of children living in poverty", y = "Percentage") +
scale_x_continuous(name="Year", breaks=2006:2015)
pall
ggsave(filename=paste0(figurepath, "tsplot_poverty.png"))
## Saving 7 x 5 in image
# All done
<- proc.time()[3]
end.time <- (end.time-start.time)/60
comp.time# comp.time<-fancy.time(comp.time)
print(comp.time)
## elapsed
## 1.028433