# Please load the packages.
library(ggplot2)
library(ipsRdbs)
# Start with a clean workspace
rm(list=ls())
summary(rice)
## Yield Days
## Min. :2508 Min. :16.0
## 1st Qu.:3092 1st Qu.:23.5
## Median :3318 Median :31.0
## Mean :3283 Mean :31.0
## 3rd Qu.:3549 3rd Qu.:38.5
## Max. :3883 Max. :46.0
plot(rice$Days, rice$Yield, pch="*", xlab="Days", ylab="Yield")
$daymin31 <- rice$Days-31
rice<- lm(Yield ~ daymin31, data=rice)
rice.lm summary(rice.lm)
##
## Call:
## lm(formula = Yield ~ daymin31, data = rice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -691.07 -217.65 45.85 271.77 612.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3283.12 103.95 31.582 2.05e-14 ***
## daymin31 12.26 11.28 1.088 0.295
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 415.8 on 14 degrees of freedom
## Multiple R-squared: 0.07791, Adjusted R-squared: 0.01205
## F-statistic: 1.183 on 1 and 14 DF, p-value: 0.2951
# Check the diagnostics
plot(rice.lm$fit, rice.lm$res, xlab="Fitted values", ylab = "Residuals")
abline(h=0)
# Should be a random scatter
# Needs a quadratic term
qqnorm(rice.lm$res)
qqline(rice.lm$res)
<- lm(Yield ~ daymin31 + I(daymin31^2) ,data=rice)
rice.lm2 <- par(no.readonly = TRUE)
old.par par(mfrow=c(1, 2))
plot(rice.lm2$fit, rice.lm2$res, xlab="Fitted values", ylab = "Residuals")
abline(h=0)
# Should be a random scatter
# Much better plot!
qqnorm(rice.lm2$res)
qqline(rice.lm2$res)
summary(rice.lm2)
##
## Call:
## lm(formula = Yield ~ daymin31 + I(daymin31^2), data = rice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -303.96 -118.11 13.86 115.67 319.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3668.6682 76.7086 47.826 5.33e-16 ***
## daymin31 12.2632 5.5286 2.218 0.045 *
## I(daymin31^2) -4.5358 0.6744 -6.726 1.41e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 203.9 on 13 degrees of freedom
## Multiple R-squared: 0.7942, Adjusted R-squared: 0.7625
## F-statistic: 25.08 on 2 and 13 DF, p-value: 3.452e-05
par(old.par) # par(mfrow=c(1,1))
plot(rice$Days, rice$Yield, xlab="Days", ylab="Yield")
lines(rice$Days, rice.lm2$fit, lty=1, col=3)
<- lm(Yield ~ daymin31 + I(daymin31^2)+I(daymin31^3) , data=rice)
rice.lm3 #check the diagnostics
summary(rice.lm3) # Will print the summary of the fitted model
##
## Call:
## lm(formula = Yield ~ daymin31 + I(daymin31^2) + I(daymin31^3),
## data = rice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -281.97 -113.21 -6.11 97.75 330.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3668.66815 79.32668 46.248 6.82e-15 ***
## daymin31 17.52493 14.49451 1.209 0.25
## I(daymin31^2) -4.53580 0.69743 -6.504 2.92e-05 ***
## I(daymin31^3) -0.03457 0.08751 -0.395 0.70
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 210.8 on 12 degrees of freedom
## Multiple R-squared: 0.7968, Adjusted R-squared: 0.746
## F-statistic: 15.68 on 3 and 12 DF, p-value: 0.0001876
#### Predict at a new value of Days=31.1465
# Create a new data set called new
<- data.frame(daymin31=32.1465-31)
new
<- predict(rice.lm2, newdata=new, se.fit=TRUE)
a # Confidence interval for the mean of rice yield at day=31.1465
<- predict(rice.lm2, newdata=new, interval="confidence")
a
a## fit lwr upr
## 1 3676.766 3511.904 3841.628
<- predict(rice.lm2, newdata=new, interval="prediction")
b
b## fit lwr upr
## 1 3676.766 3206.461 4147.071
summary(cheese)
## Taste AceticAcid H2S LacticAcid
## Min. : 0.70 Min. : 87.97 Min. : 20.01 Min. :0.860
## 1st Qu.:13.55 1st Qu.:188.20 1st Qu.: 53.74 1st Qu.:1.250
## Median :20.95 Median :227.03 Median : 207.46 Median :1.450
## Mean :24.53 Mean :284.18 Mean : 2702.98 Mean :1.442
## 3rd Qu.:36.70 3rd Qu.:358.84 3rd Qu.: 1950.35 3rd Qu.:1.667
## Max. :57.20 Max. :637.78 Max. :26876.31 Max. :2.010
## logH2S
## Min. : 2.996
## 1st Qu.: 3.977
## Median : 5.329
## Mean : 5.942
## 3rd Qu.: 7.575
## Max. :10.199
pairs(cheese)
::ggpairs(data=cheese)
GGally## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
<- lm(Taste ~ AceticAcid + LacticAcid + logH2S, data=cheese, subset=2:30)
cheese.lm # Check the diagnostics
plot(cheese.lm$fit, cheese.lm$res, xlab="Fitted values", ylab = "Residuals")
abline(h=0)
# Should be a random scatter
qqnorm(cheese.lm$res)
qqline(cheese.lm$res)
summary(cheese.lm)
##
## Call:
## lm(formula = Taste ~ AceticAcid + LacticAcid + logH2S, data = cheese,
## subset = 2:30)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.0718 -5.9432 0.3657 5.3919 25.0876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -31.41789 9.95702 -3.155 0.00414 **
## AceticAcid 0.00478 0.01484 0.322 0.74997
## LacticAcid 21.70560 8.69079 2.498 0.01945 *
## logH2S 3.84994 1.21284 3.174 0.00396 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.06 on 25 degrees of freedom
## Multiple R-squared: 0.6631, Adjusted R-squared: 0.6227
## F-statistic: 16.4 on 3 and 25 DF, p-value: 4.222e-06
<- lm(Taste ~ LacticAcid + logH2S, data=cheese)
cheese.lm2 # Check the diagnostics
plot(cheese.lm2$fit, cheese.lm2$res, xlab="Fitted values", ylab = "Residuals")
abline(h=0)
qqnorm(cheese.lm2$res)
qqline(cheese.lm2$res)
summary(cheese.lm2)
##
## Call:
## lm(formula = Taste ~ LacticAcid + logH2S, data = cheese)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.343 -6.529 -1.164 4.844 25.618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -27.592 8.982 -3.072 0.00481 **
## LacticAcid 19.888 7.959 2.499 0.01885 *
## logH2S 3.946 1.136 3.475 0.00174 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.942 on 27 degrees of freedom
## Multiple R-squared: 0.6517, Adjusted R-squared: 0.6259
## F-statistic: 25.26 on 2 and 27 DF, p-value: 6.552e-07
# How can we predict?
<- data.frame(AceticAcid = 300, LacticAcid = 1.5, logH2S=4)
newcheese <- predict(cheese.lm2, newdata=newcheese, se.fit=TRUE)
cheese.pred
cheese.pred## $fit
## 1
## 18.02409
##
## $se.fit
## [1] 3.111924
##
## $df
## [1] 27
##
## $residual.scale
## [1] 9.942372
# Obtain confidence interval
$fit + c(-1, 1) * qt(0.975, df=27) * cheese.pred$se.fit
cheese.pred## [1] 11.63895 24.40923
# Using R to predict
<- predict(cheese.lm2, newdata=newcheese, interval="confidence")
cheese.pred.conf.limits
cheese.pred.conf.limits## fit lwr upr
## 1 18.02409 11.63895 24.40923
# How to find prediction interval
<- predict(cheese.lm2, newdata=newcheese, interval="prediction")
cheese.pred.pred.limits
cheese.pred.pred.limits## fit lwr upr
## 1 18.02409 -3.351891 39.40007
pairs(puffin)
summary(puffin)
## Nesting_Frequency Grass_Cover Mean_Soil_Depth Slope_Angle
## Min. : 0.000 Min. : 0.00 Min. :24.30 Min. : 2.00
## 1st Qu.: 0.000 1st Qu.:40.00 1st Qu.:32.75 1st Qu.: 7.25
## Median : 7.500 Median :60.00 Median :37.40 Median :10.00
## Mean : 7.684 Mean :56.45 Mean :37.72 Mean :15.00
## 3rd Qu.:12.750 3rd Qu.:80.00 3rd Qu.:42.83 3rd Qu.:21.25
## Max. :25.000 Max. :95.00 Max. :51.40 Max. :38.00
## Distance_from_Edge
## Min. : 3.00
## 1st Qu.:18.00
## Median :30.00
## Mean :30.39
## 3rd Qu.:44.25
## Max. :60.00
$sqrtfreq <- sqrt(puffin$Nesting_Frequency)
puffin
<- lm(sqrtfreq~ Grass_Cover + Mean_Soil_Depth + Slope_Angle
puff.sqlm + Distance_from_Edge, data=puffin)
#par(mfrow=c(2,1))
qqnorm(puff.sqlm$res,main="Normal probability plot")
qqline(puff.sqlm$res)
plot(puff.sqlm$fit, puff.sqlm$res,xlab="Fitted values",ylab="Residuals", main="Anscombe plot")
abline(h=0)
summary(puff.sqlm)
##
## Call:
## lm(formula = sqrtfreq ~ Grass_Cover + Mean_Soil_Depth + Slope_Angle +
## Distance_from_Edge, data = puffin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17058 -0.28555 0.07289 0.43606 1.08192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4176153 0.7479664 5.906 1.27e-06 ***
## Grass_Cover 0.0004179 0.0045698 0.091 0.9277
## Mean_Soil_Depth 0.0486374 0.0181385 2.681 0.0114 *
## Slope_Angle -0.0318777 0.0182695 -1.745 0.0903 .
## Distance_from_Edge -0.1189727 0.0134970 -8.815 3.45e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6217 on 33 degrees of freedom
## Multiple R-squared: 0.8853, Adjusted R-squared: 0.8714
## F-statistic: 63.67 on 4 and 33 DF, p-value: 4.753e-15
#####################################
# F test for two betas at the same time:
######################################
<- lm(sqrtfreq~ Mean_Soil_Depth + Distance_from_Edge, data=puffin)
puff.sqlm2
anova(puff.sqlm)
## Analysis of Variance Table
##
## Response: sqrtfreq
## Df Sum Sq Mean Sq F value Pr(>F)
## Grass_Cover 1 7.307 7.307 18.904 0.0001241 ***
## Mean_Soil_Depth 1 0.820 0.820 2.122 0.1546449
## Slope_Angle 1 60.278 60.278 155.945 4.725e-14 ***
## Distance_from_Edge 1 30.034 30.034 77.700 3.453e-10 ***
## Residuals 33 12.756 0.387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(puff.sqlm2)
## Analysis of Variance Table
##
## Response: sqrtfreq
## Df Sum Sq Mean Sq F value Pr(>F)
## Mean_Soil_Depth 1 0.513 0.513 1.2596 0.2694
## Distance_from_Edge 1 96.437 96.437 236.9547 <2e-16 ***
## Residuals 35 14.245 0.407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
<- 1/2*(14.245-12.756)/0.387
fval
# 1.924
qf(0.95, 2, 33)
## [1] 3.284918
# 3.28
1-pf(fval, 2, 33)
## [1] 0.1620835
# 0.162
anova(puff.sqlm2, puff.sqlm)
## Analysis of Variance Table
##
## Model 1: sqrtfreq ~ Mean_Soil_Depth + Distance_from_Edge
## Model 2: sqrtfreq ~ Grass_Cover + Mean_Soil_Depth + Slope_Angle + Distance_from_Edge
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 35 14.245
## 2 33 12.756 2 1.4889 1.926 0.1618
summary(emissions)
Make Odometer Capacity CS505
Length:54 Min. : 7285 Min. :1.300 Min. : 3.675
Class :character 1st Qu.: 44208 1st Qu.:1.600 1st Qu.: 8.364
Mode :character Median : 80164 Median :2.000 Median :10.598
Mean : 84444 Mean :2.346 Mean :10.949
3rd Qu.:117248 3rd Qu.:2.900 3rd Qu.:13.125
Max. :232574 Max. :5.000 Max. :21.475
T867 H505 ADR27 ADR37
Min. : 1.158 Min. : 1.340 Min. :0.480 Min. :0.340
1st Qu.: 4.188 1st Qu.: 4.884 1st Qu.:1.053 1st Qu.:1.005
Median : 6.769 Median : 8.795 Median :1.475 Median :1.423
Mean : 7.217 Mean : 9.376 Mean :1.525 Mean :1.478
3rd Qu.:10.001 3rd Qu.:12.906 3rd Qu.:1.981 3rd Qu.:2.029
Max. :15.982 Max. :21.465 Max. :2.851 Max. :2.862
logCS505 logT867 logH505 logADR27
Min. :1.302 Min. :0.1466 Min. :0.2927 Min. :-0.73397
1st Qu.:2.124 1st Qu.:1.4322 1st Qu.:1.5850 1st Qu.: 0.05191
Median :2.361 Median :1.9104 Median :2.1742 Median : 0.38857
Mean :2.318 Mean :1.8185 Mean :2.0440 Mean : 0.33895
3rd Qu.:2.575 3rd Qu.:2.3026 3rd Qu.:2.5577 3rd Qu.: 0.68382
Max. :3.067 Max. :2.7715 Max. :3.0664 Max. : 1.04765
logADR37
Min. :-1.078810
1st Qu.: 0.004947
Median : 0.352585
Mean : 0.290615
3rd Qu.: 0.707568
Max. : 1.051611
<- emissions[, c(8, 4:7)]
rawdata pairs(rawdata)
# Fit the model on the raw scale
<- lm(ADR37 ~ ADR27 + CS505 + T867 + H505, data=rawdata)
raw.lm <- par(no.readonly = TRUE)
old.par par(mfrow=c(2,1))
plot(raw.lm$fit, raw.lm$res,xlab="Fitted values",ylab="Residuals", main="Anscombe plot")
abline(h=0)
qqnorm(raw.lm$res,main="Normal probability plot")
qqline(raw.lm$res)
# summary(raw.lm)
<- log(rawdata)
logdata # This only logs the values but not the column names!
# We can use the following command to change the column names or you can use
# fix(logdata) to do it.
dimnames(logdata)[[2]] <- c("logADR37", "logCS505", "logT867", "logH505", "logADR27")
pairs(logdata)
<- lm(logADR37 ~ logADR27 + logCS505 + logT867 + logH505, data=logdata)
log.lm plot(log.lm$fit, log.lm$res,xlab="Fitted values",ylab="Residuals", main="Anscombe plot")
abline(h=0)
qqnorm(log.lm$res,main="Normal probability plot")
qqline(log.lm$res)
summary(log.lm)
Call: lm(formula = logADR37 ~ logADR27 + logCS505 + logT867 + logH505, data = logdata)
Residuals: Min 1Q Median 3Q Max -0.140390 -0.038389 -0.003712 0.040204 0.153275
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.12997 0.43073 -0.302 0.764119
logADR27 0.94173 0.24140 3.901 0.000292 logCS505 -0.16790
0.16086 -1.044 0.301712
logT867 0.15736 0.08986 1.751 0.086177 .
logH505 0.09997 0.01857 5.385 2.04e-06 — Signif. codes: 0
‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
Residual standard error: 0.06038 on 49 degrees of freedom Multiple R-squared: 0.9852, Adjusted R-squared: 0.984 F-statistic: 815.9 on 4 and 49 DF, p-value: < 2.2e-16
<- lm(logADR37 ~ logADR27 + logT867 + logH505, data=logdata)
log.lm2 summary(log.lm2)
Call: lm(formula = logADR37 ~ logADR27 + logT867 + logH505, data = logdata)
Residuals: Min 1Q Median 3Q Max -0.157932 -0.039221 -0.004817 0.038672 0.157626
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.57484 0.06231 -9.226 2.25e-12 logADR27
0.69588 0.05289 13.157 < 2e-16 logT867 0.24471 0.03275
7.472 1.10e-09 logH505 0.09030 0.01610 5.608 8.85e-07
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1
Residual standard error: 0.06043 on 50 degrees of freedom Multiple R-squared: 0.9849, Adjusted R-squared: 0.984 F-statistic: 1086 on 3 and 50 DF, p-value: < 2.2e-16
plot(log.lm2$fit, log.lm2$res,xlab="Fitted values",ylab="Residuals", main="Anscombe plot")
abline(h=0)
qqnorm(log.lm2$res,main="Normal probability plot")
qqline(log.lm2$res)
par(old.par)
#####################################
# Multicollinearity Analysis
######################################
<- lm(logADR27 ~ logT867 + logCS505 + logH505, data=logdata)
mod.adr27 summary(mod.adr27) # Multiple R^2 = 0.9936,
Call: lm(formula = logADR27 ~ logT867 + logCS505 + logH505, data = logdata)
Residuals: Min 1Q Median 3Q Max -0.060363 -0.020065 -0.005929 0.015471 0.153280
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.77188 0.02975 -59.56 < 2e-16 logT867
0.36474 0.01052 34.66 < 2e-16 logCS505 0.65020 0.02063
31.52 < 2e-16 * logH505 -0.02901 0.01007 -2.88 0.00584
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1
Residual standard error: 0.03537 on 50 degrees of freedom Multiple R-squared: 0.9936, Adjusted R-squared: 0.9932 F-statistic: 2589 on 3 and 50 DF, p-value: < 2.2e-16
<- lm(logT867 ~ logADR27 + logH505 + logCS505, data=logdata)
mod.t867 summary(mod.t867) # Multiple R^2 = 0.977,
Call: lm(formula = logT867 ~ logADR27 + logH505 + logCS505, data = logdata)
Residuals: Min 1Q Median 3Q Max -0.46512 -0.04505 0.01989 0.05619 0.16509
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.64307 0.16840 27.572 <2e-16 logADR27
2.63215 0.07594 34.663 <2e-16 logH505 0.07192 0.02739
2.626 0.0114 *
logCS505 -1.66719 0.09218 -18.086 <2e-16 *** — Signif. codes: 0
‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1
Residual standard error: 0.09503 on 50 degrees of freedom Multiple R-squared: 0.977, Adjusted R-squared: 0.9757 F-statistic: 709.1 on 3 and 50 DF, p-value: < 2.2e-16
<- lm(logCS505 ~ logADR27 + logH505 + logT867, data=logdata)
mod.cs505 summary(mod.cs505) # Multiple R^2 = 0.9837,
Call: lm(formula = logCS505 ~ logADR27 + logH505 + logT867, data = logdata)
Residuals: Min 1Q Median 3Q Max -0.197444 -0.025126 0.008913 0.028843 0.104478
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.64960 0.05473 48.411 < 2e-16 logADR27
1.46430 0.04646 31.519 < 2e-16 logH505 0.05760 0.01415
4.072 0.000166 logT867 -0.52029 0.02877 -18.086 < 2e-16
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01
’’ 0.05 ‘.’ 0.1 ’ ’ 1
Residual standard error: 0.05309 on 50 degrees of freedom Multiple R-squared: 0.9837, Adjusted R-squared: 0.9828 F-statistic: 1008 on 3 and 50 DF, p-value: < 2.2e-16
<- lm(logH505 ~ logADR27 + logCS505 + logT867, data=logdata)
mod.h505 summary(mod.h505) # Multiple R^2 = 0.5784,
Call: lm(formula = logH505 ~ logADR27 + logCS505 + logT867, data = logdata)
Residuals: Min 1Q Median 3Q Max -1.50570 -0.11784 0.07984 0.24783 0.85079
Coefficients: Estimate Std. Error t value Pr(>|t|)
(Intercept) -9.3778 3.0009 -3.125 0.002959 ** logADR27 -4.9041 1.7029
-2.880 0.005844 ** logCS505 4.3236 1.0618 4.072 0.000166 ** logT867
1.6848 0.6417 2.626 0.011444
— Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05
‘.’ 0.1 ’ ’ 1
Residual standard error: 0.4599 on 50 degrees of freedom Multiple R-squared: 0.5784, Adjusted R-squared: 0.5531 F-statistic: 22.87 on 3 and 50 DF, p-value: 1.849e-09
# Variance inflation factors
<- c(0.9936, 0.977, 0.9837, 0.5784)
vifs <- 1/(1-vifs)
vifs #Condition numbers
<- logdata
X # X is a copy of logdata
1] <- 1
X[,# the first column of X is 1
# this is for the intercept
<- as.matrix(X)
X # Coerces X to be a matrix
<- t(X) %*% X # Gives X^T X
xtx <- eigen(xtx)$values
eigenvalues <- max(eigenvalues)/min(eigenvalues)
kappa <- sqrt(kappa)
kappa # kappa = 244 is much LARGER than 30!
### Validation statistic
# Fit the log.lm2 model with the first 45 observations
# use the fitted model to predict the remaining 9 observations
# Calculate the mean square error validation statistic
<- lm(logADR37 ~ logADR27 + logT867 + logH505, data=logdata, subset=1:45)
log.lmsub # Now predict all 54 observations using the fitted model
<- predict(log.lmsub, logdata, se.fit=TRUE)
mod.pred $fit # provides all the 54 predicted values mod.pred
1 2 3 4 5 6
0.869733716 0.751945766 0.464377364 0.065106368 -0.750908661 0.676749888 7 8 9 10 11 12 0.863161161 0.533123773 0.724024242 0.101489732 0.758165626 -0.009235144 13 14 15 16 17 18 0.262467812 0.499359749 0.896844370 -0.512513262 0.860758626 0.288778088 19 20 21 22 23 24 0.638085289 0.691680440 0.047967101 -0.125585969 0.542661299 -0.298423547 25 26 27 28 29 30 -0.607263492 0.529236907 0.296110240 0.476668407 -0.343488224 0.286358929 31 32 33 34 35 36 0.732443474 0.740078857 -0.002182982 0.421013296 -0.201423341 0.002081194 37 38 39 40 41 42 0.782644479 -0.305494632 -0.978948479 0.130633957 0.245486987 0.071403922 43 44 45 46 47 48 0.781436122 0.341921707 0.195907253 0.425131377 -0.094842350 0.754611045 49 50 51 52 53 54 0.444664924 0.353051544 0.805738645 -0.303182437 -0.356706826 1.052803842
$pred <- mod.pred$fit
logdata# Get only last 9
<- logdata[46:54, ]
a <- a$logADR37 - a$pred
validation.residuals <- mean(validation.residuals^2)
validation.stat validation.stat
[1] 0.005814136