Post on 16-Oct-2020
Project_3.R
victoradaji
Sun Mar 10 01:30:54 2019
# READ FILE
# Raise the print limit, clear the workspace, then load the crab data
# (first row of the file supplies the column names).
options(max.print = 999999)
rm(list = ls())
df <- read.table("crabs.data", header = TRUE)

# Swap Columns And Rearrange Data
# Reorder the columns so that postsz comes first, followed by presz.
new_df <- df[c("postsz", "presz", "inc", "year", "lf")]

# Plot PostMolt vs Pre-Molt Size
# A = post-molt size (postsz), B = pre-molt size (presz); both are kept as
# free-standing vectors and reused by the later steps of the script.
A <- new_df[["postsz"]]
B <- new_df[["presz"]]
plot(x = A, y = B)
40 60 80 100 120 140 160
4060
8010
014
0
A
B
# Scatter plot with a loess smooth of post-molt size against pre-molt size.
# B holds presz (pre-molt) and A holds postsz (post-molt), so the x axis is
# the pre-molt size and the y axis the post-molt size. The axis labels were
# previously the wrong way round.
scatter.smooth(
  x = B,
  y = A,
  xlab = "Pre-Molting Size",
  ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)
1
40 60 80 100 120 140 160
4060
8010
014
0
Crab Molting Size
Post−Molting Size
Pre
−M
oltin
g S
ize
# Finding linear model
# Least-squares fit of B (presz) on A (postsz). A and B are not columns of
# new_df, so lm() resolves them from the environment of the formula; the
# `data` argument is passed through unchanged.
model <- lm(formula = B ~ A, data = new_df)
summary(model)
#### Call:## lm(formula = B ~ A, data = new_df)#### Residuals:## Min 1Q Median 3Q Max## -6.1557 -1.3052 0.0564 1.3174 14.6750#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -25.21370 1.00089 -25.19 <2e-16 ***## A 1.07316 0.00692 155.08 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 2.199 on 470 degrees of freedom## Multiple R-squared: 0.9808, Adjusted R-squared: 0.9808## F-statistic: 2.405e+04 on 1 and 470 DF, p-value: < 2.2e-16plot(model)
2
20 40 60 80 100 120 140
−5
05
1015
Fitted values
Res
idua
ls
lm(B ~ A)
Residuals vs Fitted
177
302178
−3 −2 −1 0 1 2 3
−2
02
46
8
Theoretical Quantiles
Sta
ndar
dize
d re
sidu
als
lm(B ~ A)
Normal Q−Q
177
178302
3
20 40 60 80 100 120 140
0.0
0.5
1.0
1.5
2.0
2.5
Fitted values
Sta
ndar
dize
d re
sidu
als
lm(B ~ A)
Scale−Location177
178 302
0.00 0.02 0.04 0.06 0.08 0.10
−4
−2
02
46
8
Leverage
Sta
ndar
dize
d re
sidu
als
lm(B ~ A)
Cook's distance1
0.5
0.5
1
Residuals vs Leverage
177
178302
# Plot of PreMolt vs PostMolt
# Scatter of B (presz) against A (postsz) with the fitted line overlaid,
# using the intercept and slope taken from `model`.
plot(B ~ A, data = new_df, xlab = "PostMolt", ylab = "PreMolt")
abline(a = coef(model)[[1]], b = coef(model)[[2]])

# Line Equation
# Coefficients rounded to two decimals for the on-plot label built below.
cf <- round(coef(model), digits = 2)
4
# Build the text "PreMolt = <intercept> +/- <|slope|> PostMolt" from the
# rounded coefficients in cf and write it in the top margin of the plot.
eq <- paste0(
  "PreMolt = ", cf[1],
  ifelse(sign(cf[2]) == 1, " + ", " - "),
  abs(cf[2]),
  " PostMolt "
)
mtext(text = eq, side = 3, line = -2)

# Hand-computed slope and intercept from the correlation and the standard
# deviations.
# NOTE(review): cor * sd(A) / sd(B) with intercept mean(A) - slope * mean(B)
# is the least-squares line for A regressed on B, i.e. the reverse direction
# of `model` (B ~ A). Confirm which direction is intended.
slope <- cor(B, A) * (sd(A) / sd(B))
intercept <- mean(A) - (slope * mean(B))
# Breusch Pagan test functionlibrary(lmtest)
## Loading required package: zoo
#### Attaching package: 'zoo'
## The following objects are masked from 'package:base':#### as.Date, as.Date.numeric
40 60 80 100 120 140 160
4060
8010
014
0
PostMolt
Pre
Mol
t
PreMolt = −25.21 + 1.07 PostMolt
bptest(model)
#### studentized Breusch-Pagan test#### data: model## BP = 89.386, df = 1, p-value < 2.2e-16
#Finding the correlation/Pearson's value
cor(B, A)
## [1] 0.9903699str(summary(model))
## List of 11## $ call : language lm(formula = B ~ A, data = new_df)
5
## $ terms :Classes 'terms', 'formula' language B ~ A## .. ..- attr(*, "variables")= language list(B, A)## .. ..- attr(*, "factors")= int [1:2, 1] 0 1## .. .. ..- attr(*, "dimnames")=List of 2## .. .. .. ..$ : chr [1:2] "B" "A"## .. .. .. ..$ : chr "A"## .. ..- attr(*, "term.labels")= chr "A"## .. ..- attr(*, "order")= int 1## .. ..- attr(*, "intercept")= int 1## .. ..- attr(*, "response")= int 1## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>## .. ..- attr(*, "predvars")= language list(B, A)## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"## .. .. ..- attr(*, "names")= chr [1:2] "B" "A"## $ residuals : Named num [1:472] 1.7709 0.3685 -0.0852 -2.3705 2.4222 ...## ..- attr(*, "names")= chr [1:472] "1" "2" "3" "4" ...## $ coefficients : num [1:2, 1:4] -25.2137 1.07316 1.00089 0.00692 -25.1913 ...## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A"## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"## $ aliased : Named logi [1:2] FALSE FALSE## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A"## $ sigma : num 2.2## $ df : int [1:3] 2 470 2## $ r.squared : num 0.981## $ adj.r.squared: num 0.981## $ fstatistic : Named num [1:3] 24051 1 470## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"## $ cov.unscaled : num [1:2, 1:2] 2.07e-01 -1.43e-03 -1.43e-03 9.91e-06## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A"## .. ..$ : chr [1:2] "(Intercept)" "A"## - attr(*, "class")= chr "summary.lm"summary(model)$r.squared
## [1] 0.9808326summary(model)$adj.r.squared
## [1] 0.9807918summary(model)$sigma^2
## [1] 4.834374#Analysing Residuals
## Descriptive Statisticslibrary(moments)C <- residuals(model)mean(C)
## [1] -2.280672e-17sd(C)
## [1] 2.196386
6
skewness(C)
## [1] 0.8454525kurtosis(C)
## [1] 8.378684summary(C)
## Min. 1st Qu. Median Mean 3rd Qu. Max.## -6.15570 -1.30517 0.05639 0.00000 1.31741 14.67500qqnorm(C)
−3 −2 −1 0 1 2 3
−5
05
1015
Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
hist(residuals(model))
7
Histogram of residuals(model)
residuals(model)
Fre
quen
cy
−5 0 5 10 15
050
100
150
boxplot(residuals(model))
−5
05
1015
shapiro.test(residuals(model))
#### Shapiro-Wilk normality test#### data: residuals(model)## W = 0.94724, p-value = 6.356e-12# Residuals Distributionplot(A, C, ylab="Residuals", xlab="Post-Molting", main="Residual Variation")abline(0, 0)# the horizon
8
40 60 80 100 120 140 160
−5
05
1015
Residual Variation
Post−Molting
Res
idua
ls
######################################################
# WITHOUT OUTLIERS
# Making a new data.frame with no outliers and plot
set.seed(20)

# Threshold: two standard deviations of the residuals C.
D1 <- 2 * sd(C)

# Flag each observation: 1 when its absolute residual exceeds the threshold,
# 0 otherwise. The same flags are stored in E and in the new column Outs.
E <- ifelse(abs(C) > D1, 1, 0)
new_df$Outs <- E

# Residuals by index; flagged points get colour index 2, the rest colour 1.
plot(C, col = new_df$Outs + 1, pch = 16, ylim = c(-10, 10))
0 100 200 300 400
−10
−5
05
10
Index
C
9
# Keep only the rows whose outlier flag is 0, then report how many remain.
DF2 <- new_df[new_df$Outs == 0, ]
nrow(DF2)
## [1] 457#Plot PostMolt vs Pre-Molt Size of data without outliersA_1=DF2$postszB_1=DF2$preszplot(B_1,A_1)
60 80 100 120 140
8010
012
014
016
0
B_1
A_1
# Scatter plot with a loess smooth for the outlier-free data.
# B_1 holds DF2$presz (pre-molt) and A_1 holds DF2$postsz (post-molt), so the
# x axis is the pre-molt size and the y axis the post-molt size. The axis
# labels were previously the wrong way round.
scatter.smooth(
  x = B_1,
  y = A_1,
  xlab = "Pre-Molting Size",
  ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)
10
60 80 100 120 140
8010
012
014
016
0
Crab Molting Size
Post−Molting Size
Pre
−M
oltin
g S
ize
# Finding linear model of data without outliers
# Least-squares fit of B_1 (presz) on A_1 (postsz). Both are free-standing
# vectors rather than columns of DF2, so lm() resolves them from the
# environment of the formula.
model_1 <- lm(formula = B_1 ~ A_1, data = DF2)
summary(model_1)
#### Call:## lm(formula = B_1 ~ A_1, data = DF2)#### Residuals:## Min 1Q Median 3Q Max## -4.3648 -1.2449 0.1013 1.2760 4.4130#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -28.90660 0.94984 -30.43 <2e-16 ***## A_1 1.09861 0.00654 167.98 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 1.787 on 455 degrees of freedom## Multiple R-squared: 0.9841, Adjusted R-squared: 0.9841## F-statistic: 2.822e+04 on 1 and 455 DF, p-value: < 2.2e-16plot(model_1)
11
60 80 100 120 140
−4
−2
02
4
Fitted values
Res
idua
ls
lm(B_1 ~ A_1)
Residuals vs Fitted
231
218
443
−3 −2 −1 0 1 2 3
−2
−1
01
23
Theoretical Quantiles
Sta
ndar
dize
d re
sidu
als
lm(B_1 ~ A_1)
Normal Q−Q
231
218
443
12
60 80 100 120 140
0.0
0.5
1.0
1.5
Fitted values
Sta
ndar
dize
d re
sidu
als
lm(B_1 ~ A_1)
Scale−Location231 218443
abline(model_1)
0.00 0.01 0.02 0.03 0.04 0.05
−3
−2
−1
01
23
Leverage
Sta
ndar
dize
d re
sidu
als
lm(B_1 ~ A_1)
Cook's distance
Residuals vs Leverage
169
207
174
# Plot PreMolt vs PostMolt
# Scatter of B_1 (presz) against A_1 (postsz) for the outlier-free data,
# with the line fitted by model_1 drawn on top.
plot(B_1 ~ A_1, data = DF2, xlab = "PostMolt", ylab = "PreMolt")
abline(a = coef(model_1)[[1]], b = coef(model_1)[[2]])
13
# Line Equation without Outliers
# Round the model_1 coefficients to two decimals, build the text
# "PreMolt = <intercept> +/- <|slope|> PostMolt" and write it in the top
# margin of the current plot.
cf <- round(coef(model_1), digits = 2)
eq <- paste0(
  "PreMolt = ", cf[1],
  ifelse(sign(cf[2]) == 1, " + ", " - "),
  abs(cf[2]),
  " PostMolt "
)
mtext(text = eq, side = 3, line = -2)
80 100 120 140 160
6080
100
120
140
PostMolt
Pre
Mol
t
PreMolt = −28.91 + 1.1 PostMolt
# Hand-computed slope and intercept for the outlier-free data, from the
# correlation and the standard deviations of A_1 and B_1.
slope_1 <- cor(B_1, A_1) * (sd(A_1) / sd(B_1))
# Use slope_1 here; the previous version used `slope`, which belongs to the
# full-data computation and gave an intercept inconsistent with slope_1.
intercept_1 <- mean(A_1) - (slope_1 * mean(B_1))
# Breusch Pagan test function of data without outlierslibrary(lmtest)bptest(model_1)
#### studentized Breusch-Pagan test#### data: model_1## BP = 0.45663, df = 1, p-value = 0.4992
#Finding the correlation/Pearson's value of data without outliers
cor(B_1, A_1)
## [1] 0.9920334str(summary(model_1))
## List of 11## $ call : language lm(formula = B_1 ~ A_1, data = DF2)## $ terms :Classes 'terms', 'formula' language B_1 ~ A_1## .. ..- attr(*, "variables")= language list(B_1, A_1)## .. ..- attr(*, "factors")= int [1:2, 1] 0 1## .. .. ..- attr(*, "dimnames")=List of 2## .. .. .. ..$ : chr [1:2] "B_1" "A_1"## .. .. .. ..$ : chr "A_1"
14
## .. ..- attr(*, "term.labels")= chr "A_1"## .. ..- attr(*, "order")= int 1## .. ..- attr(*, "intercept")= int 1## .. ..- attr(*, "response")= int 1## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>## .. ..- attr(*, "predvars")= language list(B_1, A_1)## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"## .. .. ..- attr(*, "names")= chr [1:2] "B_1" "A_1"## $ residuals : Named num [1:457] 2.214 0.672 0.165 -2.324 2.571 ...## ..- attr(*, "names")= chr [1:457] "1" "2" "3" "4" ...## $ coefficients : num [1:2, 1:4] -28.9066 1.09861 0.94984 0.00654 -30.43303 ...## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A_1"## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"## $ aliased : Named logi [1:2] FALSE FALSE## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A_1"## $ sigma : num 1.79## $ df : int [1:3] 2 455 2## $ r.squared : num 0.984## $ adj.r.squared: num 0.984## $ fstatistic : Named num [1:3] 28216 1 455## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"## $ cov.unscaled : num [1:2, 1:2] 2.82e-01 -1.94e-03 -1.94e-03 1.34e-05## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A_1"## .. ..$ : chr [1:2] "(Intercept)" "A_1"## - attr(*, "class")= chr "summary.lm"summary(model_1)$r.squared
## [1] 0.9841304summary(model_1)$adj.r.squared
## [1] 0.9840955summary(model_1)$sigma^2
## [1] 3.194625#Analysing Residuals of data without outliers
## Descriptive Statisticslibrary(moments)C_1 <- residuals(model_1)mean(C_1)
## [1] -1.989604e-16sd(C_1)
## [1] 1.78539skewness(C_1)
## [1] -0.08156428kurtosis(C_1)
15
## [1] 2.591623summary(C_1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.## -4.3648 -1.2449 0.1013 0.0000 1.2760 4.4130qqnorm(C_1)
−3 −2 −1 0 1 2 3
−4
−2
02
4
Normal Q−Q Plot
Theoretical Quantiles
Sam
ple
Qua
ntile
s
hist(residuals(model_1))
16
Histogram of residuals(model_1)
residuals(model_1)
Fre
quen
cy
−4 −2 0 2 4
020
4060
80
boxplot(residuals(model_1))
−4
−2
02
4
shapiro.test(residuals(model_1))
#### Shapiro-Wilk normality test#### data: residuals(model_1)## W = 0.99362, p-value = 0.05123
# Residuals Distribution of data without outliers
# Residuals of model_1 (C_1) against A_1. A_1 is DF2$postsz, so the x axis
# is the post-molt size; it was previously labelled "PRE-Molting".
plot(
  A_1, C_1,
  ylab = "Residuals",
  xlab = "Post-Molting",
  main = "Residual Variation"
)
abline(0, 0) # the horizon
17
80 100 120 140 160
−4
−2
02
4
Residual Variation
PRE−Molting
Res
idua
ls
plot(C_1, col=DF2$Outs+1,pch=16, ylim=c(-10,10))
0 100 200 300 400
−10
−5
05
10
Index
C_1
# Predict Post Molt Sizes
# Read the population data (no header row, so the first column is V1) and
# compute predictions from `model` with confidence limits.
# NOTE(review): `model` is B ~ A, so it predicts presz from postsz; V1 is
# assumed to hold post-molt sizes - confirm against the data file.
new <- read.table("crabpop.data", header = FALSE)

# predict() looks predictors up by name in `newdata`, which must be a data
# frame; the model's predictor is called A, so V1 is supplied under that name
# instead of being passed as a bare vector.
P <- predict(
  model,
  newdata = data.frame(A = new$V1),
  interval = "confidence"
)

# P is a matrix with columns fit, lwr and upr; plot() draws the first two.
plot(P)
18
20 40 60 80 100 120 140
2040
6080
120
fit
lwr
19