Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...

Project_3.R

victoradaji

Sun Mar 10 01:30:54 2019

# READ FILE
# Raise the print limit so that long outputs are not truncated.
options(max.print = 999999)
# rm(list = ls()) was removed: wiping the caller's workspace is a hidden side
# effect, and nothing below relies on an empty environment.
df <- read.table("crabs.data", header = TRUE)

# Swap columns and rearrange data: postsz first, then presz, inc, year, lf.
new_df <- df[c("postsz", "presz", "inc", "year", "lf")]

# Plot PostMolt vs Pre-Molt Size
# A holds the post-molt sizes and B the pre-molt sizes.
A <- new_df$postsz
B <- new_df$presz
plot(A, B)

40 60 80 100 120 140 160

4060

8010

014

0

A

B

# Scatter plot with a loess smooth. x is B (presz) and y is A (postsz), so the
# axis labels are matched to those variables.
scatter.smooth(
  x = B, y = A,
  xlab = "Pre-Molting Size", ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)

1

40 60 80 100 120 140 160

4060

8010

014

0

Crab Molting Size

Post−Molting Size

Pre

−M

oltin

g S

ize

# Finding linear model
# A and B are not columns of new_df, so lm() takes them from the workspace;
# the fit regresses pre-molt size (B) on post-molt size (A).
model <- lm(B ~ A, data = new_df)
summary(model)

##
## Call:
## lm(formula = B ~ A, data = new_df)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -6.1557 -1.3052  0.0564  1.3174 14.6750
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -25.21370    1.00089  -25.19   <2e-16 ***
## A             1.07316    0.00692  155.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.199 on 470 degrees of freedom
## Multiple R-squared:  0.9808, Adjusted R-squared:  0.9808
## F-statistic: 2.405e+04 on 1 and 470 DF,  p-value: < 2.2e-16
# Diagnostic plots for the fit (residuals vs fitted, Q-Q, scale-location,
# residuals vs leverage).
plot(model)

2

20 40 60 80 100 120 140

−5

05

1015

Fitted values

Res

idua

ls

lm(B ~ A)

Residuals vs Fitted

177

302178

−3 −2 −1 0 1 2 3

−2

02

46

8

Theoretical Quantiles

Sta

ndar

dize

d re

sidu

als

lm(B ~ A)

Normal Q−Q

177

178302

3

20 40 60 80 100 120 140

0.0

0.5

1.0

1.5

2.0

2.5

Fitted values

Sta

ndar

dize

d re

sidu

als

lm(B ~ A)

Scale−Location177

178 302

0.00 0.02 0.04 0.06 0.08 0.10

−4

−2

02

46

8

Leverage

Sta

ndar

dize

d re

sidu

als

lm(B ~ A)

Cook's distance1

0.5

0.5

1

Residuals vs Leverage

177

178302

# Plot of PreMolt vs PostMolt
plot(B ~ A, data = new_df, xlab = "PostMolt", ylab = "PreMolt")
# Overlay the fitted line using the model's intercept and slope.
abline(coef(model)[1:2])

# Line Equation
# Coefficients rounded to two decimals for the on-plot label.
cf <- round(coef(model), 2)

4

# Build the label "PreMolt = <intercept> +/- <|slope|> PostMolt". cf[2] is a
# single number, so a plain if/else picks the separator instead of the
# vectorised ifelse().
eq <- paste0(
  "PreMolt = ", cf[1],
  if (sign(cf[2]) == 1) " + " else " - ", abs(cf[2]), " PostMolt "
)

# Write the equation on side 3 (top), two lines inside the plot edge.
mtext(eq, 3, line = -2)
# NOTE(review): these are the slope/intercept for regressing A (post-molt) on
# B (pre-molt), i.e. the reverse direction of `model` (B ~ A) — confirm that
# this is intended.
slope <- cor(B, A) * (sd(A) / sd(B))
intercept <- mean(A) - (slope * mean(B))

# Breusch Pagan test function
# lmtest provides bptest().
library(lmtest)

## Loading required package: zoo

#### Attaching package: 'zoo'

## The following objects are masked from 'package:base':#### as.Date, as.Date.numeric

40 60 80 100 120 140 160

4060

8010

014

0

PostMolt

Pre

Mol

t

PreMolt = −25.21 + 1.07 PostMolt

# Studentized Breusch-Pagan test on the full-data fit.
bptest(model)

##
##  studentized Breusch-Pagan test
##
## data:  model
## BP = 89.386, df = 1, p-value < 2.2e-16
# Finding the correlation / Pearson's value
cor(B, A)

## [1] 0.9903699
# Show the structure of the summary object to see which fields it offers.
str(summary(model))

## List of 11
## $ call : language lm(formula = B ~ A, data = new_df)

5

## $ terms :Classes 'terms', 'formula' language B ~ A
## .. ..- attr(*, "variables")= language list(B, A)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "B" "A"
## .. .. .. ..$ : chr "A"
## .. ..- attr(*, "term.labels")= chr "A"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(B, A)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "B" "A"
## $ residuals : Named num [1:472] 1.7709 0.3685 -0.0852 -2.3705 2.4222 ...
## ..- attr(*, "names")= chr [1:472] "1" "2" "3" "4" ...
## $ coefficients : num [1:2, 1:4] -25.2137 1.07316 1.00089 0.00692 -25.1913 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "(Intercept)" "A"
## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
## $ aliased : Named logi [1:2] FALSE FALSE
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A"
## $ sigma : num 2.2
## $ df : int [1:3] 2 470 2
## $ r.squared : num 0.981
## $ adj.r.squared: num 0.981
## $ fstatistic : Named num [1:3] 24051 1 470
## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"
## $ cov.unscaled : num [1:2, 1:2] 2.07e-01 -1.43e-03 -1.43e-03 9.91e-06
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "(Intercept)" "A"
## .. ..$ : chr [1:2] "(Intercept)" "A"
## - attr(*, "class")= chr "summary.lm"
# Coefficient of determination of the full-data fit.
summary(model)$r.squared

## [1] 0.9808326
# Adjusted R-squared of the full-data fit.
summary(model)$adj.r.squared

## [1] 0.9807918
# Squared residual standard error of the full-data fit.
summary(model)$sigma^2

## [1] 4.834374
# Analysing Residuals

## Descriptive Statistics
# moments provides skewness() and kurtosis().
library(moments)
C <- residuals(model)
mean(C)

## [1] -2.280672e-17
# Spread of the full-data residuals.
sd(C)

## [1] 2.196386

6

# Skewness of the full-data residuals.
skewness(C)

## [1] 0.8454525
# Kurtosis of the full-data residuals.
kurtosis(C)

## [1] 8.378684
# Five-number summary plus mean of the full-data residuals.
summary(C)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.
## -6.15570 -1.30517  0.05639  0.00000  1.31741 14.67500
# Normal Q-Q plot of the full-data residuals.
qqnorm(C)

−3 −2 −1 0 1 2 3

−5

05

1015

Normal Q−Q Plot


Sam

ple

Qua

ntile

s

# Histogram of the full-data residuals; the plot title and x label are taken
# from the expression passed in.
hist(residuals(model))

7

Histogram of residuals(model)

residuals(model)

Fre

quen

cy

−5 0 5 10 15

050

100

150

# Box plot of the full-data residuals.
boxplot(residuals(model))

−5

05

1015

# Shapiro-Wilk normality test on the full-data residuals.
shapiro.test(residuals(model))

##
##  Shapiro-Wilk normality test
##
## data:  residuals(model)
## W = 0.94724, p-value = 6.356e-12
# Residuals Distribution
# Residuals of the full-data fit against the post-molt sizes (A).
plot(
  A, C,
  ylab = "Residuals", xlab = "Post-Molting", main = "Residual Variation"
)
abline(0, 0) # the horizon

8

40 60 80 100 120 140 160

−5

05

1015

Residual Variation

Post−Molting

Res

idua

ls

#####################################################
# WITHOUT OUTLIERS
# Making a new data.frame with no outliers and plot
set.seed(20)
# Flag residuals whose absolute value exceeds two residual standard
# deviations: Outs is 1 for flagged rows and 0 otherwise.
D1 <- 2 * sd(C)
E <- new_df$Outs <- ifelse(abs(C) > D1, 1, 0)
# Flagged points are drawn in colour 2, the rest in colour 1.
plot(C, col = new_df$Outs + 1, pch = 16, ylim = c(-10, 10))

0 100 200 300 400

−10

−5

05

10

Index

C

9

# Keep only the rows that were not flagged (Outs is 0).
DF2 <- new_df[!new_df$Outs, ]
nrow(DF2)

## [1] 457
# Plot PostMolt vs Pre-Molt Size of data without outliers
# A_1 holds the post-molt sizes and B_1 the pre-molt sizes of the kept rows.
A_1 <- DF2$postsz
B_1 <- DF2$presz
plot(B_1, A_1)

60 80 100 120 140

8010

012

014

016

0

B_1

A_1

# Scatter plot with a loess smooth for the data without outliers. x is B_1
# (presz) and y is A_1 (postsz), so the axis labels are matched to those
# variables.
scatter.smooth(
  x = B_1, y = A_1,
  xlab = "Pre-Molting Size", ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)

10

60 80 100 120 140

8010

012

014

016

0

Crab Molting Size

Post−Molting Size

Pre

−M

oltin

g S

ize

# Finding linear model of data without outliers
# A_1 and B_1 are not columns of DF2, so lm() takes them from the workspace;
# the fit regresses pre-molt size (B_1) on post-molt size (A_1).
model_1 <- lm(B_1 ~ A_1, data = DF2)
summary(model_1)

##
## Call:
## lm(formula = B_1 ~ A_1, data = DF2)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -4.3648 -1.2449  0.1013  1.2760  4.4130
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28.90660    0.94984  -30.43   <2e-16 ***
## A_1           1.09861    0.00654  167.98   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.787 on 455 degrees of freedom
## Multiple R-squared:  0.9841, Adjusted R-squared:  0.9841
## F-statistic: 2.822e+04 on 1 and 455 DF,  p-value: < 2.2e-16
# Diagnostic plots for the fit without outliers.
plot(model_1)

11

60 80 100 120 140

−4

−2

02

4

Fitted values

Res

idua

ls

lm(B_1 ~ A_1)

Residuals vs Fitted

231

218

443

−3 −2 −1 0 1 2 3

−2

−1

01

23


Sta

ndar

dize

d re

sidu

als

lm(B_1 ~ A_1)

Normal Q−Q

231

218

443

12

60 80 100 120 140

0.0

0.5

1.0

1.5

Fitted values

Sta

ndar

dize

d re

sidu

als

lm(B_1 ~ A_1)

Scale−Location231 218443

# NOTE(review): this follows plot(model_1), so the regression line appears to
# be drawn on the last diagnostic panel rather than on a pre/post-molt
# scatter plot — confirm whether this call is intended.
abline(model_1)

0.00 0.01 0.02 0.03 0.04 0.05

−3

−2

−1

01

23

Leverage

Sta

ndar

dize

d re

sidu

als

lm(B_1 ~ A_1)

Cook's distance

Residuals vs Leverage

169

207

174

# Plot PreMolt vs PostMolt
plot(B_1 ~ A_1, data = DF2, xlab = "PostMolt", ylab = "PreMolt")
# Overlay the fitted line using the model's intercept and slope.
abline(coef(model_1)[1:2])

13

# Line Equation without Outliers
# Coefficients rounded to two decimals for the on-plot label.
cf <- round(coef(model_1), 2)
# cf[2] is a single number, so a plain if/else picks the separator instead of
# the vectorised ifelse().
eq <- paste0(
  "PreMolt = ", cf[1],
  if (sign(cf[2]) == 1) " + " else " - ", abs(cf[2]), " PostMolt "
)
# Write the equation on side 3 (top), two lines inside the plot edge.
mtext(eq, 3, line = -2)

80 100 120 140 160

6080

100

120

140

PostMolt

Pre

Mol

t

PreMolt = −28.91 + 1.1 PostMolt

# Slope and intercept computed from the outlier-free data. The intercept must
# use slope_1 (this subset's slope), not `slope` from the full data.
# NOTE(review): as written these describe regressing A_1 (post-molt) on B_1
# (pre-molt), the reverse direction of model_1 — confirm that is intended.
slope_1 <- cor(B_1, A_1) * (sd(A_1) / sd(B_1))
intercept_1 <- mean(A_1) - (slope_1 * mean(B_1))

# Breusch Pagan test function of data without outliers
library(lmtest)
bptest(model_1)

##
##  studentized Breusch-Pagan test
##
## data:  model_1
## BP = 0.45663, df = 1, p-value = 0.4992
# Finding the correlation / Pearson's value of data without outliers
cor(B_1, A_1)

## [1] 0.9920334
# Show the structure of the summary object to see which fields it offers.
str(summary(model_1))

## List of 11
## $ call : language lm(formula = B_1 ~ A_1, data = DF2)
## $ terms :Classes 'terms', 'formula' language B_1 ~ A_1
## .. ..- attr(*, "variables")= language list(B_1, A_1)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "B_1" "A_1"
## .. .. .. ..$ : chr "A_1"

14

## .. ..- attr(*, "term.labels")= chr "A_1"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(B_1, A_1)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "B_1" "A_1"
## $ residuals : Named num [1:457] 2.214 0.672 0.165 -2.324 2.571 ...
## ..- attr(*, "names")= chr [1:457] "1" "2" "3" "4" ...
## $ coefficients : num [1:2, 1:4] -28.9066 1.09861 0.94984 0.00654 -30.43303 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "(Intercept)" "A_1"
## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
## $ aliased : Named logi [1:2] FALSE FALSE
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A_1"
## $ sigma : num 1.79
## $ df : int [1:3] 2 455 2
## $ r.squared : num 0.984
## $ adj.r.squared: num 0.984
## $ fstatistic : Named num [1:3] 28216 1 455
## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"
## $ cov.unscaled : num [1:2, 1:2] 2.82e-01 -1.94e-03 -1.94e-03 1.34e-05
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "(Intercept)" "A_1"
## .. ..$ : chr [1:2] "(Intercept)" "A_1"
## - attr(*, "class")= chr "summary.lm"
# Coefficient of determination of the fit without outliers.
summary(model_1)$r.squared

## [1] 0.9841304
# Adjusted R-squared of the fit without outliers.
summary(model_1)$adj.r.squared

## [1] 0.9840955
# Squared residual standard error of the fit without outliers.
summary(model_1)$sigma^2

## [1] 3.194625
# Analysing Residuals of data without outliers

## Descriptive Statistics
# moments provides skewness() and kurtosis().
library(moments)
C_1 <- residuals(model_1)
mean(C_1)

## [1] -1.989604e-16
# Spread of the residuals without outliers.
sd(C_1)

## [1] 1.78539
# Skewness of the residuals without outliers.
skewness(C_1)

## [1] -0.08156428
# Kurtosis of the residuals without outliers.
kurtosis(C_1)

15

## [1] 2.591623
# Five-number summary plus mean of the residuals without outliers.
summary(C_1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
## -4.3648 -1.2449  0.1013  0.0000  1.2760  4.4130
# Normal Q-Q plot of the residuals without outliers.
qqnorm(C_1)

−3 −2 −1 0 1 2 3

−4

−2

02

4

Normal Q−Q Plot


Sam

ple

Qua

ntile

s

# Histogram of the residuals without outliers; the plot title and x label are
# taken from the expression passed in.
hist(residuals(model_1))

16

Histogram of residuals(model_1)

residuals(model_1)

Fre

quen

cy

−4 −2 0 2 4

020

4060

80

# Box plot of the residuals without outliers.
boxplot(residuals(model_1))

−4

−2

02

4

# Shapiro-Wilk normality test on the residuals without outliers.
shapiro.test(residuals(model_1))

##
##  Shapiro-Wilk normality test
##
## data:  residuals(model_1)
## W = 0.99362, p-value = 0.05123
# Residuals Distribution of data without outliers
# A_1 holds the post-molt sizes (DF2$postsz), so the x axis is labelled
# "Post-Molting", matching the full-data residual plot.
plot(
  A_1, C_1,
  ylab = "Residuals", xlab = "Post-Molting", main = "Residual Variation"
)
abline(0, 0) # the horizon

17

80 100 120 140 160

−4

−2

02

4

Residual Variation

PRE−Molting

Res

idua

ls

# Index plot of the residuals without outliers, on the same y range as the
# full-data index plot. DF2 keeps only rows whose Outs flag is 0, so every
# point is drawn in colour 1.
plot(C_1, col=DF2$Outs+1,pch=16, ylim=c(-10,10))

0 100 200 300 400

−10

−5

05

10

Index

C_1

# Predict Post Molt Sizes
# NOTE(review): `model` is B ~ A, i.e. it predicts B (presz) from A (postsz);
# confirm which quantity the first column of crabpop.data holds.
new <- read.table("crabpop.data", header = FALSE)
# predict() looks predictors up by name in `newdata`, so the values must be
# supplied as a data frame with a column called A rather than a bare vector.
P <- predict(
  model,
  newdata = data.frame(A = new$V1),
  interval = "confidence"
)
# P is a matrix with columns fit, lwr and upr; plot() draws the first two.
plot(P)

18

20 40 60 80 100 120 140

2040

6080

120

fit

lwr

19

Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...

Documents

Transcript of Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...