Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...

Project_3.R

victoradaji

Sun Mar 10 01:30:54 2019

# Read file ----
# Raise the print limit so long console output is not truncated.
options(max.print = 999999)
# NOTE(review): rm(list = ls()) wipes every object in the workspace; it is
# kept here only to preserve the original script's behaviour.
rm(list = ls())
# crabs.data is read from the working directory; its first row is a header.
df <- read.table("crabs.data", header = TRUE)

# Swap columns and rearrange data ----
# Keep only these five columns, in this order (post-molt size first).
new_df <- df[c("postsz", "presz", "inc", "year", "lf")]

# Plot post-molt vs pre-molt size ----
A <- new_df$postsz  # post-molt size
B <- new_df$presz   # pre-molt size
plot(A, B)          # x = post-molt, y = pre-molt

40 60 80 100 120 140 160

# Scatter plot with a loess smooth. B is presz and A is postsz, so the x axis
# is the pre-molt size and the y axis is the post-molt size; the original
# labels were swapped.
scatter.smooth(
  x = B, y = A,
  xlab = "Pre-Molting Size",
  ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)

40 60 80 100 120 140 160

Crab Molting Size

Post−Molting Size

# Finding linear model ----
# Regress B (presz) on A (postsz). A and B are free-standing vectors rather
# than columns of new_df, so lm() resolves them from the calling environment
# and `data = new_df` does not influence which values are used.
model <- lm(B ~ A, data = new_df)
summary(model)

#### Call:## lm(formula = B ~ A, data = new_df)#### Residuals:## Min 1Q Median 3Q Max## -6.1557 -1.3052 0.0564 1.3174 14.6750#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -25.21370 1.00089 -25.19 <2e-16 ***## A 1.07316 0.00692 155.08 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 2.199 on 470 degrees of freedom## Multiple R-squared: 0.9808, Adjusted R-squared: 0.9808## F-statistic: 2.405e+04 on 1 and 470 DF, p-value: < 2.2e-16plot(model)

20 40 60 80 100 120 140

Fitted values

lm(B ~ A)

Residuals vs Fitted

302178

−3 −2 −1 0 1 2 3

Theoretical Quantiles

lm(B ~ A)

Normal Q−Q

178302

20 40 60 80 100 120 140

Fitted values

lm(B ~ A)

Scale−Location177

178 302

0.00 0.02 0.04 0.06 0.08 0.10

Leverage

lm(B ~ A)

Cook's distance1

Residuals vs Leverage

178302

# Plot of PreMolt vs PostMolt ----
# Formula interface: B (presz) on the y axis, A (postsz) on the x axis.
plot(B ~ A, data = new_df, xlab = "PostMolt", ylab = "PreMolt")
# Overlay the fitted line using the model's intercept and slope.
abline(coef(model)[1:2])

# Line equation ----
# Coefficients of `model`, rounded to two decimals for display.
cf <- round(coef(model), 2)

# Pick the joining sign from the slope so a negative slope prints as " - x"
# rather than " + -x". cf[2] is a single value, so plain if/else is enough.
slope_sign <- if (sign(cf[2]) == 1) " + " else " - "
eq <- paste0("PreMolt = ", cf[1], slope_sign, abs(cf[2]), " PostMolt ")

# Write the equation inside the top margin of the currently open plot.
mtext(eq, 3, line = -2)

# Least-squares slope and intercept for regressing A on B, computed from the
# correlation and standard deviations. Note this is the reverse direction of
# `model`, which regresses B on A.
slope <- cor(B, A) * (sd(A) / sd(B))
intercept <- mean(A) - (slope * mean(B))

# Breusch-Pagan test function ----
# bptest() is provided by the lmtest package.
library(lmtest)

## Loading required package: zoo

#### Attaching package: 'zoo'

## The following objects are masked from 'package:base':#### as.Date, as.Date.numeric

40 60 80 100 120 140 160

PostMolt

PreMolt = −25.21 + 1.07 PostMolt

bptest(model)

#### studentized Breusch-Pagan test#### data: model## BP = 89.386, df = 1, p-value < 2.2e-16#Finding the correlation/pearsoon's valuecor(B, A)

## [1] 0.9903699str(summary(model))

## List of 11## $ call : language lm(formula = B ~ A, data = new_df)

## $ terms :Classes 'terms', 'formula' language B ~ A## .. ..- attr(*, "variables")= language list(B, A)## .. ..- attr(*, "factors")= int [1:2, 1] 0 1## .. .. ..- attr(*, "dimnames")=List of 2## .. .. .. ..$ : chr [1:2] "B" "A"## .. .. .. ..$ : chr "A"## .. ..- attr(*, "term.labels")= chr "A"## .. ..- attr(*, "order")= int 1## .. ..- attr(*, "intercept")= int 1## .. ..- attr(*, "response")= int 1## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>## .. ..- attr(*, "predvars")= language list(B, A)## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"## .. .. ..- attr(*, "names")= chr [1:2] "B" "A"## $ residuals : Named num [1:472] 1.7709 0.3685 -0.0852 -2.3705 2.4222 ...## ..- attr(*, "names")= chr [1:472] "1" "2" "3" "4" ...## $ coefficients : num [1:2, 1:4] -25.2137 1.07316 1.00089 0.00692 -25.1913 ...## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A"## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"## $ aliased : Named logi [1:2] FALSE FALSE## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A"## $ sigma : num 2.2## $ df : int [1:3] 2 470 2## $ r.squared : num 0.981## $ adj.r.squared: num 0.981## $ fstatistic : Named num [1:3] 24051 1 470## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"## $ cov.unscaled : num [1:2, 1:2] 2.07e-01 -1.43e-03 -1.43e-03 9.91e-06## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A"## .. ..$ : chr [1:2] "(Intercept)" "A"## - attr(*, "class")= chr "summary.lm"summary(model)$r.squared

## [1] 0.9808326summary(model)$adj.r.squared

## [1] 0.9807918summary(model)$sigma^2

## [1] 4.834374#Analysing Residuals

## Descriptive Statistics
library(moments)  # provides skewness() and kurtosis()
C <- residuals(model)  # residuals of the full-data fit
mean(C)

## [1] -2.280672e-17sd(C)

## [1] 2.196386

skewness(C)

## [1] 0.8454525kurtosis(C)

## [1] 8.378684summary(C)

## Min. 1st Qu. Median Mean 3rd Qu. Max.## -6.15570 -1.30517 0.05639 0.00000 1.31741 14.67500qqnorm(C)

−3 −2 −1 0 1 2 3

Normal Q−Q Plot

hist(residuals(model))

Histogram of residuals(model)

residuals(model)

−5 0 5 10 15

boxplot(residuals(model))

shapiro.test(residuals(model))

##
##  Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.94724, p-value = 6.356e-12

# Residuals distribution ----
# Residuals C against A (postsz), with a horizontal reference line at zero.
plot(
  A, C,
  ylab = "Residuals",
  xlab = "Post-Molting",
  main = "Residual Variation"
)
abline(0, 0)  # the horizon

40 60 80 100 120 140 160

Residual Variation

Post−Molting

###################################################### WITHOUT OUTLIERS
# Making a new data.frame with no outliers and plot ----
# NOTE(review): nothing random follows in the visible code, so this seed
# appears to have no effect; kept to preserve behaviour.
set.seed(20)
# Outlier threshold: two standard deviations of the full-model residuals.
D1 <- 2 * sd(C)
# 1 marks an observation whose absolute residual exceeds the threshold.
outlier_flag <- ifelse(abs(C) > D1, 1, 0)
new_df$Outs <- outlier_flag
E <- outlier_flag
# Residuals by index; flagged points use colour 2, the rest colour 1.
plot(C, col = new_df$Outs + 1, pch = 16, ylim = c(-10, 10))

0 100 200 300 400

# Keep only the rows not flagged as outliers (Outs is 0 for non-outliers).
DF2 <- new_df[new_df$Outs == 0, ]
nrow(DF2)

## [1] 457

# Plot PostMolt vs Pre-Molt size of data without outliers ----
A_1 <- DF2$postsz  # post-molt size
B_1 <- DF2$presz   # pre-molt size
plot(B_1, A_1)     # x = pre-molt, y = post-molt

60 80 100 120 140

# Scatter plot with a loess smooth for the outlier-free data. B_1 is presz and
# A_1 is postsz, so x is the pre-molt size and y is the post-molt size; the
# original labels were swapped.
scatter.smooth(
  x = B_1, y = A_1,
  xlab = "Pre-Molting Size",
  ylab = "Post-Molting Size",
  main = "Crab Molting Size"
)

60 80 100 120 140

Crab Molting Size

Post−Molting Size

# Finding linear model of data without outliers ----
# Regress B_1 (presz) on A_1 (postsz). Both are free-standing vectors, so
# lm() resolves them from the calling environment rather than from DF2.
model_1 <- lm(B_1 ~ A_1, data = DF2)
summary(model_1)

#### Call:## lm(formula = B_1 ~ A_1, data = DF2)#### Residuals:## Min 1Q Median 3Q Max## -4.3648 -1.2449 0.1013 1.2760 4.4130#### Coefficients:## Estimate Std. Error t value Pr(>|t|)## (Intercept) -28.90660 0.94984 -30.43 <2e-16 ***## A_1 1.09861 0.00654 167.98 <2e-16 ***## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1#### Residual standard error: 1.787 on 455 degrees of freedom## Multiple R-squared: 0.9841, Adjusted R-squared: 0.9841## F-statistic: 2.822e+04 on 1 and 455 DF, p-value: < 2.2e-16plot(model_1)

60 80 100 120 140

Fitted values

lm(B_1 ~ A_1)

Residuals vs Fitted

−3 −2 −1 0 1 2 3

lm(B_1 ~ A_1)

Normal Q−Q

60 80 100 120 140

Fitted values

lm(B_1 ~ A_1)

Scale−Location231 218443

abline(model_1)

0.00 0.01 0.02 0.03 0.04 0.05

Leverage

lm(B_1 ~ A_1)

Cook's distance

Residuals vs Leverage

# Plot PreMolt vs PostMolt ----
# Formula interface: B_1 (presz) on the y axis, A_1 (postsz) on the x axis.
plot(B_1 ~ A_1, data = DF2, xlab = "PostMolt", ylab = "PreMolt")
# Overlay the fitted line using model_1's intercept and slope.
abline(coef(model_1)[1:2])

# Line equation without outliers ----
# Coefficients of `model_1`, rounded to two decimals for display.
cf <- round(coef(model_1), 2)

# Pick the joining sign from the slope so a negative slope prints as " - x".
# cf[2] is a single value, so plain if/else is enough.
slope_sign <- if (sign(cf[2]) == 1) " + " else " - "
eq <- paste0("PreMolt = ", cf[1], slope_sign, abs(cf[2]), " PostMolt ")

# Write the equation inside the top margin of the currently open plot.
mtext(eq, 3, line = -2)

80 100 120 140 160

PostMolt

PreMolt = −28.91 + 1.1 PostMolt

# Least-squares slope and intercept for regressing A_1 on B_1, computed from
# the correlation and standard deviations of the outlier-free data.
slope_1 <- cor(B_1, A_1) * (sd(A_1) / sd(B_1))
# Use slope_1 here: the original used `slope`, the value computed from the
# full data set, which mixes the two fits.
intercept_1 <- mean(A_1) - (slope_1 * mean(B_1))

# Breusch-Pagan test of data without outliers ----
# bptest() is provided by the lmtest package.
library(lmtest)
bptest(model_1)

#### studentized Breusch-Pagan test#### data: model_1## BP = 0.45663, df = 1, p-value = 0.4992#Finding the correlation/pearsoon's value of data without outlierscor(B_1, A_1)

## [1] 0.9920334str(summary(model_1))

## List of 11## $ call : language lm(formula = B_1 ~ A_1, data = DF2)## $ terms :Classes 'terms', 'formula' language B_1 ~ A_1## .. ..- attr(*, "variables")= language list(B_1, A_1)## .. ..- attr(*, "factors")= int [1:2, 1] 0 1## .. .. ..- attr(*, "dimnames")=List of 2## .. .. .. ..$ : chr [1:2] "B_1" "A_1"## .. .. .. ..$ : chr "A_1"

## .. ..- attr(*, "term.labels")= chr "A_1"## .. ..- attr(*, "order")= int 1## .. ..- attr(*, "intercept")= int 1## .. ..- attr(*, "response")= int 1## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>## .. ..- attr(*, "predvars")= language list(B_1, A_1)## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"## .. .. ..- attr(*, "names")= chr [1:2] "B_1" "A_1"## $ residuals : Named num [1:457] 2.214 0.672 0.165 -2.324 2.571 ...## ..- attr(*, "names")= chr [1:457] "1" "2" "3" "4" ...## $ coefficients : num [1:2, 1:4] -28.9066 1.09861 0.94984 0.00654 -30.43303 ...## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A_1"## .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"## $ aliased : Named logi [1:2] FALSE FALSE## ..- attr(*, "names")= chr [1:2] "(Intercept)" "A_1"## $ sigma : num 1.79## $ df : int [1:3] 2 455 2## $ r.squared : num 0.984## $ adj.r.squared: num 0.984## $ fstatistic : Named num [1:3] 28216 1 455## ..- attr(*, "names")= chr [1:3] "value" "numdf" "dendf"## $ cov.unscaled : num [1:2, 1:2] 2.82e-01 -1.94e-03 -1.94e-03 1.34e-05## ..- attr(*, "dimnames")=List of 2## .. ..$ : chr [1:2] "(Intercept)" "A_1"## .. ..$ : chr [1:2] "(Intercept)" "A_1"## - attr(*, "class")= chr "summary.lm"summary(model_1)$r.squared

## [1] 0.9841304summary(model_1)$adj.r.squared

## [1] 0.9840955summary(model_1)$sigma^2

## [1] 3.194625#Analysing Residuals of data without outliers

## Descriptive Statistics
library(moments)  # provides skewness() and kurtosis()
C_1 <- residuals(model_1)  # residuals of the outlier-free fit
mean(C_1)

## [1] -1.989604e-16sd(C_1)

## [1] 1.78539skewness(C_1)

## [1] -0.08156428kurtosis(C_1)

## [1] 2.591623summary(C_1)

## Min. 1st Qu. Median Mean 3rd Qu. Max.## -4.3648 -1.2449 0.1013 0.0000 1.2760 4.4130qqnorm(C_1)

−3 −2 −1 0 1 2 3

Normal Q−Q Plot

hist(residuals(model_1))

Histogram of residuals(model_1)

residuals(model_1)

−4 −2 0 2 4

boxplot(residuals(model_1))

shapiro.test(residuals(model_1))

##
##  Shapiro-Wilk normality test
##
## data: residuals(model_1)
## W = 0.99362, p-value = 0.05123

# Residuals distribution of data without outliers ----
# Residuals C_1 against A_1. A_1 is postsz, so the x axis is the post-molt
# size; the original label said "PRE-Molting".
plot(
  A_1, C_1,
  ylab = "Residuals",
  xlab = "Post-Molting",
  main = "Residual Variation"
)
abline(0, 0)  # the horizon

80 100 120 140 160

Residual Variation

PRE−Molting

plot(C_1, col=DF2$Outs+1,pch=16, ylim=c(-10,10))

0 100 200 300 400

# Predict pre-molt sizes for new crabs ----
# `model` regresses B (presz) on A (postsz), so its predictions are pre-molt
# sizes. NOTE(review): assumes column V1 of crabpop.data holds post-molt
# sizes — confirm against the data description.
new <- read.table("crabpop.data", header = FALSE)

# predict() looks up the predictor by name in `newdata`, so the new values
# must be supplied as a data frame with a column called A. Passing the bare
# vector new$V1 does not bind the values to the predictor A.
P <- predict(
  model,
  newdata = data.frame(A = new$V1),
  interval = "confidence"
)
plot(P)

20 40 60 80 100 120 140

Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...

Documents

Transcript of Project 3 - victoradajimth332.files.wordpress.com… · 20 40 60 80 100 120 140-5 0 5 10 15...

A Vector Auto-Regressive (VAR) Model for the Turkish ...that there are no serial or cross-correlations in the residuals. For a fitted VAR(p) model, the Qk (m) statistic of the residuals

The Examination of Residuals. The residuals are defined as the n differences : where is an observation and is the corresponding fitted value obtained.

Physicochemical Properties of Residuals from Anaerobic ...

Water Residuals Management Graves.pdf

Coal Combustion Residuals (CCR) Bottom Ash Transfer … Combustion Residuals (CCR) Bottom Ash Transfer (BAT) Impoundments Groundwater Detection Monitoring Plan Revision 0 Rawhide Energy

820r11003 (DW Residuals Management Report)

Astronomical Data Methond of Residuals Simulations

Applied Statistics - uniroma1.it · 2019. 11. 11. · Applied Statistics Lecturer: CristinaMollica. Multiple regression OLS estimation A real example Fitted values and residuals R2

Cook Weisberg Residuals and Influence

Quenching H O Residuals Using Granular Activated Carbon: Effect … · Residuals Using Granular Activated Carbon: Effect of pH and Biodegradation By ... Residuals Using Granular Activated

BUREAU OF PRETREATMENT AND RESIDUALS - State

Diagnosing Problems in Linear and Generalized 6 Linear Models · 2010. 11. 16. · income Pearson residuals bc prof wc −15 −5 0 5 10 type Pearson residuals 30 40 50 60 70 80 90

The Effectiveness of Disinfectant Residuals in the ... Effectiveness of Disinfectant Residuals... · The Effectiveness of Disinfectant Residuals in the Distribution System . ii ...

Regression Diagnostics: Residuals

MobileNetV2: Inverted Residuals and Linear Bottlenecksopenaccess.thecvf.com/content_cvpr_2018/papers/Sandler_MobileNetV2... · MobileNetV2: Inverted Residuals and Linear Bottlenecks

Chapter 9 Regression Wisdom math2200. Sifting residuals for groups Residuals: ‘left over’ after the model How to examine residuals? –Residual plot: residuals.

Astronomical Data-method of Residuals

Chapter 6 Prediction, Residuals, Influence

Ch.3 Exercises: Linear Regression...-3 -2 -1 0 1 2 3-3 0 2 4 Theoretical Quantiles Standardized residuals Normal Q-Q 323 330 334 5 10 15 20 25 30 0.0 1.0 Fitted values S t a n d a

Chapter 6 Prediction, Residuals, Influence Some remarks: Residual = Observed Y – Predicted Y Residuals are errors.