HOMEWORK: 8
- Set up a new markdown file for this homework. For each of the 4 models create a function to run the model. You will need to think carefully about the formal parameters for the input, the default values, and the output from your function. The output should just include model results, not any graphics
- create the function in a single R chunk
- immediately after the function run it with its default values
- create a tiny fake data set (use some of the random number functions you now know about) and run your code on the fake data.
- Now, for each of the 4 statistical models, write a graphics function that will generate a nice plot of the results. The formal parameters for input to your graphics function should be the same as the input for your corresponding stats function. Again, illustrate the graphics function for your default settings and for the tiny fake data set you created in 1.
FAKE DATA SET
x <- runif(20)
y <- runif(20)
aVar <- rep(c("Acer", "Pinus"), each=10)
bVar <- rep(c("Poor", "Rich", "Dry", "Mesic"), each=5)
dFrame <- data.frame(x, y, aVar,bVar)
print(dFrame)
## x y aVar bVar
## 1 0.15795376 0.59615539 Acer Poor
## 2 0.56145281 0.37582133 Acer Poor
## 3 0.75085500 0.27499462 Acer Poor
## 4 0.41807609 0.15335099 Acer Poor
## 5 0.24687822 0.65668821 Acer Poor
## 6 0.35389498 0.31839140 Acer Rich
## 7 0.71727700 0.68030631 Acer Rich
## 8 0.70983252 0.21205573 Acer Rich
## 9 0.52989767 0.41017012 Acer Rich
## 10 0.04798545 0.06639821 Acer Rich
## 11 0.70535137 0.23119775 Pinus Dry
## 12 0.42797049 0.72785187 Pinus Dry
## 13 0.44999022 0.74108942 Pinus Dry
## 14 0.29813234 0.17210612 Pinus Dry
## 15 0.73598992 0.02240452 Pinus Dry
## 16 0.91328564 0.72222990 Pinus Mesic
## 17 0.32087929 0.19477306 Pinus Mesic
## 18 0.07542172 0.68316270 Pinus Mesic
## 19 0.54644584 0.47450170 Pinus Mesic
## 20 0.04045669 0.55586898 Pinus Mesic
REGRESSION ANALYSIS
###########################################################################
# FUNCTION: Regression
# input: x = 1:10, y = runif(10)
# output: linear model list and data summary
# This function runs a basic linear regression
####################################################################
Regression <- function(x=1:10, y=runif(10)){
rmod <- lm(y~x)
out <- list(rmod, summary(rmod))
return(out)
}
#####################################################################
# END OF FUNCITON
#####################################################################
Test Function
Regression()
## [[1]]
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.553074 0.003508
##
##
## [[2]]
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55196 -0.18681 0.01938 0.21306 0.43749
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.553074 0.230643 2.398 0.0433 *
## x 0.003508 0.037171 0.094 0.9271
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3376 on 8 degrees of freedom
## Multiple R-squared: 0.001112, Adjusted R-squared: -0.1237
## F-statistic: 0.008904 on 1 and 8 DF, p-value: 0.9271
Fake Data Run
Regression(x=dFrame$x, y=dFrame$y)
## [[1]]
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.44327 -0.06616
##
##
## [[2]]
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37370 -0.19501 -0.01418 0.23353 0.33938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.44327 0.11371 3.898 0.00105 **
## x -0.06616 0.22042 -0.300 0.76749
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.248 on 18 degrees of freedom
## Multiple R-squared: 0.00498, Adjusted R-squared: -0.0503
## F-statistic: 0.0901 on 1 and 18 DF, p-value: 0.7675
###########################################################################
# FUNCTION: lrPlot
# lrPlot() plots a basic linear regression from the Regression() function
# input: x = 1:10, y = runif(10)
# output: linear regression plot
###########################################################################
lrPlot <- function(x=1:10, y=runif(10)){
plot <- plot(y=y,x=x,pch=21,bg="lightblue",cex=2, main="LINEAR REGRESSION PLOT")
regMod <- lm(y~x)
abline(regMod)
return(plot)
}
###########################################################################
# END OF FUNCITON
###########################################################################
TEST lrPlot()
lrPlot()
## NULL
Test lrPlot on Fake Data
lrPlot(x=dFrame$x, y=dFrame$y)
## NULL
ANOVA
###########################################################################
# FUNCTION: ANOVA
# input: x = discrete variables (x= Acer, Pinus); y=runif(10))
# output: anova model list and data summary
####################################################################
ANOVA <- function(x=as.factor(c(rep("Apple", 3), rep("Sauce", 7))), y=runif(10)){
amod <- aov(y~x)
out <- list(amod, summary(amod))
return(out)
}
#####################################################################
# END OF FUNCITON
#####################################################################
Test Function
ANOVA()
## [[1]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0297547 0.7209528
## Deg. of Freedom 1 8
##
## Residual standard error: 0.3001984
## Estimated effects may be unbalanced
##
## [[2]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0298 0.02975 0.33 0.581
## Residuals 8 0.7210 0.09012
Fake Data Run
ANOVA(x=dFrame$aVar, y=dFrame$y)
## [[1]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0304866 1.0820560
## Deg. of Freedom 1 18
##
## Residual standard error: 0.245182
## Estimated effects may be unbalanced
##
## [[2]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0305 0.03049 0.507 0.486
## Residuals 18 1.0821 0.06011
ANOVA PLOT
###########################################################################
# FUNCTION: anovaPlot
# lrPlot() plots anova analysis
# input: x=as.factor(c(rep("Acer", 3), rep("Pine", 7)), y=runif(10)
# output: anova plot
###########################################################################
anovaPLot <- function(x=as.factor(c(rep("Acer", 3), rep("Pine", 7))), y=runif(10)){
plot <- boxplot(y~x, main="ANOVA PLOT")
return(plot)
}
###########################################################################
# END OF FUNCITON
###########################################################################
Test anovaPlot()
anovaPLot()
## $stats
## [,1] [,2]
## [1,] 0.06463806 0.1195158
## [2,] 0.07268502 0.1624245
## [3,] 0.08073198 0.3660784
## [4,] 0.32035994 0.4668600
## [5,] 0.55998789 0.6808261
##
## $n
## [1] 3 7
##
## $conf
## [,1] [,2]
## [1,] -0.1452004 0.1842744
## [2,] 0.3066644 0.5478823
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "Acer" "Pine"
anovaPlot(FAKE DATA)
anovaPLot(x=dFrame$aVar, y=dFrame$y)
## $stats
## [,1] [,2]
## [1,] 0.06639821 0.02240452
## [2,] 0.21205573 0.19477306
## [3,] 0.34710636 0.51518534
## [4,] 0.59615539 0.72222990
## [5,] 0.68030631 0.74108942
##
## $n
## [1] 10 10
##
## $conf
## [,1] [,2]
## [1,] 0.1551949 0.2516469
## [2,] 0.5390179 0.7787238
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "Acer" "Pinus"
CONTINGENCY TABLE ANALYSIS
###########################################################################
# FUNCTION: contingency
# input: x = discrete variables; discrete y variables
# output: Chi-square analysis output list and summary
####################################################################
contingency <- function(x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
matrix1<- rbind(x,y)
chisqMod <- chisq.test(matrix1)
out <- list(chisqMod, summary(chisqMod))
return(out)
}
#####################################################################
# END OF FUNCITON
#####################################################################
Test Function
contingency()
## Warning in chisq.test(matrix1): Chi-squared approximation may be incorrect
## [[1]]
##
## Pearson's Chi-squared test
##
## data: matrix1
## X-squared = 4.0957, df = 9, p-value = 0.905
##
##
## [[2]]
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 20 -none- numeric
## expected 20 -none- numeric
## residuals 20 -none- numeric
## stdres 20 -none- numeric
Fake Data Run
contingency(x=dFrame$aVar, y=dFrame$bVar)
## Warning in chisq.test(matrix1): Chi-squared approximation may be incorrect
## [[1]]
##
## Pearson's Chi-squared test
##
## data: matrix1
## X-squared = 11.378, df = 19, p-value = 0.9105
##
##
## [[2]]
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 40 -none- numeric
## expected 40 -none- numeric
## residuals 40 -none- numeric
## stdres 40 -none- numeric
CONTINGENCY PLOT
###########################################################################
# FUNCTION: contPlot
# contPlot() produces a mosaic plot for a contingency chi-square analysis
# input: x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))
# output: mosaic plot
###########################################################################
contPlot <- function(x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
matrix1 <- rbind(x,y)
plot <- mosaicplot(x=matrix1,
shade=TRUE)
return(plot)
}
###########################################################################
# END OF FUNCITON
###########################################################################
Test contPlot()
contPlot()
## NULL
TEST contPLot(FAKE DATA)
contPlot(x=dFrame$aVar, y=dFrame$bVar)
## NULL
LOGISTIC REGRESSION
###########################################################################
# FUNCTION: logreg
# input: x = continuous variables; discrete y variables
# output: logistic regression analysis output list and summary
####################################################################
logreg <- function(x=runif(10), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
logregMod <- glm(y~x,family=binomial(link="logit"))
out <- list(logregMod, summary(logregMod))
return(out)
}
#####################################################################
# END OF FUNCITON
#####################################################################
Test Function
logreg()
## [[1]]
##
## Call: glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Coefficients:
## (Intercept) x
## -0.8074 6.9409
##
## Degrees of Freedom: 9 Total (i.e. Null); 8 Residual
## Null Deviance: 10.01
## Residual Deviance: 7.803 AIC: 11.8
##
## [[2]]
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.53109 0.08171 0.23487 0.73616 1.08520
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8074 2.0175 -0.400 0.689
## x 6.9409 7.7751 0.893 0.372
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10.0080 on 9 degrees of freedom
## Residual deviance: 7.8029 on 8 degrees of freedom
## AIC: 11.803
##
## Number of Fisher Scoring iterations: 7
Fake Data Test
logreg(x=dFrame$x, y=dFrame$bVar)
## [[1]]
##
## Call: glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Coefficients:
## (Intercept) x
## 1.855 -1.591
##
## Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
## Null Deviance: 22.49
## Residual Deviance: 21.92 AIC: 25.92
##
## [[2]]
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.79153 0.04716 0.66320 0.79854 1.01221
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.855 1.195 1.552 0.121
## x -1.591 2.149 -0.740 0.459
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 22.493 on 19 degrees of freedom
## Residual deviance: 21.921 on 18 degrees of freedom
## AIC: 25.921
##
## Number of Fisher Scoring iterations: 4
###########################################################################
# FUNCTION: logPlot
# logPlot() produces a mosaic plot for a contingency chi-square analysis
# input: x = continuous variables; discrete y variables
# output: mosaic plot
###########################################################################
logPlot <- function(x=runif(10), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
logregMod <- glm(y~x,family=binomial(link="logit"))
plot<- plot(x=x, y=y,pch=21,bg="tan",cex=2.5)
curve(predict(logregMod,data.frame(x),type="response"),add=TRUE,lwd=2)
return(plot)
}
###########################################################################
# END OF FUNCITON
###########################################################################
Test logPlot()
logPlot()
## NULL
Test logPlot(FAKE DATA)
logPlot(x=dFrame$x, y=dFrame$bVar)
## NULL