HOMEWORK: 8

  1. Set up a new markdown file for this homework. For each of the 4 models create a function to run the model. You will need to think carefully about the formal parameters for the input, the default values, and the output from your function. The output should just include model results, not any graphics
    • create the function in a single R chunk
    • immediately after the function run it with its default values
    • create a tiny fake data set (use some of the random number functions you now know about) and run your code on the fake data.
  2. Now, for each of the 4 statistical models, write a graphics function that will generate a nice plot of the results. The formal parameters for input to your graphics function should be the same as the input for your corresponding stats function. Again, illustrate the graphics function for your default settings and for the tiny fake data set you created in 1.

FAKE DATA SET

x <- runif(20)
y <- runif(20)
aVar <- rep(c("Acer", "Pinus"), each=10)
bVar <- rep(c("Poor", "Rich", "Dry", "Mesic"), each=5)
dFrame <- data.frame(x, y, aVar,bVar)
print(dFrame)
##             x          y  aVar  bVar
## 1  0.15795376 0.59615539  Acer  Poor
## 2  0.56145281 0.37582133  Acer  Poor
## 3  0.75085500 0.27499462  Acer  Poor
## 4  0.41807609 0.15335099  Acer  Poor
## 5  0.24687822 0.65668821  Acer  Poor
## 6  0.35389498 0.31839140  Acer  Rich
## 7  0.71727700 0.68030631  Acer  Rich
## 8  0.70983252 0.21205573  Acer  Rich
## 9  0.52989767 0.41017012  Acer  Rich
## 10 0.04798545 0.06639821  Acer  Rich
## 11 0.70535137 0.23119775 Pinus   Dry
## 12 0.42797049 0.72785187 Pinus   Dry
## 13 0.44999022 0.74108942 Pinus   Dry
## 14 0.29813234 0.17210612 Pinus   Dry
## 15 0.73598992 0.02240452 Pinus   Dry
## 16 0.91328564 0.72222990 Pinus Mesic
## 17 0.32087929 0.19477306 Pinus Mesic
## 18 0.07542172 0.68316270 Pinus Mesic
## 19 0.54644584 0.47450170 Pinus Mesic
## 20 0.04045669 0.55586898 Pinus Mesic

REGRESSION ANALYSIS

###########################################################################
# FUNCTION: Regression
# input: x = 1:10, y = runif(10)
# output: linear model list and data summary 
# This function runs a basic linear regression 
####################################################################
Regression <- function(x=1:10, y=runif(10)){
  rmod <- lm(y~x)
  
  out <- list(rmod, summary(rmod))
return(out)
}

#####################################################################
# END OF FUNCITON
#####################################################################

Test Function

Regression()
## [[1]]
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##    0.553074     0.003508  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55196 -0.18681  0.01938  0.21306  0.43749 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.553074   0.230643   2.398   0.0433 *
## x           0.003508   0.037171   0.094   0.9271  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3376 on 8 degrees of freedom
## Multiple R-squared:  0.001112,   Adjusted R-squared:  -0.1237 
## F-statistic: 0.008904 on 1 and 8 DF,  p-value: 0.9271

Fake Data Run

Regression(x=dFrame$x, y=dFrame$y)
## [[1]]
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##     0.44327     -0.06616  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37370 -0.19501 -0.01418  0.23353  0.33938 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.44327    0.11371   3.898  0.00105 **
## x           -0.06616    0.22042  -0.300  0.76749   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.248 on 18 degrees of freedom
## Multiple R-squared:  0.00498,    Adjusted R-squared:  -0.0503 
## F-statistic: 0.0901 on 1 and 18 DF,  p-value: 0.7675
###########################################################################
# FUNCTION: lrPlot 
# lrPlot() plots a basic linear regression from the Regression() function
# input: x = 1:10, y = runif(10)
# output: linear regression plot
###########################################################################

lrPlot <- function(x=1:10, y=runif(10)){
  
  plot <- plot(y=y,x=x,pch=21,bg="lightblue",cex=2, main="LINEAR REGRESSION PLOT")
  regMod <- lm(y~x)
  abline(regMod)
  
return(plot)
}

###########################################################################
# END OF FUNCITON
###########################################################################

TEST lrPlot()

lrPlot()

## NULL

Test lrPlot on Fake Data

lrPlot(x=dFrame$x, y=dFrame$y)

## NULL

ANOVA

###########################################################################
# FUNCTION: ANOVA
# input: x = discrete variables (x= Acer, Pinus); y=runif(10))
# output: anova model list and data summary 
####################################################################
ANOVA <- function(x=as.factor(c(rep("Apple", 3), rep("Sauce", 7))), y=runif(10)){
  amod <- aov(y~x)
  
  out <- list(amod, summary(amod))
return(out)
}

#####################################################################
# END OF FUNCITON
#####################################################################

Test Function

ANOVA()
## [[1]]
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0297547 0.7209528
## Deg. of Freedom         1         8
## 
## Residual standard error: 0.3001984
## Estimated effects may be unbalanced
## 
## [[2]]
##             Df Sum Sq Mean Sq F value Pr(>F)
## x            1 0.0298 0.02975    0.33  0.581
## Residuals    8 0.7210 0.09012

Fake Data Run

ANOVA(x=dFrame$aVar, y=dFrame$y)
## [[1]]
## Call:
##    aov(formula = y ~ x)
## 
## Terms:
##                         x Residuals
## Sum of Squares  0.0304866 1.0820560
## Deg. of Freedom         1        18
## 
## Residual standard error: 0.245182
## Estimated effects may be unbalanced
## 
## [[2]]
##             Df Sum Sq Mean Sq F value Pr(>F)
## x            1 0.0305 0.03049   0.507  0.486
## Residuals   18 1.0821 0.06011

ANOVA PLOT

###########################################################################
# FUNCTION: anovaPlot 
# lrPlot() plots anova analysis 
# input: x=as.factor(c(rep("Acer", 3), rep("Pine", 7)), y=runif(10)
# output: anova plot
###########################################################################

anovaPLot <- function(x=as.factor(c(rep("Acer", 3), rep("Pine", 7))), y=runif(10)){
  plot <- boxplot(y~x, main="ANOVA PLOT")
  
return(plot)
}

###########################################################################
# END OF FUNCITON
###########################################################################

Test anovaPlot()

anovaPLot()

## $stats
##            [,1]      [,2]
## [1,] 0.06463806 0.1195158
## [2,] 0.07268502 0.1624245
## [3,] 0.08073198 0.3660784
## [4,] 0.32035994 0.4668600
## [5,] 0.55998789 0.6808261
## 
## $n
## [1] 3 7
## 
## $conf
##            [,1]      [,2]
## [1,] -0.1452004 0.1842744
## [2,]  0.3066644 0.5478823
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "Acer" "Pine"

anovaPlot(FAKE DATA)

anovaPLot(x=dFrame$aVar, y=dFrame$y)

## $stats
##            [,1]       [,2]
## [1,] 0.06639821 0.02240452
## [2,] 0.21205573 0.19477306
## [3,] 0.34710636 0.51518534
## [4,] 0.59615539 0.72222990
## [5,] 0.68030631 0.74108942
## 
## $n
## [1] 10 10
## 
## $conf
##           [,1]      [,2]
## [1,] 0.1551949 0.2516469
## [2,] 0.5390179 0.7787238
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "Acer"  "Pinus"

CONTINGENCY TABLE ANALYSIS

###########################################################################
# FUNCTION: contingency
# input: x = discrete variables; discrete y variables 
# output: Chi-square analysis output list and summary 
####################################################################
contingency <- function(x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
  matrix1<- rbind(x,y)
  chisqMod <- chisq.test(matrix1)
  
  out <- list(chisqMod, summary(chisqMod))
return(out)
}
#####################################################################
# END OF FUNCITON
#####################################################################

Test Function

contingency()
## Warning in chisq.test(matrix1): Chi-squared approximation may be incorrect
## [[1]]
## 
##  Pearson's Chi-squared test
## 
## data:  matrix1
## X-squared = 4.0957, df = 9, p-value = 0.905
## 
## 
## [[2]]
##           Length Class  Mode     
## statistic  1     -none- numeric  
## parameter  1     -none- numeric  
## p.value    1     -none- numeric  
## method     1     -none- character
## data.name  1     -none- character
## observed  20     -none- numeric  
## expected  20     -none- numeric  
## residuals 20     -none- numeric  
## stdres    20     -none- numeric

Fake Data Run

contingency(x=dFrame$aVar, y=dFrame$bVar)
## Warning in chisq.test(matrix1): Chi-squared approximation may be incorrect
## [[1]]
## 
##  Pearson's Chi-squared test
## 
## data:  matrix1
## X-squared = 11.378, df = 19, p-value = 0.9105
## 
## 
## [[2]]
##           Length Class  Mode     
## statistic  1     -none- numeric  
## parameter  1     -none- numeric  
## p.value    1     -none- numeric  
## method     1     -none- character
## data.name  1     -none- character
## observed  40     -none- numeric  
## expected  40     -none- numeric  
## residuals 40     -none- numeric  
## stdres    40     -none- numeric

CONTINGENCY PLOT

###########################################################################
# FUNCTION: contPlot 
# contPlot() produces a mosaic plot for a contingency chi-square analysis
# input: x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))
# output: mosaic plot
###########################################################################

contPlot <- function(x=as.factor(c(rep("Acer", 3), rep("Pinus", 7))), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){

  matrix1 <- rbind(x,y)
  plot <- mosaicplot(x=matrix1,
           shade=TRUE)

return(plot)
}

###########################################################################
# END OF FUNCITON
###########################################################################

Test contPlot()

contPlot()

## NULL

TEST contPLot(FAKE DATA)

contPlot(x=dFrame$aVar, y=dFrame$bVar)

## NULL

LOGISTIC REGRESSION

###########################################################################
# FUNCTION: logreg
# input: x = continuous variables; discrete y variables 
# output: logistic regression analysis output list and summary 
####################################################################
logreg <- function(x=runif(10), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
  logregMod <- glm(y~x,family=binomial(link="logit"))
  
  out <- list(logregMod, summary(logregMod))
return(out)
}

#####################################################################
# END OF FUNCITON
#####################################################################

Test Function

logreg()
## [[1]]
## 
## Call:  glm(formula = y ~ x, family = binomial(link = "logit"))
## 
## Coefficients:
## (Intercept)            x  
##     -0.8074       6.9409  
## 
## Degrees of Freedom: 9 Total (i.e. Null);  8 Residual
## Null Deviance:       10.01 
## Residual Deviance: 7.803     AIC: 11.8
## 
## [[2]]
## 
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.53109   0.08171   0.23487   0.73616   1.08520  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.8074     2.0175  -0.400    0.689
## x             6.9409     7.7751   0.893    0.372
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 10.0080  on 9  degrees of freedom
## Residual deviance:  7.8029  on 8  degrees of freedom
## AIC: 11.803
## 
## Number of Fisher Scoring iterations: 7

Fake Data Test

logreg(x=dFrame$x, y=dFrame$bVar)
## [[1]]
## 
## Call:  glm(formula = y ~ x, family = binomial(link = "logit"))
## 
## Coefficients:
## (Intercept)            x  
##       1.855       -1.591  
## 
## Degrees of Freedom: 19 Total (i.e. Null);  18 Residual
## Null Deviance:       22.49 
## Residual Deviance: 21.92     AIC: 25.92
## 
## [[2]]
## 
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.79153   0.04716   0.66320   0.79854   1.01221  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    1.855      1.195   1.552    0.121
## x             -1.591      2.149  -0.740    0.459
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22.493  on 19  degrees of freedom
## Residual deviance: 21.921  on 18  degrees of freedom
## AIC: 25.921
## 
## Number of Fisher Scoring iterations: 4
###########################################################################
# FUNCTION: logPlot 
# logPlot() produces a mosaic plot for a contingency chi-square analysis
# input: x = continuous variables; discrete y variables 
# output: mosaic plot
###########################################################################
logPlot <- function(x=runif(10), y=as.factor(c(rep("Poor", 2), rep("Rich", 3), rep("Dry", 2), rep("Mesic", 3)))){
  logregMod <- glm(y~x,family=binomial(link="logit"))
  plot<- plot(x=x, y=y,pch=21,bg="tan",cex=2.5)
curve(predict(logregMod,data.frame(x),type="response"),add=TRUE,lwd=2)
return(plot)
}

###########################################################################
# END OF FUNCITON
###########################################################################

Test logPlot()

logPlot()

## NULL

Test logPlot(FAKE DATA)

logPlot(x=dFrame$x, y=dFrame$bVar)

## NULL