# ANOVA, explained

# Toy data illustrating how ANOVA partitions variation.
# Ten observations in two groups of five (x = 1..5 and x = 6..10).
x <- 1:10
y <- c(4, 4, 4, 4, 4, 6, 6, 6, 6, 6)

# Panel 2 re-uses the data of panel 1: no scatter within either group.
x1 <- x
y1 <- y

# Panel 3: same grand mean (5), but both groups share the same mean, so all
# of the variation is within-group.
x2 <- x
y2 <- c(5, 4, 5, 6, 5, 5, 4, 5, 6, 5)

# Group membership; it drives the plotting symbol and the group means.
grp <- rep(1:2, each = 5)
PCH <- c(20, 15)[grp]

# Per-group means and the horizontal extent of each group's mean line.
# The extents meet at x = 5.5, so each line covers only its own group and
# stays inside the x-limits of the plot.
grp.mean <- tapply(y1, grp, mean)
grp.from <- c(0.5, 5.5)
grp.to <- c(5.5, 10.5)

# Three panels side by side; keep the previous settings so they can be
# restored once the figure is complete.
op <- par(mfrow = c(1, 3), mgp = c(2, 0.5, 0), tcl = 0.5, lwd = 2,
          cex = 1.25, mar = c(4, 4, 1.5, 1), las = 1)

# Panel 1: deviations of every point from the grand mean (SST).
plot(y ~ x, xlim = c(0.5, 10.5), ylim = c(2, 8), pch = PCH, cex = 2,
     main = 'SST = big')
segments(x0 = x, x1 = x,
         y0 = y, y1 = mean(y))
abline(h = mean(y))

# Panel 2: every point sits on its own group mean, so SSE is zero.
plot(y1 ~ x, xlim = c(0.5, 10.5), ylim = c(2, 8), pch = PCH, cex = 2,
     main = 'SSE = 0')
segments(x0 = grp.from, x1 = grp.to,
         y0 = grp.mean, y1 = grp.mean)

# Panel 3: deviations from the grand mean when the groups do not differ.
plot(y2 ~ x, xlim = c(0.5, 10.5), ylim = c(2, 8), pch = PCH, cex = 2,
     main = 'SST = SSE')
abline(h = mean(y2))
segments(x0 = x2, x1 = x2,
         y0 = y2, y1 = mean(y2))

par(op)

# [Figure: plot of chunk unnamed-chunk-1]

# Yields data: one column per soil type (sand, clay, loam).
# Use the commented-out line instead to read a local copy of the file.
#dat <- read.delim('yields.txt')
dat <- read.delim('http://www.bio.ic.ac.uk/research/mjcraw/therbook/data/yields.txt')

# Fixed soil order used for the group means, the factor levels and the axis
# labels. Selecting the columns in this order keeps the stacked rows aligned
# with btn.mean regardless of the column order in the file.
soils <- c('sand', 'clay', 'loam')

# Mean yield per soil type (unnamed numeric vector, in the order of `soils`).
btn.mean <- vapply(dat[soils], mean, numeric(1), USE.NAMES = FALSE)

# Long format: `values` holds the yields, `ind` the soil type.
dat1 <- stack(dat[soils])
dat1$ind <- factor(dat1$ind, levels = soils)
grand.mean <- mean(dat1$values)

# Number of observations per soil type (one row of `dat` per observation).
n.per <- nrow(dat)

# x positions: one integer slot per soil type, and within each slot the
# observations are spread from -0.20 to +0.25 around it so that the
# individual points and their residual lines do not overlap.
x <- seq_along(soils)
x1 <- rep(x, each = n.per) +
  rep(seq(from = -0.20, to = 0.25, length.out = n.per), times = length(x))

# Three panels side by side; keep the previous settings so they can be
# restored once the figure is complete.
op <- par(mfrow = c(1, 3), mgp = c(2, 0.5, 0), tcl = 0.5, lwd = 2,
          cex = 1.25, mar = c(4, 4, 1.5, 1), las = 1)

# Panel 1: boxplot of yield by soil type.
plot(values ~ ind, data = dat1, col = "blue", xlim = c(0.5, 3.5),
     ylim = c(0, 20), main = 'Boxplot', xlab = 'Soil', ylab = 'Yield')

# Panel 2: between-group variation, i.e. the distance of each group mean
# from the grand mean.
plot(btn.mean ~ x, xlim = c(0.5, 3.5), ylim = c(0, 20), pch = 20,
     xaxt = 'n', main = 'Between-group', xlab = 'Soil', ylab = 'Yield')
axis(side = 1, at = x, labels = soils)
abline(h = grand.mean)
segments(x0 = x,
         x1 = x,
         y0 = grand.mean,
         y1 = btn.mean)

# Panel 3: within-group variation, i.e. the distance of each observation
# from its own group mean.
plot(dat1$values ~ x1, xlim = c(0.5, 3.5), ylim = c(0, 20), pch = 20,
     xaxt = 'n', main = 'Within-group', xlab = 'Soil', ylab = 'Yield')
axis(side = 1, at = x, labels = soils)

# Horizontal line at each group mean, spanning that group's slot.
segments(x0 = x - 0.30,
         x1 = x + 0.35,
         y0 = btn.mean,
         y1 = btn.mean)

# Vertical residual line from each observation to its group mean.
segments(x0 = x1,
         x1 = x1,
         y0 = rep(btn.mean, each = n.per),
         y1 = dat1$values)

par(op)

# [Figure: plot of chunk unnamed-chunk-2]

# One-way ANOVA of the stacked values on the soil factor. Lines starting
# with `##` are the console output echoed after each statement.
summary(aov(dat1$values ~ dat1$ind))
##             Df Sum Sq Mean Sq F value Pr(>F)  
## dat1$ind     2   99.2   49.60   4.245  0.025 *
## Residuals   27  315.5   11.69                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Hand check of the table above, using the sums of squares as printed.
# mean square = Sum Sq / df
99.2 / 2
## [1] 49.6
315.5 / 27
## [1] 11.68519
# F value:
# F = between-group mean square / residual mean square. The result below
# differs from the table's 4.245 in the third decimal because 11.69 is the
# residual mean square already rounded to two decimals (11.68519 above).
49.60 / 11.69
## [1] 4.242943