R code Chapter 2
This document contains abridged sections from Discovering Statistics Using R and RStudio by Andy Field, so there are some copyright considerations. You can use this material for teaching and non-profit activities, but please do not meddle with it or claim it as your own work. See the full license terms at the bottom of the page.
# Make sure to load this package
library(tidyverse)
The median
# Eleven values of the `friends` variable, stored as a one-column tibble
fb_tib <- tibble::tibble(
  friends = c(57, 40, 103, 234, 93, 53, 116, 98, 108, 121, 22)
)
# Base R: extract the column and hand it straight to median()
median(fb_tib$friends)
## [1] 98
# One pipe: summarise the tibble so the median comes back as a tibble
fb_tib %>%
  dplyr::summarize(
    median = median(friends)
  )
## # A tibble: 1 x 1
##   median
##    <dbl>
## 1     98
The mean
# Base R: the ordinary arithmetic mean
mean(fb_tib$friends)
## [1] 95
# Base R: trim = 0.1 trims 10% of the observations from each end of the
# data before the mean is computed
mean(fb_tib$friends, trim = 0.1)
## [1] 87.66667
# A pipe to get the median, mean and trimmed mean in one summary tibble:
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1)
  )
## # A tibble: 1 x 3
##   median  mean `trimmed mean 10%`
##    <dbl> <dbl>              <dbl>
## 1     98    95               87.7
The dispersion in a distribution
# Range: largest value minus smallest value
max(fb_tib$friends) - min(fb_tib$friends)
## [1] 212
# Quartiles: lower quartile, median and upper quartile in a single call
quantile(fb_tib$friends, probs = c(0.25, 0.5, 0.75))
## 25% 50% 75%
##  55  98 112
# Lower quartile only
quantile(fb_tib$friends, probs = 0.25)
## 25%
##  55
# Upper quartile only
quantile(fb_tib$friends, probs = 0.75)
## 75%
## 112
# Inter-quartile range: upper quartile minus lower quartile
IQR(fb_tib$friends)
## [1] 57
# Variance
var(fb_tib$friends)
## [1] 3224.6
# Standard deviation: the square root of the variance
sd(fb_tib$friends)
## [1] 56.78556
# Tidyverse sumptuousness: every statistic above in one summary tibble,
# with all values rounded to 2 decimal places at the end.
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1),
    range = max(friends) - min(friends),
    `lower quartile` = quantile(friends, probs = 0.25),
    `upper quartile` = quantile(friends, probs = 0.75),
    IQR = IQR(friends),
    var = var(friends),
    sd = sd(friends)
  ) %>%
  round(., digits = 2)
## # A tibble: 1 x 9
##   median  mean `trimmed mean 1… range `lower quartile` `upper quartile`   IQR
##    <dbl> <dbl>            <dbl> <dbl>            <dbl>            <dbl> <dbl>
## 1     98    95             87.7   212               55              112    57
## # … with 2 more variables: var <dbl>, sd <dbl>
Pieces of great
Pieces of great 2.1
# With no second argument, round() rounds to 0 decimal places
round(3.211420)
## [1] 3
# The second argument (digits) sets the number of decimal places to keep
round(3.211420, digits = 2)
## [1] 3.21
round(3.211420, digits = 4)
## [1] 3.2114
# Round a computed value by nesting the call inside round() ...
round(mean(fb_tib$friends, trim = 0.1), digits = 2)
## [1] 87.67
# ... or by piping it in; `.` marks where the piped value goes
mean(fb_tib$friends, trim = 0.1) %>%
  round(., digits = 2)
## [1] 87.67
# round() also accepts a whole tibble of numeric summaries
fb_tib %>%
  dplyr::summarize(
    median = median(friends),
    mean = mean(friends),
    `trimmed mean 10%` = mean(friends, trim = 0.1)
  ) %>%
  round(., digits = 2)
## # A tibble: 1 x 3
##   median  mean `trimmed mean 10%`
##    <dbl> <dbl>              <dbl>
## 1     98    95               87.7
Pieces of great 2.2
#' Summarise one column with its median, mean and 10% trimmed mean
#'
#' @param tibble A data frame or tibble containing the column to summarise.
#' @param variable The column to summarise, given as a bare (unquoted) name.
#' @return The result of `dplyr::summarise()` with the columns `median`,
#'   `mean` and `trimmed mean 10%`, rounded to 2 decimal places.
get_summary <- function(tibble, variable) {
  # `{{ }}` forwards the caller's bare column name into summarise(); it
  # replaces the older enquo() + `!!` pair and, together with the direct
  # (pipe-free) call, means only the dplyr namespace is required.
  summary_tbl <- dplyr::summarise(
    tibble,
    median = median({{ variable }}),
    mean = mean({{ variable }}),
    `trimmed mean 10%` = mean({{ variable }}, trim = 0.1)
  )
  round(summary_tbl, 2)
}
# The piped tibble becomes the first argument of get_summary()
fb_tib %>%
  get_summary(friends)
## # A tibble: 1 x 3
##   median  mean `trimmed mean 10%`
##    <dbl> <dbl>              <dbl>
## 1     98    95               87.7
Or, annoy people by using random names for the inputs of the function:
#' Summarise one column with its median, mean and 10% trimmed mean
#'
#' Same behaviour as a sensibly named version; the argument names are
#' arbitrary on purpose to show that names do not affect what a function does.
#'
#' @param johnson_pitchfork A data frame or tibble containing the column to
#'   summarise.
#' @param harry_the_hungy_hippo The column to summarise, given as a bare
#'   (unquoted) name.
#' @return The result of `dplyr::summarise()` with the columns `median`,
#'   `mean` and `trimmed mean 10%`, rounded to 2 decimal places.
get_summary <- function(johnson_pitchfork, harry_the_hungy_hippo) {
  # `{{ }}` forwards the caller's bare column name into summarise(); it
  # replaces the older enquo() + `!!` pair and, together with the direct
  # (pipe-free) call, means only the dplyr namespace is required.
  summary_tbl <- dplyr::summarise(
    johnson_pitchfork,
    median = median({{ harry_the_hungy_hippo }}),
    mean = mean({{ harry_the_hungy_hippo }}),
    `trimmed mean 10%` = mean({{ harry_the_hungy_hippo }}, trim = 0.1)
  )
  round(summary_tbl, 2)
}
# Called exactly as before: the argument names inside the function are
# invisible to a caller who passes arguments by position
fb_tib %>%
  get_summary(friends)
## # A tibble: 1 x 3
##   median  mean `trimmed mean 10%`
##    <dbl> <dbl>              <dbl>
## 1     98    95               87.7
Include options relating to mean() and median():
#' Summarise one column with its median and (optionally trimmed) mean
#'
#' @param tibble A data frame or tibble containing the column to summarise.
#' @param variable The column to summarise, given as a bare (unquoted) name.
#' @param na_remove Passed as `na.rm` to both `median()` and `mean()`.
#'   Defaults to `FALSE`.
#' @param trim_val Passed as `trim` to `mean()`. Defaults to `0` (no
#'   trimming).
#' @return The result of `dplyr::summarise()` with the columns `median` and
#'   `mean`, rounded to 2 decimal places. When `trim_val` is above 0 the
#'   `mean` column holds the trimmed mean.
get_summary <- function(tibble, variable, na_remove = FALSE, trim_val = 0) {
  # `{{ }}` forwards the caller's bare column name into summarise(); it
  # replaces the older enquo() + `!!` pair and, together with the direct
  # (pipe-free) call, means only the dplyr namespace is required.
  summary_tbl <- dplyr::summarise(
    tibble,
    median = median({{ variable }}, na.rm = na_remove),
    mean = mean({{ variable }}, na.rm = na_remove, trim = trim_val)
  )
  round(summary_tbl, 2)
}
# Drop missing values and trim 10% from each end when computing the mean
fb_tib %>%
  get_summary(friends, na_remove = TRUE, trim_val = 0.1)
## # A tibble: 1 x 2
##   median  mean
##    <dbl> <dbl>
## 1     98  87.7