R code Chapter 8
This document contains abridged sections from Discovering Statistics Using R and RStudio by Andy Field so there are some copyright considerations. You can use this material for teaching and non-profit activities but please do not meddle with it or claim it as your own work. See the full license terms at the bottom of the page.
Load packages
Remember to load the tidyverse:
library(tidyverse)
Load the data
Remember to install the package for the book with library(discovr)
. After which you can load data into a tibble by executing:
name_of_tib <- discovr::name_of_data
For example, execute these lines to create the tibbles referred to in the chapter:
album_tib <- discovr::album_sales
df_tib <- discovr::df_beta
pub_tib <- discovr::pubs
If you want to read the file from the CSV and you have set up your project folder as I suggest in Chapter 1, then the general format of the code you would use is:
tibble_name <- here::here("../data/name_of_file.csv") %>%
readr::read_csv() %>%
dplyr::mutate(
...
code to convert character variables to factors
...
)
In which you’d replace tibble_name
with the name you want to assign to the tibble and change name_of_file.csv
to the name of the file that you’re trying to load. You can use mutate to convert categorical variables to factors. For example, for the album_sales
data you would load it by executing:
library(here)
album_tib <- here::here("data/album_sales.csv") %>%
readr::read_csv()
Self test on df_beta
Load the data and fit the model with all cases.
df_tib <- discovr::df_beta
df_lm <- df_tib %>%
lm(y ~ x, data = .)
df_lm %>%
broom::tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 29.0 0.992 29.2 1.58e-22
## 2 x -0.903 0.0559 -16.2 9.90e-16
Fit the model with case 30 excluded.
df_tib %>%
dplyr::filter(case != 30) %>%
lm(y ~ x, data = .) %>%
broom::tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 31. 1.39e-15 2.24e16 0
## 2 x -1 7.68e-17 -1.30e16 0
Other diagnostics (look at the values for case 30):
dfbeta(df_lm)
## (Intercept) x
## 1 0.0689655172 -6.674082e-03
## 2 0.0544297438 -5.469987e-03
## 3 0.0418250951 -4.399554e-03
## 4 0.0310406796 -3.454527e-03
## 5 0.0219815743 -2.627778e-03
## 6 0.0145669922 -1.913176e-03
## 7 0.0087287732 -1.305476e-03
## 8 0.0044101433 -8.002276e-04
## 9 0.0015647004 -3.936988e-04
## 10 0.0001555936 -8.281594e-05
## 11 0.0001548707 1.348874e-04
## 12 0.0015429718 2.613097e-04
## 13 0.0043083551 2.978126e-04
## 14 0.0084472431 2.452425e-04
## 15 0.0139634801 1.039465e-04
## 16 0.0208684978 -1.262208e-04
## 17 0.0291813853 -4.458955e-04
## 18 0.0389290660 -8.562111e-04
## 19 0.0501465823 -1.358811e-03
## 20 0.0628774973 -1.955867e-03
## 21 0.0771744204 -2.650110e-03
## 22 0.0930996714 -3.444865e-03
## 23 0.1107260986 -4.344093e-03
## 24 0.1301380733 -5.352453e-03
## 25 0.1514326877 -6.475366e-03
## 26 0.1747211896 -7.719099e-03
## 27 0.2001306976 -9.090861e-03
## 28 0.2278062490 -1.059893e-02
## 29 0.2579132474 -1.225277e-02
## 30 -2.0000000000 9.677419e-02
Jane superbrain influential cases
Load the data and fit the model with all cases.
pub_tib <- discovr::pubs
pub_lm <- pub_tib %>%
lm(mortality ~ pubs, data = .)
pub_lm %>%
broom::tidy()
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3352. 781. 4.29 0.00515
## 2 pubs 14.3 4.30 3.33 0.0157
Save influential cases
pub_inf <- pub_lm %>%
broom::augment() %>%
dplyr::rename(
Residual = .resid,
`Cook's distance` = .cooksd,
Leverage = .hat,
) %>% dplyr::mutate(
District = 1:8,
`DF beta (intercept)` = dfbeta(pub_lm)[, 1] %>% round(., 3),
`DF beta (pubs)` = dfbeta(pub_lm)[, 2] %>% round(., 3),
) %>%
dplyr::select(District, Residual, `Cook's distance`, Leverage, `DF beta (intercept)`, `DF beta (pubs)`) %>%
round(., 3)
pub_inf
## # A tibble: 8 x 6
## District Residual `Cook's distance` Leverage `DF beta (inter… `DF beta (pubs)`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2495. 0.213 0.166 -510. 1.39
## 2 2 1639. 0.085 0.157 -321. 0.802
## 3 3 782. 0.018 0.149 -147. 0.33
## 4 4 -74.5 0 0.143 13.5 -0.027
## 5 5 -931. 0.023 0.137 161. -0.273
## 6 6 -1788. 0.081 0.132 298. -0.411
## 7 7 -2644. 0.171 0.129 423. -0.444
## 8 8 521. 227. 0.987 3352. -85.7
Summary information for album sales
Summary statistics
album_sum <- album_tib %>%
tidyr::pivot_longer(
cols = adverts:image,
names_to = "Variable",
values_to = "value"
) %>%
dplyr::group_by(Variable) %>%
dplyr::summarize(
Mean = mean(value, na.rm = TRUE),
SD = sd(value, na.rm = TRUE),
`95% CI Lower` = ggplot2::mean_cl_normal(value)$ymin,
`95% CI Upper` = ggplot2::mean_cl_normal(value)$ymax,
Min = min(value, na.rm = TRUE),
Max = max(value, na.rm = TRUE),
)
album_sum
## # A tibble: 4 x 7
## Variable Mean SD `95% CI Lower` `95% CI Upper` Min Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 adverts 614. 486. 547. 682. 9.10 2272.
## 2 airplay 27.5 12.3 25.8 29.2 0 63
## 3 image 6.77 1.40 6.58 6.96 1 10
## 4 sales 193. 80.7 182. 204. 10 360
Scatterplot of album sales against advertising
ggplot2::ggplot(album_tib, aes(adverts, sales)) +
geom_point(colour = "#5c97bf") +
coord_cartesian(ylim = c(0, 400), xlim = c(0, 2500)) +
scale_x_continuous(breaks = seq(0, 2500, 500)) +
scale_y_continuous(breaks = seq(0, 400, 100)) +
labs(x = "Advertising budget (thousands)", y = "Album sales (thousands)") +
geom_smooth(method = "lm", colour = "#d47500", fill = "#d47500", alpha = 0.2) +
theme_minimal()
Fit a model with one predictor
album_lm <- lm(sales ~ adverts, data = album_tib, na.action = na.exclude)
get text output for the model:
summary(album_lm)
##
## Call:
## lm(formula = sales ~ adverts, data = album_tib, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -152.949 -43.796 -0.393 37.040 211.866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.341e+02 7.537e+00 17.799 <2e-16 ***
## adverts 9.612e-02 9.632e-03 9.979 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 65.99 on 198 degrees of freedom
## Multiple R-squared: 0.3346, Adjusted R-squared: 0.3313
## F-statistic: 99.59 on 1 and 198 DF, p-value: < 2.2e-16
Get the confidence intervals for the model parameters old-style:
confint(album_lm)
## 2.5 % 97.5 %
## (Intercept) 119.27768082 149.0021948
## adverts 0.07712929 0.1151197
Or, better …. use broom()
to get nicely tabulated fit statistics:
broom::glance(album_lm)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.335 0.331 66.0 99.6 2.94e-19 1 -1121. 2247. 2257.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Finally, you can compute multiple R:
summary(album_lm)$r.squared %>%
sqrt()
## [1] 0.5784877
Also use broom()
to get nicely tabulated parameter estimates (including confidence intervals):
broom::tidy(album_lm, conf.int = TRUE)
## # A tibble: 2 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 134. 7.54 17.8 5.97e-43 119. 149.
## 2 adverts 0.0961 0.00963 9.98 2.94e-19 0.0771 0.115
Impress your friends by rounding values to 3 decimal places using mutate()
and across()
broom::tidy(album_lm, conf.int = TRUE) %>%
dplyr::mutate(
dplyr::across(where(is.numeric), ~round(., 3))
)
## # A tibble: 2 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 134. 7.54 17.8 0 119. 149.
## 2 adverts 0.096 0.01 9.98 0 0.077 0.115
Or by using the pixiedust
package:
broom::tidy(album_lm, conf.int = TRUE) %>%
pixiedust::dust() %>%
pixiedust::sprinkle(col = 2:7, round = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
(Intercept) | 134.14 | 7.537 | 17.799 | 0 | 119.278 | 149.002 |
adverts | 0.096 | 0.01 | 9.979 | 0 | 0.077 | 0.115 |
Fit a model with several predictor
Bivariate correlations and scatterplots
Plot the data
GGally::ggscatmat(album_tib, columns = c("adverts", "airplay", "image", "sales")) +
theme_minimal()
Build the model
album_full_lm <- lm(sales ~ adverts + airplay + image, data = album_tib, na.action = na.exclude)
Or, use the update function:
album_full_lm <- update(album_lm, .~. + airplay + image)
Fit statistics
broom::glance(album_full_lm)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.665 0.660 47.1 129. 2.88e-46 3 -1052. 2114. 2131.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
multiple R-square:
summary(album_full_lm)$r.squared %>%
sqrt()
## [1] 0.8152715
Compare models
anova(album_lm, album_full_lm) %>%
broom::tidy()
## # A tibble: 2 x 6
## res.df rss df sumsq statistic p.value
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 198 862264. NA NA NA NA
## 2 196 434575. 2 427690. 96.4 6.88e-30
Parameter estimates
As text:
summary(album_full_lm)
##
## Call:
## lm(formula = sales ~ adverts + airplay + image, data = album_tib,
## na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -121.324 -28.336 -0.451 28.967 144.132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -26.612958 17.350001 -1.534 0.127
## adverts 0.084885 0.006923 12.261 < 2e-16 ***
## airplay 3.367425 0.277771 12.123 < 2e-16 ***
## image 11.086335 2.437849 4.548 9.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.09 on 196 degrees of freedom
## Multiple R-squared: 0.6647, Adjusted R-squared: 0.6595
## F-statistic: 129.5 on 3 and 196 DF, p-value: < 2.2e-16
confint(album_full_lm)
## 2.5 % 97.5 %
## (Intercept) -60.82960967 7.60369295
## adverts 0.07123166 0.09853799
## airplay 2.81962186 3.91522848
## image 6.27855218 15.89411823
As a nicely formatted table:
broom::tidy(album_full_lm, conf.int = TRUE)
## # A tibble: 4 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -26.6 17.4 -1.53 1.27e- 1 -60.8 7.60
## 2 adverts 0.0849 0.00692 12.3 5.05e-26 0.0712 0.0985
## 3 airplay 3.37 0.278 12.1 1.33e-25 2.82 3.92
## 4 image 11.1 2.44 4.55 9.49e- 6 6.28 15.9
Restrict the output to 3 decimal places
broom::tidy(album_full_lm, conf.int = TRUE) %>%
dplyr::mutate(
dplyr::across(where(is.numeric), ~round(., 3))
)
## # A tibble: 4 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -26.6 17.4 -1.53 0.127 -60.8 7.60
## 2 adverts 0.085 0.007 12.3 0 0.071 0.099
## 3 airplay 3.37 0.278 12.1 0 2.82 3.92
## 4 image 11.1 2.44 4.55 0 6.28 15.9
Standardized betas
parameters::model_parameters(album_full_lm, standardize = "refit", digits = 3)
## Parameter | Coefficient | SE | 95% CI | t | df | p
## -----------------------------------------------------------------------------
## (Intercept) | -3.982e-17 | 0.041 | [-0.08, 0.08] | -9.650e-16 | 196 | > .999
## adverts | 0.511 | 0.042 | [ 0.43, 0.59] | 12.261 | 196 | < .001
## airplay | 0.512 | 0.042 | [ 0.43, 0.60] | 12.123 | 196 | < .001
## image | 0.192 | 0.042 | [ 0.11, 0.27] | 4.548 | 196 | < .001
Influence measures
album_full_rsd <- album_full_lm %>%
broom::augment() %>%
tibble::rowid_to_column(var = "case_no")
album_full_rsd
## # A tibble: 200 x 11
## case_no sales adverts airplay image .fitted .resid .std.resid .hat .sigma
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 330 10.3 43 10 230. -100. 2.18 0.0472 46.6
## 2 2 120 986. 28 7 229. 109. -2.32 0.00801 46.6
## 3 3 360 1446. 35 7 292. -68.4 1.47 0.0207 46.9
## 4 4 270 1188. 33 7 263. -7.02 0.150 0.0126 47.2
## 5 5 220 575. 44 5 226. 5.75 -0.124 0.0261 47.2
## 6 6 170 569. 19 5 141. -28.9 0.618 0.0142 47.2
## 7 7 70 472. 20 1 91.9 21.9 -0.487 0.0910 47.2
## 8 8 210 537. 22 9 193. -17.1 0.368 0.0209 47.2
## 9 9 200 514. 21 7 165. -34.7 0.739 0.00691 47.1
## 10 10 300 174. 40 7 200. -99.5 2.13 0.0154 46.7
## # … with 190 more rows, and 1 more variable: .cooksd <dbl>
album_full_inf <- influence.measures(album_full_lm)
album_full_inf <- album_full_inf$infmat %>%
tibble::as_tibble() %>%
tibble::rowid_to_column(var = "case_no")
album_full_inf
## # A tibble: 200 x 9
## case_no dfb.1_ dfb.advr dfb.arpl dfb.imag dffit cov.r cook.d hat
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 -0.316 -0.242 0.158 0.353 0.489 0.971 0.0587 0.0472
## 2 2 0.0126 -0.126 0.00942 -0.0187 -0.211 0.920 0.0109 0.00801
## 3 3 -0.0381 0.175 0.0466 -0.00538 0.214 0.997 0.0114 0.0207
## 4 4 -0.00258 0.0122 0.00344 0.000129 0.0169 1.03 0.0000717 0.0126
## 5 5 -0.00858 0.00109 -0.0143 0.0136 -0.0202 1.05 0.000103 0.0261
## 6 6 0.0658 0.00224 -0.0208 -0.0512 0.0741 1.03 0.00138 0.0142
## 7 7 -0.145 -0.00100 -0.00509 0.148 -0.154 1.12 0.00594 0.0910
## 8 8 -0.0281 -0.00586 -0.0192 0.0452 0.0537 1.04 0.000723 0.0209
## 9 9 0.0112 -0.00896 -0.0290 0.0145 0.0615 1.02 0.000949 0.00691
## 10 10 -0.0126 -0.156 0.168 0.00672 0.269 0.944 0.0178 0.0154
## # … with 190 more rows
If you want to keep all of your diagnostic stats in one object:
album_full_rsd <- album_full_rsd %>%
dplyr::left_join(., album_full_inf, by = "case_no") %>%
dplyr::select(-c(cook.d, hat))
Variance inflation factor
car::vif(album_full_lm)
## adverts airplay image
## 1.014593 1.042504 1.038455
1/car::vif(album_full_lm)
## adverts airplay image
## 0.9856172 0.9592287 0.9629695
car::vif(album_full_lm) %>%
mean()
## [1] 1.03185
Casewise diagnostics
get_cum_percent <- function(var, cut_off = 1.96){
ecdf_var <- abs(var) %>% ecdf()
100*(1 - ecdf_var(cut_off))
}
album_full_rsd %>%
dplyr::summarize(
`z >= 1.96` = get_cum_percent(.std.resid),
`z >= 2.58` = get_cum_percent(.std.resid, cut_off = 2.58),
`z >= 3.29` = get_cum_percent(.std.resid, cut_off = 3.29)
)
## # A tibble: 1 x 3
## `z >= 1.96` `z >= 2.58` `z >= 3.29`
## <dbl> <dbl> <dbl>
## 1 6.50 1. 0
album_full_rsd %>%
dplyr::filter(abs(.std.resid) >= 1.96) %>%
dplyr::select(case_no, .std.resid, .resid) %>%
dplyr::arrange(.std.resid)
## # A tibble: 13 x 3
## case_no .std.resid .resid
## <int> <dbl> <dbl>
## 1 164 -2.63 121.
## 2 47 -2.46 115.
## 3 55 -2.46 114.
## 4 68 -2.36 110.
## 5 2 -2.32 109.
## 6 200 -2.09 97.2
## 7 167 -2.00 93.6
## 8 100 2.10 -97.3
## 9 52 2.10 -97.4
## 10 61 2.10 -98.8
## 11 10 2.13 -99.5
## 12 1 2.18 -100.
## 13 169 3.09 -144.
album_full_inf %>%
dplyr::arrange(desc(cook.d)) %>%
dplyr::mutate(
dplyr::across(where(is.numeric), ~round(., 3))
)
## # A tibble: 200 x 9
## case_no dfb.1_ dfb.advr dfb.arpl dfb.imag dffit cov.r cook.d hat
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 164 0.18 0.290 -0.401 -0.117 -0.54 0.92 0.071 0.039
## 2 1 -0.316 -0.242 0.158 0.353 0.489 0.971 0.059 0.047
## 3 169 -0.168 -0.258 0.257 0.17 0.461 0.853 0.051 0.021
## 4 55 0.174 -0.326 -0.023 -0.124 -0.407 0.925 0.04 0.026
## 5 52 0.353 -0.029 -0.137 -0.27 0.367 0.96 0.033 0.029
## 6 100 0.061 0.145 -0.3 0.068 0.357 0.959 0.031 0.028
## 7 119 0.004 -0.054 -0.312 0.129 -0.353 1.00 0.031 0.04
## 8 99 0.088 -0.111 -0.279 0.039 -0.337 0.99 0.028 0.034
## 9 200 0.166 -0.046 0.142 -0.259 -0.32 0.954 0.025 0.023
## 10 47 0.066 0.196 0.048 -0.179 -0.315 0.915 0.024 0.016
## # … with 190 more rows
Show case with any df beta with an absolute value greater than 1.
album_full_inf %>%
dplyr::filter_at(
vars(starts_with("dfb")),
any_vars(abs(.) > 1)
) %>%
dplyr::select(case_no, starts_with("dfb")) %>%
dplyr::mutate(
dplyr::across(where(is.numeric), ~round(., 3))
)
## # A tibble: 0 x 5
## # … with 5 variables: case_no <dbl>, dfb.1_ <dbl>, dfb.advr <dbl>,
## # dfb.arpl <dbl>, dfb.imag <dbl>
Show cases with problematic leverage or CVR:
leverage_thresh <- 3*mean(album_full_inf$hat, na.rm = TRUE)
album_full_inf %>%
dplyr::filter(
hat > leverage_thresh | !between(cov.r, 1 - leverage_thresh, 1 + leverage_thresh)
) %>%
dplyr::select(case_no, cov.r, hat, cook.d) %>%
dplyr::mutate(
dplyr::across(where(is.numeric), ~round(., 3))
)
## # A tibble: 21 x 4
## case_no cov.r hat cook.d
## <dbl> <dbl> <dbl> <dbl>
## 1 2 0.92 0.008 0.011
## 2 7 1.12 0.091 0.006
## 3 12 1.07 0.064 0.017
## 4 23 1.07 0.045 0
## 5 42 1.06 0.058 0.013
## 6 43 1.07 0.046 0.001
## 7 47 0.915 0.016 0.024
## 8 55 0.925 0.026 0.04
## 9 61 0.937 0.005 0.006
## 10 68 0.924 0.016 0.022
## # … with 11 more rows
Diagnostic plots
Standard influence plots
plot(album_full_lm, which = 4:6)
Residual plot:
plot(album_full_lm, which = c(1, 3))
Q-Q plot:
plot(album_full_lm, which = 2)
Prettier diagnostic plots
The ggfortify
package allows us to produce prettier versions of the diagnostic plots. For example:
influence plots
library(ggfortify)
ggplot2::autoplot(album_full_lm, which = 4:6,
colour = "#5c97bf",
smooth.colour = "#ef4836",
alpha = 0.5,
size = 1) +
theme_minimal()
Residual plot:
ggplot2::autoplot(album_full_lm, which = c(1, 3),
colour = "#5c97bf",
smooth.colour = "#ef4836",
alpha = 0.5,
size = 1) +
theme_minimal()
Q-Q plot:
ggplot2::autoplot(album_full_lm, which = 2,
colour = "#5c97bf",
smooth.colour = "#ef4836",
alpha = 0.5,
size = 1) +
theme_minimal()
Robust linear model
Robust estimates:
album_full_rob <- robust::lmRob(sales ~ adverts + airplay + image, data = album_tib, na.action = na.exclude)
summary(album_full_rob)
##
## Call:
## robust::lmRob(formula = sales ~ adverts + airplay + image, data = album_tib,
## na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -124.62 -30.47 -1.76 28.89 141.35
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -31.523201 21.098348 -1.494 0.136756
## adverts 0.085295 0.008501 10.033 < 2e-16 ***
## airplay 3.418749 0.339017 10.084 < 2e-16 ***
## image 11.771441 2.973559 3.959 0.000105 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.61 on 196 degrees of freedom
## Multiple R-Squared: 0.5785
##
## Test for Bias:
## statistic p-value
## M-estimate 2.299 0.6809
## LS-estimate 7.659 0.1049
Robust standard errors and confidence intervals:
parameters::model_parameters(
album_full_lm,
robust = TRUE,
vcov.type = "HC4",
digits = 3
)
## Parameter | Coefficient | SE | 95% CI | t | df | p
## ----------------------------------------------------------------------------
## (Intercept) | -26.613 | 16.170 | [-58.50, 5.28] | -1.646 | 196 | 0.101
## adverts | 0.085 | 0.007 | [ 0.07, 0.10] | 12.246 | 196 | < .001
## airplay | 3.367 | 0.315 | [ 2.75, 3.99] | 10.687 | 196 | < .001
## image | 11.086 | 2.247 | [ 6.65, 15.52] | 4.933 | 196 | < .001
Bootstrap
parameters::model_parameters(
album_full_lm,
bootstrap = TRUE,
digits = 3
)
## Parameter | Coefficient | 95% CI | p
## ----------------------------------------------------
## (Intercept) | -26.928 | [-55.65, 7.06] | 0.116
## adverts | 0.085 | [ 0.07, 0.10] | < .001
## airplay | 3.366 | [ 2.74, 4.02] | < .001
## image | 11.059 | [ 6.36, 15.40] | < .001
Bayes factors
album_bf <- album_tib %>%
BayesFactor::regressionBF(sales ~ adverts + airplay + image, rscaleCont = "medium", data = .)
album_bf
## Bayes factor analysis
## --------------
## [1] adverts : 1.320123e+16 ±0%
## [2] airplay : 4.723817e+17 ±0.01%
## [3] image : 6039.289 ±0%
## [4] adverts + airplay : 5.65038e+39 ±0%
## [5] adverts + image : 2.65494e+20 ±0%
## [6] airplay + image : 1.034464e+20 ±0%
## [7] adverts + airplay + image : 7.746101e+42 ±0%
##
## Against denominator:
## Intercept only
## ---
## Bayes factor type: BFlinearModel, JZS
Bayesian parameter estimates
album_full_bf <- album_tib %>%
BayesFactor::lmBF(sales ~ adverts + airplay + image, rscaleCont = "medium", data = .)
album_full_post <- BayesFactor::posterior(album_full_bf, iterations = 10000)
summary(album_full_post)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## mu 1.932e+02 3.309e+00 3.309e-02 3.309e-02
## adverts 8.406e-02 7.038e-03 7.038e-05 7.038e-05
## airplay 3.333e+00 2.804e-01 2.804e-03 2.804e-03
## image 1.096e+01 2.475e+00 2.475e-02 2.475e-02
## sig2 2.253e+03 2.321e+02 2.321e+00 2.313e+00
## g 9.859e-01 1.553e+00 1.553e-02 1.553e-02
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## mu 1.868e+02 1.909e+02 1.932e+02 1.954e+02 1.998e+02
## adverts 7.004e-02 7.932e-02 8.412e-02 8.882e-02 9.798e-02
## airplay 2.780e+00 3.145e+00 3.336e+00 3.522e+00 3.872e+00
## image 6.056e+00 9.269e+00 1.097e+01 1.266e+01 1.571e+01
## sig2 1.841e+03 2.092e+03 2.239e+03 2.395e+03 2.750e+03
## g 1.770e-01 3.708e-01 6.017e-01 1.047e+00 4.203e+00