R code Chapter 8

Mae Jemstone character from Discovering Statistics using R and RStudio

This document contains abridged sections from Discovering Statistics Using R and RStudio by Andy Field so there are some copyright considerations. You can use this material for teaching and non-profit activities but please do not meddle with it or claim it as your own work. See the full license terms at the bottom of the page.

Load packages

Remember to load the tidyverse:

library(tidyverse)

Load the data

Remember to install the package for the book with library(discovr). After which you can load data into a tibble by executing:

name_of_tib <- discovr::name_of_data

For example, execute these lines to create the tibbles referred to in the chapter:

album_tib <- discovr::album_sales
df_tib <- discovr::df_beta
pub_tib <- discovr::pubs

If you want to read the file from the CSV and you have set up your project folder as I suggest in Chapter 1, then the general format of the code you would use is:

tibble_name <- here::here("../data/name_of_file.csv") %>%
  readr::read_csv() %>%
  dplyr::mutate(
    ...
    code to convert character variables to factors
    ...
  )

In which you’d replace tibble_name with the name you want to assign to the tibble and change name_of_file.csv to the name of the file that you’re trying to load. You can use mutate to convert categorical variables to factors. For example, for the album_sales data you would load it by executing:

library(here)

album_tib <- here::here("data/album_sales.csv") %>%
  readr::read_csv()

Self test on df_beta

Load the data and fit the model with all cases.

df_tib <- discovr::df_beta

df_lm <- df_tib %>% 
  lm(y ~ x, data = .)

df_lm %>% 
  broom::tidy()

## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   29.0      0.992       29.2 1.58e-22
## 2 x             -0.903    0.0559     -16.2 9.90e-16

Fit the model with case 30 excluded.

df_tib %>% 
  dplyr::filter(case != 30) %>% 
  lm(y ~ x, data = .) %>% 
  broom::tidy()

## # A tibble: 2 x 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)      31.  1.39e-15   2.24e16       0
## 2 x                -1   7.68e-17  -1.30e16       0

Other diagnostics (look at the values for case 30):

dfbeta(df_lm)

##      (Intercept)             x
## 1   0.0689655172 -6.674082e-03
## 2   0.0544297438 -5.469987e-03
## 3   0.0418250951 -4.399554e-03
## 4   0.0310406796 -3.454527e-03
## 5   0.0219815743 -2.627778e-03
## 6   0.0145669922 -1.913176e-03
## 7   0.0087287732 -1.305476e-03
## 8   0.0044101433 -8.002276e-04
## 9   0.0015647004 -3.936988e-04
## 10  0.0001555936 -8.281594e-05
## 11  0.0001548707  1.348874e-04
## 12  0.0015429718  2.613097e-04
## 13  0.0043083551  2.978126e-04
## 14  0.0084472431  2.452425e-04
## 15  0.0139634801  1.039465e-04
## 16  0.0208684978 -1.262208e-04
## 17  0.0291813853 -4.458955e-04
## 18  0.0389290660 -8.562111e-04
## 19  0.0501465823 -1.358811e-03
## 20  0.0628774973 -1.955867e-03
## 21  0.0771744204 -2.650110e-03
## 22  0.0930996714 -3.444865e-03
## 23  0.1107260986 -4.344093e-03
## 24  0.1301380733 -5.352453e-03
## 25  0.1514326877 -6.475366e-03
## 26  0.1747211896 -7.719099e-03
## 27  0.2001306976 -9.090861e-03
## 28  0.2278062490 -1.059893e-02
## 29  0.2579132474 -1.225277e-02
## 30 -2.0000000000  9.677419e-02

Jane superbrain influential cases

Load the data and fit the model with all cases.

pub_tib <- discovr::pubs

pub_lm <- pub_tib %>% 
  lm(mortality ~ pubs, data = .) 

pub_lm %>% 
  broom::tidy()

## # A tibble: 2 x 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   3352.     781.        4.29 0.00515
## 2 pubs            14.3      4.30      3.33 0.0157

Save influential cases

pub_inf <- pub_lm %>% 
  broom::augment() %>%
  dplyr::rename(
    Residual = .resid,
    `Cook's distance` = .cooksd,
    Leverage = .hat,
  ) %>% dplyr::mutate(
    District = 1:8,
    `DF beta (intercept)` = dfbeta(pub_lm)[, 1] %>% round(., 3),
    `DF beta (pubs)` = dfbeta(pub_lm)[, 2] %>% round(., 3),
  ) %>% 
  dplyr::select(District, Residual, `Cook's distance`, Leverage, `DF beta (intercept)`, `DF beta (pubs)`) %>% 
  round(., 3)

pub_inf

## # A tibble: 8 x 6
##   District Residual `Cook's distance` Leverage `DF beta (inter… `DF beta (pubs)`
##      <dbl>    <dbl>             <dbl>    <dbl>            <dbl>            <dbl>
## 1        1   2495.              0.213    0.166           -510.             1.39 
## 2        2   1639.              0.085    0.157           -321.             0.802
## 3        3    782.              0.018    0.149           -147.             0.33 
## 4        4    -74.5             0        0.143             13.5           -0.027
## 5        5   -931.              0.023    0.137            161.            -0.273
## 6        6  -1788.              0.081    0.132            298.            -0.411
## 7        7  -2644.              0.171    0.129            423.            -0.444
## 8        8    521.            227.       0.987           3352.           -85.7

Summary information for album sales

Summary statistics

album_sum <- album_tib %>% 
  tidyr::pivot_longer(
    cols = adverts:image,
    names_to = "Variable",
    values_to = "value"
    ) %>% 
  dplyr::group_by(Variable) %>% 
  dplyr::summarize(
    Mean = mean(value, na.rm = TRUE),
    SD = sd(value, na.rm = TRUE),
    `95% CI Lower` = ggplot2::mean_cl_normal(value)$ymin,
    `95% CI Upper` = ggplot2::mean_cl_normal(value)$ymax,
    Min = min(value, na.rm = TRUE),
    Max = max(value, na.rm = TRUE),
  )
album_sum

## # A tibble: 4 x 7
##   Variable   Mean     SD `95% CI Lower` `95% CI Upper`   Min   Max
##   <chr>     <dbl>  <dbl>          <dbl>          <dbl> <dbl> <dbl>
## 1 adverts  614.   486.           547.           682.    9.10 2272.
## 2 airplay   27.5   12.3           25.8           29.2   0      63 
## 3 image      6.77   1.40           6.58           6.96  1      10 
## 4 sales    193.    80.7          182.           204.   10     360

Scatterplot of album sales against advertising

ggplot2::ggplot(album_tib, aes(adverts, sales)) +
  geom_point(colour = "#5c97bf") +
  coord_cartesian(ylim = c(0, 400), xlim = c(0, 2500)) +
  scale_x_continuous(breaks = seq(0, 2500, 500)) +
  scale_y_continuous(breaks = seq(0, 400, 100)) +
  labs(x = "Advertising budget (thousands)", y = "Album sales (thousands)") +
  geom_smooth(method = "lm", colour = "#d47500", fill = "#d47500", alpha = 0.2) +
  theme_minimal()

Fit a model with one predictor

album_lm <- lm(sales ~ adverts, data = album_tib, na.action = na.exclude)

get text output for the model:

summary(album_lm)

## 
## Call:
## lm(formula = sales ~ adverts, data = album_tib, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -152.949  -43.796   -0.393   37.040  211.866 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.341e+02  7.537e+00  17.799   <2e-16 ***
## adverts     9.612e-02  9.632e-03   9.979   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 65.99 on 198 degrees of freedom
## Multiple R-squared:  0.3346,	Adjusted R-squared:  0.3313 
## F-statistic: 99.59 on 1 and 198 DF,  p-value: < 2.2e-16

Get the confidence intervals for the model parameters old-style:

confint(album_lm)

##                    2.5 %      97.5 %
## (Intercept) 119.27768082 149.0021948
## adverts       0.07712929   0.1151197

Or, better …. use broom() to get nicely tabulated fit statistics:

broom::glance(album_lm)

## # A tibble: 1 x 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.335         0.331  66.0      99.6 2.94e-19     1 -1121. 2247. 2257.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

Finally, you can compute multiple R:

summary(album_lm)$r.squared %>% 
  sqrt()

## [1] 0.5784877

Also use broom() to get nicely tabulated parameter estimates (including confidence intervals):

broom::tidy(album_lm, conf.int = TRUE)

## # A tibble: 2 x 7
##   term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept) 134.       7.54        17.8  5.97e-43 119.       149.   
## 2 adverts       0.0961   0.00963      9.98 2.94e-19   0.0771     0.115

Impress your friends by rounding values to 3 decimal places using mutate() and across()

broom::tidy(album_lm, conf.int = TRUE) %>%
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )

## # A tibble: 2 x 7
##   term        estimate std.error statistic p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 (Intercept)  134.         7.54     17.8        0  119.      149.   
## 2 adverts        0.096      0.01      9.98       0    0.077     0.115

Or by using the pixiedust package:

broom::tidy(album_lm, conf.int = TRUE) %>%
  pixiedust::dust() %>% 
  pixiedust::sprinkle(col = 2:7, round = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	134.14	7.537	17.799	0	119.278	149.002
adverts	0.096	0.01	9.979	0	0.077	0.115

Fit a model with several predictor

Bivariate correlations and scatterplots

Plot the data

GGally::ggscatmat(album_tib, columns = c("adverts", "airplay", "image", "sales")) +
  theme_minimal()

Build the model

album_full_lm <- lm(sales ~ adverts + airplay + image, data = album_tib, na.action = na.exclude)

Or, use the update function:

album_full_lm <- update(album_lm, .~. + airplay + image)

Fit statistics

broom::glance(album_full_lm)

## # A tibble: 1 x 12
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1     0.665         0.660  47.1      129. 2.88e-46     3 -1052. 2114. 2131.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

multiple R-square:

summary(album_full_lm)$r.squared %>% 
  sqrt()

## [1] 0.8152715

Compare models

anova(album_lm, album_full_lm) %>% 
  broom::tidy()

## # A tibble: 2 x 6
##   res.df     rss    df   sumsq statistic   p.value
##    <dbl>   <dbl> <dbl>   <dbl>     <dbl>     <dbl>
## 1    198 862264.    NA     NA       NA   NA       
## 2    196 434575.     2 427690.      96.4  6.88e-30

Parameter estimates

As text:

summary(album_full_lm)

## 
## Call:
## lm(formula = sales ~ adverts + airplay + image, data = album_tib, 
##     na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -121.324  -28.336   -0.451   28.967  144.132 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -26.612958  17.350001  -1.534    0.127    
## adverts       0.084885   0.006923  12.261  < 2e-16 ***
## airplay       3.367425   0.277771  12.123  < 2e-16 ***
## image        11.086335   2.437849   4.548 9.49e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.09 on 196 degrees of freedom
## Multiple R-squared:  0.6647,	Adjusted R-squared:  0.6595 
## F-statistic: 129.5 on 3 and 196 DF,  p-value: < 2.2e-16

confint(album_full_lm)

##                    2.5 %      97.5 %
## (Intercept) -60.82960967  7.60369295
## adverts       0.07123166  0.09853799
## airplay       2.81962186  3.91522848
## image         6.27855218 15.89411823

As a nicely formatted table:

broom::tidy(album_full_lm, conf.int = TRUE)

## # A tibble: 4 x 7
##   term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept) -26.6     17.4         -1.53 1.27e- 1 -60.8       7.60  
## 2 adverts       0.0849   0.00692     12.3  5.05e-26   0.0712    0.0985
## 3 airplay       3.37     0.278       12.1  1.33e-25   2.82      3.92  
## 4 image        11.1      2.44         4.55 9.49e- 6   6.28     15.9

Restrict the output to 3 decimal places

broom::tidy(album_full_lm, conf.int = TRUE) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )

## # A tibble: 4 x 7
##   term        estimate std.error statistic p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 (Intercept)  -26.6      17.4       -1.53   0.127  -60.8       7.60 
## 2 adverts        0.085     0.007     12.3    0        0.071     0.099
## 3 airplay        3.37      0.278     12.1    0        2.82      3.92 
## 4 image         11.1       2.44       4.55   0        6.28     15.9

Standardized betas

parameters::model_parameters(album_full_lm, standardize = "refit", digits = 3)

## Parameter   | Coefficient |    SE |        95% CI |          t |  df |      p
## -----------------------------------------------------------------------------
## (Intercept) |  -3.982e-17 | 0.041 | [-0.08, 0.08] | -9.650e-16 | 196 | > .999
## adverts     |       0.511 | 0.042 | [ 0.43, 0.59] |     12.261 | 196 | < .001
## airplay     |       0.512 | 0.042 | [ 0.43, 0.60] |     12.123 | 196 | < .001
## image       |       0.192 | 0.042 | [ 0.11, 0.27] |      4.548 | 196 | < .001

Influence measures

album_full_rsd <- album_full_lm %>% 
  broom::augment() %>% 
  tibble::rowid_to_column(var = "case_no")

album_full_rsd

## # A tibble: 200 x 11
##    case_no sales adverts airplay image .fitted  .resid .std.resid    .hat .sigma
##      <int> <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>      <dbl>   <dbl>  <dbl>
##  1       1   330    10.3      43    10   230.  -100.        2.18  0.0472    46.6
##  2       2   120   986.       28     7   229.   109.       -2.32  0.00801   46.6
##  3       3   360  1446.       35     7   292.   -68.4       1.47  0.0207    46.9
##  4       4   270  1188.       33     7   263.    -7.02      0.150 0.0126    47.2
##  5       5   220   575.       44     5   226.     5.75     -0.124 0.0261    47.2
##  6       6   170   569.       19     5   141.   -28.9       0.618 0.0142    47.2
##  7       7    70   472.       20     1    91.9   21.9      -0.487 0.0910    47.2
##  8       8   210   537.       22     9   193.   -17.1       0.368 0.0209    47.2
##  9       9   200   514.       21     7   165.   -34.7       0.739 0.00691   47.1
## 10      10   300   174.       40     7   200.   -99.5       2.13  0.0154    46.7
## # … with 190 more rows, and 1 more variable: .cooksd <dbl>

album_full_inf <- influence.measures(album_full_lm)

album_full_inf <- album_full_inf$infmat %>% 
  tibble::as_tibble() %>%
  tibble::rowid_to_column(var = "case_no")
  

album_full_inf

## # A tibble: 200 x 9
##    case_no   dfb.1_ dfb.advr dfb.arpl  dfb.imag   dffit cov.r    cook.d     hat
##      <int>    <dbl>    <dbl>    <dbl>     <dbl>   <dbl> <dbl>     <dbl>   <dbl>
##  1       1 -0.316   -0.242    0.158    0.353     0.489  0.971 0.0587    0.0472 
##  2       2  0.0126  -0.126    0.00942 -0.0187   -0.211  0.920 0.0109    0.00801
##  3       3 -0.0381   0.175    0.0466  -0.00538   0.214  0.997 0.0114    0.0207 
##  4       4 -0.00258  0.0122   0.00344  0.000129  0.0169 1.03  0.0000717 0.0126 
##  5       5 -0.00858  0.00109 -0.0143   0.0136   -0.0202 1.05  0.000103  0.0261 
##  6       6  0.0658   0.00224 -0.0208  -0.0512    0.0741 1.03  0.00138   0.0142 
##  7       7 -0.145   -0.00100 -0.00509  0.148    -0.154  1.12  0.00594   0.0910 
##  8       8 -0.0281  -0.00586 -0.0192   0.0452    0.0537 1.04  0.000723  0.0209 
##  9       9  0.0112  -0.00896 -0.0290   0.0145    0.0615 1.02  0.000949  0.00691
## 10      10 -0.0126  -0.156    0.168    0.00672   0.269  0.944 0.0178    0.0154 
## # … with 190 more rows

If you want to keep all of your diagnostic stats in one object:

album_full_rsd <- album_full_rsd %>% 
  dplyr::left_join(., album_full_inf, by = "case_no") %>% 
  dplyr::select(-c(cook.d, hat))

Variance inflation factor

car::vif(album_full_lm)

##  adverts  airplay    image 
## 1.014593 1.042504 1.038455

1/car::vif(album_full_lm)

##   adverts   airplay     image 
## 0.9856172 0.9592287 0.9629695

car::vif(album_full_lm) %>% 
  mean()

## [1] 1.03185

Casewise diagnostics

get_cum_percent <- function(var,  cut_off = 1.96){
  ecdf_var <- abs(var) %>% ecdf()
  100*(1 - ecdf_var(cut_off))
}


album_full_rsd %>%
  dplyr::summarize(
    `z >= 1.96` = get_cum_percent(.std.resid),
    `z >= 2.58` = get_cum_percent(.std.resid, cut_off = 2.58),
    `z >= 3.29` = get_cum_percent(.std.resid, cut_off = 3.29)
  )

## # A tibble: 1 x 3
##   `z >= 1.96` `z >= 2.58` `z >= 3.29`
##         <dbl>       <dbl>       <dbl>
## 1        6.50          1.           0

album_full_rsd %>% 
  dplyr::filter(abs(.std.resid) >= 1.96) %>%
  dplyr::select(case_no, .std.resid, .resid) %>%
  dplyr::arrange(.std.resid)

## # A tibble: 13 x 3
##    case_no .std.resid .resid
##      <int>      <dbl>  <dbl>
##  1     164      -2.63  121. 
##  2      47      -2.46  115. 
##  3      55      -2.46  114. 
##  4      68      -2.36  110. 
##  5       2      -2.32  109. 
##  6     200      -2.09   97.2
##  7     167      -2.00   93.6
##  8     100       2.10  -97.3
##  9      52       2.10  -97.4
## 10      61       2.10  -98.8
## 11      10       2.13  -99.5
## 12       1       2.18 -100. 
## 13     169       3.09 -144.

album_full_inf %>%
  dplyr::arrange(desc(cook.d)) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )

## # A tibble: 200 x 9
##    case_no dfb.1_ dfb.advr dfb.arpl dfb.imag  dffit cov.r cook.d   hat
##      <dbl>  <dbl>    <dbl>    <dbl>    <dbl>  <dbl> <dbl>  <dbl> <dbl>
##  1     164  0.18     0.290   -0.401   -0.117 -0.54  0.92   0.071 0.039
##  2       1 -0.316   -0.242    0.158    0.353  0.489 0.971  0.059 0.047
##  3     169 -0.168   -0.258    0.257    0.17   0.461 0.853  0.051 0.021
##  4      55  0.174   -0.326   -0.023   -0.124 -0.407 0.925  0.04  0.026
##  5      52  0.353   -0.029   -0.137   -0.27   0.367 0.96   0.033 0.029
##  6     100  0.061    0.145   -0.3      0.068  0.357 0.959  0.031 0.028
##  7     119  0.004   -0.054   -0.312    0.129 -0.353 1.00   0.031 0.04 
##  8      99  0.088   -0.111   -0.279    0.039 -0.337 0.99   0.028 0.034
##  9     200  0.166   -0.046    0.142   -0.259 -0.32  0.954  0.025 0.023
## 10      47  0.066    0.196    0.048   -0.179 -0.315 0.915  0.024 0.016
## # … with 190 more rows

Show case with any df beta with an absolute value greater than 1.

album_full_inf %>% 
  dplyr::filter_at(
    vars(starts_with("dfb")),
    any_vars(abs(.) > 1)
  ) %>% 
  dplyr::select(case_no, starts_with("dfb")) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )

## # A tibble: 0 x 5
## # … with 5 variables: case_no <dbl>, dfb.1_ <dbl>, dfb.advr <dbl>,
## #   dfb.arpl <dbl>, dfb.imag <dbl>

Show cases with problematic leverage or CVR:

leverage_thresh <- 3*mean(album_full_inf$hat, na.rm = TRUE)                

album_full_inf %>%
  dplyr::filter(
    hat > leverage_thresh | !between(cov.r, 1 - leverage_thresh, 1 + leverage_thresh)
    ) %>%  
  dplyr::select(case_no, cov.r, hat, cook.d) %>% 
  dplyr::mutate(
    dplyr::across(where(is.numeric), ~round(., 3))
  )

## # A tibble: 21 x 4
##    case_no cov.r   hat cook.d
##      <dbl> <dbl> <dbl>  <dbl>
##  1       2 0.92  0.008  0.011
##  2       7 1.12  0.091  0.006
##  3      12 1.07  0.064  0.017
##  4      23 1.07  0.045  0    
##  5      42 1.06  0.058  0.013
##  6      43 1.07  0.046  0.001
##  7      47 0.915 0.016  0.024
##  8      55 0.925 0.026  0.04 
##  9      61 0.937 0.005  0.006
## 10      68 0.924 0.016  0.022
## # … with 11 more rows

Diagnostic plots

Standard influence plots

plot(album_full_lm, which = 4:6)

Residual plot:

plot(album_full_lm, which = c(1, 3))

Q-Q plot:

plot(album_full_lm, which = 2)

Prettier diagnostic plots

The ggfortify package allows us to produce prettier versions of the diagnostic plots. For example:

influence plots

library(ggfortify)
ggplot2::autoplot(album_full_lm, which = 4:6,
                  colour = "#5c97bf",
                  smooth.colour = "#ef4836",
                  alpha = 0.5,
                  size = 1) + 
  theme_minimal()

Residual plot:

ggplot2::autoplot(album_full_lm, which = c(1, 3),
                  colour = "#5c97bf",
                  smooth.colour = "#ef4836",
                  alpha = 0.5,
                  size = 1) + 
  theme_minimal()

Q-Q plot:

ggplot2::autoplot(album_full_lm, which = 2,
                  colour = "#5c97bf",
                  smooth.colour = "#ef4836",
                  alpha = 0.5,
                  size = 1) + 
  theme_minimal()

Robust linear model

Robust estimates:

album_full_rob <- robust::lmRob(sales ~ adverts + airplay + image, data = album_tib, na.action = na.exclude)
summary(album_full_rob)

## 
## Call:
## robust::lmRob(formula = sales ~ adverts + airplay + image, data = album_tib, 
##     na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -124.62  -30.47   -1.76   28.89  141.35 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -31.523201  21.098348  -1.494 0.136756    
## adverts       0.085295   0.008501  10.033  < 2e-16 ***
## airplay       3.418749   0.339017  10.084  < 2e-16 ***
## image        11.771441   2.973559   3.959 0.000105 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.61 on 196 degrees of freedom
## Multiple R-Squared: 0.5785 
## 
## Test for Bias:
##             statistic p-value
## M-estimate      2.299  0.6809
## LS-estimate     7.659  0.1049

Robust standard errors and confidence intervals:

parameters::model_parameters(
  album_full_lm,
  robust = TRUE,
  vcov.type = "HC4",
  digits = 3
  )

## Parameter   | Coefficient |     SE |          95% CI |      t |  df |      p
## ----------------------------------------------------------------------------
## (Intercept) |     -26.613 | 16.170 | [-58.50,  5.28] | -1.646 | 196 | 0.101 
## adverts     |       0.085 |  0.007 | [  0.07,  0.10] | 12.246 | 196 | < .001
## airplay     |       3.367 |  0.315 | [  2.75,  3.99] | 10.687 | 196 | < .001
## image       |      11.086 |  2.247 | [  6.65, 15.52] |  4.933 | 196 | < .001

Bootstrap

parameters::model_parameters(
  album_full_lm,
  bootstrap = TRUE,
  digits = 3
  )

## Parameter   | Coefficient |          95% CI |      p
## ----------------------------------------------------
## (Intercept) |     -26.928 | [-55.65,  7.06] | 0.116 
## adverts     |       0.085 | [  0.07,  0.10] | < .001
## airplay     |       3.366 | [  2.74,  4.02] | < .001
## image       |      11.059 | [  6.36, 15.40] | < .001

Bayes factors

album_bf <- album_tib %>%
  BayesFactor::regressionBF(sales ~ adverts + airplay + image, rscaleCont = "medium", data = .)

album_bf

## Bayes factor analysis
## --------------
## [1] adverts                   : 1.320123e+16 ±0%
## [2] airplay                   : 4.723817e+17 ±0.01%
## [3] image                     : 6039.289     ±0%
## [4] adverts + airplay         : 5.65038e+39  ±0%
## [5] adverts + image           : 2.65494e+20  ±0%
## [6] airplay + image           : 1.034464e+20 ±0%
## [7] adverts + airplay + image : 7.746101e+42 ±0%
## 
## Against denominator:
##   Intercept only 
## ---
## Bayes factor type: BFlinearModel, JZS

Bayesian parameter estimates

album_full_bf <- album_tib %>%
  BayesFactor::lmBF(sales ~ adverts + airplay + image, rscaleCont = "medium", data = .)

album_full_post <- BayesFactor::posterior(album_full_bf, iterations = 10000)
summary(album_full_post)

## 
## Iterations = 1:10000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##              Mean        SD  Naive SE Time-series SE
## mu      1.932e+02 3.309e+00 3.309e-02      3.309e-02
## adverts 8.406e-02 7.038e-03 7.038e-05      7.038e-05
## airplay 3.333e+00 2.804e-01 2.804e-03      2.804e-03
## image   1.096e+01 2.475e+00 2.475e-02      2.475e-02
## sig2    2.253e+03 2.321e+02 2.321e+00      2.313e+00
## g       9.859e-01 1.553e+00 1.553e-02      1.553e-02
## 
## 2. Quantiles for each variable:
## 
##              2.5%       25%       50%       75%     97.5%
## mu      1.868e+02 1.909e+02 1.932e+02 1.954e+02 1.998e+02
## adverts 7.004e-02 7.932e-02 8.412e-02 8.882e-02 9.798e-02
## airplay 2.780e+00 3.145e+00 3.336e+00 3.522e+00 3.872e+00
## image   6.056e+00 9.269e+00 1.097e+01 1.266e+01 1.571e+01
## sig2    1.841e+03 2.092e+03 2.239e+03 2.395e+03 2.750e+03
## g       1.770e-01 3.708e-01 6.017e-01 1.047e+00 4.203e+00

Last updated on May 29, 2025