Week 8 figures - Lectures 14 and 15

multiple linear regression
AIC
Author
Affiliation
Lecture date

May 20, 2024

Modified

June 13, 2024

Code
# cleaning
library(tidyverse)

# visualization
theme_set(theme_classic() +
            theme(panel.grid = element_blank(),
                  axis.text = element_text(size = 18),
                  axis.title = element_text(size = 18),
                  text = element_text(family = "Lato")))
library(patchwork)
library(ggeffects)
library(flextable)
library(GGally)

# data
library(palmerpenguins)

# analysis
library(car)
library(performance)
library(broom)
library(DHARMa)
library(MuMIn)

multiple linear regression equation

\[ \begin{align} y &= \beta_0 + \beta_1x_1 + \beta_2x_2 + ... \beta_kx_k + \epsilon \end{align} \]

formulas

sum of squares for linear regression

regression (or model)

\[ SS_{reg} = \sum_{i = 1}^{n}(\hat{y} - \bar{y})^2 \]

error

\[ SS_{err} = \sum_{i = 1}^{n}(y_i - \hat{y})^2 \]

total

\[ SS_{tot} = \sum_{i = 1}^n(y_i - \bar{y}) \]

mean square

regression

\[ MS_{reg} = \frac{SS_{reg}}{1} \]

error

\[ MS_{err} = \frac{SS_{err}}{n - 2} \]

F-statistic

\[ F = \frac{MS_{reg}}{MS_{err}} \]

soil example

Code
set.seed(666)
# sample size
n <- 64

soil_df <- tibble(
  # compaction
  force_mpa = round(rnorm(n = n, mean = 0.7, sd = 0.03), 3),
  # root length
  root_mm = round(rnorm(n = n, mean = -1, sd = 0.02)*force_mpa, 3) + 5
) 

ggplot(data = soil_df,
       aes(x = force_mpa,
           y = root_mm)) +
  geom_point()

Code
soil_lm <- lm(root_mm ~ force_mpa,
              data = soil_df)

par(mfrow = c(2, 2))
plot(soil_lm)

Code
summary(soil_lm)

Call:
lm(formula = root_mm ~ force_mpa, data = soil_df)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.033749 -0.009088  0.000251  0.008962  0.028832 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.9569     0.0357   138.9   <2e-16 ***
force_mpa    -0.9400     0.0511   -18.4   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.01338 on 62 degrees of freedom
Multiple R-squared:  0.8451,    Adjusted R-squared:  0.8427 
F-statistic: 338.4 on 1 and 62 DF,  p-value: < 2.2e-16
Code
cor.test(soil_df$root_mm, soil_df$force_mpa)

    Pearson's product-moment correlation

data:  soil_df$root_mm and soil_df$force_mpa
t = -18.395, df = 62, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9503672 -0.8701422
sample estimates:
       cor 
-0.9193192 
Code
model_preds <- ggpredict(soil_lm,
                         terms = "force_mpa [0.63:0.77 by = 0.01]") 

ggpredict(soil_lm,
          terms = "force_mpa [0.63:0.77 by = 0.01]") %>% 
  plot(show_data = TRUE)

Code
flextable::as_flextable(soil_lm) 

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

4.957

0.036

138.866

0.0000

***

force_mpa

-0.940

0.051

-18.395

0.0000

***

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.01338 on 62 degrees of freedom

Multiple R-squared: 0.8451, Adjusted R-squared: 0.8427

F-statistic: 338.4 on 62 and 1 DF, p-value: 0.0000

Code
flextable::as_flextable(soil_lm) %>% 
  set_formatter(
    # special function to represent p < 0.001
    values = list("p.value" = function(x){ 
      z <- scales::label_pvalue()(x)
      z[!is.finite(x)] <- ""
      z
    })
  )

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

4.957

0.036

138.866

<0.001

***

force_mpa

-0.940

0.051

-18.395

<0.001

***

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.01338 on 62 degrees of freedom

Multiple R-squared: 0.8451, Adjusted R-squared: 0.8427

F-statistic: 338.4 on 62 and 1 DF, p-value: 0.0000

plant example

Code
set.seed(666)
# sample size
n <- 64
plant_df <- tibble(
  # predictor variables
  temperature = round(rnorm(n = n, mean = 28, sd = 1), digits = 1),
  light = round(rnorm(n = n, mean = 1, sd = 0.2), digits = 1),
  ph = rnorm(n = n, mean = 7, sd = 0.01),
  
  # response: growth in cm/week
  growth = light*rnorm(n = n, mean = 0.3, sd = 0.1) + temperature/round(rnorm(n = n, mean = 5, sd = 0.1))
) 

ggplot(data = plant_df,
       aes(x = temperature,
           y = growth)) +
  geom_point()

Code
ggplot(data = plant_df,
       aes(x = ph,
           y = light)) +
  geom_point()

Code
ggplot(data = plant_df,
       aes(x = light,
           y = growth)) +
  geom_point()

Code
plant_lm <- lm(growth ~ temperature + ph + light,
               data = plant_df)
simulateResiduals(plant_lm) %>% plot()

Code
summary(plant_lm)

Call:
lm(formula = growth ~ temperature + ph + light, data = plant_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.23209 -0.06571  0.01010  0.06173  0.19950 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -6.67140    6.80194  -0.981    0.331    
temperature  0.19626    0.01058  18.558  < 2e-16 ***
ph           0.96241    0.96806   0.994    0.324    
light        0.35196    0.05939   5.926 1.63e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0911 on 60 degrees of freedom
Multiple R-squared:  0.8759,    Adjusted R-squared:  0.8696 
F-statistic: 141.1 on 3 and 60 DF,  p-value: < 2.2e-16
Code
ggpredict(plant_lm,
          terms = c("temperature")) %>% 
  plot(show_data = TRUE)

Code
ggpredict(plant_lm,
          terms = c("ph")) %>% 
  plot(show_data = TRUE)

Code
ggpredict(plant_lm,
          terms = c("light")) %>% 
  plot(show_data = TRUE)

Code
plant_model <- lm(growth ~ light + temperature, 
                  data = plant_df)
Code
simulateResiduals(plant_model,
                  plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0 0.756 0.792 0.816 0.06 0.132 0.408 0.708 0.988 0.892 0.452 0.876 0.644 0.512 0.984 0.904 0.936 0.56 0.58 0.724 ...
Code
check_model(plant_model)

Code
pairs(plant_df, upper.panel = NULL)

Code
ggpairs(plant_df)

Code
cor(plant_df)
            temperature       light          ph      growth
temperature  1.00000000  0.15540780 -0.06657697  0.89551840
light        0.15540780  1.00000000 -0.04769987  0.40397013
ph          -0.06657697 -0.04769987  1.00000000 -0.02466729
growth       0.89551840  0.40397013 -0.02466729  1.00000000
Code
vif(plant_model)
      light temperature 
   1.024749    1.024749 
Code
summary(plant_model)

Call:
lm(formula = growth ~ light + temperature, data = plant_df)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.227058 -0.071297  0.006266  0.063745  0.197573 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.08464    0.29166   0.290    0.773    
light        0.34972    0.05934   5.894 1.76e-07 ***
temperature  0.19563    0.01056  18.534  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09109 on 61 degrees of freedom
Multiple R-squared:  0.8738,    Adjusted R-squared:  0.8697 
F-statistic: 211.2 on 2 and 61 DF,  p-value: < 2.2e-16
Code
# anova(plant_model)

For example, temperature F-value:

Code
2.85039/0.0083
[1] 343.4205

\[ \begin{align} F &= \frac{2.85039}{0.00830} \\ &= 343.4205 \end{align} \]

dummy variable examples

Code
tibble(
  herbivores = sample(x = c("absent", "present"), size = 10, replace = TRUE, prob = c(0.5, 0.5))
)
# A tibble: 10 × 1
   herbivores
   <chr>     
 1 present   
 2 present   
 3 present   
 4 present   
 5 present   
 6 absent    
 7 present   
 8 present   
 9 absent    
10 present   
Code
tibble(
  fertilizer = sample(x = c("low", "medium", "high"), size = 10, replace = TRUE, prob = c(0.3, 0.3, 0.3))
)
# A tibble: 10 × 1
   fertilizer
   <chr>     
 1 low       
 2 high      
 3 low       
 4 low       
 5 low       
 6 medium    
 7 medium    
 8 medium    
 9 high      
10 low       
Code
df <- tibble(
  year = sample(x = c("1st", "2nd", "3rd", "4th", "5th"), size = 20, replace = TRUE, prob = c(0.2, 0.2, 0.2, 0.2, 0.2))
) %>% 
  mutate(year = as_factor(year),
         year = fct_relevel(year, "1st", "2nd", "3rd", "4th", "5th"))

df 
# A tibble: 20 × 1
   year 
   <fct>
 1 2nd  
 2 4th  
 3 1st  
 4 1st  
 5 2nd  
 6 2nd  
 7 3rd  
 8 2nd  
 9 3rd  
10 4th  
11 3rd  
12 5th  
13 2nd  
14 3rd  
15 4th  
16 1st  
17 5th  
18 5th  
19 4th  
20 5th  
Code
str(df)
tibble [20 × 1] (S3: tbl_df/tbl/data.frame)
 $ year: Factor w/ 5 levels "1st","2nd","3rd",..: 2 4 1 1 2 2 3 2 3 4 ...

plant growth dummy variable

Code
# humidity units: %
# plant growth: cm / week

low_col <- "#2176ae"
medium_col <- "#fbb13c"
high_col <- "#fe6847"

n <- 30
set.seed(10)
plant_df <- tibble(
  humidity = round(rnorm(n = n, mean = 60, sd = 15), 0),
  low = humidity*rnorm(n = n, mean = 0.025, sd = 0.012) + 1,
  medium = humidity*rnorm(n = n, mean = 0.025, sd = 0.015) + 1.5,
  high = humidity*rnorm(n = n, mean = 0.025, sd = 0.017) + 2
) %>% 
  pivot_longer(cols = low:high,
               names_to = "fertilizer", 
               values_to = "growth") %>% 
  mutate(fertilizer = fct_relevel(fertilizer, "low", "medium", "high"))

# gives you a random sample
sample_n(plant_df, 10)
# A tibble: 10 × 3
   humidity fertilizer growth
      <dbl> <fct>       <dbl>
 1       71 high         3.01
 2       42 high         1.90
 3       58 medium       1.92
 4       50 low          3.07
 5       56 low          1.96
 6       61 low          2.50
 7       47 medium       4.24
 8       54 low          2.86
 9       77 low          3.93
10       39 medium       1.99
Code
# look at the structure
str(plant_df)
tibble [90 × 3] (S3: tbl_df/tbl/data.frame)
 $ humidity  : num [1:90] 60 60 60 57 57 57 39 39 39 51 ...
 $ fertilizer: Factor w/ 3 levels "low","medium",..: 1 2 3 1 2 3 1 2 3 1 ...
 $ growth    : num [1:90] 1.17 1.89 3.08 2.37 2.53 ...
Code
ggplot(data = plant_df,
       aes(x = humidity,
           y = growth)) +
  geom_point() 

Code
ggplot(data = plant_df,
       aes(x = fertilizer,
           y = growth,
           color = fertilizer)) +
  geom_jitter(width = 0.2,
              height = 0,
              alpha = 0.3) +
  stat_summary(fun.data = mean_cl_normal,
               geom = "pointrange",
               size = 1,
               linewidth = 1) +
  scale_color_manual(values = c(low_col, medium_col, high_col)) +
  theme(legend.position = "none")

Code
plant_df %>% 
  group_by(fertilizer) %>% 
  summarize(mean = mean(growth))
# A tibble: 3 × 2
  fertilizer  mean
  <fct>      <dbl>
1 low         2.25
2 medium      2.82
3 high        3.51
Code
plant_df %>% 
  summarize(mean = mean(growth))
# A tibble: 1 × 1
   mean
  <dbl>
1  2.86
Code
plant_lm <- lm(growth ~ humidity + fertilizer,
               data = plant_df)

# am i broken because i can't look at anything other than dharma residuals
simulateResiduals(plant_lm) %>% plot()

Code
par(mfrow = c(2, 2))
plot(plant_lm)

Code
ggpredict(plant_lm,
          terms = c("humidity",
                    "fertilizer")) %>% 
  plot(show_data = TRUE) +
  scale_color_manual(values = c(low_col, medium_col, high_col)) +
  scale_fill_manual(values = c(low_col, medium_col, high_col)) +
  theme_classic()

Code
ggpredict(plant_lm,
          terms = c("humidity")) %>% 
  plot(show_data = TRUE) +
  theme_classic()

Code
ggpredict(lm(growth ~ humidity, data = plant_df),
          terms = "humidity") %>% 
  plot(show_data = TRUE) +
  theme_classic()

Code
summary(plant_lm)

Call:
lm(formula = growth ~ humidity + fertilizer, data = plant_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7361 -0.5220  0.1129  0.5353  1.6649 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      1.280788   0.390802   3.277  0.00151 ** 
humidity         0.017697   0.006615   2.675  0.00894 ** 
fertilizermedium 0.573791   0.207205   2.769  0.00688 ** 
fertilizerhigh   1.264891   0.207205   6.105 2.88e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8025 on 86 degrees of freedom
Multiple R-squared:  0.3411,    Adjusted R-squared:  0.3182 
F-statistic: 14.84 on 3 and 86 DF,  p-value: 7.19e-08
Code
summary(lm(growth ~ humidity, data = plant_df))

Call:
lm(formula = growth ~ humidity, data = plant_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7902 -0.7698 -0.0693  0.5993  1.9402 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.893683   0.440513   4.299 4.42e-05 ***
humidity    0.017697   0.007833   2.259   0.0263 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9502 on 88 degrees of freedom
Multiple R-squared:  0.05483,   Adjusted R-squared:  0.04409 
F-statistic: 5.105 on 1 and 88 DF,  p-value: 0.02633

frog example

generating data

Code
set.seed(666)
frog_n <- 87

frog_df <- tibble(
  weight = (round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)),
  blue = round(rnorm(n = frog_n, mean = 2.5, sd = 0.09)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2),
  green = round(rnorm(n = frog_n, mean = 3, sd = 0.05)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2),
  red = round(rnorm(n = frog_n, mean = 2.3, sd = 0.05)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2)
) %>% 
  pivot_longer(cols = blue:red,
               names_to = "color",
               values_to = "toxicity")

df <- cbind(
  # predictor variables
  # color = sample(x = c("blue", "green", "red"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3)),
  weight = (round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2))
  #pattern = sample(x = c("striped", "spotted", "none"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3))
) %>%
  as_tibble() %>%
  mutate(toxicity = round(rnorm(n = frog_n, mean = 1.5, sd = 0.06)*weight, 3)) %>%
  mutate(color = case_when(
    between(toxicity, 2.5, 4.4) ~ "blue",
    between(toxicity, 4.4, 4.6) ~ "green",
    between(toxicity, 4.6, 5.5) ~ "red"
  )) %>% 
  mutate(toxicity = case_when(
    color == "blue" ~ round(rnorm(n = frog_n, mean = 2.5, sd = 0.09)*weight, 3),
    color == "green" ~ round(rnorm(n = frog_n, mean = 3, sd = 0.05)*weight, 3),
    color == "red" ~ round(rnorm(n = frog_n, mean = 2.3, sd = 0.05)*weight, 3)
  )) %>% 
  mutate(color = as_factor(color),
         color = fct_relevel(color, "red", "blue", "green"))


# df <- cbind(
#   color = sample(x = c("blue", "green", "red"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3))
# ) %>% 
#   as_tibble() %>% 
#   mutate(weight = (round(rnorm(n = frog_n, mean = 3.5, sd = 0.1), 2))) %>% 
#   mutate(toxicity = case_when(
#     color == "blue" ~ round(rnorm(n = frog_n, mean = 3, sd = 0.4)*weight, 2),
#     color == "green" ~ round(rnorm(n = frog_n, mean = 2, sd = 0.5)*weight, 2),
#     color == "red" ~ round(rnorm(n = frog_n, mean = 1, sd = 0.03)*weight, 2)
#   ))

plotting data

Code
blue_col <- "cornflowerblue"
green_col <- "darkgreen"
red_col <- "maroon"

striped_col <- "grey1"
spotted_col <- "grey50"
none_col <- "grey80"

ggplot(data = frog_df, aes(x = color, y = toxicity, color = color, fill = color)) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.3) +
  scale_color_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  scale_fill_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  stat_summary(geom = "pointrange", 
               fun = mean, 
               fun.min = function(x) mean(x) - sd(x), 
               fun.max = function(x) mean(x) + sd(x), 
               shape = 21, 
               size = 1) +
 #  geom_point(position = position_jitter(width = 0.2, height = 0, seed = 666), alpha = 0.3) +
  labs(title = "Color") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        text = element_text(size = 22))

Code
ggplot(data = frog_df, aes(x = weight, y = toxicity)) +
  geom_point() +
  # geom_smooth(method = "lm") +
  labs(title = "Weight") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        text = element_text(size = 22))

Code
ggplot(data = frog_df, aes(x = weight, y = toxicity, color = color)) +
  geom_point() +
  scale_color_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  geom_smooth(method = "lm") +
  labs(title = "Weight")

Code
head(df, 10)
# A tibble: 10 × 3
   weight toxicity color
    <dbl>    <dbl> <fct>
 1   3.18     8.29 blue 
 2   3.02     9.24 green
 3   3.01     8.75 green
 4   3.07     7.24 red  
 5   2.98     7.47 blue 
 6   2.94     8.89 green
 7   2.95     7.26 blue 
 8   2.91     7.25 blue 
 9   2.95     6.61 red  
10   3.06     8.96 green

model

Code
model1 <- lm(toxicity ~ weight + color, 
             data = frog_df)

model2 <- lm(toxicity ~ weight * color, 
             data = frog_df)

simulateResiduals(model1, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.804 0.556 0.336 0.376 0.5 0.56 0.076 0.74 0.308 0.008 0.416 0.828 0.684 0.204 0.272 0.66 0.172 0.208 0.604 0.708 ...
Code
simulateResiduals(model2, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.82 0.536 0.336 0.42 0.436 0.572 0.076 0.74 0.3 0.012 0.368 0.84 0.628 0.256 0.244 0.692 0.144 0.212 0.58 0.752 ...
Code
check_model(model2)

Code
testOutliers(model2)


    DHARMa outlier test based on exact binomial test with approximate
    expectations

data:  model2
outliers at both margin(s) = 1, observations = 261, p-value = 0.7287
alternative hypothesis: true probability of success is not equal to 0.007968127
95 percent confidence interval:
 9.699839e-05 2.116127e-02
sample estimates:
frequency of outliers (expected: 0.00796812749003984 ) 
                                           0.003831418 

diagnostics

Code
par(mfrow = c(2, 2))
plot(model1)

Code
plot(model2)

model summary

Code
summary(model1)

Call:
lm(formula = toxicity ~ weight + color, data = frog_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.90113 -0.21865 -0.00068  0.22588  0.90229 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.71921    0.58381  13.222   <2e-16 ***
weight      -0.07811    0.19467  -0.401    0.689    
colorgreen   1.48908    0.05049  29.490   <2e-16 ***
colorred    -0.56874    0.05049 -11.263   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.333 on 257 degrees of freedom
Multiple R-squared:  0.8733,    Adjusted R-squared:  0.8718 
F-statistic: 590.6 on 3 and 257 DF,  p-value: < 2.2e-16
Code
summary(model2)

Call:
lm(formula = toxicity ~ weight * color, data = frog_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.91522 -0.22141 -0.00686  0.21240  0.87930 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         8.2941     1.0119   8.196 1.21e-14 ***
weight             -0.2702     0.3379  -0.800    0.425    
colorgreen          0.1204     1.4311   0.084    0.933    
colorred           -0.9248     1.4311  -0.646    0.519    
weight:colorgreen   0.4573     0.4778   0.957    0.339    
weight:colorred     0.1189     0.4778   0.249    0.804    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3337 on 255 degrees of freedom
Multiple R-squared:  0.8738,    Adjusted R-squared:  0.8713 
F-statistic: 353.1 on 5 and 255 DF,  p-value: < 2.2e-16
Code
model.sel(model1, model2)
Model selection table 
       (Int) clr   wgh clr:wgh df  logLik  AICc delta
model1 7.719   + 0.832          5 -81.355 172.9   0.0
model2 8.294   + 0.168       +  7 -80.851 176.1   3.2
Models ranked by AICc(x) 
Code
ggpredict(model1,
          terms = c("weight [2:4 by = 0.01]", 
                    "color")) %>% 
  plot(show_data = TRUE,
       limit_range = TRUE) +
  scale_color_manual(values = c("green" = green_col, 
                                "red" = red_col, 
                                "blue" = blue_col)) +
  scale_fill_manual(values = c("green" = green_col, 
                               "red" = red_col, 
                               "blue" = blue_col)) +
  theme_classic()

Code
ggpredict(model2,
          terms = c("weight [2:4 by = 0.01]", 
                    "color")) %>% 
  plot(show_data = TRUE,
       limit_range = TRUE) +
  scale_color_manual(values = c("green" = green_col, 
                                "red" = red_col, 
                                "blue" = blue_col)) +
  scale_fill_manual(values = c("green" = green_col, 
                               "red" = red_col, 
                               "blue" = blue_col)) +
  theme_classic()

\[ \hat{y}_h \pm t_{(1-\alpha/2, n-2)}*\sqrt{MSE*(\frac{1}{n}+\frac{(x_h-\bar{x})^2}{\sum(x_i-\bar{x})^2})} \]

\[ MSE = \frac{\sum(y_i-\hat{y})^2}{n} \]

Code
tidy(model2, conf.int = TRUE, conf.level = 0.95)
# A tibble: 6 × 7
  term              estimate std.error statistic  p.value conf.low conf.high
  <chr>                <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 (Intercept)          8.29      1.01     8.20   1.21e-14    6.30     10.3  
2 weight              -0.270     0.338   -0.800  4.25e- 1   -0.936     0.395
3 colorgreen           0.120     1.43     0.0841 9.33e- 1   -2.70      2.94 
4 colorred            -0.925     1.43    -0.646  5.19e- 1   -3.74      1.89 
5 weight:colorgreen    0.457     0.478    0.957  3.39e- 1   -0.484     1.40 
6 weight:colorred      0.119     0.478    0.249  8.04e- 1   -0.822     1.06 
Code
model_summary <- summary(model2)
c("lower" = model_summary$coef[2,1] - qt(0.975, df = model_summary$df[2]) * model_summary$coef[2, 2],
  "upper" = model_summary$coef[2,1] + qt(0.975, df = model_summary$df[2]) * model_summary$coef[2, 2])
     lower      upper 
-0.9355103  0.3951563 

Confidence interval for a single coefficient: in words: estimate plus or minus the t-value at your confidence level * standard error

Citation

BibTeX citation:
@online{bui2024,
  author = {Bui, An},
  title = {Week 8 Figures - {Lectures} 14 and 15},
  date = {2024-05-20},
  url = {https://spring-2024.envs-193ds.com/lecture/lecture_week-08.html},
  langid = {en}
}
For attribution, please cite this work as:
Bui, An. 2024. “Week 8 Figures - Lectures 14 and 15.” May 20, 2024. https://spring-2024.envs-193ds.com/lecture/lecture_week-08.html.