5. Heteroscedasticity#

suppressPackageStartupMessages({
  library(tidyverse)
  library(tidymodels)
  library(readxl)
  library(multcomp)
  library(car)
  library(lmtest)
  library(sandwich)
})
df <- read_excel("data/Table5_1.xls")
head(df)
A tibble: 6 × 10
stateabortionreligionpricelawsfundseducincomepicketlnabortion
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
MISSISSIPPI 12.438.02560064.3140821002.517696
NEW_MEXICO 17.744.73320075.115458 202.873565
UTAH 9.376.72981085.115573 02.230014
WEST_VIRGINIA 7.7 9.82510166.015598 502.041220
ARKANSAS 13.530.02481066.315635 332.602690
LOUISIANA 13.450.92281068.315931 602.595255

Abortion rate
State = name of the state (50 US states).
ABR = Abortion rate, number of abortions per thousand women aged 15–44 in 1992.
Religion = the percent of a state’s population that is Catholic, Southern Baptist, Evangelical, or Mormon.
Price = the average price charged in 1993 in non-hospital facilities for an abortion at 10 weeks with local anesthesia (weighted by the number of abortions performed in 1992).
Laws = a variable that takes the value of 1 if a state enforces a law that restricts a minor’s access to abortion, 0 otherwise.
Funds = a variable that takes the value of 1 if state funds are available for use to pay for an abortion under most circumstances, 0 otherwise.
Educ = the percent of a state’s population that is 25 years or older with a high school degree (or equivalent), 1990.
Income = disposable income per capita, 1992.
Picket = the percentage of respondents that reported experiencing picketing with physical contact or blocking of patients.

model <- lm(abortion ~ religion + price + laws + funds + educ + income + picket, data =df)
tidy(model)
glance(model)
A tibble: 8 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept)14.2839572801.507763e+01 0.94736093.488740e-01
religion 0.0200709018.638054e-02 0.23235448.173913e-01
price -0.0423631062.222320e-02-1.90625556.347505e-02
laws -0.8731017862.376566e+00-0.36737957.151807e-01
funds 2.8200030242.783475e+00 1.01312333.168024e-01
educ -0.2872551211.995545e-01-1.43948181.574261e-01
income 0.0024006824.551884e-04 5.27404104.354736e-06
picket -0.1168712144.217986e-02-2.77078238.295326e-03
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.57742630.50699737.0625828.1987062.847197e-067-164.3286346.6572363.86552094.9624250
data <-augment(model)
data <- data |> 
mutate(squared_resid = .resid^2)
data |>
ggplot(aes(x = squared_resid)) +
geom_histogram(bins  = 10)
_images/915503e53d0cd0d570b4c0fa488177504c68f4cd6d1584d5a01be48a51d0f2ce.png
ggplot(data, aes(x = .fitted, y = squared_resid))+
geom_point()
_images/ca59f4c32d44d4c5b4d89541232da5046ef3fe11c9c92e88b3ab173f5c4429d7.png

Breusch–Pagan (BP) test#

bptest(model, studentize = TRUE, data = df)
	studentized Breusch-Pagan test

data:  model
BP = 16.001, df = 7, p-value = 0.02511

White test#

white_test_result <- bptest(model, ~ religion + price + laws + funds + educ + income + picket + I(religion^2) + I(price^2) + I(laws^2) + I(funds^2) + I(educ^2) + I(income^2) + I(picket^2) +
                            religion:price + religion:laws + religion:funds + religion:educ + religion:income + 
                            religion:picket + price:laws + price:funds + price:educ + price:income + price:picket + laws:funds + 
                            laws:educ + laws:income + laws:picket + funds:educ + funds:income + funds:picket + educ:income + educ:picket + income:picket, 
                            data = df)

# Print the test results
print(white_test_result)

# Check if we reject the null hypothesis of homoscedasticity
if (white_test_result$p.value < 0.05) {
  cat("Reject the null hypothesis of homoscedasticity. The model suffers from heteroscedasticity.\n")
} else {
  cat("Do not reject the null hypothesis of homoscedasticity. The model does not suffer from heteroscedasticity.\n")
}
	studentized Breusch-Pagan test

data:  model
BP = 32.102, df = 33, p-value = 0.5116

Do not reject the null hypothesis of homoscedasticity. The model does not suffer from heteroscedasticity.

Abridged white test#

data <- augment(model)
data <- data |>
    mutate(squared_resid = .resid^2)
model2 <- lm(squared_resid ~ .fitted + I(.fitted^2), data = data)
tidy(model2)
glance(model2)
A tibble: 3 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) 20.202399549.030794 0.41203490.6821871
.fitted -1.4552665 4.759941-0.30573210.7611587
I(.fitted^2) 0.1074319 0.107890 0.99575480.3244684
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.19308260.158745753.133745.6231810.0064637232-268.0407544.0813551.7294132690.24750

p value for F = 0.006463723, we suffer from heteroscedasticity

df <- read_excel("data/Table5_1.xls")
model <- lm(abortion ~ religion + price + laws + funds + educ + income + picket, data =df)

fitted_values <- fitted(model)

df_transformed <- df

df_transformed <- df_transformed %>%
  mutate(across(where(is.numeric), ~ ./fitted_values))


model2 <- lm(abortion ~ religion + price + laws + funds + 
             educ + income + picket, data = df_transformed)
tidy(model2)
glance(model2)
A tibble: 8 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) 0.89736061060.786093091 1.141544970.2601101
religion 0.04807749870.075204771 0.639287880.5261073
price -0.01345253620.037889433-0.355047170.7243310
laws -0.58795025392.179388159-0.269777670.7886524
funds 0.19562074733.709311583 0.052737750.9581909
educ 0.02886841350.174382545 0.165546460.8693082
income 0.00006525940.002101188 0.031058330.9753701
picket 0.02237719290.084639962 0.264380940.7927795
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.07414331-0.080166130.34739630.48048460.84324937-13.7236345.4472662.655475.0687354250
bptest(model2)
	studentized Breusch-Pagan test

data:  model2
BP = 12.679, df = 7, p-value = 0.08034
model3 <- lm( lnabortion ~ religion + price + laws + funds + educ + income + picket, data = df)
tidy(model3)
glance(model3)
A tibble: 8 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) 2.83326427737.552630e-01 3.75136125.328625e-04
religion 0.00045754044.326942e-03 0.10574229.162903e-01
price -0.00311212051.113196e-03-2.79566157.776827e-03
laws -0.01288391751.190461e-01-0.10822639.143316e-01
funds 0.08768764791.394288e-01 0.62890645.328161e-01
educ -0.01448837949.996011e-03-1.44941611.546477e-01
income 0.00012647772.280113e-05 5.54699471.775782e-06
picket -0.00651528872.112858e-03-3.08363793.606917e-03
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.58917960.52070960.35377628.6049231.649353e-067-14.6335547.2671164.475315.2566194250

if price goes up by a dollar, the relative change in the abortion rate is –0.003 or about –0.3%

bptest(model3)
	studentized Breusch-Pagan test

data:  model3
BP = 7.95, df = 7, p-value = 0.337

White Robust standard errors#

robust_se <-coeftest(model, vcov = vcovHC(model, type = "HC0"))
summary(robust_se)
    Estimate          Std. Error           t value           Pr(>|t|)        
 Min.   :-0.87310   Min.   : 0.000468   Min.   :-3.1548   Min.   :0.0000069  
 1st Qu.:-0.15947   1st Qu.: 0.033729   1st Qu.:-1.7763   1st Qu.:0.0622757  
 Median :-0.01998   Median : 0.119371   Median :-0.1347   Median :0.1924175  
 Mean   : 1.97585   Mean   : 2.304262   Mean   : 0.0245   Mean   :0.2735694  
 3rd Qu.: 0.72005   3rd Qu.: 1.942125   3rd Qu.: 1.0086   3rd Qu.:0.3932858  
 Max.   :14.28396   Max.   :13.657414   Max.   : 5.1341   Max.   :0.7952640  
tidy(robust_se)
A tibble: 8 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept)14.2839572801.365741e+01 1.04587573.016005e-01
religion 0.0200709017.686001e-02 0.26113587.952640e-01
price -0.0423631062.377806e-02-1.78160518.204549e-02
laws -0.8731017861.645923e+00-0.53046335.985845e-01
funds 2.8200030242.830730e+00 0.99621063.248529e-01
educ -0.2872551211.618822e-01-1.77446968.323437e-02
income 0.0024006824.675923e-04 5.13413496.879939e-06
picket -0.1168712143.704561e-02-3.15479272.966278e-03
glance(robust_se)
Original model not retained as part of coeftest object. For additional model summary information (r.squared, df, etc.), consider passing `glance.coeftest()` an object where the underlying model has been saved, i.e.`lmtest::coeftest(..., save = TRUE)`.
This message is displayed once per session.
A tibble: 1 × 4
logLikAICBICnobs
<chr><dbl><dbl><int>
-164.329346.6572363.865550
df2 <- read_excel("data/Table1_1.xls")
head(df2)
A tibble: 6 × 17
obswagefemalenonwhiteunioneducationexperagewindfemalenonwlnwageeducation_exper_Ifemale_1_IfemXeduca_1_IfemXexper_1_Inonwhite_1_InonXeduca_1
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
111.55100122038102.446686240112200 0
2 5.00000 9 924001.609438 810 0 00 0
312.00000161537102.4849072400 0 00 0
4 7.00011143858001.9459105320 0 0114
521.15110161941113.05164030411619116
6 6.9210012 422101.934416 48112 40 0
model4 <- lm(wage ~ female + nonwhite + union + education + exper, data = df2)
tidy(model4)
glance(model4)
A tibble: 6 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept)-7.18333821.01578786-7.0716912.508276e-12
female -3.07487540.36461621-8.4331848.939423e-17
nonwhite -1.56531330.50918754-3.0741392.155664e-03
union 1.09597580.50607809 2.1656263.052356e-02
education 1.37030100.0659042120.7923125.507613e-83
exper 0.16660650.0160475610.3820502.659960e-24
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.32333880.32070186.508137122.61493.453151e-1065-4240.378494.7418530.87254342.5412831289
bptest(model4)
	studentized Breusch-Pagan test

data:  model4
BP = 55.327, df = 5, p-value = 1.118e-10
model5 <- coeftest(model, vcov = vcovHC(model, type = "HC0"))
tidy(model5)
glance(model5)
A tibble: 8 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept)14.2839572801.365741e+01 1.04587573.016005e-01
religion 0.0200709017.686001e-02 0.26113587.952640e-01
price -0.0423631062.377806e-02-1.78160518.204549e-02
laws -0.8731017861.645923e+00-0.53046335.985845e-01
funds 2.8200030242.830730e+00 0.99621063.248529e-01
educ -0.2872551211.618822e-01-1.77446968.323437e-02
income 0.0024006824.675923e-04 5.13413496.879939e-06
picket -0.1168712143.704561e-02-3.15479272.966278e-03
A tibble: 1 × 4
logLikAICBICnobs
<chr><dbl><dbl><int>
-164.329346.6572363.865550

Exercises#

🚧 Under Construction