6. Autocorrelation#

suppressPackageStartupMessages({
  library(tidyverse)
  library(tidymodels)
  library(readxl)
  library(multcomp)
  library(car)
  library(lmtest)
  library(sandwich)
})

US Consumption function 1947-2000#

df <- read_excel("data/Table6_1.xls")
head(df)
A tibble: 6 × 24
yearconsumptionincomewealthinterestlnconsumplndpilnwealthdlnconsumpdlndpirlnwealthrinterestdconsumpdincomedwealthrconsumprincomerwealthrinterest2time
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
21916 976.41035.25166.815-10.3509406.8838736.9423508.550012 NA NA NA NA NA NA NA NA NA NA NA1
21916 998.11090.05280.757 -4.7198046.9058536.9939338.5718250.021980760.0515832905.795887-1.3591570921.6999554.800049113.9419704.3560778.56643726.352-1.605786802
219161025.31095.65607.351 1.0440636.9327416.9990578.6318340.026887420.0051240925.848814 2.5764448627.20007 5.599976326.5942725.0278767.68014018.668 2.463987113
219161090.91192.75759.515 0.4073466.9947587.0839758.6586080.062017440.0849175455.856105 0.0683693765.5999897.099976152.1641782.4448863.09544072.578 0.093246044
219161107.11227.06086.056 -5.2831527.0094997.1123288.7137560.014740940.0283527375.902559-5.4154052716.1999534.300049326.5410778.9094868.18354353.341-5.405699735
219161142.41266.86243.864 -0.2770117.0408867.1442498.7393540.031387330.0319218645.910253 1.4382736735.3000539.800049157.8076809.3358897.66464412.911 1.312393076
colnames(df)
  1. 'year'
  2. 'consumption'
  3. 'income'
  4. 'wealth'
  5. 'interest'
  6. 'lnconsump'
  7. 'lndpi'
  8. 'lnwealth'
  9. 'dlnconsump'
  10. 'dlndpi'
  11. 'dlnwealth'
  12. 'dinterest'
  13. 'rlnconsump'
  14. 'rlndpi'
  15. 'rlnwealth'
  16. 'rinterest'
  17. 'dconsump'
  18. 'dincome'
  19. 'dwealth'
  20. 'rconsump'
  21. 'rincome'
  22. 'rwealth'
  23. 'rinterest2'
  24. 'time'
\[ \ln C_T = \beta_1 + \beta_2 \ln DPI_t + \beta_3 \ln W_t + \beta_4 R_t + u_t \]

real consumption expenditure (C),
real (after tax) disposable personal income (DPI),
real wealth (W)
and real interest rate (R)

model <- lm(lnconsump ~ lndpi + lnwealth +  interest, data =df)
tidy(model)
glance(model)
A tibble: 4 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept)-0.4677120440.0427779986-10.9334727.330410e-15
lndpi 0.8048727590.0174978490 45.9983831.376236e-42
lnwealth 0.2012702150.0175925850 11.4406281.433544e-15
interest -0.0026890610.0007619294 -3.5292799.046458e-04
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.99955970.99953320.0119337537832.667.120776e-843164.588-319.1761-309.23120.0071207215054

if real personal disposal income goes up by 1%, mean real consumption expenditure goes up by about 0.8%

if real wealth goes up by 1%, mean real consumption expenditure goes up by about 0.2%,

if rate goes up by one percentage point, mean real consumption expenditure goes down by about 0.26%

\(R^2\) indicating spurious correlation

Tests of autocorrelation#

plot(augment(model)$.resid, type = 'b')
_images/6a3ba80af82312e4d24489d3148c7d9db2a07af2c8a69d02540a3db91cf090d2.png
plot(augment(model)$.std.resid, type = 'b')
_images/f15753b446692c94bf65c4b439d864d5658cdbd5b8193a5142ee9c87b60b627a.png
augmented_data <- augment(model)

ggplot(augmented_data, aes(x = seq_along(.resid), y = .resid)) +
  geom_line() +
  geom_point() +
  labs(title = "Residuals from Linear Model",
       x = "Observation",
       y = "Residuals") +
  theme_minimal()
_images/58ddc665f281bc726051c39b610cd903b43ab2767b383fb9174bc4489cc2947b.png
augmented_data <- augmented_data %>%
  mutate(lagged_resid = lag(.resid))

# remove first row of NA lagged residual
augmented_data <- augmented_data %>%
  filter(!is.na(lagged_resid))

correlation <- cor(augmented_data$.resid, augmented_data$lagged_resid)


ggplot(augmented_data, aes(x = lagged_resid, y = .resid)) +
  geom_point() +
  geom_smooth(method = "lm",formula = 'y ~ x', se = FALSE, color = "blue") +
  labs(title = "Residuals vs Lagged Residuals",
       x = "Lagged Residuals",
       y = "Residuals") +
  theme_minimal()
_images/26df109911864c9537523ff68120fb0af2fc9593647507efe0a411f94b8e7a09.png

Durbin watson test#

must have an intercept term

durbinWatsonTest(model)
 lag Autocorrelation D-W Statistic p-value
   1       0.2977555      1.289232       0
 Alternative hypothesis: rho != 0
durbinWatsonTest(model, max.lag = 3)
 lag Autocorrelation D-W Statistic p-value
   1      0.29775555      1.289232   0.000
   2     -0.02840613      1.933710   0.694
   3      0.01631809      1.678649   0.262
 Alternative hypothesis: rho[lag] != 0
dwtest(model)
	Durbin-Watson test

data:  model
DW = 1.2892, p-value = 0.0009445
alternative hypothesis: true autocorrelation is greater than 0

Bruech Godfrey general test#

bgtest(model, order = 1)
	Breusch-Godfrey test for serial correlation of order up to 1

data:  model
LM test = 5.312, df = 1, p-value = 0.02118
bgtest(model, order = 2)
	Breusch-Godfrey test for serial correlation of order up to 2

data:  model
LM test = 6.4472, df = 2, p-value = 0.03981
bgtest(model, order = 3)
	Breusch-Godfrey test for serial correlation of order up to 3

data:  model
LM test = 6.6573, df = 3, p-value = 0.08366

AR(2) is significant, residuals follow AR(2)

Remedial measures#

First Difference#

\[ u_t - \rho u_{t-1} = v_t \]

\begin{align} \ln C_t- \rho \ln C_{t-1}&= \beta_{1(1- \rho)} \ &+ \beta_2 (\ln DPI_t - \rho \ln DPI_{t-1})\ &+ \beta_3 (\ln W_t - \rho \ln W_{t-1}) \ &+ \beta_4 (R_t - R_{t-1}) \ &+ (u_t - \rho u_{t-1}) \end{align}

if \(\rho\) = 1

\[ \Delta \ln C_t = \beta_2 \Delta \ln DPI_t + \beta_3 \Delta \ln W_t + \beta_4 \Delta R_t + v_t \]

Called first difference transformation

model2 <- lm(diff(lnconsump) ~ 0 +diff(lndpi) + diff(lnwealth) +  diff(interest), data =df)
tidy(model2)
glance(model2)
A tibble: 3 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
diff(lndpi) 0.84899404130.051537630916.47328427.667028e-22
diff(lnwealth)0.10635495550.0368543147 2.88582105.749199e-03
diff(interest)0.00065301380.0008260853 0.79049194.329740e-01
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.92365270.91907180.0111335201.63396.473553e-283164.7236-321.4472-313.56610.0061977385053
bgtest(model2)
	Breusch-Godfrey test for serial correlation of order up to 1

data:  model2
LM test = 0.86275, df = 1, p-value = 0.353
dwtest(model2)
	Durbin-Watson test

data:  model2
DW = 2.0265, p-value = 0.547
alternative hypothesis: true autocorrelation is greater than 0
durbinWatsonTest(model2)
 lag Autocorrelation D-W Statistic p-value
   1      -0.1150018      2.026542   0.916
 Alternative hypothesis: rho != 0

feasible generalized least squares (FGLS)#

residuals = augment(model)$.resid
residuals
  1. 0.0151792320906274
  2. 0.00639409190557938
  3. 0.0325786316647871
  4. 0.0191471920745867
  5. -0.0153337901242683
  6. -0.00132990098127195
  7. 0.00653719274943132
  8. 0.00162048925147751
  9. 0.00975015218633235
  10. -0.00722925871798008
  11. 0.000174758857319546
  12. -0.0146975939327243
  13. 0.00380689173579718
  14. 0.00454249958702402
  15. -0.0167822153758674
  16. -0.0091399779495811
  17. -0.00422252846219351
  18. -0.014143908709439
  19. -0.011875949295832
  20. 0.00245712248775831
  21. -0.0184412867267767
  22. -0.0138427704324453
  23. 0.00757496364792853
  24. -0.00357671809715665
  25. -0.0171562076207774
  26. -0.0124861952550575
  27. -0.00754242394929072
  28. 0.00245643437586018
  29. -0.0102882063980623
  30. 0.00718617423785339
  31. 0.0143585227051846
  32. 0.0127678086817866
  33. 0.0070919535900682
  34. -0.00903155158175295
  35. -0.00726634215564381
  36. -0.0109670199434095
  37. 0.0110890173466256
  38. 0.000254097439515988
  39. 0.00116108304687401
  40. 0.000586076244815104
  41. 0.00687811518401915
  42. 0.00293814373603141
  43. 8.79859166129648e-05
  44. 0.00366504301640092
  45. -0.0140024869709681
  46. -0.0155797023743123
  47. -0.00012753645438579
  48. 0.0196279528980181
  49. 0.0142734120216019
  50. 0.0108911843032331
  51. 0.000420259520168997
  52. -0.0108587536823013
  53. -0.00387038232170589
  54. 0.024296225009854
rho <- coef(lm(residuals ~ 0 + lag(residuals)))
rho
lag(residuals): 0.324670692527429
dwtest(model)
	Durbin-Watson test

data:  model
DW = 1.2892, p-value = 0.0009445
alternative hypothesis: true autocorrelation is greater than 0
durbinWatsonTest(model)
 lag Autocorrelation D-W Statistic p-value
   1       0.2977555      1.289232   0.004
 Alternative hypothesis: rho != 0
rho2 = 1 - (1.289232/2) 
rho2
0.355384
fgls_model <- lm(I(lnconsump-rho*lag(lnconsump)) ~ I(lndpi-rho*lag(lndpi)) + 
                 I(lnwealth-rho*lag(lnwealth)) + I(interest-rho*lag(interest)) , data = df)
tidy(fgls_model)
glance(fgls_model)
A tibble: 4 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) -2.797310e-010.033726467-8.29410936.807447e-11
I(lndpi - rho * lag(lndpi)) 8.187056e-010.02109688838.80693621.878713e-38
I(lnwealth - rho * lag(lnwealth)) 1.836285e-010.020987236 8.74953021.396990e-11
I(interest - rho * lag(interest))-1.799604e-050.000969139-0.01856919.852603e-01
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.99923480.9991880.010422621330.122.542681e-763168.756-327.512-317.66060.0053229024953
bgtest(fgls_model)
	Breusch-Godfrey test for serial correlation of order up to 1

data:  fgls_model
LM test = 2.8168, df = 1, p-value = 0.09328
dwtest(fgls_model)
	Durbin-Watson test

data:  fgls_model
DW = 1.449, p-value = 0.008542
alternative hypothesis: true autocorrelation is greater than 0
durbinWatsonTest(fgls_model)
 lag Autocorrelation D-W Statistic p-value
   1       0.2092955      1.448986   0.014
 Alternative hypothesis: rho != 0
library(dynlm)


# Create the FGLS formula
fgls_formula <- as.formula(paste("I(lnconsump - ", rho, "* lag(lnconsump)) ~ ",
                                 "I(lndpi - ", rho, "* lag(lndpi)) + ",
                                 "I(lnwealth - ", rho, "* lag(lnwealth)) + ",
                                 "I(interest - ", rho, "* lag(interest))"
))

# Fit the FGLS model using dynlm
fgls_model <- dynlm(fgls_formula, data = df)

# Tidy model coefficients
tidy(fgls_model)

# Model summary
glance(fgls_model)
Warning message:
"The `tidy()` method for objects of class `dynlm` is not maintained by the broom team, and is only supported through the `lm` tidier method. Please be cautious in interpreting and reporting broom output.

This warning is displayed once per session."
A tibble: 4 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) -2.797310e-010.033726467-8.29410936.807447e-11
I(lndpi - 0.324670692527429 * lag(lndpi)) 8.187056e-010.02109688838.80693621.878713e-38
I(lnwealth - 0.324670692527429 * lag(lnwealth)) 1.836285e-010.020987236 8.74953021.396990e-11
I(interest - 0.324670692527429 * lag(interest))-1.799604e-050.000969139-0.01856919.852603e-01
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.99923480.9991880.010422621330.122.542681e-763168.756-327.512-317.66060.0053229024953
library(dynlm)

# Fit the original model
model <- lm(lnconsump ~ lndpi + lnwealth + interest, data = df)

# Get residuals
residuals <- residuals(model)

# Estimate AR(2) coefficients
ar_model <- lm(residuals ~ lag(residuals, 1) + lag(residuals, 2))
rho1 <- coef(ar_model)[2]
rho2 <- coef(ar_model)[3]

# Create the FGLS formula for AR(2)
fgls_formula <- as.formula(paste("I(lnconsump - ", rho1, "* lag(lnconsump, 1) - ", rho2, "* lag(lnconsump, 2)) ~ ",
                                 "I(lndpi - ", rho1, "* lag(lndpi, 1) - ", rho2, "* lag(lndpi, 2)) + ",
                                 "I(lnwealth - ", rho1, "* lag(lnwealth, 1) - ", rho2, "* lag(lnwealth, 2)) + ",
                                 "I(interest - ", rho1, "* lag(interest, 1) - ", rho2, "* lag(interest, 2))"
))


fgls_model <- lm(fgls_formula, data = df)

tidy(fgls_model)

glance(fgls_model)
A tibble: 4 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) -0.34020257490.037997412-8.95330918.307709e-12
I(lndpi - 0.375966677578179 * lag(lndpi, 1) - -0.159419476639279 * lag(lndpi, 2)) 0.81216134330.01992356740.76385206.679649e-39
I(lnwealth - 0.375966677578179 * lag(lnwealth, 1) - -0.159419476639279 * lag(lnwealth, 2)) 0.19132134760.019988221 9.57170461.040704e-12
I(interest - 0.375966677578179 * lag(interest, 1) - -0.159419476639279 * lag(interest, 2))-0.00075071040.001005589-0.74653824.589834e-01
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.99936670.99932710.0107861125247.099.734735e-773163.8301-317.6602-307.9040.0055843274852

Newey–West method of correcting OLS standard errors#

NeweyWest(model)
A matrix: 4 × 4 of type dbl
(Intercept)lndpilnwealthinterest
(Intercept) 1.971479e-03 2.692235e-05-2.333159e-04 2.652791e-05
lndpi 2.692235e-05 2.628983e-04-2.225614e-04-6.845951e-06
lnwealth-2.333159e-04-2.225614e-04 2.109662e-04 2.866692e-06
interest 2.652791e-05-6.845951e-06 2.866692e-06 6.724860e-07

Model Evaluation#

consider case of specification error

model4 <- lm(lnconsump ~ log(income) + lnwealth +  interest + lag(lnconsump), data =df)
tidy(model4)
glance(model4)
A tibble: 5 × 5
termestimatestd.errorstatisticp.value
<chr><dbl><dbl><dbl><dbl>
(Intercept) -0.3160213640.0556667072-5.67702647.777927e-07
log(income) 0.5748331150.0696731710 8.25042279.237758e-11
lnwealth 0.1502881630.0208376010 7.21235443.477275e-09
interest -0.0006752060.0008937643-0.75546324.536624e-01
lag(lnconsump) 0.2765615990.0804719157 3.43674681.225013e-03
A tibble: 1 × 12
r.squaredadj.r.squaredsigmastatisticp.valuedflogLikAICBICdeviancedf.residualnobs
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><int><int>
0.99964540.99961590.0106190633833.643.891148e-824168.3127-324.6254-312.80370.0054126944853
bgtest(model4)
	Breusch-Godfrey test for serial correlation of order up to 1

data:  model4
LM test = 4.4804, df = 1, p-value = 0.03429

Exercises#

🚧 Under Construction