8 Granger causality
Is advertising generating sales, or does the effect run in the other direction? Do past values of advertising help us get a better prediction of sales today than we would get if we did not use them?
Granger causality model:
\[ Adv_t = \alpha + \sum_{i=1}^{I}\beta_iAdv_{t-i} + \sum_{j=1}^{J}\gamma_jSales_{t-j} + \epsilon_t \]
where \(\epsilon_t\) is a white-noise error term. Advertising today is affected by previous advertising and previous sales. \(I\) is the number of past years of advertising and \(J\) is the number of past years of sales included in the model. We choose \(I\) and \(J\) large enough to accommodate a rich autocorrelation structure when modelling current advertising. The test is a partial F-test of the null hypothesis that \(\gamma_j = 0\) for all \(j\), i.e., that we do not need past values of sales in order to forecast current advertising.
# Load the Lydia data set (year, sales, advrtsng, advcopy, Ind1-Ind3 and
# pre-computed lags, per the head() output below).
lydia <- read.csv("/cloud/project/data/Lydia.txt")
# NOTE: attach() is generally discouraged, but the model formulas further
# down refer to the columns by bare name, so it is kept here.
attach(lydia)
head(lydia)
## year sales advrtsng advcopy Ind1 Ind2 Ind3 lagsales lag2sales lagadv
## 1 1907 1016 608 1 1 0 0 NA NA NA
## 2 1908 921 451 1 1 0 0 1016 NA 608
## 3 1909 934 529 1 1 0 0 921 1016 451
## 4 1910 976 543 1 1 0 0 934 921 529
## 5 1911 930 525 1 1 0 0 976 934 543
## 6 1912 1052 549 1 1 0 0 930 976 525
# Build the lagged series that are not already columns of `lydia`.
# Each lag-k vector is the original series shifted down by k positions,
# padded with k leading NAs so it keeps the same length as the data.
lag2adv <- c(NA, NA, head(advrtsng, -2))
lag3adv <- c(NA, NA, NA, head(advrtsng, -3))
lag4adv <- c(NA, NA, NA, NA, head(advrtsng, -4))
lag5adv <- c(NA, NA, NA, NA, NA, head(advrtsng, -5))

# Advertising-copy code as a categorical predictor.
fadvcopy <- as.factor(advcopy)

# Explicit copies of the two main series.
sales <- lydia$sales
ad <- lydia$advrtsng

lag3sales <- c(NA, NA, NA, head(sales, -3))
lag4sales <- c(NA, NA, NA, NA, head(sales, -4))
# Unrestricted Granger model: current advertising on four lags of
# advertising, four lags of sales, and the three indicator variables.
modelG1 <- lm(
  advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
    lagsales + lag2sales + lag3sales + lag4sales +
    Ind1 + Ind2 + Ind3
)
summary(modelG1)
##
## Call:
## lm(formula = advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
## lagsales + lag2sales + lag3sales + lag4sales + Ind1 + Ind2 +
## Ind3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -319.76 -68.73 -9.47 104.03 305.05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.802e+02 1.100e+02 -1.639 0.109441
## lagadv 3.737e-01 1.830e-01 2.042 0.048142 *
## lag2adv -3.120e-01 1.821e-01 -1.713 0.094825 .
## lag3adv 1.146e-01 1.786e-01 0.642 0.524997
## lag4adv 1.580e-01 1.773e-01 0.891 0.378432
## lagsales 5.970e-01 1.465e-01 4.076 0.000225 ***
## lag2sales -4.588e-01 2.152e-01 -2.132 0.039526 *
## lag3sales 2.672e-01 2.217e-01 1.205 0.235543
## lag4sales -5.637e-03 1.586e-01 -0.036 0.971832
## Ind1 1.313e+02 1.019e+02 1.288 0.205596
## Ind2 1.236e+02 6.987e+01 1.769 0.084942 .
## Ind3 1.051e+02 8.848e+01 1.188 0.242198
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 163.9 on 38 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.8459, Adjusted R-squared: 0.8012
## F-statistic: 18.96 on 11 and 38 DF, p-value: 3.609e-12
# Restricted Granger model: same as modelG1 but without the lagged-sales
# terms, i.e. the model under the null that past sales do not help
# forecast current advertising.
modelG2 <- lm(
  advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
    Ind1 + Ind2 + Ind3
)
summary(modelG2)
##
## Call:
## lm(formula = advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
## Ind1 + Ind2 + Ind3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -472.77 -113.07 22.23 125.10 352.17
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.6272 123.2128 -0.070 0.9445
## lagadv 0.8880 0.1492 5.954 4.63e-07 ***
## lag2adv -0.3726 0.2010 -1.854 0.0707 .
## lag3adv 0.2808 0.1969 1.426 0.1612
## lag4adv 0.1958 0.1673 1.170 0.2486
## Ind1 29.1848 119.5617 0.244 0.8083
## Ind2 192.7088 81.9106 2.353 0.0234 *
## Ind3 -84.2094 85.2865 -0.987 0.3291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 200 on 42 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.7463, Adjusted R-squared: 0.704
## F-statistic: 17.65 on 7 and 42 DF, p-value: 1.151e-10
# Partial F-test: restricted model (no lagged sales) against the
# unrestricted model that adds the four lagged-sales terms.
anova(modelG2, modelG1)
## Analysis of Variance Table
##
## Model 1: advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + Ind1 + Ind2 +
## Ind3
## Model 2: advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + lagsales +
## lag2sales + lag3sales + lag4sales + Ind1 + Ind2 + Ind3
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 42 1679599
## 2 38 1020341 4 659258 6.1381 0.0006524 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Full interaction model: sales on advertising, lagged sales and the three
# indicators, with both advertising and lagged sales interacted with each
# indicator.
modelintrctn1 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3 +
    lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3
)
summary(modelintrctn1)
##
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
## advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales *
## Ind1 + lagsales * Ind2 + lagsales * Ind3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -265.83 -67.05 -18.07 67.72 440.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -182.4488 180.7390 -1.009 0.31867
## advrtsng 1.6753 0.3270 5.123 7.55e-06 ***
## lagsales 0.3042 0.1506 2.020 0.04998 *
## Ind1 247.2927 920.2953 0.269 0.78950
## Ind2 403.9173 217.3517 1.858 0.07031 .
## Ind3 629.2076 218.6483 2.878 0.00633 **
## advrtsng:Ind1 -0.7603 1.5671 -0.485 0.63012
## advrtsng:Ind2 -1.5474 0.4814 -3.214 0.00255 **
## advrtsng:Ind3 -1.2114 0.3506 -3.455 0.00129 **
## lagsales:Ind1 0.1588 0.6539 0.243 0.80937
## lagsales:Ind2 0.6277 0.2428 2.585 0.01340 *
## lagsales:Ind3 0.1403 0.1758 0.798 0.42925
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 141.9 on 41 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9598, Adjusted R-squared: 0.949
## F-statistic: 89.03 on 11 and 41 DF, p-value: < 2.2e-16
# Reduced model: keeps only the lagged-sales x indicator interactions
# (the advertising x indicator interactions are dropped).
modelintrctn2 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3
)
summary(modelintrctn2)
##
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
## lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -302.51 -88.35 -30.19 90.71 460.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.71624 192.71383 0.040 0.96824
## advrtsng 0.57365 0.12440 4.611 3.44e-05 ***
## lagsales 0.71720 0.10817 6.631 3.99e-08 ***
## Ind1 203.00323 718.96761 0.282 0.77900
## Ind2 227.37512 235.17049 0.967 0.33890
## Ind3 429.98714 235.59575 1.825 0.07478 .
## lagsales:Ind1 -0.21977 0.69612 -0.316 0.75371
## lagsales:Ind2 -0.01568 0.11611 -0.135 0.89317
## lagsales:Ind3 -0.33134 0.12021 -2.756 0.00848 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158.3 on 44 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9463, Adjusted R-squared: 0.9366
## F-statistic: 97.01 on 8 and 44 DF, p-value: < 2.2e-16
# Reduced model: keeps only the advertising x indicator interactions
# (the lagged-sales x indicator interactions are dropped).
modelintrctn3 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3
)
summary(modelintrctn3)
##
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
## advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -324.46 -66.60 -8.68 84.38 448.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -243.81773 182.87655 -1.333 0.189316
## advrtsng 1.36155 0.23859 5.707 9.09e-07 ***
## lagsales 0.48173 0.07471 6.448 7.40e-08 ***
## Ind1 295.67391 843.89700 0.350 0.727735
## Ind2 565.20546 216.45901 2.611 0.012294 *
## Ind3 661.73670 212.08477 3.120 0.003187 **
## advrtsng:Ind1 -0.45759 1.57013 -0.291 0.772088
## advrtsng:Ind2 -0.43302 0.22475 -1.927 0.060498 .
## advrtsng:Ind3 -0.93641 0.22406 -4.179 0.000137 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 148.4 on 44 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9528, Adjusted R-squared: 0.9442
## F-statistic: 111.1 on 8 and 44 DF, p-value: < 2.2e-16
# Partial F-test for the three advertising x indicator interactions:
# model without them (modelintrctn2) against the full model.
anova(modelintrctn2, modelintrctn1)
## Analysis of Variance Table
##
## Model 1: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + lagsales *
## Ind1 + lagsales * Ind2 + lagsales * Ind3
## Model 2: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng *
## Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales * Ind1 +
## lagsales * Ind2 + lagsales * Ind3
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 44 1102297
## 2 41 825567 3 276730 4.5811 0.007411 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Partial F-test for the three lagged-sales x indicator interactions:
# model without them (modelintrctn3) against the full model.
anova(modelintrctn3, modelintrctn1)
## Analysis of Variance Table
##
## Model 1: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng *
## Ind1 + advrtsng * Ind2 + advrtsng * Ind3
## Model 2: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng *
## Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales * Ind1 +
## lagsales * Ind2 + lagsales * Ind3
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 44 969425
## 2 41 825567 3 143858 2.3815 0.0834 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sales on the advertising-copy factor, current and lagged advertising,
# and two lags of sales.
model71 <- lm(
  sales ~ fadvcopy + advrtsng + lagsales + lag2sales + lagadv
)
summary(model71)
##
## Call:
## lm(formula = sales ~ fadvcopy + advrtsng + lagsales + lag2sales +
## lagadv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -333.56 -98.41 -12.37 85.55 436.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 143.3039 80.0307 1.791 0.080237 .
## fadvcopy2 275.0504 97.9004 2.809 0.007376 **
## fadvcopy3 28.1850 96.9147 0.291 0.772553
## fadvcopy4 116.5564 88.3765 1.319 0.194038
## advrtsng 0.5500 0.1339 4.109 0.000171 ***
## lagsales 0.9654 0.1503 6.425 8e-08 ***
## lag2sales -0.2050 0.1163 -1.763 0.084863 .
## lagadv -0.3447 0.1510 -2.282 0.027377 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 165 on 44 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9391, Adjusted R-squared: 0.9294
## F-statistic: 96.96 on 7 and 44 DF, p-value: < 2.2e-16
# Re-order the advertising-copy factor so that level 2 comes first; with
# R's default treatment contrasts the first level is the baseline.
fadvcopy2 <- factor(fadvcopy, levels = c(2, 1, 3, 4))