8 Granger causality

Is advertising generating sales, or does the effect run in the other direction? Do past values of advertising help us get a better prediction of sales today than we would get if we did not use these past values of advertising?

Granger causality model:

\[ Adv_t = \alpha + \sum_{i=1}^{I}\beta_iAdv_{t-i} + \sum_{j=1}^{J}\gamma_jSales_{t-j} + \epsilon_t \]

where \(\epsilon_t\) is a white-noise error term. Advertising today is impacted by previous advertising plus previous sales. Capital \(I\) is the number of years of past advertising and capital \(J\) is the number of years of past sales. We choose \(I\) and \(J\) large enough to accommodate a rich autocorrelation structure when modeling current advertising. We then use a partial F-test of the null hypothesis that \(\gamma_j = 0\) for all \(j\), i.e., that we do not need past values of sales in order to forecast current advertising.

# Read the Lydia data set. Its columns are attached to the search path
# because the model formulas later in this script refer to them by bare name.
lydia <- read.csv(file = "/cloud/project/data/Lydia.txt")
attach(lydia)
# Preview the first six rows.
head(lydia, n = 6L)
##   year sales advrtsng advcopy Ind1 Ind2 Ind3 lagsales lag2sales lagadv
## 1 1907  1016      608       1    1    0    0       NA        NA     NA
## 2 1908   921      451       1    1    0    0     1016        NA    608
## 3 1909   934      529       1    1    0    0      921      1016    451
## 4 1910   976      543       1    1    0    0      934       921    529
## 5 1911   930      525       1    1    0    0      976       934    543
## 6 1912  1052      549       1    1    0    0      930       976    525
# Shift a vector back by k positions: pad the front with k NAs and drop the
# last k values.
lag_by <- function(x, k) {
  c(rep(NA, k), head(x, -k))
}

# Advertising lagged by 2 to 5 positions.
lag2adv <- lag_by(advrtsng, 2)
lag3adv <- lag_by(advrtsng, 3)
lag4adv <- lag_by(advrtsng, 4)
lag5adv <- lag_by(advrtsng, 5)

# Advertising-copy code as a factor (used as a categorical regressor later).
fadvcopy <- as.factor(advcopy)

# Global copies of the sales and advertising columns.
sales <- lydia$sales
ad <- lydia$advrtsng

# Sales lagged by 3 and 4 positions.
lag3sales <- lag_by(sales, 3)
lag4sales <- lag_by(sales, 4)

# Unrestricted Granger model: current advertising regressed on four lags of
# advertising, four lags of sales, and the Ind1-Ind3 indicators.
modelG1 <- lm(
  advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
    lagsales + lag2sales + lag3sales + lag4sales +
    Ind1 + Ind2 + Ind3
)
summary(modelG1)
## 
## Call:
## lm(formula = advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + 
##     lagsales + lag2sales + lag3sales + lag4sales + Ind1 + Ind2 + 
##     Ind3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -319.76  -68.73   -9.47  104.03  305.05 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.802e+02  1.100e+02  -1.639 0.109441    
## lagadv       3.737e-01  1.830e-01   2.042 0.048142 *  
## lag2adv     -3.120e-01  1.821e-01  -1.713 0.094825 .  
## lag3adv      1.146e-01  1.786e-01   0.642 0.524997    
## lag4adv      1.580e-01  1.773e-01   0.891 0.378432    
## lagsales     5.970e-01  1.465e-01   4.076 0.000225 ***
## lag2sales   -4.588e-01  2.152e-01  -2.132 0.039526 *  
## lag3sales    2.672e-01  2.217e-01   1.205 0.235543    
## lag4sales   -5.637e-03  1.586e-01  -0.036 0.971832    
## Ind1         1.313e+02  1.019e+02   1.288 0.205596    
## Ind2         1.236e+02  6.987e+01   1.769 0.084942 .  
## Ind3         1.051e+02  8.848e+01   1.188 0.242198    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 163.9 on 38 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.8459, Adjusted R-squared:  0.8012 
## F-statistic: 18.96 on 11 and 38 DF,  p-value: 3.609e-12
# Restricted Granger model: the same regression as modelG1 with the four
# lagged-sales terms left out.
modelG2 <- lm(
  advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv +
    Ind1 + Ind2 + Ind3
)
summary(modelG2)
## 
## Call:
## lm(formula = advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + 
##     Ind1 + Ind2 + Ind3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -472.77 -113.07   22.23  125.10  352.17 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.6272   123.2128  -0.070   0.9445    
## lagadv        0.8880     0.1492   5.954 4.63e-07 ***
## lag2adv      -0.3726     0.2010  -1.854   0.0707 .  
## lag3adv       0.2808     0.1969   1.426   0.1612    
## lag4adv       0.1958     0.1673   1.170   0.2486    
## Ind1         29.1848   119.5617   0.244   0.8083    
## Ind2        192.7088    81.9106   2.353   0.0234 *  
## Ind3        -84.2094    85.2865  -0.987   0.3291    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 200 on 42 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.7463, Adjusted R-squared:  0.704 
## F-statistic: 17.65 on 7 and 42 DF,  p-value: 1.151e-10
# Partial F-test of the restricted model (no lagged sales) against the
# unrestricted model (with lagged sales).
anova(modelG2, modelG1)
## Analysis of Variance Table
## 
## Model 1: advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + Ind1 + Ind2 + 
##     Ind3
## Model 2: advrtsng ~ lagadv + lag2adv + lag3adv + lag4adv + lagsales + 
##     lag2sales + lag3sales + lag4sales + Ind1 + Ind2 + Ind3
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     42 1679599                                  
## 2     38 1020341  4    659258 6.1381 0.0006524 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Full interaction model: sales on advertising, lagged sales and the Ind1-Ind3
# indicators, with both advertising and lagged sales interacted with each
# indicator.
modelintrctn1 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3 +
    lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3
)
summary(modelintrctn1)
## 
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + 
##     advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales * 
##     Ind1 + lagsales * Ind2 + lagsales * Ind3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -265.83  -67.05  -18.07   67.72  440.64 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -182.4488   180.7390  -1.009  0.31867    
## advrtsng         1.6753     0.3270   5.123 7.55e-06 ***
## lagsales         0.3042     0.1506   2.020  0.04998 *  
## Ind1           247.2927   920.2953   0.269  0.78950    
## Ind2           403.9173   217.3517   1.858  0.07031 .  
## Ind3           629.2076   218.6483   2.878  0.00633 ** 
## advrtsng:Ind1   -0.7603     1.5671  -0.485  0.63012    
## advrtsng:Ind2   -1.5474     0.4814  -3.214  0.00255 ** 
## advrtsng:Ind3   -1.2114     0.3506  -3.455  0.00129 ** 
## lagsales:Ind1    0.1588     0.6539   0.243  0.80937    
## lagsales:Ind2    0.6277     0.2428   2.585  0.01340 *  
## lagsales:Ind3    0.1403     0.1758   0.798  0.42925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 141.9 on 41 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9598, Adjusted R-squared:  0.949 
## F-statistic: 89.03 on 11 and 41 DF,  p-value: < 2.2e-16
# Reduced model: keeps the lagged-sales-by-indicator interactions but has no
# advertising-by-indicator interactions.
modelintrctn2 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3
)
summary(modelintrctn2)
## 
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + 
##     lagsales * Ind1 + lagsales * Ind2 + lagsales * Ind3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -302.51  -88.35  -30.19   90.71  460.24 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.71624  192.71383   0.040  0.96824    
## advrtsng        0.57365    0.12440   4.611 3.44e-05 ***
## lagsales        0.71720    0.10817   6.631 3.99e-08 ***
## Ind1          203.00323  718.96761   0.282  0.77900    
## Ind2          227.37512  235.17049   0.967  0.33890    
## Ind3          429.98714  235.59575   1.825  0.07478 .  
## lagsales:Ind1  -0.21977    0.69612  -0.316  0.75371    
## lagsales:Ind2  -0.01568    0.11611  -0.135  0.89317    
## lagsales:Ind3  -0.33134    0.12021  -2.756  0.00848 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 158.3 on 44 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9463, Adjusted R-squared:  0.9366 
## F-statistic: 97.01 on 8 and 44 DF,  p-value: < 2.2e-16
# Reduced model: keeps the advertising-by-indicator interactions but has no
# lagged-sales-by-indicator interactions.
modelintrctn3 <- lm(
  sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 +
    advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3
)
summary(modelintrctn3)
## 
## Call:
## lm(formula = sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + 
##     advrtsng * Ind1 + advrtsng * Ind2 + advrtsng * Ind3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -324.46  -66.60   -8.68   84.38  448.72 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -243.81773  182.87655  -1.333 0.189316    
## advrtsng         1.36155    0.23859   5.707 9.09e-07 ***
## lagsales         0.48173    0.07471   6.448 7.40e-08 ***
## Ind1           295.67391  843.89700   0.350 0.727735    
## Ind2           565.20546  216.45901   2.611 0.012294 *  
## Ind3           661.73670  212.08477   3.120 0.003187 ** 
## advrtsng:Ind1   -0.45759    1.57013  -0.291 0.772088    
## advrtsng:Ind2   -0.43302    0.22475  -1.927 0.060498 .  
## advrtsng:Ind3   -0.93641    0.22406  -4.179 0.000137 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 148.4 on 44 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9528, Adjusted R-squared:  0.9442 
## F-statistic: 111.1 on 8 and 44 DF,  p-value: < 2.2e-16
# Partial F-test: do the advertising-by-indicator interactions improve on the
# model that omits them?
anova(modelintrctn2, modelintrctn1)
## Analysis of Variance Table
## 
## Model 1: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + lagsales * 
##     Ind1 + lagsales * Ind2 + lagsales * Ind3
## Model 2: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng * 
##     Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales * Ind1 + 
##     lagsales * Ind2 + lagsales * Ind3
##   Res.Df     RSS Df Sum of Sq      F   Pr(>F)   
## 1     44 1102297                                
## 2     41  825567  3    276730 4.5811 0.007411 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Partial F-test: do the lagged-sales-by-indicator interactions improve on the
# model that omits them?
anova(modelintrctn3, modelintrctn1)
## Analysis of Variance Table
## 
## Model 1: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng * 
##     Ind1 + advrtsng * Ind2 + advrtsng * Ind3
## Model 2: sales ~ advrtsng + lagsales + Ind1 + Ind2 + Ind3 + advrtsng * 
##     Ind1 + advrtsng * Ind2 + advrtsng * Ind3 + lagsales * Ind1 + 
##     lagsales * Ind2 + lagsales * Ind3
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)  
## 1     44 969425                             
## 2     41 825567  3    143858 2.3815 0.0834 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sales regressed on the advertising-copy factor, current and lagged
# advertising, and the first two lags of sales.
model71 <- lm(
  sales ~ fadvcopy + advrtsng + lagsales + lag2sales + lagadv
)
summary(model71)
## 
## Call:
## lm(formula = sales ~ fadvcopy + advrtsng + lagsales + lag2sales + 
##     lagadv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -333.56  -98.41  -12.37   85.55  436.33 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 143.3039    80.0307   1.791 0.080237 .  
## fadvcopy2   275.0504    97.9004   2.809 0.007376 ** 
## fadvcopy3    28.1850    96.9147   0.291 0.772553    
## fadvcopy4   116.5564    88.3765   1.319 0.194038    
## advrtsng      0.5500     0.1339   4.109 0.000171 ***
## lagsales      0.9654     0.1503   6.425    8e-08 ***
## lag2sales    -0.2050     0.1163  -1.763 0.084863 .  
## lagadv       -0.3447     0.1510  -2.282 0.027377 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 165 on 44 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9391, Adjusted R-squared:  0.9294 
## F-statistic: 96.96 on 7 and 44 DF,  p-value: < 2.2e-16
# Re-order the copy factor's levels so that level 2 comes first; the first
# level is the baseline under R's default treatment contrasts.
fadvcopy2 <- factor(fadvcopy, levels = c(2, 1, 3, 4))