spurious relations over time: the case of covid and weekly economic activity

## Warning: package 'dplyr' was built under R version 4.0.2

So we got ourselves some time series data:

head(df)
##         week  WEI   Index cases deaths      lnindex lockshare
## 1 2008-01-05 1.42 1.00000     0      0 0.0000000000         0
## 2 2008-01-12 1.46 1.00028     0      0 0.0002799608         0
## 3 2008-01-19 1.40 1.00055     0      0 0.0005498488         0
## 4 2008-01-26 0.96 1.00073     0      0 0.0007297337         0
## 5 2008-02-02 0.73 1.00088     0      0 0.0008796130         0
## 6 2008-02-09 0.78 1.00103     0      0 0.0010294699         0
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(scales)


  scaler=max(df$cases,na.rm=TRUE)/max(df$Index,na.rm=TRUE)
  
  ggplot(df,aes(x=week)) + theme_minimal() + xlab("Weekly Data") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))+
    scale_x_date(breaks = date_breaks("12 months"),labels = date_format("%Y")) +
    geom_line(aes(y = Index*scaler, colour = "Economic Activity Index"))+
    geom_line(aes(y = cases, colour = "Covid Cases per 100K"))+ylab("Covid Cases")+
    scale_y_continuous(sec.axis = sec_axis(label=comma, trans=~./(scaler),
                       name = "Economic Activity Index"),labels=comma)

Illustration of time trend

lm(lnindex~cases,df) %>% summary()
## 
## Call:
## lm(formula = lnindex ~ cases, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.108375 -0.064942 -0.002043  0.055871  0.121388 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.082359   0.002731  30.156  < 2e-16 ***
## cases       0.050576   0.007800   6.484 1.74e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06928 on 669 degrees of freedom
## Multiple R-squared:  0.05913,    Adjusted R-squared:  0.05772 
## F-statistic: 42.04 on 1 and 669 DF,  p-value: 1.736e-10
df=df %>% mutate(t=1:n())
lm(lnindex~cases+t,df) %>% summary()
## 
## Call:
## lm(formula = lnindex ~ cases + t, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.024859 -0.004965 -0.001175  0.003861  0.038124 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.850e-02  9.170e-04  -41.98   <2e-16 ***
## cases       -2.262e-02  1.393e-03  -16.23   <2e-16 ***
## t            3.752e-04  2.466e-06  152.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01161 on 668 degrees of freedom
## Multiple R-squared:  0.9736, Adjusted R-squared:  0.9735 
## F-statistic: 1.232e+04 on 2 and 668 DF,  p-value: < 2.2e-16
lm(Index~cases+t,df,year(week)>2018) %>% summary()
## 
## Call:
## lm(formula = Index ~ cases + t, data = df, subset = year(week) > 
##     2018)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0023664 -0.0006246 -0.0002099  0.0003270  0.0041289 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.674e-01  4.579e-03  211.28   <2e-16 ***
## cases       -2.959e-02  2.702e-04 -109.51   <2e-16 ***
## t            4.078e-04  7.519e-06   54.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001204 on 94 degrees of freedom
## Multiple R-squared:  0.994,  Adjusted R-squared:  0.9939 
## F-statistic:  7808 on 2 and 94 DF,  p-value: < 2.2e-16
lm(Index~cases+t+lockshare,df,year(week)>2018) %>% summary()
## 
## Call:
## lm(formula = Index ~ cases + t + lockshare, data = df, subset = year(week) > 
##     2018)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0029987 -0.0004746 -0.0000545  0.0003908  0.0035047 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.737e-01  4.864e-03 200.179   <2e-16 ***
## cases       -3.015e-02  3.169e-04 -95.129   <2e-16 ***
## t            3.971e-04  8.025e-06  49.478   <2e-16 ***
## lockshare    1.702e-05  5.604e-06   3.037   0.0031 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001154 on 93 degrees of freedom
## Multiple R-squared:  0.9946, Adjusted R-squared:  0.9944 
## F-statistic:  5664 on 3 and 93 DF,  p-value: < 2.2e-16

The case of Panel data

head(statsbyweek %>% arrange(state,week))
## # A tibble: 6 x 9
## # Groups:   state [1]
##   state   week        hoax tweets cases deaths hoaxsh Dcases Ddeaths
##   <chr>   <date>     <int>  <int> <int>  <int>  <dbl>  <int>   <int>
## 1 Alabama 2020-03-15     4   1503    51      0  0.266     NA      NA
## 2 Alabama 2020-03-22    62   4198   386      1  1.48     335       1
## 3 Alabama 2020-03-29    14   5218  1108     28  0.268    722      27
## 4 Alabama 2020-04-05    12   4793  2498     67  0.250   1390      39
## 5 Alabama 2020-04-12     9   4486  4241    123  0.201   1743      56
## 6 Alabama 2020-04-19     6   3570  5610    201  0.168   1369      78
statsbyweek %>% group_by(state) %>% summarise(n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 50 x 2
##    state       `n()`
##    <chr>       <int>
##  1 Alabama        29
##  2 Alaska         29
##  3 Arizona        36
##  4 Arkansas       30
##  5 California     36
##  6 Colorado       30
##  7 Connecticut    30
##  8 Delaware       30
##  9 Florida        31
## 10 Georgia        31
## # ... with 40 more rows
tsplotter(statsbyweek %>% filter(state=="New York"),label="New York")

tsplotter(statsbyweek %>% filter(state=="California"),label="California")

tsplotter(statsbyweek %>% filter(state=="Texas"),label="Texas")

statsbyweek=as.data.frame(statsbyweek)
lm(cases~hoaxsh,statsbyweek) %>% summary()
## 
## Call:
## lm(formula = cases ~ hoaxsh, data = statsbyweek)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -189328  -50914  -40048    7176  751613 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    50929       3108  16.388  < 2e-16 ***
## hoaxsh         11555       2380   4.855 1.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 108700 on 1544 degrees of freedom
## Multiple R-squared:  0.01504,    Adjusted R-squared:  0.0144 
## F-statistic: 23.57 on 1 and 1544 DF,  p-value: 1.326e-06
lm(cases~hoaxsh+factor(week),statsbyweek) %>% summary()
## 
## Call:
## lm(formula = cases ~ hoaxsh + factor(week), data = statsbyweek)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -199861  -37318   -9820    1098  668461 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                 1.00   99956.85   0.000  0.99999   
## hoaxsh                   7865.20    2593.18   3.033  0.00246 **
## factor(week)2020-01-26  -2439.83  111758.05  -0.022  0.98259   
## factor(week)2020-02-02    -95.32  107965.74  -0.001  0.99930   
## factor(week)2020-02-09      1.00  106858.37   0.000  0.99999   
## factor(week)2020-02-16   -254.24  106020.28  -0.002  0.99809   
## factor(week)2020-02-23    -50.25  105363.77   0.000  0.99962   
## factor(week)2020-03-01  -1014.28  102855.30  -0.010  0.99213   
## factor(week)2020-03-08    -70.35  101086.35  -0.001  0.99944   
## factor(week)2020-03-15   -133.49  100951.52  -0.001  0.99895   
## factor(week)2020-03-22    236.01  100952.16   0.002  0.99813   
## factor(week)2020-03-29   2530.42  100953.13   0.025  0.98001   
## factor(week)2020-04-05   6423.98  100953.96   0.064  0.94927   
## factor(week)2020-04-12  10540.03  100954.06   0.104  0.91686   
## factor(week)2020-04-19  14195.65  100954.80   0.141  0.88819   
## factor(week)2020-04-26  18636.04  100953.93   0.185  0.85357   
## factor(week)2020-05-03  13982.43  101011.43   0.138  0.88992   
## factor(week)2020-05-10  10506.78  101111.51   0.104  0.91725   
## factor(week)2020-05-17  21445.90  101000.18   0.212  0.83187   
## factor(week)2020-05-24  27687.45  100972.27   0.274  0.78396   
## factor(week)2020-05-31  33274.50  100958.75   0.330  0.74176   
## factor(week)2020-06-07  37034.00  100956.02   0.367  0.71380   
## factor(week)2020-06-14  36945.13  100972.52   0.366  0.71450   
## factor(week)2020-06-21  44614.05  100956.18   0.442  0.65861   
## factor(week)2020-06-28  48322.28  100966.91   0.479  0.63230   
## factor(week)2020-07-05  57900.43  100956.69   0.574  0.56638   
## factor(week)2020-07-12  65139.58  100963.23   0.645  0.51891   
## factor(week)2020-07-19  74518.32  100962.63   0.738  0.46058   
## factor(week)2020-07-26  86415.02  100952.94   0.856  0.39214   
## factor(week)2020-08-02  94234.49  100953.14   0.933  0.35074   
## factor(week)2020-08-09 100693.59  100955.54   0.997  0.31873   
## factor(week)2020-08-16 108173.80  100953.17   1.072  0.28411   
## factor(week)2020-08-23 114411.68  100952.54   1.133  0.25726   
## factor(week)2020-08-30 117968.07  100958.41   1.168  0.24280   
## factor(week)2020-09-06 118457.69  100986.50   1.173  0.24098   
## factor(week)2020-09-13 124743.57  100979.23   1.235  0.21690   
## factor(week)2020-09-20 114588.39  101244.89   1.132  0.25790   
## factor(week)2020-09-27 140013.96  100959.49   1.387  0.16570   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 99960 on 1508 degrees of freedom
## Multiple R-squared:  0.1859, Adjusted R-squared:  0.1659 
## F-statistic: 9.304 on 37 and 1508 DF,  p-value: < 2.2e-16
lm(cases~hoaxsh++factor(state)+factor(week),statsbyweek) %>% summary()
## 
## Call:
## lm(formula = cases ~ hoaxsh + +factor(state) + factor(week), 
##     data = statsbyweek)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -264367  -23041     593   22221  456332 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -1002      70567  -0.014 0.988669    
## hoaxsh                          3788       1863   2.033 0.042192 *  
## factor(state)Alaska           -52626      17974  -2.928 0.003465 ** 
## factor(state)Arizona           41125      17193   2.392 0.016885 *  
## factor(state)Arkansas         -24748      17843  -1.387 0.165651    
## factor(state)California       223775      17186  13.021  < 2e-16 ***
## factor(state)Colorado         -19744      17833  -1.107 0.268403    
## factor(state)Connecticut      -15063      17829  -0.845 0.398342    
## factor(state)Delaware         -45578      17878  -2.549 0.010892 *  
## factor(state)Florida          200328      17695  11.321  < 2e-16 ***
## factor(state)Georgia           59035      17710   3.333 0.000879 ***
## factor(state)Hawaii           -47306      17881  -2.646 0.008243 ** 
## factor(state)Idaho            -40063      17996  -2.226 0.026152 *  
## factor(state)Illinois          78867      17185   4.589 4.83e-06 ***
## factor(state)Indiana           -3130      17831  -0.176 0.860678    
## factor(state)Iowa             -19088      17868  -1.068 0.285580    
## factor(state)Kansas           -31846      17840  -1.785 0.074457 .  
## factor(state)Kentucky         -28872      17843  -1.618 0.105857    
## factor(state)Louisiana         18274      17849   1.024 0.306101    
## factor(state)Maine            -50864      17993  -2.827 0.004766 ** 
## factor(state)Maryland          10304      17871   0.577 0.564306    
## factor(state)Massachusetts     40510      17291   2.343 0.019276 *  
## factor(state)Michigan          14787      17827   0.829 0.406972    
## factor(state)Minnesota        -15911      17827  -0.893 0.372243    
## factor(state)Mississippi      -17436      17829  -0.978 0.328263    
## factor(state)Missouri         -15275      17829  -0.857 0.391718    
## factor(state)Montana          -50312      17999  -2.795 0.005255 ** 
## factor(state)Nebraska         -24086      17490  -1.377 0.168685    
## factor(state)Nevada           -25351      17833  -1.422 0.155373    
## factor(state)New Hampshire    -43303      17711  -2.445 0.014602 *  
## factor(state)New Jersey        89083      17707   5.031 5.48e-07 ***
## factor(state)New Mexico       -40573      17826  -2.276 0.022992 *  
## factor(state)New York         273568      17695  15.461  < 2e-16 ***
## factor(state)North Carolina    24713      17703   1.396 0.162942    
## factor(state)North Dakota     -47733      17826  -2.678 0.007498 ** 
## factor(state)Ohio               7729      17831   0.433 0.664720    
## factor(state)Oklahoma         -28669      17827  -1.608 0.108011    
## factor(state)Oregon           -37723      17693  -2.132 0.033171 *  
## factor(state)Pennsylvania      31036      17828   1.741 0.081916 .  
## factor(state)Rhode Island     -34009      17715  -1.920 0.055089 .  
## factor(state)South Carolina    -1035      17833  -0.058 0.953744    
## factor(state)South Dakota     -45387      17829  -2.546 0.011008 *  
## factor(state)Tennessee         12180      17826   0.683 0.494573    
## factor(state)Texas            195074      17365  11.234  < 2e-16 ***
## factor(state)Utah             -20935      17593  -1.190 0.234257    
## factor(state)Vermont          -48885      17893  -2.732 0.006368 ** 
## factor(state)Virginia           9484      17835   0.532 0.594974    
## factor(state)Washington         1003      17192   0.058 0.953470    
## factor(state)West Virginia    -48636      17997  -2.702 0.006962 ** 
## factor(state)Wisconsin         -1503      17272  -0.087 0.930670    
## factor(state)Wyoming          -50667      17829  -2.842 0.004547 ** 
## factor(week)2020-01-26        -86364      77154  -1.119 0.263164    
## factor(week)2020-02-02        -63005      74656  -0.844 0.398845    
## factor(week)2020-02-09        -81689      73928  -1.105 0.269351    
## factor(week)2020-02-16        -68464      73377  -0.933 0.350953    
## factor(week)2020-02-23        -58332      72946  -0.800 0.424036    
## factor(week)2020-03-01        -62159      71303  -0.872 0.383489    
## factor(week)2020-03-08        -12585      70148  -0.179 0.857643    
## factor(week)2020-03-15         -6066      70060  -0.087 0.931016    
## factor(week)2020-03-22         -5266      70060  -0.075 0.940094    
## factor(week)2020-03-29         -2647      70060  -0.038 0.969869    
## factor(week)2020-04-05          1451      70061   0.021 0.983483    
## factor(week)2020-04-12          5590      70061   0.080 0.936414    
## factor(week)2020-04-19          9398      70061   0.134 0.893308    
## factor(week)2020-04-26         13656      70061   0.195 0.845486    
## factor(week)2020-05-03         13366      70101   0.191 0.848810    
## factor(week)2020-05-10         13360      70173   0.190 0.849033    
## factor(week)2020-05-17         20290      70093   0.289 0.772264    
## factor(week)2020-05-24         24822      70073   0.354 0.723218    
## factor(week)2020-05-31         29094      70064   0.415 0.678023    
## factor(week)2020-06-07         32454      70062   0.463 0.643278    
## factor(week)2020-06-14         34099      70073   0.487 0.626599    
## factor(week)2020-06-21         40060      70062   0.572 0.567563    
## factor(week)2020-06-28         45011      70069   0.642 0.520725    
## factor(week)2020-07-05         53427      70063   0.763 0.445850    
## factor(week)2020-07-12         61475      70067   0.877 0.380427    
## factor(week)2020-07-19         70792      70066   1.010 0.312495    
## factor(week)2020-07-26         81185      70060   1.159 0.246730    
## factor(week)2020-08-02         89061      70060   1.271 0.203860    
## factor(week)2020-08-09         96031      70062   1.371 0.170692    
## factor(week)2020-08-16        103009      70060   1.470 0.141699    
## factor(week)2020-08-23        109055      70060   1.557 0.119782    
## factor(week)2020-08-30        113742      70064   1.623 0.104718    
## factor(week)2020-09-06        116552      70083   1.663 0.096516 .  
## factor(week)2020-09-13        122379      70078   1.746 0.080965 .  
## factor(week)2020-09-20        120611      70270   1.716 0.086304 .  
## factor(week)2020-09-27        135928      70064   1.940 0.052567 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 68440 on 1459 degrees of freedom
## Multiple R-squared:  0.6307, Adjusted R-squared:  0.609 
## F-statistic: 28.98 on 86 and 1459 DF,  p-value: < 2.2e-16
library(plm)
plm(cases~hoaxsh+factor(week)+factor(state),statsbyweek,
    index=c("state","week"),
    model="within", 
    effect="twoways") %>% summary()
## Twoways effects Within Model
## 
## Call:
## plm(formula = cases ~ hoaxsh + factor(week) + factor(state), 
##     data = statsbyweek, effect = "twoways", model = "within", 
##     index = c("state", "week"))
## 
## Unbalanced Panel: n = 50, T = 29-37, N = 1546
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -264367.42  -23040.53     592.79   22221.48  456331.70 
## 
## Coefficients:
##        Estimate Std. Error t-value Pr(>|t|)  
## hoaxsh   3788.3     1863.0  2.0334  0.04219 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    6.8535e+12
## Residual Sum of Squares: 6.8341e+12
## R-Squared:      0.0028259
## Adj. R-Squared: -0.055952
## F-statistic: 4.13474 on 1 and 1459 DF, p-value: 0.042192

Unit roots

obs=100
eps=rnorm(obs)
y99=eps
y100=eps
yM99=eps

for(i in 2:obs){
  
  y99[i] = y99[i-1] *.95 +eps[i]
  yM99[i]=-yM99[i-1]*.99+eps[i]
  y100[i]= y100[i-1]*1+eps[i]
  
}


sdf=data.frame(y99,y100,yM99,eps,period=1:obs) 

#library(latex2exp)
ggplot(sdf,aes(x=period))+geom_line(aes(y=y99,color="rho=0.99"))+
      geom_line(aes(y=y100,color="rho=1"))+
      geom_line(aes(y=yM99,color="rho=-.99"))+
     theme_minimal()+ylab("y")

  library(urca)
## Warning: package 'urca' was built under R version 4.0.2
  ur.df(df$cases,type="none",lags=1) %>% summary()
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.02151  0.00000  0.00000  0.00000  0.07106 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    0.0004516  0.0006910   0.654    0.514    
## z.diff.lag 0.9805725  0.0133827  73.272   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004085 on 667 degrees of freedom
## Multiple R-squared:  0.9468, Adjusted R-squared:  0.9467 
## F-statistic:  5938 on 2 and 667 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: 0.6536 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
  ur.df(df$lnindex,type="none",lags=1) %>% summary()
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.343e-04 -2.643e-05  4.240e-06  4.131e-05  1.922e-04 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -2.155e-05  2.703e-05  -0.797    0.426    
## z.diff.lag  9.924e-01  5.596e-03 177.334   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.414e-05 on 667 degrees of freedom
## Multiple R-squared:  0.9812, Adjusted R-squared:  0.9811 
## F-statistic: 1.739e+04 on 2 and 667 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -0.7971 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
  ur.df(diff(df$cases,1),type="none",lags=1) %>% summary()
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.02330  0.00000  0.00000  0.00000  0.04392 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -0.036593   0.007388  -4.953 9.26e-07 ***
## z.diff.lag  0.604696   0.031607  19.132  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003285 on 666 degrees of freedom
## Multiple R-squared:  0.3567, Adjusted R-squared:  0.3547 
## F-statistic: 184.6 on 2 and 666 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -4.9534 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
  ur.df(diff(df$lnindex,1),type="none",lags=1) %>% summary()
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.236e-04 -3.079e-05  3.980e-06  4.133e-05  1.963e-04 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -0.010977   0.005233  -2.098   0.0363 *  
## z.diff.lag  0.195464   0.038082   5.133 3.75e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.28e-05 on 666 degrees of freedom
## Multiple R-squared:  0.04215,    Adjusted R-squared:  0.03927 
## F-statistic: 14.65 on 2 and 666 DF,  p-value: 5.92e-07
## 
## 
## Value of test-statistic is: -2.0976 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
df=df %>% arrange(week) %>% mutate(Dlnindex=lnindex-dplyr::lag(lnindex),
                                     Dcases=cases-dplyr::lag(cases) ,
                                     DDlnindex=Dlnindex-dplyr::lag(Dlnindex),Dlockshare=lockshare-dplyr::lag(lockshare))
  
  
lm(Dlnindex~Dcases+t,df) %>% summary()  
## 
## Call:
## lm(formula = Dlnindex ~ Dcases + t, data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.107e-03 -9.941e-05  4.439e-05  1.487e-04  1.041e-03 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.429e-04  2.415e-05   5.918 5.20e-09 ***
## Dcases      -2.316e-02  7.258e-04 -31.914  < 2e-16 ***
## t            5.269e-07  6.490e-08   8.119 2.28e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000305 on 667 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6065, Adjusted R-squared:  0.6053 
## F-statistic: 513.9 on 2 and 667 DF,  p-value: < 2.2e-16
lm(Dlnindex~Dcases+t+Dlockshare,df) %>% summary()
## 
## Call:
## lm(formula = Dlnindex ~ Dcases + t + Dlockshare, data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011149 -0.0001001  0.0000414  0.0001472  0.0010273 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.401e-04  2.404e-05   5.831 8.61e-09 ***
## Dcases      -2.311e-02  7.221e-04 -32.010  < 2e-16 ***
## t            5.400e-07  6.471e-08   8.345 4.10e-16 ***
## Dlockshare  -1.253e-05  4.354e-06  -2.878  0.00412 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003034 on 666 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6113, Adjusted R-squared:  0.6095 
## F-statistic: 349.1 on 3 and 666 DF,  p-value: < 2.2e-16
lm(Dlnindex~Dcases+t+Dlockshare,df) %>% summary()
## 
## Call:
## lm(formula = Dlnindex ~ Dcases + t + Dlockshare, data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011149 -0.0001001  0.0000414  0.0001472  0.0010273 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.401e-04  2.404e-05   5.831 8.61e-09 ***
## Dcases      -2.311e-02  7.221e-04 -32.010  < 2e-16 ***
## t            5.400e-07  6.471e-08   8.345 4.10e-16 ***
## Dlockshare  -1.253e-05  4.354e-06  -2.878  0.00412 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003034 on 666 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6113, Adjusted R-squared:  0.6095 
## F-statistic: 349.1 on 3 and 666 DF,  p-value: < 2.2e-16
lm(Dlnindex~dplyr::lag(Dlnindex)+Dcases+t+Dlockshare,df) %>% summary()  
## 
## Call:
## lm(formula = Dlnindex ~ dplyr::lag(Dlnindex) + Dcases + t + Dlockshare, 
##     data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.875e-04 -3.296e-05 -7.900e-07  3.513e-05  1.848e-04 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.998e-06  5.098e-06   0.588  0.55673    
## dplyr::lag(Dlnindex)  9.557e-01  7.810e-03 122.365  < 2e-16 ***
## Dcases               -1.286e-03  2.325e-04  -5.530  4.6e-08 ***
## t                     3.859e-08  1.400e-08   2.756  0.00602 ** 
## Dlockshare           -1.382e-05  8.985e-07 -15.377  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.26e-05 on 664 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9835, Adjusted R-squared:  0.9834 
## F-statistic:  9894 on 4 and 664 DF,  p-value: < 2.2e-16
lm(Dlnindex~dplyr::lag(Dlnindex)+dplyr::lag(Dlnindex,2)+Dcases+dplyr::lag(Dcases)+dplyr::lag(Dcases,2)+t+Dlockshare+dplyr::lag(Dlockshare)+dplyr::lag(Dlockshare,2),df) %>% summary()  
## 
## Call:
## lm(formula = Dlnindex ~ dplyr::lag(Dlnindex) + dplyr::lag(Dlnindex, 
##     2) + Dcases + dplyr::lag(Dcases) + dplyr::lag(Dcases, 2) + 
##     t + Dlockshare + dplyr::lag(Dlockshare) + dplyr::lag(Dlockshare, 
##     2), data = df)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.264e-04 -3.238e-05 -2.604e-06  3.437e-05  1.893e-04 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.374e-06  4.565e-06   0.301   0.7634    
## dplyr::lag(Dlnindex)       7.911e-01  3.793e-02  20.855  < 2e-16 ***
## dplyr::lag(Dlnindex, 2)    1.854e-01  3.743e-02   4.954 9.27e-07 ***
## Dcases                    -7.495e-04  1.100e-03  -0.681   0.4960    
## dplyr::lag(Dcases)        -3.192e-03  1.444e-03  -2.210   0.0275 *  
## dplyr::lag(Dcases, 2)      3.703e-03  7.344e-04   5.043 5.94e-07 ***
## t                          2.186e-08  1.270e-08   1.721   0.0857 .  
## Dlockshare                -1.036e-05  8.940e-07 -11.586  < 2e-16 ***
## dplyr::lag(Dlockshare)    -9.179e-06  1.167e-06  -7.867 1.49e-14 ***
## dplyr::lag(Dlockshare, 2) -3.171e-06  1.449e-06  -2.189   0.0290 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.585e-05 on 658 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.987,  Adjusted R-squared:  0.9868 
## F-statistic:  5546 on 9 and 658 DF,  p-value: < 2.2e-16
# Time Series

#< load libraries
  library(foreign)
  

#< GDP Japan
  library(haven)
  gdpjp <-read_dta("data/gdp_JP_etc.dta")
  library(zoo)
  library(DataCombine)
  library(quantmod)
  library(tseries)
  
  
  gdpjp["L1lngdp"]=Lag(gdpjp$lngdp,1)
  
  
  gdpjp["L1lngdp"]=Lag(gdpjp$lngdp,1)
  
  summary(lm(lngdp~L1lngdp ,gdpjp))
  summary(arma(gdpjp$lngdp, order = c(1, 0)))
  
  summary(arma(gdpjp$lngdp, order = c(2, 0)))
  
  
  
  summary(lm(lngdp~L1lngdp+time ,gdpjp))
  
  gdpjp["Dlngdp"]<-c(NA,diff(gdpjp$lngdp, differences=1))
#>
  
#< Dickey Fuller
  library(urca)
  
  df=ur.df(gdpjp$lngdp,type="trend",lags=0)
  summary(df)
 
  summary(ur.df(diff(gdpjp$lngdp,1),type="trend",lags=0))
  
  summary(ur.df(diff(gdpjp$lngdp,1),type="trend",lags=1))
  
  
  summary(ur.df(gdpjp$lngdp,type="trend",lags=1))
  summary(ur.df(gdpjp$lngdp,type="trend",lags=5))
  summary(ur.df(gdpjp$lngdp,type="trend",lags=3))
  
  summary(ur.df(diff(gdpjp$lngdp,1),type="trend",lags=3))
  
  

#>
#< Oragne juice
  
  oj <-read_dta("data/oj.dta")  
  oj = na.omit(oj)
  oj["lnp"]=log(oj$ppioj/oj$pwfsa *100)
  
  
  summary(ur.df(oj$fdd,type="trend",lags=12))
  
  summary(ur.df(oj$lnp,type="trend",lags=12))
  summary(ur.df(diff(oj$lnp,1),type="trend",lags=12))

  
  summary(lm(diff(oj$lnp,1)~oj$fdd[-1]))
  
  
  
  ojm=lm(diff(oj$lnp)~oj$fdd[-1])
  library(sandwich)
  library(lmtest)
  coeftest(ojm, vcov. = NeweyWest)
  
#>