Dice

How likely is it to throw repeated sixes with a fair dice? Let’s use a loop to work it out

for(i in 1:5){
  p=as.character(format( round(1/6^i,4),scientific=F))
    
  cat(paste0("$$Prob\\{", i,"\\textrm{ sixes in a row}\\}=",p,"$$\n"))
  # Cat is like print but better suited for use in markdown where mingle
  # other stuff into the output such as latex
}

\[Prob\{1\textrm{ sixes in a row}\}=0.1667\] \[Prob\{2\textrm{ sixes in a row}\}=0.0278\] \[Prob\{3\textrm{ sixes in a row}\}=0.0046\] \[Prob\{4\textrm{ sixes in a row}\}=0.0008\] \[Prob\{5\textrm{ sixes in a row}\}=0.0001\]

When should we start taking the slope of a line serious?

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
ff=read.csv("https://www.dropbox.com/s/g1w75gkw7g91zef/foreigners.csv?dl=1")  
ff=ff%>%mutate(crimesPc=crimes11/pop11)%>% filter( crimesPc<10)  # We are dropping some outliers

ggplot(ff, aes(y=crimesPc,x=b_migr11)) + geom_point()   +   # Make sure + is on this line so R understands the command is not finished
                   xlab("Share of foreign born in %")   +  
                   ylab("Number of Crimes per capita")  + 
                   geom_smooth(method = "lm", se = FALSE) +
                   theme_minimal()
## `geom_smooth()` using formula 'y ~ x'

Recall the regression command:

df=ff
reg1=lm(crimesPc~b_migr11,df) 
reg1 %>%  summary()
## 
## Call:
## lm(formula = crimesPc ~ b_migr11, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13314 -0.33959 -0.06763  0.22302  2.92572 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.091273   0.045146   24.17  < 2e-16 ***
## b_migr11    0.025164   0.002922    8.61 3.33e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5482 on 321 degrees of freedom
## Multiple R-squared:  0.1876, Adjusted R-squared:  0.1851 
## F-statistic: 74.14 on 1 and 321 DF,  p-value: 3.325e-16

Monte Carlo

Let’s create the data ourselves so we know what drives it.

  obs <- 100
  x <- 0.5 + runif(obs)*2.5
  sig=sqrt(5.5)*2
  eps <- rnorm(obs,0,sig)
  y <- 2 + x * 0 + eps
  
  df=data.frame(x,y)
  
  ggplot(df, aes(x, y))+geom_point(color="blue") +theme_minimal()

Now let’s run a regression

  ggplot(df, aes(x, y))+geom_point(color="blue") +theme_minimal()+geom_smooth(method="lm",se=F)
## `geom_smooth()` using formula 'y ~ x'

  monte1 <- lm(y ~ x , data = df)
  
  summary(monte1)
## 
## Call:
## lm(formula = y ~ x, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0668  -3.7905   0.4592   3.7530  11.6931 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   3.1054     1.3004   2.388   0.0189 *
## x            -0.6835     0.6788  -1.007   0.3165  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.157 on 98 degrees of freedom
## Multiple R-squared:  0.01024,    Adjusted R-squared:  0.0001396 
## F-statistic: 1.014 on 1 and 98 DF,  p-value: 0.3165

Now let’s repeat that many times

  df=df %>% mutate(round=0)
  b2<-numeric()
  
  sig=sqrt(5.5)
  
  for(i in 1:5) {
    
    x <- 0.5 + runif(obs)*2.5  
    
    eps <- rnorm(obs,0,sig)
    y <- 2 + x * 0 + eps
    
    monte2 <- lm(y ~ x )
    b2[i]=monte2$coefficients[2]
    
    
    bx   =monte2$coefficients[2] 
    
    dfnew=data.frame(x,y,round=i)
    df=bind_rows(df,dfnew)
  }
  
  
  ggplot(df, aes(x, y,color=factor(round)))+geom_point() +theme_minimal()+geom_smooth(method="lm",se=F)
## `geom_smooth()` using formula 'y ~ x'

Really a lot of times..

  df=df %>% mutate(round=0)
  b2<-numeric()
  
  sig=sqrt(5.5)
  
  for(i in 1:1000) {
    
    x <- 0.5 + runif(obs)*2.5  
    
    eps <- rnorm(obs,0,sig)
    y <- 2 + x * 0 + eps
    
    monte2 <- lm(y ~ x )
    b2[i]=monte2$coefficients[2]
    
    
    bx   =monte2$coefficients[2] 
    
    dfnew=data.frame(x,y,round=i)
    df=bind_rows(df,dfnew)
  }  
  
#b2[i]=monte2$coefficients[2]  
bx
##         x 
## -0.658715
b2  
##    [1]  0.5125125535 -0.4903282534  0.6131301041  0.0513504576  0.1756606379
##    [6] -0.2889058407  0.2108780613 -0.3876234556  0.3648135409 -0.1279350238
##   [11] -0.2987526729 -0.3629004567  0.0304071374  0.7754674919  0.4383181006
##   [16]  0.1193775263 -0.0836904715  0.2836652677 -0.0612198918  0.2080954214
##   [21]  0.5753571657  0.5326006169  0.4300876151  0.0231140462 -0.3070010776
##   [26]  0.0667744671  0.3122312032 -0.2833740940  0.1692914070  0.0403755467
##   [31] -0.1166663777 -0.0255296127 -0.5839193761  0.2217231802  0.0468249992
##   [36]  0.1889190752  0.3282827200 -0.2324061283  0.2722599521  0.7837780943
##   [41]  0.2612028245  0.2041206925 -0.2529668848  0.2539387692 -0.0063592446
##   [46]  0.2867181952 -0.0741754237  0.0774829548 -0.1556304684  0.1924600431
##   [51] -0.3539535957 -0.4852497192 -0.1728834720  0.1136479487 -0.1363411934
##   [56]  0.1698033169  0.1962351836  0.2519595258  0.2547569085 -0.1395332741
##   [61] -0.2644031128 -0.3538159561  0.1565058973  0.5122697107 -0.0658971267
##   [66] -0.2587186227 -0.1428779235 -0.2732185344 -0.0571724417  0.2361973348
##   [71] -0.2549275382 -0.0034193258 -0.1863742490  0.3224077009  0.1073326600
##   [76]  0.5110766449 -0.4089653849  0.3646424738  0.3330244847  1.0484181332
##   [81] -0.1125188719 -0.4231741738 -0.1058808851 -0.1868951665  0.0930821520
##   [86] -0.0885947539 -0.0336988809  0.2743858036  0.0673018938 -0.5503315152
##   [91] -0.1955555157 -0.2981461263 -0.5171252151  0.2735226411 -0.2101421104
##   [96]  0.3315925184 -0.6521010726  0.0630814561  0.2401682360  0.2751239605
##  [101] -0.2524570425  0.1226921242 -0.2002586912 -0.2215022139 -0.6654161760
##  [106] -0.4037218228  0.1146034139 -0.0242543483  0.2696374625  0.3097770674
##  [111]  0.5242935923  0.1488118820  0.1883263688  0.1714785626  0.2873515855
##  [116] -0.1810392853 -0.5308145955  0.2443160973  0.4706206775 -0.2218155180
##  [121]  0.7639111248  0.5486077726 -0.5425117847 -0.3330294677 -0.1188089665
##  [126] -0.1283391480 -0.3421217264  0.1823038380  0.4991009265  0.5431626249
##  [131]  0.0166241042  0.2980321732  0.3106137345 -0.3711570311 -0.2655330580
##  [136] -0.3439418253 -0.0518495782  0.0719127469 -0.1914686748  0.3016699580
##  [141]  0.3938144291 -0.0373554326 -0.1212459972  0.0582452201 -0.5568480057
##  [146]  0.1315538240  0.3532823889  0.2736240484 -0.0254015027 -0.2700578891
##  [151] -0.2209058531  0.0124529545  0.1505179503  0.2308504543 -0.5351961651
##  [156]  0.1710611772 -0.4873758289 -0.1508016162 -0.3434406794 -0.1490197655
##  [161]  0.2926450989 -0.1068771272  0.0638967780 -0.0027937942  0.0479235626
##  [166]  0.0823055991  0.6796403824  0.1131588852 -0.3506824580  0.0010357244
##  [171] -0.1207037528  0.2879707649  0.3581087277  0.6943246293  0.5644940496
##  [176] -0.3110486816 -0.1732154744  0.0187619463 -0.0606242731 -0.2311644959
##  [181] -0.2001808259 -0.0707253677 -0.4204488729  0.0316089114 -0.4246540831
##  [186]  0.1371836233 -0.1414315458  0.2028002219  0.2515239653  0.1368724409
##  [191]  0.2452234699  0.1300195315  0.0604799675  0.4896409936  0.8771850287
##  [196] -0.1027184050  0.6120751223  0.8805460541 -0.2550677839  0.3046379568
##  [201]  0.0720225735  0.2012741221 -0.0195798004 -0.0540759568 -0.1758090920
##  [206]  0.1471251053 -0.2218998412  0.4367406969  0.0522429978  0.1321735756
##  [211] -0.1362567607  0.1782215114  0.3788567464 -0.5256091542 -0.2088125557
##  [216]  0.3068083139 -0.1156899697 -0.0196028134 -0.0297045421 -0.0697922358
##  [221]  0.1584611387 -0.3019718282 -0.1873038722 -0.1788012777 -0.2446594325
##  [226]  0.1846856314 -0.1083932029 -0.3741299355  0.4678490018 -0.6960547321
##  [231]  0.4467444487  0.2461268185  0.2874563932 -0.2786218561  0.0856827135
##  [236]  0.2236558236 -0.0681298080  0.1982496275  0.2159842744 -0.2444415217
##  [241]  0.0168324562  0.4842641096 -0.0879031466 -0.4963183999 -0.3254967348
##  [246]  0.2198464158 -0.5767754495  0.1739284427  0.3474983930 -0.1646602851
##  [251] -0.3529166695  0.3814224146 -0.0184612848 -0.1741783782 -0.1729244327
##  [256]  0.0395755772 -0.3169984156  0.3729489486 -0.5345519298 -0.1001000624
##  [261] -0.1991218929  0.6323609319 -0.1540139101  0.2684515845  0.0456815938
##  [266] -0.0428885562 -0.0626184812 -0.6187948368 -0.4415592560 -0.3984919508
##  [271]  0.2136885640 -0.0038885002  0.2060374791 -0.5364552901  0.0948541574
##  [276]  0.1100881367  0.3291529757  0.0769835064 -0.0580033916  0.9337607723
##  [281]  0.7591887320  0.1266101219 -0.0507295381  0.5978535218  0.1151136271
##  [286]  0.3258742646  0.5072858494 -0.4630156451 -0.0047019530  0.1923801599
##  [291]  0.0382104098  0.1200347048 -0.1747442003  0.0426706611  0.3417268791
##  [296] -0.3541121120 -0.0605373838  0.2923292995 -0.0521323324 -0.3140123473
##  [301]  0.0146912809  0.4176042022 -0.5023368464 -0.5030603667  0.2487245857
##  [306]  0.4815392594 -0.2215088505  0.3046700594  0.4208565010  0.0007312618
##  [311]  0.1666252391  0.4009231214 -0.1184292156  0.0228785406  0.4062353934
##  [316] -0.0180568307  0.0048567895  0.5171096480  0.1419999130  0.2413869046
##  [321]  0.4417554982  0.1926781596 -0.0485159985  0.2147412588  0.1838995699
##  [326] -0.4323810602  0.3401947233 -0.2378380960  0.0041486480 -0.1895806395
##  [331] -0.0127141603  0.0481835904  0.1460890942 -0.1719269857  0.1034670507
##  [336] -0.5672392973 -0.1935066034  0.0784366685 -0.0308786356 -0.0008234514
##  [341] -0.3978363879 -0.3541736207 -0.5746227999  0.2017309678 -0.3672599388
##  [346]  0.1369178814 -0.1980884497 -0.4597833602 -0.0900561672  0.6930586157
##  [351]  0.4913973760 -0.2114215690 -0.2363735405 -0.0709355960 -0.3307852918
##  [356]  0.0766560433 -0.4469443737  0.2196967413  0.0317241379  0.2670186099
##  [361]  0.0108039935 -0.3529497942  0.2005223203  0.1029566789 -0.0395230386
##  [366] -0.0715077540 -0.1772387473  0.6872266716  0.0346316935  0.3812032316
##  [371]  0.6650767647  0.0327817753 -0.5058414351 -0.0826853789  0.1450820719
##  [376] -0.3791454563  0.5333640580 -0.0643347834  0.7308097047  0.1216099920
##  [381]  0.0991477350  0.0911615515 -0.0825946488 -0.1144324420 -0.0638728853
##  [386]  0.2773669450 -0.1338829888  0.1180854329 -0.2906047984  0.2127597974
##  [391] -0.3738162300 -0.1676814079 -0.2079402954  0.1976624782 -0.0146168934
##  [396]  0.3962667161  0.0341399908  0.1456548555  0.0405810967 -0.4259176372
##  [401] -0.0399901747  0.0209184195  0.0597161098 -0.0107387963  0.0039197489
##  [406] -0.0646444346 -0.3936737976 -0.4958077901 -0.3005793270 -0.3849078473
##  [411] -0.4994883444 -0.0494362715 -0.0695766438  0.5903683110 -0.3081415390
##  [416]  0.1051045238 -0.3798222134 -0.0445383609 -0.0451449828  0.0988238216
##  [421] -0.7473913412 -0.3135864569  0.1597318291 -0.3599924447 -0.2615285934
##  [426] -0.2152279440 -0.4037198701  0.0165444824 -0.2014111573 -0.0660790934
##  [431]  0.3269292713  0.0890463396 -0.1841389784  0.1949391539  0.1966222388
##  [436] -0.5324032252 -0.0572380439  0.5658208850  0.0057110091 -0.2789030268
##  [441] -0.9133757550  0.1176436092 -0.2324490506  0.0699059195  0.0547921665
##  [446] -0.0795177340  0.3903154586 -0.0299899927 -0.0433430410  0.0373275326
##  [451] -0.1373119714  0.1908178973  0.0625644446  0.1824811438 -0.1509931550
##  [456]  0.1032925094  0.2510171889 -0.1060600363  0.0766007165  0.5200002300
##  [461]  0.3357575809 -0.2162400377  0.0099457772 -0.3929839012  0.3074960711
##  [466]  0.1888569078 -0.1775569439 -0.2541762807 -0.4104967776  0.3689297733
##  [471] -0.2793204832 -0.1665630185  0.0297214243  0.0604086326  0.1353021970
##  [476]  0.5602449227 -0.2839962513  0.0794403654  0.3645162639 -0.3856970352
##  [481] -0.2544814749 -0.1549553191 -0.4885976761  0.2327626622 -0.2327087475
##  [486]  0.2596641564 -0.0541431618  0.6878986430  0.5360457818 -0.3650338456
##  [491]  0.2679570051 -0.1344480042 -0.0078300870 -0.1496940250  0.0932702638
##  [496] -0.0223015617  0.0558096835  0.3677232517 -0.7004134651  0.0085103135
##  [501]  0.3735924551  0.0537549577  0.2898412984  0.2707050314 -0.6049203012
##  [506]  0.1610302222 -0.2168138503 -0.1709768351 -0.0320659341  0.4294110033
##  [511] -0.3327640959  0.2990754222  0.3787829950  0.6274057103 -0.3909282609
##  [516]  0.5894441722  0.0491305766  0.1206627284  0.4186333854 -0.2601016038
##  [521]  0.2635517720 -0.0284555376  0.0211771778  0.2517920959 -0.1151804172
##  [526]  0.1662173170  0.5942605691 -0.2427928213  0.0160753798  0.2115721485
##  [531]  0.0308491495  0.7659506268  0.5692714627  0.1654819082  0.1429607538
##  [536]  0.2546501256 -0.0948071332  0.4902045102 -0.2669786170 -0.1876128748
##  [541]  0.3575797887  0.2792051841 -0.3306765208  0.2599031717 -0.4270380374
##  [546] -0.6605614252 -0.0055960113  0.2270128867 -0.4784522514 -0.3877764764
##  [551]  0.1917911071 -0.1810080419 -0.5029314931 -0.0103748998 -0.0031837416
##  [556]  0.1689643654 -0.2547943240 -0.1917847666  0.1141237952  0.5505358710
##  [561] -0.2983681049  0.5264641704 -0.3756400551 -0.1930873538  0.5540879667
##  [566] -0.0667307861  0.2815717245  0.1365700759 -0.2583325752 -0.0207024852
##  [571] -0.1482807083 -0.1565624119 -0.0951888134  0.2475107507 -0.2889834999
##  [576] -0.5929532241  0.5884042194  0.4130188648 -0.1572251132 -0.6749379361
##  [581] -0.1861976065  0.1302755674 -0.2998305479  0.0396031915  0.3526488476
##  [586] -0.2026499917  0.5798093836 -0.0412583079  0.0542510582 -0.0571454047
##  [591] -0.2600165015  0.3067723752  0.3341562793  0.0420585364 -0.5411171992
##  [596]  0.1978994709 -0.1572460324 -0.2559694471  0.0165162253  0.1165684384
##  [601] -0.4889250406 -0.0498436598  0.1355217150  0.0739036602  0.6374603348
##  [606]  0.3449447040 -0.0823194491 -0.1244895913 -0.4316294549  0.0370220097
##  [611] -0.2602475437  0.1383217867 -0.0167711553 -0.1397531056  0.2045103834
##  [616] -0.0701120267 -0.3536578250 -0.8059397209 -0.5242868953 -0.1383647613
##  [621] -0.3108453750 -0.2521988964 -0.3255397884  0.0175411387 -0.1599474513
##  [626] -0.0714545044  0.2456286594 -0.0553338169  0.3543568087  0.2320908956
##  [631]  0.0391510438  0.0264789055  0.0993902070  0.1908371494 -0.0853492248
##  [636] -0.0042906255  0.0824839801  0.1206504694  0.0995020642 -0.5438823105
##  [641] -0.4034953529  0.0395118357 -0.1546555566 -0.0444776077 -0.2714941796
##  [646] -0.0172480139 -0.1697605456  0.1158199952 -0.1230362494 -0.1230415390
##  [651]  0.0105886633  0.2873873465 -0.3471942600  0.8189647263 -0.7050031641
##  [656] -0.4409068928  0.6728684089  0.3066532798  0.2854584024 -0.4062073076
##  [661] -0.1283715988 -0.1339344022  0.1841236487 -0.2138212808  0.0035689803
##  [666]  0.0123736772  0.8030133706 -0.1582564457  0.0916786759 -0.1382436710
##  [671] -0.0496996242  0.0555772077  0.2536697880 -0.6801163415 -0.2087910421
##  [676] -0.7295143750  0.0529086627 -0.4035707442 -0.1916771467  0.6664923634
##  [681]  0.0583558820  0.2813126694  0.3636856777 -0.7401511485 -0.0390078772
##  [686]  0.4519538281  0.2289351389  0.0943299619 -0.0059913031  0.0384673723
##  [691]  0.0745371649 -0.5580829268  0.6843951465 -0.2146507618 -0.3227807141
##  [696] -0.1693612795  0.3337623868  0.2338763565 -0.0670247427  0.2280639222
##  [701]  0.4048169661 -1.1206487667  0.0258509398 -0.3214489475 -0.1085943372
##  [706]  0.4585896973 -0.1023336184  0.2519635088  0.2136116467 -0.1608276801
##  [711]  0.0292525165 -0.6981304060  0.2543215354  0.2919859883  0.4579647284
##  [716]  0.1639959610 -0.3163085461  0.0676844535  0.2836543813  0.2678403731
##  [721] -0.6722153486 -0.6373947300 -0.3073520322 -0.2877806806  0.3382380947
##  [726] -0.2019625230  0.0547818198 -0.0746982726  0.9647439708  0.4741465025
##  [731]  0.5428850877 -0.0960850528  0.2900508724 -0.1758484133 -0.4453305081
##  [736] -0.3199172978  0.1890621984 -0.1592934882  0.0624147232  0.2429472762
##  [741]  0.0144360207 -0.2676704227 -0.0700720166 -0.1954848778 -0.3053337495
##  [746]  0.7970598351 -0.2499958088 -0.5414003551 -0.2599041976 -0.1971206708
##  [751] -0.5395566044  0.1627075116  0.1288198507  0.1537359626 -0.0875977965
##  [756] -0.0743346364 -0.2824604305  0.4897721234  0.1834554484  0.1992750477
##  [761]  0.1881397134  0.2720779795  0.2120804596  0.5794107123  0.4818378727
##  [766]  0.2074690930  0.1993413547 -0.3938056883  0.1936907645  0.4963077767
##  [771] -0.1976493291 -0.0943487679  0.1462503880  0.2585373942  0.0158206945
##  [776] -0.2202128715 -0.2071011964 -0.1699588702 -0.3330559316 -0.3533250298
##  [781]  0.2388668876  0.1204056157 -0.4231124435 -0.2277864135  0.4162733695
##  [786]  0.2156120562 -0.3500395335  0.3815228649  0.1137898664  0.6356150262
##  [791] -0.0133762463  0.2947320363 -0.2658162034  0.2613381414 -0.1319009032
##  [796] -0.3138901360 -0.1044504576 -0.4759072010 -0.1127981654 -0.0411561939
##  [801] -0.0738838027  0.0424043013 -0.3568333487  0.4697220431 -0.2249206643
##  [806] -0.2418809705 -0.2756300997 -0.2048867187 -0.0182218494  0.4149756852
##  [811]  0.4800930514 -0.1736412347 -0.1142536223  0.6569910597 -0.3455053448
##  [816] -0.0925431680  0.1295063433  0.1940262208  0.0040177755 -0.4216422255
##  [821] -0.0813593796 -0.8757119119 -0.4187238679  0.1472032743  0.0467018830
##  [826] -0.2168492014 -0.0459188600  0.1801723210  0.2112624682  0.0520516283
##  [831] -0.1506958070 -0.0961191437  0.0556026670  0.4376023235  0.0152141461
##  [836]  0.0931756111  0.3970503260 -0.1648662266 -0.3025839625  0.0889931874
##  [841]  0.0442479852  0.5453605220  0.1072302878 -0.1820477562 -0.0840405158
##  [846]  0.1568455553  0.5102958787  0.2034709435 -0.1714342504 -0.3051902786
##  [851]  0.8058369917 -0.1372510714 -0.5248268998  0.2559593583  0.1293933203
##  [856] -0.3751495224 -0.2286445187 -0.2845206462  0.2250087857  0.0062184129
##  [861]  0.0164747357  0.5799364449 -0.1619111468 -0.1241593466  0.1709429346
##  [866]  0.0595315970  0.2816193160  0.4841069233 -0.5101683794  0.0162787651
##  [871] -0.0867756213 -0.3099886670  0.2910108222  0.1421910744  0.1580736004
##  [876] -0.4953032294  1.1801270631  0.9722992451 -0.0201874643 -0.5665074281
##  [881]  0.2344181506  0.1957777936 -0.2780500942 -0.0465433442  0.1487553610
##  [886]  0.3683523298  0.2392441715 -0.1460899988  0.4032903168  0.4203095467
##  [891] -1.0239615091 -0.2517127758 -0.3272646240 -0.5775309919  0.6143613021
##  [896]  0.2115242465  0.4659418528  0.0660098415 -0.4180899638  0.0903014217
##  [901]  0.5075432527 -0.1283666101 -0.8513289895 -0.3893146624 -0.1322581068
##  [906] -0.6082031567  0.2597862881  0.1030985659  0.5833320741  0.6520196419
##  [911] -0.0017301204  0.4200144421 -0.0389673824  0.0765807390 -0.3148182956
##  [916] -0.2785163777 -0.2465998001 -0.6588473921 -0.5836079772 -0.1740145230
##  [921]  0.1507780238  0.2561369987  0.2860960078  0.4783726039 -0.5564287132
##  [926]  0.2410473875 -0.0651116085 -0.1341940974 -0.1356803293  0.0528004582
##  [931] -0.2049817052  0.2569218935 -0.0909037585  0.5119871270 -0.0036765677
##  [936] -0.2050357074 -0.0842522728 -0.3158909333 -0.3060636404  0.0535904961
##  [941] -0.0141090625 -0.2098854611 -0.3491690490  0.0765522346  0.1142362903
##  [946]  0.3512620595 -0.1390796808 -0.4260708523  0.1959371299  0.0090846141
##  [951]  0.0451456831 -0.4654357830  0.1445800846  0.6270892773 -0.1557798377
##  [956] -0.2206923971 -0.2355447390 -0.0019706689  0.3405474334  0.3743752856
##  [961]  0.0948239295 -0.1471410963 -0.0452962812  0.0113767339 -0.1729185905
##  [966]  0.1895978593  0.0797603235  0.1772980494 -0.1805099793 -0.7060312299
##  [971]  0.2355640922  0.5351392850  0.0690346920  0.0205189358 -0.3934211331
##  [976] -0.1729706542 -0.2093946813  0.1376660964 -0.3014734990  0.1241434569
##  [981]  0.0436298265  0.2726733775  0.0161308215 -0.6401381211 -0.0640225231
##  [986]  0.0655733091  0.3701602756 -0.3882831145  0.3074154724 -0.1590179632
##  [991]  0.2056599995 -0.1055995126 -0.0624169215 -0.8866231428  0.6240239464
##  [996] -0.1868791033  0.0623372551 -0.0044673664  0.1312305416 -0.6587150449
b2[10000]
## [1] NA
bx
##         x 
## -0.658715
mean(b2)
## [1] 0.008514473

Let’s look at the distribution

  library(latex2exp)
  # Compute the standard errors of the estimates
   b2_sig         =sqrt(sig^2/(obs* var(x)) )

 
  hist(b2,30,main=TeX("Distribution of $\\beta_1$"))

  hist(b2,30,prob=TRUE,main=TeX("Distribution of $\\beta_1$"))

  hist(b2,100,prob=TRUE,main=TeX("Distribution of $\\beta_1$"))
  db2=density(b2)
  lines(db2,col="blue")
  
  curve(dnorm(x,0,b2_sig),add=TRUE, col="orange")

Now let’s look what happens if we have - less observations - more variation in X - more variation in epsilon - non normal epsilon - non normal epsilon less observations

  b2_less_obs<-numeric()
  b2_more_obs<-numeric()
  b2_more_var_x<-numeric()
  b2_more_var_eps<-numeric()
  b2_non_normal_eps<-numeric()
  b2_non_normal_eps_less<-numeric()
  b2_non_normal_eps_more<-numeric()
  
  
  less_obs=10
  
  more_obs=750
  
  
  

  
  for(i in 1:1000) {

    x            <- 0.5 + runif(obs)     *2.5
    x_less_obs   <- 0.5 + runif(less_obs)*2.5
    x_more_var_x <- 0.5 + runif(obs)     *5
    x_more_obs   <- 0.5 + runif(more_obs)*2.5

    

    eps                = rnorm(obs,0,sig)
    eps_less_obs       = rnorm(less_obs,0,sig)
    eps_more_obs       = rnorm(more_obs,0,sig)
    
    eps_more_var_eps   = rnorm(obs,0,sig*4)
    
    
    uni=runif(obs)
    eps_non_normal_eps = -1*(uni<0.5) + 10*(uni>=.95)
      

    uni=runif(more_obs)
    eps_non_normal_eps_more = -1*(uni<0.5) + 10*(uni>=.95)
    
    
    beta1=0 
    
    y                     = 2 + x            * beta1 + eps
    y_less_obs            = 2 + x_less_obs   * beta1 + eps_less_obs
    y_more_obs            = 2 + x_more_obs   * beta1 + eps_more_obs
    
    
    y_more_var_x          = 2 + x_more_var_x * beta1 + eps
    y_more_var_eps        = 2 + x            * beta1 + eps_more_var_eps
    
    y_non_normal_eps      = 2 + x            * beta1 + eps_non_normal_eps
    
    
    y_non_normal_eps_less = 2 + x_less_obs   * beta1 + eps_non_normal_eps[1:less_obs]
    y_non_normal_eps_more = 2 + x_more_obs   * beta1 + eps_non_normal_eps_more
    
    
    
    
    monte2 <- lm(y_less_obs ~ x_less_obs )
    b2_less_obs[i]=monte2$coefficients[2]
    
    monte2 <- lm(y_more_obs ~ x_more_obs )
    b2_more_obs[i]=monte2$coefficients[2]

        
    monte2 <- lm(y_more_var_x ~ x_more_var_x )
    b2_more_var_x[i]=monte2$coefficients[2]

            
    monte2 <- lm(y_more_var_eps ~ x )
    b2_more_var_eps[i]=monte2$coefficients[2]
    
    
    monte2 <- lm(y_non_normal_eps ~ x )
    b2_non_normal_eps[i]=monte2$coefficients[2]
    

    monte2 <- lm(y_non_normal_eps_less ~ x_less_obs )
    b2_non_normal_eps_less[i]=monte2$coefficients[2]

    monte2 <- lm(y_non_normal_eps_more ~ x_more_obs )
    b2_non_normal_eps_more[i]=monte2$coefficients[2]
        
    
  }

Let’s look at this

   # Compute the standard errors of the estimates
     b2_sig         =sqrt(sig^2/(obs* var(x)) )


   #b2_sig_less_obs=sqrt(sig^2/(less_obs* var(x_less_obs)))
   #b2_sig_less_obs    =sqrt(sig^2/(more_obs* var(x_more_obs)) )
   #b2_sig_more_var_eps=sqrt((sig*4)^2/(obs* var(x))) 


  # Compute the densities
  db2=density(b2)
  db2_less_obs=density(b2_less_obs)
  db2_more_obs=density(b2_more_obs)
  
  db2_more_var_x=density(b2_more_var_x)
  db2_more_var_eps=density(b2_more_var_eps)
  db2_non_normal_eps=density(b2_non_normal_eps)
  db2_non_normal_eps_less=density(b2_non_normal_eps_less)
  db2_non_normal_eps_more=density(b2_non_normal_eps_more)
  
  db2_eps_more_var_eps=density(eps_more_var_eps)
  db2_eps=density(eps)
  
  # Baseline
  plot(db2,xlim=c(-1.5,1.5),ylim=c(0,3.5),main="Baseline",xlab="")
  curve(dnorm(x,0,b2_sig),add=TRUE, col="blue")
  
  legend(2,2,legend=c("Basline density", "Baseline normal distribution"),col=c("black","blue"),lty=1, cex=0.8)

  # Changing obs
  plot(db2,xlim=c(-4,4),main="Changing the number of observations",xlab="")
  lines(db2_less_obs,col="blue")
  legend(2,.2,legend=c("obs=100", "obs=10"),col=c("black","blue"),lty=1, cex=0.8)

  # Changing x variation
  plot(db2,xlim=c(-1.5,1.5),ylim=c(0,3),main=TeX("Changing x variation - Density of $\\beta$"),xlab="")
  lines(db2_more_var_x,col="blue")
  legend(.5,2.5,legend=c("baseline", "higher variation"),col=c("black","blue"),lty=1, cex=0.8)

  #legend(4,.5,legend=c("baseline", "higher variation"),col=c("black","blue"),lty=1, cex=0.8)

  #summary(x)
  dx=density(x)
  dx_more_var_x=density(x_more_var_x)
  plot(dx,xlim=c(-1,6),ylim=c(0,2),main="Changing x variation - Density of x",xlab="")
  lines(dx_more_var_x,col="blue")
  legend(2,2,legend=c("baseline", "higher variation"),col=c("black","blue"),lty=1, cex=0.8)

  # Changing eps variation
  
  
  plot(db2,xlim=c(-4,4),ylim=c(0,2),main=TeX("Changing $\\epsilon$ Variation - Density of $\\beta$"),xlab="")
  lines(db2_more_var_eps,col="blue")
  legend(2,2,legend=c("baseline", "higher variation"),col=c("black","blue"),lty=1, cex=0.8)

  plot(db2_eps,xlim=c(-30,30),ylim=c(0,.2),
       main=TeX("Changing $\\epsilon$ Variation - Density of $\\epsilon$"),xlab="") 
  lines(db2_eps_more_var_eps,col="blue")

  # Non normal eps
  plot(db2,xlim=c(-3,3),ylim=c(0,2),main=TeX("Non normal $\\epsilon$"),xlab="")
  lines(db2_non_normal_eps,col="blue")
  legend(2,2,legend=c("normal", "non-normal"),col=c("black","blue"),lty=1, cex=0.8)

  deps=density(eps)
  deps_non_normal_eps=density(eps_non_normal_eps)

  
  plot(deps,xlim=c(-3,3),ylim=c(0,1),main=TeX("Non normal $\\epsilon$ - Distribution of $\\epsilon"),xlab="")
  lines(deps_non_normal_eps,col="blue")
  legend(5,.5,legend=c("normal", "non-normal"),col=c("black","blue"),lty=1, cex=0.8)

  # Non normal eps less
  plot(db2_less_obs,xlim=c(-3,3),ylim=c(0,2),main=TeX("Non normal $\\epsilon$ less obs"),xlab="")
  lines(db2_non_normal_eps_less,col="blue")
  legend(2,2,legend=c("normal", "non-normal"),col=c("black","blue"),lty=1, cex=0.8)

  # Non normal eps more
  plot(db2_more_obs,xlim=c(-3,3),ylim=c(0,4),main="Non normal eps more",xlab="")
  lines(db2_non_normal_eps_more,col="blue")
  legend(2,2,legend=c("normal", "non-normal"),col=c("black","blue"),lty=1, cex=0.8)

Hypothesis Testing in the foreigners and crime example

 library(haven)   # make sure libraries such as this are installed. If not go to Tools -> Install Packages
 df=ff
 
 df['crimesPc']=df$crimes11/df$pop11
 reg1=lm(crimesPc~b_migr11,df)
 summary(reg1)
## 
## Call:
## lm(formula = crimesPc ~ b_migr11, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.13314 -0.33959 -0.06763  0.22302  2.92572 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.091273   0.045146   24.17  < 2e-16 ***
## b_migr11    0.025164   0.002922    8.61 3.33e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5482 on 321 degrees of freedom
## Multiple R-squared:  0.1876, Adjusted R-squared:  0.1851 
## F-statistic: 74.14 on 1 and 321 DF,  p-value: 3.325e-16
 library("car")
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
 linearHypothesis(reg1, c( "b_migr11= 0.04") )
## Linear hypothesis test
## 
## Hypothesis:
## b_migr11 = 0.04
## 
## Model 1: restricted model
## Model 2: crimesPc ~ b_migr11
## 
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1    322 104.212                                  
## 2    321  96.467  1    7.7448 25.771 6.525e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Working out the cut off points

  pnorm(-1.644854)
## [1] 0.04999996
  pnorm(-1.959964)
## [1] 0.025

qnorm(0.005)
qnorm(0.025) qnorm(0.05)

qnorm(0.995)
qnorm(0.975) qnorm(0.95)

#< Annother example

  library(foreign)
  library(haven)
  eaef <- read.csv("https://www.dropbox.com/s/31lyn5p5edyoxl5/eaef21.csv?dl=1")
  head(eaef, n=10)  
##     X   ID FEMALE MALE ETHBLACK ETHHISP ETHWHITE AGE  S EDUCPROF EDUCPHD
## 1   1 5531      0    1        0       0        1  45 12        0       0
## 2   2 2658      0    1        0       1        0  40 12        0       0
## 3   3 5365      0    1        0       0        1  38 15        0       0
## 4   4 4468      0    1        0       0        1  43 13        0       0
## 5   5 3142      0    1        0       0        1  38 18        0       0
## 6   6 2170      0    1        1       0        0  39 16        0       0
## 7   7 2344      0    1        0       0        1  40 14        0       0
## 8   8 4583      0    1        0       0        1  37 12        0       0
## 9   9 2517      0    1        0       0        1  39 12        0       0
## 10 10 3563      0    1        0       0        1  42 12        0       0
##    EDUCMAST EDUCBA EDUCAA EDUCHSD EDUCDO SINGLE MARRIED DIVORCED FAITHN FAITHP
## 1         0      0      0       1      0      0       1        0      0      1
## 2         0      0      0       0      1      0       0        1      1      0
## 3         0      0      1       0      0      0       0        1      0      0
## 4         0      0      0       1      0      0       1        0      0      1
## 5         1      0      0       0      0      0       1        0      0      1
## 6         0      0      0       0      0      0       1        0      0      1
## 7         0      0      1       0      0      1       0        0      0      1
## 8         0      0      0       1      0      1       0        0      0      1
## 9         0      0      0       1      0      0       1        0      0      1
## 10        0      0      0       1      0      0       1        0      0      1
##    FAITHC FAITHJ FAITHO ASVAB01 ASVAB02 ASVAB03 ASVAB04 ASVAB05 ASVAB06
## 1       0      0      0      58      64      52      56      54      56
## 2       0      0      0      32      39      29      29      27      22
## 3       0      0      1      42      40      37      38      42      45
## 4       0      0      0      66      55      59      53      51      52
## 5       0      0      0      64      65      61      62      47      34
## 6       0      0      0      62      53      51      59      58      45
## 7       0      0      0      50      32      44      38      33      29
## 8       0      0      0      56      62      52      44      41      47
## 9       0      0      0      60      59      55      62      58      46
## 10      0      0      0      46      57      28      32      47      46
##      ASVABC HEIGHT WEIGHT85 WEIGHT02 SM SF SIBLINGS LIBRARY POV78       EXP
## 1  60.89985     67      160      200  8  8       11       0     0 22.384615
## 2  33.63790     67      185      205  5  5        3       0     1  8.903846
## 3  38.81767     69      135      185 11 12        3       1     0 13.250000
## 4  57.08318     72      200      250 12 16        2       1     0 18.250000
## 5  65.53439     76      185      220 16 20        1       1     0 13.769231
## 6  55.44746     69      180      215 12 12        2       0     0 11.692307
## 7  36.36409     65      150      145 12 12        1       1     0 19.480770
## 8  56.53794     69      125      140  8 13       10       1     0 15.230769
## 9  60.62724     72      195      240 12 12        3       1    NA 14.365385
## 10 43.99744     70      180      210 10  8        9       1     0 22.038462
##    EARNINGS HOURS    TENURE COLLBARG CATGOV CATPRI CATSE URBAN REGNE REGNC REGW
## 1     53.41    45 2.7500000        0      0      1     0     0     0     0    0
## 2      8.00    40 2.3846154        0      0      1     0     0     0     0    0
## 3     24.00    40 5.7500000        1      0      1     0     1     0     1    0
## 4     29.50    40 6.1346154        1      0      1     0     1     0     0    1
## 5     32.05    54 0.8269231        0      1      0     0     1     1     0    0
## 6     14.73    40 4.2884617        0      0      1     0     1     0     0    0
## 7     13.00    48 0.7692308        0      0      1     0     1     0     0    0
## 8     11.25    40 5.1538463        0      0      1     0     1     0     0    1
## 9     18.01    40 1.5769231        1      1      0     0     1     0     1    0
## 10    11.78    40 0.2307692        1      0      1     0     1     0     1    0
##    REGS
## 1     1
## 2     1
## 3     0
## 4     0
## 5     0
## 6     1
## 7     1
## 8     0
## 9     0
## 10    0
  library("lattice")
  xyplot(EARNINGS ~ S, data = eaef, type = c("p","r"), col.line = "red")

  mod_earn <- lm(EARNINGS ~ S , data = eaef)
  summary(mod_earn)
## 
## Call:
## lm(formula = EARNINGS ~ S, data = eaef)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.112  -7.142  -2.476   3.625  95.974 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -13.9335     3.2199  -4.327  1.8e-05 ***
## S             2.4553     0.2319  10.590  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.13 on 538 degrees of freedom
## Multiple R-squared:  0.1725, Adjusted R-squared:  0.171 
## F-statistic: 112.1 on 1 and 538 DF,  p-value: < 2.2e-16

#>

tea time

  qt(0.025, 10  )
## [1] -2.228139

Effect of Experience on Earnings

  library(foreign)
  eaef <- read.csv("https://www.dropbox.com/s/31lyn5p5edyoxl5/eaef21.csv?dl=1")

  mod_earn_exp <- lm(EARNINGS ~ EXP , data = eaef)    
  summary(mod_earn_exp) 
## 
## Call:
## lm(formula = EARNINGS ~ EXP, data = eaef)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.140  -8.876  -3.723   3.869  99.986 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.5553     2.4425   6.369 4.09e-10 ***
## EXP           0.2415     0.1398   1.727   0.0847 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.39 on 538 degrees of freedom
## Multiple R-squared:  0.005515,   Adjusted R-squared:  0.003666 
## F-statistic: 2.983 on 1 and 538 DF,  p-value: 0.08469

More general hypothesis tests

  mod_exp_s <- lm(EXP ~ S , data = eaef)
  summary(mod_exp_s)  
## 
## Call:
## lm(formula = EXP ~ S, data = eaef)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.0512  -2.3320   0.8564   3.1391   6.3756 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  22.3165     1.0624  21.006  < 2e-16 ***
## S            -0.3961     0.0765  -5.178 3.17e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.331 on 538 degrees of freedom
## Multiple R-squared:  0.04748,    Adjusted R-squared:  0.04571 
## F-statistic: 26.82 on 1 and 538 DF,  p-value: 3.169e-07
  library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
  library("car")
  linearHypothesis(mod_exp_s, c( "S = -1") )
## Linear hypothesis test
## 
## Hypothesis:
## S = - 1
## 
## Model 1: restricted model
## Model 2: EXP ~ S
## 
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    539 11260                                  
## 2    538 10091  1    1168.7 62.307 1.658e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  linearHypothesis(mod_exp_s, c( "S = 0") )
## Linear hypothesis test
## 
## Hypothesis:
## S = 0
## 
## Model 1: restricted model
## Model 2: EXP ~ S
## 
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1    539 10594                                  
## 2    538 10091  1    502.96 26.815 3.169e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1