Content
--------------------------------------------------------------------------------------------------------------------------------------------
name:
log: C:\doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code\t1_boot_breakdown.log
log type: text
opened on: 5 Jul 2014, 13:17:42
. clear ;
. set seed 9479826 ;
. set more off ;
. do set_directory_macros ;
. #delimit ;
delimiter now ;
. local my_top_path = subinstr(c(pwd), "doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code", "",.) ;
. global rawdat = "`my_top_path'data\CPS\July 2013 extract for cluster JHR" ;
. global madedat = "`my_top_path'madedata\Colin JHR paper" ;
. if "$S_OS" == "Unix" { ;
. global rawdat = "../data" ;
. global madedat = "../data" ;
. } ;
. di "$rawdat" ;
C:\data\CPS\July 2013 extract for cluster JHR
. di "$madedat" ;
C:\madedata\Colin JHR paper
. *global rawdat = "C:\data\CPS\July 2013 extract for cluster JHR" ;
. *global madedat = "C:\madedata\Colin JHR paper" ;
. *global rawdat = "C:\Users\dlmiller\data\CPS\July 2013 extract for cluster JHR" ;
. *global madedat = "C:\Users\dlmiller\madedata\Colin JHR paper" ;
.
end of do-file
. cap prog drop runme ;
. prog def runme ;
1. local bsreps = 999 ;
2. /* generate state policy variable */
> use statefip using "${madedat}/CPS_2012_micro" , replace ;
3. contract statefip ;
4. generate sort_order = uniform() ;
5. sort sort_order ;
6. keep if _n <= 6 ;
7. gen policy = _n <= 3 ;
8. sort statefip ;
9. drop sort_order _freq ;
10. tempfile statelist ;
11. qui save `statelist' ;
12. use "${madedat}/CPS_2012_micro" , replace ;
13. sort statefip ;
14. merge m:1 statefip using `statelist' ;
15. tab _merge ;
16. keep if _merge == 3 ;
17. drop _merge ;
18. tab statefip ;
19. /* first run the basic regressions for G=6 */
> reg lnwage policy age age2 yrseduc ;
20. reg lnwage policy age age2 yrseduc , robust ;
21. reg lnwage policy age age2 yrseduc , cluster(statefip) ;
22. local main_b = _b[policy] ;
23. local main_se = _se[policy] ;
24. /* generate restricted residuals, and y-hats; these will be used later in the Wild bootstraps */
> /* the hypothesis is: beta on policy = 0. So to estimate while imposing this restriction, we will just drop "policy" from the regression */
> reg lnwage age age2 yrseduc ;
25. predict resid_restricted , resid ;
26. predict yhat_restricted , xb ;
27. sort statefip ;
28. tempfile maindata ;
29. qui save `maindata' ;
30. /* next we do a nonparametric bootstrap; show that the resulting distribution of the bootstrapped betas can be non-normal and
> multimodal */
> tempfile bootout ;
31. reg lnwage policy age age2 yrseduc , vce(bootstrap , cluster(statefip) reps(`bsreps') saving(`bootout') ) ;
32. drop _all ;
33. use `bootout' ;
34. summ ;
35. summ _b_policy , detail ;
36. kdensity _b_policy , bw(0.004) n(400) ;
37. graph export G6_boot_density.png , replace ;
38. /* next we do a wild bootstrap, Rademacher weights (+-1), and show that the resulting distribution of t-statistics takes only a few
> values
> (and hence the p-value of the main t-statistic is only interval-identified). We will do the same with the Webb 6-point distribution,
> to see
> that the t-stats have a much more "continuous" distribution. */
> use `maindata' , clear ;
39. keep statefip ;
40. contract statefip ;
41. drop _freq ;
42. sort statefip ;
43. tempfile to_be_sampled ;
44. qui save `to_be_sampled' ;
45. list ;
46. local main_t = (`main_b' - 0) / `main_se' ;
47. cap postclose bs_output ;
48. tempfile bsout ;
49. cap erase `bsout' ;
50. postfile bs_output t_rad_res t_webb_res using `bsout' ;
51. qui forvalues bb = 1/`bsreps' { ;
52. /* for the wild bootstrap */
> /* take the cluster list, generate 2 sets of residual transformations (Rademacher and Webb weights) */
> /* then merge these back onto main dataset, create transformed residuals and then wild outcome variables (y-hat + transformed residual) */
> /* then estimate the models, and save the t-statistics */
>
> use statefip using `to_be_sampled' , replace ;
53. gen my_uniform = uniform() ;
54. gen wild_rademacher = -1 + 2 * (my_uniform >= 0.5) ;
55. gen wild_webb = (-1) * sqrt(1.5) * (my_uniform > (0) & my_uniform <= (1/6)) +
> (-1) * sqrt(1) * (my_uniform > (1/6) & my_uniform <= (2/6)) +
> (-1) * sqrt(0.5) * (my_uniform > (2/6) & my_uniform <= (3/6)) +
> (+1) * sqrt(0.5) * (my_uniform > (3/6) & my_uniform <= (4/6)) +
> (+1) * sqrt(1) * (my_uniform > (4/6) & my_uniform <= (5/6)) +
> (+1) * sqrt(1.5) * (my_uniform > (5/6) & my_uniform <= (6/6)) ;
56. keep statefip wild_rademacher wild_webb ;
57. sort statefip ;
58. merge 1:m statefip using `maindata' , assert(match) keep(match) nogenerate ;
59. /* create transformed residuals and new wild-outcome-variables */
> gen resid_wild_rad_restricted = resid_restricted * wild_rademacher ;
60. gen resid_wild_webb_restricted = resid_restricted * wild_webb ;
61. gen y_wild_rademacher_restricted = yhat_restricted + resid_wild_rad_restricted ;
62. gen y_wild_webb_restricted = yhat_restricted + resid_wild_webb_restricted ;
63. /* now estimate cluster-robust models on each of these two outcomes, generating t-statistics.
> For the restricted model, the t-stat is based on the null hypothesis. For the unrestricted
> model the t-stat would be based on the main (first) estimated beta (only the restricted variants are run below). */
>
> reg y_wild_rademacher_restricted policy age age2 yrseduc , cluster(statefip) ;
64. local b_wild_rademacher_restricted = _b[policy] ;
65. local se_wild_rademacher_restricted = _se[policy] ;
66. reg y_wild_webb_restricted policy age age2 yrseduc , cluster(statefip) ;
67. local b_wild_webb_restricted = _b[policy] ;
68. local se_wild_webb_restricted = _se[policy] ;
69. /* make the t-stats ; store away into a postfile */
>
> local t_wild_rademacher_restricted = (`b_wild_rademacher_restricted ' - 0)
> / `se_wild_rademacher_restricted' ;
70. local t_wild_webb_restricted = (`b_wild_webb_restricted ' - 0)
> / `se_wild_webb_restricted' ;
71. post bs_output (`t_wild_rademacher_restricted') (`t_wild_webb_restricted') ;
72. } ;
73. postclose bs_output ;
74. use `bsout' , clear ;
75. summ ;
76. gen one = 1 ;
77. sort t_rad_res ;
78. gen Rademacher = sum(one) / _N ;
79. sort t_webb_res ;
80. gen Webb = sum(one) / _N ;
81. qui save `bsout' , replace ;
82. keep Rademacher t_rad_res ;
83. rename t_rad_res t_stat ;
84. tempfile rad ;
85. sort t_stat ;
86. qui save `rad' ;
87. qui use `bsout' ;
88. keep Webb t_webb_res ;
89. rename t_webb_res t_stat ;
90. sort t_stat ;
91. merge t_stat using `rad' ;
92. sort t_stat Rademacher Webb ;
93. summ ;
94. graph twoway (line Rademacher t_stat) (line Webb t_stat) , xline(`main_t') ti("CDFs of Bootstrapped t-distributions")
> note("Note: 6 Clusters. 999 bootstrap replications. Vertical line at main t-statistic.") ;
95. graph export G6_Wild_CDFs.png , replace ;
96. local lower = `main_t' - 0.04 ;
97. local upper = `main_t' + 0.04 ;
98. graph twoway (line Rademacher t_stat if t_stat >= `lower' & t_stat <= `upper') (line Webb t_stat if t_stat >= `lower' & t_stat <= `uppe
> r') , xline(`main_t') ti("CDFs of Bootstrapped t-distributions")
> note("Note: 6 Clusters. 999 bootstrap replications. Vertical line at main t-statistic. Zoomed in near main t-staistic.") ;
99. graph export G6_Wild_CDFs_ZOOM.png , replace ;
100. end ;
. runme ;
(45 observations deleted)
(label STATEFIP already defined)
Result # of obs.
-----------------------------------------
not matched 58,697
from master 58,697 (_merge==1)
from using 0 (_merge==2)
matched 6,988 (_merge==3)
-----------------------------------------
_merge | Freq. Percent Cum.
------------------------+-----------------------------------
master only (1) | 58,697 89.36 89.36
matched (3) | 6,988 10.64 100.00
------------------------+-----------------------------------
Total | 65,685 100.00
(58697 observations deleted)
State (FIPS code) | Freq. Percent Cum.
----------------------------------------+-----------------------------------
Arizona | 839 12.01 12.01
Delaware | 1,055 15.10 27.10
Louisiana | 560 8.01 35.12
Maryland | 1,824 26.10 61.22
Pennsylvania | 1,883 26.95 88.17
Utah | 827 11.83 100.00
----------------------------------------+-----------------------------------
Total | 6,988 100.00
Source | SS df MS Number of obs = 6988
-------------+------------------------------ F( 4, 6983) = 636.46
Model | 718.103242 4 179.52581 Prob > F = 0.0000
Residual | 1969.68395 6983 .282068445 R-squared = 0.2672
-------------+------------------------------ Adj R-squared = 0.2668
Total | 2687.78719 6987 .384684012 Root MSE = .5311
------------------------------------------------------------------------------
lnwage | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
policy | .0372644 .0127533 2.92 0.003 .012264 .0622647
age | .0717394 .0040295 17.80 0.000 .0638404 .0796385
age2 | -.0007132 .0000477 -14.94 0.000 -.0008067 -.0006196
yrseduc | .0955451 .002366 40.38 0.000 .090907 .1001833
_cons | -.3223019 .0852035 -3.78 0.000 -.4893267 -.1552772
------------------------------------------------------------------------------
Linear regression Number of obs = 6988
F( 4, 6983) = 575.89
Prob > F = 0.0000
R-squared = 0.2672
Root MSE = .5311
------------------------------------------------------------------------------
| Robust
lnwage | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
policy | .0372644 .0127651 2.92 0.004 .0122409 .0622879
age | .0717394 .0039865 18.00 0.000 .0639247 .0795542
age2 | -.0007132 .0000476 -14.98 0.000 -.0008065 -.0006199
yrseduc | .0955451 .002559 37.34 0.000 .0905287 .1005616
_cons | -.3223019 .0842319 -3.83 0.000 -.487422 -.1571819
------------------------------------------------------------------------------
Linear regression Number of obs = 6988
F( 4, 5) = 4661.07
Prob > F = 0.0000
R-squared = 0.2672
Root MSE = .5311
(Std. Err. adjusted for 6 clusters in statefip)
------------------------------------------------------------------------------
| Robust
lnwage | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
policy | .0372644 .0577369 0.65 0.547 -.1111531 .1856818
age | .0717394 .004072 17.62 0.000 .0612721 .0822068
age2 | -.0007132 .0000538 -13.25 0.000 -.0008515 -.0005748
yrseduc | .0955451 .0028759 33.22 0.000 .0881524 .1029379
_cons | -.3223019 .0664937 -4.85 0.005 -.4932293 -.1513746
------------------------------------------------------------------------------
Source | SS df MS Number of obs = 6988
-------------+------------------------------ F( 3, 6984) = 844.86
Model | 715.695024 3 238.565008 Prob > F = 0.0000
Residual | 1972.09217 6984 .282372876 R-squared = 0.2663
-------------+------------------------------ Adj R-squared = 0.2660
Total | 2687.78719 6987 .384684012 Root MSE = .53139
------------------------------------------------------------------------------
lnwage | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
age | .0717968 .0040316 17.81 0.000 .0638936 .0797001
age2 | -.0007137 .0000478 -14.94 0.000 -.0008073 -.00062
yrseduc | .0957739 .002366 40.48 0.000 .0911358 .100412
_cons | -.3098749 .0851432 -3.64 0.000 -.4767814 -.1429683
------------------------------------------------------------------------------
(running regress on estimation sample)
Bootstrap replications (999)
----+--- 1 ---+--- 2 ---+--- 3 ---+--- 4 ---+--- 5
......x........................................... 50
.....................x..................x......... 100
.................................................. 150
.x.........................x...................... 200
..............x....xx............................. 250
......................................x........x.. 300
....................x..................x.......... 350
......xx.........x.....x.......................... 400
.................................................. 450
...............................................x.. 500
..........................................x....... 550
...................x.........x.................... 600
.................................................. 650
........x.........x............................... 700
.................................................. 750
.....................x...................x........ 800
.................................................. 850
.....x............................................ 900
......x................x.......................... 950
..............x..................................
Linear regression Number of obs = 6988
Replications = 971
Wald chi2(4) = 9628.73
Prob > chi2 = 0.0000
R-squared = 0.2672
Adj R-squared = 0.2668
Root MSE = 0.5311
(Replications based on 6 clusters in statefip)
------------------------------------------------------------------------------
| Observed Bootstrap Normal-based
lnwage | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
policy | .0372644 .0621875 0.60 0.549 -.084621 .1591497
age | .0717394 .0036473 19.67 0.000 .0645909 .078888
age2 | -.0007132 .0000482 -14.79 0.000 -.0008077 -.0006186
yrseduc | .0955451 .0030006 31.84 0.000 .0896641 .1014262
_cons | -.3223019 .0697258 -4.62 0.000 -.4589621 -.1856418
------------------------------------------------------------------------------
Note: One or more parameters could not be estimated in 28 bootstrap replicates;
standard-error estimates include only complete replications.
(bootstrap: regress)
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
_b_policy | 971 .0177986 .0621875 -.1209276 .1330283
_b_age | 971 .0709595 .0036473 .0566686 .079687
_b_age2 | 971 -.0007035 .0000482 -.0008215 -.000517
_b_yrseduc | 971 .0946171 .0030006 .0849783 .1002435
_b_cons | 971 -.2942799 .0697258 -.4465786 .003846
_b[policy]
-------------------------------------------------------------
Percentiles Smallest
1% -.1089039 -.1209276
5% -.1009745 -.1140021
10% -.0732127 -.1128442 Obs 971
25% -.0381706 -.1114387 Sum of Wgt. 971
50% .0323881 Mean .0177986
Largest Std. Dev. .0621875
75% .0667737 .1241792
90% .084883 .1241792 Variance .0038673
95% .1032195 .1271342 Skewness -.5213004
99% .1191724 .1330283 Kurtosis 2.151208
(file G6_boot_density.png written in PNG format)
+----------+
| statefip |
|----------|
1. | Arizona |
2. | Delaware |
3. | Louisian |
4. | Maryland |
5. | Pennsylv |
|----------|
6. | Utah |
+----------+
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
t_rad_res | 999 .1319911 2.931311 -7.093219 7.093218
t_webb_res | 999 .1085676 2.780773 -9.562051 13.94146
(note: you are using old merge syntax; see [D] merge for new syntax)
variable t_stat does not uniquely identify observations in the master data
variable t_stat does not uniquely identify observations in C:\Users\dlmiller\AppData\Local\Temp\ST_03000006.tmp
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
t_stat | 1998 .1202793 2.856342 -9.562051 13.94146
Webb | 999 .5005005 .2888196 .001001 1
Rademacher | 999 .5005005 .2888196 .001001 1
_merge | 1998 1.5 .5001252 1 2
(file G6_Wild_CDFs.png written in PNG format)
(file G6_Wild_CDFs_ZOOM.png written in PNG format)
. log close _all ;
name:
log: C:\doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code\t1_boot_breakdown.log
log type: text
closed on: 5 Jul 2014, 13:18:13
--------------------------------------------------------------------------------------------------------------------------------------------