Content
--------------------------------------------------------------------------------------------------------------------------------------------
name:
log: C:\doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code\makedata.log
log type: text
opened on: 25 Jun 2014, 18:20:50
. clear ;
. set matsize 2000 ;
. do set_directory_macros ;
. #delimit ;
delimiter now ;
. local my_top_path = subinstr(c(pwd), "doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code", "",.) ;
. global rawdat = "`my_top_path'data\CPS\July 2013 extract for cluster JHR" ;
. global madedat = "`my_top_path'madedata\Colin JHR paper" ;
. if "$S_OS" == "Unix" { ;
. global rawdat = "../data" ;
. global madedat = "../data" ;
. } ;
. di "$rawdat" ;
C:\data\CPS\July 2013 extract for cluster JHR
. di "$madedat" ;
C:\madedata\Colin JHR paper
. *global rawdat = "C:\data\CPS\July 2013 extract for cluster JHR" ;
. *global madedat = "C:\madedata\Colin JHR paper" ;
. *global rawdat = "C:\Users\dlmiller\data\CPS\July 2013 extract for cluster JHR" ;
. *global madedat = "C:\Users\dlmiller\madedata\Colin JHR paper" ;
.
end of do-file
. use "${rawdat}\cps_00003" , replace ;
. summ ;
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
year | 4752942 1989.967 14.35442 1962 2012
serial | 4752942 37230.54 24759.98 1 99986
hwtsupp | 4752942 1530.477 816.556 -708.28 28722.98
statefip | 4752942 30.9139 19.11728 1 99
metro | 4752942 2.243568 1.045929 0 9
-------------+--------------------------------------------------------
CPI99 | 4752942 1.987522 1.45632 .741 5.572
month | 4752942 3 0 3 3
wtsupp | 4752942 1566.833 858.2344 -1310.45 28722.98
age | 4752942 38.89543 12.96385 18 64
sex | 4752942 1.519867 .4996052 1 2
-------------+--------------------------------------------------------
educ | 4710365 77.24322 25.76935 2 999
WKSWORK2 | 4752942 3.961487 2.544978 0 9
hrswork | 4752942 26.77776 21.62771 0 99
uhrswork | 3806483 31.38733 19.03592 0 99
inctot | 4752942 638022.9 7833824 -29647 1.00e+08
-------------+--------------------------------------------------------
incwage | 4752942 78725.58 782471.5 0 9999999
qincwage | 3806483 .0705567 .3494416 0 3
. /* states uniquely defined only from 1977 on */
> keep if year >= 1977 ;
(1024253 observations deleted)
. /* drop missing values */
>
> foreach vv in educ age sex year state incwage { ;
2. di "`vv'" ;
3. drop if `vv' == . ;
4. } ;
educ
(0 observations deleted)
age
(0 observations deleted)
sex
(0 observations deleted)
year
(0 observations deleted)
state
(0 observations deleted)
incwage
(0 observations deleted)
. /* drop if earnings allocated */
> drop if qincwage ~= 0 ;
(178333 observations deleted)
. /* turn education into years */
>
> gen yrseduc = . ;
(3550356 missing values generated)
. replace yrseduc = 0 if educ == 2 ;
(14805 real changes made)
. replace yrseduc = 3 if educ == 10 ;
(17954 real changes made)
. replace yrseduc = 1 if educ == 11 ;
(1527 real changes made)
. replace yrseduc = 2 if educ == 12 ;
(3545 real changes made)
. replace yrseduc = 3 if educ == 13 ;
(5705 real changes made)
. replace yrseduc = 4 if educ == 14 ;
(6595 real changes made)
. replace yrseduc = 6 if educ == 20 ;
(38573 real changes made)
. replace yrseduc = 5 if educ == 21 ;
(8365 real changes made)
. replace yrseduc = 6 if educ == 22 ;
(18713 real changes made)
. replace yrseduc = 8 if educ == 30 ;
(42409 real changes made)
. replace yrseduc = 7 if educ == 31 ;
(17075 real changes made)
. replace yrseduc = 8 if educ == 32 ;
(51887 real changes made)
. replace yrseduc = 9 if educ == 40 ;
(87471 real changes made)
. replace yrseduc = 10 if educ == 50 ;
(117319 real changes made)
. replace yrseduc = 11 if educ == 60 ;
(151223 real changes made)
. replace yrseduc = 11 if educ == 71 ;
(34808 real changes made)
. replace yrseduc = 12 if educ == 72 ;
(476607 real changes made)
. replace yrseduc = 12 if educ == 73 ;
(759105 real changes made)
. replace yrseduc = 13 if educ == 80 ;
(94895 real changes made)
. replace yrseduc = 14 if educ == 81 ;
(454128 real changes made)
. replace yrseduc = 14 if educ == 90 ;
(108305 real changes made)
. replace yrseduc = 14 if educ == 91 ;
(99194 real changes made)
. replace yrseduc = 14 if educ == 92 ;
(93262 real changes made)
. replace yrseduc = 15 if educ == 100 ;
(43443 real changes made)
. replace yrseduc = 16 if educ == 110 ;
(137342 real changes made)
. replace yrseduc = 16 if educ == 111 ;
(387174 real changes made)
. replace yrseduc = 16 if educ == 121 ;
(28599 real changes made)
. replace yrseduc = 16 if educ == 122 ;
(64482 real changes made)
. replace yrseduc = 18 if educ == 123 ;
(132803 real changes made)
. replace yrseduc = 19 if educ == 124 ;
(29708 real changes made)
. replace yrseduc = 20 if educ == 125 ;
(23335 real changes made)
. drop if yrseduc == 999 ;
(0 observations deleted)
. /* keep workers with sufficient labor force attachment */
> keep if WKSWORK2 >= 4 ;
(1251965 observations deleted)
. /* (refers to the preceding command, WKSWORK2 >= 4:) 40 or more weeks worked in last year */
> keep if uhrswork >= 30 ;
(207277 observations deleted)
. /* (refers to the preceding command, uhrswork >= 30:) 30 or more usual hours per week worked in last year */
> drop if incwage == 0 ;
(135108 observations deleted)
. /* generate ln(earnings/hour) */
> gen numweeks = 43.5 if WKSWORK2 == 4 ;
(1830378 missing values generated)
. replace numweeks = 48.5 if WKSWORK2 == 5 ;
(48531 real changes made)
. replace numweeks = 50 if WKSWORK2 == 6 ;
(1781847 real changes made)
. gen annual_hours = numweeks * uhrswork ;
. gen incwage1999 = incwage * CPI99 ;
. gen wage_per_hour = incwage1999 / annual_hours ;
. summ wage_per_hour , det ;
wage_per_hour
-------------------------------------------------------------
Percentiles Smallest
1% 1.935 .000294
5% 4.86816 .0003004
10% 6.2968 .0003071 Obs 1956006
25% 9.26 .0003071 Sum of Wgt. 1956006
50% 14 Mean 17.07468
Largest Std. Dev. 14.49506
75% 20.83636 528.069
90% 29.70992 543.3995 Variance 210.1068
95% 37.3864 560.2662 Skewness 6.088514
99% 63.19376 560.2662 Kurtosis 79.10572
. drop if wage_per_hour < 2 | wage_per_hour > 100 ;
(30677 observations deleted)
. gen lnwage = ln(wage_per_hour) ;
. *graph twoway hist lnwage ;
. gen age2 = age*age ;
. /* keep only variables of interest */
> keep year statefip wtsupp age age2 sex yrseduc lnwage wage_per_hour ;
. /* Feb 19, 2014: doug thinks these two lines can be cut:
> reg lnwage age age2 sex yrseduc i.year i.statefip ;
> predict resid , resid ;
> */
>
> summ ;
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
year | 1925329 1996.723 10.2517 1977 2012
statefip | 1925329 28.11244 15.58644 1 56
wtsupp | 1925329 1559.984 930.9336 0 16441.59
age | 1925329 39.41479 11.34152 18 64
sex | 1925329 1.432122 .4953713 1 2
-------------+--------------------------------------------------------
yrseduc | 1925329 13.40003 2.698104 0 20
wage_per_h~r | 1925329 16.54089 10.46338 2 99.98187
lnwage | 1925329 2.632298 .5940311 .6931472 4.604989
age2 | 1925329 1682.156 926.0186 324 4096
. save "${madedat}\CPS_all_micro" , replace ;
file C:\madedata\Colin JHR paper\CPS_all_micro.dta saved
. /* save 2012 analysis data file for Tables 1 and 2 */
> keep if year == 2012 ;
(1859644 observations deleted)
. summ ;
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
year | 65685 2012 0 2012 2012
statefip | 65685 27.82032 15.93185 1 56
wtsupp | 65685 1548.646 987.1174 104.17 7709.13
age | 65685 41.77176 11.3903 18 64
sex | 65685 1.456786 .4981329 1 2
-------------+--------------------------------------------------------
yrseduc | 65685 14.07761 2.718555 0 20
wage_per_h~r | 65685 17.66863 12.18621 2.0007 99.294
lnwage | 65685 2.675728 .6251131 .6934971 4.598085
age2 | 65685 1874.617 959.7299 324 4096
. save "${madedat}\CPS_2012_micro" , replace ;
file C:\madedata\Colin JHR paper\CPS_2012_micro.dta saved
. /* now make a medium subsample */
> set seed 10101 ;
. qui keep if uniform() < 0.20 ;
. save "${madedat}\CPS_2012_micro_medium" , replace ;
file C:\madedata\Colin JHR paper\CPS_2012_micro_medium.dta saved
. /* now make a small subsample. 15% of 20% is 3% of original */
> set seed 10102 ;
. qui keep if uniform() < 0.15 ;
. save "${madedat}\CPS_2012_micro_small" , replace ;
file C:\madedata\Colin JHR paper\CPS_2012_micro_small.dta saved
. /* get state-year panel ready */
> use "${madedat}\CPS_all_micro" , replace ;
. /* partial out differences in demographics across state-years. So, regress lnwage on demographics, absorbing state-year fixed effects */
> egen styr = group(statefip year) ;
. areg lnwage age age2 sex yrseduc [pw=wtsupp] , a(styr) ;
Linear regression, absorbing indicators Number of obs = 1925260
F( 4,1923420) = 145524.22
Prob > F = 0.0000
R-squared = 0.3213
Adj R-squared = 0.3206
Root MSE = 0.4909
------------------------------------------------------------------------------
| Robust
lnwage | Coef. Std. Err. t P>|t| [95% Conf. Interval]
-------------+----------------------------------------------------------------
age | .070167 .0002624 267.42 0.000 .0696527 .0706812
age2 | -.0007081 3.24e-06 -218.41 0.000 -.0007145 -.0007018
sex | -.277407 .0008327 -333.14 0.000 -.2790391 -.275775
yrseduc | .0884843 .0001674 528.49 0.000 .0881562 .0888125
_cons | .2714887 .0054199 50.09 0.000 .2608659 .2821116
-------------+----------------------------------------------------------------
styr | absorbed (1836 categories)
. predict lnwage_sy , d ;
(69 missing values generated)
. /* uses the estimated value of the state-year fixed effect for prediction; stored in lnwage_sy */
>
>
> collapse (mean) lnwage lnwage_sy (rawsum) wtsupp [pw=wtsupp] , by(statefip year) ;
. rename wtsupp popweight ;
. summ ;
Variable | Obs Mean Std. Dev. Min Max
-------------+--------------------------------------------------------
year | 1836 1994.5 10.39112 1977 2012
statefip | 1836 28.96078 15.68111 1 56
lnwage | 1836 2.604528 .1179528 2.293772 3.137917
lnwage_sy | 1836 -.0209217 .1121829 -.3087785 .5791852
popweight | 1836 1635884 1850099 72969.1 1.25e+07
. save "${madedat}\CPS_panel" , replace ;
file C:\madedata\Colin JHR paper\CPS_panel.dta saved
.
end of do-file
. do "C:\doug work\Colin\JHR cluster paper\work Summer 2014\empirical example code\table1.do"
. #delimit ;
delimiter now ;
. cap log close ;