makedata.do open
file:script:do
# | content log |
---|---|
1 [+] | #delimit |
2 [+] | cap log close |
3 [+] | log using makedata.log , |
5 [+] | clear |
6 [+] | set matsize 2000 |
8 [+] | do set_directory_macros |
10 [+] | use "${rawdat}\cps_00003" , |
12 [+] | summ |
/* states uniquely defined only from 1977 on */ |
|
15 [+] | keep if year >= 1977 |
17 [+] | /* drop missing values */ |
19 [+] | foreach vv in educ age sex year state incwage { |
21 [+] | di "`vv'" |
22 [+] | drop if `vv' == . |
24 [+] | } |
/* drop if earnings allocated */ |
|
27 [+] | drop if qincwage ~= 0 |
29 [+] | /* turn education into years */ |
31 [+] | gen yrseduc = . |
32 [+] | replace yrseduc = 0 if educ == 2 |
33 [+] | replace yrseduc = 3 if educ == 10 |
34 [+] | replace yrseduc = 1 if educ == 11 |
35 [+] | replace yrseduc = 2 if educ == 12 |
36 [+] | replace yrseduc = 3 if educ == 13 |
37 [+] | replace yrseduc = 4 if educ == 14 |
39 [+] | replace yrseduc = 6 if educ == 20 |
40 [+] | replace yrseduc = 5 if educ == 21 |
41 [+] | replace yrseduc = 6 if educ == 22 |
43 [+] | replace yrseduc = 8 if educ == 30 |
44 [+] | replace yrseduc = 7 if educ == 31 |
45 [+] | replace yrseduc = 8 if educ == 32 |
47 [+] | replace yrseduc = 9 if educ == 40 |
48 [+] | replace yrseduc = 10 if educ == 50 |
49 [+] | replace yrseduc = 11 if educ == 60 |
51 [+] | replace yrseduc = 11 if educ == 71 |
52 [+] | replace yrseduc = 12 if educ == 72 |
53 [+] | replace yrseduc = 12 if educ == 73 |
55 [+] | replace yrseduc = 13 if educ == 80 |
56 [+] | replace yrseduc = 14 if educ == 81 |
58 [+] | replace yrseduc = 14 if educ == 90 |
59 [+] | replace yrseduc = 14 if educ == 91 |
60 [+] | replace yrseduc = 14 if educ == 92 |
62 [+] | replace yrseduc = 15 if educ == 100 |
63 [+] | replace yrseduc = 16 if educ == 110 |
64 [+] | replace yrseduc = 16 if educ == 111 |
66 [+] | replace yrseduc = 16 if educ == 121 |
67 [+] | replace yrseduc = 16 if educ == 122 |
68 [+] | replace yrseduc = 18 if educ == 123 |
69 [+] | replace yrseduc = 19 if educ == 124 |
70 [+] | replace yrseduc = 20 if educ == 125 |
72 [+] | drop if yrseduc == 999 |
/* keep workers with sufficient labor force attachment */ |
|
75 [+] | keep if WKSWORK2 >= 4 |
76 [+] | keep if uhrswork >= 30 |
77 [+] | drop if incwage == 0 |
/* generate ln(earnings/hour) */ |
|
80 [+] | gen numweeks = 43.5 if WKSWORK2 == 4 |
81 [+] | replace numweeks = 48.5 if WKSWORK2 == 5 |
82 [+] | replace numweeks = 50 if WKSWORK2 == 6 |
83 [+] | gen annual_hours = numweeks * uhrswork |
85 [+] | gen incwage1999 = incwage * CPI99 |
86 [+] | gen wage_per_hour = incwage1999 / annual_hours |
87 [+] | summ wage_per_hour , |
88 [+] | drop if wage_per_hour < 2 | wage_per_hour > 100 |
91 [+] | gen lnwage = ln(wage_per_hour) |
92 [+] | *graph twoway hist lnwage ; |
94 [+] | gen age2 = age*age |
/* keep only variables of interest */ |
|
97 [+] | keep year statefip wtsupp age age2 sex yrseduc lnwage wage_per_hour |
99 [+] | /* Feb 19, 2014: doug thinks these two lines can be cut: reg lnwage age age2 sex yrseduc i.year i.statefip ; predict resid , resid ; */ |
104 [+] | summ |
107 [+] | save "${madedat}\CPS_all_micro" , |
/* save 2012 analysis data file for Tables 1 and 2 */ |
|
110 [+] | keep if year == 2012 |
111 [+] | summ |
112 [+] | save "${madedat}\CPS_2012_micro" , |
/* now make a medium subsample */ |
|
115 [+] | set seed 10101 |
116 [+] | qui keep if uniform() < 0.20 |
117 [+] | save "${madedat}\CPS_2012_micro_medium" , |
/* now make a small subsample. 15% of 20% is 3% of original */ |
|
120 [+] | set seed 10102 |
121 [+] | qui keep if uniform() < 0.15 |
122 [+] | save "${madedat}\CPS_2012_micro_small" , |
/* get state-year panel ready */ |
|
127 [+] | use "${madedat}\CPS_all_micro" , |
/* partial out differences in demographics across state years. So, get regression on */ |
|
130 [+] | egen styr = group(statefip year) |
131 [+] | areg lnwage age age2 sex yrseduc [pw=wtsupp] , |
132 [+] | predict lnwage_sy , |
135 [+] | collapse (mean) lnwage lnwage_sy (rawsum) wtsupp [pw=wtsupp] , |
136 [+] | rename wtsupp popweight |
137 [+] | summ |
138 [+] | save "${madedat}\CPS_panel" , |
/* ---------------------------------------------------------------------------
   makedata.do

   Builds the analysis data sets from the raw CPS extract:
     1. reads ${rawdat}/cps_00003 and applies the sample restrictions below;
     2. converts the educ codes to years of education (yrseduc);
     3. builds a real hourly wage and its log (lnwage);
     4. saves the micro file (CPS_all_micro), the 2012 cross-section and two
        random subsamples of it (CPS_2012_micro, _medium, _small);
     5. saves a state-year panel (CPS_panel).

   Requires set_directory_macros.do to define the globals rawdat and madedat.
   All statements are terminated by ";" (see #delimit below).
   --------------------------------------------------------------------------- */

#delimit ;

cap log close ;
log using makedata.log, text replace ;

clear ;
set matsize 2000 ;

do set_directory_macros ;

/* "clear" is the documented option of -use- for discarding data in memory.
   Forward slashes are used as path separators because Stata accepts them on
   every platform, whereas backslashes only work on Windows. */
use "${rawdat}/cps_00003" , clear ;

summ ;

/* states uniquely defined only from 1977 on */
keep if year >= 1977 ;

/* drop missing values */
/* NOTE(review): "state" is not referenced anywhere else in this script (the
   rest uses statefip). Presumably it resolves to statefip through variable
   abbreviation -- confirm against the raw extract. */
foreach vv in educ age sex year state incwage { ;
    di "`vv'" ;
    drop if `vv' == . ;
} ;

/* drop if earnings allocated */
drop if qincwage ~= 0 ;

/* turn education into years */
gen yrseduc = . ;

replace yrseduc = 0 if educ == 2 ;
replace yrseduc = 3 if educ == 10 ;
replace yrseduc = 1 if educ == 11 ;
replace yrseduc = 2 if educ == 12 ;
replace yrseduc = 3 if educ == 13 ;
replace yrseduc = 4 if educ == 14 ;

replace yrseduc = 6 if educ == 20 ;
replace yrseduc = 5 if educ == 21 ;
replace yrseduc = 6 if educ == 22 ;

replace yrseduc = 8 if educ == 30 ;
replace yrseduc = 7 if educ == 31 ;
replace yrseduc = 8 if educ == 32 ;

replace yrseduc = 9 if educ == 40 ;
replace yrseduc = 10 if educ == 50 ;
replace yrseduc = 11 if educ == 60 ;

replace yrseduc = 11 if educ == 71 ;
replace yrseduc = 12 if educ == 72 ;
replace yrseduc = 12 if educ == 73 ;

replace yrseduc = 13 if educ == 80 ;
replace yrseduc = 14 if educ == 81 ;

replace yrseduc = 14 if educ == 90 ;
replace yrseduc = 14 if educ == 91 ;
replace yrseduc = 14 if educ == 92 ;

replace yrseduc = 15 if educ == 100 ;
replace yrseduc = 16 if educ == 110 ;
replace yrseduc = 16 if educ == 111 ;

replace yrseduc = 16 if educ == 121 ;
replace yrseduc = 16 if educ == 122 ;
replace yrseduc = 18 if educ == 123 ;
replace yrseduc = 19 if educ == 124 ;
replace yrseduc = 20 if educ == 125 ;

/* Drop observations whose educ code was not mapped above. The previous test
   (yrseduc == 999) could never be true: yrseduc starts out missing and is
   only ever assigned values between 0 and 20, so unmapped codes were
   silently kept with a missing yrseduc. */
drop if missing(yrseduc) ;

/* keep workers with sufficient labor force attachment */
keep if WKSWORK2 >= 4 ;    /* 40 or more weeks worked in last year */
keep if uhrswork >= 30 ;   /* 30 or more usual hours per week worked in last year */
drop if incwage == 0 ;

/* generate ln(earnings/hour) */
/* numweeks stays missing for any WKSWORK2 value other than 4, 5 or 6 */
gen numweeks = 43.5 if WKSWORK2 == 4 ;
replace numweeks = 48.5 if WKSWORK2 == 5 ;
replace numweeks = 50 if WKSWORK2 == 6 ;
gen annual_hours = numweeks * uhrswork ;

gen incwage1999 = incwage * CPI99 ;
gen wage_per_hour = incwage1999 / annual_hours ;
summ wage_per_hour , det ;

/* Trim implausible hourly wages. A missing wage_per_hour compares as larger
   than any number in Stata, so the "> 100" condition also removes
   observations for which the wage could not be computed. */
drop if wage_per_hour < 2 | wage_per_hour > 100 ;

gen lnwage = ln(wage_per_hour) ;
*graph twoway hist lnwage ;

gen age2 = age*age ;

/* keep only variables of interest */
keep year statefip wtsupp age age2 sex yrseduc lnwage wage_per_hour ;

/*
Feb 19, 2014: doug thinks these two lines can be cut:
reg lnwage age age2 sex yrseduc i.year i.statefip ;
predict resid , resid ;
*/

summ ;

save "${madedat}/CPS_all_micro" , replace ;

/* save 2012 analysis data file for Tables 1 and 2 */
keep if year == 2012 ;
summ ;
save "${madedat}/CPS_2012_micro" , replace ;

/* now make a medium subsample */
set seed 10101 ;
qui keep if uniform() < 0.20 ;
save "${madedat}/CPS_2012_micro_medium" , replace ;

/* now make a small subsample. 15% of 20% is 3% of original */
set seed 10102 ;
qui keep if uniform() < 0.15 ;
save "${madedat}/CPS_2012_micro_small" , replace ;

/* get state-year panel ready */
use "${madedat}/CPS_all_micro" , clear ;

/* partial out differences in demographics across state-years: regress lnwage
   on age, age2, sex and yrseduc while absorbing a state-year fixed effect */
egen styr = group(statefip year) ;
areg lnwage age age2 sex yrseduc [pw=wtsupp] , a(styr) ;
predict lnwage_sy , d ;   /* uses the estimated value of the fixed effect for prediction, outcome_sy_hat */

/* one row per state-year: weighted means of lnwage and lnwage_sy, plus the
   unweighted (raw) sum of wtsupp, which is renamed popweight */
collapse (mean) lnwage lnwage_sy (rawsum) wtsupp [pw=wtsupp] , by(statefip year) ;
rename wtsupp popweight ;
summ ;
save "${madedat}/CPS_panel" , replace ;

/* close the log opened at the top so it is flushed and not left open */
log close ;