makedata.do open
file:script:do
| # | content log |
|---|---|
| 1 [+] | #delimit |
| 2 [+] | cap log close |
| 3 [+] | log using makedata.log , |
| 5 [+] | clear |
| 6 [+] | set matsize 2000 |
| 8 [+] | do set_directory_macros |
| 10 [+] | use "${rawdat}\cps_00003" , |
| 12 [+] | summ |
/* states uniquely defined only from 1977 on */ |
|
| 15 [+] | keep if year >= 1977 |
| 17 [+] | /* drop missing values */ |
| 19 [+] | foreach vv in educ age sex year state incwage { |
| 21 [+] | di "`vv'" |
| 22 [+] | drop if `vv' == . |
| 24 [+] | } |
/* drop if earnings allocated */ |
|
| 27 [+] | drop if qincwage ~= 0 |
| 29 [+] | /* turn education into years */ |
| 31 [+] | gen yrseduc = . |
| 32 [+] | replace yrseduc = 0 if educ == 2 |
| 33 [+] | replace yrseduc = 3 if educ == 10 |
| 34 [+] | replace yrseduc = 1 if educ == 11 |
| 35 [+] | replace yrseduc = 2 if educ == 12 |
| 36 [+] | replace yrseduc = 3 if educ == 13 |
| 37 [+] | replace yrseduc = 4 if educ == 14 |
| 39 [+] | replace yrseduc = 6 if educ == 20 |
| 40 [+] | replace yrseduc = 5 if educ == 21 |
| 41 [+] | replace yrseduc = 6 if educ == 22 |
| 43 [+] | replace yrseduc = 8 if educ == 30 |
| 44 [+] | replace yrseduc = 7 if educ == 31 |
| 45 [+] | replace yrseduc = 8 if educ == 32 |
| 47 [+] | replace yrseduc = 9 if educ == 40 |
| 48 [+] | replace yrseduc = 10 if educ == 50 |
| 49 [+] | replace yrseduc = 11 if educ == 60 |
| 51 [+] | replace yrseduc = 11 if educ == 71 |
| 52 [+] | replace yrseduc = 12 if educ == 72 |
| 53 [+] | replace yrseduc = 12 if educ == 73 |
| 55 [+] | replace yrseduc = 13 if educ == 80 |
| 56 [+] | replace yrseduc = 14 if educ == 81 |
| 58 [+] | replace yrseduc = 14 if educ == 90 |
| 59 [+] | replace yrseduc = 14 if educ == 91 |
| 60 [+] | replace yrseduc = 14 if educ == 92 |
| 62 [+] | replace yrseduc = 15 if educ == 100 |
| 63 [+] | replace yrseduc = 16 if educ == 110 |
| 64 [+] | replace yrseduc = 16 if educ == 111 |
| 66 [+] | replace yrseduc = 16 if educ == 121 |
| 67 [+] | replace yrseduc = 16 if educ == 122 |
| 68 [+] | replace yrseduc = 18 if educ == 123 |
| 69 [+] | replace yrseduc = 19 if educ == 124 |
| 70 [+] | replace yrseduc = 20 if educ == 125 |
| 72 [+] | drop if yrseduc == 999 |
/* keep workers with sufficient labor force attachment */ |
|
| 75 [+] | keep if WKSWORK2 >= 4 |
| 76 [+] | keep if uhrswork >= 30 |
| 77 [+] | drop if incwage == 0 |
/* generate ln(earnings/hour) */ |
|
| 80 [+] | gen numweeks = 43.5 if WKSWORK2 == 4 |
| 81 [+] | replace numweeks = 48.5 if WKSWORK2 == 5 |
| 82 [+] | replace numweeks = 50 if WKSWORK2 == 6 |
| 83 [+] | gen annual_hours = numweeks * uhrswork |
| 85 [+] | gen incwage1999 = incwage * CPI99 |
| 86 [+] | gen wage_per_hour = incwage1999 / annual_hours |
| 87 [+] | summ wage_per_hour , |
| 88 [+] | drop if wage_per_hour < 2 | wage_per_hour > 100 |
| 91 [+] | gen lnwage = ln(wage_per_hour) |
| 92 [+] | *graph twoway hist lnwage ; |
| 94 [+] | gen age2 = age*age |
/* keep only variables of interest */ |
|
| 97 [+] | keep year statefip wtsupp age age2 sex yrseduc lnwage wage_per_hour |
| 99 [+] | /* Feb 19, 2014: doug thinks these two lines can be cut: reg lnwage age age2 sex yrseduc i.year i.statefip ; predict resid , resid ; */ |
| 104 [+] | summ |
| 107 [+] | save "${madedat}\CPS_all_micro" , |
/* save 2012 analysis data file for Tables 1 and 2 */ |
|
| 110 [+] | keep if year == 2012 |
| 111 [+] | summ |
| 112 [+] | save "${madedat}\CPS_2012_micro" , |
/* now make a medium subsample */ |
|
| 115 [+] | set seed 10101 |
| 116 [+] | qui keep if uniform() < 0.20 |
| 117 [+] | save "${madedat}\CPS_2012_micro_medium" , |
/* now make a small subsample. 15% of 20% is 3% of original */ |
|
| 120 [+] | set seed 10102 |
| 121 [+] | qui keep if uniform() < 0.15 |
| 122 [+] | save "${madedat}\CPS_2012_micro_small" , |
/* get state-year panel ready */ |
|
| 127 [+] | use "${madedat}\CPS_all_micro" , |
/* partial out differences in demographics across state years. So, get regression on */ |
|
| 130 [+] | egen styr = group(statefip year) |
| 131 [+] | areg lnwage age age2 sex yrseduc [pw=wtsupp] , |
| 132 [+] | predict lnwage_sy , |
| 135 [+] | collapse (mean) lnwage lnwage_sy (rawsum) wtsupp [pw=wtsupp] , |
| 136 [+] | rename wtsupp popweight |
| 137 [+] | summ |
| 138 [+] | save "${madedat}\CPS_panel" , |
#delimit ;
/* From here on every statement is terminated by a semicolon. */

/* Close any log that is still open (capture suppresses the error when none is),
   then start a fresh text log, overwriting the previous one. */
cap log close ;
log using makedata.log, text replace ;
clear ;
set matsize 2000 ;
/* Runs set_directory_macros.do. The globals rawdat and madedat used in this
   file are presumably defined there -- confirm. */
do set_directory_macros ;
/* Load the raw extract. The documented option for replacing the data in memory
   with -use- is clear. The original passed replace, which is not part of the
   documented syntax of -use-. */
use "${rawdat}\cps_00003" , clear ;
summ ;
/* states uniquely defined only from 1977 on */
keep if year >= 1977 ;
/* Remove observations with a system-missing value in any core variable.
   The variable name is echoed to the log before each drop. */
foreach v in educ age sex year state incwage { ;
    display "`v'" ;
    drop if `v' == . ;
} ;
/* Discard observations whose earnings were allocated (nonzero qincwage flag) */
drop if qincwage != 0 ;
/* Turn the categorical educ code into years of schooling (yrseduc).
   Each educ code listed below is assigned a number of years. Any code that is
   not listed leaves yrseduc missing.
   NOTE(review): codes 121 and 122 are assigned 16 years, the same as 110 and
   111 -- confirm that this is intended. */
gen yrseduc = . ;
replace yrseduc = 0  if educ == 2 ;
replace yrseduc = 1  if educ == 11 ;
replace yrseduc = 2  if educ == 12 ;
replace yrseduc = 3  if inlist(educ, 10, 13) ;
replace yrseduc = 4  if educ == 14 ;
replace yrseduc = 5  if educ == 21 ;
replace yrseduc = 6  if inlist(educ, 20, 22) ;
replace yrseduc = 7  if educ == 31 ;
replace yrseduc = 8  if inlist(educ, 30, 32) ;
replace yrseduc = 9  if educ == 40 ;
replace yrseduc = 10 if educ == 50 ;
replace yrseduc = 11 if inlist(educ, 60, 71) ;
replace yrseduc = 12 if inlist(educ, 72, 73) ;
replace yrseduc = 13 if educ == 80 ;
replace yrseduc = 14 if inlist(educ, 81, 90, 91, 92) ;
replace yrseduc = 15 if educ == 100 ;
replace yrseduc = 16 if inlist(educ, 110, 111, 121, 122) ;
replace yrseduc = 18 if educ == 123 ;
replace yrseduc = 19 if educ == 124 ;
replace yrseduc = 20 if educ == 125 ;
/* Drop observations whose educ code was not mapped above. The original
   statement tested yrseduc == 999, but yrseduc is never assigned 999, so it
   dropped nothing and unmapped codes stayed in the data with yrseduc missing. */
drop if missing(yrseduc) ;
/* Sample restriction: workers with sufficient labor force attachment.
     WKSWORK2 >= 4   40 or more weeks worked in last year
     uhrswork >= 30  30 or more usual hours per week worked in last year
     incwage  != 0   nonzero wage income */
keep if WKSWORK2 >= 4 & uhrswork >= 30 & incwage != 0 ;

/* Construct ln(earnings/hour).
   Each weeks-worked category is assigned a number of weeks. Any other
   category value leaves numweeks (and hence the wage) missing. */
gen numweeks = cond(WKSWORK2 == 4, 43.5,
               cond(WKSWORK2 == 5, 48.5,
               cond(WKSWORK2 == 6, 50, .))) ;
gen annual_hours = uhrswork * numweeks ;
/* Wage income scaled by the CPI99 factor */
gen incwage1999 = CPI99 * incwage ;
gen wage_per_hour = incwage1999 / annual_hours ;
summarize wage_per_hour , detail ;
/* Keep hourly wages within [2, 100]. Missing wages are excluded as well,
   because inrange() is false for a missing first argument. */
keep if inrange(wage_per_hour, 2, 100) ;
gen lnwage = ln(wage_per_hour) ;
*graph twoway hist lnwage ;
gen age2 = age * age ;
/* Retain only the variables needed downstream */
keep lnwage wage_per_hour age age2 sex yrseduc year statefip wtsupp ;
/* Feb 19, 2014: doug thinks these two lines can be cut:
reg lnwage age age2 sex yrseduc i.year i.statefip ;
predict resid , resid ;
*/
summarize ;
save "${madedat}\CPS_all_micro" , replace ;

/* 2012 cross-section: analysis data file for Tables 1 and 2 */
keep if year == 2012 ;
summarize ;
save "${madedat}\CPS_2012_micro" , replace ;

/* Medium subsample: keep each 2012 observation when its uniform draw is
   below 0.20 */
set seed 10101 ;
quietly keep if uniform() < 0.20 ;
save "${madedat}\CPS_2012_micro_medium" , replace ;

/* Small subsample, drawn from the medium one: 15% of 20% is 3% of original */
set seed 10102 ;
quietly keep if uniform() < 0.15 ;
save "${madedat}\CPS_2012_micro_small" , replace ;
/* Build the state-year panel from the full micro file saved earlier in this
   script. The documented option for replacing the data in memory with -use-
   is clear. The original passed replace, which is not part of the documented
   syntax of -use-. */
use "${madedat}\CPS_all_micro" , clear ;
/* Partial out differences in demographics across state-years: regress lnwage
   on age, age2, sex and yrseduc, absorbing one effect per state-year cell. */
egen styr = group(statefip year) ;
areg lnwage age age2 sex yrseduc [pw=wtsupp] , absorb(styr) ;
/* lnwage_sy is the estimated value of the absorbed state-year effect */
predict lnwage_sy , d ;
/* One row per state-year: weighted means of lnwage and lnwage_sy, plus the
   raw (unweighted) sum of wtsupp, renamed popweight below. */
collapse (mean) lnwage lnwage_sy (rawsum) wtsupp [pw=wtsupp] , by(statefip year) ;
rename wtsupp popweight ;
summ ;
save "${madedat}\CPS_panel" , replace ;