| ch2.0.do:28 ch2.0.do:29 | [4] | [+] lcensus2 | ||
| ch2.0.do:28 ch2.0.do:29 | [31] | census2c.dta | ||
| [143] | gdp4cty.dta | |||
| ch2.0.do:28 ch2.0.do:29 | [155] | census2c.dta | ||
| master.do:26 | ||||
| → | ch2.0.do | ↘ | ||
| census2c.dta | [29] | ch2.0.do:4 ch2.0.do:31 ch2.0.do:155 ch3.01.do:1 ch11.01.do:487 | ||
| census2c.dta | [28] | ch2.0.do:4 ch2.0.do:31 ch2.0.do:155 ch3.01.do:1 ch11.01.do:487 | ||
ch2.0.do open
file:script:do
* Build the revised Chapter 2 extract from the census2 dataset.
* Web copy of the source data, kept for reference:
// use http://www.stata-press.com/data/r8/census2, clear
use census2, clear

* Retain only observations whose region code is below 3
* (original note: =="N Cntrl" | region == "NE")
keep if region < 3

* Divide every population count, marriage and divorce by 1000
* and display the rescaled values with one decimal place.
foreach var of varlist pop* marriage divorce {
    replace `var' = `var' / 1000
    format `var' %8.1f
}

* Shorter variable names
rename popurban popurb
rename marriage marr
rename divorce  divr

* The encode step below is kept disabled.
* encode region, gen(reg)
* drop region

* Variable labels
label variable state  "State"
label variable pop    "1980 Population, '000"
label variable popurb "1980 Urban population, '000"
label variable medage "Median age, years"
label variable drate  "Death rate per 10,000"
label variable marr   "Marriages, '000"
label variable divr   "Divorces, '000"
* label var reg "Census region"

* Dataset label, storage compression, and the final variable selection
label data "1980 Census data for NE and NC states"
compress
keep state region pop popurb medage marr divr

* Show the result, then write it out as a Stata dataset and as a text file
list, separator(0)
save census2c, replace
outsheet using census2c, replace
* Reload the saved extract and inspect it
use census2c
list, separator(0)
describe

* Urban share of the population: first as a proportion, then rescaled by 100
generate urbanized = popurb / pop
summarize urbanized
replace urbanized = urbanized * 100
summarize urbanized

* Within each region, largest population first
gsort +region -pop
list region state pop, sepby(region)

* Ascending sort on pop: first five and last five observations
sort pop
list state region pop in 1/5
list state region pop in -5/l

* Descending sort on pop: first five observations
gsort -pop
list state region pop in 1/5

* Median age copied only where pop exceeds 5000; other rows are left missing
generate medagel = medage if pop > 5000
sort state
list state region pop medagel, separator(0)

* The same subset summarized two ways: via the new variable and via an if qualifier
summarize medagel
summarize medage if pop > 5000
* Approach 1: start each flag at zero, then switch it on where the
* condition holds and pop is not missing.
generate smallpop = 0
replace smallpop = 1 if !missing(pop) & pop <= 5000
generate largepop = 0
replace largepop = 1 if !missing(pop) & pop > 5000
list state pop smallpop largepop, separator(0)
drop smallpop largepop

* Approach 2: assign the logical expression directly.
* There is no guard for missing pop in this version.
generate smallpop = pop <= 5000
generate largepop = pop > 5000
drop smallpop largepop

* Approach 3: logical expression, evaluated only where pop is not missing.
* These are the versions kept for the rest of the script.
generate smallpop = pop <= 5000 if !missing(pop)
generate largepop = pop > 5000 if !missing(pop)
* summarize medage marr divr if smallpop
* summarize medage marr divr if largepop

* Summaries for each region code separately, then with the by prefix
summarize medage marr divr if region == 1
summarize medage marr divr if region == 2
sort region
by region: summarize medage marr divr
tabstat medage, by(region) statistics(N mean sd min max)

* Single size code: 1 where smallpop is set, 2 where largepop is set
generate popsize = 2*largepop + smallpop
bysort region popsize: summarize medage marr divr
* Inspect region and the value label named cenreg
describe region
label list cenreg

* Variable labels for the variables created earlier in this do-file
label variable urbanized "Population in urban areas, %"
label variable smallpop  "States with <= 5 million pop, 1980"
label variable largepop  "States with > 5 million pop, 1980"
label variable popsize   "Population size code"
describe pop smallpop largepop popsize urbanized

* Value label for popsize, stored under the same name as the variable
label define popsize 1 "<= 5 million" 2 "> 5 million"
label values popsize popsize
describe popsize
bysort popsize: summarize medage

* Dataset label, one dataset-level note, and notes attached to variables
label data "1980 US Census data with population size indicators"
notes _dta: Subset of Census data, prepared on TS for Chapter 2 
notes medagel: median age for large states only
notes popsize: variable separating states by population size
notes popsize: value label popsize defined for this variable
describe
notes
* generate netmarr = marr/divr
* summarize netmarr

* Code 1 when marriages exceed twice the divorces, 2 otherwise.
* A missing ratio (marr or divr missing, or divr equal to zero) compares as
* larger than any number, so without the if qualifier such rows would be
* coded 1 and labelled "marr > 2 divr". They are left missing instead,
* matching the !missing() guard used for smallpop and largepop.
generate netmarr2x = cond(marr/divr > 2.0, 1, 2) if !missing(marr/divr)
label define netmarr2xc 1 "marr > 2 divr" 2 "marr <= 2 divr"
label values netmarr2x netmarr2xc
tabstat pop medage, by(netmarr2x)

* Assign each median age to the first listed cutoff (28 to 33) that is not
* below it, then tabulate and plot the counts per bracket.
generate medagebrack = recode(medage, 28, 29, 30, 31, 32, 33)
tabulate medagebrack
histogram medagebrack, discrete frequency ///
    lcolor(black) fcolor(gs15) addlabels ///
    addlabopts(mlabposition(6)) xtitle(Upper limits of median age) ///
    title(Northeast and North Central States: Median Age) 
* Running total of pop within each region, largest state first.
* The last observation of each region therefore holds the regional total.
gsort region -pop
by region: generate totpop = sum(pop)
list region state pop totpop, sepby(region)
by region: list region totpop if _n == _N

* Regional mean of pop, repeated on every observation of the region
by region: egen meanpop = mean(pop)
list region state pop meanpop, sepby(region)

* popsize is already in memory from an earlier step of this do-file, so a
* plain generate would stop with "already defined" (r(110)). Drop any
* existing copy first; capture keeps this step working when it is run on
* data that does not yet contain popsize. Notes attached to the old copy
* go with it; nothing later in the visible script reads them.
capture drop popsize
generate popsize = smallpop + 2*largepop
label variable popsize "Population size code"
* modify lets the definition succeed whether or not the label already exists
label define popsize 1 "<= 5 million" 2 "> 5 million", modify
label values popsize popsize

* Mean of pop within each region-by-size cell
bysort region popsize: egen meanpop2 = mean(pop)
list region popsize state pop meanpop2, sepby(region popsize)
* Create country-named copies of the numbered gdp variables: the first
* code in the list is paired with gdp1, the second with gdp2, and so on.
use gdp4cty, clear
local country US UK DE FR
local idx = 0
foreach c of local country {
    local ++idx
    generate double `c'gdp = gdp`idx'
}

* Log of each country series, followed by its summary statistics
foreach c of local country {
    generate double lngdp`c' = log(`c'gdp)
    summarize lngdp`c'
}
* Inspect stored results. Each listing command must directly follow the
* command whose results it displays.
use census2c, clear
* Results left behind by summarize
summarize pop
return list
* Results left behind by mean, including the e(b) and e(V) matrices
mean pop popurb
ereturn list
matrix list e(b)
matrix list e(V)