*descriptives.do * Stata do file for descriptives workshop * using HILDA data, but saving to a working file * recodes, tables, graphics * june 2011 cd "/Volumes/Work/Projects/2011/descriptives" use "/Volumes/Work/Data/HILDA9/Data/STATA 90c/Combined_i90c.dta", clear *recode required vars recode ihhwtrps (min/0=.), copyrest gen(psampwt) la var psampwt "HILDA person sample size weight" gen mywt = 1 la var mywt "Blank weight" *household category for comparisons recode ihhfty /// (4/8 = 1) (13/17 = 2) (nonm = 3), /// gen(hhtype) la var hhtype "Household type (reduced)" la def hhtype 1 "Couple with dep children" 2 "Lone parent with dep children" 3 "Other" la val hhtype hhtype ta ihhfty hhtype *income measures *complete household income gen hhinc = ihifefp /1000 la var hhinc "Annual household gross income (thousands)" *isolate hhs with single earner recode iwsfei (0 = 0) (nonm=1), gen(has_iwsfei) bys ihhrhid: egen tot_has_iwsfei = sum(has_iwsfei) list ihhrhid iwsfei has_iwsfei tot_has_iwsfei hhinc hhtype in 1/20 la var has_iwsfei "Flag for persons in hh where wage & salary earning" la var tot_has_iwsfei "Total num persons in hh where wage & salary earning" gen s_hhinc = cond(tot_has_iwsfei==1, hhinc, 0) recode s_hhinc (0=.) la var s_hhinc "Household income where single earner" sum s_hhinc, d list ihhrhid iwsfei has_iwsfei tot_has_iwsfei hhinc s_hhinc hhtype in 1/20 *categorical var with grouped hhold income capture drop s_hhinc_grp egen s_hhinc_grp = cut(s_hhinc), at(0, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 200, 999) la var s_hhinc_grp "Household income where single earner (grouped)" ta s_hhinc_grp #delimit ; la def s_hhinc_grp 0 "Under 20,000" 20 "20,000--29,999" 30 "30,000--39,999" 40 "40,000--49,999" 50 "50,000--59,999" 60 "60,000--69,999" 70 "70,000--79,999" 80 "80,000--89,999" 90 "90,000--99,999" 100 "100,000--124,999" 125 "125,000--149,999" 150 "150,000--199,999" 200 "200,000 and over", modify; #delimit cr la val s_hhinc_grp s_hhinc_grp ta s_hhinc_grp *repeat but without labels (needed for graph) capture drop s_hhinc_grp2 egen s_hhinc_grp2 = cut(s_hhinc), at(0, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 200, 999) la var s_hhinc_grp2 "Household income where single earner (grouped)" ta s_hhinc_grp2 *various hh, demographic & lm characteristics recode ihgage (min/24=1) (25/34=2) (35/44=3) (45/54=4) (55/64=5) (65/max=6), gen(agegrp) la var agegrp "Age group" la def agegrp 1 "Under 25" 2 "25 to 34" 3 "35 to 44" 4 "45 to 54" 5 "55 to 64" 6 "65 and older", modify la val agegrp agegrp ta ihgage agegrp gen sex = ihgsex la var sex "Sex" la def sex 1 "Male" 2 "Female" la val sex sex recode ijbmcnt (min/0=.) (3=1) (2=2) (1 8=3), gen(empstat) la var empstat "Employment status" la def empstat 1 "Permanent" 2 "Casual" 3 "Temporary & other", modify la val empstat empstat gen s_hhtype = cond(tot_has_iwsfei==1, 1, 2) la var s_hhtype "Households with single earner" la def s_hhtype 1 "Single earner" 2 "Other" la val s_hhtype s_hhtype recode iesdtl (min/0=.), copyrest gen(lfstat) la var lfstat "Labour market status" la def lfstat /// 1 "Emp full-time" /// 2 "Emp part-time" /// 3 "Unemp, look FT" /// 4 "Unemp, look PT" /// 5 "NILF, marg attach" /// 6 "NILF, not marg" /// 7 "Other", modify la val lfstat lfstat keep psampwt - lfstat save mydata, replace *tables and graphs for presentation use mydata, clear log using "report/t_summary.smcl", smcl replace sum hhinc, d log close *switch off following after first run *translate "report/t_summary.smcl" "report/t_summary.pdf" tabstat hhinc, by(hhtype) format(%9.0fc) stats(mean median iqr skewness kurtosis n) *latabstat hhinc, by(hhtype) format(%9.0fc) stats(mean median iqr sk k n) /// * labelwidth(50) caption(Household income) tf(report/t_tabstat1) replace *histograms for full hh income histogram hhinc if hhinc < 300 & hhtype != 3, bin(10) percent by(hhtype) graph export "report/g_histogram1.pdf", replace histogram hhinc if hhinc < 300 & hhtype != 3, bin(20) percent by(hhtype) graph export "report/g_histogram2.pdf", replace *kernel density graphs for full hh income twoway (kdensity hhinc if hhinc <= 300 & hhtype ==1) (kdensity hhinc if hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) graph export "report/g_density1.pdf", replace twoway (kdensity hhinc if hhinc <= 300 & hhtype ==1) (kdensity hhinc if hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(101, lcolor(green) lwidth(1)) graph export "report/g_density2.pdf", replace twoway (kdensity hhinc if hhinc <= 300 & hhtype ==1) /// (kdensity hhinc if hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(101, lcolor(green) lwidth(1)) xline(127, lcolor(green) lpattern(longdash)) graph export "report/g_density3.pdf", replace twoway (kdensity hhinc if hhinc <= 300 & hhtype ==1) /// (kdensity hhinc if hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(101, lcolor(green) lwidth(1)) xline(127, lcolor(green) lpattern(longdash)) /// xline(71, lcolor(green) lpattern(shortdash)) graph export "report/g_density4.pdf", replace tabstat s_hhinc, by(hhtype) format(%9.0fc) stats(mean median iqr sk k n) *latabstat s_hhinc, by(hhtype) format(%9.0fc) stats(mean median iqr sk k n) /// * labelwidth(50) caption(Household income) tf(report/t_s_tabstat1) replace *histograms for subset hh income variable histogram s_hhinc if s_hhinc < 300 & hhtype != 3, bin(10) percent by(hhtype) graph export "report/g_s_histogram1.pdf", replace histogram s_hhinc if s_hhinc < 300 & hhtype != 3, bin(20) percent by(hhtype) graph export "report/g_s_histogram2.pdf", replace *kernel density graphs for subset hh income variable twoway (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==1) (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) graph export "report/g_s_density1.pdf", replace twoway (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==1) (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(81, lcolor(green) lwidth(1)) graph export "report/g_s_density2.pdf", replace twoway (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==1) (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(81, lcolor(green) lwidth(1)) xline(101, lcolor(green) lpattern(longdash)) graph export "report/g_s_density3.pdf", replace twoway (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==1) (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple with dep children") label(2 "Sole parent with dep children")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) /// xline(81, lcolor(green) lwidth(1)) xline(101, lcolor(green) lpattern(longdash)) /// xline(68, lcolor(green) lpattern(shortdash)) graph export "report/g_s_density4.pdf", replace twoway (kdensity hhinc if hhinc <= 300 & hhtype ==1) (kdensity hhinc if hhinc <= 300 & hhtype ==2) /// (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==1) (kdensity s_hhinc if s_hhinc <= 300 & hhtype ==2), /// legend(label(1 "Couple (all hhs)") label(2 "Sole parent (all hhs)") /// label(3 "Couple (comparable hhs)") label(4 "Sole parent (comparable hhs)")) /// xtitle(Annual hh gross income (thousands)) ytitle(Density) xlabel(#10) graph export "report/g_density5.pdf", replace *confidence intervals bys hhtype: ci s_hhinc bys hhtype: ci s_hhinc, level(90) *illustrate difference with weighting svyset [pw=mywt] tabout hhtype using "report/t_ci.tex", c(mean s_hhinc ci) svy sum style(tex) cisep(-) replace f(0) svyset [pw=psampwt] tabout hhtype using "report/t_ci_wts.tex", c(mean s_hhinc ci) svy sum style(tex) cisep(-) replace f(0) *illustrate difference with row and col %s tabout agegrp hhtype using "report/t_agegrp_row.tex", c(row) replace /// npos(col) style(tex) h1(nil) h2(nil) tabout agegrp hhtype using "report/t_agegrp_col.tex", c(col) replace /// npos(row) style(tex) h1(nil) h2(nil) *cumulative hh income example tabout s_hhinc_grp hhtype [iw=psampwt] using "report/t_incgrp.tex", c(col cum) replace /// style(tex) h1(nil) h2(nil) h3(nil) /// f(1) lines(none) tabout s_hhinc_grp2 hhtype [iw=psampwt] using "incgrp.txt", c(cum) replace /// h1(nil) h2(nil) h3(nil) lines(none) ptotal(none) *send table out, then bring back for graphing save mydata, replace clear insheet using incgrp.txt la var v1 "Cumulative hhold income ('000s)" la var v2 "Couple with dep children" la var v3 "Sole parent with dep children" twoway (line v2 v1) (line v3 v1), ytitle(Percentage) graph export "report/g_cumulative.pdf", replace use mydata, clear *survey set for lf comparison tables svyset [pw=psampwt] tabout lfstat hhtype using "report/t_lfstat_ci.tex", c(col ci) replace /// style(tex) h1(nil) h2(nil) h3(nil) npos(row) /// svy percent lay(row) clab(_ _) lines(none) cisep(--) f(0) *subset of those under 35 tabout lfstat hhtype if agegrp <=2 using "report/t_lfstat_ci_subset.tex", c(col ci) replace /// style(tex) h1(nil) h2(nil) h3(nil) npos(row ) /// svy percent lay(row) clab(_ _) lines(none) cisep(--) f(0) tabout lfstat hhtype if agegrp <=2 using "report/t_lfstat_se_subset.tex", c(col se) replace /// style(tex) h1(nil) h2(nil) h3(nil) /// svy percent lines(none) *calculate the SE of the difference di 1.96 * ( sqrt(1.1 ^2 + 2.9 ^2) )