/* This do file reads in a STATA dataset (which you can download from the ipums
   website and which you should call cps_ipums), replaces top-coded observations,
   and produces most of the variables described in Appendix A of Heathcote et al.
   (2010). From here, you should be able to (at least roughly) reproduce the
   CPS-related figures in that paper.

   Note: I couldn't figure out why, but the private transfers according to my
   calculations are somewhat lower than those in Heathcote et al. For this
   reason, for all household earning measures that incorporate private
   transfers, the 25th percentile, 10th percentile, and 5th percentile measures
   will be lower--and the variance of the log will be higher--compared to what
   is in the Heathcote et al. paper. The trends in the income variables'
   inequality measures will be as in the paper, though. */

set more off

/* The first time you run this do file, this local macro should be equal to 1.
   The first part of the code, which runs only when the macro is set equal to 1,
   takes a while to run; it reads the data and imputes the top-coded income
   variables, then saves the result as cps_ipums_topcode. After you run this
   part of the code once, you can set read_data to be equal to 0. */
local read_data=0

if `read_data'==1 {

    /* -clear- is the documented option of -use- for discarding data in memory */
    use cps_ipums, clear
    rename *, lower

    /* Recode the special all-nines style codes to 0 and create, for every
       income variable, two helper variables:
         top<var>     year-specific top-code threshold (filled in below)
         top<var>_v2  a second constant value that is also treated as top-coded
                      in the imputation loop */
    foreach var of varlist ftotval inctot {
        replace `var'=0 if `var'==99999999 | `var'== 99999998
    }
    foreach var of varlist incwage incbus incfarm oincwage {
        replace `var'=0 if `var'==9999999 | `var'== 9999998
        gen top`var'=.
        gen top`var'_v2=999997
    }
    foreach var of varlist incretir incsurv incdisab incdivid oincbus oincfarm inclongj {
        replace `var'=0 if `var'==999999
        gen top`var'=.
        gen top`var'_v2=99997
    }
    foreach var of varlist incss incssi incwelfr incgov incidr incdrt incint incunemp incwkcom incvet incrent inceduc incchild incalim incasist incother {
        replace `var'=0 if `var'==99999
        gen top`var'=.
        gen top`var'_v2=99997
    }
    foreach var of varlist incdisa1 incdisa2 increti1 increti2 incsurv1 incsurv2 incaloth {
        replace `var'=0 if `var'==99999
        gen top`var'=.
        gen top`var'_v2=99997
    }
    cap drop *swap

    /* See https://cps.ipums.org/cps/topcodes_tables.shtml for the values that
       are used below. */
    replace topinclongj=99999 if year>=1988 & year<=1995
    replace topincalim=99999 if year>=1988 & year<=1998
    replace topincasist=99999 if year>=1988 & year<=1998
    replace topoincbus=99999 if year>=1988 & year<=1995
    replace topincchild=99999 if year>=1988 & year<=1998
    replace topincdisa1=99999 if year>=1988 & year<=1998
    replace topincdisa2=99999 if year>=1988 & year<=1998
    replace topincdivid=99999 if year>=1988 & year<=1998
    replace topinceduc=99999 if year>=1988 & year<=1998
    replace topoincfarm=99999 if year>=1988 & year<=1995
    replace topincint=99999 if year>=1988 & year<=1998
    replace topincother=99999 if year>=1988 & year<=1998
    replace topincrent=99999 if year>=1988 & year<=1998
    replace topincreti1=99999 if year>=1988 & year<=1998
    replace topincreti2=99999 if year>=1988 & year<=1998
    replace topincss=9999 if year>=1976 & year<=1981
    replace topincssi=5999 if year>=1976 & year<=1984
    replace topincsurv1=99999 if year>=1988 & year<=1998
    replace topincsurv2=99999 if year>=1988 & year<=1998
    replace topincunemp=99999 if year>=1988 & year<=2010
    replace topincvet=29999 if year>=1988 & year<=1993
    replace topoincwage=99999 if year>=1988 & year<=1995
    replace topincwelfr=19999 if year>=1988 & year<=1993
    replace topincwkcom=99999 if year>=1988 & year<=2010
    replace topincaloth=50000 if year>=1968 & year<=1981
    replace topincbus=50000 if year>=1968 & year<=1981
    replace topincdrt=50000 if year>=1968 & year<=1981
    replace topincgov=29999 if year>=1968 & year<=1987
    replace topinclongj=150000 if year>=1996 & year<=2002
    replace topincalim=50000 if year==1999
    replace topincasist=30000 if year>=1999 & year<=2010
    replace topoincbus=40000 if year>=1996 & year<=2002
    replace topincchild=15000 if year>=1999 & year<=2010
    replace topincdisa1=35000 if year>=1999 & year<=2010
    replace topincdisa2=35000 if year>=1999 & year<=2010
    replace topincdivid=15000 if year>=1999 & year<=2010
    replace topinceduc=20000 if year>=1999 & year<=2010
    replace topoincfarm=25000 if year>=1996 & year<=2010
    replace topincint=15000 if year>=1999 & year<=2010
    replace topincother=25000 if year>=1999 & year<=2010
    replace topincrent=25000 if year>=1999 & year<=2002
    replace topincreti1=45000 if year>=1999 & year<=2010
    replace topincreti2=45000 if year>=1999 & year<=2010
    replace topincss=19999 if year>=1982 & year<=1987
    replace topincssi=9999 if year>=1985 & year<=1995
    replace topincsurv1=50000 if year>=1999 & year<=2010
    replace topincsurv2=50000 if year>=1999 & year<=2010
    /* This range previously started in 1988, which silently overwrote the
       29999 top code assigned to 1988-1993 above. It now starts in 1994, in
       line with the other two-period variables (incwelfr, incss).
       NOTE(review): confirm the 1994 break against the IPUMS top-code table. */
    replace topincvet=99999 if year>=1994 & year<=2010
    replace topoincwage=25000 if year>=1996 & year<=2002
    replace topincwage=50000 if year>=1968 & year<=1981
    replace topincwelfr=25000 if year>=1994 & year<=2010
    replace topincaloth=75000 if year>=1982 & year<=1984
    replace topincbus=75000 if year>=1982 & year<=1984
    replace topincdrt=75000 if year>=1982 & year<=1984
    replace topincfarm=50000 if year>=1976 & year<=1981
    replace topincretir=50000 if year>=1976 & year<=1981
    replace topinclongj=200000 if year>=2003 & year<=2010
    replace topincalim=40000 if year>=2000 & year<=2002
    replace topoincbus=50000 if year>=2003 & year<=2010
    replace topincrent=40000 if year>=2003 & year<=2010
    replace topincss=29999 if year>=1988 & year<=1993
    replace topincssi=25000 if year>=1996 & year<=2010
    replace topoincwage=35000 if year>=2003 & year<=2010
    replace topincwage=75000 if year>=1982 & year<=1984
    replace topincaloth=99999 if year>=1985 & year<=1987
    replace topincbus=99999 if year>=1985 & year<=1987
    replace topincdrt=99999 if year>=1985 & year<=1987
    replace topincfarm=75000 if year>=1982 & year<=1984
    replace topincretir=75000 if year>=1982 & year<=1984
    replace topincalim=45000 if year>=2003 & year<=2010
    replace topincss=49999 if year>=1994 & year<=2010
    replace topincwage=99999 if year>=1985 & year<=1987
    replace topincwage=199998 if year>=1988 & year<=1995
    replace topincfarm=99999 if year>=1985 & year<=1987
    replace topincretir=99999 if year>=1985 & year<=1987

    /* In this loop, we replace the observations of each top-coded income
       variable with an estimate of what the censored income would have been
       without the top coding. For each year, log(1-CDF) is regressed on
       log(income) among observations above the weighted 90th percentile, and
       top-coded values are scaled by b/(b-1), where b = -(slope); the scaling
       factor is bounded to lie between 1 and 5. */
    foreach incvar of varlist incwage incbus incfarm oincwage incretir incsurv incdisab incdivid /*
        */ oincbus oincfarm inclongj incss incssi incwelfr incgov incidr incdrt incint incunemp /*
        */ incwkcom incvet incrent inceduc incchild incalim incasist incother incdisa1 incdisa2 /*
        */ increti1 increti2 incsurv1 incsurv2 incaloth {

        /* Quietly means that this part of the code is not outputted */
        quietly {
            sort year `incvar'
            /* sum() inside -gen- is a running sum: cumulative weight in
               ascending income order within the year */
            by year : gen culm_weight=sum(wt) if `incvar'~=0
            /* egen total() is the within-year total weight (documented name
               of the older egen sum()) */
            by year : egen total_weight=total(wt) if `incvar'~=0
            /* Compute the logarithm of 1-CDF of the income variable */
            gen logv=log(1-culm_weight/total_weight)
            gen logy=log(`incvar')

            /* Beginning in 2011, the CPS uses a different way to preserve the
               confidentiality of survey respondents; no top-coding */
            forvalues yr=1976/2010 {
                local factor=1
                sum `incvar' if year==`yr' & `incvar'~=top`incvar' & `incvar'~=0 [aw=wt] , detail
                local x1=r(p90)
                count if year==`yr' & `incvar'~=. & `incvar'>`x1' & `incvar'~=`incvar'[_n+1]
                /* Run the regression only if there are sufficiently many
                   observations */
                if r(N) > 5 {
                    reg logv logy if year==`yr' & `incvar'~=. & `incvar'>`x1' & `incvar'~=`incvar'[_n+1]
                    local factor=(-1)*_b[logy]/(-1*_b[logy]-1)
                    replace `incvar'=min(max(`factor',1),5)*`incvar' if year==`yr' & ((`incvar'>=top`incvar' & `incvar'~=.) | `incvar'==top`incvar'_v2)
                }
            }
        }
        drop logy logv culm_weight total_weight
    }

    drop top*
    compress
    save cps_ipums_topcode, replace
}
/* End of the "read_data" part of the code */

use cps_ipums_topcode, clear

replace earnweek=0 if earnweek==9999
replace hhincome=0 if hhincome==99999999 | hhincome==-9999997

/* 101 is code for the head of the household */
gen cps_head = (relate == 101)
bys serial year: egen cps_head_count = total(cps_head)
gen hours=uhrswork*wkswork1
gen wage=incwage/hours
/* Flag households that do not have exactly one head */
gen bad_ref_person = (cps_head_count ~= 1)
drop cps_head_count

gen working_age = (age >= 25 & age <= 60)
gsort serial year -working_age sex -age
/* Positive wage income reported together with wkswork2==0 */
gen zero_weeks = (incwage > 0 & incwage<. & wkswork2==0)
by serial year: egen temp = total(working_age)
gen no_working_age = (temp==0)
drop temp

/* Mwage is the minimum wage; see http://www.dol.gov/whd/minwage/chart.htm for
   the source of these values */
gen mwage=2.20
replace mwage=2.30 if year>=1977
replace mwage=2.65 if year>=1978
replace mwage=2.90 if year>=1979
replace mwage=3.10 if year>=1980
replace mwage=3.35 if year>=1981
replace mwage=3.80 if year>=1990
replace mwage=4.25 if year>=1991
replace mwage=4.75 if year>=1996
replace mwage=5.15 if year>=1997
replace mwage=5.85 if year>=2007
replace mwage=6.55 if year>=2008
replace mwage=7.25 if year>=2009

/* NOTE(review): the statement that defined low_wage arrived garbled in this
   file as
       gen low_wage = (wage=260 & hrswork<99) | (hours>=260 & uhrswork<99))*(age>=25 & age<=60)
   which has unbalanced parentheses and cannot run. It looks like the text
   between a "<" and the following ">" was stripped, fusing two statements.
   The two lines below are a reconstruction: the 0.5*mwage threshold and the
   variable name sample_c are assumptions -- please confirm against the
   original do file / Appendix A of Heathcote et al. Neither variable is used
   later in this file. */
gen low_wage = (wage < 0.5*mwage)
gen sample_c = ((hours>=260 & hrswork<99) | (hours>=260 & uhrswork<99))*(age>=25 & age<=60)

/* Multiply every dollar-denominated variable by cpi99 */
foreach var of varlist ftotval inc* hhincome housret earnweek {
    replace `var'=`var'*cpi99
}

/* See Appendix A of Heathcote for these formulas.
   NOTE(review): the formulas below switch at year>=1987, whereas the top-code
   table above and the incsurv1/incsurv2 fix-ups start in 1988 -- confirm which
   survey year the detailed income components become available. */
gen laborinc = incwage
replace laborinc = inclongj*(srcearn==1)+oincwage if year>=1987
gen self = incbus + incfarm
replace self= inclongj*(srcearn==2 | srcearn==3)+oincbus + oincfarm if year>=1987
gen private=incaloth + incretir
replace incsurv1=0 if incsurv1==. & year==1988
replace incsurv2=0 if incsurv2==. & year==1988
replace private=incchild + incalim + incdisa1 + incdisa2 + incother+ increti1 + increti2 + incsurv1 + incsurv2 + incasist if year>=1987
gen earnings=laborinc+ 2/3 * self
gen asset_inc= incdrt + incint
replace asset_inc = incint + incdivid + incrent if year>=1987
gen transfers=incss+ incssi + incwelfr + incgov
replace transfers=incvet+ inceduc+ incss+ incssi + incwelfr +incdisab + incwkcom if year>=1987
gen earnings_plus=earnings+private
gen pre_gov=earnings_plus + asset_inc
gen pre_tax=pre_gov + transfers

/* Logs of the person-level income measures. The range laborinc-pre_tax relies
   on the creation order of the variables above, so do not generate unrelated
   variables between laborinc and pre_tax. */
foreach var of varlist laborinc-pre_tax {
    gen l`var'=log(`var')
}

/* Equivalization: the first adult counts 1, each further adult .7 and each
   child .5; a household with no adult counts .5 per child */
gen byte adult = (age > 16)
gen byte child = (adult == 0)
by serial year: egen adult_count = total(adult)
by serial year: egen child_count = total(child)
gen people = 1 + (adult_count-1)*.7 + .5*child_count if adult_count > 0
replace people = child_count*.5 if adult_count == 0

/* Here "head" is the household member with the highest earnings; households
   whose head has zero earnings are dropped */
gsort serial year -earnings
by serial year: gen head = (_n==1)
gen temp_head_earnings = (head==1)*earnings
by serial year: egen head_earnings=max(temp_head_earnings)
drop temp_head_earnings
drop if head_earnings==0

/* Household-level equivalized measures (<var>_HH) and their logs */
foreach var of varlist earnings-pre_tax {
    bys serial year: egen `var'_HH=total(`var'/people)
    gen l`var'_HH=log(`var'_HH)
    /* Trim the bottom half a percent; see beginning of Section 4.2 of the
       Heathcote paper */
    bys year: egen trim_temp = pctile(`var'_HH),p(.5)
    replace `var'_HH = . if `var'_HH == 0
    /* The cutoff trim_temp is a percentile of `var'_HH, so the comparison must
       use `var'_HH as well. The previous code compared the person-level `var'
       (not equivalized, not summed over the household) with the household
       cutoff. The log is blanked first because the level is needed for the
       comparison. */
    replace l`var'_HH = . if `var'_HH<=trim_temp & trim_temp ~= .
    replace `var'_HH=. if `var'_HH<=trim_temp & trim_temp ~= .
    drop trim_temp
}