program define sub_dec
    /***********************************************************/
    /* computes additive decompositions of MLD and THEIL by    */
    /* subgroup and appropriate bootstrap confidence intervals */
    /*                                                         */
    /* syntax: sub_dec weightvar incomevar groupvar            */
    /*   where weightvar is a weight                           */
    /*         incomevar is income                             */
    /*         groupvar  is the subgroup variable;             */
    /*                   subgroups are observations            */
    /*                   with same values of groupvar          */
    /*                                                         */
    /* if no weights are needed, generate a variable with all  */
    /* values set to 1 and use it as weight variable           */
    /*                                                         */
    /* Note: the bootstrap is a random process and may lead to */
    /* missing values when statistics are calculated from the  */
    /* resample; this problem vanishes with increasing sample  */
    /* size                                                    */
    /*                                                         */
    /* exits with return code 411 if any income is <= 0        */
    /*                                                         */
    /* for details see Biewen (2002): Bootstrap inference for  */
    /* inequality, mobility and poverty measurement, Journal   */
    /* of Econometrics, Vol. 108, pp. 317 - 342                */
    /*                                                         */
    /* copyright Martin Biewen 2001                            */
    /***********************************************************/

    version 6

    /* parse syntax; tokenize the expanded varlist so that `1', `2', `3' */
    /* are always the three individual variable names, even when the     */
    /* caller used varlist shorthand                                     */
    syntax varlist(min=3 max=3)
    tokenize `varlist'

    /* set number of bootstrap replications and confidence level */
    tempname reps conf nobs
    scalar `reps' = 100
    scalar `conf' = 0.95
    scalar `nobs' = _N

    /* positions (in the sorted bootstrap draws) of the lower and upper */
    /* tail quantiles; computed once because every interval uses them   */
    local idxlo = int((-(`conf'/2) + 0.5) * `reps')
    local idxhi = `reps' - `idxlo'
    if `idxlo' < 1 {
        di in red _newline "error: too few bootstrap replications for the requested confidence level"
        exit 198
    }

    /* check if incomes > 0 */
    quietly sum `2'
    if r(min) <= 0 {
        di in red _newline "error: incomes must be strictly positive"
        /* nonzero return code so that callers can detect the failure */
        exit 411
    }

    /* compute estimates on the original sample */
    ind_dec `1' `2' `3'
    tempname nsubg
    scalar `nsubg' = r(nosubg)
    local i = 1
    while `i' <= `nsubg' {
        tempname mldw`i' thlw`i' rmldw`i' rthlw`i' wmld`i' wthl`i' ps`i' is`i'
        scalar `mldw`i'' = r(mldw`i')
        scalar `thlw`i'' = r(thlw`i')
        scalar `rmldw`i'' = r(mldw`i')/r(mld)
        scalar `rthlw`i'' = r(thlw`i')/r(thl)
        scalar `wmld`i'' = r(wmld`i')
        scalar `wthl`i'' = r(wthl`i')
        scalar `ps`i'' = r(ps`i')
        scalar `is`i'' = r(is`i')
        local i = `i' + 1
    }
    tempname mldb rmldb mld rthlb thlb thl
    scalar `mldb' = r(mldb)
    scalar `thlb' = r(thlb)
    scalar `rmldb' = r(mldb)/r(mld)
    scalar `rthlb' = r(thlb)/r(thl)
    scalar `mld' = r(mld)
    scalar `thl' = r(thl)

    /* preserve and initialize post: one results file per subgroup */
    /* plus one file for the between-group and overall statistics  */
    preserve
    local i = 1
    while `i' <= `nsubg' {
        tempname p`i'
        tempfile b`i'
        quietly postfile `p`i'' mw`i' tw`i' rmw`i' rtw`i' wm`i' wt`i' p`i' i`i' using `b`i'', replace
        local i = `i' + 1
    }
    tempname p
    tempfile b
    quietly postfile `p' mb tb rmb rtb m t using `b', replace

    /* bootstrap loop: each replication posts sqrt(N) * (resample */
    /* statistic - original estimate) for every statistic         */
    local j = 1
    while `j' <= `reps' {
        restore, preserve
        bsample
        ind_dec `1' `2' `3'
        local i = 1
        while `i' <= `nsubg' {
            post `p`i'' sqrt(`nobs') * (r(mldw`i') - `mldw`i'') /*
                */ sqrt(`nobs') * (r(thlw`i') - `thlw`i'') /*
                */ sqrt(`nobs') * (r(mldw`i')/r(mld) - `rmldw`i'') /*
                */ sqrt(`nobs') * (r(thlw`i')/r(thl) - `rthlw`i'') /*
                */ sqrt(`nobs') * (r(wmld`i') - `wmld`i'') /*
                */ sqrt(`nobs') * (r(wthl`i') - `wthl`i'') /*
                */ sqrt(`nobs') * (r(ps`i') - `ps`i'') /*
                */ sqrt(`nobs') * (r(is`i') - `is`i'')
            local i = `i' + 1
        }
        post `p' sqrt(`nobs') * (r(mldb) - `mldb') /*
            */ sqrt(`nobs') * (r(thlb) - `thlb') /*
            */ sqrt(`nobs') * (r(mldb)/r(mld) - `rmldb') /*
            */ sqrt(`nobs') * (r(thlb)/r(thl) - `rthlb') /*
            */ sqrt(`nobs') * (r(mld) - `mld') /*
            */ sqrt(`nobs') * (r(thl) - `thl')
        local j = `j' + 1
    }
    local i = 1
    while `i' <= `nsubg' {
        postclose `p`i''
        local i = `i' + 1
    }
    postclose `p'

    /* generate lower and upper confidence bounds:                    */
    /*   lower = estimate - (upper-tail draw) / sqrt(N)               */
    /*   upper = estimate - (lower-tail draw) / sqrt(N)               */
    /* each column is sorted first so that [`idxlo'] and [`idxhi']    */
    /* pick the tail quantiles of the posted draws                    */

    /* for each subgroup */
    local i = 1
    while `i' <= `nsubg' {
        use `b`i'', clear

        sort mw`i'
        tempname lmw`i' umw`i'
        scalar `lmw`i'' = `mldw`i'' - mw`i'[`idxhi'] / sqrt(`nobs')
        scalar `umw`i'' = `mldw`i'' - mw`i'[`idxlo'] / sqrt(`nobs')

        sort tw`i'
        tempname ltw`i' utw`i'
        scalar `ltw`i'' = `thlw`i'' - tw`i'[`idxhi'] / sqrt(`nobs')
        scalar `utw`i'' = `thlw`i'' - tw`i'[`idxlo'] / sqrt(`nobs')

        sort rmw`i'
        tempname lrmw`i' urmw`i'
        scalar `lrmw`i'' = `rmldw`i'' - rmw`i'[`idxhi'] / sqrt(`nobs')
        scalar `urmw`i'' = `rmldw`i'' - rmw`i'[`idxlo'] / sqrt(`nobs')

        sort rtw`i'
        tempname lrtw`i' urtw`i'
        scalar `lrtw`i'' = `rthlw`i'' - rtw`i'[`idxhi'] / sqrt(`nobs')
        scalar `urtw`i'' = `rthlw`i'' - rtw`i'[`idxlo'] / sqrt(`nobs')

        sort wm`i'
        tempname lwm`i' uwm`i'
        scalar `lwm`i'' = `wmld`i'' - wm`i'[`idxhi'] / sqrt(`nobs')
        scalar `uwm`i'' = `wmld`i'' - wm`i'[`idxlo'] / sqrt(`nobs')

        sort wt`i'
        tempname lwt`i' uwt`i'
        scalar `lwt`i'' = `wthl`i'' - wt`i'[`idxhi'] / sqrt(`nobs')
        scalar `uwt`i'' = `wthl`i'' - wt`i'[`idxlo'] / sqrt(`nobs')

        sort p`i'
        tempname lp`i' up`i'
        scalar `lp`i'' = `ps`i'' - p`i'[`idxhi'] / sqrt(`nobs')
        scalar `up`i'' = `ps`i'' - p`i'[`idxlo'] / sqrt(`nobs')

        sort i`i'
        tempname li`i' ui`i'
        scalar `li`i'' = `is`i'' - i`i'[`idxhi'] / sqrt(`nobs')
        scalar `ui`i'' = `is`i'' - i`i'[`idxlo'] / sqrt(`nobs')

        local i = `i' + 1
    }

    /* for the other statistics */
    use `b', clear

    sort mb
    tempname lmb umb
    scalar `lmb' = `mldb' - mb[`idxhi'] / sqrt(`nobs')
    scalar `umb' = `mldb' - mb[`idxlo'] / sqrt(`nobs')

    sort tb
    tempname ltb utb
    scalar `ltb' = `thlb' - tb[`idxhi'] / sqrt(`nobs')
    scalar `utb' = `thlb' - tb[`idxlo'] / sqrt(`nobs')

    sort rmb
    tempname lrmb urmb
    scalar `lrmb' = `rmldb' - rmb[`idxhi'] / sqrt(`nobs')
    scalar `urmb' = `rmldb' - rmb[`idxlo'] / sqrt(`nobs')

    sort rtb
    tempname lrtb urtb
    scalar `lrtb' = `rthlb' - rtb[`idxhi'] / sqrt(`nobs')
    scalar `urtb' = `rthlb' - rtb[`idxlo'] / sqrt(`nobs')

    sort m
    tempname lm um
    scalar `lm' = `mld' - m[`idxhi'] / sqrt(`nobs')
    scalar `um' = `mld' - m[`idxlo'] / sqrt(`nobs')

    sort t
    tempname lt ut
    scalar `lt' = `thl' - t[`idxhi'] / sqrt(`nobs')
    scalar `ut' = `thl' - t[`idxlo'] / sqrt(`nobs')

    /* print out results */
    di _newline
    di "Subgroup decomposition with bootstrap confidence intervals"
    di "----------------------------------------------------------"
    di "Confidence level = " %3.2f `conf' ", " %4.0f `reps' " bootstrap replications"
    di "----------------------------------------------------------"
    di _newline
    di "Contributions to overall inequality measured by MLD"
    di "---------------------------------------------------"
    di _newline
    di "Total"
    di "-----"
    di "MLD" _col(13) " = " in green %7.6f `mld' in yellow ", confidence interval " in green "[" %7.6f `lm' " ; " %7.6f `um' "]"
    di _newline
    di "Within-group: absolute contributions"
    di "------------------------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `mldw`i'' in yellow ", confidence interval " in green "[" %-7.6f `lmw`i'' " ; " %-7.6f `umw`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Within-group: relative contributions"
    di "------------------------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `rmldw`i'' in yellow ", confidence interval " in green "[" %-7.6f `lrmw`i'' " ; " %-7.6f `urmw`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Between-group: absolute contribution"
    di "------------------------------------"
    di "MLDB" _col(13) " = " in green %7.6f `mldb' in yellow ", confidence interval " in green "[" %7.6f `lmb' " ; " %7.6f `umb' "]"
    di _newline
    di "Between-group: relative contribution"
    di "------------------------------------"
    di "MLDB" _col(13) " = " in green %7.6f `rmldb' in yellow ", confidence interval " in green "[" %7.6f `lrmb' " ; " %7.6f `urmb' "]"
    di _newline
    di "Within-group MLD"
    di "-----------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `wmld`i'' in yellow ", confidence interval " in green "[" %-7.6f `lwm`i'' " ; " %-7.6f `uwm`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Population shares"
    di "-----------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `ps`i'' in yellow ", confidence interval " in green "[" %-7.6f `lp`i'' " ; " %-7.6f `up`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di _newline
    di "Contributions to overall inequality measured by THEIL"
    di "-----------------------------------------------------"
    di _newline
    di "Total"
    di "-----"
    di "THEIL" _col(13) " = " in green %7.6f `thl' in yellow ", confidence interval " in green "[" %7.6f `lt' " ; " %7.6f `ut' "]"
    di _newline
    di "Within-group: absolute contributions"
    di "------------------------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `thlw`i'' in yellow ", confidence interval " in green "[" %-7.6f `ltw`i'' " ; " %-7.6f `utw`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Within-group: relative contributions"
    di "------------------------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `rthlw`i'' in yellow ", confidence interval " in green "[" %-7.6f `lrtw`i'' " ; " %-7.6f `urtw`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Between-group: absolute contribution"
    di "------------------------------------"
    di "THEILB" _col(13) " = " in green %7.6f `thlb' in yellow ", confidence interval " in green "[" %7.6f `ltb' " ; " %7.6f `utb' "]"
    di _newline
    di "Between-group: relative contribution"
    di "------------------------------------"
    di "THEILB" _col(13) " = " in green %7.6f `rthlb' in yellow ", confidence interval " in green "[" %7.6f `lrtb' " ; " %7.6f `urtb' "]"
    di _newline
    di "Within-group THEIL"
    di "-----------------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `wthl`i'' in yellow ", confidence interval " in green "[" %-7.6f `lwt`i'' " ; " %-7.6f `uwt`i'' "]"
        local i = `i' + 1
    }
    di _newline
    di "Income shares"
    di "-------------"
    local i = 1
    while `i' <= `nsubg' {
        di "subgroup `i'" _col(13) " = " in green %7.6f `is`i'' in yellow ", confidence interval " in green "[" %-7.6f `li`i'' " ; " %-7.6f `ui`i'' "]"
        local i = `i' + 1
    }
    di _newline

    restore
end


program define ind_dec, rclass
    /* point estimates of the subgroup decomposition of MLD and THEIL   */
    /*                                                                  */
    /* arguments: `1' weight variable, `2' income variable,             */
    /*            `3' subgroup variable                                 */
    /*                                                                  */
    /* subgroups are numbered 1, 2, ... in the sort order of `3'        */
    /*                                                                  */
    /* returns r(nosubg)              number of subgroups               */
    /*         r(mldw1), r(mldw2),... within-group contributions (MLD)  */
    /*         r(thlw1), r(thlw2),... within-group contributions (THEIL)*/
    /*         r(wmld1), r(wthl1),... MLD / THEIL inside each subgroup  */
    /*         r(ps1), r(is1),...     population and income shares      */
    /*         r(mldb), r(thlb)       between-group contributions       */
    /*         r(mld), r(thl)         overall MLD and THEIL             */
    /*                                                                  */
    /* side effect: leaves the data sorted by `3'                       */

    quietly {

        /* initialize mld, thl */
        tempname mld thl
        scalar `mld' = 0
        scalar `thl' = 0

        /* number the subgroups; sort and group on the original variable */
        /* (not on a float copy) so that large integer codes are never   */
        /* rounded together                                              */
        tempvar flag subg
        sort `3'
        by `3': generate byte `flag' = (_n == 1)
        gen long `subg' = sum(`flag')
        tempname nosubg /* number of subgroups */
        scalar `nosubg' = `subg'[_N]
        return scalar nosubg = `nosubg'

        /* subgroup dummies */
        local i = 1
        while `i' <= `nosubg' {
            tempvar d`i'
            generate byte `d`i'' = (`subg' == `i')
            local i = `i' + 1
        }

        /* calculate moments m0 m1; doubles keep full precision */
        tempvar w x wx
        gen double `w' = `1'
        gen double `x' = `2'
        gen double `wx' = `w' * `x'
        tempname m0 m1
        sum `w'
        scalar `m0' = r(mean)
        sum `wx'
        scalar `m1' = r(mean)

        /* for each subgroup: calculate other moments */
        local i = 1
        while `i' <= `nosubg' {

            /* compute products */
            tempvar d`i'w d`i'wx d`i'y d`i'z
            gen double `d`i'w' = `d`i'' * `w'
            gen double `d`i'wx' = `d`i'' * `w' * `x'
            gen double `d`i'y' = `d`i'' * `w' * log(`x')
            gen double `d`i'z' = `d`i'' * `w' * `x' * log(`x')

            /* calculate moments */
            tempname md`i'w md`i'wx md`i'y md`i'z
            sum `d`i'w'
            scalar `md`i'w' = r(mean)
            sum `d`i'wx'
            scalar `md`i'wx' = r(mean)
            sum `d`i'y'
            scalar `md`i'y' = r(mean)
            sum `d`i'z'
            scalar `md`i'z' = r(mean)

            /* calculate subgroup contributions */
            tempname mldw`i' thlw`i' wmld`i' wthl`i' ps`i' is`i'
            scalar `mldw`i'' = (`md`i'w'/`m0') * (-`md`i'y'/`md`i'w'+log(`md`i'wx'/`md`i'w'))
            scalar `thlw`i'' = (1/`m1') * (`md`i'z'-`md`i'wx'*log(`md`i'wx'/`md`i'w'))
            scalar `wmld`i'' = (-`md`i'y'/`md`i'w'+log(`md`i'wx'/`md`i'w'))
            scalar `wthl`i'' = (`md`i'z'/`md`i'wx'-log(`md`i'wx'/`md`i'w'))
            scalar `ps`i'' = (`md`i'w'/`m0')
            scalar `is`i'' = (`md`i'wx'/`m1')
            scalar `mld' = `mld' + `mldw`i''
            scalar `thl' = `thl' + `thlw`i''

            local i = `i' + 1
        } /* end of subgroup loop */

        /* calculate between-group contributions */
        tempname mldb thlb
        scalar `mldb' = 0
        scalar `thlb' = 0
        local i = 1
        while `i' <= `nosubg' {
            scalar `mldb' = `mldb' - log(`md`i'wx'*`m0'/(`m1'*`md`i'w'))*`md`i'w'/`m0'
            scalar `thlb' = `thlb' + (`md`i'wx'/`m1')*log(`md`i'wx'*`m0'/(`m1'*`md`i'w'))
            local i = `i' + 1
        }

        /* complete calculation of overall inequality */
        scalar `mld' = `mld' + `mldb'
        scalar `thl' = `thl' + `thlb'

        /* return results */
        local i = 1
        while `i' <= `nosubg' {
            return scalar mldw`i' = `mldw`i''
            return scalar thlw`i' = `thlw`i''
            return scalar wmld`i' = `wmld`i''
            return scalar wthl`i' = `wthl`i''
            return scalar ps`i' = `ps`i''
            return scalar is`i' = `is`i''
            local i = `i' + 1
        }
        return scalar mldb = `mldb'
        return scalar thlb = `thlb'
        return scalar mld = `mld'
        return scalar thl = `thl'

    } /* end of quietly */
end