********************************************************************************
*																			   *
*                        	SCHOOLING EFFICIENCY							   *
*                       Sarah Ferber   -  12.04.2022                           *
*																			   *
********************************************************************************

* ------------------------------------------------------------------------------
* Session setup: start from an empty session, raise the variable limit and
* memory segment size, and switch paging off so the script runs unattended.
* ------------------------------------------------------------------------------
clear all
set maxvar 10000
set segmentsize 64m
set more off

* Run the project path file, then move to the directory stored in $wd.
* (Presumably 0_paths defines $wd and the other path globals used below;
* it is not part of this file - verify there.)
do "E:\Tübingen\01_Paper 1\01_Stata\00_Do Files\0_paths"
cd "$wd"

* Load the analysis dataset, replacing anything in memory.
use ssa_num_final2, clear
	
* Generate region variable
* Maps the numeric country code to one of four region codes; countries that
* appear in none of the lists keep region == . (missing).
* (The heading uses `*` because `///` is Stata's line-continuation marker,
* not a plain comment.)
gen region = .
	replace region = 1 if inlist(country, 2, 4, 7, 8, 11, 12, 13, 16, 19, 24, 25, 26, 27, 33, 46, 47 )
	replace region = 2 if inlist(country, 5, 14, 17, 18, 20, 22, 29, 32, 35, 36, 37, 41, 44, 45, 48, 50, 51)
	replace region = 3 if inlist(country, 6, 10, 39, 40, 42, 43, 49, 52)
	* Fix: this group used to be assigned code 3 a second time, which left the
	* "Southern Africa" category (4) defined in the label but never populated.
	* NOTE(review): confirm that these five country codes are the Southern
	* Africa group; the country codebook is not visible in this file.
	replace region = 4 if inlist(country, 3, 15, 23, 28, 30)
	label define region_label 1 "Western Africa" 2 "Eastern Africa" 3 "Central Africa" 4 "Southern Africa"
	label values region region_label

* Outcome variable
* ln_abcc_edu_ratio6 = ln( rescaled ABCC / rescaled schooling years ), where
* schooling years are top-coded at 6 before rescaling.
* NOTE(review): this block reads edu_years2, while later sections of the file
* read edu_years - confirm which variable is intended.
summarize abcc
tempname abcc_lo abcc_hi
scalar `abcc_lo' = r(min)
scalar `abcc_hi' = r(max)

* ABCC min-max rescaled with the sample minimum and maximum from -summarize-
gen abcc_hdi6 = (abcc - `abcc_lo') / (`abcc_hi' - `abcc_lo')

* Years of schooling capped at 6; system-missing values stay missing
gen edu_years6 = cond(edu_years2 > 6 & edu_years2 != ., 6, edu_years2)

* Capped years as a share of the cap
gen edu_hdi6 = edu_years6 / 6

* Ratio of the two rescaled measures and its natural log
gen abcc_edu_ratio6 = abcc_hdi6 / edu_hdi6
gen ln_abcc_edu_ratio6 = ln(abcc_edu_ratio6)


/* 
* Multicollinearity check:

global gis_controls_check col_duration railway_dummy diamonds_num explorer_dummy ///
		nut_availmean  soil_workmean malaria_mean missions_dummy petrol_full_pc ///
		rugmean trade_dummy tsetse_mortisan ln_past_to_ag pop_density area ///
		tsetse_palpalis dist_capital_km dist_coast_km nut_retmean soil_toxmean


reg ln_abcc_edu_ratio6 height $socio_controls $gis_controls_check i.birth_decade i.region, cluster(adm1_code)
vif

// Exclude: nut_retmean soil_toxmean

*/

* Controls
* Two global variable lists that the regressions below splice in as
* $socio_controls and $gis_controls.
* NOTE(review): the commented-out multicollinearity check earlier in this file
* ends with "Exclude: nut_retmean soil_toxmean", yet soil_toxmean is still part
* of gis_controls here - confirm that this is intended.
global socio_controls age_cohab rel_isl rel_fract

* gis_controls is assembled in several steps; the variable order is unchanged.
global gis_controls col_duration railway_dummy diamonds_num explorer_dummy
global gis_controls $gis_controls nut_availmean soil_toxmean soil_workmean malaria_mean
global gis_controls $gis_controls missions_dummy petrol_full_pc rugmean trade_dummy
global gis_controls $gis_controls tsetse_mortisan ln_past_to_ag pop_density area
global gis_controls $gis_controls tsetse_palpalis dist_capital_km dist_coast_km



* ============================== Main Tables ===================================

*** Table A.3 - Descriptives
* Summary statistics for the ABCC index, the outcome and the main covariates.
* NOTE(review): this table reports edu_years, whereas the outcome variable is
* built from edu_years2 - confirm which one should be described here.

sum abcc ln_abcc_edu_ratio6 edu_years height age_cohab rel_isl rel_fract

* Fix: a stray `exit` used to follow the summary table. It terminated the
* do-file at this point, so none of the figures or tables below were produced.
*** Figure A.1 - Relationship ABCC and Education Years

* Bin years of schooling into four categories:
*   below 2 -> 0,  [2,4) -> 1,  [4,6) -> 2,  [6,13) -> 3
* Values of 13 or more and missing values are left uncategorised (missing).
gen edu_years_cat = cond(edu_years < 2, 0, ///
	cond(edu_years < 4, 1, ///
	cond(edu_years < 6, 2, ///
	cond(edu_years < 13, 3, .))))

label define edu_years_cat_label 0 "<2 Years" 1 "2-4 Years" 2 "4-6 Years" 3 "More Than 6 Years" 
label values edu_years_cat edu_years_cat_label

* Work on a collapsed copy; the full data are restored afterwards.
preserve
	* Mean, standard deviation and count of ABCC within each category
	collapse (mean) meanabcc = abcc (sd) sdabcc = abcc (count) n=abcc, by(edu_years_cat)

	* Half-width of the 95% t-interval around the category mean
	gen double ci_half = invttail(n-1, 0.025)*(sdabcc / sqrt(n))
	gen high_abcc = meanabcc + ci_half
	gen low_abcc = meanabcc - ci_half

	* Bars for the means with capped spikes for the interval
	graph twoway (bar meanabcc edu_years_cat, barw(0.8)) ///
		(rcap high_abcc low_abcc edu_years_cat, color(maroon)), ///
		title("Average ABCC Index by Years of Schooling") ///
		ytitle("ABCC Index") xtitle("") ///
		xlabel(0 "<2 Years" 1 "2-4 Years" 2 "4-6 Years" 3 "More Than 6 Years") ///
		legend(off)

	graph export "$figures\abcc_eduyears.png", as(png) replace
restore

*** Figure A.2 - ABCC Trends over Time

* Step 1: mean ABCC per birth decade over all observations, saved to
* abcc_africa.dta in the working directory.
preserve
	collapse (mean) abcc, by(birth_decade)
	save abcc_africa, replace
restore

* Step 2: mean ABCC per country and birth decade, with the overall series
* appended and given the placeholder country code 99.
preserve
	collapse (mean) abcc, by(country birth_decade)
	keep if country != .
	append using abcc_africa
	replace country = 99 if country == .

	* One line per series: the overall average plus four selected countries.
	* Legend keys follow plot order (99, 3, 31, 21, 47).
	twoway (line abcc birth_decade if country == 99, lwidth(0.7)) ///
		(line abcc birth_decade if country == 3, lpattern(dash) lcolor(black)) ///
		(line abcc birth_decade if country == 31, lpattern(longdash) lcolor(black)) ///
		(line abcc birth_decade if country == 21, lpattern(dash_dot) lcolor(black)) ///
		(line abcc birth_decade if country == 47, lpattern(shortdash) lcolor(black)) ///
		, yscale(range(40 100)) ylabel(40 (20) 100) ///
		ytitle("ABCC Index") xtitle("Birth Decade") ///
		title("Development of ABCC Index over Time") ///
		legend(label(1 "Average SSA") label(2 "Benin") label(3 "Niger") ///
			label(4 "Kenya") label(5 "Zimbabwe") holes(2 3 4) rows(2))

	graph export "$figures\abcc_examples_time.png", as(png) replace
restore

*** Table 1: Baseline OLS
* Four nested OLS specifications of ln_abcc_edu_ratio6 on height, with
* standard errors clustered on adm1_code. Results are stored as m1-m4.
local fe i.birth_decade i.region

local rhs1 height
local rhs2 height `fe'
local rhs3 height $socio_controls `fe'
local rhs4 height $socio_controls $gis_controls `fe'

forvalues s = 1/4 {
	regress ln_abcc_edu_ratio6 `rhs`s'', vce(cluster adm1_code)
	estimates store m`s'
}

* On-screen comparison of the height coefficient
esttab m1 m2 m3 m4, keep(height)

* Export; fixed effects and GIS controls are dropped from the table
outreg2 [m1 m2 m3 m4] using "$tables/main_ols_jds", ///
	word excel dec(3) replace label ///
	drop(*.region *.birth_decade $gis_controls)
	
*** Table 2: Instrumental Variable Regression
* For each of four control sets: an OLS regression of height on the instrument
* (restricted to observations with a non-missing outcome, stored as the
* odd-numbered model) followed by the 2SLS regression of the outcome on
* instrumented height (even-numbered model). Errors are clustered on adm1_code.
local y  ln_abcc_edu_ratio6
local iv deviation_percent5m
local fe i.birth_decade i.region

* Exogenous regressors per specification. The first-stage (fs) and 2SLS (ss)
* lists contain the same variables; only their order differs in spec 4.
local fs1
local ss1
local fs2 `fe'
local ss2 `fe'
local fs3 $socio_controls `fe'
local ss3 $socio_controls `fe'
local fs4 $socio_controls $gis_controls `fe'
local ss4 $gis_controls $socio_controls `fe'

forvalues s = 1/4 {
	local first  = 2*`s' - 1
	local second = 2*`s'

	regress height `iv' `fs`s'' if `y' != ., vce(cluster adm1_code)
	estimates store m`first'

	ivregress 2sls `y' `ss`s'' (height = `iv'), vce(cluster adm1_code) first
	estimates store m`second'
}

esttab m1 m2 m3 m4 m5 m6 m7 m8, keep(deviation_percent5m height)

outreg2 [m1 m2 m3 m4 m5 m6 m7 m8] using "$tables/main_iv_jds", ///
	word excel dec(3) replace label ///
	drop(*.region *.birth_decade $gis_controls)


* Robustness Checks

*** Table 3: Exclusion of extreme dry/wet values and by urban/rural areas
* Re-estimates the full-control first stage and 2SLS model on four subsamples:
*   1) without the top and bottom decile of flow_5
*   2) without the top two and bottom two deciles of flow_5
*   3) residence <= 0.5
*   4) residence >  0.5
* Models are stored pairwise as m1/m2 ... m7/m8 (first stage / 2SLS).
xtile rain_dec = flow_5, n(10)

local y  ln_abcc_edu_ratio6
local iv deviation_percent5m
local fe i.birth_decade i.region

* NOTE(review): observations with missing flow_5 have rain_dec == . and
* therefore pass the two inlist() filters below - confirm that is intended.
local keep1 !(inlist(rain_dec, 10, 1))
local keep2 !(inlist(rain_dec, 10, 9, 2, 1))
local keep3 residence<=0.5
* Fix: Stata treats missing as larger than any number, so a bare
* `residence>0.5` also selected observations with missing residence.
local keep4 residence>0.5 & !missing(residence)

forvalues s = 1/4 {
	local first  = 2*`s' - 1
	local second = 2*`s'

	regress height `iv' $socio_controls $gis_controls `fe' if `y'!=. & `keep`s'', vce(cluster adm1_code)
	estimates store m`first'

	ivregress 2sls `y' $gis_controls $socio_controls `fe' (height = `iv') if `keep`s'', vce(cluster adm1_code) first
	estimates store m`second'
}

esttab m1 m2 m3 m4 m5 m6 m7 m8, keep(deviation_percent5m height)

outreg2 [m1 m2 m3 m4 m5 m6 m7 m8] using "$tables/robust_rainextreme", ///
	word excel dec(3) replace label ///
	drop(*.region *.birth_decade $socio_controls $gis_controls)


*** Table S.2:  Alternative Specifications with Different ABCC Cutoffs 
* Full-control first stage and 2SLS model, restricted in turn to observations
* with abcc <= 95, <= 90 and <= 85. Stored as m1/m2, m3/m4, m5/m6.
local y  ln_abcc_edu_ratio6
local iv deviation_percent5m
local fe i.birth_decade i.region

local m = 0
foreach cutoff of numlist 95 90 85 {
	* First stage on the outcome sample below the cutoff
	local ++m
	regress height `iv' $socio_controls $gis_controls `fe' if `y'!=. & abcc<=`cutoff', vce(cluster adm1_code)
	estimates store m`m'

	* 2SLS below the cutoff
	local ++m
	ivregress 2sls `y' $gis_controls $socio_controls `fe' (height = `iv') if abcc<=`cutoff', vce(cluster adm1_code) first
	estimates store m`m'
}

esttab m1 m2 m3 m4 m5 m6, keep(deviation_percent5m height)

outreg2 [m1 m2 m3 m4 m5 m6] using "$tables/robust_iv_alt_cutoff", ///
	word excel dec(3) replace label ///
	drop(*.region *.birth_decade $socio_controls $gis_controls)


*** Table S.3: Alternative Specifications with Different Years of Schooling Limits 
* Rebuilds the outcome with schooling years capped at 4, 5, 7 and 8 instead of
* 6, then re-estimates the full-control first stage and 2SLS model for each.
* For every cap k the variables abcc_hdik, edu_yearsk, edu_hdik,
* abcc_edu_ratiok and ln_abcc_edu_ratiok are created.
* NOTE(review): these variants are built from edu_years, whereas the cap-6
* outcome is built from edu_years2 - confirm which variable is intended.

* Fix: the ABCC rescaling bounds were hard-coded (29.8698 and 100). They are
* now taken from the data, exactly as for abcc_hdi6, so the variants cannot
* drift from the main outcome if the dataset changes.
quietly summarize abcc
tempname abcc_lo abcc_hi
scalar `abcc_lo' = r(min)
scalar `abcc_hi' = r(max)

foreach cap of numlist 4 5 7 8 {
	* ABCC min-max rescaled
	gen abcc_hdi`cap' = (abcc - `abcc_lo') / (`abcc_hi' - `abcc_lo')

	* Schooling years top-coded at the cap, then expressed as a share of it
	gen edu_years`cap' = edu_years
		replace edu_years`cap' = `cap' if edu_years>`cap' & edu_years!=.
	gen edu_hdi`cap' = edu_years`cap' / `cap'

	* Ratio of the two rescaled measures and its natural log
	gen abcc_edu_ratio`cap' = abcc_hdi`cap' / edu_hdi`cap'
	gen ln_abcc_edu_ratio`cap' = ln(abcc_edu_ratio`cap')
}

* First stage (odd model numbers) and 2SLS (even) per cap: m1/m2 ... m7/m8
local iv deviation_percent5m
local fe i.birth_decade i.region

local m = 0
foreach cap of numlist 4 5 7 8 {
	local y ln_abcc_edu_ratio`cap'

	local ++m
	regress height `iv' $socio_controls $gis_controls `fe' if `y'!=., vce(cluster adm1_code)
	estimates store m`m'

	local ++m
	ivregress 2sls `y' $gis_controls $socio_controls `fe' (height = `iv'), vce(cluster adm1_code) first
	estimates store m`m'
}

esttab m1 m2 m3 m4 m5 m6 m7 m8, keep(deviation_percent5m height)

outreg2 [m1 m2 m3 m4 m5 m6 m7 m8] using "$tables/robust_iv_alt_yrschool", ///
	word excel dec(3) replace label ///
	keep(deviation_percent5m height)



*** Table S.4: Exclusion of extreme IV values
* Full-control first stage and 2SLS model after removing the outer deciles of
* the instrument: first deciles 1 and 10 (m1/m2), then deciles 1, 2, 9 and 10
* (m3/m4).
xtile deviation_percent5m_xtiles = deviation_percent5m, n(10)

local y  ln_abcc_edu_ratio6
local iv deviation_percent5m
local fe i.birth_decade i.region

* Deciles to exclude in each of the two variants
local out1 10, 1
local out2 10, 9, 2, 1

forvalues s = 1/2 {
	local first  = 2*`s' - 1
	local second = 2*`s'

	regress height `iv' $socio_controls $gis_controls `fe' if `y'!=. & !(inlist(deviation_percent5m_xtiles, `out`s'')), vce(cluster adm1_code)
	estimates store m`first'

	ivregress 2sls `y' $gis_controls $socio_controls `fe' (height = `iv') if !(inlist(deviation_percent5m_xtiles, `out`s'')), vce(cluster adm1_code) first
	estimates store m`second'
}

esttab m1 m2 m3 m4, keep(deviation_percent5m height)

outreg2 [m1 m2 m3 m4] using "$tables/robust_iv_drop", ///
	word excel dec(3) replace label ///
	keep(deviation_percent5m height)

*** Table S.5: Alternative IVs
* Repeats the full-control first stage and 2SLS model with three alternative
* instruments in place of deviation_percent5m. Stored as m1/m2, m3/m4, m5/m6.
local y  ln_abcc_edu_ratio6
local fe i.birth_decade i.region

local m = 0
foreach iv in deviation_percent4m deviation_percent3m rain_below20_5m {
	* First stage on the sample with a non-missing outcome
	local ++m
	regress height `iv' $socio_controls $gis_controls `fe' if `y'!=., vce(cluster adm1_code)
	estimates store m`m'

	* 2SLS with the alternative instrument
	local ++m
	ivregress 2sls `y' $gis_controls $socio_controls `fe' (height = `iv'), vce(cluster adm1_code) first
	estimates store m`m'
}

esttab m1 m2 m3 m4 m5 m6, keep(deviation_percent* rain_below20_5m height)

outreg2 [m1 m2 m3 m4 m5 m6] using "$tables/robust_iv_altspec", ///
	word excel dec(3) replace label ///
	keep(deviation_percent* rain_below20_5m height)

/*


* Table S.6: Time and Spatial Autocorrelation 
tab birth_decade, gen(bdec)
tab region, gen (reg)
gen const = 1 

	
ols_spatial_HAC ln_abcc_edu_ratio6 height const, lat(_Y) lon(_X) timevar(birth_decade) ///
	panelvar(admin1) dist(400) lag(2)
est store m1
ols_spatial_HAC ln_abcc_edu_ratio6 height bdec2 bdec3 bdec4 bdec5 reg2 reg3 const, lat(_Y) lon(_X) timevar(birth_decade) ///
	panelvar(admin1) dist(400) lag(2)
est store m2
ols_spatial_HAC ln_abcc_edu_ratio6 height $socio_controls bdec2 bdec3 bdec4 bdec5 reg2 reg3 const, lat(_Y) lon(_X) timevar(birth_decade) ///
	panelvar(admin1) dist(400) lag(2)
est store m3
ols_spatial_HAC ln_abcc_edu_ratio6 height $socio_controls $gis_controls bdec2 bdec3 bdec4 bdec5 reg2 reg3 const, lat(_Y) lon(_X) timevar(birth_decade) ///
	panelvar(admin1) dist(400) lag(2)
est store m4

esttab m1 m2 m3 m4, keep(height)


#delimit;
outreg2 [m1 m2 m3 m4] using "$tables/robust_spatial", 
word excel dec(3) replace label  ///
keep(height const);
#delimit cr





























