*-------------------------------------------------------------------------------
* Description: Plot average birth weight by parent race and age.
* Author:      Brendan M. Price
*
* Notes:
* 1. I plot the regression curves and confidence intervals before plotting the
*    scatter points to ensure the latter show up in front of the former.
* 2. The .pdf file produced by this file is quite large, and large .pdf files
*    embedded in a TeX document render slowly. To avoid this issue, I use the
*    MacOS gs utility to generate a rasterized (.jpg) file that loads quickly.
*-------------------------------------------------------------------------------


* Prepare the data
*-------------------------------------------------------------------------------

* Load natality counts
use "$basepath/data/vital.dta", clear

* Drop missings
keep if !missing(birth_weight)

* Retain white and Black mothers
keep if inlist(race, 1, 2)

* Use a random subsample to widen confidence intervals for display purposes.
* The seed is fixed first so that the same 10% subsample, and therefore the
* same figure, is produced on every run (the seed value itself is arbitrary).
set seed 20190815
keep if runiform() <= .10

* Create 20 quantile bins of maternal age, then overwrite the bin index with
* the midpoint between the lowest and highest age observed in each bin
xtile age_bin = age, nquantiles(20)
bysort age_bin: egen min_age = min(age)
bysort age_bin: egen max_age = max(age)
replace age_bin = 0.5 * (min_age + max_age)

* Compute average birth weight within each age bin, separately by race
bysort race age_bin: egen mean_weight = mean(birth_weight)

* Show confidence intervals over the full age range. Only r(min) and r(max)
* are needed, so meanonly skips the printed table and the variance calculation.
summarize age, meanonly
local age_min = r(min)
local age_max = r(max)


* Create a bin scatter with overlaid quadratic regression curves
*-------------------------------------------------------------------------------

* Flag a single observation per race-by-age-bin cell. mean_weight is constant
* within a cell, so plotting every observation would stack many identical
* markers on top of one another: the picture looks the same, but every marker
* is written to the vector .pdf and inflates its size. A temporary variable is
* used so that nothing is added permanently to the data in memory.
tempvar cell_tag
egen byte `cell_tag' = tag(race age_bin)

* The fitted curves and their confidence bands are listed before the scatter
* layers so that the markers are drawn in front of them. Group labels are
* placed with text() in the matching scheme colors, so the legend is off.
#delimit ;
twoway
	(qfitci birth_weight age if race == 1, range(`age_min' `age_max') clcolor("scheme p1") acolor(gs14))
	(qfitci birth_weight age if race == 2, range(`age_min' `age_max') clcolor("scheme p2") acolor(gs14))
	(scatter mean_weight age_bin if race == 1 & `cell_tag', mcolor("scheme p1") msymbol("scheme p1"))
	(scatter mean_weight age_bin if race == 2 & `cell_tag', mcolor("scheme p2") msymbol("scheme p2")),
	xtitle("Maternal age (binned)")
	ytitle("Mean birth weight (grams)")
	xscale(range(9 51))
	xlabel(10(5)50)
	yscale(range(2790 3410))
	ylabel(2800(100)3400)
	text(3325 18 "White", color("scheme p1"))
	text(3070 18 "Black", color("scheme p2"))
	xsize(8)
	ysize(4)
	scale(*1.3)
	legend(off);
#delimit cr

* Export the figure
* Writes the graph currently in memory as a vector .pdf, overwriting any
* existing file of the same name.
* NOTE(review): $serif is a global macro defined outside this file; presumably
* it expands to font options for graph export -- confirm against the caller.
graph export "$basepath/out/birth_qfit.pdf", as(pdf) $serif replace

* Create a .jpg version of the figure
* Calls Ghostscript (gs) through the operating-system shell to rasterize the
* .pdf exported above into a .jpg at 600 dpi (-r600). -dBATCH and -dNOPAUSE
* make gs run non-interactively and exit when done. gs must be on the shell's
* PATH; Stata does not stop with an error if the shell command fails.
shell gs -dSAFER -dBATCH -dNOPAUSE -sDEVICE=jpeg -r600 -sOutputFile="$basepath/out/birth_qfit.jpg" "$basepath/out/birth_qfit.pdf"
