version 12.1
* Store every newly generated variable as double unless a type is given explicitly.
set type double

* Root directory of the project; the input and output .dta files are read from
* and written to its data subdirectory.
global TERMINAL "/mnt/data0/work/MPF_FINAL"

* --------------------------------------
* This code computes the size distribution by number of UPCs reported in Table 4
* --------------------------------------
* The path is quoted so that a project root containing spaces still resolves.
cd "${TERMINAL}/data"
* -use, clear- already empties memory, so no separate -clear- is required.
use RedoFullPostestimationV2_v5.dta, clear

* Drop product groups 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE).
* No sort is needed here: -drop if- works row by row, and the later steps
* re-sort the data on their own grouping variables.
drop if inlist(product_group_code, 3501, 4001)

* Generate number of UPCs bins from numbprodfirm:
*   bin 1: exactly 1      bin 2: 2 to 5       bin 3: 6 to 10     bin 4: 11 to 20
*   bin 5: 21 to 50       bin 6: 51 to 100    bin 7: more than 100
* Every bin has an explicit range. Stata treats a missing value as larger than
* any number, and a catch-all "everything still unassigned" rule would place
* observations with missing (or otherwise out-of-range) numbprodfirm into the
* top bin. Such observations are left with bin missing instead, so they show
* up as their own group rather than inflating bin 7.
* NOTE(review): numbprodfirm is assumed to be a positive integer count - confirm.
gen double bin = .
replace bin = 1 if numbprodfirm == 1
replace bin = 2 if inrange(numbprodfirm, 2, 5)
replace bin = 3 if inrange(numbprodfirm, 6, 10)
replace bin = 4 if inrange(numbprodfirm, 11, 20)
replace bin = 5 if inrange(numbprodfirm, 21, 50)
replace bin = 6 if inrange(numbprodfirm, 51, 100)
replace bin = 7 if numbprodfirm > 100 & !missing(numbprodfirm)

* For every year - quarter - product group - firm - bin cell, attach to each row:
*   medianupcsales : median of value
*   sdupcsales     : standard deviation of value
*   sdlnupcsales   : standard deviation of ln(value)
local cell panel_year quarter product_group_code firmid bin
gen lnupcsales = ln(value)
* bysort orders the data by the cell variables before each egen call.
bysort `cell': egen medianupcsales = median(value)
bysort `cell': egen sdupcsales = sd(value)
bysort `cell': egen sdlnupcsales = sd(lnupcsales)

* Collapse to the year - quarter - product group - firm - bin level.
* Afterwards upc holds the number of non-missing upc rows in the firm cell and
* value holds the summed sales of the firm cell. The median / sd variables are
* carried through with firstnm (first non-missing value in the cell).
collapse (count) upc (sum) value (firstnm) medianupcsales sdupcsales sdlnupcsales, ///
    by(panel_year quarter product_group_code firmid bin) fast

* Collapse to the year - quarter - product group - bin level.
* firmid is counted, giving the number of firm rows in the bin. upc already is
* a per-firm count at this point, so it has to be summed to obtain the number
* of UPCs in the bin; counting it again would only reproduce the firm count and
* turn value/upc into sales per firm instead of sales per UPC.
* NOTE(review): firstnm keeps the median / sd of the first firm with a
* non-missing value in each bin, not a summary across firms - confirm that this
* is the intended statistic.
collapse (count) firmid (sum) upc value (firstnm) medianupcsales sdupcsales sdlnupcsales, ///
    by(panel_year quarter product_group_code bin) fast
		
* Weight of a year - quarter - product group cell (y_q_pg_weight):
* sales of the cell divided by sales summed over every row in memory.
bysort panel_year quarter product_group_code: egen totalval = total(value)
egen totalsales = total(value)
gen y_q_pg_weight = totalval / totalsales
* Sum of the weight over all rows; not referenced again in this section.
egen sum_w = total(y_q_pg_weight)

* Bin-level ratios inside each year - quarter - product group cell:
*   sharevaluebin  : bin sales over cell sales
*   avgupcsales    : bin sales over the bin's upc variable
*   avgupcmktshare : avgupcsales over cell sales
gen sharevaluebin = value / totalval
gen avgupcsales = value / upc
gen avgupcmktshare = avgupcsales / totalval

* Multiply each statistic by the cell weight; the products are stored under the
* prefix y_q_pg_w_.
foreach stat of varlist firmid upc medianupcsales sharevaluebin avgupcsales avgupcmktshare sdupcsales sdlnupcsales {
    gen y_q_pg_w_`stat' = y_q_pg_weight * `stat'
}

* Collapse to the bin level:
*   - unweighted mean and median of sdlnupcsales across the remaining rows
*   - sum of every weighted statistic (prefix y_q_pg_w_)
collapse ///
    (mean) mean_sdlnupcsales=sdlnupcsales ///
    (median) median_sdlnupcsales=sdlnupcsales ///
    (sum) y_q_pg_w_firmid y_q_pg_w_upc y_q_pg_w_medianupcsales ///
          y_q_pg_w_sharevaluebin y_q_pg_w_avgupcsales y_q_pg_w_avgupcmktshare ///
          y_q_pg_w_sdupcsales y_q_pg_w_sdlnupcsales ///
    , by(bin) fast

* Write the bin-level table to the current directory, overwriting an existing file.
save Nr_UPC_whole_sample_final.dta, replace
