version 12.1
* Newly generated variables default to double precision.
set type double


* Project root; the data files read and written below live in ${TERMINAL}/data/.
global TERMINAL "/mnt/data0/work/MPF_FINAL"

* --------------------------------------
* This code computes the descriptive statistics reported in Table 1
* --------------------------------------
clear
set more off
* (already set at the top of the file; repeated here as in the original section header)
set type double
* The path is quoted so that -cd- still works when TERMINAL contains spaces.
cd "${TERMINAL}/data/"
use RedoFullPostestimationV2_v5.dta, clear
sort panel_year quarter product_group_code firmid upc
* Discard the firmlist variable stored in the input file; a variable with the
* same name is recomputed later, after the data are collapsed to firm level.
drop firmlist
*
* drop product groups: 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE)
sort product_group_code
drop if inlist(product_group_code, 3501, 4001)
*
* UPC-level sales statistics within each year - quarter - product group cell:
* mean, median, standard deviation, maximum, and the 10th / 90th percentiles
* of value. The first call sorts the data by cell; the remaining calls rely
* on that ordering.
local cell panel_year quarter product_group_code
bysort `cell': egen upcsales = mean(value)
by `cell': egen medupcsales = median(value)
by `cell': egen sdupcsales = sd(value)
by `cell': egen maxupcsales = max(value)
foreach pct of numlist 10 90 {
	by `cell': egen upcsales`pct' = pctile(value), p(`pct')
}


* Aggregate to one row per year - quarter - product group - firm.
*   upcs          : count of non-missing upc entries of the firm
*   value, rawb   : summed over the firm's rows
*   other vars    : first non-missing value within the firm
collapse (count)   upcs = upc                                    ///
         (sum)     value rawb                                    ///
         (firstnm) firmsalestotal totalsales                     ///
                   upcsales medupcsales sdupcsales maxupcsales   ///
                   upcsales10 upcsales90,                        ///
         by(panel_year quarter product_group_code firmid) fast

* Firm sales in logs and in levels (value now holds the firm-level sum).
generate double lnfirmsales = ln(value)
generate double firmsales   = value

* firmlist: count of non-missing firmid within each year - quarter - product group.
bysort panel_year quarter product_group_code: egen firmlist = count(firmid)
order panel_year quarter product_group_code firmid firmlist upcs


* Within each year - quarter - product group cell, compute the mean, median,
* standard deviation, 10th and 90th percentile, and maximum of:
*   (1) number of products sold by the firm (upcs)
*   (2) number of firms in the product group (firmlist)
*   (3) firms' sales (firmsales)
*   (4) log firms' sales (lnfirmsales)
* Resulting names: mean<var>, med<var>, sd<var>, <var>10, <var>90, max<var>.

local cell panel_year quarter product_group_code
foreach v of varlist upcs firmlist firmsales lnfirmsales {
	bysort `cell': egen mean`v' = mean(`v')
	bysort `cell': egen med`v' = median(`v')
	bysort `cell': egen sd`v' = sd(`v')
	foreach pct of numlist 10 90 {
		bysort `cell': egen `v'`pct' = pctile(`v'), p(`pct')
	}
	bysort `cell': egen max`v' = max(`v')
}
* Aggregate to one row per year - quarter - product group and save the result.
*   firmid           : count of non-missing firmid rows in the cell
*   upcs, value, rawb: summed over the firms in the cell
*   other vars       : first non-missing value within the cell
collapse (count)   firmid                                                              ///
         (sum)     upcs value rawb                                                     ///
         (firstnm) meanfirmlist medfirmlist sdfirmlist firmlist10 firmlist90 maxfirmlist ///
                   upcsales medupcsales sdupcsales maxupcsales upcsales10 upcsales90   ///
                   meanupcs medupcs sdupcs upcs10 upcs90 maxupcs                       ///
                   meanfirmsales medfirmsales sdfirmsales                              ///
                   firmsales10 firmsales90 maxfirmsales                                ///
                   meanlnfirmsales medlnfirmsales sdlnfirmsales                        ///
                   lnfirmsales10 lnfirmsales90 maxlnfirmsales,                         ///
         by(panel_year quarter product_group_code) fast
save coll_by_yqpg.dta, replace

* Compute year - quarter - product group share of sales (y_q_pg_weight):
* each cell's value divided by the sum of value over all cells in the file.
* Note: -use- takes the option -clear- (not -replace-) to discard the data in memory.
use coll_by_yqpg.dta, clear
* total() is the documented egen function for a sum; sum() is its legacy name.
egen total_value = total(value)
gen y_q_pg_weight = value/total_value

* Detailed summary statistics of every cell-level measure, with each
* year - quarter - product group cell weighted by its share of sales.
local measures firmid                                                   ///
	upcsales medupcsales sdupcsales maxupcsales upcsales10 upcsales90   ///
	meanupcs medupcs sdupcs upcs10 upcs90 maxupcs                       ///
	meanfirmsales medfirmsales sdfirmsales                              ///
	firmsales10 firmsales90 maxfirmsales                                ///
	meanlnfirmsales medlnfirmsales sdlnfirmsales                        ///
	lnfirmsales10 lnfirmsales90 maxlnfirmsales
foreach m of local measures {
	summarize `m' [aweight = y_q_pg_weight], detail
}

* Largest firm: sales, log sales and number of upcs
foreach m in maxfirmsales maxlnfirmsales maxupcs {
	summarize `m' [aweight = y_q_pg_weight], detail
}

* Largest barcode: sales
summarize maxupcsales [aweight = y_q_pg_weight], detail
