version 12.1


* Root directory of the project on this machine; every path below is
* built from it, so relocating the project only requires editing this line.
global TERMINAL "/mnt/data0/work/MPF_FINAL"

* Start from an empty session and disable output paging so the script
* runs unattended.
clear all
set more off
* Store newly generated variables as doubles by default (set once; no
* variable is created before this point).
set type double

* Paths are quoted so that a project root containing spaces still works,
* matching the quoted path already used for the graph export.
cd "${TERMINAL}/results/reg_output/geometric_mean"
use "RedoDecompositionData_avgDiff_v5.dta", clear


* ------------------------------------------------
* Decomposition Cross Section: 50 Largest Firms
* ------------------------------------------------
* Sample restrictions on product groups:
*   - 501 and 6005 have fewer than 50 firms (all other groups have at least 50)
*   - 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE) are excluded as well
drop if inlist(product_group_code, 501, 6005)
sort product_group_code
drop if inlist(product_group_code, 3501, 4001)

* Collapse to one observation per firm within a product group, averaging
* the sales level and each log component over the firm's observations.
generate sales = exp(logtotalsales)
collapse (mean) sales logtotalsales lnGeomeanAut lnFirmQuality ///
    lnAQratio lnmftterm logfirmprod,                            ///
    by(product_group_code firmid) fast

* Firm's share of its product group's total (average) sales.
bysort product_group_code: egen totalsales = total(sales)
generate share = sales/totalsales

* Rank firms within each product group by share; with the field option the
* largest share receives rank 1.
* NOTE(review): field ranking gives tied shares the same rank - confirm
* that ties do not occur in practice.
bysort product_group_code: egen cumul1 = rank(share), field
sort product_group_code cumul1

* Pool every firm ranked beyond 50 into a single residual cell (code 51).
replace cumul1 = 51 if cumul1 > 51


* Generate the k-differenced variables, i.e. each rank cell's mean minus the
* mean over all firms in the same product group.
*
* The four lists below are parallel: the j-th word of each list refers to
* the same underlying quantity.
local srcvars  logtotalsales lnFirmQuality lnAQratio lnmftterm logfirmprod lnGeomeanAut
local rankmean meanlnsales meanlnFQ meanlnAQ meanlnMft meanlnProducts meanlnGeomeanAut
local grpmean  totalfirstsales totalfirstquality totalfirstcost totalfirstmarkup totalfirstproducts totalfirstlnGeomeanAut
local diffvar  difflnSales difflnQuality difflnCost difflnMarkup difflnProducts difflnGeomeanAut
local nvars : word count `srcvars'

* Mean within each (product group, rank) cell.
forvalues j = 1/`nvars' {
    local src : word `j' of `srcvars'
    local new : word `j' of `rankmean'
    bysort product_group_code cumul1: egen `new' = mean(`src')
}

* Mean over every firm in the product group (the "average firm").
forvalues j = 1/`nvars' {
    local src : word `j' of `srcvars'
    local new : word `j' of `grpmean'
    bysort product_group_code: egen `new' = mean(`src')
}

* Difference: rank-cell mean minus product-group mean.
forvalues j = 1/`nvars' {
    local cell : word `j' of `rankmean'
    local grp  : word `j' of `grpmean'
    local new  : word `j' of `diffvar'
    generate `new' = `cell' - `grp'
}

* Firm sales level recovered from the (averaged) log sales; used below to
* build the product-group weights.
generate Firmsales = exp(logtotalsales)
* 
* Product-group sales weight: the group's total firm sales divided by total
* firm sales across all product groups. Both totals are taken before the
* residual rank cell is dropped, so they include every remaining firm.
bysort product_group_code: egen totalsales1 = total(Firmsales)
egen totalsales2 = total(Firmsales)
generate pgweight = totalsales1/totalsales2

* Scale each k-differenced variable by its product group's sales weight.
foreach stem in Sales Quality Cost Markup Products GeomeanAut {
    generate wdiffln`stem' = diffln`stem'*pgweight
}

* Keep the top 50 firms only (rank code 51 is the pooled residual cell).
drop if cumul1 == 51

* For each rank, sum the sales-weighted k-differenced variables across
* product groups; the result has one observation per rank.
collapse (sum) wdifflnSales wdifflnGeomeanAut wdifflnQuality ///
    wdifflnCost wdifflnMarkup wdifflnProducts,                ///
    by(cumul1) fast
* Produce the decomposition scatter plot (called "figure 4" in the original
* comment; note the exported file name starts with "Fig1").
*
* The last variable listed, wdifflnSales, is the x-axis; the five variables
* before it are the plotted series, labelled in the legend as follows:
*   1 wdifflnQuality     -> "Firm Appeal"
*   2 wdifflnGeomeanAut  -> "Average Marginal Cost"
*   3 wdifflnCost        -> "Cost Dispersion"
*   4 wdifflnMarkup      -> "Markup"
*   5 wdifflnProducts    -> "Product Scope"
twoway (scatter wdifflnQuality wdifflnGeomeanAut wdifflnCost   ///
                wdifflnMarkup wdifflnProducts wdifflnSales,    ///
            msymbol(d t s X o)),                               ///
    scheme(s2mono)                                             ///
    xtitle("Difference in Log Sales from Average")             ///
    legend(label(1 "Firm Appeal")                              ///
           label(2 "Average Marginal Cost")                    ///
           label(3 "Cost Dispersion")                          ///
           label(4 "Markup")                                   ///
           label(5 "Product Scope"))

* Write the figure to disk as EPS, overwriting any existing file.
graph export "${TERMINAL}/results/figures/Fig1_top50_share_v5.eps", replace
*graph export "${TERMINAL}/results/figures/Fig1_top50_share_v5.tif", replace


