* Run everything below under Stata 12.1 semantics
version 12.1

* Project root; the loop below changes into its data subfolder to read and save files
global TERMINAL "/mnt/data0/work/MPF_FINAL"

* Newly generated variables are stored as doubles unless a type is given
set type double

* --------------------------------------
* This code computes the size distribution by decile and by rank reported in Tables 2 and 3
* --------------------------------------
* For each grouping (sales decile, sales rank) build the firm size distribution:
*   1. load the UPC-level panel and aggregate it to year - quarter - product group - firm
*   2. assign every firm to a decile (or rank) of sales within its product group
*   3. compute group-level statistics within each year - quarter - product group
*   4. average those statistics across year - quarter - product group cells,
*      weighting by each cell's share of total sales
* Output: size_distribution_by_Decile_final_v5.dta and size_distribution_by_Rank_final_v5.dta
* in the data subfolder of TERMINAL. In both files the grouping variable is named decile.
* Requires the user-written egen functions xtile() and nvals() (egenmore package).
foreach type in "Decile" "Rank" {

    * Reload the source data on every pass so both groupings start from the same panel.
    * The path is quoted so that a root folder containing spaces still works.
    cd "${TERMINAL}/data"
    set type double
    use RedoFullPostestimationV2_v5.dta, clear

    * drop product groups: 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE)
    drop if product_group_code==3501 | product_group_code==4001

    * Compute firm (log) sales, upc log sales and standard deviation
    bys panel_year quarter product_group_code firmid: egen firmsales = total(value)
    gen logtotalsales = ln(firmsales)
    gen logupcsales = ln(value)
    * Within-firm dispersion is only computed for firms with more than one product.
    * Stata treats missing as larger than any number, so a missing product count
    * is excluded explicitly instead of silently passing the > 1 test.
    by panel_year quarter product_group_code firmid: egen sdupcsales = sd(value) if numbprodfirm > 1 & !missing(numbprodfirm)
    by panel_year quarter product_group_code firmid: egen sdlnupcsales = sd(logupcsales) if numbprodfirm > 1 & !missing(numbprodfirm)

    * Collapse the data to year - quarter - product group - firm
    collapse (sum) value (firstnm) firmsales logtotalsales sdupcsales sdlnupcsales numbprodfirm sharefirmgroup, by(panel_year quarter product_group_code firmid) fast

    * Assign the grouping variable. It is called decile in both cases so that the
    * rest of the loop and the saved files share one layout; for Rank it holds the
    * firm's sales rank within the product group (1 = largest).
    if "`type'"=="Decile" {
        egen decile = xtile(logtotalsales), by(panel_year quarter product_group_code) nq(10)
        }
    else if "`type'"=="Rank" {
        bys  panel_year quarter product_group_code: egen double decile = rank(firmsales), field
        }

    order panel_year quarter product_group_code decile firmid firmsales numbprodfirm sharefirmgroup

    * Compute year - quarter - product group (-decile) sales, number of firms and number of products
    * Product-group level totals first
    bys panel_year quarter product_group_code: egen tsales = total(firmsales)
    bys panel_year quarter product_group_code: egen TN = nvals(firmid)
    * Same quantity as TN, kept under its own name because the next collapse carries it
    gen totalnfirms = TN
    * Then decile (rank) level totals and their shares of the product group
    bys panel_year quarter product_group_code decile: egen N = nvals(firmid)
    gen decilefirmshare = N/TN
    bys panel_year quarter product_group_code decile: egen decilesales = total(firmsales)
    gen tshare = decilesales/tsales
    gen nUPC = numbprodfirm

    * Compute mean, std and median number of products (UPCs) and sales share in each year-quarter-decile
    bys panel_year quarter product_group_code decile: egen avg_nUPC = mean(nUPC)
    by panel_year quarter product_group_code decile: egen sd_nUPC = sd(nUPC)
    by panel_year quarter product_group_code decile: egen med_nUPC = median(nUPC)
    * NOTE(review): tshare is constant within a year - quarter - product group - decile
    * cell, so sd_tshare is zero (or missing for one-firm cells). Confirm whether the
    * standard deviation of sharefirmgroup was intended here.
    by panel_year quarter product_group_code decile: egen sd_tshare = sd(tshare)
    by panel_year quarter product_group_code decile: egen avg_share = mean(sharefirmgroup)
    by panel_year quarter product_group_code decile: egen avg_sdu = mean(sdupcsales)
    by panel_year quarter product_group_code decile: egen avg_sdlnu = mean(sdlnupcsales)
    by panel_year quarter product_group_code decile: egen avg_ln_sales = mean(logtotalsales)
    by panel_year quarter product_group_code decile: egen med_share = median(sharefirmgroup)

    * Collapse to year - quarter - decile (rank) - product group
    * The firstnm variables are constant within each cell, so firstnm just carries them over
    collapse (sum) value firmsales (firstnm) N decilefirmshare tshare avg_nUPC sd_nUPC sd_tshare avg_share med_nUPC med_share avg_sdu avg_sdlnu avg_ln_sales  totalnfirms (min) min_nUPC=nUPC min_tshare=tshare (max) max_nUPC=nUPC max_tshare=tshare, by(panel_year quarter decile product_group_code) fast

    * Compute year - quarter - product group share of sales (pgweight)
    * NOTE(review): the denominator is sales summed over every cell in the data, so
    * the weights add to one across year - quarter - product group cells but are not
    * renormalised within a decile (rank). If some cells lack a given decile or rank,
    * the weights used for that group add to less than one. Confirm this is intended.
    bys panel_year quarter product_group_code: egen pgsales = total(value)
    egen total_yqpg_sales = total(value)
    gen pgweight = pgsales/total_yqpg_sales

    * Compute year - quarter - product group sales share weighted statistics
    foreach X in N decilefirmshare tshare avg_nUPC avg_share med_nUPC med_share avg_sdu avg_sdlnu avg_ln_sales sd_nUPC  sd_tshare {
        gen w`X'=`X'*pgweight
        }

    * Sum the sales share weighted statistics up to decile level
    collapse (sum) wN wdecilefirmshare wtshare wavg_nUPC wavg_share wmed_nUPC wmed_share wavg_sdu wavg_sdlnu wavg_ln_sales wsd_nUPC wsd_tshare (min) min_min_nUPC=min_nUPC min_min_tshare=min_tshare (max) max_max_nUPC=max_nUPC max_max_tshare=max_tshare , by(decile) fast

    * Save results
    save size_distribution_by_`type'_final_v5.dta, replace
}
