* ---------------------------------------------------------------------------
* Session setup: pin the Stata version, make double the default storage type
* for new variables, define the project root and load the input data set.
* ---------------------------------------------------------------------------
version 12.1

* project root; the cd commands in this file are built from this global.
* It is defined before -clear all- and is still referenced afterwards.
global TERMINAL "/mnt/data0/work/MPF_FINAL"

clear all
set more off
* one setting is enough: it stays in force for the rest of the session
set type double

* paths are quoted so that the root may contain blanks
cd "${TERMINAL}/data"
use "RedoFullPostestimationV2_v5.dta", clear
*
* drop product groups: 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE)
sort product_group_code
drop if inlist(product_group_code, 3501, 4001)

*
* set the directory where results are saved (quoted so the root may contain blanks)
cd "${TERMINAL}/results/reg_output/geometric_mean"
*
* drop observations with a missing value in any variable used below.
* missing() is true for system missing and for extended missing values,
* whereas a comparison against system missing alone lets the latter through.
drop if missing(firmqualityt, upcqualityt, aut, SigmaU, SigmaF, value, mft, grouppriceindex)
*
* define variables
* firmproduc: number of observations with non-missing value in each
* product group x firm x year x quarter cell
* NOTE(review): presumably one observation per UPC, so this is a UPC count - confirm
bysort product_group_code firmid panel_year quarter: egen firmproduc = count(value)
* firm sales and product-group sales within a quarter, and the firm's share
bysort panel_year quarter product_group_code firmid: egen totalfirmsales = total(value)
bysort panel_year quarter product_group_code: egen productgroupsales = total(value)
gen firmshare2 = totalfirmsales/productgroupsales
* geometric mean of aut within the firm cell
* NOTE(review): gmean() is not an official egen function; presumably it comes
* from a user-written package such as egenmore - confirm it is installed
bysort product_group_code firmid panel_year quarter: egen geomeanAut = gmean(aut)
* aut relative to its firm-level geometric mean, deflated by upcqualityt,
* raised to the power (1-SigmaU)
gen double AQratio = ((aut/geomeanAut)/upcqualityt)^(1-SigmaU)
*
** define cutoff for small firms: firms with less than 0.5% share and drop them
*drop if firmshare2<0.005 // don't drop them
*
* take logs
* (lnFirmQuality is deliberately not created here: it is not in the collapse
*  list below, so it would be discarded; it is generated after the collapse)
gen double loggeomeanaut = log(geomeanAut)
gen double logfirmprod = log(firmproduc)
gen double logtotalsales = log(totalfirmsales)
*
* one observation per product group x firm x year x quarter:
* first non-missing size1_units, cell means of the listed variables, and the
* cell sum of AQratio. Variables not listed (e.g. firmshare2) are dropped.
collapse (firstnm) size1_units (mean) loggeomeanaut firmproduc logfirmprod logtotalsales firmqualityt mft SigmaF SigmaU (sum) AQratio, by(panel_year quarter product_group_code firmid) fast
* divide the cell sum by the cell count, i.e. an average over the cell
replace AQratio = AQratio/firmproduc
*
* consecutive integer id for every (panel_year, quarter) pair
sort panel_year quarter
egen time = group(panel_year quarter)
* consecutive integer id for every (product_group_code, firmid) pair
sort product_group_code firmid
egen pf = group(product_group_code firmid)

* declare the data a panel: unit pf, period time
xtset pf time
*
* log variables used in the decomposition
rename loggeomeanaut lnGeomeanAut
generate double lnFirmQuality = log(firmqualityt)
generate double lnAQratio = log(AQratio)
*
cd ${TERMINAL}/results/reg_output/geometric_mean

* NEW SET UP TO SUM TO ONE
* each component is rescaled by a multiple of (1-SigmaF)
generate double lnmftterm = ln(mft)*(1-SigmaF)
replace lnGeomeanAut = (1-SigmaF)*lnGeomeanAut
* note the sign: this term is scaled by (SigmaF-1), not (1-SigmaF)
replace lnFirmQuality = (SigmaF-1)*lnFirmQuality
* these two terms share the factor (1-SigmaF)/(1-SigmaU)
foreach v of varlist lnAQratio logfirmprod {
    replace `v' = `v' * (1-SigmaF)/(1-SigmaU)
}
* components gathered into a single residual term
generate double resid = lnAQratio+lnmftterm+lnGeomeanAut

* variable labels (LaTeX markup, kept verbatim)
label variable lnmftterm "$(1-\sigma_f)\ln m_{ft}$"
label variable resid "Residual Terms"
label variable logfirmprod "$\ln\left(\#\mathrm{UPC}_{ft}\right)$"
label variable lnFirmQuality "$\ln\varphi_{ft}$"
label variable lnAQratio "$\ln\left[\sum_u\left(\frac{a_{ut}}{\varphi_{ut}}\right)^{1-\sigma_u}\right]$"
label variable logtotalsales "$\ln s_{ft}$"

* keep only observations with a usable firm-quality term
drop if missing(lnFirmQuality)
* consecutive integer id per product group; not used below in this file,
* but it is stored in the data set saved at the end
sort product_group_code
egen pglist = group(product_group_code)
* for each variable: compute the product group x year x quarter average
* across firms (a-prefix) and the deviation from that average (d-prefix)
foreach X in lnFirmQuality logfirmprod resid logtotalsales {
    bysort panel_year quarter product_group_code: egen double a`X' = mean(`X')
    gen double d`X' = `X' - a`X'
}
* the outputs below are written to the qje subfolder of the current
* directory; create it if it is not there yet (capture ignores the error
* raised when the folder already exists)
capture mkdir qje
* run the variance decomposition regressions:
* one regression of each demeaned component on demeaned log sales per
* product group, with the estimates saved to one file per component
* NOTE(review): parmby is a user-written command, presumably from the
* parmest package - confirm it is installed
foreach X in lnFirmQuality logfirmprod resid {
    parmby "reg d`X' dlogtotalsales", by(product_group_code) saving("qje/VD_PG_regs_`X'", replace)
}
* save data
save qje/PG_decomposition_data, replace
