* ---------------------------------------------------------------------------
* Session setup.
*   - Pins the interpreter to Stata 12.1 semantics.
*   - Clears memory, disables output paging, and makes double the default
*     storage type for newly created variables.
*   - TERMINAL is the project root; all later cd commands are relative to it.
* The settings were previously issued three times in a row; once is enough
* because nothing between the repetitions altered them.
* ---------------------------------------------------------------------------
version 12.1
clear all
set more off
set type double
global TERMINAL "/mnt/data0/work/MPF_FINAL"

* Stated goal of this script: exclude small firms from the estimates,
* but include them when calculating the turnover rate.

* The path is quoted so that the command still works if TERMINAL is ever
* pointed at a directory whose name contains spaces.
cd "${TERMINAL}/data"

* ---------------------------------------------------------------------------
* Load the post-estimation data and restrict the sample to:
*   - product groups other than 3501, 4001, 4505 and 5515,
*   - rows with complete data for every variable used below,
*   - the two periods 2004Q4 and 2011Q4,
*   - firm x product-group pairs that are observed in both periods.
* ---------------------------------------------------------------------------
use RedoFullPostestimationV2_v5.dta, clear

* drop product groups: 3501 (FRESH MEAT) and 4001 (FRESH PRODUCE)
sort product_group_code
drop if inlist(product_group_code, 3501, 4001)

* set the directory where results are saved
* (quoted so that a TERMINAL path containing spaces does not break cd)
cd "${TERMINAL}/results/reg_output/geometric_mean"

* drop two further product groups (4505 and 5515)
drop if inlist(product_group_code, 4505, 5515)

* Drop rows with a missing value in any input variable.
* missing() is used instead of "== ." so that extended missing values
* (.a-.z) are dropped as well; "== ." only matches the system missing value.
drop if missing(firmqualityt, upcqualityt, aut, SigmaU, SigmaF, value, mft, grouppriceindex)

* Keep the two periods that are differenced: 2004Q4 and 2011Q4.
* Parentheses make the & / | precedence explicit.
keep if (panel_year == 2004 & quarter == 4) | (panel_year == 2011 & quarter == 4)

* Recode time to consecutive integers so that "time == time[_n-1] + 1"
* identifies the 2004 -> 2011 step. The variable time must already exist in
* the loaded dataset, because it is modified with replace rather than created.
replace time = 1 if panel_year == 2004
replace time = 2 if panel_year == 2011

* Count the number of distinct periods in which each firm x product group
* appears: flag the first row of every firm-group-period cell and add up the
* flags within firm x product group.
bys product_group_code firmid panel_year quarter: gen list = _n
gen count = 1 if list == 1
* total() is the documented name of the egen function formerly called sum()
bys product_group_code firmid: egen timeperiods = total(count)
sum timeperiods, d

* keep only firm x product-group pairs present in both periods
drop if timeperiods != 2
* generate variables
* firmproduc: number of rows with a non-missing value within each
* firm x product group x period cell
bys product_group_code firmid panel_year quarter: egen double firmproduc = count(value)
* NOTE(review): firmsalestotal is not created anywhere in the visible script;
* presumably it is supplied by the loaded dataset -- confirm. avgsales is not
* referenced again in the visible script.
gen double avgsales = firmsalestotal/firmproduc
* totalfirmsales: sum of value within each firm x product group x period cell
bys panel_year quarter product_group_code firmid: egen double totalfirmsales = total(value)
* generate per period firm share of product group
bys panel_year quarter product_group_code: egen double productgroupsales = total(value)
gen firmshare2=totalfirmsales/productgroupsales
* define the cutoff rule for large firms: keep firm-period rows whose share of
* product-group sales is at least 0.5%
* NOTE(review): the cutoff is evaluated separately for each period and is
* applied after the two-period filter, so a firm can keep only one of its two
* periods here -- confirm that this is intended.
drop if firmshare2<0.005

* Define the marketing and product upgrading effects
* lnuqual: lnupcquality divided by the firm's product count (firmproduc)
gen double lnuqual = lnupcquality/firmproduc
* lag_uqual: lnuqual of the preceding row of the same upc x firm x product group
* (rows ordered by panel_year quarter), filled only when time advanced by
* exactly one between the two rows; otherwise missing
bys upc  firmid product_group_code (panel_year quarter): gen double lag_uqual=lnuqual[_n-1] if time[_n] == time[_n-1]+1 & upc[_n] == upc[_n-1]
* 0/1 flags: current = lnuqual is available on this row, previous = a lagged
* value is available, common = both, current_only = current but no lag
gen double current=(lnuqual!=.)
gen double previous=(lag_uqual!=.)
gen double common=((current==1 & previous==1))
gen double current_only=(current==1 & common==0)

* Sums within firm x product group x period. egen total() treats missing
* values as zero, so a cell without qualifying rows receives 0, not missing.
bys firmid product_group_code panel_year quarter: egen double common_total = total(cond(common==1,lnuqual,.))
bys firmid product_group_code panel_year quarter: egen double common_lag_total = total(cond(common==1,lag_uqual,.))
bys firmid product_group_code panel_year quarter: egen double current_only_total = total(cond(current_only==1,lnuqual,.))
gen double current_total = common_total + current_only_total

* Lagged firm-level values. Within a firm x product group, only a row whose
* immediately preceding row has time one lower satisfies the condition, so
* every other row of the cell is left missing. The firmid and
* product_group_code equality checks are always true inside the by-group.
bys firmid product_group_code (panel_year quarter): gen double lag_current_total = current_total[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]
bys firmid product_group_code (panel_year quarter): gen double lag_lnfirmquality = lnfirmquality[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]
* previous_only_total: lagged total minus the lagged total over common rows
gen double previous_only_total = lag_current_total - common_lag_total


* marginal cost price index
* pgsales: total value per product group and period, computed on the rows
* still in memory at this point
bys product_group_code panel_year quarter: egen double pgsales = sum(value)
* share2: this row's share of its product group's sales in the period
gen double share2 = value/pgsales
* Lagged aut and share2 for the same upc x product group (no firmid in the
* by-list here), filled only when time advanced by exactly one
bys upc  product_group_code (panel_year quarter): gen double lag_aut=aut[_n-1] if time[_n] == time[_n-1]+1 & upc[_n] == upc[_n-1]
bys upc  product_group_code (panel_year quarter): gen double lag_share2=share2[_n-1] if time[_n] == time[_n-1]+1 & upc[_n] == upc[_n-1]
* val: average of current and lagged share times the log change in aut
* (the per-item term of a Tornqvist-form index)
gen double val = (1/2)*(share2+lag_share2)*ln(aut/lag_aut)
* lnTorn: sum of val over common rows within each product group; Torn is its
* exponential. Neither variable is referenced again in the visible script.
bys product_group_code: egen double lnTorn = total(cond(common==1,val,.))
gen double Torn = exp(lnTorn)

* Turnover measures
* NOTE(review): rows below the 0.5% share cutoff have already been dropped
* before this point, so they do not enter these counts; the note at the top of
* the file says small firms should be included in the turnover rate -- confirm.
* indicator is a constant 1 used to count rows
gen double indicator = 1
* lag_indicator equals 1 when the same upc x firm x product group has a row in
* the preceding period, and is missing otherwise
bys upc  firmid product_group_code (panel_year quarter): gen double lag_indicator=indicator[_n-1] if time[_n] == time[_n-1]+1 & upc[_n] == upc[_n-1]
* Row counts per product group and period: common rows, current-only rows,
* and their sum
bys product_group_code panel_year quarter: egen double common_products = total(cond(common==1,indicator,.))
bys product_group_code panel_year quarter: egen double current_only_products = total(cond(current_only==1,indicator,.))
gen double current_products = common_products + current_only_products
bys product_group_code panel_year quarter: egen double common_lag_products = total(cond(common==1,lag_indicator,.))
* The lag of the product-group count is taken within firm x product group, so
* it is filled only on a row whose preceding row has time one lower
bys firmid product_group_code (panel_year quarter): gen double lag_current_products = current_products[_n-1] if time[_n] == time[_n-1]+1 & product_group_code[_n] == product_group_code[_n-1]
gen double previous_only_products = lag_current_products - common_lag_products
* turnover: (current-only + previous-only) / (current + lagged current)
gen double turnover = (current_only_products + previous_only_products)/(current_products+lag_current_products)
* turnover is only summarised here; it is not carried through the collapse
sum turnover,d

*
**  Note: (1) puremarketingeffect2 is the first term in the firm quality decomposition equation
*         (2) product_quality_upgrading2 is the second term in the firm quality decomposition equation
*
* Marketing effect: change in the total over common rows, plus its two pieces
* (current-period total, and minus the lagged total).
gen double puremarketingeffect = common_total - common_lag_total
gen double puremarketingeffect1 = common_total
gen double puremarketingeffect2 = -1*common_lag_total
* Product upgrading: current-only total minus previous-only total, plus its
* two pieces. previous_only_total is missing on most rows, so the terms that
* use it are missing there as well.
gen double product_quality_upgrading = current_only_total - previous_only_total
gen double product_quality_upgrading1 = current_only_total
gen double product_quality_upgrading2 = -1*previous_only_total
* Change in log firm quality and the combined marketing effect
gen double changeinlnfirmquality = lnfirmquality - lag_lnfirmquality
gen double totalmarketingeffect = puremarketingeffect + changeinlnfirmquality
*
* generate the other variables used in the decomposition
* geomeanAut: geometric mean of aut within firm x product group x period.
* NOTE(review): gmean() is not an official egen function; it appears to
* require the user-written egenmore package -- confirm it is installed.
bys product_group_code firmid panel_year quarter: egen geomeanAut = gmean(aut)
* AQratio: aut relative to its geometric mean, divided by upcqualityt, raised
* to the power (1 - SigmaU)
gen double AQratio = ((aut/geomeanAut)/upcqualityt)^(1-SigmaU)
* take the log of the variables
gen double loggeomeanaut = log(geomeanAut)
gen double logfirmprod = log(firmproduc)
gen double logtotalsales = log(totalfirmsales)
* lnFirmQuality is deliberately not created here. It is not part of the
* collapse varlist, so a row-level version would be discarded by the collapse,
* and it is generated from the collapsed firmqualityt (and rescaled by
* SigmaF-1) after the collapse. The former gen/replace pair was dead code.
*
*
* ---------------------------------------------------------------------------
* Collapse to one row per firm x product group x period:
*   first non-missing size1_units, means of the decomposition terms and firm
*   level inputs, and the sum of AQratio over the cell's rows.
* ---------------------------------------------------------------------------
collapse (firstnm) size1_units                                               ///
         (mean)    loggeomeanaut                                             ///
                   puremarketingeffect1 product_quality_upgrading1           ///
                   puremarketingeffect2 product_quality_upgrading2           ///
                   puremarketingeffect product_quality_upgrading             ///
                   firmproduc logfirmprod logtotalsales                      ///
                   firmqualityt mft SigmaF SigmaU                            ///
         (sum)     AQratio                                                   ///
         , by(panel_year quarter product_group_code firmid) fast

* turn the summed AQratio into a per-product average
replace AQratio = AQratio / firmproduc

* panel identifiers: time indexes the periods, pf indexes firm x product group
sort panel_year quarter
egen time = group(panel_year quarter)
sort product_group_code firmid
egen pf = group(product_group_code firmid)

* declare the panel so that the d. operator can be used
xtset pf time

* log terms of the decomposition, built from the collapsed values
rename loggeomeanaut lnGeomeanAut
gen double lnFirmQuality = ln(firmqualityt)
gen double lnAQratio     = ln(AQratio)
gen double lnmftterm     = ln(mft)*(1-SigmaF)
* results directory
cd ${TERMINAL}/results/reg_output/geometric_mean

* LaTeX variable labels used in the output tables
label variable logfirmprod "$\ln\left(\#\mathrm{UPC}_{ft}\right)$"
label variable lnFirmQuality "$\ln\varphi_{ft}$"
label variable lnmftterm "$(1-\sigma_f)\ln m_{ft}$"
label variable lnAQratio "$\ln\left[\sum_u\left(\frac{a_{ut}}{\varphi_{ut}}\right)^{1-\sigma_u}\right]$"
label variable logtotalsales "$\ln s_{ft}$"


* NEW SET UP TO SUM TO ONE
* Each term is rescaled in place by its elasticity factor.
*   factor (1-SigmaF)
replace lnGeomeanAut = (1-SigmaF)*lnGeomeanAut
*   factor (1-SigmaF)/(1-SigmaU)
foreach ratio_term in lnAQratio logfirmprod {
    replace `ratio_term' = `ratio_term' * (1-SigmaF)/(1-SigmaU)
}
*   factor (SigmaF-1)
foreach quality_term in lnFirmQuality puremarketingeffect product_quality_upgrading puremarketingeffect1 product_quality_upgrading1 puremarketingeffect2 product_quality_upgrading2 {
    replace `quality_term' = (SigmaF-1)*`quality_term'
}

* generate the time differenced variable of interest (t=2011Q4, t-1=2004Q4)
* Level variables are differenced with the d. operator; source and target
* names are paired by position in the two lists.
local level_vars logtotalsales lnFirmQuality lnAQratio lnmftterm logfirmprod lnGeomeanAut
local diff_vars  d_logtotalsales d_lnFQ d_lnAQR d_lnMft d_lnfirmprod d_lnGeomeanAut
local n_level : word count `level_vars'
forvalues i = 1/`n_level' {
    local src : word `i' of `level_vars'
    local dst : word `i' of `diff_vars'
    gen double `dst' = d.`src'
}
* The marketing and upgrading terms are already changes over time, so they
* are copied under a d_ name rather than differenced again.
foreach change_term in puremarketingeffect product_quality_upgrading puremarketingeffect1 product_quality_upgrading1 puremarketingeffect2 product_quality_upgrading2 {
    gen double d_`change_term' = `change_term'
}


* store the firm panel with both periods before restricting to 2011
save decomp_data_whole_sample_v3_diffAvg_v5.dta,replace
*save decomp_data_whole_sample_v3_diffAvg_v5_large.dta,replace

* the time differences are only defined on the 2011 rows
keep if panel_year ==2011

* time-differenced variables that are demeaned within product group
local dvars d_logtotalsales d_lnFQ d_lnAQR d_lnMft d_lnfirmprod d_lnGeomeanAut d_puremarketingeffect d_product_quality_upgrading d_puremarketingeffect1 d_product_quality_upgrading1 d_puremarketingeffect2 d_product_quality_upgrading2

* generate the average of the time differenced variables
* (product-group mean per period, stored with an "a" prefix)
sort panel_year quarter product_group_code
foreach X of local dvars {
    by panel_year quarter product_group_code: egen double a`X'=mean(`X')
}

* generate the k-differenced time differenced variables (time differenced firm
* level variable minus the product group average of the time differenced firm
* level variable); the result carries a second d_ prefix
foreach X of local dvars {
    gen double d_`X'=`X'-a`X'
}
*save decomp_data_whole_sample_v3_diffAvg2_v5.dta,replace
*save decomp_data_whole_sample_v3_diffAvg2_v5_large.dta,replace
*
* run the regressions
* One regression of each component on the k-differenced change in log sales
* per product group; parmby writes the estimates to the qje subfolder.
* The local macro small is not defined in the visible script, so it adds
* nothing to the file name.
local outcomes d_d_lnFQ d_d_lnAQR d_d_lnMft d_d_lnfirmprod d_d_lnGeomeanAut d_d_product_quality_upgrading2
foreach X of local outcomes {
    parmby "reg `X' d_d_logtotalsales", by(product_group_code) saving("qje/VDG_PG_regs_`X'`small'", replace)
}

* keep the regression sample for the pooled turnover-group regressions
save PG_decomposition_data_G, replace

* ---------------------------------------------------------------------------
* Pooled regressions by turnover group.
* grp = 1 for the ten product groups with the highest turn,
* grp = 2 for the ten with the lowest turn, grp = 0 for all others.
* ---------------------------------------------------------------------------
use pg_turn, clear

* rank from the top: with the field option the largest value gets rank 1
egen pg_rank = rank(turn), field
gen grp = (pg_rank<=10)
drop pg_rank

* rank from the bottom: ranking -turn gives rank 1 to the smallest turn
egen pg_rank = rank(-turn), field
replace grp = 2*(pg_rank<=10) if grp==0

* attach the group flag to the firm-level regression sample (matches only)
merge 1:m product_group_code using PG_decomposition_data_G, keep(3) nogen

cd ${TERMINAL}/results/reg_output/geometric_mean/qje

* same specification as the product-group regressions, now estimated by grp;
* the local macro small is not defined in the visible script, so it adds
* nothing to the file name
local outcomes d_d_lnFQ d_d_lnAQR d_d_lnMft d_d_lnfirmprod d_d_lnGeomeanAut d_d_product_quality_upgrading2
foreach X of local outcomes {
    parmby "reg `X' d_d_logtotalsales", by(grp) saving("VDG_TurnPool_regs_`X'`small'", replace)
}


