* ---------------------------------------------------------------------------
* Session setup: fix the Stata version, define the project root in the global
* TERMINAL, raise the matrix/variable limits and move to the data directory.
* ---------------------------------------------------------------------------
version 12.1
global TERMINAL "/mnt/data0/work/MPF_FINAL"

clear all
set more off
set matsize 11000
set maxvar 32767

* Variables created without an explicit storage type default to double
set type double

cd "${TERMINAL}/data/"


* Load NestedMPFPriceIndexRedo_v3.dta, rebuild the period index, and attach the
* base data plus the sigu/omega estimates for each product group.
use NestedMPFPriceIndexRedo_v3.dta, clear
	keep if panel_year != 1994
	drop time

	* time numbers the distinct (panel_year, quarter) combinations
	sort upc panel_year quarter
	egen double time = group(panel_year quarter)

	sort firmid product_group_code panel_year quarter

	* All rows are retained whatever their match status (base UPCs are not dropped)
	merge m:1 firmid product_group_code panel_year quarter using basedata_v3.dta, nogenerate

	* sharefirm = firmsalestotal divided by the period total of value
	bysort panel_year quarter: egen double totalval = total(value)
	gen double sharefirm = firmsalestotal/totalval

	********** Merge in estimated sigu and omega; only matched rows are kept
	sort product_group_code
	merge m:1 product_group_code using EstimationStaggeredCon_brand2, keepusing(sigu omega)
	keep if _merge == 3
	drop _merge

***********************************************

* Firm price index for every firm x product group x period cell, built from
* UPC prices and within-firm UPC shares with the exponent SigmaU (= sigu).
gen double SigmaU = sigu

* exp() of the cell means of the log share and the log price
bysort time product_group_code firmid: egen double meanshare = mean(lnshareprodfirm)
gen double geomeanshare = exp(meanshare)

bysort time product_group_code firmid: egen double meanprice = mean(lnprice)
gen double geomeanprice = exp(meanprice)

* UPC-level term: relative price times relative share raised to 1/(SigmaU-1)
gen double upcqualityt = (price/geomeanprice)*((shareprodfirm/geomeanshare)^(1/(SigmaU-1)))

* Variant of the index computed from the cell sum of relative shares
gen double relativeshare = shareprodfirm/geomeanshare
bysort time product_group_code firmid: egen double relsharesum = total(relativeshare)
gen double firmpriceindexother = (relsharesum ^ (1/(1-SigmaU)))*geomeanprice

* Index computed from prices divided by upcqualityt; its log is used further on
gen double relup = (price/upcqualityt)^(1-SigmaU)
bysort time product_group_code firmid: egen double relusum = total(relup)
gen double firmpriceindex = relusum^(1/(1-SigmaU))
gen double lnfirmpriceindex = ln(firmpriceindex)
 
 *******************************************************

		
	* Log of the base share, its one-period difference, and the one-period lag
	* of the log firm price index.
	gen double lnshare_base = ln(shareprodfirm_base)
	sort product_group_code firmid upc  panel_year quarter
	* The previous row is used only when it is exactly one period earlier for the
	* same firm in the same product group. The product-group test matters because
	* rows are ordered by product group first, so the last firm of one group can
	* sit directly above the same firmid in the next group. upc is not part of
	* the condition, so the previous row may be a different UPC of that firm.
	gen double diffbaseshare = lnshare_base[_n] - lnshare_base[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]
	gen double lag_lnfirmpriceindex= lnfirmpriceindex[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]

	* Aggregates over the UPC rows of each firm x product group x period cell
	bysort firmid product_group_code panel_year quarter: egen double meanlspf = mean(lnshareprodfirm)
	gen double geomean = exp(meanlspf)
	gen double lngeomean = ln(geomean)

	* Log of the summed ratio of each UPC share to exp(mean log share)
	gen double geomeanratio = shareprodfirm/geomean
	bysort firmid product_group_code panel_year quarter: egen double sumgeomeanratio = total(geomeanratio)
	gen double lngeoratio = ln(sumgeomeanratio)

	* Cell mean of the log price
	bysort firmid product_group_code panel_year quarter: egen double lngeoprice = mean(lnprice)

	* One-period change in the log firm price index
	gen double Satochange = lnfirmpriceindex - lag_lnfirmpriceindex



	
	* One-period lags inside each UPC x firm x product group, rows ordered by
	* period. lag_<v> is filled only when the previous row is exactly one period
	* earlier; otherwise it is missing.
	foreach v in lngeoratio lngeoprice lngeomean lnsharefirmgroup sharefirm {
		bysort upc firmid product_group_code (panel_year quarter): gen double lag_`v' = `v'[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1]
	}

	* NOTE(review): here the by-group is the whole product group, so when a group
	* has several rows per period only the first row of each period can satisfy
	* the one-period-earlier test -- confirm this is intended.
	bysort product_group_code (panel_year quarter): gen double lag_sharegroup = sharegroup[_n-1] if time[_n] == time[_n-1]+1


	* Where a lag is missing, substitute the mean of the non-missing lags in the
	* same firm x product group x period cell. The cell means stay in the data
	* under the test* names listed below (paired by position with the lags).
	local lagvars  lag_lngeoratio lag_lngeomean lag_lngeoprice lag_lnsharefirmgroup lag_sharefirm
	local fillvars testlaggeo testlagme testlagprice testlagshareg testlagshare
	local nfill : word count `lagvars'
	forvalues j = 1/`nfill' {
		local lagv  : word `j' of `lagvars'
		local fillv : word `j' of `fillvars'
		bysort firmid product_group_code panel_year quarter: egen `fillv' = mean(`lagv')
		replace `lagv' = `fillv' if `lagv' == .
	}

	save Prebasebrand_brand_v2.dta, replace

* XXXXXXXXXXX


	* ---- Pick one base firm per product group and store it in basefirms_brand_v2.dta ----
	use Prebasebrand_brand_v2.dta, clear
	sort product_group_code firmid product_module_code panel_year quarter

	* A re-sorted copy of the input is written out under a second name
	save Prebasebrand2_brand_v2.dta, replace

	** Criterion 1: firms present in as many periods as the best-covered firm
	** of their product group, counted after removing rows where sharefirm or
	** its lag is missing
	sort product_group_code firmid
	egen double firmmod = group(firmid)

	gen double test = sharefirm-lag_sharefirm
	keep if test != .

	* firmmod and countobs are not carried through the collapse below
	bysort upc firmid product_group_code: egen double countobs = count(value)

	collapse (firstnm) firmsalestotal sharefirm sharefirmgroup, ///
		by(firmid product_group_code panel_year quarter) fast

		* count_yr = number of periods remaining for the firm in the group
		bysort firmid product_group_code: gen double count_yr = _N
		bysort product_group_code: egen double max_count_yr = max(count_yr)
		keep if count_yr==max_count_yr

	** Criterion 2: among those firms, the one with the largest summed firmsalestotal
	collapse (sum) firmsalestotal, by(firmid product_group_code) fast
		gsort product_group_code -firmsalestotal
		drop if firmsalestotal == 0 | firmsalestotal == . | firmid == .
		** the first remaining row of each product group is the base firm
		keep if _n==1 | product_group_code!= product_group_code[_n-1]

		keep firmid product_group_code
		sort product_group_code firmid
		save basefirms_brand_v2.dta, replace
		
	* ---- Extract the base firm's series, one row per product group x period ----
	use Prebasebrand_brand_v2.dta, clear

	sort product_group_code firmid

	* Restrict to rows belonging to the selected base firms
	merge m:1 product_group_code firmid  using basefirms_brand_v2.dta
	keep if _merge == 3

	* countobs is not carried through the collapse below
	bysort upc firmid product_group_code: egen double countobs = count(value)

	* Mark every base-firm series with the suffix _base
	foreach v in lnsharefirmgroup lag_lnsharefirmgroup Satochange lngeoratio lngeomean lag_lngeomean lag_lngeoratio lngeoprice lag_lngeoprice {
		rename `v' `v'_base
	}
	gen double product_group_code_base = product_group_code
	gen double firmid_base = fm

	* First non-missing value of each series within product group x period
	collapse (firstnm) lnsharefirmgroup_base lag_lnsharefirmgroup_base Satochange_base lngeoratio_base  lag_lngeoratio_base product_group_code_base firmid_base lngeomean_base lag_lngeomean_base lngeoprice_base lag_lngeoprice_base, ///
		by(product_group_code panel_year quarter) fast

	duplicates drop product_group_code panel_year quarter, force

	sort product_group_code panel_year quarter

	save basefirmdata_brand_v2.dta, replace

		*****
		


* Reload the full panel and attach the base-firm series of the same product
* group and period to every row; rows are kept whatever their match status.
use Prebasebrand_brand_v2.dta, clear

	sort product_group_code panel_year quarter
	merge m:1 product_group_code panel_year quarter using basefirmdata_brand_v2.dta, nogenerate
	
	

	**********************************


	*** variables for GMM estimation ***
	

	*** Firm ***
	* Double differences: the firm's own one-period change minus the base firm's
	* one-period change in the same product group and period.
	gen double ddfirmshare = (lnsharefirmgroup - lag_lnsharefirmgroup) - (lnsharefirmgroup_base - lag_lnsharefirmgroup_base)
	gen double ddSato = Satochange - Satochange_base
	gen double ddrelativeshare = lngeoratio - lag_lngeoratio - (lngeoratio_base - lag_lngeoratio_base)
	gen double firmprodshare = exp(lnshareprodfirm)
	* The base term uses the base firm's own lag (lag_lngeomean_base), like the
	* other double differences; with the firm's own lag_lngeomean there, the
	* lags cancelled and only lngeomean - lngeomean_base remained.
	gen double ddgeomeanshare = lngeomean - lag_lngeomean - (lngeomean_base - lag_lngeomean_base)
	
	
	*** Firms ***
	* Reduce to the variables needed for estimation, collapse to one row per
	* product group x firm x period, build the weights and save PreGMM2V2_brand_v2.dta.
	keep fm upc2 upc upc_base lag_lnfirmpriceindex  rawb lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base  lag_lnsharefirmgroup_base product_module_code firmid product_group_code panel_year quarter time  firmbuyers ddfirmshare ddSato ddrelativeshare firmid_base lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base firmprodshare ddgeomeanshare
	*keep fm ddupcprice upc2 upc  upc_base  rawb lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base  lag_lnsharefirmgroup_base product_module_code firmid product_group_code panel_year quarter time  firmbuyers ddfirmshare ddSato ddrelativeshare firmid_base lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base dropupc2 firmprodshare ddgeomeanshare
	*bys upc firmid product_group_code: egen double countobs = count(ddupcprice)
	*sum countobs,d

	* First non-missing value of each variable within the cell
	collapse  (firstnm) ddSato ddfirmshare ddrelativeshare firmbuyers firmid_base lnsharefirmgroup lag_lnsharefirmgroup lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base lnsharefirmgroup_base lag_lnsharefirmgroup_base ddgeomeanshare, by(product_group_code firmid fm panel_year quarter time) fast

	sort product_group_code firmid panel_year quarter time

	* weight = 1/firmbuyers in this period + 1/firmbuyers in the previous period.
	* The previous row counts only if it is one period earlier for the same firm
	* in the same product group; the product-group test is needed because rows
	* are ordered by product group first, so the same firmid can be adjacent
	* across a group boundary.
	gen double weight = 1/firmbuyers[_n]+1/firmbuyers[_n-1] if time[_n]==time[_n-1]+1 & firmid[_n]==firmid[_n-1] & product_group_code[_n]==product_group_code[_n-1]

	gen double bias = weight

	* Drop rows with a missing double difference
	drop if ddSato == .
	drop if ddfirmshare == .
	drop if ddrelativeshare == .

	* Drop the base firm itself (firmid_base was filled from fm)
	drop if fm == firmid_base

	* countobs = rows with non-missing ddfirmshare per firm x product group,
	* counted before the rows without a weight are removed
	bys firmid product_group_code: egen double countobs = count(ddfirmshare)

	* Final weight = (sum of inverse firmbuyers)^(-1/2) * countobs^(3/2)
	drop if weight == .
	gen double countweight = countobs^(3/2)
	replace weight = weight ^(-1/2)
	replace weight = weight*countweight
	drop countweight countobs

	sort firmid time

	* Renumber the periods over the rows that remain
	drop time
	sort panel_year quarter
	egen double time = group(panel_year quarter)

	drop if product_group_code == .
	sort product_group_code firmid  time

	drop panel_year quarter

	order fm product_group_code ddfirmshare ddSato ddrelativeshare weight  lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base lag_lnsharefirmgroup_base lngeoprice lag_lngeoprice lngeoprice_base lag_lngeoprice_base

	saveold PreGMM2V2_brand_v2.dta, replace

* ***************************************
* This code estimates the elasticity parameter SigmaF and merges it together with the SigmaU and delta estimates. 
* ***************************************


** Prepare double differenced variables for the estimation and drop missing values 
* Double-differenced mean log price: own one-period change minus the base firm's
gen double ddgeomeanprice = lngeoprice- lag_lngeoprice - (lngeoprice_base-lag_lngeoprice_base)

* Attach sigu and omega by product group and keep matched rows only
sort product_group_code
merge m:1 product_group_code using EstimationStaggeredCon_brand2, keepusing(sigu omega)
tabulate _merge
keep if _merge == 3
drop _merge

* Remove rows in which any regressor is missing
drop if ddSato == . | ddrelativeshare == . | ddgeomeanprice == .
gen double ddpft = ddSato
drop if ddpft == .

* Names used by the estimation step; x3other scales ddrelativeshare by 1/(1-sigu)
gen double yother = ddfirmshare
gen double x1other = ddpft
gen double x2other = ddgeomeanprice
gen double x3other = ddrelativeshare*(1/(1-sigu))

* Trim outside the 1st-99th percentile range: ddfirmshare first, then ddSato
* with percentiles recomputed on the rows that remain
summarize ddfirmshare, detail
local ddfmin = `r(p1)'
local ddfmax = `r(p99)'
drop if ddfirmshare < `ddfmin' | ddfirmshare > `ddfmax'
summarize ddSato, detail
local ddsmin = `r(p1)'
local ddsmax = `r(p99)'
drop if ddSato < `ddsmin' | ddSato > `ddsmax'

* t        = non-missing yother rows per product group x fm
* tsum     = sum of t over all rows of the product group
* countobs = rows of the product group with non-missing t
* Product groups with fewer than 2 such rows are dropped.
bysort product_group_code fm: egen double t = count(yother)
bysort product_group_code: egen tsum = total(t)
bysort product_group_code: egen countobs = count(t)
drop if countobs < 2

* pglist numbers the remaining product groups 1, 2, ...
sort product_group_code
egen double pglist = group(product_group_code)

* Save the dataset used in the estimation
save estimationdata2sls_brand_v2.dta,replace

* estimate SigmaF by using the IV approach proposed by Broda and Weinstein (2006) 
* Estimate SigmaF separately for each product group by using the IV approach
* proposed by Broda and Weinstein (2006): yother is regressed on x1other with
* x3other as the instrument, and sigf = 1 - coefficient on x1other. One result
* row per product group is accumulated in FirmNew2slsV2_brand_v2.dta.
* ivreg2 is not a built-in command and must be installed.
use estimationdata2sls_brand_v2.dta, clear
*
sum pglist, meanonly
local max = `r(max)'
*
forvalues i = 1/`max' {
forvalues k = 1/1 {
	* Locals survive from one iteration to the next. Clear the result holders so
	* that a captured assignment that fails below leaves them empty (and the
	* matching variable ungenerated) instead of reusing the previous group's value.
	local sigf_est
	local sigf_se
	local ssr
	local r2

	use estimationdata2sls_brand_v2.dta,clear
	keep if pglist == `i'
	* Values taken from the first row of the group
	local pg = product_group_code
	local obs = countobs
	local sumt = tsum

	xtset fm time

	* OLS is run for the log only; its results are not stored
	reg yother x1other, robust

	capture noisily:ivreg2 yother (x1other = x3other), gmm2s robust endogtest(x1other) small first

	* Keep the return code now; every later capture overwrites _rc
	local rc = _rc

	if `rc' == 0 {
		capture: local sigf_est = (1-(_b[x1other]))

		disp `sigf_est'

		capture: local sigf_se = _se[x1other]

		disp `sigf_se'

		capture: local ssr = e(rss)
		capture: local r2 = e(r2)
	}

	* Replace the data by a single result row for this product group
	drop _all
	set obs 1
	gen group = `pg'
	if `rc' == 0 {
		capture: gen double sigf = `sigf_est'
		capture: gen double sigf_SE = `sigf_se'
		capture: gen double SSR = `ssr'
		capture: gen double obs = `obs'
		capture: gen double timesum = `sumt'
		capture: gen double r2 = `r2'
	}

	* From the second group on, append the rows saved so far; the first
	* iteration overwrites the results file
	if `i' > 1 | `k' >1 {
		append using FirmNew2slsV2_brand_v2.dta
	}
	save FirmNew2slsV2_brand_v2.dta, replace

}
}


* Combine the SigmaF results with the estimates in EstimationStaggeredCon_brand2
* and write the final table to the results directory.
use FirmNew2slsV2_brand_v2.dta, clear

* Free the names SSR, obs and r2, which are read from the merged file below
rename group product_group_code
rename SSR   SSR_2
rename obs   obsother
rename r2    rn_2
sort product_group_code

* Merge in the SigmaU and delta estimates
merge m:1 product_group_code using EstimationStaggeredCon_brand2
tabulate _merge
drop _merge

* Drop rows without usable estimates
gen double ssrtest = SSR_2+SSR
drop if SSR == . | sigf_SE == 0 | sigf == . | sigu == .

* One row per product group: first non-missing value in ascending ssrtest order
gsort product_group_code ssrtest
collapse (firstnm) sigu sigf omega sigu_SE sigf_SE omega_SE ssrtest SSR SSR_2 obs obsother r2 rn_2, ///
	by(product_group_code)
summarize sigf, detail

* Drop product groups whose obs is below 30
drop if obs < 30

* Save results
cd "${TERMINAL}/results/"
save NewGMMresultsV2_brand_v2.dta, replace
