version 12.1

* Start from an empty session and disable output paging so the script
* runs unattended.
clear all
set more off

* Session-wide settings:
*   - new variables default to double precision
*   - raise the variable and matrix-size limits
set type double
set maxvar 32767
set matsize 11000

* Project root; all datasets below are read from / written to its data/ folder.
global TERMINAL "/mnt/data0/work/MPF_FINAL"
cd ${TERMINAL}/data/


use NestedMPFPriceIndexRedo_v3.dta, clear
	* Exclude panel year 1994 and discard the stored time index; it is rebuilt below.
	keep if panel_year != 1994
	drop time

	* time = integer index of the (panel_year, quarter) combinations that remain.
	sort upc  panel_year quarter
	egen double time = group(panel_year quarter)

	* Attach the base data at the firm x product group x quarter level.
	sort firmid product_group_code panel_year quarter
	merge m:1 firmid product_group_code panel_year quarter using basedata_v3.dta
	** All merge outcomes are kept on purpose: don't drop base upc **
	drop _merge

	* sharefirm = firmsalestotal relative to the sum of value over all rows in the quarter.
	bysort panel_year quarter: egen double totalval = total(value)
	gen double sharefirm = firmsalestotal / totalval

	********** Merge in the estimated sigma_u's (sigu) and omega
	* Old-style (pre-Stata 11) merge syntax: the data in memory must be sorted
	* by the key, and the using file is expected to be sorted by it as well.
	sort product_group_code
	merge product_group_code using  EstimationStaggeredConTEST2.dta, keep(sigu omega)
	* Keep only rows matched in both datasets.
	keep if _merge == 3
	drop _merge

***********************************************
* Firm-level price index within (time, product_group_code, firmid) cells.
* NOTE(review): lnshareprodfirm / lnprice are presumably ln(shareprodfirm) /
* ln(price), which would make the exp(mean(.)) terms geometric means -- confirm
* against the script that builds the input file.

gen double SigmaU = sigu

* exp of the within-cell mean of the log share and of the log price
bysort time product_group_code firmid: egen double meanshare = mean(lnshareprodfirm)
gen double geomeanshare = exp(meanshare)

bysort time product_group_code firmid: egen double meanprice = mean(lnprice)
gen double geomeanprice = exp(meanprice)

* UPC term: price and share relative to the cell-level values above,
* with the share ratio raised to 1/(SigmaU-1)
gen double upcqualityt = (price/geomeanprice)*((shareprodfirm/geomeanshare)^(1/(SigmaU-1)))

* First version of the index, built from the summed relative shares
gen double relativeshare = shareprodfirm/geomeanshare
bysort time product_group_code firmid: egen double relsharesum = total(relativeshare)
gen double firmpriceindexother = (relsharesum ^ (1/(1-SigmaU)))*geomeanprice

* Second version: sum of (price/upcqualityt)^(1-SigmaU) over the cell,
* raised to 1/(1-SigmaU); its log is what the later sections use
gen double relup = (price/upcqualityt)^(1-SigmaU)
bysort time product_group_code firmid: egen double relusum = total(relup)
gen double firmpriceindex = relusum^(1/(1-SigmaU))
gen double lnfirmpriceindex = ln(firmpriceindex)
 
 *******************************************************

		
	* Log of the base share variable (shareprodfirm_base is not created in this
	* script -- presumably it comes from basedata_v3.dta; confirm).
	gen double lnshare_base = ln(shareprodfirm_base)

	* One-period changes / lags taken from the immediately preceding row.
	* The lag is only accepted when the preceding row is exactly one period
	* earlier AND belongs to the same firm in the same product group.
	* The product_group_code check matters because the sort below places the
	* first row of a product group directly after the last row of the previous
	* group; without it a firm id appearing on both sides of that boundary
	* would pick up a value from a different product group.
	sort product_group_code firmid upc  panel_year quarter
	gen double diffbaseshare = lnshare_base[_n] - lnshare_base[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]
	gen double lag_lnfirmpriceindex= lnfirmpriceindex[_n-1] if time[_n] == time[_n-1]+1 & firmid[_n] == firmid[_n-1] & product_group_code[_n] == product_group_code[_n-1]

	* Within firm x product group x quarter:
	*   geomean      = exp(mean(lnshareprodfirm))
	*   lngeoratio   = ln( sum over rows of shareprodfirm/geomean )
	*   lngeoprice   = mean(lnprice)
	bys firmid product_group_code panel_year quarter: egen double meanlspf = mean(lnshareprodfirm)
	gen double geomean = exp(meanlspf)
	gen double lngeomean = ln(geomean)
	gen double geomeanratio = shareprodfirm/geomean
	bys firmid product_group_code panel_year quarter: egen double sumgeomeanratio = sum(geomeanratio)
	gen double lngeoratio = ln(sumgeomeanratio)
	bys firmid product_group_code panel_year quarter: egen double lngeoprice = mean(lnprice)

	* One-period change in the log firm price index (missing when no valid lag).
	gen double Satochange = lnfirmpriceindex - lag_lnfirmpriceindex



	
	* One-period lags taken along each upc x firm x product group series,
	* ordered by (panel_year quarter). A lag is only set when the previous
	* row of the series is exactly one period earlier.
	foreach src in lngeoratio lngeoprice lngeomean lnsharefirmgroup sharefirm {
		bysort upc firmid product_group_code (panel_year quarter): gen double lag_`src' = `src'[_n-1] if time == time[_n-1]+1 & firmid == firmid[_n-1]
	}

	* NOTE(review): within a product group there are normally many rows per
	* period, so this lag is only non-missing on the first row of each period;
	* unlike the lags above it is not filled in below -- confirm this is intended.
	bysort product_group_code (panel_year quarter): gen double lag_sharegroup = sharegroup[_n-1] if time == time[_n-1]+1

	* Fill missing lags with the mean of the non-missing lags found in the
	* same firm x product group x quarter cell. The helper variables (test*)
	* are left in the data, as before.
	local lagvars lag_lngeoratio lag_lngeomean lag_lngeoprice lag_lnsharefirmgroup lag_sharefirm
	local helpers testlaggeo testlagme testlagprice testlagshareg testlagshare
	forvalues i = 1/5 {
		local lagvar : word `i' of `lagvars'
		local helper : word `i' of `helpers'
		bysort firmid product_group_code panel_year quarter: egen `helper' = mean(`lagvar')
		replace `lagvar' = `helper' if `lagvar' == .
	}

	save Prebasebrand_v3.dta, replace




	* Write a copy of the prepared data sorted by module within firm.
	use Prebasebrand_v3.dta, clear
	sort product_group_code firmid product_module_code panel_year quarter
	save Prebasebrand2_v3.dta, replace

	** Choose the base firm for each product group **

	* Criterion 1: firms observed in the largest number of quarters
	sort product_group_code firmid
	egen double firmmod = group(firmid)

	* Only rows with both a current and a lagged firm share are considered.
	gen double test = sharefirm - lag_sharefirm
	keep if test != .

	* countobs does not survive the collapse below; the statement is kept
	* because its sort determines the row order collapse (firstnm) sees.
	bysort upc firmid product_group_code: egen double countobs = count(value)

	* One row per firm x product group x quarter
	collapse (firstnm) firmsalestotal sharefirm sharefirmgroup, by(firmid product_group_code panel_year quarter) fast

	* Keep the firms whose number of quarters equals the maximum in their group.
	bysort firmid product_group_code: gen double count_yr = _N
	bysort product_group_code: egen double max_count_yr = max(count_yr)
	keep if count_yr == max_count_yr

	* Criterion 2: among those, the firm with the largest total sales
	collapse (sum) firmsalestotal, by(firmid product_group_code) fast
	gsort product_group_code -firmsalestotal
	drop if firmsalestotal == 0 | firmsalestotal == . | firmid == .

	** largest firm as base: keep the first row of every product group **
	drop if _n > 1 & product_group_code == product_group_code[_n-1]

	keep firmid product_group_code
	sort product_group_code firmid
	save basefirms_v3.dta, replace
		
	* Build one row per product group x quarter holding the base firm's series.
	use Prebasebrand_v3.dta, clear

	* Restrict to the rows of the selected base firms.
	sort product_group_code firmid
	merge m:1 product_group_code firmid  using basefirms_v3.dta
	drop if _merge != 3

	* countobs does not survive the collapse below; the statement is kept
	* because its sort determines the row order collapse (firstnm) sees.
	bysort upc firmid product_group_code: egen double countobs = count(value)

	* Tag the base firm's variables with a _base suffix so they can sit next
	* to every other firm's own values after the merge that follows.
	foreach v in lnsharefirmgroup lag_lnsharefirmgroup Satochange lngeoratio lngeomean lag_lngeomean lag_lngeoratio lngeoprice lag_lngeoprice {
		rename `v' `v'_base
	}
	gen double product_group_code_base = product_group_code
	gen double firmid_base = fm

	* First non-missing value of each base variable per product group x quarter
	collapse (firstnm) lnsharefirmgroup_base lag_lnsharefirmgroup_base Satochange_base ///
		lngeoratio_base  lag_lngeoratio_base product_group_code_base firmid_base ///
		lngeomean_base lag_lngeomean_base lngeoprice_base lag_lngeoprice_base, ///
		by(product_group_code panel_year quarter) fast

	duplicates drop product_group_code panel_year quarter, force

	sort product_group_code panel_year quarter
	save basefirmdata_v3.dta, replace

		*****
		


use Prebasebrand_v3.dta, clear

	* Attach the base firm's series (the *_base variables) to every row of
	* the same product group and quarter.
	sort product_group_code panel_year quarter 
	merge m:1 product_group_code panel_year quarter using basefirmdata_v3.dta
	
	drop _merge

	**********************************

	*** variables for GMM estimation ***

	*** Firm ***
	* Each dd* variable is a double difference:
	*   (own value - own lag) - (base firm value - base firm lag)
	gen double ddfirmshare = (lnsharefirmgroup - lag_lnsharefirmgroup) - (lnsharefirmgroup_base - lag_lnsharefirmgroup_base)
	gen double ddSato = Satochange - Satochange_base
	gen double ddrelativeshare = lngeoratio - lag_lngeoratio - (lngeoratio_base - lag_lngeoratio_base)
	gen double firmprodshare = exp(lnshareprodfirm)
	* The base-firm term must use the base firm's own lag (lag_lngeomean_base).
	* Subtracting the firm's own lag_lngeomean here would cancel it against the
	* first term and leave lngeomean - lngeomean_base, which is not a double
	* difference and disagrees with the two dd* definitions above.
	gen double ddgeomeanshare = lngeomean - lag_lngeomean - (lngeomean_base - lag_lngeomean_base)
	
	
	*** Firms ***
	* Keep only the variables needed from here on.
	keep fm upc2 upc upc_base lag_lnfirmpriceindex  rawb ///
		lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base  lag_lnsharefirmgroup_base ///
		product_module_code firmid product_group_code panel_year quarter time  firmbuyers ///
		ddfirmshare ddSato ddrelativeshare firmid_base ///
		lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base ///
		firmprodshare ddgeomeanshare
	* Disabled earlier variant, retained for reference:
	*keep fm ddupcprice upc2 upc  upc_base  rawb lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base  lag_lnsharefirmgroup_base product_module_code firmid product_group_code panel_year quarter time  firmbuyers ddfirmshare ddSato ddrelativeshare firmid_base lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base dropupc2 firmprodshare ddgeomeanshare
	*bys upc firmid product_group_code: egen double countobs = count(ddupcprice)
	*sum countobs,d

	* One row per product group x firm x quarter, taking the first
	* non-missing value of each variable.
	collapse (firstnm) ddSato ddfirmshare ddrelativeshare firmbuyers firmid_base ///
		lnsharefirmgroup lag_lnsharefirmgroup ///
		lngeoprice lngeoprice_base lag_lngeoprice lag_lngeoprice_base ///
		lnsharefirmgroup_base lag_lnsharefirmgroup_base ddgeomeanshare, ///
		by(product_group_code firmid fm panel_year quarter time) fast

	sort product_group_code firmid panel_year quarter time

	* Raw weight = 1/firmbuyers in the current period + 1/firmbuyers in the
	* previous one; only set when the row above is the same firm id exactly
	* one period earlier. It must be computed before any rows are dropped.
	* NOTE(review): the condition does not compare product_group_code; rows
	* affected by that appear to be removed by the missing-value drops below,
	* but confirm.
	gen double weight = 1/firmbuyers + 1/firmbuyers[_n-1] if time == time[_n-1]+1 & firmid == firmid[_n-1]

	* bias keeps the raw (untransformed) weight.
	gen double bias = weight

	* Drop rows where any of the double differences is missing
	drop if ddSato == . | ddfirmshare == . | ddrelativeshare == .

	* Drop base firm, bad firms
	drop if fm == firmid_base

	* Number of usable observations per firm x product group; counted before
	* the rows with a missing weight are removed.
	bysort firmid product_group_code: egen double countobs = count(ddfirmshare)

	* Weighting: final weight = (raw weight)^(-1/2) * countobs^(3/2)
	drop if weight == .
	replace weight = weight^(-1/2)
	replace weight = weight * countobs^(3/2)
	drop countobs

	sort firmid time

	* Rebuild the time index over the quarters that are still present.
	drop time
	sort panel_year quarter
	egen double time = group(panel_year quarter)

	keep if product_group_code != .
	sort product_group_code firmid  time

	drop panel_year quarter

	order fm product_group_code ddfirmshare ddSato ddrelativeshare weight  ///
		lnsharefirmgroup lag_lnsharefirmgroup lnsharefirmgroup_base lag_lnsharefirmgroup_base ///
		lngeoprice lag_lngeoprice lngeoprice_base lag_lngeoprice_base

	* saveold writes the file in an older .dta format.
	saveold PreGMM2V2_v3.dta, replace

