version 12.1
* Project root. The script changes into ${TERMINAL}/data/ below and uses
* relative file names for every dataset it reads or writes.
global TERMINAL "/mnt/data0/work/MPF_FINAL"
* Make double the default storage type for newly created variables
set type double


***************************************************************************
* Pre GMM Preparation
* Generate data for estimation, and also data to be used in post-estimation
***************************************************************************


	* Start from an empty session and raise the size limits used below
	clear all
	set more off
	set matsize 11000
	set maxvar 32767
	set type double
	
	
	* The path is quoted so that a project root containing spaces still resolves
	cd "${TERMINAL}/data/"
	use KiltsQuarterlyCleanV2_v4.dta



	* Remove unusable observations: quantity or value that is non-positive or
	* missing, and any row with a missing price, upc or rawb
	foreach v in quantity value {
		drop if `v' <= 0 | `v' == .
	}
	foreach v in price upc rawb {
		drop if `v' == .
	}

	* Keep a single row per upc x module x brand x firm x group x quarter
	* (force: rows that differ only in other variables are discarded too)
	duplicates drop upc product_module_code brand_code firmid product_group_code panel_year quarter, force

	* Period index: one integer per distinct (panel_year, quarter) pair
	sort upc panel_year quarter
	egen double time = group(panel_year quarter)

	* bm: brand identifier = group x firm x module x brand
	sort product_group_code firmid product_module_code brand_code
	egen double bm = group(product_group_code firmid product_module_code brand_code)

	* fm: firm identifier = group x firm
	sort product_group_code firmid product_module_code
	egen double fm = group(product_group_code firmid)

	gen double lnprice = ln(price)

	
	* Grouping keys reused by the buyer and sales aggregates
	local brandqtr product_group_code firmid product_module_code brand_code panel_year quarter
	local firmqtr product_group_code firmid panel_year quarter
	local groupqtr product_group_code panel_year quarter

	* Buyers (sum of rawb) per brand, firm and product group in each quarter
	bysort `brandqtr': egen double brandbuyers = total(rawb)
	bysort `firmqtr': egen double firmbuyersold = total(rawb)
	bysort `groupqtr': egen double groupbuyers = total(rawb)

	* Sales (sum of value) per brand, firm, product group and overall, by quarter
	bysort `brandqtr': egen double brandsalestotal = total(value)
	bysort `firmqtr': egen double firmsalestotal = total(value)
	bysort `groupqtr': egen double groupsalestotal = total(value)
	bysort panel_year quarter: egen double totalsalesquart = total(value)

	* Nested sales shares: product in brand, brand in firm, firm in group,
	* group in the quarterly aggregate
	gen double shareprodbrand = value / brandsalestotal
	gen double sharebrandfirm = brandsalestotal / firmsalestotal
	gen double sharefirmgroup = firmsalestotal / groupsalestotal
	gen double sharegroupagg = groupsalestotal / totalsalesquart

	* Log of each share, created in the same order as the shares above
	foreach s in shareprodbrand sharebrandfirm sharefirmgroup sharegroupagg {
		gen double ln`s' = ln(`s')
	}

	* Firm buyers recomputed with each product's rawb weighted by its
	* within-brand sales share
	gen double weightedrawb = shareprodbrand * rawb
	bysort `firmqtr': egen double firmbuyers = total(weightedrawb)

	sort product_group_code firmid product_module_code brand_code upc

	* Compact integer identifier for each upc
	egen double upc2 = group(upc)

	* Master dataset reloaded by the later stages of this script
	save NestedMPFPriceIndexRedo_brand.dta, replace
	
	drop if panel_year == 1994

	** Base good: for every brand (bm), the UPC with the largest total sales
	** among the UPCs that have the most observations in that brand.
	** Only observations with at least 20 rawb are considered.
	**
	** Earlier versions also rebuilt time and computed totalval, share*,
	** logprice and logshare at this point. None of them were used before the
	** data are reduced below and reloaded afterwards, so they are no longer
	** computed.

	drop if rawb < 20

	* Criteria 1: keep the UPCs whose observation count equals the maximum
	* observation count among UPCs of the same group x firm x brand
		bys product_group_code firmid brand_code upc: gen double count_yr = _N
		bys product_group_code firmid brand_code: egen double max_count_yr = max(count_yr)
		keep if count_yr == max_count_yr

	* Criteria 2: among those, keep the UPC with the largest total sales per bm
		keep bm fm product_group_code firmid product_module_code brand_code upc value
		collapse (sum) value, by(product_group_code firmid fm brand_code bm upc) fast
		drop if value == 0
		drop if upc == .
		* Largest value first within bm; upc breaks exact ties so that the
		* selected base good is the same on every run
		gsort bm -value upc
		keep if _n == 1 | bm != bm[_n-1]

		keep product_group_code firmid brand_code upc
		sort product_group_code firmid brand_code upc
		save baseupcs_brand.dta, replace
		
	* Reload the master data, drop 1994 and rebuild the period index
	use NestedMPFPriceIndexRedo_brand.dta, clear
	drop if panel_year == 1994
	drop time

	sort upc panel_year quarter
	egen double time = group(panel_year quarter)

	* Keep only the observations of the selected base UPCs
	sort product_group_code firmid brand_code upc
	merge m:1 product_group_code firmid brand_code upc using baseupcs_brand.dta
	keep if _merge == 3

	drop if rawb < 20

	* Display the distribution of rawb among the base-good observations
	sum rawb, d

	* Keep the series needed later and mark them as belonging to the base good
	keep bm fm product_group_code firmid brand_code panel_year quarter lnprice shareprodbrand upc upc2 lnshareprodbrand time
	rename lnprice lnprice_base
	rename shareprodbrand shareprodbrand_base
	rename upc2 upc_base

	gen double lnshareprodbrand_base = ln(shareprodbrand_base)

	* One-period lags of the base series, defined only when the previous row
	* of the same base UPC is the immediately preceding period.
	* (Original author's note: an equivalent cond() formulation produced
	* identical values.)
	sort upc_base brand_code firmid product_group_code panel_year quarter
	foreach v in lnshareprodbrand_base shareprodbrand_base lnprice_base {
		by upc_base brand_code firmid product_group_code: gen double lag_`v' = `v'[_n-1] if time == time[_n-1] + 1 & upc_base == upc_base[_n-1]
	}

	drop if upc_base == .

	* One base row per firm x brand x group x quarter.
	* NOTE(review): base UPCs were chosen per bm, which also includes
	* product_module_code; if a firm-brand-group spans several modules, force
	* keeps whichever base row comes first - confirm this is intended.
	duplicates drop firmid brand_code product_group_code panel_year quarter, force
	sort firmid brand_code product_group_code panel_year quarter

	save basedata_brand.dta, replace
		
	* Reload the master data, drop 1994 and rebuild the period index
	use NestedMPFPriceIndexRedo_brand.dta, clear
	drop if panel_year == 1994
	drop time

	sort upc panel_year quarter
	egen double time = group(panel_year quarter)

	sort firmid brand_code product_group_code panel_year quarter

	* Attach the base-good series of the same firm x brand x group x quarter.
	* Every row is kept, including the base UPC itself and unmatched rows;
	* only the _merge indicator is discarded.
	merge m:1 firmid brand_code product_group_code panel_year quarter using basedata_brand.dta
	drop _merge

	*** variables for GMM estimation ***
	* dropupc  = 1 for the base UPC itself or for observations with rawb < 20
	* dropupc2 = 1 for observations with rawb < 20 only
	gen double dropupc = (upc2 == upc_base) | (rawb < 20)
	gen double dropupc2 = (rawb < 20)

	*** UPC ***
	* One-period lags of log price and log within-brand share for each UPC,
	* defined only when the previous row is the immediately preceding period
	sort upc brand_code firmid product_group_code panel_year quarter
	foreach v in lnprice lnshareprodbrand {
		by upc brand_code firmid product_group_code: gen double lag_`v' = `v'[_n-1] if time == time[_n-1] + 1 & upc == upc[_n-1]
	}

	* Double differences: the UPC's own period-to-period change minus the
	* change of its base good over the same period
	gen double ddupcprice = (lnprice - lag_lnprice) - (lnprice_base - lag_lnprice_base)
	gen double ddfirmprodshare = (lnshareprodbrand - lag_lnshareprodbrand) - (lnshareprodbrand_base - lag_lnshareprodbrand_base)
	
	
	
	keep fm upc2 upc upc_base rawb product_module_code firmid brand_code bm product_group_code panel_year quarter time dropupc ddfirmprodshare ddupcprice

	sort product_group_code firmid brand_code upc panel_year quarter time

	* Raw weight: 1/rawb in the current period plus 1/rawb in the previous
	* period, for consecutive periods of the same UPC. bias1 keeps this raw
	* value; weight itself is rescaled further down.
	gen double weight = 1/rawb + 1/rawb[_n-1] if time == time[_n-1] + 1 & upc == upc[_n-1]
	gen double bias1 = weight

	* Rows without both double differences cannot be used
	drop if ddupcprice == . | ddfirmprodshare == .

	* Drop the base UPC itself, small observations (rawb < 20) and flagged rows
	drop if upc2 == upc_base | rawb < 20 | dropupc == 1
	drop dropupc

	* Number of usable observations per UPC, counted before rows with a
	* missing weight are removed
	bysort upc firmid brand_code product_group_code: egen double countobs = count(ddupcprice)
	sum countobs, d

	* Weights: final weight = countobs^(3/2) * (raw weight)^(-1/2)
	drop if weight == .
	gen double obs_scale = countobs^(3/2)
	replace weight = weight^(-1/2) * obs_scale
	drop obs_scale countobs

	sort upc time

	* Rebuild the period index on the remaining sample
	drop time
	sort panel_year quarter
	egen double time = group(panel_year quarter)

	drop if product_group_code == .
	sort product_group_code firmid upc time

	drop product_module_code firmid brand_code upc2 rawb

	order upc bm fm product_group_code ddupcprice ddfirmprodshare weight

	save PreGMM1V2_brand.dta, replace

* ***************************************
* This code estimates the elasticity parameter SigmaU
* ***************************************


** Trim the double-differenced variables.
** For each variable in turn, values at or below its 1st percentile or at or
** above its 99th percentile are set to missing. The percentiles of the second
** variable are computed before any row is dropped. Rows with either variable
** missing are removed afterwards.
foreach v in ddupcprice ddfirmprodshare {
	sum `v', d
	local lo = `r(p1)'
	local hi = `r(p99)'
	replace `v' = . if `v' <= `lo' | `v' >= `hi'
}
drop if ddupcprice == . | ddfirmprodshare == .

** Variables used in the estimation
* Squares and cross product of the double-differenced variables
gen double y2 = ddupcprice * ddupcprice
gen double z1 = ddupcprice * ddfirmprodshare
gen double z2 = ddfirmprodshare * ddfirmprodshare

* Averages over all observations of a UPC within a product group
bysort product_group_code upc: egen double weight_bar = mean(weight)
foreach m in y2 z1 z2 {
	bysort product_group_code upc: egen double `m'_bar = mean(`m')
}
bysort product_group_code upc: egen double b1 = mean(bias1)

** Clean: t2 is the number of observations of the UPC in its product group.
** UPCs with fewer than 10 observations are always dropped; UPCs with fewer
** than 20 are dropped too, except in the product groups listed below.
bysort product_group_code upc: egen double t2 = count(y2)
sum t2, d
drop if t2 < 10 | (t2 < 20 & !inlist(product_group_code, 2004, 5505, 5509, 5517, 6005, 6006, 6007, 6013))

* Scale the UPC averages by the UPC's share of the product group's summed
* weight_bar (summed over observations), then keep one row per UPC
bysort product_group_code: egen double sum_weight = total(weight_bar)
gen double weight_bar_other = weight_bar / sum_weight
foreach var of varlist y2_bar z1_bar z2_bar {
	replace `var' = `var' * weight_bar_other
}
collapse (mean) y2_bar z1_bar z2_bar t2 weight_bar_other b1 ddfirmprodshare ddupcprice, by(product_group_code upc) fast

* Drop product groups with fewer than 3 rows (one row per UPC after collapse)
bysort product_group_code: egen countobs2 = count(t2)
drop if countobs2 < 3

* Save the dataset used for the estimation of SigmaU;
* pglist numbers the remaining product groups 1..N for the estimation loop
sort product_group_code
egen double pglist = group(product_group_code)
save estimationdata_brand.dta, replace

* Estimate SigmaU using the method proposed by Broda and Weinstein (2006) and a nonlinear solver.
*
* For every product group (pglist = 1..max) the nonlinear regression
*   y2_bar = c1*b1 + theta1*z1_bar + theta2*z2_bar
*   theta1 = ((sigu-2)*omega - 1) / ((1+omega)*(sigu-1))
*   theta2 = omega / ((1+omega)*(sigu-1))
* is run from five starting points (k = 1..5: sigu = k+0.001, omega = k/4).
* Each run contributes one row to EstimationStaggeredCon_brand.dta; a run in
* which nl fails contributes a row holding only the product group code.
sum pglist, meanonly
local max = `r(max)'

forvalues i = 1/`max' {
	forvalues k = 1/5 {
		use estimationdata_brand.dta, clear
		keep if pglist == `i'
		local pg = product_group_code
		local u = `k'+0.001
		local w = (`k')/4

		* Clear the results of the previous run. The assignments below are
		* made under capture, so without this reset a failed assignment would
		* silently reuse the value of an earlier start value or product group.
		foreach res in sigu_est omega_est sigu_se omega_se ssr r2 obs {
			local `res'
		}

		capture noisily: nl (y2_bar ={c1=0}*b1+((({sigu=`u'}-2)*({omega})-1)/((1+{omega})*({sigu}-1)))*z1_bar+({omega=`w'}/((1+{omega})*({sigu}-1)))*z2_bar), eps(1e-10) vce(robust) hasconstant(c1)
		local rc = _rc

		* Collect estimates, standard errors and fit statistics of a successful run
		if `rc' == 0 {
			capture: local sigu_est = _b[sigu: _cons]
			capture: local omega_est = _b[omega: _cons]
			capture: local sigu_se = _se[sigu: _cons]
			capture: local omega_se = _se[omega: _cons]
			capture: local ssr = e(rss)
			capture: local r2 = e(r2)
			capture: local obs = countobs2
		}

		* Build the one-row result for this (product group, start value) pair.
		* A result whose local is empty makes its gen fail under capture, so
		* that column is simply left missing.
		drop _all
		set obs 1
		gen group = `pg'
		if `rc' == 0 {
			capture: gen double sigu = `sigu_est'
			capture: gen double omega = `omega_est'
			capture: gen double sigu_SE = `sigu_se'
			capture: gen double omega_SE = `omega_se'
			capture: gen double SSR = `ssr'
			capture: gen double r2 = `r2'
			capture: gen double obs = `obs'
		}

		* The very first run starts the results file; later runs append to it
		if `i' > 1 | `k' > 1 {
			append using EstimationStaggeredCon_brand.dta
		}
		save EstimationStaggeredCon_brand.dta, replace
	}
}

use EstimationStaggeredCon_brand.dta, clear

* Clean the estimation output: discard runs with a negative sigu or omega, or
* with a standard error of exactly zero. Rows of failed runs (all results
* missing) are not removed by these conditions.
drop if sigu < 0 | omega < 0 | sigu_SE == 0 | omega_SE == 0

* Per product group, order the runs by SSR (missing SSR sorts last) and take
* the first non-missing value of every result.
* NOTE(review): firstnm is applied to each variable separately, so if the
* lowest-SSR run has a missing field (e.g. a standard error), that field is
* taken from a different run - confirm this is intended.
sort group SSR
collapse (firstnm) sigu omega sigu_SE omega_SE SSR r2 obs, by(group) fast

rename group product_group_code
sort product_group_code
save EstimationStaggeredCon_brand2.dta, replace
