* Session setup and data load for the Kilts quarterly cleaning script.
* - runs under version 12.1 semantics
* - new variables default to double precision (set type double)
* - TERMINAL is the project root used to build every input path below
version 12.1
set type double


global TERMINAL "/mnt/data0/work/MPF_FINAL"

* Quote the path so the cd still works if the project root ever contains a space.
* Later relative paths in this script resolve against this working directory.
cd "${TERMINAL}/data/"
	
	*Use Kilts 2004-2009
	use "${TERMINAL}/data/KILTS/KiltsNationalQuarterly.dta", clear
	
	
	* Basic row filter.
	* (a fill-in of missing upc_ver_uc with 1 was considered here but is disabled)
	*replace upc_ver_uc = 1 if upc_ver_uc == .

	* Order the panel by barcode, barcode version, year and quarter.
	sort upc upc_ver_uc panel_year quarter

	* Remove rows with non-positive or missing value or quantity, and rows with missing rawb.
	drop if value <= 0 | quantity <= 0 | value == . | quantity == . | rawb == .




	* Clean the data


 *** THE CORRECT NUMBER OF DIGITS FOR THE FIRM IDENTIFIER DEPENDS ON THE LENGTH OF THE UPC STRING
 
	* Render the numeric UPC as a string so its length and leading digits can be read off.
	gen double upc2 = upc
	gen str20 stringu = string(upc2, "%20.0g")
	gen double stringlength = length(stringu)

	* Firm prefix = the UPC string minus its last five characters:
	*   length 9 -> first 4, length 10 -> first 5, length 11 -> first 6, length 12 -> first 7.
	* Lengths above 12 get an empty prefix; lengths below 9 are dropped outright.
	* (original note: the short cases are in total much less than 1% of barcodes)
	gen str7 firmid2 = substr(stringu, 1, stringlength - 5) if inrange(stringlength, 9, 12)
	drop if stringlength < 9

	** drop the unmatched barcodes (about 4% of obs)
	* Only rows whose upc does NOT appear in the unmatched list (master-only rows) are kept.
	sort upc
	merge m:1 upc using "${TERMINAL}/data/GS1/unmatchedupc.dta"
	drop if _merge != 1
	drop _merge

	* Convert the prefix to a numeric firm identifier and discard the scratch variables.
	* stringlength is intentionally left in the dataset, as before.
	destring firmid2, replace
	gen double firmid = firmid2
	drop firmid2 upc2 stringu

	

	* Give every barcode/version a single product group: the modal code within
	* (upc, upc_ver_uc), taking the smallest code when several are tied (minmode).
	* Rows keep their own code when no mode could be computed.
	tempvar modal_pg
	bysort upc upc_ver_uc: egen double `modal_pg' = mode(product_group_code), minmode
	replace product_group_code = `modal_pg' if `modal_pg' != .
	drop `modal_pg'
	
		

	*** Adjust for multipacks and package size

	* Put size1_amount on a common scale via a unit -> multiplier table:
	*   PO (pound)      x 16        ounces
	*   QT (quart)      x 32        ounces
	*   LI (liter)      x 33.814    ounces
	*   ML (milliliter) x 0.033814  ounces
	*   YD (yard)       x 3         feet
	gen double sizeam = .
	foreach pair in "PO 16" "QT 32" "LI 33.814" "ML 0.033814" "YD 3" {
		gettoken unit factor : pair
		replace sizeam = size1_amount*`factor' if size1_units == "`unit'"
	}

	* Every other unit keeps its raw amount unchanged.
	* Original notes mention cubic foot, foot, square feet, and that expired is also a unit.
	replace sizeam = size1_amount if sizeam == .

	* The incoming price is discarded; a unit price is rebuilt later from value and quantity.
	drop price

	* Scale quantity by pack multiplier times converted size.
	* NOTE(review): a missing multi or size gives a missing adjustment, hence a missing
	* quantity (and later a missing price) - confirm whether such rows should be dropped.
	gen double adjustment = multi*sizeam
	replace quantity = quantity*adjustment


	
	* Final assembly: require the key identifiers, de-duplicate, compute the unit price,
	* re-index products and save.
	drop if upc == .
	drop if firmid == .
	drop if product_group_code == .

	* Keep one row per product/firm/group/period. With force, rows that differ in other
	* variables are also collapsed, keeping whichever comes first in the current order.
	duplicates drop upc upc_ver_uc firmid product_group_code panel_year quarter, force

	* Unit price per size-adjusted quantity.
	gen double price = value/quantity

	* Preserve the original barcode before upc is replaced by a sequential id.
	gen double upc_actual = upc

	* Replace upc by a running id for each (upc, upc_ver_uc) pair.
	* The missing option gives rows with a missing upc_ver_uc their own id. Without it
	* they would all receive a missing id and collapse into one missing upc once the
	* original variable is dropped. It also matches the earlier by-group step, which
	* treats a missing version as its own group.
	sort upc upc_ver_uc
	egen double upc2 = group(upc upc_ver_uc), missing
	drop upc
	rename upc2 upc


	sort upc panel_year quarter

	* Quick check of observation counts by panel year.
	tab panel_year

	* Relative path: the file is written to the current working directory.
	save KiltsQuarterlyCleanV2_v4.dta, replace
