version 12.1

* Root of the project tree; every absolute path below is built from it.
global TERMINAL "/mnt/data0/work/MPF_FINAL"

********************************************************
* This code takes the KILTS data and aggregates it to the city - quarter level
* from the household - city - week level.
* Yearly datasets are combined and the intermediate files are deleted.
*********************************************************
clear
set more off
* New variables are created as doubles unless a type is given explicitly.
set type double
set max_memory 256g
set segmentsize 512m
* Relative file names used later (intermediate and final .dta files) resolve against this folder.
* The path is quoted so that it still works if TERMINAL ever contains spaces.
cd "${TERMINAL}/data/KILTS/"

** Step 1: For each panel year 2004-2011, load the KILTS panelists file and keep the household code,
* panel year, projection factors and Scantrack market code/description;
* save it in the working directory as "HouseholdScantrack<year>_v2.dta"
	forvalues yr = 2004/2011 {

		* The four-digit year appears both in the folder name and in the file name
		use "${TERMINAL}/data/KILTS/nielsen_extracts/HMS/`yr'/Annual_Files/panelists_`yr'.dta", clear

		keep household_code panel_year projection_factor projection_factor_magnet ///
			scantrack_market_code scantrack_market_descr

		save HouseholdScantrack`yr'_v2.dta, replace

	}

** Step 2: For each panel year 2004-2011, merge the KILTS shopping trips (date, store, total spending)
* with the purchases (of specific products) and with "HouseholdScantrack<year>_v2.dta" from the
* previous step, clean the transactions, and collapse them to the city - quarter - product (upc) level
	forvalues yr = 2004/2011 {

		local hms "${TERMINAL}/data/KILTS/nielsen_extracts/HMS/`yr'/Annual_Files"

		use "`hms'/trips_`yr'.dta", clear
		merge 1:m trip_code_uc using "`hms'/purchases_`yr'.dta", nogenerate
		* And merge in the demographic/geographic data; the yearly household file is not needed afterwards
		merge m:1 household_code using HouseholdScantrack`yr'_v2.dta, nogenerate
		erase HouseholdScantrack`yr'_v2.dta

		compress

		* Value of sales net of coupons
		gen value = total_price_paid - coupon_value

		* Cleaning: drop observations with missing, zero or negative sales value or quantity
		* (this also removes rows left unmatched by the merges above that carry no purchase information)
		drop if missing(value) | value <= 0
		drop if missing(quantity) | quantity <= 0

		* Price as unit value; positive by construction after the drops above, check kept as a safeguard
		gen price = value/quantity
		drop if price <= 0

		* Quarter from the month held in characters 6-7 of purchase_date:
		* 1 = Jan-Mar, 2 = Apr-June, 3 = July-Sept, 4 = Oct-Dec; left missing if the month is not 1-12
		gen m = substr(purchase_date,6,2)
		destring m, replace
		gen quarter = ceil(m/3) if inrange(m, 1, 12)

		* Median price and quantity by product, scantrack market, year and quarter
		bys upc upc_ver_uc scantrack_market_code panel_year quarter: egen double medprice = median(price)
		by upc upc_ver_uc scantrack_market_code panel_year quarter: egen double medquant = median(quantity)

		* Cleaning: drop observations with extreme values in prices and quantities:
		* prices above 3 times or below 1/3 of the median price,
		* quantities above 24 times the median quantity
		drop if price > 3*medprice
		drop if price < (1/3)*medprice
		drop if quantity > 24*medquant

		* Projection-weighted variables: rawb counts purchase records, buyers is the same count
		* weighted by projection_factor; quantity and value are scaled by projection_factor
		gen rawb = 1
		gen buyers = rawb*projection_factor
		replace quantity = quantity*projection_factor
		replace value = value*projection_factor

		* Create the quarter - city (scantrack market) - product (upc) cross section for this year
		collapse (sum) quantity value rawb buyers projection_factor, ///
			by(panel_year quarter scantrack_market_code upc upc_ver_uc) fast

** Step 3: Merge the quarter - city - product cross section with the product characteristics file

		* keep(master match) discards products that appear only in products.dta: they have no
		* year, quarter, city or sales and would otherwise be added to every yearly file
		merge m:1 upc upc_ver_uc using "${TERMINAL}/data/KILTS/nielsen_extracts/HMS/Master_Files/Latest/products.dta", ///
			keep(master match) nogenerate

		* collapse kept only the summed variables, so the unit value is rebuilt from the
		* projection-weighted totals
		gen price = value/quantity

		* Save the merged dataset as "QuarterlyMktScantrack<year>_v2.dta"
		save QuarterlyMktScantrack`yr'_v2.dta, replace

	}
** Step 4: Stack the yearly quarter - city - product files into one panel and save it as
* Kilts_city_quarterly.dta: this dataset is cleaned by data_cleaning.do and the resulting
* dataset is used for estimation
	use QuarterlyMktScantrack2004_v2.dta, clear
	forvalues yr = 2005/2011 {
		append using QuarterlyMktScantrack`yr'_v2.dta
	}

	save Kilts_city_quarterly.dta, replace

	* Discard the yearly cross sections now that the combined file is on disk
	forvalues yr = 2004/2011 {
		erase QuarterlyMktScantrack`yr'_v2.dta
	}
	
** Generate firm ID: the first five characters of upc written out as a number, stored as numeric
* NOTE(review): upc is passed to string() as a number, so leading zeros are not part of the text
* and the five characters start at the first non-zero digit -- confirm this is the intended prefix
tempvar upc_txt prefix
gen str20 `upc_txt' = string(upc,"%20.0g")
gen `prefix' = substr(`upc_txt',1,5)
destring `prefix', replace
gen firmid = `prefix'
drop `prefix' `upc_txt'

* Overwrite the panel saved earlier so the file on disk includes firmid
save Kilts_city_quarterly.dta, replace

