/*

Multiple-Product Firms and Product Switching
This Stata .do file uses datasets created by the accompanying .do file 20060523_create_data.do to generate the results in the paper.
Note that a mapping of sections of the code to the tables and footnotes of the paper is listed below
v2009.2.4

This program has the following sections
4: Look for firm-products gaps (Discussed in section "Potential P-C Mismeasurement")
5: Compute share of MP, MI and MS firms (Table 1)
7: Compute product, industry and sector switching by year (Tables 3,5, Footnote 21,27)  
8: Distribution of within-firm output shares (Table 10, Figure 2)
9: Mean differences between SP and MP firms in 1997 (Table 2, Footnote 29)
10: Average output share of recently added and dropped products, industries and sectors (Table 9,A3)
11: Product attributes in 1997 (Table A2)
12: Activity and concomitant attribute changes (Table 4, Footnote 28)
13: Firm-product drop regressions for 1992to1997 (Table 8, Footnote 35)
14: Product add and drop rates (Figure 1)
16: Count firm births and deaths each year (Footnote 26)
17: Add regression (Table 7, Footnote 33)
18: Intensive, extensive regressions (Footnote 41)
20: Crosstab of MP vs mplant by year (Footnote 24)
21: Old Decomp (Footnote 32)
22: New Decomp (Table 6)
23: Turbulence (Footnote 31)
25: Coproduction (Table 11)
26: Difference between r8797 and c8797 (Footnote 21)
28: M&A (Table 12,A5)
30: Shares of codes and value lost when moving from the raw to c8797 sample (Footnote 21)

For variable names see accompanying stata .do file 20060523_create_data.do.

Census Data
As noted in the paper, our analysis uses datasets created from data gathered and maintained by the U.S. Census Bureau. These data are not available to the general public and can only be accessed by permission of the Census Bureau. Guidelines for gaining such permission are available on the Center for Economic Studies (CES) website, www.ces.census.gov. 

If you spot any problems with this code, please email peter.schott@yale.edu

*/ 



**0 Preliminaries
* Start with an empty dataset in memory.
clear
* Disable the --more-- pause so the whole script runs unattended.
set more off
* Request 2700 MB of data memory.
* NOTE(review): newer Stata releases are understood to manage memory automatically - confirm whether this line is still needed.
set mem 2700m



**4 Look for firmid-product gaps, i.e., where firm-product is produced, then not, then again
**
** Note: additional anomalies can be checked for as needed
**
** Input : fp5_02_c8797 (firm x ssic5 x year)
** Output: ps4_inout_c8797 (one row per firm x ssic5, pt_pv in wide form)
** Each year gets a "1" if pt_pv is non-missing and "0" otherwise; the per-year
** characters are concatenated into pstring, which is then searched for gap patterns.
capture log close
log using ps4_basic_part4, text replace

use fp5_02_c8797, clear
replace pt_pv=0 if pt_pv<0

*see how many years in this sample
*y0,y1,... hold the census years in ascending order, ynum is their count, ylast the final year
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

**look for gaps in the 7282, 8797 and 7297 samples
if `ynum'==3 & `y0'==1987 {
	gen j=pt_pv~=.
	tostring j, g(i) 
	keep firmid ssic5 year i pt_pv
	reshape wide i pt_pv, i(firmid ssic5) j(year)
	gen str10 pstring = i1987+i1992+i1997
	local c=1
	gen gap_length = 0
	foreach x in 101 {
		 replace gap_length=`c' if strpos(pstring,"`x'")~=0
		local c=`c'+1
	} 
	tab gap_length
	tab gap_length [fw=int(pt_pv1997/1000)]
}

if `ynum'==3 & `y0'==1972 {

	gen j=pt_pv~=.
	tostring j, g(i) 
	keep firmid ssic5 year i pt_pv
	reshape wide i pt_pv, i(firmid ssic5) j(year)
	gen str10 pstring = i1972+i1977+i1982
	local c=1
	gen gap_length = 0
	foreach x in 101 {
		 replace gap_length=`c' if strpos(pstring,"`x'")~=0
		local c=`c'+1
	} 
	tab gap_length
	*after the reshape pt_pv only exists as pt_pv1972/1977/1982, so weight by the
	*last year of this sample (mirrors the use of pt_pv1997 in the 8797 branch)
	tab gap_length [fw=int(pt_pv1982/1000)]
	
	
}

if `ynum'==6 & `y0'==1972 {
	gen j=pt_pv~=.
	tostring j, g(i) 
	keep firmid ssic5 year i pt_pv
	reshape wide i pt_pv, i(firmid ssic5) j(year)
	*six census years, so pstring must hold six characters to match the patterns below
	gen str10 pstring = i1972+i1977+i1982+i1987+i1992+i1997
	gen anomoly = 0

	*all six-period presence patterns that contain at least one gap
	local a1 = "101010 100101 100010 100001 010101 010010 010001 001010 001001 000101"
	local a2 = "110101 110010 110001 011010 011001 001101 111010 111001 011101 111101"

	*anomoly stores the pattern as a number (leading zeros are lost)
	foreach x in `a1' `a2' {
		replace anomoly=`x' if pstring=="`x'"
	}
	tab anomoly
	tab anomoly [fw=int(pt_pv1997/1000)]
}

*drop the per-year presence strings (i1972, i1977, ...) before saving
drop i*
sort firmid ssic5
save ps4_inout_c8797, replace

log close






**5 Compute MP firm distribution for sic5,4,2
**
**  NOTE: create fp4_02 and fp2_02 here for use down below
**
**  Produces d5_5, d5_4, d5_2 (firm-year multi-product flags per aggregation level),
**  ps4_mpdummies_c8797 (the three flags combined) and d5_msic (1997 breakdown by 2-digit msic).
capture log close
log using ps4_basic_part5, text replace

*---- 5 digit ----
use fp5_02_c8797, clear

*record the census years present: y0,y1,... in ascending order, ynum = count, ylast = final year
levelsof year, local(ylist)
local ynum=0
foreach yr in `ylist' {
	local y`ynum' = `yr'
	local ynum    = `ynum'+1
	local ylast   = `yr'
}

*a product counts only when its value is non-missing and non-zero
gen nprod5=(pt_pv~=. & pt_pv~=0)
collapse (sum) pt_pv rpt_pv nprod5 (mean) tvs rtvs, by(firmid year)
gen mp5=nprod5>1 
replace mp5=. if nprod5==0
gen rv=rtvs/1000000
table mp5                 , c(count pt_pv sum rv mean nprod5) f(%15.1fc) 
*table mp5 if year==`ylast', c(count pt_pv sum rv mean nprod5) f(%15.1fc) 
keep firmid year tvs mp5 nprod5
sort firmid year
save d5_5, replace

*---- 4 digit, then 2 digit: identical steps, only the code length differs ----
foreach d in 4 2 {
	use fp5_02_c8797, clear

	*aggregate the 5-digit file up to `d' digits and keep it for later sections
	gen ssic`d' = substr(ssic5,1,`d')
	collapse (mean) ar tvs numprods te pw oe sw ww ow tae mr br cm exp rtvs rtae rww row rsw (sum) rpt_pv pt_pv, by(firmid ssic`d' year)
	replace pt_pv=. if pt_pv==0
	replace rpt_pv=. if rpt_pv==0
	save fp`d'_02_c8797, replace

	*firm-year count of active `d'-digit codes and the resulting multi-code flag
	gen nprod`d'=(pt_pv~=. & pt_pv~=0)
	collapse (sum) pt_pv rpt_pv nprod`d' (mean) tvs rtvs, by(firmid year)
	gen mp`d'=nprod`d'>1 
	replace mp`d'=. if nprod`d'==0
	gen rv=rtvs/1000000
	table mp`d'                 , c(count pt_pv sum rv mean nprod`d') f(%20.1fc) 
	*table mp`d' if year==`ylast', c(count pt_pv sum rv mean nprod`d') f(%20.1fc) 
	keep firmid year tvs mp`d' nprod`d'
	sort firmid year
	save d5_`d', replace
}

*combine mp flags into one file
*note this file will have mp#=. for any year in which the firm is not active
use d5_5, clear
foreach d in 4 2 {
	merge firmid year using d5_`d'
	tab _merge
	drop _merge
	sort firmid year
}
*table year, c(sum mp5 sum mp4 sum mp2)
save ps4_mpdummies_c8797, replace


**Now display distribution of MP5 firms by 2digit msic for 1997
use ps4_mpdummies_c8797, clear
keep if year==1997

*attach the firm's major industry code; using-only rows are discarded
sort firmid year
merge firmid year using ps4_msic_c8797, keep(msic)
tab _merge
drop if _merge==2
drop _merge

gen msic2 = int(msic/100)
gen v     = tvs/1000000
gen mpv   = mp5*v
table msic2, c(count mp5 sum mp5 sum v sum mpv) format(%20.0fc)

save d5_msic, replace

log close








**7 Compute product switching activity by year and sic 5,4,2
**
**  For each aggregation level x (5, 4, 2 digits) this loop writes:
**    ps4_wasadded_wasdropped_`x'_c8797 : firm-code-year add/drop flags
**    nadb`x'temp_c8797                 : intermediate firm-year file
**    ps4_nadb`x'_c8797                 : firm-year switching categories nadb`x' and na`x'

**make datasets
foreach x in 5 4 2 {
	use fp`x'_02_c8797, clear
	
	capture drop _merge

	*add birthdeath info: note that born, died are in years, birthyear, deathyear are flags
	*note that firm,years are only in the using file if it is their birth or death year
	sort firmid year
	merge firmid year using ps4_birthdeath_c8797
	tab _merge
	drop if _merge==2
	drop _merge

	*confirm that above merge results in missing obs
	inspect birthyear
	inspect deathyear
	egen bornyr=mean(born), by(firmid)
	egen diedyr=mean(died), by(firmid)

	*birthdeath file only has birthyear, deathyear obs in those years
	*will be missing otherwise, so here set them to zero to prevent problems below
	replace birthyear=0 if birthyear~=1
	replace deathyear=0 if deathyear~=1

	*create wasadded and dropped variables
	*note that can't be wasadded if firm is new or wasdropped if firm died
	*note that this is a "full" dataset, so the replace commands should be fine
	keep firmid year ssic`x' pt_pv tvs birthyear deathyear rtvs born died bornyr diedyr 
	sort firmid ssic`x' year
	gen wasadded`x'   = firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1] & pt_pv[_n-1]==. & pt_pv[_n]~=.
	replace wasadded`x'=0 if birthyear==1
	gen wasdropped`x' = firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1] & pt_pv[_n-1]~=. & pt_pv[_n]==.
	replace wasdropped`x'=0 if deathyear[_n-1]==1 & firmid==firmid[_n-1]

	*table year, c(sum wasadded`x' sum wasdropped`x' count wasadded`x')	

	save ps4_wasadded_wasdropped_`x'_c8797, replace

	*the birth death vars are all firm-year, so they should be fine in the collapse
	*note that born,died are zero unless it is the year they happen
	collapse (sum) wasadded`x' wasdropped`x' (mean) bornyr diedyr birthyear deathyear tvs rtvs born died, by(firmid year)
	
	sum wasadded`x' wasdropped`x'
	gen netadded`x' = wasadded`x' - wasdropped`x'

	*add in firms major industry code
	sort firmid year
	merge firmid year using ps4_msic_c8797, keep(msic)	
	tab _merge
	drop _merge

	*see how many firms,value in sample and how much is lost by if statements below
	gen rv=rtvs/1000000
	table year                , c(sum rv count rv)
	table year if birthyear==1, c(sum rv count rv)

	*merge in mp dummies from above
	sort firmid year
	merge firmid year using ps4_mpdummies_c8797
	tab _merge
	keep if _merge==3
	drop _merge

	*note: some firms have years where they appear inactive between active years. set mp=0 for those
	*years
	replace mp5=0 if mp5==.
	replace mp4=0 if mp4==.
	replace mp2=0 if mp2==.

	*lags are taken only from the census exactly five years earlier for the same firm
	sort firmid year
	foreach z in mp5 mp4 mp2 tvs {
		gen lag_`z' = `z'[_n-1] if firmid==firmid[_n-1] & year==year[_n-1]+5
	}

	save nadb`x'temp_c8797, replace

	*create indicator for activity
	*Note: when a firm has a gap in its production, it will have tvs==. even though it will show up again in a 
	*      later census. when firms with a single product drop only this can be a problem -- they don't die, but 
	*      the pattern of their tvs makes it look like they do. thus the reason for the drop command below. can't 
	*      just drop if tvs==. need to allow for the fact that the droponly's are there.
	*
	*Note: the above means that these firms get no weight when we sum rv.
	*
	**drop if tvs==. & (wasadded`x'==0 & wasdropped`x'==0)
	**drop if 

	gen nadb`x' = ""
	replace nadb`x' = "1. neither"   if wasadded`x'==0 & wasdropped`x'==0 
	replace nadb`x' = "2. add only"  if wasadded`x'> 0 & wasdropped`x'==0
	replace nadb`x' = "3. drop only" if wasadded`x'==0 & wasdropped`x'> 0
	replace nadb`x' = "4. both"      if wasadded`x'> 0 & wasdropped`x'> 0
	replace nadb`x' = "" if wasadded`x'==.
	replace nadb`x' = "" if wasdropped`x'==.

	*next should show that netadded = 0 for nadb==1
	tab netadded`x' if nadb`x'=="1. neither"

	*gen netadd dummy
	*netadded`x' is spelled out in full rather than relying on variable abbreviation.
	*Stata treats missing as larger than any number, so the "net added" test excludes
	*missing explicitly; such rows keep na`x'=="" just as they keep nadb`x'=="".
	gen na`x' =""
	replace na`x' = "1. no activity" if nadb`x'=="1. neither"
	replace na`x' = "2. no change"   if netadded`x'==0 & nadb`x'~="1. neither" & nadb`x'~=""
	replace na`x' = "3. net added"   if netadded`x'>0 & netadded`x'~=.
	replace na`x' = "4. net dropped" if netadded`x'<0
	
	*gen index for use in table section below
	gen idx = year~=1987 & birthyear~=1 & tvs~=. & lag_tvs~=.

	sort firmid year
	merge firmid year using ps4_mplant_c8797, keep(mplant)
	tab _merge
	drop if _merge==2
	drop _merge

	sort firmid  year 
	merge firmid year using /rdcprojects/br00544/data/pswitch4/ps4_firmchars
	tab _merge
	drop if _merge==2
	drop _merge

	*zero shipments / employment are treated as missing
	replace tvs=. if tvs==0
	replace te=.  if te==0

	sort firmid year
	save ps4_nadb`x'_c8797, replace

}

**make tables
**  For each aggregation level, tabulate the switching categories (nadb, then na)
**  first as firm counts and then as summed real output, for five samples:
**  all firms, lagged multi-product firms, exporters, top-quartile tvs and multi-plant firms.
capture log close
log using ps4_basic_part7, text replace

foreach x in 5 4 2 {
	use ps4_nadb`x'_c8797, clear

	*correction for sp droponly and survive	
	*replace nadb`x' = "1. neither" if nadb`x'=="3. drop only" & lag_mp`x'==0

	keep if idx
	drop if nadb`x'==""

	*sample-split indicators
	egen p75tvs    = pctile(tvs), p(75) by(year)
	gen exporter   = exp>0 & exp~=.
	gen bigtvs     = tvs>p75tvs

	*flag for dormant firms; using-only rows are discarded
	sort firmid
	merge firmid using ps4_dormant_c8797, keep(dormant)
	tab _merge
	drop if _merge==2
	drop _merge

	*outer loop: which category variable; inner loop: which statistic
	foreach rowvar in nadb`x' na`x' {
		foreach stat in "count wasadded" "sum rv" {
			display ["ALL"]
			table `rowvar' if dormant==0 & idx, c(`stat') f(%10.0fc)
			display ["MP"]
			table `rowvar' if dormant==0 & idx & lag_mp`x'==1, c(`stat') f(%15.0fc)
			display ["EXP"]
			table `rowvar' if dormant==0 & idx & exporter, c(`stat') f(%10.0fc)
			display ["BIGTVS"]
			table `rowvar' if dormant==0 & idx & bigtvs, c(`stat') f(%10.0fc)
			display ["MPLANT"]
			table `rowvar' if dormant==0 & idx & mplant, c(`stat') f(%10.0fc)
		}
	}
}


*check above nadb trends by 2-digit sic sector, placing firms in a sector according to their 
*2-digit msic in the prior year (before switching occurred)
*don't report sic 21 and only want neither and total
*"use" accepts the option clear (not replace) to discard the data in memory
use ps4_nadb5_c8797, clear
gen msic2 = int(msic/100)
sort firmid year
*prior-census sector: previous row must be the same firm exactly five years earlier
gen lag_msic2 = msic2[_n-1] if firmid==firmid[_n-1] & year==year[_n-1]+5
drop if lag_msic2==21
table lag_msic2 nadb5 if idx & nadb5=="1. neither", c(count wasadded) f(%10.0fc)
table lag_msic2       if idx & nadb5~=""          , c(count wasadded) f(%10.0fc)

log close




**8 Distribution of within-firm output shares
**  Ranks each firm's products by their share of firm output, tabulates the mean share
**  by rank and product count, and runs log rank-size regressions.
capture log close
log using ps4_basic_part8, text replace

use fp5_02_c8797, clear
drop t1 

*number of active products and total product value per firm-year
egen nprod5 = total(pt_pv!=. & pt_pv!=0), by(firmid year)
sort firmid year pt_pv
egen sumpv  = total(pt_pv), by(firmid year)
gen pvshare = pt_pv/sumpv
drop if pt_pv==.

*r==1 is the firm's largest product in that year
egen r=rank(pvshare), field by(firmid year)
table r nprod if nprod5<=10, c(mean pvshare) f(%9.3fc)


*share of each product relative to the firm's top product
gen maxshare1=pvshare if r==1
egen maxshare=mean(maxshare1), by(firmid year)
gen maxsharerat=pvshare/maxshare

gen lmaxsharerat=ln(maxsharerat)
gen lr=ln(r)

*log rank on log relative share, separately for firms with 4, 6, 8 and 10 products
foreach k in 4 6 8 10 {
	reg lr lmaxsharerat if nprod==`k'
}

save d8, replace

*firm-level rank-size regressions: first on total product value, then on employment
collapse (mean) te (sum) pt_pv, by(firmid year)

gsort -pt_pv
egen pt_pv_rank=rank(pt_pv), field
gen lpt_pv_rank=ln(pt_pv_rank)
gen lpt_pv=ln(pt_pv)
reg lpt_pv_rank lpt_pv

sort te
egen te_rank=rank(te), field
gen lte_rank=ln(te_rank)
gen lte=ln(te)
reg lte_rank lte

log close




**9 Mean Differences between SP and MP firms in 1997
**  Builds the firm-year file d9 and regresses each firm characteristic on the
**  multi-product dummy with major-industry (msic) fixed effects.

*re-save the tfp file sorted on the merge keys
use ps4_acrtfp1997_c8797, clear
sort firmid year
save, replace

use fp5_02_c8797, clear
drop if tvs==.	
collapse (mean) tvs te tae sw ww pw ow oe exp, by(firmid year)

*create vars
gen ltvs     = ln(tvs)
gen lte      = ln(te)
gen ltvste   = ln(tvs/te)
gen lkl      = ln(tae/te)
gen lwage    = ln(sw/te)
gen lpwage   = ln(ww/pw)
gen lnwage   = ln(ow/oe)
gen exporter = exp>0 & exp~=.

*attach tfp, major industry and the multi-product dummies
sort firmid year 
merge firmid year using ps4_acrtfp1997_c8797, keep(acrtfp_p1)
tab _merge
drop _merge

sort firmid year
merge firmid year using ps4_msic_c8797, keep(msic)
tab _merge
drop _merge

sort firmid year
merge firmid year using ps4_mpdummies_c8797
tab _merge
drop _merge

**create index for constant obs
**idx==1 only when every regression variable is non-missing
gen idx = ltvs~=. & lte~=. & ltvste~=. & lkl~=. & lwage~=. & lpwage~=. & lnwage~=. ///
	& exporter~=. & acrtfp_p1~=. & mp5~=. & mp4~=. & mp2~=. & msic~=.
tab idx
tab idx year
sort firmid year
keep if idx==1
save d9, replace


capture log close
log using ps4_basic_part9, text replace

use d9, clear	

*record the census years present: y0,y1,..., their count ynum and the final year ylast
levelsof year, local(ylist)
local ynum=0
foreach yr in `ylist' {
	local y`ynum' = `yr'
	local ynum    = `ynum'+1
	local ylast   = `yr'
}

local depvars ltvs lte ltvste lkl lwage lpwage lnwage exporter acrtfp_p1

quietly {
  foreach x in 5 4 2 {

	*first pass pools all years, second pass keeps only the final year
	foreach pass in Pooled LastYr {
		local cond ""
		if "`pass'"=="LastYr" local cond "if year==`ylast'"

		foreach r of local depvars {
			areg `r' mp`x' `cond', a(msic)
			noisily display "MP `pass'" [`x'] "  coef: " %5.3fc _b[mp`x'] "  se: " %5.3fc _se[mp`x'] "  N: " e(N) "  R2: " %5.3fc e(r2) "  var: " ["`r'"]
		}
	}

	noisily display " "
	noisily display " "

  }
}
log close





**10 Average output share of recently added and dropped
**  Computes, per firm-year, the value of added and dropped codes as a share of
**  current and of lagged firm output, in nominal and then in real terms.
capture log close
log using ps4_basic_part10, text replace

*prep lag tvs and sumpv
use fp5_02_c8797, clear

keep firmid year tvs pt_pv rpt_pv rtvs
rename pt_pv sumpv 
rename rpt_pv rsumpv
collapse (mean) tvs rtvs (sum) sumpv rsumpv, by(firmid year)
sort firmid year

*previous-row values within the same firm
foreach v in tvs sumpv rtvs rsumpv {
	gen lag_`v' = `v'[_n-1] if firmid[_n]==firmid[_n-1]
}
*keep the current-year values under check_* names
foreach v in tvs sumpv rtvs rsumpv {
	rename `v' check_`v'
}
save ps4_lagtvs, replace

*compute wasadded and wasdropped as share of prior year sumpv
quietly {  
  foreach x in 5 4 2 {
	
	use fp`x'_02_c8797, clear

	keep firmid ssic`x' year pt_pv rpt_pv

	*compute lag pt_pv (nominal, then real) within firm and code
	sort firmid ssic`x' year
	foreach v in pt_pv rpt_pv {
		gen lag_`v' = `v'[_n-1] if firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1]
	}

	*retrieve lag total output
	sort firmid year 
	merge firmid year using ps4_lagtvs, keep(lag_tvs lag_sumpv lag_rtvs lag_rsumpv)
	
	tab _merge
	drop _merge

	*add birthdeath info
	sort firmid year
	merge firmid year using ps4_birthdeath_c8797
	
	tab _merge
	drop _merge

	*add/drop flags: a code is added when it appears after being missing for the same
	*firm, dropped when it goes missing; births and deaths are excluded
	sort firmid ssic`x' year
	gen wasadded`x'   = firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1] & pt_pv[_n-1]==. & pt_pv[_n]~=.
	replace wasadded`x'=0 if birthyear==1
	gen wasdropped`x' = firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1] & pt_pv[_n-1]~=. & pt_pv[_n]==.
	replace wasdropped`x'=0 if deathyear[_n-1]==1

	*first nominal (no prefix), then real (prefix r)
	foreach kind in nominal real {
		local p = cond("`kind'"=="real","r","")

		gen t1 = wasadded`x'*`p'pt_pv
		egen `p'vwa = total(t1), by(firmid year)
		gen t2 = wasdropped`x'*lag_`p'pt_pv
		egen `p'vwd = total(t2), by(firmid year)
		egen `p'sumpv = total(`p'pt_pv), by(firmid year)
		drop t1 t2

		gen s`p'vwa_lag = `p'vwa/lag_`p'sumpv
		gen s`p'vwd_lag = `p'vwd/lag_`p'sumpv
		gen s`p'vwa = `p'vwa/`p'sumpv
		gen s`p'vwd = `p'vwd/`p'sumpv

		noisily display [`x']
		noisily table year if year~=1972 & s`p'vwa~= 0 & s`p'vwa~=., c(mean s`p'vwa mean s`p'vwa_lag)
		noisily table year if year~=1972 & s`p'vwd~= 0 & s`p'vwd~=., c(mean s`p'vwd mean s`p'vwd_lag)
	}
	save d10_`x', replace
  }
}

log close




**11 Product attributes
**
**  NOTE: uses only 1997 data so no need to make real
**
**  Part 1 writes ps4_prodattributes_c8797 (tvs-weighted mean attributes by ssic5 x year).
**  Part 2 writes ps4_7in5_c8797 (number of distinct 7-digit codes within each 5-digit code).
capture log close
log using ps4_basic_part11, text replace

use fp5_02_c8797, clear

*see how many years in this sample
*y0,y1,... hold the years in ascending order, ynum their count, ylast the final year
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

*create vars
*kl = tae/te, nl = oe/te; sunk is the median tae/tvs ratio within ssic5-year
gen kl      = tae/te
gen nl      = oe/te
gen temp    = tae/tvs
egen sunk   = median(temp), by(ssic5 year)
gen lsunk   = ln(sunk)

*table
*weighted by integer tvs, final sample year only
gen ssic2   = substr(ssic5,1,2)
*table ssic2 if year==`ylast'              , c(mean kl sd kl mean nl sd nl)
*table ssic2 if year==`ylast'              , c(mean sunk sd sunk)
table ssic2 if year==`ylast' [fw=int(tvs)], c(mean kl sd kl mean nl sd nl)
table ssic2 if year==`ylast' [fw=int(tvs)], c(mean sunk sd sunk)

collapse (mean) kl nl sunk lsunk [fw=int(tvs)], by(ssic5 year)
sort ssic5 year

save ps4_prodattributes_c8797, replace


**count sevens in each 5 by year (for constant sample)
*reload only to rebuild the list of years
use fp5_02_c8797, clear
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

*for each census year, extract the distinct 7-digit codes from that year's base file
foreach l in `ylist' {
 
 	*two-digit year used in the base file name
 	local y = `l'-1900
 
	use /rdcprojects/br00544/data/pswitch4/cmf_base`y', clear 
	keep curpc  
	rename curpc ssic7
	duplicates drop ssic7, force
	gen str5 ssic5 = substr(ssic7,1,5)
	gen str2 ssic2 = substr(ssic7,1,2)
	*characters 5-7 of the 7-digit code
	gen str3 suffix3 = substr(ssic7,5,3)
	destring ssic2, force g(sic2)
	save temp_ssic7_`l', replace
}

*stack all years; the first year is loaded and then appended again, the
*duplicates drop below removes the repeated codes
use  temp_ssic7_`y0'
foreach l in `ylist' {
	append using temp_ssic7_`l'

}
save temp_ssic7_c8798, replace

*drop superfluous and also use constant sample sic5's 
use temp_ssic7_c8798, clear
duplicates drop ssic7, force
*drop if suffix3=="00N" | suffix3=="00P" | suffix3=="00-" | suffix3=="000" | suffix3=="002"
drop if suffix3=="00N" | suffix3=="00P" | suffix3=="00-" | suffix3=="002"
*keep 2-digit sectors 20 through 39 only
drop if sic2<20 | sic2>39
*n7 = number of distinct 7-digit codes per 5-digit code
gen n7 = 1
collapse (sum) n7, by(ssic5)

*keep only the 5-digit codes that also appear in the curpc5_c8797_01 list
sort ssic5 
merge ssic5 using /rdcprojects/br00544/data/pswitch4/curpc5_c8797_01, keep(ssic5) 
tab _merge
keep if _merge==3 
drop _merge

gen str2 ssic2 = substr(ssic5,1,2)
table ssic2, c(mean n7)

sort ssic5
save ps4_7in5_c8797, replace

log close







**12 Concomitant activity and attribute changes
**  Builds d12: firm-year five-year changes in output, employment, wages, productivity
**  and tfp, together with the product-switching indicators from section 7.
use fp5_02_c8797, clear

*reduce to one row per firm-year
collapse (mean) rtvs tvs te rsw sw, by(firmid year)

*add major industry code
sort firmid year
merge firmid year using ps4_msic_c8797, keep(msic)
tab _merge
drop _merge

*add prodmix dummies
*note merge codes should be 1 & 3 but no 2's
*merge==1 is due to fact that fp5_02 is a "full" firm x ssic5 x year dataset, and therefore includes
*  ssic5-year before the firm becomes active
sort firmid year
merge firmid year using ps4_prodmix_c8797, keep(prodmix)
tab _merge
drop _merge

*add back in tfp and indicators of activity
sort firmid year
merge firmid year using ps4_nadb5_c8797, keep(nadb5 na5 netadded5)
tab _merge
drop _merge

*add tfp
sort firmid year
merge firmid year using ps4_acrtfp1997_c8797, keep(acrtfp_p1)
tab _merge
drop _merge

*compute changes
*log levels first: real (r-prefixed) and nominal versions
gen lrtvs   = ln(rtvs)
gen lte     = ln(te)
gen lrwage  = ln(rsw/te)
gen lrtvste = ln(rtvs/te) 
gen ltvs   = ln(tvs)
gen lwage  = ln(sw/te)
gen ltvste = ln(tvs/te)

*d* = change from the same firm's row exactly five years earlier, ad* = its absolute value
sort firmid year
foreach zzz in lrtvs lte lrwage lrtvste ltvs lwage ltvste acrtfp_p1 {
	gen d`zzz' = `zzz'[_n] - `zzz'[_n-1] if firmid[_n]==firmid[_n-1] & year[_n]==year[_n-1]+5
	gen ad`zzz' = abs(d`zzz')
}

*gen activity vars
*rows without a switching category are removed
drop if nadb5==""
gen add  = nadb5=="2. add only"
gen drop = nadb5=="3. drop only"
gen both = nadb5=="4. both"
gen none = nadb5=="1. neither"

*gen netadd, netdrop vars where left out category is no change
*this variable should only be defined if nadb5 is defined
gen netadd  = na5=="3. net added"
gen netdrop = na5=="4. net dropped"
gen netnone = na5=="2. no change"

*gen number added or dropped where left out category is no change
*this variable should only be defined if nadb5 is defined
*"netadded" below resolves by abbreviation to netadded5, the only such variable merged in above
gen numadd  = netadded * (netadded>0)
gen numdrop = netadded * (netadded<0)

*idx==1 marks rows with all real-change variables, tfp change, msic and prodmix non-missing
gen idx = dlrtvs~=. & dlte~=. & dlrwage~=. & dlrtvste~=. & dacrtfp_p1~=. & msic~=. & prodmix~=.
tab idx

save d12, replace


*Regress each five-year change on the net-add and net-drop dummies with year dummies,
*major-industry (msic) fixed effects and standard errors clustered on msic.
capture log close
log using ps4_basic_part12, text replace
*"use" accepts the option clear (not replace) to discard the data in memory
use  d12, clear

*record the census years present: y0,y1,..., their count ynum and the final year ylast
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

keep if idx==1
egen ypm=group(year prodmix)

quietly {

  noisily display "2a na, nd msic "
  foreach zzz in dlrtvs dlte dlrwage dlrtvste dltvs dlwage dltvste dacrtfp_p1 {
	xi: areg `zzz' netadd netdrop i.year, a(msic) cl(msic)
	noisily display "nd " %6.4fc _b[netdrop] " " %5.3fc _se[netdrop] " na " %6.4fc _b[netadd] " "  %6.4fc _se[netadd]  "  N: " e(N) "  R2: " %5.3fc e(r2) " " ["`zzz'"]  
  }
  noisily display ""
  
}

log close





**13 Firm-product drop regressions for 1992->1997
**  Run on MP firms only
**  Firm vars are relative to firms with same product mix
**  Firm-product vars are relative to firms in same product
**
**  This part builds ps4_dropreg; the regressions follow below.
capture log close
log using ps4_basic_part13_details, replace

use fp5_02_c8797, clear

sort firmid year
merge firmid year using ps4_birthdeath_c8797
tab _merge
drop if _merge==2
drop _merge

sort firmid  year 
merge firmid year using /rdcprojects/br00544/data/pswitch4/ps4_firmchars
tab _merge
*keep if _merge==3
drop _merge
gen ltvste = ln(tvs/te)

sort firmid  year 
merge firmid year using ps4_acrtfp1997_c8797, keep(acrtfp_p1)
tab _merge
*keep if _merge==3
drop _merge

*add prodmix dummies
sort firmid year
merge firmid year using ps4_prodmix_c8797, keep(prodmix)
drop if _merge==2
tab _merge
drop _merge

keep firmid year ssic5 pt_pv tvs birthyear deathyear prodmix acrtfp* ltvste
sort firmid ssic5 year
*tbd ("to be dropped") = product is produced now and missing in the firm's next row.
*ssic5 is named explicitly: no loop macro x is defined at this point in the script.
gen tbd = firmid[_n+1]==firmid[_n] & ssic5[_n+1]==ssic5[_n] & pt_pv[_n+1]==. & pt_pv[_n]~=.
replace tbd=0 if deathyear==1
replace tbd=. if pt_pv==.

*compute product tenure for each firm
*t3 = first year in which the firm has positive output of the product
gen t1  = pt_pv~=. & pt_pv~=0
gen t2  = t1*year
replace t2=. if t2==0
egen t3 = min(t2), by(firmid ssic5)
gen tenure = year-t3
replace tenure=. if tvs==.
replace tenure=tenure+5      /*so logs can be taken below*/
tab tenure
drop t1-t3

*compute firm age
*t2 = the firm's birth year, spread to all of its rows
gen t1  = birthyear*year
replace t1=. if birthyear==0
egen t2 = mean(t1), by(firmid)
gen age = year-t2
replace age=. if tvs==.
replace age=age+5      /*so logs can be taken below*/
tab age
drop t1 t2

*compute mean product-level tenure and size and mean firm-level age and size
*firm-level means use one tagged row per firm-year so multi-product firms are not over-weighted
egen meanpt_pv  = mean(pt_pv), by(ssic5 year)
egen meantenure = mean(tenure), by(ssic5 year)
egen t1         = tag(firmid year)
replace t1=. if t1==0
gen t2          = t1*age
gen t3          = t1*tvs
egen meanage    = mean(t2), by(prodmix year)
egen meantvs    = mean(t3), by(prodmix year)
drop t1-t3

*compute relative numbers
foreach zzz in pt_pv tenure age tvs {
	gen d`zzz'     = ln(`zzz') - ln(mean`zzz')
}

egen nprod5 = count(pt_pv), by(firmid year)
gen lnprod5 = ln(nprod5)

sort firmid year
merge firmid year using ps4_mplant_c8797, keep(mplant)
tab _merge
drop if _merge==2
drop _merge

*estimation samples: surviving multi-product firms with all regressors present, by base year
gen idx  = deathyear~=1 & year==1992 & dage~=. & dtvs~=. & nprod5>1 & nprod5~=. & dpt_pv~=. & acrtfp_p1~=. & ltvste~=.
gen idx2 = deathyear~=1 & year==1987 & dage~=. & dtvs~=. & nprod5>1 & nprod5~=. & dpt_pv~=. & acrtfp_p1~=. & ltvste~=.

*number of the firm's active products within the same 4-digit and 2-digit group
gen ssic4    = substr(ssic5,1,4)
gen ssic2    = substr(ssic5,1,2)
gen t1       = pt_pv~=0 & pt_pv~=.
egen n_in_4  = total(t1), by(firmid ssic4 year)
*the 2-digit count is grouped by ssic2 (grouping by ssic4 would just duplicate n_in_4)
egen n_in_2  = total(t1), by(firmid ssic2 year)
gen ln_in_4  = ln(n_in_4)
gen ln_in_2  = ln(n_in_2)

*demean each var by ssic5 for next reg
*n* removes the product-year mean, n1* the firm-year mean, n2* both
foreach x in tbd dpt_pv dtenure {
	egen m`x' = mean(`x'), by(ssic5 year)
	egen m1`x' = mean(`x'), by(firmid year)
	gen  n`x' = `x'-m`x'
	gen  n1`x' = `x'-m1`x'
	gen  n2`x' = `x'-m`x'-m1`x'
}

save ps4_dropreg, replace
log close


*Drop regressions on the 1992 sample (idx==1), standard errors clustered by firm:
*raw variables, then firm-year demeaned (n1*), then product- and firm-year demeaned (n2*).
capture log close
log using ps4_basic_part13.log, replace

use  ps4_dropreg, clear

local sample "if idx==1, cl(firmid)"
reg tbd   dpt_pv   dtenure   `sample'
reg n1tbd n1dpt_pv n1dtenure `sample'
reg n2tbd n2dpt_pv n2dtenure `sample'


log close





**14 Product Add and Drop Rates
**   Uses pre-collapse file created in nadb routine above
**
**   Counts current and lagged producers, adds and drops per ssic5-year (overall and
**   split by the sign of firm real output growth), converts them to rates and plots
**   mean add rate against mean drop rate by product.

capture log close
log using ps4_basic_part14, text replace

use ps4_wasadded_wasdropped_5_c8797, clear

**read in firmgrowth variable from section 12
sort firmid year
merge firmid year using d12, keep(dlrtvs)
tab _merge
drop if _merge==2
drop _merge


*create vars for counts of producers in each period
*restore firm-product-year order first: the sort used for the merge above leaves rows
*within a firm-year in arbitrary order, so [_n-1] would not be the same product's prior census
sort firmid ssic5 year
*a firm produces the product only when pt_pv is non-missing and non-zero
*(missing compares as unequal to 0, so it has to be excluded explicitly)
gen n     = pt_pv~=. & pt_pv~=0
gen nlag  = pt_pv[_n-1]~=. & pt_pv[_n-1]~=0
*the lag is invalid when EITHER the firm or the product differs from the previous row
replace nlag = 0 if firmid~=firmid[_n-1] | ssic5~=ssic5[_n-1]
gen nadd  = wasadded5
gen ndrop = wasdropped5

*same counts restricted to firm-years with positive real output growth
gen n_pos     = n     & dlrtvs>0 & dlrtvs~=.
gen nlag_pos  = nlag  & dlrtvs>0 & dlrtvs~=.
gen nadd_pos  = nadd  & dlrtvs>0 & dlrtvs~=.
gen ndrop_pos = ndrop & dlrtvs>0 & dlrtvs~=.

*and with negative real output growth
gen n_neg     = n     & dlrtvs<0 & dlrtvs~=.
gen nlag_neg  = nlag  & dlrtvs<0 & dlrtvs~=.
gen nadd_neg  = nadd  & dlrtvs<0 & dlrtvs~=.
gen ndrop_neg = ndrop & dlrtvs<0 & dlrtvs~=.

save d14, replace

collapse (sum) n nlag nadd ndrop *_pos *_neg, by(ssic5 year)

save ps4_adddroprate, replace

use ps4_adddroprate, clear

*record the census years present: y0,y1,..., their count ynum and the final year ylast
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}
*no lag exists for the first census year
drop if year==`y0'

*rates use the average of current and lagged producer counts as denominator
gen addrate      = nadd      / ((n     + nlag    )/2)
gen addrate_pos  = nadd_pos  / ((n_pos + nlag_pos)/2)
gen addrate_neg  = nadd_neg  / ((n_neg + nlag_neg)/2)
gen droprate     = ndrop     / ((n     + nlag    )/2)
gen droprate_pos = ndrop_pos / ((n_pos + nlag_pos)/2)
gen droprate_neg = ndrop_neg / ((n_neg + nlag_neg)/2)
gen den          = (n + nlag)/2

bysort year: pwcorr addrate     droprate    , sig
bysort year: pwcorr addrate_pos droprate_pos, sig
bysort year: pwcorr addrate_neg droprate_neg, sig

**gen ssic2    = substr(ssic5,1,2)
**table ssic2, c(mean addrate mean droprate)
**bysort ssic2: pwcorr addrate droprate, sig

**create graphs
gen idx = 1
*DEFINE SECTORS THAT VIOLATE DISCLOSURE
*list the ssic5 codes that must be suppressed in slist1; it is left empty here because
*the original list is not part of this file (an empty list excludes nothing)
local slist1 ""
foreach x in `slist1' {	
	replace idx=0 if ssic5=="`x'"
}

keep if idx==1
collapse addrate droprate den idx, by(ssic5)
label var addrate "Mean Add Rate"
label var droprate "Mean Drop Rate"
scatter addrate droprate if idx==1              , xlabel(0(.1).5) ylabel(0(.1).5)                  saving(ps4_adscatter7, replace)
scatter addrate droprate if idx==1 [fw=int(den)], xlabel(0(.1).5) ylabel(0(.1).5)                  saving(ps4_adscatter8, replace)
scatter addrate droprate if idx==1 			, xlabel(0(.1).5) ylabel(0(.1).5) m(i) mlab(ssic5) saving(ps4_adscatter9, replace)

log close


 
 



**16 count firm births and deaths each year
**   One row per active firm-year, combined with the birth/death flags, then tabulated by year.
capture log close 
log using ps4_basic_part16, text replace 

use fp5_02_c8797, clear 

*firm-year shipments; inactive firm-years (missing or zero tvs) are removed
collapse (mean) tvs, by(firmid year)
drop if missing(tvs)
drop if tvs==0

sort firmid year
merge firmid year using ps4_birthdeath_c8797
tab _merge
save d_16, replace

*active firms, births and deaths per census year
table year, c(count tvs sum birthyear sum deathyear)

log close





**17 Add regression 
**   Builds ps4_addreg_c8797: firm-year add indicator with lagged firm characteristics.

*re-save the tfp file sorted on the merge keys
use ps4_acrtfp1997_c8797, clear
sort firmid year
save, replace


*start from the 5-digit firm-year switching file created in section 7
use ps4_nadb5_c8797, clear
	
sort firmid  year 
merge firmid year using /rdcprojects/br00544/data/pswitch4/ps4_firmchars
tab _merge
*keep if _merge==3
drop _merge

sort firmid  year 
merge firmid year using ps4_acrtfp1997_c8797, keep(acrtfp_p1)
tab _merge
*keep if _merge==3
drop _merge

sort firmid  year 
merge firmid year using /rdcprojects/br00544/data/pswitch4/ps4_firmage, keep(age)
tab _merge
*keep if _merge==3
drop _merge

sort firmid year
merge firmid year using ps4_prodmix_c8797, keep(prodmix)
*drop if _merge==2
tab _merge
drop _merge

gen ltvste = ln(tvs/te)
gen lte    = ln(te)
gen lage   = ln(age)

*lagged regressors: previous row must be the same firm exactly five years earlier
sort firmid year
foreach s in prodmix ltvste lte lage acrtfp_p1 {
	gen lag_`s' = `s'[_n-1] if firmid==firmid[_n-1] & year==year[_n-1]+5
} 

*drops
*"nadb" and "wasadded" below are abbreviations
*NOTE(review): presumably they resolve to nadb5 and wasadded5 from ps4_nadb5_c8797 - confirm no other variable shares the prefix
drop if nadb==""

xi i.year
drop if wasadded==.
*add = firm added at least one product
gen add = wasadded>0
*replace the idx carried over from the nadb file with the regression-sample flag
capture drop idx
gen idx = lag_prodmix~=. & add~=. & lag_acrtfp_p1~=. & lag_ltvste~=. & lag_lte~=. & lag_lage~=. & lag_mp5~=.

save ps4_addreg_c8797, replace


*Add regressions with lagged product-mix fixed effects, run first for firms that were
*single-product in the prior census (lag_mp5==0) and then for multi-product firms (lag_mp5==1).
capture log close 
log using ps4_basic_part17, text replace 
use ps4_addreg_c8797, clear

keep if idx==1

foreach m in 0 1 {
	*lagged tfp alone, then with lagged employment and age as controls
	areg add lag_acrtfp_p1                  if lag_mp5==`m', a(lag_prodmix)
	areg add lag_acrtfp_p1 lag_lte lag_lage if lag_mp5==`m', a(lag_prodmix)
}

log close






**18 Intensive/extensive stuff
**  (New)
**
**  Part A uses the restricted c8797 sample, part B the raw all-years sample.
**  Both parts are written to the same log (ps4_basic_part18): the log is opened
**  once here and closed once at the end of part B. Closing it between the two
**  parts would leave part B unlogged and make the final "log close" fail
**  because no log would be open.
capture log close
log using ps4_basic_part18, text replace

**A. restricted sample
use fp5_02_c8797, clear

*record the sample years: y0, y1, ... hold each year, ylast the final one
levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

*count products with non-missing, non-zero output and sum output to the firm-year
gen nprod=1
replace nprod=0 if pt_pv==. | pt_pv==0
collapse (sum) nprod pt_pv rpt_pv, by(firmid year)
drop if nprod==0
gen lpv     = ln(pt_pv)
gen avg     = pt_pv/nprod
replace avg = . if nprod==0 | rpt_pv==0
gen lavg    = ln(avg)
gen lnprod  = ln(nprod)
gen idx     = lnprod~=. & lavg~=. & lpv~=. 

*regress each margin on log total output, 1997 only
reg lnprod lpv if year==1997 & idx==1
reg lavg   lpv if year==1997 & idx==1 

*relation between the two margins among firms whose product count is not one
pwcorr lavg lnprod if nprod!=1, sig
foreach yr in 1987 1992 1997 {
	pwcorr lavg lnprod if nprod!=1 & year==`yr', sig
}
reg lavg lnprod if nprod!=1
foreach yr in 1987 1992 1997 {
	reg lavg lnprod if nprod!=1 & year==`yr'
}


**B. raw all-years sample
use /rdcprojects/br00544/data/pswitch4/fp5_02, clear

levelsof year, local(ylist)
local ynum=0
foreach l in `ylist' {
	local y`ynum' = `l'
	local ynum    = `ynum'+1
	local ylast   = `l'
}

**count number of prods
gen nprod = pt_pv~=. & pt_pv~=0
collapse (sum) nprod pt_pv rpt_pv, by(firmid year)
gen lpv     = ln(pt_pv)
gen avg     = pt_pv/nprod
replace avg = . if nprod==0 | rpt_pv==0
gen lavg    = ln(avg)
gen lnprod  = ln(nprod)

**run decomposition
reg lnprod lpv if year==`ylast'
reg lavg   lpv if year==`ylast' 

*firm-years with nprod==0 are not dropped in part B, but lnprod=ln(0) is missing
*for them, so they never enter the commands below; the same nprod!=1 condition
*is therefore used for every year
pwcorr lavg lnprod if nprod!=1, sig
foreach yr in 1972 1977 1982 1987 1992 1997 {
	pwcorr lavg lnprod if nprod!=1 & year==`yr', sig
}
reg lavg lnprod if nprod!=1
foreach yr in 1972 1977 1982 1987 1992 1997 {
	reg lavg lnprod if nprod!=1 & year==`yr'
}

save d_18r, replace


log close





**20 Breakdown of mplant vs mp
capture log close
log using ps4_basic_part20, text replace

*start from the firm-year file that carries the mp5 variable
use ps4_mpdummies_c8797, clear
keep firmid year mp5

*record the sample years: y0, y1, ... hold each year, ylast the final one
levelsof year, local(ylist)
local ynum=0
foreach yr in `ylist' {
	local y`ynum' = `yr'
	local ynum    = `ynum'+1
	local ylast   = `yr'
}

*attach the mplant variable from the plant-based file
sort firmid year
merge firmid year using ps4_mplant_c8797, keep(mplant)
tabulate _merge
drop _merge
save d_20, replace

*one mplant-by-mp5 crosstab per sample year
foreach yr in `ylist' {
	display [`yr']
	tabulate mplant mp5 if year==`yr'
}

log close




**21 Aggregate decomposition -- no break and two subperiods
**
**  For each sample (c8797, r8797) the change in real firm-product output is split
**  into an intensive margin (continuing firm-products), an extensive margin
**  (products added/dropped by surviving firms) and firm entry/exit. Values are
**  tabulated by year, then as shares of lagged output, then across all years.
capture log close
log using ps4_basic_part21, text replace

*build the raw 1987+ sample (r8797) from the all-years file
use /rdcprojects/br00544/data/pswitch4/fp5_02, clear
keep if year>=1987
save /rdcprojects/br00544/data/pswitch4/r8797/fp5_02_r8797, replace

*build the r8797 birth/death file from the firm-age file
*NOTE(review): only deathyear is generated here, while the loop below also reads
*              birthyear; presumably birthyear already exists in ps4_firmage -- confirm
use /rdcprojects/br00544/data/pswitch4//ps4_firmage, clear
keep if year>=1987
gen deathyear = year==died
save /rdcprojects/br00544/data/pswitch4/r8797/ps4_birthdeath_r8797, replace


foreach xxx in c8797 r8797 {

	display ["   "]
	display [" OLD DECOMP FOR SAMPLE `xxx'  "]
	display ["   "]

	*firm-year totals and their within-firm lags (previous row of the same firm)
	use /rdcprojects/br00544/data/pswitch4/`xxx'/fp5_02_`xxx', clear

	keep firmid year tvs pt_pv rtvs rpt_pv
	rename pt_pv sumpv
	rename rpt_pv rsumpv
	collapse (mean) rtvs tvs (sum) rsumpv sumpv, by(firmid year)
	sort firmid year
	gen lag_tvs    = tvs[_n-1]   if firmid[_n]==firmid[_n-1]
	gen lag_sumpv  = sumpv[_n-1] if firmid[_n]==firmid[_n-1]
	gen lag_rtvs   = rtvs[_n-1]   if firmid[_n]==firmid[_n-1]
	gen lag_rsumpv = rsumpv[_n-1] if firmid[_n]==firmid[_n-1]
	rename tvs check_tvs
	rename sumpv check_sumpv
	rename rtvs check_rtvs
	rename rsumpv check_rsumpv
	save /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_lagtvs_`xxx', replace

	*first, decomposition for post-balance code drop sample
	use /rdcprojects/br00544/data/pswitch4/`xxx'/fp5_02_`xxx', clear

	*drop pt_pv tvs 

	*compute current sum pv and retrieve lag total output from file computed above
	egen rsumpv = total(rpt_pv), by(firmid year)
	sort firmid year 
	merge firmid year using /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_lagtvs_`xxx', keep(lag_rtvs lag_rsumpv)
	tab _merge
	drop _merge

	*compute lag pt_pv and intensive margin change
	sort firmid ssic5 year
	gen lag_rpt_pv     = rpt_pv[_n-1] if firmid[_n]==firmid[_n-1] & ssic5[_n]==ssic5[_n-1]
	gen intensive      = rpt_pv-lag_rpt_pv
	gen intensive_pos  = intensive if intensive>0
	gen intensive_neg  = intensive if intensive<=0

	*add birthdeath info and compute change due to extensive margin
	*note that since we are still at the firmid-sic5 level, extensive_add and _drop can't both be nonzero
	*thus the reason for the replace commands in defining them; otherwise extensive is always zero 
	sort firmid year
	merge firmid year using /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_birthdeath_`xxx'
	tab _merge
	*keep if _merge==3
	drop _merge

	*a product is added (dropped) when output goes from missing to non-missing
	*(non-missing to missing) within a firm-product; firm births and deaths are excluded
	sort firmid ssic5 year
	gen wasadded5   = firmid[_n]==firmid[_n-1] & ssic5[_n]==ssic5[_n-1] & rpt_pv[_n-1]==. & rpt_pv[_n]~=.
	replace wasadded5=0 if birthyear==1
	gen wasdropped5 = firmid[_n]==firmid[_n-1] & ssic5[_n]==ssic5[_n-1] & rpt_pv[_n-1]~=. & rpt_pv[_n]==.
	replace wasdropped5=0 if deathyear[_n-1]==1
	
	gen extensive_add  = rpt_pv*wasadded5
	replace extensive_add=0 if extensive_add==.
	gen extensive_drop = -lag_rpt_pv*wasdropped5
	replace extensive_drop=0 if extensive_drop==.
	gen extensive      = extensive_add+extensive_drop

	save /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_01_`xxx', replace
	*use /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_01_`xxx', clear
	
	*sum the intensive and extensive margins to the firmid-year level before accounting for entry exit
	collapse (sum) intensive* extensive* (mean) rsumpv lag_rsumpv rtvs lag_rtvs deathyear birthyear, by(firmid year)

	*compute changes due to entry and exit
	*note that entry and exit can't both be nonzero
	*thus the reason for the replace commands in defining them; otherwise entryexit is always zero
	sort firmid year
	gen exit           = -lag_rsumpv*deathyear[_n-1]
	replace exit=0 if exit==.
	gen entry          = rsumpv*birthyear
	replace entry=0 if entry==.
	gen entryexit      = entry + exit

	*here are values; shares after next collapse
	table year, c(sum rsumpv    sum entryexit     sum extensive sum intensive) f(%15.0fc)
	table year, c(sum entryexit sum entry         sum exit)                    f(%15.0fc)
	table year, c(sum extensive sum extensive_add sum extensive_drop)          f(%15.0fc)
	table year, c(sum intensive sum intensive_pos sum intensive_neg)           f(%15.0fc)

	save /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_02_`xxx', replace

	*collapse to compute shares
	collapse (sum) lag_rsumpv entryexit extensive* intensive* entry exit,   by(year)
	foreach zzz in entryexit entry exit extensive extensive_add extensive_drop intensive intensive_pos intensive_neg {
		gen sh_`zzz' = `zzz'/lag_rsumpv*100
	}
	
	table year, c(sum sh_entryexit sum sh_extensive sum sh_intensive) f(%15.2fc)
	table year, c(sum sh_entryexit sum sh_entry sum sh_exit) f(%5.2fc)
	table year, c(sum sh_extensive sum sh_extensive_add sum sh_extensive_drop) f(%5.2fc)
	table year, c(sum sh_intensive sum sh_intensive_pos sum sh_intensive_neg) f(%5.2fc)

	*check the shares across all years
	*the base is lagged output in the second sample year (i.e. first-year output) and
	*entry is zeroed in the first sample year. Both years are read from the data: the
	*previously hard-coded 1972/1977 never occur in the 1987+ samples used here,
	*which made the base zero and every all-years share missing.
	*the variable keeps its historical name rsumpv_1972 so the saved file layout is unchanged
	summarize year, meanonly
	local yfirst = r(min)
	summarize year if year>`yfirst', meanonly
	local ysecond = r(min)
	gen t=1
	gen t1=year==`ysecond'
	gen t2 = t1*lag_rsumpv
	rename t2 rsumpv_1972
	replace entryexit = 0 if year==`yfirst'
	replace entry=0 if year==`yfirst'
	collapse (sum) rsumpv_1972 entryexit extensive* intensive* entry exit,   by(t)
	foreach zzz in entryexit entry exit extensive extensive_add extensive_drop intensive intensive_pos intensive_neg {
		gen sh_`zzz' = `zzz'/rsumpv_1972*100
	}
	table t, c(sum sh_entryexit sum sh_extensive sum sh_intensive) f(%15.2fc)
	table t, c(sum sh_entryexit sum sh_entry sum sh_exit) f(%5.2fc)
	table t, c(sum sh_extensive sum sh_extensive_add sum sh_extensive_drop) f(%5.2fc)
	table t, c(sum sh_intensive sum sh_intensive_pos sum sh_intensive_neg) f(%5.2fc)

	save /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_03_`xxx', replace
}

log close






**22 New decomposition:
**
**   For each year-ssic5 what fraction of output is by:
**		a. Entirely new firms
**		b. Firms new to this product
**		c. Firms that were in this product in year t-5
**   For each year-ssic5 what fraction of output is by:
**		a. Firms that will die by t+5
**		b. Firms that will exit the product but not die by t+5
**		c. Firms that will remain in the product by year t+5
**
**   NOTE: also break down above in terms of counts
**
**   NOTE: start with ps4_aggdecomp from above, which is a firm x product x year
**         dataset
**
**   NOTE: both the add version and the drop version run inside the sample loop,
**         because both build their file paths from the loop macro `xxx'
**
**  (NEW)
capture log close
log using ps4_basic_part22, text replace

foreach xxx in c8797 {

	*generate file with total employment by product-year
	use /rdcprojects/br00544/data/pswitch4/`xxx'/fp5_02_`xxx', clear
	collapse (sum) te, by(ssic5 year)
	rename te sumte
	drop if ssic5==""
	sort ssic5 year
	save /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_te, replace
	

	*first do add version
	use /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_01_`xxx', clear
	
	
	*see how many years in this sample
	levelsof year, local(ylist)
	local ynum=0
	foreach l in `ylist' {
		local y`ynum' = `l'
		local ynum = `ynum'+1
		local ylast = `l'
	}
	
	*the [_n-1] comparisons below need rows ordered within firm-product;
	*sort explicitly rather than relying on the order the file was saved in
	sort firmid ssic5 year
	gen type       = ""
	replace type   = "continuer" if rpt_pv~=. & rpt_pv[_n-1]~=. & firmid==firmid[_n-1] & ssic5==ssic5[_n-1]
	replace type   = "adder"     if rpt_pv~=. & rpt_pv[_n-1]==. & firmid==firmid[_n-1] & ssic5==ssic5[_n-1]
	replace type   = "entrant"   if type=="adder" & birthyear==1
	
	*for value
	gen vcontinuer = pt_pv if type=="continuer"
	gen ventrant   = pt_pv if type=="entrant"
	gen vadder     = pt_pv if type=="adder"
	
	*for count
	gen ncontinuer = 1     if type=="continuer"
	gen nentrant   = 1     if type=="entrant"
	gen nadder     = 1     if type=="adder"
	gen n          = 1     if type~=""

	save /rdcprojects/br00544/data/pswitch4/`xxx'/d_22_a_`xxx', replace

	collapse (sum) ncontinuer nentrant nadder vcontinuer ventrant vadder pt_pv n, by(ssic5 year)
	
	*merge in total employment data for weighting in tables
	
	sort ssic5 year
	merge ssic5 year using /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_te, keep(sumte)
	tab _merge
	keep if _merge==3
	drop _merge

	*shares of product-year output (value) and of classified firm-products (count)
	rename pt_pv sumpv
	gen ssic2        = substr(ssic5,1,2)
	gen sh_continuer = vcontinuer/sumpv*100
	gen sh_entrant   = ventrant/sumpv*100
	gen sh_adder     = vadder/sumpv*100

	gen sh_ncontinuer = ncontinuer/n*100
	gen sh_nentrant   = nentrant/n*100
	gen sh_nadder     = nadder/n*100

	*the three shares are summed as a check
	gen vtest         = sh_continuer + sh_entrant + sh_adder
	sum vtest
	gen ntest         = sh_ncontinuer + sh_nentrant + sh_nadder
	sum ntest
	
	*mean and sd of values
	table year               , c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	table year [fw=int(sumpv)]  , c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	table year [fw=int(sumte)]  , c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	table ssic2 if year==1992, c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	table ssic2 [fw=int(sumpv)] if year==1992, c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	table ssic2 [fw=int(sumte)] if year==1992, c(mean sh_continuer mean sh_entrant mean sh_adder) f(%15.0fc)
	*table year, c(sd   sh_continuer sd   sh_entrant sd   sh_adder) f(%15.0fc)	

	table year               , c(mean sh_ncontinuer mean sh_nentrant mean sh_nadder) f(%15.0fc)
	table ssic2 if year==1992, c(mean sh_ncontinuer mean sh_nentrant mean sh_nadder) f(%15.0fc)
	*table year, c(sd   sh_continuer sd   sh_entrant sd   sh_adder) f(%15.0fc)		
	


	*second do drop version
	use /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_aggdecomp_01_`xxx', clear
	
	*classification looks forward to the next row of the same firm-product
	sort firmid ssic5 year
	gen type = "" 
	replace type = "continuer" if rpt_pv~=. & rpt_pv[_n+1]~=. & firmid==firmid[_n+1] & ssic5==ssic5[_n+1]
	replace type = "dropper"   if rpt_pv~=. & rpt_pv[_n+1]==. & firmid==firmid[_n+1] & ssic5==ssic5[_n+1]
	replace type = "exiter"  if type=="dropper" & deathyear==1
	
	gen vcontinuer = pt_pv if type=="continuer"
	gen vexiter    = pt_pv if type=="exiter"
	gen vdropper   = pt_pv if type=="dropper"

	gen ncontinuer = 1     if type=="continuer"
	gen nexiter    = 1     if type=="exiter"
	gen ndropper   = 1     if type=="dropper"
	gen n          = 1     if type~=""
	
	save /rdcprojects/br00544/data/pswitch4/`xxx'/d_22_d_`xxx', replace
	
	collapse (sum) ncontinuer nexiter ndropper vcontinuer vexiter vdropper pt_pv n, by(ssic5 year)
	
	*merge in total employment data for weighting in tables
	
	sort ssic5 year
	merge ssic5 year using /rdcprojects/br00544/data/pswitch4/`xxx'/ps4_te, keep(sumte)
	tab _merge
	keep if _merge==3
	drop _merge

	rename pt_pv sumpv
	gen ssic2        = substr(ssic5,1,2)

	gen sh_continuer = vcontinuer/sumpv*100
	gen sh_exiter    = vexiter/sumpv*100
	gen sh_dropper   = vdropper/sumpv*100

	gen sh_ncontinuer = ncontinuer/n*100
	gen sh_nexiter    = nexiter/n*100
	gen sh_ndropper   = ndropper/n*100

	gen vtest = sh_continuer + sh_exiter + sh_dropper
	sum vtest
	gen ntest = sh_ncontinuer + sh_nexiter + sh_ndropper
	sum ntest

	*mean and sd
	table year               , c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	table year [fw=int(sumpv)]  , c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	table year [fw=int(sumte)]  , c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	table ssic2 if year==1992, c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	table ssic2 [fw=int(sumpv)] if year==1992, c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	table ssic2 [fw=int(sumte)] if year==1992, c(mean sh_continuer mean sh_exiter mean sh_dropper) f(%15.0fc)
	*table year, c(sd   sh_continuer sd   sh_exiter sd   sh_dropper) f(%15.0fc)

	table year               , c(mean sh_ncontinuer mean sh_nexiter mean sh_ndropper) f(%15.0fc)
	table ssic2 if year==1992, c(mean sh_ncontinuer mean sh_nexiter mean sh_ndropper) f(%15.0fc)
	*table year, c(sd   sh_continuer sd   sh_exiter sd   sh_dropper) f(%15.0fc)
	

}
log close





**23 Turbulence
**  (NEW)

*mean and standard deviation of the log intensive-margin change by product-year;
*ln() returns missing for non-positive values, so only positive changes contribute
use ps4_aggdecomp_01_c8797, clear

generate lintensive = ln(intensive)
egen mlintensive  = mean(lintensive), by(ssic5 year)
egen sdlintensive = sd(lintensive), by(ssic5 year)

collapse (mean) mlintensive sdlintensive, by(ssic5 year)
sort ssic5 year
save ps4_turbulence_c8797, replace


capture log close
log using ps4_basic_part23, text replace

use  ps4_adddroprate, clear

*record the sample years: y0, y1, ... hold each year, ylast the final one
levelsof year, local(ylist)
local ynum=0
foreach yr in `ylist' {
	local y`ynum' = `yr'
	local ynum    = `ynum'+1
	local ylast   = `yr'
}

*the first sample year is removed before rates are computed
drop if year==`y0'

*add and drop counts relative to the average of current and lagged n
generate addrate  = nadd  / ((n+nlag)/2)
generate droprate = ndrop / ((n+nlag)/2)
generate ssic2    = substr(ssic5,1,2)

*attach turbulence measures, n7 and product attributes
sort ssic5 year
merge ssic5 year using ps4_turbulence_c8797, keep(mlintensive sdlintensive)
tabulate _merge
drop _merge

sort ssic5
merge ssic5 using ps4_7in5_c8797, keep(n7)
tabulate _merge
drop _merge

sort ssic5 year
merge ssic5 year using ps4_prodattributes_c8797
tabulate _merge
drop _merge

*regressions are run for 1997 regardless of the last year found above
local ylast=1997
egen minrate=rowmin(addrate droprate)
generate la  = ln(addrate/(1-addrate))
generate ld  = ln(droprate/(1-droprate))
generate lm  = ln(minrate/(1-minrate))
generate ln7 = ln(n7)
generate lsd = ln(sdlintensive) 

*NOTE(review): the regressor is written as sd; no variable with exactly that name is
*              created in this section, so it presumably resolves by abbreviation to
*              sdlintensive (or is a variable in a merged file) -- confirm. lsd is not used.
foreach dep in addrate droprate {
	regress `dep' sd           if year==`ylast'
	regress `dep' sd lsunk ln7 if year==`ylast'
	tobit   `dep' sd           if year==`ylast', ll(0) ul(1)
	tobit   `dep' sd lsunk ln7 if year==`ylast', ll(0) ul(1)
}

log close








**25 Co-production
**
** a Create co-prod matrix
**
**    a1. Read in firm-product dataset. 
**    a2. Drop SP firms 
**    a3. within-firm cross with same (as in mna)
**
** b Summarize co-prod at the two-digit level.
**
**    Number in each cell is the number of firms with that pattern even if they have more than one
**    combination of products that fits it. 
**
**
** c Summarize co-adding at the two digit level
**
**    i is the year t production
**    j is the year t+5 production
**

capture log close
log using ps4_basic_part25, text replace

use fp5_02_c8797, clear

*a product counts only if its output is neither zero nor missing. The second
*condition used to compare against 1 instead of missing, so rows with missing
*output were counted as products and single-product firms were not reliably dropped
gen i=pt_pv~=0 & pt_pv~=.
egen nprod = sum(i), by(firmid year)
sum nprod
drop if nprod==1
*NOTE(review): firm-years with nprod==0 and rows with i==0 are still kept and enter
*              the within-firm cross below -- confirm that is intended
keep firmid ssic5 year tvs
rename ssic5 ssic5i
sort ssic5i
egen gi=group(ssic5i)
sort firmid year
save junk_coprodi_c8797, replace

*second copy of the same product list, then all within firm-year pairs (i,j)
rename ssic5i ssic5j
rename gi gj
sort firmid year
save junk_coprodj_c8797, replace
joinby firmid year using junk_coprodi_c8797
sort firmid year ssic5i ssic5j
order firmid year ssic5i ssic5j

**drop dups & opposites
*keeps each unordered pair once (gi>gj) and removes a product paired with itself
drop if ssic5i==ssic5j
drop if gi<gj

save ps4_coprod5_c8797, replace

**b
use ps4_coprod5_c8797, clear

gen ssic2i = substr(ssic5i,1,2)
gen ssic2j = substr(ssic5j,1,2)
destring ssic2i, force g(s2i)
destring ssic2j, force g(s2j)
gen ncombs=1
collapse (sum) ncombs, by(firmid year ssic2i ssic2j)
sum ncombs


*two-digit co-production matrix, excluding sector 21 on either side
table ssic2i ssic2j if ssic2i~="21" & ssic2j~="21", c(sum ncombs)

save d_25, replace


log close




**26  Difference between r8797 and c8797
**    Compare number of ssic5 and total value of ssic5 in r8797 v c8797 samples

capture log close
log using ps4_basic_part26, text replace

*re-save the raw 8797 file sorted by product code so it can be the using file below
use /rdcprojects/br00544/data/pswitch4/r8797/fp5_02_r8797, clear
codebook ssic5
sort ssic5
save /rdcprojects/br00544/data/pswitch4/r8797/fp5_02_r8797, replace

*product codes present in all of the 87, 92 and 97 code lists
use /rdcprojects/br00544/data/pswitch4/curpc5_87, clear
sort curpc5
foreach yr in 92 97 {
	merge curpc5 using /rdcprojects/br00544/data/pswitch4/curpc5_`yr'
	tabulate _merge
	keep if _merge==3
	drop _merge
	sort curpc5
	generate year=`yr'
	tabulate year
	drop year
}
codebook curpc5

*restrict the raw firm-product data to those codes; saved as contemp
rename curpc5 ssic5
sort ssic5
merge ssic5 using /rdcprojects/br00544/data/pswitch4/r8797/fp5_02_r8797
keep if _merge==3
sort firmid ssic5 year
save contemp, replace

*raw output (zero or missing rows removed) next to output restricted to the constant codes
use /rdcprojects/br00544/data/pswitch4/r8797/fp5_02_r8797, clear
drop if pt_pv==. | pt_pv==0
keep firmid ssic5 year pt_pv
rename pt_pv raw_pv
sort firmid ssic5 year
merge firmid ssic5 year using contemp, keep(pt_pv)
tabulate _merge
replace pt_pv=. if pt_pv==0
rename pt_pv constant_pv

**examine value
generate rawv = raw_pv/1000000
generate v    = constant_pv/1000000
table year, contents(sum rawv sum v)
table year, contents(count rawv count v)
save d26, replace

**examine number of ssic5
collapse (sum) raw_pv constant_pv, by(ssic5 year)
replace raw_pv=. if raw_pv==0
replace constant_pv=. if constant_pv==0
generate x=1
sort year
by year: table x, contents(count raw_pv count constant_pv)

log close






**Section 27: check section 26

*for each census year: keep product codes beginning with 2 or 3, attach firmid by plant,
*save the plant-product file, then total pv by five-digit code
foreach yr in 87 92 97 {
	use /rdcprojects/br00544/jense002/FTDPROJ/Justin/AD/cmf`yr'prod, clear
	keep curpc pv ppn
	generate curpc5=substr(curpc,1,5)
	generate beg=substr(curpc,1,1)
	keep if beg=="2" | beg=="3"
	sort ppn
	merge ppn using /rdcprojects/br00544/data/pswitch4/ps4_plantchars, keep(firmid)
	save /rdcprojects/br00544/data/pswitch4/d27_`yr', replace
	collapse (sum) pv, by(curpc5)
	sort curpc5
	save /rdcprojects/br00544/data/pswitch4/testraw_`yr', replace

}

capture log close
log using ps4_basic_part27, text replace

*line up the three yearly totals (in millions) side by side by product code
use /rdcprojects/br00544/data/pswitch4/testraw_87, clear
sort curpc5
replace pv=pv/1000000
rename pv pv87
foreach yr in 92 97 {
	merge curpc5 using /rdcprojects/br00544/data/pswitch4/testraw_`yr'
	rename _merge _merge`yr'
	replace pv=pv/1000000
	rename pv pv`yr'
	sort curpc5
}

*end: characters of the code from position 5 on; idx: code has a value in all three years
generate end = substr(curpc5,5,5)
generate idx = pv87!=. & pv92!=. & pv97!=.
table idx, contents(sum pv87 sum pv92 sum pv97) format(%9.0fc)
table idx, contents(count pv87 count pv92 count pv97) format(%9.0fc)
table idx if end!="0", contents(count pv87 count pv92 count pv97) format(%9.0fc)

log close





**Section 28: M&A Check

capture log close
log using ps4_basic_part28_details, text replace


**a Create plant birth/death indicators as well as firm-plant-year tvs for merging in below
**   
**  Note: This readin file contains pt_pv,tvs,ar,frmnu,censt and has already dropped based on balance codes 
**
**  Note: Birth year is the **first** year the plant id appears in the CMF
**  Note: Death year is the **last**  year the plant id appears
**

*plant-year file: rows with ar==1 removed, mean tvs per plant-year, zero/missing tvs removed;
*only each plant's first and/or last observed year is kept
use ps4_plantlevel_c8797, clear
drop if ar==1
collapse (mean) tvs, by(ppn year)
drop if tvs==0 | tvs==.
egen born = min(year), by(ppn)
egen died = max(year), by(ppn)
generate pbirthyear = born==year
generate pdeathyear = died==year
keep if pbirthyear==1 | pdeathyear==1
sort ppn year
save ps4_pbirthdeath_c8797, replace

*firm-plant-year mean tvs, again without the ar==1 rows
use ps4_plantlevel_c8797, clear
drop if ar==1
collapse (mean) tvs, by(firmid ppn year)
sort firmid ppn year
save ps4_fptvs_c8797, replace

  
**b Create indicators for plants that are: opened, added, closed, divested
**
**  Start with plantlevel data, find out which firms-plants ever appear together
**  Create junkfillin which contains all possible firm-plant-year matches
**  Add in info about plant births and deaths and firm-plant tvs
**  Create activity vars
**
use ps4_plantlevel_c8797, clear
collapse (mean) tvs, by(firmid ppn year)

*number every firmid-ppn pair that appears at least once (temp) and keep the mapping;
*fillin then creates a row for every temp x year combination
egen temp = group(firmid ppn)
sort temp
save junk_temp_c8797, replace
drop firmid ppn tvs
fillin temp year

*bring firmid and ppn back via the group number
*verify that _merge only equals 3
sort temp
merge temp using junk_temp_c8797, keep(firmid ppn)
tabulate _merge
drop if _merge==2
drop _merge
sort firmid ppn year

*plant birth and death are merged at the plant-year level so they are known
*irrespective of the firm the plant is paired with
*verify that _merge does not equal 2
sort ppn year
merge ppn year using ps4_pbirthdeath_c8797, keep(pbirthyear pdeathyear)
tabulate _merge
drop if _merge==2
drop _merge

*plant tvs is merged at the firmid-plant-year level so activity is only seen for
*pairings that actually occur; tvs is missing before the firm and plant meet and after they part
*verify that _merge does not equal 2 and that replace command is not binding
sort firmid ppn year
merge firmid ppn year using ps4_fptvs_c8797, keep(tvs)
tabulate _merge
drop if _merge==2
drop _merge
replace tvs=. if tvs==0
replace tvs=. if _fillin==1

save junkfillin, replace

*flags for plants that are acquired or divested and opened or closed
*basic idea: if firm-plant has tvs this year but not five years earlier it must have been acquired unless it was born
use junkfillin, clear
sort firmid ppn year
generate p_wasacquired = tvs!=. & tvs[_n-1]==. & ppn==ppn[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]+5
generate p_wasopened   = p_wasacquired==1 & pbirthyear==1
replace p_wasacquired=0 if pbirthyear==1

*mirror image: tvs five years earlier but not now is a divestiture unless the plant died
generate p_wasdivested = tvs==. & tvs[_n-1]!=. & ppn==ppn[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]+5
generate p_wasclosed   = p_wasdivested==1 & pdeathyear[_n-1]==1
replace p_wasdivested=0 if pdeathyear[_n-1]==1 & p_wasdivested==1

*tvs present in both periods
generate p_incumbent   = tvs!=. & tvs[_n-1]!=. & ppn==ppn[_n-1] & firmid==firmid[_n-1] & year==year[_n-1]+5

*verify that when p_incumbent==1, all others ~=1 (verified by JRP 01/03/07)
*verify that cases of all 5 equaling zero are possible (hand check) (verified by JRP 01/03/07)
*verify that none of these categories have missing values (verified by JRP 01/03/07)
codebook p_*
foreach x in p_wasacquired p_wasopened p_wasclosed p_wasdivested {
	tabulate p_incumbent `x'
}

sort firmid ppn year
drop temp
save ps4_plantstatus_c8797, replace

**c collapse plant status indicators to firm-x-year level

*for the two-digit and five-digit levels: build the full firmid-ppn-ssic-year grid,
*attach the plant status flags and count plants of each status per firm-ssic-year
foreach x in 2 5 {
	use ps4_plantlevel_c8797, clear
	generate ssic2=substr(ssic5,1,2)
	collapse (mean) tvs pt_pv rpt_pv, by(firmid ssic`x' ppn year)
	egen temp = group(firmid ppn ssic`x')
	sort temp
	save junk_temp`x'_c8797, replace
	drop firmid ppn ssic`x' tvs
	fillin temp year

	*bring firmid, ppn and the product code back via the group number
	*verify that _merge only equals 3
	sort temp
	merge temp using junk_temp`x'_c8797, keep(firmid ppn ssic`x')
	tabulate _merge
	drop if _merge==2
	drop _merge
	sort firmid ppn ssic`x' year


	*merge in plant-type dummies from last section
	display ["merge in plant-type dummies from last section"]
	*verify that _merge ~=2
	sort firmid ppn year
	merge firmid ppn year using ps4_plantstatus_c8797, keep(p_wasacquired p_wasopened p_wasclosed p_wasdivested p_incumbent)
	tabulate _merge
	drop if _merge==2
	drop _merge

	*sum the status flags and output to the firm-ssic-year level
	collapse (sum) p_incumbent p_wasacquired p_wasopened p_wasdivested p_wasclosed pt_pv rpt_pv, by(firmid ssic`x' year)
	sort firmid ssic`x' year
	save ps4_plantstatus`x'_c8797, replace
}


**d Merge the plant status indicators into the section 7 dataset
foreach x in 5 2 {

	*read in firm-ssic`x'-year wasadded/dropped indicator from section 7 and compare
	*justin -- i need to know the results of this merge and compare
	display ["compare wasadded`x' from section to fwasadded here"]
	use ps4_wasadded_wasdropped_`x'_c8797, clear
	sort firmid ssic`x' year
	merge firmid ssic`x' year using ps4_plantstatus`x'_c8797, keep (p_* pt_pv rpt_pv)
	tabulate _merge
	drop if _merge==2
	drop _merge

	*classify each add/drop by the status of the plants behind it; the section 7
	*wasadded/wasdropped flags decide whether there is an add/drop at all
	*NOTE(review): the two incumbent flags have no p_incumbent~=. guard, so a missing
	*              p_incumbent satisfies >0 -- confirm this is the intended default
	sort firmid ssic`x' year
	generate add_incumbent  = wasadded`x'==1 & p_incumbent>0
	generate add_acquired   = wasadded`x'==1 & p_wasacquired>0 & p_wasacquired!=.
	generate add_opened     = wasadded`x'==1 & p_wasopened>0 & p_wasopened!=.
	generate drop_incumbent = wasdropped`x'==1 & p_incumbent>0
	generate drop_divested  = wasdropped`x'==1 & p_wasdivested>0 & p_wasdivested!=.
	generate drop_closed    = wasdropped`x'==1 & p_wasclosed>0 & p_wasclosed!=.

	*fix for missing plant status info: with all plant counts at zero the add/drop is assigned to incumbent plants
	replace add_incumbent=1 if wasadded`x'==1 & p_incumbent==0 & p_wasacquired==0 & p_wasopened==0
	replace drop_incumbent=1 if wasdropped`x'==1 & p_incumbent==0 & p_wasdivested==0 & p_wasclosed==0

	*check the two activity vars: number of categories flagged per add and per drop
	generate add_test  = add_incumbent + add_acquired + add_opened
	generate drop_test = drop_incumbent + drop_divested + drop_closed
	tabulate wasadded`x' add_test if year>1987
	tabulate wasdropped`x' drop_test if year>1987

	save ps4_mnacount`x'_1_c8797, replace
}

capture log close
log using ps4_basic_part28.log, replace

*e
*ssic`x' level add/droptype
foreach x in  5 2 {

	*first report count and value at product level
	*1 - existing only
	*2 - add/divest only
	*3 - open/close only
	*4 - combination with M&A
	*5 - combinateion with no M&A

	use ps4_mnacount`x'_1_c8797, clear

	sort firm ssic`x' year

	*later replace lines overwrite earlier ones: 5 and 4 are assigned first, then the
	*single-category codes 1-3 override them where exactly one category is flagged
	generate addtype`x'=.
	replace addtype`x' = 5 if add_acquired==0 & (add_incumbent!=0 | add_opened!=0)
	replace addtype`x' = 4 if add_acquired!=0 & (add_incumbent!=0 | add_opened!=0)
	replace addtype`x' = 1 if add_incumbent!=0 & add_acquired==0 & add_opened==0
	replace addtype`x' = 2 if add_incumbent==0 & add_acquired!=0 & add_opened==0
	replace addtype`x' = 3 if add_incumbent==0 & add_acquired==0 & add_opened!=0

	generate droptype`x'=.
	replace droptype`x' = 5 if drop_divested==0 & (drop_incumbent!=0 | drop_closed!=0)
	replace droptype`x' = 4 if drop_divested!=0 & (drop_incumbent!=0 | drop_closed!=0)
	replace droptype`x' = 1 if drop_incumbent!=0 & drop_divested==0 & drop_closed==0
	replace droptype`x' = 2 if drop_incumbent==0 & drop_divested!=0 & drop_closed==0
	replace droptype`x' = 3 if drop_incumbent==0 & drop_divested==0 & drop_closed!=0


	**create indicator for firms that are in the secton 7 sample
	sort firmid year
	merge firmid year using ps4_nadb`x'_c8797, keep(nadb`x' na`x' idx)
	tabulate _merge
	generate sect7firm = _merge==2 | _merge==3
	rename idx sect7idx
	rename nadb`x' sect7nadb`x'
	rename na`x' sect7na`x'
	drop _merge

	*create dormancy dummy for dropping below because we have no info about their behavior
	sort firmid
	merge firmid using ps4_dormant_c8797, keep(dormant)
	tabulate _merge
	drop if _merge==2
	drop _merge
	replace addtype`x'=. if dormant!=0
	replace droptype`x'=. if dormant!=0

	*results: counts by type, real value (millions) of adds, previous-row real value of drops
	sort firmid ssic`x' year
	replace rpt_pv=0 if rpt_pv<0 | rpt_pv==.
	generate rv=rpt_pv/1000000
	generate rvlag = rpt_pv[_n-1]/1000000

	tabulate addtype`x'    if sect7idx==1 & dormant==0
	table addtype`x'  if sect7idx==1 & dormant==0, contents(sum rv) format(%5.2fc)
	tabulate droptype`x'   if sect7idx==1 & dormant==0
	table droptype`x' if sect7idx==1 & dormant==0, contents(sum rvlag) format(%5.2fc)

	save ps4_mnacount`x'_2_c8797, replace

}

*f
*firm level add/droptype
foreach x in 5 2 {

	use ps4_mnacount`x'_2_c8797, clear

	*now report count and value at firm level: sum the product-level flags within firm-year
	collapse (sum) pt_pv add_* drop_* wasadded`x' wasdropped`x' p_wasacquired p_wasdivested (mean) dormant sect7firm sect7idx, by(firmid year sect7nadb`x')
	summarize
	sort firmid year
	merge firmid year using /rdcprojects/br00544/data/pswitch4/ps4_firmchars, keep(rtvs tvs)
	tabulate _merge

	drop if _merge==2
	drop _merge

	*same 1-5 coding as at the product level, applied to the firm-year sums;
	*the single-category codes 1-3 are assigned last and override 4 and 5
	generate addtype`x'=.
	replace addtype`x' = 5 if add_acquired==0 & (add_incumbent!=0 | add_opened!=0)
	replace addtype`x' = 4 if add_acquired!=0 & (add_incumbent!=0 | add_opened!=0)
	replace addtype`x' = 1 if add_incumbent!=0 & add_acquired==0 & add_opened==0
	replace addtype`x' = 2 if add_incumbent==0 & add_acquired!=0 & add_opened==0
	replace addtype`x' = 3 if add_incumbent==0 & add_acquired==0 & add_opened!=0

	generate droptype`x'=.
	replace droptype`x' = 5 if drop_divested==0 & (drop_incumbent!=0 | drop_closed!=0)
	replace droptype`x' = 4 if drop_divested!=0 & (drop_incumbent!=0 | drop_closed!=0)
	replace droptype`x' = 1 if drop_incumbent!=0 & drop_divested==0 & drop_closed==0
	replace droptype`x' = 2 if drop_incumbent==0 & drop_divested!=0 & drop_closed==0
	replace droptype`x' = 3 if drop_incumbent==0 & drop_divested==0 & drop_closed!=0

	*correct for dormancy, which adds ~3k obs to type2
	generate rv=rtvs/1000000
	tabulate addtype`x'    if sect7idx==1 & dormant==0
	table addtype`x'  if sect7idx==1 & dormant==0, contents(sum rv) format(%9.2fc)
	tabulate droptype`x'   if sect7idx==1 & dormant==0
	table droptype`x' if sect7idx==1 & dormant==0, contents(sum rv) format(%9.2fc)

	/*
	*correct for 2 digit data
	replace addtype`x'=1 if (sect7nadb`x'=="2. add only" | sect7nadb`x'=="4. both") & dormant==0 & sect7idx==1 & addtype`x'==.
	replace droptype`x'=1 if (sect7nadb`x'=="3. drop only" | sect7nadb`x'=="4. both") & dormant==0 & sect7idx==1 & droptype`x'==.

	tab addtype`x'    if sect7idx==1 & dormant==0 & (sect7nadb`x'=="2. add only" | sect7nadb`x'=="4. both")
	table addtype`x'  if sect7idx==1 & dormant==0 & (sect7nadb`x'=="2. add only" | sect7nadb`x'=="4. both"), c(sum rv) f(%9.2fc) 
	tab droptype`x'   if sect7idx==1 & dormant==0 & (sect7nadb`x'=="3. drop only" | sect7nadb`x'=="4. both")
	table droptype`x' if sect7idx==1 & dormant==0 & (sect7nadb`x'=="3. drop only" | sect7nadb`x'=="4. both"), c(sum rv) f(%9.2fc) 
	*/

	*correct for dormancy, which adds ~3k obs to type2
	/*
	tab addtype`x'    if sect7firm==1
	table addtype`x'  if sect7firm==1, c(sum rv) f(%9.2fc)
	tab droptype`x'   if sect7firm==1
	table droptype`x' if sect7firm==1, c(sum rv) f(%9.2fc)
	*/
	sort firmid year
	save ps4_mnacount`x'_3_c8797, replace
}



*g
*look at nadb for mna vs non mna firms
*for each digit level: attach nadb`x' to the firm-year file from step f and count
*firm-years by nadb`x' category, separately for active and non-active firms
foreach x in 5 2 {

	use ps4_mnacount`x'_3_c8797, clear

	sort firmid year
	merge firmid year using ps4_nadb`x'_c8797, keep(nadb`x')
	tab _merge 
	drop _merge
	
	*NOTE(review): active requires MORE than one acquired or divested plant (>1);
	*              if any M&A activity should count this ought to be >0 -- confirm
	gen active = p_wasacquired>1 | p_wasdivested>1
	
	*sample restriction matches steps e and f: sect7idx must equal 1. A bare
	*"& sect7idx" is true for any nonzero value, including missing.
	table nadb`x' if dormant==0 & sect7idx==1 & active==1, c(count wasadded`x') f(%10.0fc)
	table nadb`x' if dormant==0 & sect7idx==1 & active==0, c(count wasadded`x') f(%10.0fc)

	save ps4_mnacount`x'_4_c8797, replace	
} 

log close





**29 Firm-level new decomposition
**
**  For the 5-, 4- and 2-digit levels: average within-firm output share of products
**  that were just added and of products that are about to be dropped, by year.

capture log close
log using ps4_basic_part29, text replace

*identify was added and to-be-dropped products
quietly {
	foreach x in 5 4 2 {
		use fp`x'_02_c8797, clear
		
		keep firmid ssic`x' year pt_pv
		
		*add birthdeath info
		sort firmid year
		merge firmid year using ps4_birthdeath_c8797
		tab _merge
		drop _merge
		
		*added: output missing in the previous row of the same firm-product, present now
		*will drop: output present now, missing in the next row of the same firm-product
		*firm birth years and death years are excluded
		sort firmid ssic`x' year
		gen wasadded`x' = firmid[_n]==firmid[_n-1] & ssic`x'[_n]==ssic`x'[_n-1] & pt_pv[_n-1]==. & pt_pv[_n]~=.
		replace wasadded`x' = 0 if birthyear==1
		gen willdrop`x' = firmid[_n]==firmid[_n+1] & ssic`x'[_n]==ssic`x'[_n+1] & pt_pv[_n+1]==. & pt_pv[_n]~=.
		replace willdrop`x'=0 if deathyear[_n]==1
		
		**compute shares
		gen av = wasadded`x'*pt_pv
		gen dv = willdrop`x'*pt_pv
		egen sumpv = total(pt_pv), by(firmid year)
		egen sumav = total(av), by(firmid year)
		egen sumdv = total(dv), by(firmid year)
		gen as = av/sumpv*100
		gen ds = dv/sumpv*100
		
		*only these three lines produce output inside the quietly block
		noisily display [`x']
		noisily table year if birthyear~=1 & as~=0, c(mean as)
		noisily table year if deathyear~=1 & ds~=0, c(mean ds)
	}
}

*close this section's log; leaving it open makes the next "log using" fail
log close






**30 Generate shares of codes and value lost when moving from the raw sample to the constant sample and then dropping trailing zeros

** Create Unique List of curpc5 for each year
** Also generates the number of sic5s for each year
** Also generates sum(pv) for each year

*close any log left open by an earlier section; "log using" fails if one is still open
capture log close
log using ps4_basic_part30.log, replace

foreach yr in 72 77 82 87 92 97 {
	use /rdcprojects/br00544/jense002/FTDPROJ/Justin/AD/cmf`yr'prod, clear
	keep ppn curpc pv year
	gen curpc5=substr(curpc,1,5)
	*keep only product codes whose first digit is 2 or 3
	gen curpc1=substr(curpc,1,1)
	destring curpc1, replace
	drop if curpc1<2
	drop if curpc1>3
	*value is the total of pv over all remaining rows (same number on every row)
	egen value=sum(pv)
	display [`yr']
	sum value, det
	codebook value
	drop curpc1
	codebook curpc5
	drop value
	sort curpc5
	*row-level file with pv, used for the value calculations further down
	save curpc5_val_`yr', replace
	*unique list of five-digit codes for this year
	keep curpc5
	duplicates drop curpc5, force
	sort curpc5
	save curpc5_`yr', replace
}

** Create constant list of sic5s for 7297 sample
** (5-digit codes present in every one of the 72 77 82 87 92 97 lists)

use curpc5_72, clear
sort curpc5
foreach yr in 77 82 87 92 97 {
	*match on curpc5: without a key variable, merge pairs rows by observation
	*number, and _merge==3 would not mean "code present in both lists"
	sort curpc5
	merge curpc5 using curpc5_`yr'
	tab _merge
	*keep only codes found in both the running list and this year's list
	keep if _merge==3
	drop _merge
	save curpc5_7297, replace
}

use curpc5_7297, clear
duplicates drop curpc5, force
drop if curpc5==""
display ["Constant 7297 sample"]
codebook curpc5
*saved sorted so that it can be the using file of a later match merge
sort curpc5
save curpc5_7297, replace

** Create constant list of sic5s for 7282 sample
** (5-digit codes present in every one of the 72 77 82 lists)

use curpc5_72, clear
sort curpc5
foreach yr in 77 82 {
	*match on curpc5: without a key variable, merge pairs rows by observation
	*number, and _merge==3 would not mean "code present in both lists"
	sort curpc5
	merge curpc5 using curpc5_`yr'
	tab _merge
	*keep only codes found in both the running list and this year's list
	keep if _merge==3
	drop _merge
	save curpc5_7282, replace
}

use curpc5_7282, clear
duplicates drop curpc5, force
drop if curpc5==""
display ["Constant 7282 sample"]
codebook curpc5
*saved sorted so that it can be the using file of a later match merge
sort curpc5
save curpc5_7282, replace


** Create constant list of sic5s for 8797 sample
** (5-digit codes present in every one of the 87 92 97 lists)

use curpc5_87, clear
sort curpc5
foreach yr in 92 97 {
	*match on curpc5: without a key variable, merge pairs rows by observation
	*number, and _merge==3 would not mean "code present in both lists"
	sort curpc5
	merge curpc5 using curpc5_`yr'
	tab _merge
	*keep only codes found in both the running list and this year's list
	keep if _merge==3
	drop _merge
	save curpc5_8797, replace
}

use curpc5_8797, clear
duplicates drop curpc5, force
drop if curpc5==""
display ["Constant 8797 sample"]
codebook curpc5
*saved sorted so that it can be the using file of a later match merge
sort curpc5
save curpc5_8797, replace

** Create dataset that drops zeros with no additional information from each of the constant samples created above
** A 5-digit code ending in "0" is kept only when it is the sole code in its
** 4-digit group; otherwise it is dropped. Result is saved as curpc5_`xxx'_no0.

foreach xxx in 7297 7282 8797 {
	use curpc5_`xxx', clear
	*last0: 1 if the fifth character of the code is "0"
	gen last0  = substr(curpc5,5,1)=="0" 
	gen curpc4 = substr(curpc5,1,4) 
	*n5: number of 5-digit codes in the 4-digit group; n5_0: how many end in "0"
	egen n5    = count(last0), by(curpc4) 
	egen n5_0  = total(last0), by(curpc4) 
	*i: 1 if the group consists of a single code and that code ends in "0"
	gen i      = n5==1 & n5_0==1 
	tab last0 i     
	drop if last0==1 & i==0 
	tab last0 i 
	*drops the helper variables last0 curpc4 n5 n5_0 i (range in creation order)
	drop last0-i 
	display [`xxx']
	duplicates drop curpc5, force
	codebook curpc5
	*egen with by() may change the sort order; the file is later the using file
	*of a match merge on curpc5, so it has to be saved sorted by that key
	sort curpc5
	save curpc5_`xxx'_no0, replace
}


**Calculate Values for each year based on "constant" samples
*
* For every constant code list (7282, 8797, 7297, each with and without the
* trailing-zero codes) and every census year, restrict that year's record-level
* file to the codes on the list, report the total of pv and the codes retained,
* and save the result as curpc5_`yr'_c<list>.
* Order of processing: 7282, 7282_no0, 8797, 8797_no0, 7297, 7297_no0.

foreach smp in 7282 7282_no0 8797 8797_no0 7297 7297_no0 {
	foreach yr in 72 77 82 87 92 97 {
		use curpc5_val_`yr', clear
		sort curpc5
		*many records per code in the master, one row per code in the list
		merge curpc5 using curpc5_`smp'
		keep if _merge==3
		*total pv over the retained records (same value on every row)
		egen value=sum(pv)
		display [`yr']
		codebook value
		codebook curpc5
		save curpc5_`yr'_c`smp', replace
	}
}


log close




