





* NOTE(review): aaa is not defined in this file. Unless a program with that
* name is installed elsewhere, Stata halts here with an unrecognized-command
* error; presumably a deliberate guard against running the whole file by
* accident. TODO confirm before removing.
aaa
clear

* Folder globals: source is the raw-data folder, root the project folder.
* The working directory becomes the output subfolder of root, so the relative
* file names used by save, use and merge below resolve there.
global source C:\Users\Public\Documents\data\DADS_Panel\
global root C:\Users\Public\Documents\LA
cd "$root\output"
* graphs and tables are defined here but not referenced in this part of the file
global graphs $root\graphs031220
global tables $root\tables051019



						***********************************
						*** DATA CONSTRUCTION : FRANCE ****
						***********************************
						
{						
						
						
						
						

*** Create long panel + very basic checks
{

* Variables read from the raw panel file (same list, same order)
global varliste ///
	an annai depr dept comr comt depnai age dp debremu finremu sir ///
	sx apen apet apen2 apet2 cs2 nbsa_ent nbsa_et nic4 pcs4 nninouv nnifict ///
	nnihc snr avr sbr entsir msbr_ent pan25 ///
	dip_tot

* Load observations from 2002 onwards with a non-missing year
use $varliste using "$source\PanelDADSEDP2015" if an != . & an >= 2002, clear


* Renaming, basic checks and cleaning

* nnifict is loaded but not used further
drop nnifict

* Individuals not born in October of the right year:
* keep persons with a single nnihc value that is not "1"
* (the two conditions removed about 3k and 6 rows respectively)
egen Nnihc = nvals(nnihc), by(nninouv)
drop if Nnihc != 1 | nnihc == "1"
drop Nnihc nnihc

* Put municipalities in 5-digit format: prefix the department code, then
* discard codes that are not 5 characters long or are all zeros
* (about 300k, 44k, 32k and 0 rows removed across the four conditions)
foreach side in r t {
	replace com`side' = dep`side' + com`side'
	drop dep`side'
	drop if strlen(com`side') != 5 | com`side' == "00000"
}

* Clean age / year of birth: discard every person with at least one missing age
egen AnyMissingAge = max( age == . ), by(nninouv)
keep if AnyMissingAge == 0
drop AnyMissingAge


* Filter on age
	* First make sure that age is monotonically increasing
	* A row is flagged when age jumps by 2 while the year advances by 1
	* relative to the previous row of the same person; its age is lowered by 1.
sort nninouv an
bys nninouv (an): gen problem = ( age == age[_n-1]+2 & an == an[_n-1]+1 )
replace age = age - 1 if problem == 1

	* Harmonise age within person-year by taking the minimum over spells
egen age2 = min(age), by(nninouv an) // 713,358 changes
count if age2 != age //  254,873

	* Fix: the harmonised variable age2 must replace age. The previous code
	* dropped age and then tried to rename age to itself, which fails because
	* no variable called age exists any more.
drop age
rename age2 age
drop problem

	* Keep only persons for whom age minus year is a single constant
gen check = age - an
egen Nc = nvals(check) , by(nninouv)
drop if Nc != 1
drop check Nc // drop 240k

* Filter age: keep ages 25 to 64 inclusive (about 3.3m rows removed)
keep if inrange(age, 25, 64)

* Gender: discard persons observed with two sex codes (76 rows) and rows
* with an empty sex code
egen Ns = nvals(sx), by(nninouv)
drop if Ns == 2 | sx == ""
drop Ns

*count// 16,148,943

* Readable names for identifiers, dates, firm and plant variables
rename (an   depnai    debremu finremu sx  sir   nbsa_ent  nbsa_et    nic4 pcs4 nninouv entsir) ///
       (year birth_dep start   end     sex siren firm_size plant_size nic  cs4  id      date_join_firm)

* Wage variables
*   gwc      : gross wage in constant euros
*   nwc      : net wage in constant euros
*   bc       : benefits in constant euros
*   payrolln : total wages of firm
rename (sbr snr avr msbr_ent) (gwc nwc bc payrolln)

* Check that pan25 is constant by individual: discard persons with two
* distinct values (about 47k rows)
egen DistinctPan25 = nvals(pan25), by(id)
drop if DistinctPan25 == 2
drop DistinctPan25

* Sort
sort id year

* Clean start and end dates
* Spell length implied by the dates, compared with the reported duration dp
gen days = end - start + 1
gen check = days - dp
summarize check, detail
drop check

*count //  12,908,265
*count if days != . //  12,895,826
*count if dp !=  . // 12,908,239
*count if check > 0 // 681,148

* Discard every person with at least one spell lacking days or dp (about 100k rows)
egen MissingSpell = max( (days == . ) | ( dp == . ) ), by(id)
keep if MissingSpell == 0
drop MissingSpell

* Spells with a negative end date are moved to the previous year and their
* dates shifted by 360 days; the flag is fixed before any variable changes
gen byte NegativeEnd = end < 0
replace year  = year - 1    if NegativeEnd == 1
replace start = 360 + start if NegativeEnd == 1
replace end   = 360 + end   if NegativeEnd == 1
drop NegativeEnd

* Start dates cannot precede day 1; recompute the spell length afterwards
replace start = max(start,1)
replace days = end - start + 1

* Save
save "dads_0", replace

}
*

*** More cleaning 
{

* Reload the spell-level panel saved at the end of the previous section
use "dads_0", clear

order id year start end days
sort id year start end

* Clean net earnings and gross earnings (rough 25% tax rate)
* When exactly one of the two wage variables is zero it is imputed from the
* other with a factor of 1.25; rows where both are zero are discarded.
* NOTE(review): a missing value satisfies > 0 in Stata, so a row with gwc == 0
* and missing nwc (or nwc == 0 and missing gwc) has the zero overwritten by
* missing. Confirm that this is intended.
replace gwc = nwc * 1.25 if nwc > 0  & gwc == 0 // 42k changes
replace nwc = gwc / 1.25 if nwc == 0 & gwc >  0 // 430k changes
drop if gwc == 0 & nwc == 0 // drop 1m

* Remove duplicated spells: first on the full key, then on the key without nic.
* force is required because the remaining variables may differ across copies.
duplicates drop id year siren nic start end, force // drop 65
duplicates drop id year siren     start end, force // drop 1 more

save "dads_01", replace

}


*** Select only highest spell per quarter
{

use "dads_01", clear
sort id year start end

* Quarter flags on a 360-day year split into four 90-day blocks:
* qK equals K when the spell overlaps quarter K and 0 otherwise
gen q1 =  start <= 90
forvalues k = 2/3 {
	local cut = 90 * (`k' - 1)
	gen q`k' = `k' * ( ( start > `cut' & start <= `cut' + 90 ) | ( start <= `cut' & end > `cut' ) )
}
gen q4 = 4 * ( end > 270 )

* Four copies of every spell, numbered within spell by Q
expand 4
bysort id year siren nic start end: gen Q = _n

order id siren nic year start end Q q*

* Keep only relevant quarter-spells, i.e. copies whose number matches a flag
keep if inlist(Q,q1,q2,q3,q4)

* Define daily and quarterly wage (quarterly = 90 times the daily wage)
foreach w in gwc nwc {
	gen d`w' = `w' / days
	gen q`w' = 90 * d`w'
}

* Define income from each spell in a given quarter
	* Start and end dates truncated to the bounds of quarter Q
forvalues k = 1/4 {
	gen start`k' = max( start , 90 * (`k' - 1) + 1 )		if Q == `k'
}
forvalues k = 1/4 {
	gen end`k' = min( end , 90 * `k' )		if Q == `k'
}

	* Number of days of the spell falling in each quarter
forvalues k = 1/4 {
	gen days`k' = end`k' - start`k' + 1
}

	* Gross earnings of the spell within its quarter
gen Qw = 0
forvalues k = 1/4 {
	replace Qw = dgwc * days`k' if Q == `k'
}

drop start1 start2 start3 start4 end1 end2 end3 end4

* Keep, for each person-year-quarter, the spell with the largest Qw
* (about 3.4m rows removed)
order id year Q Qw
sort id year Q Qw
by id year Q (Qw) : keep if _n == _N

rename Q q

order id year q qnwc qgwc

save "dads_1", replace

count 		// 34m spell-quarter
distinct id // 1 352 560 individuals

}
*


*** Aggregate earnings and employment at annual level
{


* Sum earnings and days worked
use "dads_1", clear

* Each row has days in one quarter only; the other three are missing and
* count as zero in the row total
egen Days = rowtotal(days1 days2 days3 days4)

collapse (sum) qnwc qgwc Days, by(id year)

summarize Days, detail
drop if Days < 1 // drop 3k

save "dads_1_earnings", replace


* Take all other variables from highest paying spell at annual level

use "dads_01", clear
sort id year gwc
by id year (gwc) : keep if _n == _N // drop 3.6m

save "dads_1_demographics", replace
drop gwc nwc days start end

* Attach annual earnings; person-years without an earnings record are discarded
merge 1:1 id year using "dads_1_earnings", keep(match using) nogenerate
rename (qgwc qnwc) (gwc nwc)

* First and last year in which each person is observed
egen minyear = min(year), by(id)
egen maxyear = max(year), by(id)

save "dads_2", replace


}



*** Fill in unemployment periods
{

** Prepare filling dataset: one row per calendar year, 2002 to 2015
clear
set obs 14
gen year = 2001 + _n
sort year
save "temp/ally", replace


** Create full list of id year
use id minyear maxyear using "dads_2", clear

* One row per person, then every person paired with every year
bysort id: keep if _n == 1

cross using "temp/ally" // Takes 20sec to run
count // 18m

save "temp/allidy", replace

* Person-years found in dads_2 are employment years; the rest are fillers
merge 1:1 id year using "dads_2"

gen employed = _merge == 3
drop _merge

** Some additional variables
gen TotalYears = maxyear - minyear + 1

save "dads_3", replace


}


*** Variables during non-employment periods
{

use "dads_3", clear
drop dp date_join_firm bc pan25

* Employer identifiers and sizes are not defined in filled-in years
replace siren = "Non employed" if employed == 0
replace nic   = "Non employed" if employed == 0
replace firm_size = . if employed == 0
replace plant_size = . if employed == 0
replace payrolln = . if employed == 0

* No earnings and no days worked in filled-in years
replace gwc = 0 if employed == 0
replace nwc = 0 if employed == 0
replace Days = 0 if employed == 0

sort id year

* Carry the last observed characteristics forward into filled-in years
foreach var in birth_dep annai sex apen apet apen2 apet2 cs2 cs4 dip_tot comr comt {
	bys id (year): replace `var' = `var'[_n-1] if employed == 0 & _n > 1
}

* Fix: age is missing in every filled-in year, and a missing value fails
* age <= 64, so the age filter below used to delete all non-employment rows.
* Age is rebuilt from the within-person gap between age and year. The minimum
* gap is used because spells moved back one year during date cleaning carry a
* gap that is one higher than the regular one.
egen AgeOffset = min(age - year), by(id)
replace age = year + AgeOffset if employed == 0 & age == .
drop AgeOffset

keep if age >= 25 & age <= 64
* Filled-in years before the first observed employment year are discarded
drop if year < minyear

save "dads_4", replace
 
}



*** Make occupations/industry/siren consistent
{

use id year siren apet apet2 cs4 using "dads_4" if inlist(year,2007,2008,2009), clear

* Numeric firm identifier, and its value in each of the three years
fegen gsiren = group(siren)

forvalues yr = 2007/2009 {
	egen gsiren`yr' = max( gsiren * ( year == `yr' ) ), by(id)
}

* Keep persons with the same firm identifier in 2007, 2008 and 2009,
* and compare their 2007 and 2009 industry codes
keep if gsiren2007 == gsiren2008 & gsiren2008 == gsiren2009
keep if year != 2008

* In 2009 the industry code is taken from apet2
replace apet = apet2 if year == 2009

keep id year apet

reshape wide apet , i(id) j(year)

* Number of persons in each (2007 code, 2009 code) cell
gen n = 1
fcollapse (sum) n , by(apet2007 apet2009)

* Share of each 2007 code within a 2009 code
egen SumByApet2009 = total(n), by(apet2009)
gen FracApet2007ByApet2009 = n / SumByApet2009

egen MaxFracApet2007ByApet2009= max(FracApet2007ByApet2009), by(apet2009)
summarize MaxFracApet2007ByApet2009, detail // p50 = 0.87

* For each non-empty 2009 code keep the 2007 code with the largest share
keep if apet2009 != ""
bysort apet2009 (FracApet2007ByApet2009): keep if _n == _N

keep apet2007 apet2009

save "CrosswalkApet2008", replace


}


*** More cleaning
{

use "dads_4", clear

* Occupation codes in lower case
gen pcs4 = lower(cs4)
drop cs4
rename pcs4 cs4

* Map the later industry code back to the 2007 classification
rename apet2 apet2009
merge m:1 apet2009 using "CrosswalkApet2008"

sort id year
replace apet = apet2007 if _merge == 3 & year >= 2008
drop _merge apet2007 apet2009 apen apen2

* Fraction of a 360-day year in employment
gen FracEmp = Days / 360
drop Days

* Earnings divided by the employed fraction of the year
gen dnwc = nwc / FracEmp
gen dgwc = gwc / FracEmp

* Carry the industry code forward into non-employment years
bysort id: replace apet = apet[_n-1] if _n > 1 & employed == 0

drop siren

save "dads_5", replace

}


*** City ranks, migrants and factor variables for regressions
{

* City ranks
* Municipalities (comr) are ranked by the mean net wage (nwc) over employed
* person-years. zC is the rank divided by the number of municipalities, so it
* lies in (0,1] and the highest-wage municipality gets 1.
* NOTE(review): municipalities with equal mean nwc are ordered arbitrarily by sort.
use "dads_5", clear
keep if employed == 1
fcollapse (mean) nwc, by(comr)

sort nwc
gen zC = _n / _N

keep comr zC
save "CityRanks", replace

* Merge in city ranks and migrants only (only one migration)
* keep(3) discards person-years whose comr has no rank in CityRanks
use "dads_5", clear
merge m:1 comr using "CityRanks", nogen keep(3)

* mig flags a change of comr relative to the previous row of the same person
sort id year
bys id: gen mig = comr[_n] != comr[_n-1] & _n > 1

* YearsSinceMig counts rows since the last move: 1 on the move row, then 2, 3, ...
* NOTE(review): the first row of each person has mig == 0 and no previous row,
* so the replace turns it into missing, and it stays missing until the first
* move. TimeInComr is therefore missing for the spell before the first move.
* Only values on move rows are used below.
gen YearsSinceMig = mig
bys id: replace YearsSinceMig = YearsSinceMig[_n-1] + 1 if mig == 0
* CumMig numbers the residence spells of a person; TimeInComr is the number
* of rows in the spell
bys id: gen CumMig = sum(mig)
egen TimeInComr = max(YearsSinceMig), by(id CumMig)

order id year comr mig YearsSince CumMig TimeIn

* Define as a long migration only migrations where individuals stay
* at least 3 years in destination
gen migLong = mig
replace migLong = 0 if TimeIn < 3

bys id: gen CumMigLong = sum(migLong)



* Number of moves and of long moves per person
egen SumMig = sum(mig), by(id)
tab SumMig

egen SumMigLong = sum(migLong), by(id)
tab SumMigLong


*** CHOOSE WHICH TYPE OF MIGRATION TO USE
* MigType is appended to the names SumMig, CumMig and mig below. It is left
* empty here, so the plain variables are used; setting it to Long would select
* SumMigLong, CumMigLong and migLong instead.
global MigType // Long // Long or empty

* keep if SumMig >= 1
keep if SumMig$MigType >= 1

* Count as migration only if stay two consecutive years in the destination

* identify year of first migration
sort id year
gen MigFirst = CumMig$MigType == 1 & mig$MigType == 1

* YearMig is the year before the first move, so h == 0 is the last row year
* before moving and h == 1 the year of the move
egen YearMig = max( year * ( MigFirst == 1 ) ), by(id)
replace YearMig = YearMig - 1

drop CumMig*

gen h = year - YearMig

sort id year

rename dip_tot dip
* Five-year age bins
gen agebin = floor(age / 5)

save "dads_51", replace


* Time-0 variables
use "dads_51", clear
keep if h == 0

	* Drop those with missing info at time 0
keep if cs4 != "" & cs2 != "" & apet != "" & dip != ""

* The lookup file is keyed on comt, so comr is passed under that name for the
* merge and both variables get their own names back afterwards
rename (comt comr) (comtt comt)
merge m:1 comt using "temp/zer", nogen keep(3)
rename (comt comtt ze) (comr comt zer)

global FactorList comr comt birth_dep annai sex apet cs2 cs4 dip zer

* Numeric group codes, prefixed with g, for every factor variable
foreach f in $FactorList {
	fegen g`f'= group(`f')
}

* Keep the identifiers and the group codes only; gwc also matches the g
* pattern and is removed explicitly
keep id h g*
drop gwc

save "dads_52", replace


* Merge in time-0 variables in panel
use "dads_51", clear

keep id year h agebin gwc nwc dgwc dnwc FracEmp zC mig$MigType SumMig$MigType

* Group codes exist on the h == 0 row only at this point
merge m:1 id h using "dads_52", nogen

* Discard persons whose h == 0 row has no zer code (about 400k rows)
egen NoZe = max( ( gzer == . ) * ( h == 0 ) ), by(id)
keep if NoZe == 0
drop NoZe


* Spread the time-0 group codes over all rows of the person
foreach f in $FactorList {
	egen MAXg`f'= max(g`f'), by(id)
	drop g`f'
	rename MAXg`f' g`f'
}

drop if gcomr == .

* Time-0 values of the outcomes and of the city rank
foreach v in gwc nwc dgwc dnwc FracEmp zC {
	egen `v'0 = max( `v' * ( h == 0 ) ), by(id)
}

* City rank in the year of the move
egen zC1 = max( zC * ( h == 1 ) ), by(id)


* Logs and log differences relative to time 0
foreach v in gwc nwc dgwc dnwc FracEmp {
	gen l`v' = log(`v')
	gen l`v'0 = log(`v'0)
	gen Diff_l`v' = l`v' - l`v'0
}
gen Diff_FracEmp = FracEmp - FracEmp0

gen dzC = zC - zC0


* Replace the person identifier with a numeric group code
fegen gid = group(id)
drop id


save "dads_6", replace


}


}




						*******************
						*** REGRESSIONS ***
						*******************


{



*** Output for paper
{

use "dads_6", clear

* Event window, shifted so that h == 0 corresponds to hShift == 11
keep if inrange(h, -10, 12)
gen hShift = h + 11

keep if gsex == 2

* First and last event time observed for each worker
egen minh = min(h), by(gid)
egen maxh = max(h), by(gid)

gen Post = h > 0

** Sample selection: quite important
* Workers must be observed at h of -2 or earlier and at h of 4 or later
keep if minh <= -2 & maxh >= 4
* keep if minh >= -4

* Interactions of the destination rank with every event-time dummy
forvalues s = 1/23 {
	gen zC1_hShift`s' = ( hShift == `s' ) * zC1
}

* Keep only single migrants
keep if SumMig$MigType >= 1 // & SumMig$MigType <= 2

* Keep only narrow enough window
global Lower = 7
global Upper = 20

** Graph with simple means: does not look very good: mostly pre-trends
* Runs inside preserve and restore, so the estimation data are left untouched.
preserve

keep if SumMig$MigType == 1

* QzC1 is 1 for destination ranks up to 0.2 and 5 for ranks of 0.8 or more;
* ranks in between stay missing and are discarded below.
* NOTE(review): a missing zC1 would satisfy >= 0.8 and be coded 5.
gen QzC1 = 1 if zC1 <= 0.2
replace QzC1 = 5 if zC1 >= 0.8
* Residual of the log net wage change after removing event-time means
reg Diff_lnwc i.hShift
predict res, residuals
keep if QzC1 != .
fcollapse (mean) res, by(h QzC1)
* QzC abbreviates QzC1; one line per group
line res h if QzC == 1 || line res h if QzC == 5
restore




* Restrict the estimation data to persons with exactly one migration
keep if SumMig$MigType == 1
distinct gid

gen Cons = 1

* Regressors

* Clustering variable passed to vce(cluster)
global Cluster gzer#gdip

* Absorbed controls. VeryLowControls is defined but not used in this part of
* the file.
global VeryLowControls gzer#Post gzer#Post#c.h

global LowControls gcomr#Post gcomr#Post#c.h

global HighControls $LowControls ///
					agebin#Post gcs4#Post ///
					agebin#Post#c.h gcs4#Post#c.h
* Suffix selecting the rank variable: C gives zC1 and its interactions
global RankType C

* Non-parametric specification: event-time dummies, the rank, and rank by
* event-time interactions from Lower to 10 and from 12 to Upper. The
* interaction for hShift 11, i.e. h == 0, is left out.
* The two variable ranges rely on the interactions being stored in creation order.
global RHSnonparam i.hShift z${RankType}1 z${RankType}1_hShift$Lower-z${RankType}1_hShift10 ///
			       z${RankType}1_hShift12-z${RankType}1_hShift$Upper

* Parametric specification: rank by Post, and rank by Post by linear h
global RHSparam    i.hShift c.z${RankType}1#Post c.z${RankType}1#c.h#Post

* Estimation window in shifted event time
global Sample      hShift >= $Lower & hShift <= $Upper


** Compute worker level pre-trend
* A worker-specific intercept and a worker-specific slope in h are estimated
* on rows with h <= 0 inside the sample window and saved as WorkerLevelPre
* and WorkerSlopePre.
reghdfe Diff_lnwc i.hShift if h <= 0 & $Sample , ///
	    a(WorkerLevelPre = gid WorkerSlopePre = gid#c.h)

* Fix: carry the estimates forward only into rows where they are missing.
* The previous code overwrote every row with the value of the worker's first
* row; when that row lies outside the estimation sample the value is missing,
* so the whole worker was set to missing.
sort gid year
foreach var in WorkerLevelPre WorkerSlopePre {
	bys gid (year): replace `var' = `var'[_n-1] if missing(`var') & _n > 1
}
gen WorkerGrowth = WorkerSlopePre * h

* Outcome net of the worker-specific pre-migration level and linear trend
gen Diff_lnwc_detrend = Diff_lnwc - WorkerLevelPre - WorkerGrowth



** Non-parametric results
* Four specifications: raw or detrended outcome, basic or full controls.
* Each is stored in memory, saved to the export folder and passed to ShowCoef,
* a program defined outside this part of the file.

* Basic controls
reghdfe Diff_lnwc $RHSnonparam if $Sample , ///
	    a($LowControls) ///
		vce(cluster $Cluster)
estimates store Reg01
estimates save "export031820/Reg01", replace

ShowCoef z${RankType}1 hShift 11 $Lower 10 12 $Upper Reg01


* Full controls
reghdfe Diff_lnwc $RHSnonparam if $Sample , ///
	    a($HighControls) ///
		vce(cluster $Cluster)
estimates store Reg02
estimates save "export031820/Reg02", replace

ShowCoef z${RankType}1 hShift 11 $Lower 10 12 $Upper Reg02

* With worker pre-trends: basic
reghdfe Diff_lnwc_detrend $RHSnonparam if $Sample , ///
	    a($LowControls) ///
		vce(cluster $Cluster)
estimates store Reg03
estimates save "export031820/Reg03", replace

ShowCoef z${RankType}1 hShift 11 $Lower 10 12 $Upper Reg03


* With worker pre-trends: full controls
reghdfe Diff_lnwc_detrend $RHSnonparam if $Sample , ///
	    a($HighControls) ///
		vce(cluster $Cluster)
estimates store Reg04
estimates save "export031820/Reg04", replace

* Fix: the command name was typed twice here, which shifted every argument
* by one position relative to the three calls above
ShowCoef z${RankType}1 hShift 11 $Lower 10 12 $Upper Reg04


** Single slope results
* Same four specifications as the non-parametric block, with the parametric
* right-hand side. Order and names:
*   Reg11  raw outcome,       basic controls
*   Reg12  raw outcome,       full controls
*   Reg13  detrended outcome, basic controls
*   Reg14  detrended outcome, full controls
local spec = 11
foreach depvar in Diff_lnwc Diff_lnwc_detrend {
	foreach controls in LowControls HighControls {
		reghdfe `depvar' $RHSparam if $Sample , ///
			a(${`controls'}) ///
			vce(cluster $Cluster)
		estimates store Reg`spec'
		estimates save "export031820/Reg`spec'", replace
		local ++spec
	}
}


	
}


}

