# delimit ;
set more 1;

* Open the run log.  Its name carries the year, the grouping variable and;
* the sample selector, with an -ind tag appended when the third argument;
* asks for industry dummies;
local logsfx "";
if "`3'"=="ind" {local logsfx "-ind"};
log using "d:\wage phillips\stata\log\prep-resid`1'-`2'-`4'`logsfx'", replace;

/* Generate standardized wage, ue and THS residuals by state, msa, or non-msa
   by year for NAIRU study.
   
   Residuals are:
   
   1) Hourly wage for all workers
   2) Weekly wage for full-time
   3) Hourly wage for hourly workers
   
   PARAMETERS:
   
   1 - Year
   2 - Grouping variable (state, msa70, msa85, msa70a)
   3 - Use industry dummies in regressions (YES: ind, NO: noind)
   4 - All, 1 or 2: corresponds to keeping only the odd or even survey 
       months (or both) to create independent samples.

   NOTE: there is extraneous code & comments that are relevant to other work

*/;

* Load the MORG extract for the requested year and drop anyone under 16;
clear;
set mem 200m;
set matsize 800;
use "e:\morg\annual\morg`1'",clear;
drop if age<16;

* Unless grouping by state, build the MSA codes.  Non-MSA areas are coded;
* by state;
if "`2'"!="state" {;

    * Through year 85 the SMSA rank is the code, with state*100 standing in;
    * wherever the rank is zero;
    if `1'<=85 {;
       gen msa70 = cond(smsarank==0, state*100, smsarank);
       };

    ** Years 86-94 and 95 on: helper do-files create MSAs compatible with **;
    ** the old rankings, plus some not included that can be tracked later **;
    if `1'>85 & `1'<=94 {do "d:\wage phillips\stata\do\geocode86.do" `1'};
    if `1'>94 {do "d:\wage phillips\stata\do\geocode95.do" `1'};

    ** finally, run program to create msa70a;
    do "d:\wage phillips\stata\do\msa22.do";
    };

* Observations with no value for the grouping variable are not usable;
keep if `2'!=.;

* Optionally keep only the odd (1) or the even (2) interview months;
if "`4'"=="1" {;
  tab intmonth;
  drop if intmonth!=1 & intmonth!=3 & intmonth!=5 &
          intmonth!=7 & intmonth!=9 & intmonth!=11;
};

if "`4'"=="2" {;
  tab intmonth;
  drop if intmonth!=2 & intmonth!=4 & intmonth!=6 &
          intmonth!=8 & intmonth!=10 & intmonth!=12;
};

* When grouping by MSA keep only in-MSA obs (codes of 100 or less), not the;
* balance of state, so that purging regressions will be reasonably meaningful;
if "`2'"!="state" {keep if `2'<=100};

* Drop those not in the labor force and flag the unemployed.  The status;
* variable and its coding differ by survey year;
if `1'<=88 {;
   keep if esr<=3;
   gen byte ue = (esr==3);
   };
if `1'>=89 & `1'<94 {;
   keep if lfsr89<=4;
   gen byte ue = (lfsr89==3 | lfsr89==4);
   };
if `1'>=94 {;
   keep if lfsr94<=4;
   gen byte ue = (lfsr94==3 | lfsr94==4);
   };

* Consistent education categories for regression;
* Builds ten schooling dummies (ed1 to ed6, edhsg, edsmc, edclg, edgtc) from;
* gradeat and gradecp before year 92 and from grade92 afterwards.  Keep the;
* gen order: later varlists such as ed1-ed6 need these variables adjacent;
if `1'<92 {;
  * educ is the grade attended, less one when gradecp is 2, floored at zero;
  gen byte educ=gradeat - (gradecp==2);
  replace educ=0 if educ<0;
  gen byte ed1   = (educ==0);
  gen byte ed2   = (educ>=1 & educ<=4);
  gen byte ed3   = (educ>=5 & educ<=8);
  gen byte ed4   = (educ==9);
  gen byte ed5   = (educ==10);
  gen byte ed6   = (educ==11);
  gen byte edhsg = (educ==12 & gradecp==1);
  gen byte edsmc = (educ>=13 & educ<=15) | (educ==12 & gradecp==2);
  gen byte edclg = (educ==16 | educ==17);
  gen byte edgtc = (educ>17);

  };
   
if `1'>=92 {;
  gen byte ed1   = (grade92==31);
  gen byte ed2   = (grade92==32);
  gen byte ed3   = (grade92==33 | grade92==34);
  gen byte ed4   = (grade92==35);
  gen byte ed5   = (grade92==36);
  gen byte ed6   = (grade92==37);
  * High school graduate is code 38 or 39, the same codes the hsg;
  * distribution dummy uses.  Joining them with & could never be true;
  gen byte edhsg = (grade92==38 | grade92==39);
  gen byte edsmc = (grade92>=40 & grade92<=42);
  gen byte edclg = (grade92==43);
  gen byte edgtc = (grade92>=44 & grade92<=46);
   };

* Educational categories for distribution of labor force;
* Five groups per branch, and the assert checks that each observation falls;
* in exactly one of them;
if `1'<92 {;
   gen higrade = gradeat - (gradecp==2);
   gen byte hsd = (higrade<12);
   gen byte hsg = (gradecp==1 & higrade==12);
   gen byte smc = (higrade==12 & gradecp==2) | (higrade>12 & higrade<16);
   gen byte clg = (higrade==17 | higrade==16);
   gen byte gtc = (higrade>17);
   assert hsd+hsg+smc+clg+gtc==1;
   };

if `1'>=92 {;
   gen byte hsd = (grade92<38);
   gen byte hsg = (grade92==39 | grade92==38);
   gen byte smc = (grade92<=42 & grade92>=40);
   gen byte clg = (grade92==43);
   gen byte gtc = (grade92>43);
   assert hsd+hsg+smc+clg+gtc==1;
   };



** Gender and race indicators used as regressors;
gen byte female = (sex==2);
gen byte black  = (race==2);
gen byte other  = (race>2);
gen byte bfem   = (black & female);
gen byte ofem   = (other & female);

** Race by sex cells for the distribution tables;
gen byte rs_wm = (sex==1 & race==1);
gen byte rs_wf = (sex==2 & race==1);
gen byte rs_bm = (sex==1 & race==2);
gen byte rs_bf = (sex==2 & race==2);
gen byte rs_om = (sex==1 & race>2);
gen byte rs_of = (sex==2 & race>2);


** Schooling dummies interacted with female.  Keep this gen order, since;
** the regressions refer to the range fed1-fed6;
gen byte fed1   = (female & ed1);
gen byte fed2   = (female & ed2);
gen byte fed3   = (female & ed3);
gen byte fed4   = (female & ed4);
gen byte fed5   = (female & ed5);
gen byte fed6   = (female & ed6);
gen byte fedhsg = (female & edhsg);
gen byte fedsmc = (female & edsmc);
gen byte fedclg = (female & edclg);
gen byte fedgtc = (female & edgtc);

** Potential experience;
* exp1 is age less years of schooling less 6, floored at zero.  Before;
* year 92 schooling is educ.  From 92 on it is edyrs, a value assigned;
* below for each race by sex by grade92 cell;
if `1'<92 {;
  gen exp1=max(age-educ-6,0);
  };

if `1'>=92 {;
  * Impute years of schooling to get experience;

  * NOTE: race is recoded in place here, so every race code above 3 is 3;
  * for the rest of the run;
  replace race=3 if race>3;
  gen edyrs=.;

  * men, white;
  replace edyrs = .32  if (race==1 & sex==1 & (grade92 ==31|grade92==00));
  replace edyrs = 3.19 if (race==1 & sex==1 & grade92 ==32);
  replace edyrs = 7.24 if (race==1 & sex==1 & (grade92 ==33 | grade92==34));
  replace edyrs = 8.97 if (race==1 & sex==1 & grade92 == 35);
  replace edyrs = 9.92 if (race==1 & sex==1 & grade92 == 36);
  replace edyrs = 10.86 if (race==1 & sex==1 & grade92 ==37);
  replace edyrs = 11.58 if (race==1 & sex==1 & grade92 ==38);
  replace edyrs = 11.99 if (race==1 & sex==1 & grade92 ==39);
  replace edyrs = 13.48 if (race==1 & sex==1 & grade92 ==40);
  replace edyrs = 14.23 if (race==1 & sex==1 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.17 if (race==1 & sex==1 & grade92 ==43);
  replace edyrs = 17.68 if (race==1 & sex==1 & grade92 ==44);
  replace edyrs = 17.71 if (race==1 & sex==1 & grade92 ==45);
  replace edyrs = 17.83 if (race==1 & sex==1 & grade92 ==46);
  
  * female, white;
  replace edyrs = 0.62 if (race==1 & sex==2 & (grade92 ==31|grade92==00));
  replace edyrs = 3.15 if (race==1 & sex==2 & grade92 ==32);
  replace edyrs = 7.23 if (race==1 & sex==2 & (grade92 ==33 | grade92==34));
  replace edyrs = 8.99 if (race==1 & sex==2 & grade92 == 35);
  replace edyrs = 9.95 if (race==1 & sex==2 & grade92 == 36);
  replace edyrs = 10.87 if (race==1 & sex==2 & grade92 ==37);
  replace edyrs = 11.73 if (race==1 & sex==2 & grade92 ==38);
  replace edyrs = 12.00 if (race==1 & sex==2 & grade92 ==39);
  replace edyrs = 13.35 if (race==1 & sex==2 & grade92 ==40);
  replace edyrs = 14.22 if (race==1 & sex==2 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.15 if (race==1 & sex==2 & grade92 ==43);
  replace edyrs = 17.64 if (race==1 & sex==2 & grade92 ==44);
  replace edyrs = 17.00 if (race==1 & sex==2 & grade92 ==45);
  replace edyrs = 17.76 if (race==1 & sex==2 & grade92 ==46);
  
  * men, black;
  replace edyrs = .92  if (race==2 & sex==1 & (grade92 ==31|grade92==00));
  replace edyrs = 3.28 if (race==2 & sex==1 & grade92 ==32);
  replace edyrs = 7.04 if (race==2 & sex==1 & (grade92 ==33 | grade92==34));
  replace edyrs = 9.02 if (race==2 & sex==1 & grade92 == 35);
  replace edyrs = 9.91 if (race==2 & sex==1 & grade92 == 36);
  replace edyrs = 10.90 if (race==2 & sex==1 & grade92 ==37);
  replace edyrs = 11.41 if (race==2 & sex==1 & grade92 ==38);
  replace edyrs = 11.98 if (race==2 & sex==1 & grade92 ==39);
  replace edyrs = 13.57 if (race==2 & sex==1 & grade92 ==40);
  replace edyrs = 14.33 if (race==2 & sex==1 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.13 if (race==2 & sex==1 & grade92 ==43);
  replace edyrs = 17.51 if (race==2 & sex==1 & grade92 ==44);
  replace edyrs = 17.83 if (race==2 & sex==1 & grade92 ==45);
  replace edyrs = 18.00 if (race==2 & sex==1 & grade92 ==46);
  
  * female, black;
  replace edyrs = 0.00 if (race==2 & sex==2 & (grade92 ==31|grade92==00));
  replace edyrs = 2.90 if (race==2 & sex==2 & grade92 ==32);
  replace edyrs = 7.03 if (race==2 & sex==2 & (grade92 ==33 | grade92==34));
  replace edyrs = 9.05 if (race==2 & sex==2 & grade92 == 35);
  replace edyrs = 9.99 if (race==2 & sex==2 & grade92 == 36);
  replace edyrs = 10.85 if (race==2 & sex==2 & grade92 ==37);
  replace edyrs = 11.64 if (race==2 & sex==2 & grade92 ==38);
  replace edyrs = 12.00 if (race==2 & sex==2 & grade92 ==39);
  replace edyrs = 13.43 if (race==2 & sex==2 & grade92 ==40);
  replace edyrs = 14.33 if (race==2 & sex==2 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.04 if (race==2 & sex==2 & grade92 ==43);
  replace edyrs = 17.69 if (race==2 & sex==2 & grade92 ==44);
  replace edyrs = 17.40 if (race==2 & sex==2 & grade92 ==45);
  replace edyrs = 18.00 if (race==2 & sex==2 & grade92 ==46);
  
  * men, other;
  replace edyrs = .62  if (race>=3 & sex==1 & (grade92 ==31|grade92==00));
  replace edyrs = 3.24 if (race>=3 & sex==1 & grade92 ==32);
  replace edyrs = 7.14 if (race>=3 & sex==1 & (grade92 ==33 | grade92==34));
  replace edyrs = 9.00 if (race>=3 & sex==1 & grade92 == 35);
  replace edyrs = 9.92 if (race>=3 & sex==1 & grade92 == 36);
  replace edyrs = 10.88 if (race>=3 & sex==1 & grade92 ==37);
  replace edyrs = 11.50 if (race>=3 & sex==1 & grade92 ==38);
  replace edyrs = 11.99 if (race>=3 & sex==1 & grade92 ==39);
  replace edyrs = 13.53 if (race>=3 & sex==1 & grade92 ==40);
  replace edyrs = 14.28 if (race>=3 & sex==1 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.15 if (race>=3 & sex==1 & grade92 ==43);
  replace edyrs = 17.60 if (race>=3 & sex==1 & grade92 ==44);
  replace edyrs = 17.77 if (race>=3 & sex==1 & grade92 ==45);
  replace edyrs = 17.92 if (race>=3 & sex==1 & grade92 ==46);
  
  * female, other;
  replace edyrs = 0.31 if (race>=3 & sex==2 & (grade92 ==31|grade92==00));
  replace edyrs = 3.03 if (race>=3 & sex==2 & grade92 ==32);
  replace edyrs = 7.13 if (race>=3 & sex==2 & (grade92 ==33 | grade92==34));
  replace edyrs = 9.02 if (race>=3 & sex==2 & grade92 == 35);
  replace edyrs = 9.97 if (race>=3 & sex==2 & grade92 == 36);
  replace edyrs = 10.86 if (race>=3 & sex==2 & grade92 ==37);
  replace edyrs = 11.69 if (race>=3 & sex==2 & grade92 ==38);
  replace edyrs = 12.00 if (race>=3 & sex==2 & grade92 ==39);
  replace edyrs = 13.47 if (race>=3 & sex==2 & grade92 ==40);
  replace edyrs = 14.28 if (race>=3 & sex==2 & (grade92 ==41 | grade92==42));
  replace edyrs = 16.10 if (race>=3 & sex==2 & grade92 ==43);
  replace edyrs = 17.67 if (race>=3 & sex==2 & grade92 ==44);
  replace edyrs = 17.20 if (race>=3 & sex==2 & grade92 ==45);
  replace edyrs = 17.88 if (race>=3 & sex==2 & grade92 ==46);

  * Stop the run if any race, sex and grade92 combination was left without;
  * an imputed value;
  assert edyrs!=.;
  gen exp1=max(age-edyrs-6,0);
  };

* Experience quartic, scaled by powers of ten, and its female interactions;
* Keep this gen order: the regressions use the ranges exp1-exp4 and;
* fexp1-fexp4, which need the variables to be adjacent;
gen exp2 = (exp1^2)/100;
gen exp3 = (exp1^3)/1000;
gen exp4 = (exp1^4)/10000;
gen fexp1 = exp1*female;
gen fexp2 = exp2*female;
gen fexp3 = exp3*female;
gen fexp4 = exp4*female;

** Age by sex cells for the distribution tables;
gen byte m1619 = (sex==1 & age>=16 & age<=19);
gen byte m2024 = (sex==1 & age>=20 & age<=24);
gen byte m2554 = (sex==1 & age>=25 & age<=54);
gen byte m5564 = (sex==1 & age>=55 & age<=64);
gen byte m65up = (sex==1 & age>=65);

gen byte f1619 = (sex==2 & age>=16 & age<=19);
gen byte f2024 = (sex==2 & age>=20 & age<=24);
gen byte f2554 = (sex==2 & age>=25 & age<=54);
gen byte f5564 = (sex==2 & age>=55 & age<=64);
gen byte f65up = (sex==2 & age>=65);

* Every observation must fall in exactly one age by sex cell;
assert (m1619 + m2024 + m2554 + m5564 + m65up) +
       (f1619 + f2024 + f2554 + f5564 + f65up) == 1;

** Marital status: marital codes 1 to 3, split by sex;
gen byte marfem = marital>=1 & marital<=3 & sex==2;
gen byte marmale = marital>=1 & marital<=3 & sex==1;

** Union membership and coverage;
* The union variables are only read from year 83 on.  Earlier years get;
* missing values so that the variables exist for every year;
if `1'>=83 {;
   gen byte unmem = unionmme==1;
   gen byte uncov = unioncov==1;
   };
if `1'<83 {;
   gen byte unmem = .;
   gen byte uncov = .;
   };

** Self-employed.  The class of worker variable differs by survey year;
   if `1'<=88 {gen byte se = classer==3};
   if `1'>=89 & `1'<=93 {gen byte se=classer2==5};
   if `1'>93  {gen byte se = class94 == 6 | class94 ==7};

   * The self-employed are not expected to report weekly earnings.  Count;
   * any that do, then treat everyone with reported earnings as not;
   * self-employed.  The comment that used to sit here had no closing;
   * delimiter, so it swallowed the count, which also misspelled earnwke;
   count if earnwke!=. & se==1;
   replace se=0 if earnwke!=.;


   * Generate major industry dummies;
   * Each branch merges a year-specific industry lookup file onto the data;
   * by the detailed industry code, drops lookup rows that matched nothing;
   * in the data, and asserts that every coded observation found a match;
   * NOTE(review): the lookup files presumably supply mind7090 and ind7090,;
   * which the code right after this section reads - confirm;
   if `1'<=82 {;
      sort ind70;
      merge ind70 using "d:\wage phillips\stata\data\ind70";
      drop if _merge==2;
      assert _merge==3 if ind70!=.;
      drop _merge;
      };

   if `1'>82 & `1'<=91 {;
      sort ind80;
      merge ind80 using "d:\wage phillips\stata\data\ind80";
      drop if _merge==2;
      assert _merge==3 if ind80!=. & ind80!=991; ** 991 is unemployed ex-military **;
      drop _merge;
      };

   * NOTE(review): ind80 is renamed to ind90 on the first line of this;
   * branch, so the assert below, and the later temp flags that read ind80;
   * for years 83 on, can only run if the ind90 lookup file itself carries a;
   * variable named ind80 - confirm.  The check may have been meant to test;
   * ind90 instead;
   if `1'>=92 {;
      rename ind80 ind90;
      sort ind90;
      merge ind90 using "d:\wage phillips\stata\data\ind90";
      drop if _merge==2;
      assert _merge==3 if ind80!=.;
      drop _merge;
      };

   /* The major industries are as follows:

       1 - Agriculture, forestry and fisheries
       2 - Mining
       3 - Construction
       4 - Durable manufacturing
       5 - Nondurable manufacturing
       6 - Transportation
       7 - Communications
       8 - Utilities and sanitary services
       9 - Wholesale trade
      10 - Retail trade
      11 - FIRE
      12 - Private households
      13 - Business and repair
      14 - Personal services
      15 - Entertainment and recreation
      16 - Hospitals
      17 - Health services except hospitals
      18 - Education
      19 - Social services
      20 - Misc. professional services
      21 - Forestry, fishing, hunting and trapping
      22 - Public administration

      We will aggregate further to:

      inagri - Agriculture
      inmine - Mining
      intran - Transport/communications/utilities
      inmand - Durable manufacturing
      inmann - Non-durable manufacturing
      incons - Construction
      inwtrd - Wholesale
      inrtrd - Retail
      infire - FIRE
      insvcs - Services
      ingovt - Government

   */;

   ** Aggregate the 22 major industries into 11 sector dummies;
   rename mind7090 mind;
   gen byte inagri = (mind==1 | mind==21);
   gen byte inmine = (mind==2);
   gen byte intran = (mind==6 | mind==7 | mind==8);
   gen byte inmand = (mind==4);
   gen byte inmann = (mind==5);
   gen byte incons = (mind==3);
   gen byte inwtrd = (mind==9);
   gen byte inrtrd = (mind==10);
   gen byte infire = (mind==11);
   gen byte insvcs = (mind>=12 & mind<=20);
   gen byte ingovt = (mind==22);

   * Exactly one sector per observation, unless the industry code is missing;
   assert ind7090==. |
          (inagri+inmine+intran+inmand+inmann+incons+inwtrd+inrtrd+infire+insvcs
          +ingovt==1);
   gen byte inmiss = (ind7090==.);


* Inflate censored weekly obs and truncate all hourly earnings at JW value;
rename earnwke wkwage;
replace wkwage=999*1.5 if (wkwage>=999 & wkwage!=. & `1'<89);
replace wkwage=1923*1.5 if (wkwage==1923 & `1'>=89);
gen hrwage = wkwage/uhourse;

* Hourly ceiling: the weekly topcode over 35 hours, times 1.5, cut to cents;
if `1'<89  {scalar MAXWG=int((999/35)*100*1.5)/100};
if `1'>=89 {scalar MAXWG=int((1923/35)*100*1.5)/100};
scalar list MAXWG;
replace hrwage=MAXWG if hrwage!=. & hrwage>MAXWG;

* No wage is carried for the unemployed;
replace hrwage=. if ue==1;
replace wkwage=. if ue==1;

* Winsorize bottom 1% of hourly and weekly earnings sample;
* (old code is below);
* The cutoff is _result(16) from the weighted detailed summarize, held in a;
* scalar while it is applied;

summ hrwage [aw=earnwt], detail;
scalar P1HR = _result(16);
gen wind_hr = hrwage<=P1HR if hrwage!=.;
replace hrwage=P1HR if wind_hr & hrwage!=.;
summ hrwage if wind_hr;

summ wkwage [aw=earnwt], detail;
scalar P1WK = _result(16);
gen wind_wk = wkwage<=P1WK if wkwage!=.;
replace wkwage=P1WK if wind_wk & wkwage!=.;
summ wkwage if wind_wk;

scalar drop P1HR P1WK;
drop wind_hr wind_wk;

/*
genpct wind_hr hrwage if hrwage!=. [aw=earnwt],p(1);
summ hrwage if !wind_hr & hrwage!=. [aw=earnwt];
disp _result(5);
replace hrwage=_result(5) if wind_hr & hrwage!=.;
summ hrwage if wind_hr;

genpct wind_wk wkwage if wkwage!=. [aw=earnwt],p(1);
summ wkwage if !wind_wk & wkwage!=. [aw=earnwt];
disp _result(5);
replace wkwage=_result(5) if wind_wk & wkwage!=.;
summ wkwage if wind_wk;
drop wind_hr wind_wk;
*/

* Log wages.  lnftw is the log weekly wage for those usually working 35 or;
* more hours;
gen lnhrw = ln(hrwage);
gen lnwkw = ln(wkwage);
* A missing uhourse compares as larger than any number, so it has to be;
* excluded explicitly.  Otherwise workers with unknown hours are counted as;
* full time here, while the ftwk sample flag leaves them out;
gen lnftw = ln(wkwage) if uhourse>=35 & uhourse!=.;

summ hrwage wkwage lnhrw lnwkw;

* Copies of the wage variables that the final collapse turns into medians;
gen mdhrw=hrwage;
gen mdwkw=wkwage;
gen mdlnhw=lnhrw;
gen mdlnww=lnwkw;
gen mdlnftw=lnftw;


* Weights: divide the earnings weight by 12 and show the weighted count;
gen byte one = 1;
replace earnwt = earnwt/12;
summ one [aw=earnwt];

* Flag temps: paid by the hour in industry code 737 (before year 83) or 731;
* (year 83 on).  The flag is set to missing for the unemployed;
if `1'<83   { gen byte ths = (paidhre==1 & ind70==737)};
if `1'>=83  { gen byte ths = (paidhre==1 & ind80==731)};
replace ths=. if ue==1;

* Flag unemployed who appear to be temps, using the same industry codes;
if `1'<83   { gen byte tempue = (ue==1 & ind70==737)};
if `1'>=83  { gen byte tempue = (ue==1 & ind80==731)};


* SMSA status;
gen byte smsa = (smsastat==1);
drop smsastat;

* Sample flags: any hourly wage, paid by the hour, and 35 or more usual hours;
gen byte hrwk   = (hrwage!=.);
gen byte hrlywk = (hrwk & paidhre==1);
gen byte ftwk   = (hrwk & uhourse>=35);
assert ue!=.;

* Sum of weights within each group, for the full sample and each subsample;
egen uewt   = sum(earnwt), by(`2');
egen hrwt   = sum(earnwt) if hrwk, by(`2');
egen hrlywt = sum(earnwt) if hrlywk, by(`2');
egen ftwt   = sum(earnwt) if ftwk, by(`2');

* Number of observations within each group, same four samples;
egen ueobs   = sum(one), by(`2');
egen hrobs   = sum(one) if hrwk, by(`2');
egen hrlyobs = sum(one) if hrlywk, by(`2');
egen ftobs   = sum(one) if ftwk, by(`2');

* Summarize data;
summ [aw=earnwt];

****************************************************************************;
****************************************************************************;
** PART I: ESTIMATE VARIANCES & COVARIANCES AFTER SUBTRACTING GROUP MEANS  *;
****************************************************************************;
****************************************************************************;

********************;
* Estimate means   *;
********************;

* Weighted group means.  Each term is the value times its weight, divided by;
* the sum of weights for the same sample within the group;
egen m_ue=sum(ue*earnwt/uewt), by(`2');
egen m_thshr=sum(ths*earnwt/hrwt) if hrwk, by(`2');
egen m_thshrl=sum(ths*earnwt/hrlywt) if hrlywk, by(`2');
* The full-time mean has to be scaled by the full-time weight total ftwt.;
* It used to divide by hrlywt, which is missing for full-time workers who;
* are not paid by the hour;
egen m_thsft=sum(ths*earnwt/ftwt) if ftwk, by(`2');
egen m_wghr=sum(lnhrw*earnwt/hrwt) if hrwk, by(`2');
egen m_wgft=sum(lnwkw*earnwt/ftwt) if ftwk, by(`2');
egen m_wghrly=sum(lnhrw*earnwt/hrlywt) if hrlywk, by(`2');


****************************************************************************;
****************************************************************************;
** PART II: ESTIMATE RESIDUAL VARIANCES & COVARIANCES AFTER REMOVING      **;
**          ESTIMATED IMPACT OF OBSERVABLES                               **;
****************************************************************************;
****************************************************************************;

** first append mean obs **;
append using "d:\wage phillips\stata\data\stmns7999";

**********************************************************************;
** Hourly earnings regression, all workers with a wage               *;
**********************************************************************;

* Industry controls enter only when the third argument is ind;
local indx "";
if "`3'"=="ind" {;
   local indx "inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt";
};

areg lnhrw
     ed1-ed6  edsmc edclg edgtc
     fed1-fed6 fedsmc fedclg fedgtc
     exp1-exp4
     fexp1-fexp4
     female black other bfem ofem
     `indx'
     [aw=earnwt] if hrwk, a(`2');

* Store residual degrees of freedom;
capture scalar drop df_hr;
scalar df_hr=_result(5);

* Individual residual: log wage less the default prediction from areg;
predict xbhat if hrwk;
gen ehr = lnhrw - xbhat;
drop xbhat;

* Predicted value: _result(3) from summarizing the prediction over the;
* appended obs (sample==1), added to each individual residual;
predict temp if sample==1;
summ temp if sample==1;
capture scalar drop mn;
scalar mn=_result(3);
gen phr = mn + ehr;
drop temp;

* Weighted mean residual within each group;
egen stehr=sum(ehr*earnwt) if hrwk, by(`2');
egen mnwt=sum(earnwt) if hrwk, by(`2');
replace stehr=stehr/mnwt;
tab `2' [aw=earnwt],summ(stehr);
drop mnwt;

**********************************************************************;
** Weekly earnings regression, full-time sample                      *;
**********************************************************************;

* Industry controls enter only when the third argument is ind;
local indx "";
if "`3'"=="ind" {;
   local indx "inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt";
};

areg lnwkw
     ed1-ed6  edsmc edclg edgtc
     fed1-fed6 fedsmc fedclg fedgtc
     exp1-exp4
     fexp1-fexp4
     female black other bfem ofem
     `indx'
     [aw=earnwt] if ftwk, a(`2');

* Store residual degrees of freedom;
capture scalar drop df_ft;
scalar df_ft=_result(5);

* Individual residual: log weekly wage less the default prediction from areg;
predict xbhat if ftwk;
gen eft = lnwkw - xbhat;
drop xbhat;

* Predicted value: _result(3) from summarizing the prediction over the;
* appended obs (sample==1), added to each individual residual;
predict temp if sample==1;
summ temp if sample==1;
capture scalar drop mn;
scalar mn=_result(3);
gen pft = mn + eft;
drop temp;

* Weighted mean residual within each group;
egen steft=sum(eft*earnwt) if ftwk, by(`2');
egen mnwt=sum(earnwt) if ftwk, by(`2');
replace steft=steft/mnwt;
tab `2' [aw=earnwt],summ(steft);
drop mnwt;

**********************************************************************;
** Hourly earnings regression, workers paid by the hour              *;
**********************************************************************;

* Industry controls enter only when the third argument is ind;
local indx "";
if "`3'"=="ind" {;
   local indx "inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt";
};

areg lnhrw
     ed1-ed6  edsmc edclg edgtc
     fed1-fed6 fedsmc fedclg fedgtc
     exp1-exp4
     fexp1-fexp4
     female black other bfem ofem
     `indx'
     [aw=earnwt] if hrlywk, a(`2');

* Store residual degrees of freedom;
capture scalar drop df_hrly;
scalar df_hrly=_result(5);

* Individual residual: log wage less the default prediction from areg;
predict xbhat if hrlywk;
gen ehrly = lnhrw - xbhat;
drop xbhat;

* Predicted value: _result(3) from summarizing the prediction over the;
* appended obs (sample==1), added to each individual residual;
predict temp if sample==1;
summ temp if sample==1;
capture scalar drop mn;
scalar mn=_result(3);
gen phrly = mn + ehrly;
drop temp;

* Weighted mean residual within each group;
egen stehrly=sum(ehrly*earnwt) if hrlywk, by(`2');
egen mnwt=sum(earnwt) if hrlywk, by(`2');
replace stehrly=stehrly/mnwt;
tab `2' [aw=earnwt],summ(stehrly);
drop mnwt;

**********************************************************************;
** Unemployment regression, no industry controls                    **;
**********************************************************************;

areg ue
     ed1-ed6  edsmc edclg edgtc
     fed1-fed6 fedsmc fedclg fedgtc
     exp1-exp4
     fexp1-fexp4
     female black other bfem ofem
     [aw=earnwt], a(`2');

* Store residual degrees of freedom;
capture scalar drop df_ue;
scalar df_ue=_result(5);

* Individual residual: the ue flag less the default prediction from areg;
predict xbhat if ue!=.;
gen eue = ue - xbhat;
drop xbhat;

* Predicted value: _result(3) from summarizing the prediction over the;
* appended obs (sample==1), added to each individual residual;
predict temp if sample==1;
summ temp if sample==1;
capture scalar drop mn;
scalar mn=_result(3);
gen pue = mn + eue;
drop temp;

* Weighted mean residual within each group;
egen steue=sum(eue*earnwt) if ue!=., by(`2');
egen mnwt=sum(earnwt) if ue!=., by(`2');
replace steue=steue/mnwt;
tab `2' [aw=earnwt],summ(steue);
drop mnwt;

**********************************************************************;
** Unemployment regression, with industry controls                  **;
**********************************************************************;

areg ue
     ed1-ed6  edsmc edclg edgtc
     fed1-fed6 fedsmc fedclg fedgtc
     exp1-exp4
     fexp1-fexp4
     inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt inmiss
     female black other bfem ofem
     [aw=earnwt], a(`2');

* Store residual degrees of freedom.  This reuses the scalar name df_ue, so;
* it replaces the value stored after the regression without industry controls;
capture scalar drop df_ue;
scalar df_ue=_result(5);

* Individual residual: the ue flag less the default prediction from areg;
predict xbhat if ue!=.;
gen euei = ue - xbhat;
drop xbhat;

* Predicted value: _result(3) from summarizing the prediction over the;
* appended obs (sample==1), added to each individual residual;
predict temp if sample==1;
summ temp if sample==1;
capture scalar drop mn;
scalar mn=_result(3);
gen puei = mn + euei;
drop temp;

* Weighted mean residual within each group;
egen steuei=sum(euei*earnwt) if ue!=., by(`2');
egen mnwt=sum(earnwt) if ue!=., by(`2');
replace steuei=steuei/mnwt;
tab `2' [aw=earnwt],summ(steuei);
drop mnwt;

/*
**********************************************************************;
** THS worker regression -- all workers                             **;
**********************************************************************;

if "`3'"!="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        [aw=earnwt] if hrwk, a(`2');
};

if "`3'"=="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt
        [aw=earnwt] if hrwk, a(`2');
};

* Generate residuals;
predict ethshr if hrwk;
replace ethshr=ths-ethshr;

egen stethshr=sum(ethshr*earnwt) if hrwk, by(`2');
egen mnwt=sum(earnwt) if hrwk, by(`2');
replace stethshr=stethshr/mnwt;
tab `2' [aw=earnwt],summ(stethshr);
drop mnwt;

**********************************************************************;
** THS worker regression -- hourly workers                          **;
**********************************************************************;

if "`3'"!="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        [aw=earnwt] if hrlywk, a(`2');
};

if "`3'"=="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt
        [aw=earnwt] if hrlywk, a(`2');
};

* Generate residuals;
predict ethshrly if hrlywk;
replace ethshrly=ths-ethshrly;

egen stethrly=sum(ethshrly*earnwt) if hrlywk, by(`2');
egen mnwt=sum(earnwt) if hrlywk, by(`2');
replace stethrly=stethrly/mnwt;
tab `2' [aw=earnwt],summ(stethrly);
drop mnwt;

**********************************************************************;
** THS worker regression -- full-time workers                       **;
**********************************************************************;

if "`3'"!="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        [aw=earnwt] if ftwk, a(`2');
};

if "`3'"=="ind" {;
   areg ths
        ed1-ed6  edsmc edclg edgtc
        fed1-fed6 fedsmc fedclg fedgtc
        exp1-exp4
        fexp1-fexp4
        female black other bfem ofem
        inagri inmine intran inmand inmann incons inwtrd inrtrd infire ingovt
        [aw=earnwt] if ftwk, a(`2');
};

* Generate residuals;
predict ethsft if ftwk;
replace ethsft=ths-ethsft;

egen stethsft=sum(ethsft*earnwt) if ftwk, by(`2');
egen mnwt=sum(earnwt) if ftwk, by(`2');
replace stethsft=stethsft/mnwt;
tab `2' [aw=earnwt],summ(stethsft);
drop mnwt;
*/

*********************************************************************;
*********************************************************************;
** Cleanup and close                                                *;
*********************************************************************;
*********************************************************************;

/*; 
    Purge observations that are not pertinent to each sub-sample so that
    collapse works correctly
*/;

* Remove the appended mean obs, which were only needed for prediction;
drop if sample==1;

* Blank each statistic outside the subsample it belongs to, so the collapse;
* averages it over the right observations only;
replace m_thshr=.  if hrwk==0;
replace m_thsft=.  if ftwk==0;
replace m_thshrl=. if hrlywk==0;
replace m_wghr=.   if hrwk==0;
replace m_wgft=.   if ftwk==0;
replace m_wghrly=. if hrlywk==0;

replace ehr=.   if hrwk==0;
replace eft=.   if ftwk==0;
replace ehrly=. if hrlywk==0;

replace phr=.   if hrwk==0;
replace pft=.   if ftwk==0;
replace phrly=. if hrlywk==0;

/*
replace ethshr=. if !hrwk;
replace ethsft=. if !ftwk;
replace ethshrly=. if !hrlywk;
*/
** Save working data set;
* desc;
* save /tmp/workset.dta,replace;
* summ [aw=earnwt];

** Generate means by grouping variable;
* The variable lists are held in macros so that keep and collapse are sure;
* to name the same variables.  The THS residuals ethshr, ethshrly and ethsft;
* stay out because the regressions that create them are commented out;
local gv1 "m_ue m_thshr m_thshrl m_thsft m_wghr m_wgft m_wghrly";
local gv2 "uewt hrwt hrlywt ftwt ftobs ueobs hrobs hrlyobs";
local gv3 "stehr ehr eft ehrly eue euei phr pft phrly pue puei";
local gv4 "year ue ths tempue hrwage wkwage lnhrw lnwkw lnftw";
local gv5 "hsd hsg smc clg gtc rs_wm rs_wf rs_bm rs_bf rs_om rs_of";
local gv6 "m1619 m2024 m2554 m5564 m65up f1619 f2024 f2554 f5564 f65up";
local gv7 "inagri inmine intran inmand inmann incons inwtrd inrtrd";
local gv8 "infire insvcs ingovt inmiss unmem uncov se";
local gv9 "smsa marfem marmale";
local gmd "mdhrw mdwkw mdlnhw mdlnww mdlnftw";

keep `2' `gv1' `gv2' `gv3' `gv4' `gv5' `gv6' `gv7' `gv8' `gv9'
     `gmd' earnwt;

* Weighted means of everything except the md variables, which become;
* weighted medians;
collapse (mean) `gv1' `gv2' `gv3' `gv4' `gv5' `gv6' `gv7' `gv8' `gv9'
         (median) `gmd'
         [aw=earnwt], by(`2');

** Label data **;
* Variable labels for the collapsed, one row per group data set.  The label;
* for puei used to read contol and is corrected here;
label var m_ue      "Unemp";
label var m_thshr   "THS share of emp";
label var m_thshrl  "THS share of hourly emp";
label var m_thsft   "THS share of FT emp";
label var m_wghr    "Mean wage, all";
label var m_wgft    "Mean ft wkly wage";
label var m_wghrly  "Mean wage, hrly wrkrs";
label var uewt      "Sum wt for ue smple";
label var hrwt      "Sum wt for full wage smple";
label var hrlywt    "Sum wt for hrly smple";
label var ftwt      "Sum wt for full-time smple";
label var ftobs     "Obs for full-time smple";
label var ueobs     "Obs for UE smple";
label var hrobs     "Obs for full wage smple";
label var hrlyobs   "Obs for hrly wrkr smple";
label var ehr       "Mn wg resid, all";
label var eft       "Mn wkly wg resid, ft";
label var ehrly     "Mn wg resid, hrly wk";
label var eue       "Mn ue resid, no ind";
label var euei      "Mn ue resid, control ind";
label var phr       "Mn wg pred, all";
label var pft       "Mn wkly wg pred, ft";
label var phrly     "Mn wg pred, hrly wk";
label var pue       "Mn ue pred, no ind";
label var puei      "Mn ue pred, control ind";
/*
label var ethshr    "Mn ths resid";
label var ethshrly  "Mn ths resid, hrly smple";
label var ethsft    "Mn ths resid, FT smple";
*/
label var year      "Year";
label var ue        "unemp rate";
label var ths       "THS share of emp";
label var tempue    "unemp temps";
label var hrwage    "mean hourly wage" ;
label var wkwage    "mean weekly wage" ;
label var lnhrw     "mean ln(hourly wage)" ;
label var lnwkw     "mean ln(weekly wage)";
label var lnftw     "mean ln(wage) for FT";
label var hsd       "high school dropout";
label var hsg       "high school grad";
label var smc       "some college";
label var clg       "college grad";
label var gtc       "greater than college";
label var rs_wm     "white male";
label var rs_wf     "white female";
label var rs_bm     "black male";
label var rs_bf     "black female";
label var rs_om     "other male";
label var rs_of     "other female";
label var m1619     "male age 16-19";
label var m2024     "male age 20-24";
label var m2554     "male age 25-54";
label var m5564     "male age 55-64";
label var m65up     "male age 65+";
label var f1619     "female age 16-19";
label var f2024     "female age 20-24";
label var f2554     "female age 25-54";
label var f5564     "female age 55-64";
label var f65up     "female age 65+";
label var inagri    "ind agriculture";
label var inmine    "ind mining";
label var intran    "ind trans and util" ;
label var inmand    "ind durable manuf";
label var inmann    "ind non-dur manu";
label var incons    "ind construction";
label var inwtrd    "ind wholesale trade";
label var inrtrd    "ind retail trade";
label var infire    "ind FIRE";
label var insvcs    "ind services";
label var ingovt    "ind government";
label var inmiss    "ind missing";
label var unmem     "union membership";
label var uncov     "union coverage";
label var se        "self-employed";
label var smsa      "residing in MSA";
label var marfem    "married female";
label var marmale   "married male";
label var mdhrw     "median hourly wage";
label var mdwkw     "median weekly wage";
label var mdlnhw    "median log hourly wage";
label var mdlnww    "median log weekly wage";
label var mdlnftw   "median log full-time weekly wage";

** rename variables if in odd or even dataset **;
* When the fourth argument is 1 or 2, that digit is appended to every;
* collapsed variable name.  The grouping variable and year keep their names;
* NOTE(review): m_thshrl becomes m_thsh1 plus the digit, and m_wghrly becomes;
* m_wghrl plus the digit, presumably to fit a name length limit.  Code that;
* reads the output must use these shortened names - confirm;
if "`4'"=="1" | "`4'"=="2" {;
rename m_ue      m_ue`4';
rename m_thshr   m_thshr`4';
rename m_thshrl  m_thsh1`4';
rename m_thsft   m_thsft`4';
rename m_wghr    m_wghr`4';
rename m_wgft    m_wgft`4';
rename m_wghrly  m_wghrl`4';
rename uewt      uewt`4';
rename hrwt      hrwt`4';
rename hrlywt    hrlywt`4';
rename ftwt      ftwt`4';
rename ftobs     ftobs`4';
rename ueobs     ueobs`4';
rename hrobs     hrobs`4';
rename hrlyobs   hrlyobs`4';
rename stehr     stehr`4';
rename ehr       ehr`4';
rename eft       eft`4';
rename ehrly     ehrly`4';
rename eue       eue`4';
rename euei      euei`4';
rename phr       phr`4';
rename pft       pft`4';
rename phrly     phrly`4';
rename pue       pue`4';
rename puei      puei`4';
rename ue        ue`4';
rename ths       ths`4';
rename tempue    tempue`4';
rename hrwage    hrwage`4';
rename wkwage    wkwage`4';
rename lnhrw     lnhrw`4';
rename lnwkw     lnwkw`4';
rename lnftw     lnftw`4';
rename hsd       hsd`4';
rename hsg       hsg`4';
rename smc       smc`4';
rename clg       clg`4';
rename gtc       gtc`4';
rename rs_wm     rs_wm`4';
rename rs_wf     rs_wf`4';
rename rs_bm     rs_bm`4';
rename rs_bf     rs_bf`4';
rename rs_om     rs_om`4';
rename rs_of     rs_of`4';
rename m1619     m1619`4';
rename m2024     m2024`4';
rename m2554     m2554`4';
rename m5564     m5564`4';
rename m65up     m65up`4';
rename f1619     f1619`4';
rename f2024     f2024`4';
rename f2554     f2554`4';
rename f5564     f5564`4';
rename f65up     f65up`4';
rename inagri    inagri`4';
rename inmine    inmine`4';
rename intran    intran`4';
rename inmand    inmand`4';
rename inmann    inmann`4';
rename incons    incons`4';
rename inwtrd    inwtrd`4';
rename inrtrd    inrtrd`4';
rename infire    infire`4';
rename insvcs    insvcs`4';
rename ingovt    ingovt`4';
rename inmiss    inmiss`4';
rename unmem     unmem`4';
rename uncov     uncov`4';
rename se        se`4';
rename smsa      smsa`4';
rename marfem    marfem`4';
rename marmale   marmale`4';
rename mdhrw     mdhrw`4';
rename mdwkw     mdwkw`4';
rename mdlnhw    mdlnhw`4';
rename mdlnww    mdlnww`4';
rename mdlnftw   mdlnftw`4';
 
};


* Label, sort and save the group-level file.  The file name carries the;
* grouping variable, year and sample selector, with an -ind tag before the;
* selector when industry dummies were used;
label data "Means,Resids in `1' by `2'";
sort `2';

local sfx "";
if "`3'"=="ind" {local sfx "-ind"};
save "d:\wage phillips\stata\data\resid-`2'-`1'`sfx'-`4'",replace;

* Summaries of the saved data under each of the four sample weights;
summ [aw=uewt];
summ [aw=hrwt];
summ [aw=ftwt];
summ [aw=hrlywt];

log close;
