% Descriptive statistics for the State Employment dataset.
% This opening section resets the workspace, fixes the random seed, and
% defines the constants, date stamp and directory names used further down.

clearvars;

% Seed the random number generator so repeated runs give the same draws
rng(9876234);

% Small and large constants, and today's date as yyyymmdd (the date stamp
% is appended to the name of the text output file)
big   = 1.0e+8;
small = 1.0e-8;
this_date = datestr(now,'yyyymmdd');

% -- File Directories: input data, text output, figures, .mat files
matdir  = 'mat/';
figdir  = 'fig/';
outdir  = 'out/';
datadir = '../Data/';

% Put the Matlab utility functions on the search path
addpath('../Matlab_Utility_Programs/');

% Load the employment spreadsheet; columns 3:end are the state series and
% their variable names are the state identifiers
SData = readtable([datadir 'StateEmployment.xlsx']);
states = SData.Properties.VariableNames(3:end);
n_states = length(states);
emp_data = SData{:,3:end};

% Calendar: [year month] pairs and the matching decimal dates
calds = [SData.Year SData.Month];
calvec = SData.Year + (SData.Month-1)/12;

% Growth rates: 1200 * dif(log(employment),1)
% (dif is a project utility; presumably a first difference -- confirm)
demp_data = 1200*dif(log(emp_data),1);

% Keep only the rows flagged by smpl for the window [1990 2] to [2019 12]
ismpl = smpl(calvec, [1990 2], [2019 12],12);
in_smpl = (ismpl==1);
calvec = calvec(in_smpl,1);
calds = calds(in_smpl,:);
demp_data = demp_data(in_smpl,:);
dnobs = length(calvec);

% Figure 1 in paper: data for the selected-state panel.
% Each selected state's growth-rate series is shifted down by its own
% offset (offset1 for the first state, d_offset more for each following
% one) so the series do not overlap when drawn in one set of axes.
state_plot = {'NJ','IL','CA','IA','WV'};
state_labels = {'New Jersey','Illinois','California','Iowa','West Virginia'};
n_plot = length(state_plot);
offset1 = 50;
d_offset = 25;

% Column of demp_data that belongs to each selected state
[~, plot_cols] = ismember(state_plot, states);

% One offset per selected state: offset1, offset1+d_offset, ...
plot_offsets = offset1 + d_offset*(0:n_plot-1);

% dnobs-by-n_plot matrix of shifted series
data_plot = demp_data(:,plot_cols) - plot_offsets;

% Draw Figure 1: thin lines for every state's growth rate, then thick lines
% for the selected states (data_plot holds their vertically shifted copies),
% add the panel and state labels, and save the figure as a PNG.
fig = figure('units','normalized','outerposition',[0 0 0.5 0.75]);  % Dell
% Font sizes and line widths
leg_fs = 25;   % not used in this section; kept in case later code reads it
ax_fs = 25;
lab_fs_1 = 25;
lw_1 = 1.0;
lw_2 = 2.5;

% State labels are drawn at 80% of the panel-label size (rounded up)
lab_fs_2 = ceil(lab_fs_1*0.8);

plot(calvec,demp_data,'LineWidth',lw_1);

hold on;
for i = 1:n_plot
    plot(calvec,data_plot(:,i),'LineWidth',lw_2);
end
hold off;
% Set the x-axis range by CALLING xlim. Writing "xlim = [1990 2020]" would
% only create a workspace variable named xlim (shadowing the function) and
% leave the axis limits untouched.
xlim([1990 2020]);
set(gca,'FontSize',ax_fs);
% suppress the y-axis
set(gca,'YTickLabel',[]);
%  Add Labels
% Panel (a) label, placed in data coordinates
xlab_1 = 2004;
ylab_1 = 25;
lab_1 = '(a) All States';
text(xlab_1,ylab_1,lab_1,'FontSize',lab_fs_1);
% Panel (b) label: same x position, moved down past the first offset
xlab_2 = xlab_1;
ylab_2 = ylab_1-offset1-10;
lab_2 = '(b) Select States';
text(xlab_2,ylab_2,lab_2,'FontSize',lab_fs_1);
% One label per selected state, stepping down by the same spacing that
% separates the shifted series
d_off = d_offset;
ylab_3 = ylab_2-7;
xlab_3 = 1991;
for i = 1:n_plot
    text(xlab_3,ylab_3,state_labels{i},'FontSize',lab_fs_2);
    ylab_3 = ylab_3 - d_off;
end

% figure_to_png is a project utility; called here with the figure
% directory and the base file name
figure_to_png(figdir, 'Figure_1');
close(fig);

% Results in Table 1 in paper and mentioned in text
perc = [10 25 50 75 90]'; % Percentiles reported in Table 1

% File for output: <outdir>state_emp_desc_statistics_<yyyymmdd>.txt
out_file = [outdir 'state_emp_desc_statistics_' this_date '.txt'];
[fid, fopen_msg] = fopen(out_file,'w');
% fopen returns -1 on failure (for example when the output directory does
% not exist); stop here with a clear message instead of failing later on
% the first fprintf with an invalid file identifier.
if fid < 0
    error('state_emp:fopenFailed', ...
        'Could not open output file %s for writing: %s', out_file, fopen_msg);
end
% NOTE(review): no fclose(fid) is visible in this part of the script --
% confirm the file is closed once all results have been written.
fprintf(fid,'Descriptive Statistics for State Employment Growth Rates \n');
fprintf(fid,'Sample: 1990:2 to 2019:12 \n');


% Average pairwise correlation across states: remove the diagonal of the
% correlation matrix, then divide the sum of what is left by the number of
% off-diagonal entries, n_states*(n_states-1)
cormat = corr(demp_data);
own_corr = diag(cormat);
cormat = cormat - diag(own_corr);
n_offdiag = n_states*(n_states-1);
col_totals = sum(cormat);
avg_corr = sum(col_totals)/n_offdiag;
fprintf(fid,'Average pairwise correlation of employment growth rates = %6.2f \n',avg_corr);

% Blank lines before the next set of results
fprintf(fid,'\n\n');
% Full-sample AR estimates, one state at a time, with n_ar = 12 lags.
% i_constant = 1 is passed to uar_c_residual (presumably it requests a
% constant term -- confirm against the utility function).
n_ar = 12;
i_constant = 1;
residual_mat = zeros(dnobs,n_states);   % residuals, one column per state
ar_const_mat = zeros(n_states,1);       % estimated constants
ar_se_mat = zeros(n_states,1);          % "ser" output of uar_c_residual
ar_coef_mat = zeros(n_ar,n_states);     % AR coefficients, one column per state
for i_state = 1:n_states
    demp_i = demp_data(:,i_state);
    % Outputs, in order: AR coefficients, constant, ser, residuals
    [ar_coef_mat(:,i_state), ar_const_mat(i_state,1), ar_se_mat(i_state,1), ...
        residual_mat(:,i_state)] = uar_c_residual(demp_i,n_ar,i_constant);
end

% AR results in Table 1: percentiles across states of the sum of the AR
% coefficients and of ser
ar_coef_sum = sum(ar_coef_mat,1)';
pct_ar_coef_sum = prctile(ar_coef_sum,perc);
pct_ar_se = prctile(ar_se_mat,perc);

% GARCH(1,1) with t-distributed innovations, estimated separately on each
% state's AR residuals.
garch_t_df = zeros(n_states,1);     % estimated t degrees of freedom
sd_resid = zeros(dnobs,n_states);   % sqrt of the inferred conditional variance
garch_parms = zeros(3,n_states);    % rows: ARCH coef, GARCH coef, t dof

% The model template is identical for every state, so build it once
Mdl = garch('Garchlags',1,'ARCHLags',1);
Mdl.Distribution = 't';

for i_state = 1:n_states
    % packr is a project utility (presumably drops rows with missing
    % values -- confirm); T is the number of observations it returns
    y = packr(residual_mat(:,i_state));
    T = length(y);

    % Estimate model ... do not print out results
    EstMdl = estimate(Mdl,y,'Display','off');
    nu = EstMdl.Distribution.DoF;
    garch_t_df(i_state,1) = nu;
    garch_parms(:,i_state) = [EstMdl.ARCH{1}; EstMdl.GARCH{1}; nu];

    % Store the conditional standard deviations in the last T rows, so
    % the rows removed by packr stay at zero
    sd_resid(1+dnobs-T:end,i_state) = sqrt(infer(EstMdl,y));
end

% Table 1: percentiles across states of (ARCH + GARCH coefficient) and of
% the t degrees of freedom
archsum = sum(garch_parms(1:2,:),1);
pct_garchsum = prctile(archsum',perc);
pct_garch_t_df = prctile(garch_t_df,perc);

% Nyblom statistic for the mean, one per state (p-values are computed
% further down from simulated draws).
% The raw statistic is the sum of squared partial sums of the standardized
% series divided by dnobs^2; it is then multiplied by
% (1 - sum of AR coefficients)^2 from an AR(n_ar) fit to the same series.
n_ar = 3;  % Used for HAC covariance matrix for Nyblom mean
i_constant = 0;
nyblom_stat_mean = zeros(n_states,1);
for i_state = 1:n_states
    % Standardize: subtract the mean, then divide by the std of the result
    y_centered = demp_data(:,i_state);
    y_centered = y_centered - mean(y_centered);
    y = y_centered/std(y_centered);

    % Raw statistic from the cumulated series
    e = cumsum(y);
    nyb = sum(e.^2)/(dnobs^2);

    % Serial-correlation adjustment based on the fitted AR coefficients
    % NOTE(review): ser is returned by uar_c but not used in the
    % adjustment -- confirm that no scaling by the innovation variance
    % is intended here.
    [arcoef,constant,ser] = uar_c(y,n_ar,i_constant);
    nyb_rob = nyb*(1-sum(arcoef))^2;
    nyblom_stat_mean(i_state,1) = nyb_rob;
end

% Nyblom statistic for the sum of AR coefficients, one per state.
% y is regressed on its own first lag (X), a constant, and lags
% 1..n_ar-1 of dif(y,1); the statistic is built from the cumulated,
% standardized score X.*u of that regression.
n_ar = 12;
nyblom_stat_ar = zeros(n_states,1);
for i_state = 1:n_states
    y = demp_data(:,i_state);
    dy = dif(y,1);

    % Column 1 is the constant; columns 2..n_ar hold lags 1..n_ar-1 of dy
    Z = ones(dnobs,n_ar);
    for j_col = 2:n_ar
        Z(:,j_col) = lag(dy,j_col-1);
    end

    % Stack [dependent, lagged level, other regressors] and pass through
    % packr (project utility; presumably drops incomplete rows -- confirm)
    packed = packr([y lag(y,1) Z]);
    Y = packed(:,1);
    X = packed(:,2);
    W = packed(:,2:end);     % all regressors: lagged level first, then Z

    % Least-squares residuals
    u = Y - W*(W\Y);

    % Score for the lagged-level coefficient, scaled to unit std
    score = X.*u;
    score = score/std(score);

    % Sum of squared partial sums, divided by T^2
    e = cumsum(score);
    T = length(e);
    nyblom_stat_ar(i_state,1) = sum(e.^2)/(T^2);
end

% Get p-values by simulation: draw nrep standard-normal series of length
% T = dnobs, demean each column, and compute the same sum-of-squared
% partial-sums statistic for every draw.
T = dnobs;
nrep = 1000;
y = randn(T,nrep);
y = y - mean(y);
e = cumsum(y);
nyb = (sum(e.^2)/(T^2))';    % nrep-by-1 column of simulated statistics

% p-value = share of simulated statistics strictly above the state's
% statistic. Comparing the 1-by-nrep row of draws with the n_states-by-1
% column of statistics expands to an n_states-by-nrep logical matrix.
pvalue_nyb = zeros(n_states,2);
pvalue_nyb(:,1) = mean(nyb' > nyblom_stat_mean, 2);   % mean statistic
pvalue_nyb(:,2) = mean(nyb' > nyblom_stat_ar, 2);     % AR-sum statistic

% Percentiles of the p-values across states
pct_nyblom_mean = prctile(pvalue_nyb(:,1),perc);
pct_nyblom_arsum = prctile(pvalue_nyb(:,2),perc);

% Print results to file: Table 1, one row per statistic, five percentile
% columns. Each row's format is a fixed-width label followed by a shared
% numeric layout (two decimals, or one decimal for the t degrees of freedom).
rule_line = '-----------------------------------------------------------------------\n';
fmt_2dec = '%6.2f      %6.2f      %6.2f      %6.2f      %6.2f \n';
fmt_1dec = '%6.1f      %6.1f      %6.1f      %6.1f      %6.1f \n';

% label, numeric layout, values
table_rows = {
    'Sum AR Coef  ', fmt_2dec, pct_ar_coef_sum
    'AR Std Err   ', fmt_2dec, pct_ar_se
    'GarchSum     ', fmt_2dec, pct_garchsum
    'Garch t-df   ', fmt_1dec, pct_garch_t_df
    'Nyblom Mean  ', fmt_2dec, pct_nyblom_mean
    'Nyblom ARSum ', fmt_2dec, pct_nyblom_arsum
    };

fprintf(fid,'Results for Table 1 \n');
fprintf(fid,'Statistic    10th Perc   25th Perc   50th Perc   75th Perc   90th Perc \n');
fprintf(fid,rule_line);
for i_row = 1:size(table_rows,1)
    row_fmt = [table_rows{i_row,1} table_rows{i_row,2}];
    fprintf(fid,row_fmt,table_rows{i_row,3}');
end
fprintf(fid,rule_line);