% POOS forecast summary for European IP
clearvars;

% Numerical constants and the run-date stamp (yyyymmdd)
small     = 1e-8;
big       = 1e8;
this_date = datestr(now, 'yyyymmdd');

% -- Directory layout (all paths are relative to the current folder)
datadir = '../Data/';
outdir  = 'out/';
figdir  = 'fig/';
matdir  = 'mat/';
umdir   = '../Fortran_fcsts/';   % forecasts produced by UM's Fortran implementations

% Make the shared Matlab utility functions visible
addpath('../Matlab_Utility_Programs/');


% Sample spans ([year month]) covered by each set of forecast files
% -- AR forecasts
first_date_ar = [1975, 7];
last_date_ar  = [2023, 10];
% -- UM forecasts
first_date_um = [1985, 6];
last_date_um  = [2023, 10];

% Evaluation window ([year month]); alternative windows are kept below, commented out
first_fcst = [1985, 6];    % first period of the evaluation window
last_fcst  = [2019, 6];    % last period of the evaluation window
% first_fcst = [2020 6]; % First forecast period evaluation period
% last_fcst = [2023 4];  % Last forecast period evaluation period
% first_fcst = [2021 6]; % First forecast period evaluation period
% last_fcst = [2023 4];  % Last forecast period evaluation period

% Forecast horizons
n_h       = 12;            % number of horizons stored in the forecast files
h_rpt     = [1, 3, 6];     % horizons that are reported
n_max_rpt = max(h_rpt);    % largest reported horizon

% Output file names
% Common suffix: _<run date>_<first eval year>_<first eval month>_<last eval year>_<last eval month>
f_suffix = sprintf('_%s_%d_%d_%d_%d', this_date, first_fcst, last_fcst);
% Tabular results (CSV)
csv_file_name = [outdir, 'IPEurope', f_suffix, '.csv'];
% Relative RMSE by sector (MAT file, saved for future use)
rel_mse_name = [matdir, 'IPEurope_rel_rmse_sector', f_suffix, '.mat'];
% Boxplot figure (extension is appended when the figure is saved)
boxplot_file_name = [figdir, 'IPEurope_boxplots', f_suffix];

% Set up a calendar to use across forecasts
% (calendar_make is a utility function not visible here; calds_all is used below as
%  a matrix whose first two columns are compared against [year month] dates)
[dnobs_all,calvec_all,calds_all] = calendar_make([1975 1],[2023 10],12);

% Percentiles for quantile forecasts
pct_vec = [0.05 0.10 1/6 0.25 0.50 0.75 5/6 0.90 0.95];   % These quantiles should not be changed ... they are the quantiles computed by UM's Fortran code
n_quant = length(pct_vec);

% Row of the common calendar that matches a [year month] date (empty if absent)
date_idx = @(ym) find(calds_all(:,1) == ym(1) & calds_all(:,2) == ym(2));

% First and last date for each forecast
t_first_ar = date_idx(first_date_ar);
t_last_ar  = date_idx(last_date_ar);
t_first_um = date_idx(first_date_um);
t_last_um  = date_idx(last_date_um);

% Dates for the evaluation period
t_first_fcst = date_idx(first_fcst);
t_last_fcst  = date_idx(last_fcst);

% Fail loudly if any date is missing from the calendar: an empty index would
% otherwise turn into empty ranges (and NaN statistics) further down the script
date_names = {'first_date_ar','last_date_ar','first_date_um','last_date_um','first_fcst','last_fcst'};
date_hits = [numel(t_first_ar) numel(t_last_ar) numel(t_first_um) numel(t_last_um) numel(t_first_fcst) numel(t_last_fcst)];
if any(date_hits ~= 1)
    error('Date(s) not found in the calendar: %s',strjoin(date_names(date_hits ~= 1),', '));
end

% Number of periods covered by each set of forecasts
dnobs_ar = t_last_ar - t_first_ar + 1;
dnobs_um = t_last_um - t_first_um + 1;

% Load the IP Europe data
% NOTE(review): load() without an output argument places every variable stored in
% the MAT file into the workspace; calds_m and lfd_data used below are expected to
% come from this file -- confirm against the file contents.
load([datadir 'IP_Europe_Data.mat']);
% First and last rows of the data's own calendar
first_date_data = calds_m(1,:);
last_date_data = calds_m(end,:);
% Position of those dates in the common calendar
t_first_data = find(calds_all(:,1) == first_date_data(1) & calds_all(:,2) == first_date_data(2));
t_last_data = find(calds_all(:,1) == last_date_data(1) & calds_all(:,2) == last_date_data(2));
% One column of lfd_data per sector
n_sectors = size(lfd_data,2);
% Place the data on the common calendar; rows outside the data's span remain NaN
data_mat = NaN(dnobs_all,n_sectors);
data_mat(t_first_data:t_last_data,:) = lfd_data;

% Load the AR forecasts and place them on the common calendar
% NOTE(review): rslt_fcsts, rslt_actual, sd_fcsts and sd_fcsts_accum are expected to be
% defined by this load -- confirm against the MAT file.
load([matdir 'ar_fcst_ipeurope.mat']);
ar_span  = t_first_ar:t_last_ar;              % calendar rows covered by the AR files
nan_cube = NaN(n_h,n_sectors,dnobs_all);      % horizon x sector x calendar template

% Point forecasts
fcsts_ar = nan_cube;
fcsts_ar(:,:,ar_span) = rslt_fcsts;
% Actuals as stored with the AR forecasts
actual_ar = nan_cube;
actual_ar(:,:,ar_span) = rslt_actual;
% Forecast standard deviations
sd_fcsts_ar = nan_cube;
sd_fcsts_ar(:,:,ar_span) = sd_fcsts;
% Standard deviations used for the accumulated forecasts
sd_fcsts_accum_ar = nan_cube;
sd_fcsts_accum_ar(:,:,ar_span) = sd_fcsts_accum;

% Load the UM forecasts
scl_um = 1;  % Scaling factor for UMs forecasts
% UM Models
% Each entry is appended to 'IP_Europe' to form the name of a MAT file in umdir
UM_Models = {'FFFFF0','TFFFF0','TTFFF0','TTTFF0','TTTTF0','TTTTT0','TTTTT1'};
n_models = length(UM_Models);
% Preallocate on the common calendar; entries outside the UM sample remain NaN
fcsts_um = NaN(n_h,n_sectors,dnobs_all,n_models);
actual_um = NaN(n_h,n_sectors,dnobs_all);
quantiles_um_accum = NaN(n_quant,n_h,n_sectors,dnobs_all,n_models);
% NOTE(review): each load() below places the file's variables into the workspace; the
% code relies on fcsts, quantfs and fcsts0 being defined by these files -- confirm.
for i_model = 1:n_models
    load([umdir 'IP_Europe' UM_Models{i_model} '.mat' ]);
    fcsts_um(:,:,t_first_um:t_last_um,i_model) = fcsts/scl_um;
    % Only the first dnobs_um entries along the 4th dimension of quantfs are used
    quantiles_um_accum(:,:,:,t_first_um:t_last_um,i_model) = quantfs(:,:,:,1:dnobs_um)/scl_um;
end
% fcsts0 here is whatever the last load above left in the workspace.
% For horizon i_h, set the last i_h entries along the 3rd dimension to NaN
% NOTE(review): presumably because the t+i_h actuals are not observed at the end of
% the sample -- confirm.
for i_h = 1:n_h
        fcsts0(i_h,:,end-i_h+1:end) = NaN;
end
% fcsts0 (scaled) is stored as the UM version of the actuals
actual_um(:,:,t_first_um:t_last_um) = fcsts0/scl_um;

% From here on keep only horizons 1..n_max_rpt (n_h is redefined accordingly)
n_h = n_max_rpt;
h_keep = 1:n_h;
% AR arrays: horizon is the 1st dimension
fcsts_ar          = fcsts_ar(h_keep,:,:);
actual_ar         = actual_ar(h_keep,:,:);
sd_fcsts_ar       = sd_fcsts_ar(h_keep,:,:);
sd_fcsts_accum_ar = sd_fcsts_accum_ar(h_keep,:,:);
% UM arrays: horizon is the 1st dimension, except for the quantiles (2nd dimension)
fcsts_um           = fcsts_um(h_keep,:,:,:);
actual_um          = actual_um(h_keep,:,:);
quantiles_um_accum = quantiles_um_accum(:,h_keep,:,:,:);


% Check some actual values to make sure things are aligned properly.
% One (horizon, sector, date) cell is compared across three sources: the actuals
% stored with the AR forecasts, those stored with the UM forecasts, and the data.
i_h = min(6,n_h);            % spot-check horizon, clamped to the horizons kept
i_sec = min(12,n_sectors);   % spot-check sector, clamped to the sectors available
t = find(calds_all(:,1) == 2005 & calds_all(:,2) == 1);
if ~isscalar(t)
    % Without this guard an empty t makes every comparison below empty, and an
    % "if" on an empty condition is false, so the check would pass vacuously
    error('Alignment check date 2005:1 was not found in the calendar');
end
a_ar = actual_ar(i_h,i_sec,t);
a_um = actual_um(i_h,i_sec,t);
a_data = data_mat(i_h+t,i_sec);   % the horizon-i_h actual dated t is the data at t+i_h
if any(isnan([a_ar a_um a_data]))
    % abs(NaN) > small is false, so a NaN would otherwise be reported as a match
    warning('Alignment check is inconclusive: at least one of the compared actuals is NaN');
end
if abs(a_ar - a_data) > small
    error('Actuals from AR and data do not match');
end
if abs(a_ar - a_um) > small
    error('Actuals from AR and um do not match');
end

% Outlier-adjusted copy of the data --- used to check robustness -- see below
% (thr, tflag and x_window are passed straight to adjout_2, a utility function not
%  visible here; their meaning is defined there)
thr = 4;
tflag = 3;
x_window = 3;
data_mat_adj = NaN(dnobs_all,n_sectors);
for i = 1:n_sectors
    data_mat_adj(:,i) = adjout_2(data_mat(:,i),thr,tflag,x_window);
end

% Actuals by horizon for the raw and the outlier-adjusted data:
% entry (j,:,t) holds the data dated t+j; the last j dates of horizon j remain NaN
actual = NaN(n_h,n_sectors,dnobs_all);
actual_adj = NaN(n_h,n_sectors,dnobs_all);
for j = 1:n_h
    % (dates x sectors) -> (1 x sectors x dates), filling all sectors at once
    actual(j,:,1:end-j) = permute(data_mat(1+j:end,:),[3 2 1]);
    actual_adj(j,:,1:end-j) = permute(data_mat_adj(1+j:end,:),[3 2 1]);
end

% Forecast errors (forecast minus actual) for the raw series.
% The UM forecasts carry an extra trailing model dimension; the 3-D actuals are
% expanded along it implicitly by the subtraction.
errs_ar = fcsts_ar - actual;
errs_um = fcsts_um - actual;
% Same errors measured against the outlier-adjusted series
errs_ar_adj = fcsts_ar - actual_adj;
errs_um_adj = fcsts_um - actual_adj;

% Save Results to a CSV File
file_name = csv_file_name;
[fid,fopen_msg] = fopen(file_name,'w');
if fid < 0
    % fopen returns -1 on failure (e.g. the output directory does not exist);
    % stop here with the system message rather than failing later inside fprintf
    error('Could not open %s for writing: %s',file_name,fopen_msg);
end
% Header: title and the evaluation sample as year:month to year:month
fprintf(fid,'IP Europe Results \n');
fprintf(fid,'Sample Period:  %4i:%2i to %4i:%2i \n\n',[first_fcst last_fcst]);

% RMSE for the AR model and relative RMSE for the UM models (raw data)
fprintf(fid,'\n\n Results for Table 2 (not reported)  \n');
% The forecasts are for growth rates from t to t+1; accumulating the errors over
% the horizon dimension gives the errors of the t to t+h forecasts
e_ar = cumsum(errs_ar,1);
e_um = cumsum(errs_um,1);
% External script (not visible here); presumably reads e_ar/e_um and writes to fid -- confirm
table_out_rmse_paper;

% Quantile and interval results for the AR and UM models
% AR predictive quantiles are Gaussian: accumulated point forecast plus the normal
% quantile times the accumulated forecast standard deviation
fcsts_ar_accum = cumsum(fcsts_ar,1);
z_vec = norminv(pct_vec);     % standard normal quantiles at pct_vec
quantiles_ar_accum = NaN(n_quant,n_h,n_sectors,dnobs_all);
quantiles_gvar_accum = NaN(n_quant,n_h,n_sectors,dnobs_all);   % allocated only; not filled in this script
for i_q = 1:n_quant
    quantiles_ar_accum(i_q,:,:,:) = z_vec(i_q)*sd_fcsts_accum_ar + fcsts_ar_accum;
end
% Accumulated (t to t+h) actuals to compare against
actual_accum = cumsum(actual,1);
% Relative quantile and interval loss
fprintf(fid,'\n\n Quantiles and Intervals (Accumulated .. t to t+h Data) ... Pooled .. for table 3 (not reported) in paper \n');
% External script (not visible here); presumably uses the arrays built above and writes to fid -- confirm
table_out_quantiles_paper;

% --- Addendum -- Pooled results using outlier adjusted data
fprintf(fid,'\n\n\n Addendum: Results for Table 2 in paper (Outlier Adjusted Data)  \n');
% The forecasts are for growth rates from t to t+1; accumulating the errors over
% the horizon dimension gives the errors of the t to t+h forecasts
e_ar = cumsum(errs_ar_adj,1);
e_um = cumsum(errs_um_adj,1);
% External script (not visible here); presumably reads e_ar/e_um and writes to fid -- confirm
table_out_rmse_paper;

% This is the last table written to the CSV file: close it so the output is
% flushed to disk and the file handle is released
fclose(fid);

% Relative RMSE (UM model / AR) by horizon and sector over the evaluation window;
% these are the inputs to the boxplots
e_ar = cumsum(errs_ar,1);     % accumulated (t to t+h) errors, raw data
e_um = cumsum(errs_um,1);
rel_rmse_sector = NaN(n_h,n_sectors,n_models);
for i_h = 1:n_h
    for i_sec = 1:n_sectors
        % The AR benchmark does not depend on the UM model, so compute it once
        tmp_ar = squeeze(e_ar(i_h,i_sec,:));
        tmp_ar = tmp_ar(t_first_fcst:t_last_fcst);
        rmse_ar = sqrt(mean(tmp_ar.^2));
        for i_model = 1:n_models
            tmp_um = squeeze(e_um(i_h,i_sec,:,i_model));
            tmp_um = tmp_um(t_first_fcst:t_last_fcst);
            rel_rmse_sector(i_h,i_sec,i_model) = sqrt(mean(tmp_um.^2))/rmse_ar;
        end
    end
end

% Store the relative RMSEs in a MAT file
save(rel_mse_name,'rel_rmse_sector');

% Boxplots of the relative RMSE across sectors: one panel per reported horizon,
% one box per UM model
bp_labels = ["I","II","III","IV","V","VI","VII"];   % one label per UM model
colors = lines(3);
Color1 = colors(1,:);                                % fill colour for the boxes
ax_fs = 18;                                          % axes font size
figure('Units', 'normalized', 'Position', [0 0 0.5 0.75]); % For Dell Wide Monitor
for i = 1:numel(h_rpt)
    i_h = h_rpt(i);
    x = squeeze(rel_rmse_sector(i_h,:,:));           % rows: sectors, columns: models
    subplot(2,2,i);
    boxplot(x,'labels',bp_labels,'plotstyle','traditional','colors','b');
    boxes = findobj(gca,'Tag','Box');

    % Overlay a semi-transparent patch on each box outline
    % (FaceAlpha: 0 = transparent, 1 = solid)
    for j = 1:numel(boxes)
        patch(boxes(j).XData, boxes(j).YData, Color1, 'FaceAlpha', 0.5);
    end
    title(['h = ' num2str(i_h)]);
    % ylim([0.50 1.25]);
    ylim([0.7 1.20]);
    set(gca,'FontSize',ax_fs);
    xlabel('Model');
    ylabel('Relative RMSE');
    grid on;
end
% Write the figure to a PNG file and close it
saveas(gcf, [boxplot_file_name '.png']);
close(gcf);
