% Examine World Bank data
%
% Setup: load the series catalogue and one workbook per year (2006-2015).
% For every year the raw table is kept as Data_Table_<year>, and an
% identifier-only table Ty_<year> (country code + coordinates) is started;
% cleaned series columns are appended to the Ty_<year> tables further down.
clear all;
small = 1.0e-05;   % tolerance for "effectively zero" used in the series screen below

% Identifier columns carried into every yearly output table.
% (The spelling 'Lattitude' matches the column name in the workbooks.)
id_cols = {'CountryCode','Lattitude','Longitude'};

% Series catalogue
dname = 'WB_Series.xlsx';
Series_Table = readtable(dname);

% Yearly data, 2006 first: its row count defines the number of countries nc
dname = 'WB_2006.xlsx';
Data_Table_2006 = readtable(dname);
Ty_2006 = Data_Table_2006(:,id_cols);
nc = size(Ty_2006,1);

dname = 'WB_2007.xlsx';
Data_Table_2007 = readtable(dname);
Ty_2007 = Data_Table_2007(:,id_cols);

dname = 'WB_2008.xlsx';
Data_Table_2008 = readtable(dname);
Ty_2008 = Data_Table_2008(:,id_cols);

dname = 'WB_2009.xlsx';
Data_Table_2009 = readtable(dname);
Ty_2009 = Data_Table_2009(:,id_cols);

dname = 'WB_2010.xlsx';
Data_Table_2010 = readtable(dname);
Ty_2010 = Data_Table_2010(:,id_cols);

dname = 'WB_2011.xlsx';
Data_Table_2011 = readtable(dname);
Ty_2011 = Data_Table_2011(:,id_cols);

dname = 'WB_2012.xlsx';
Data_Table_2012 = readtable(dname);
Ty_2012 = Data_Table_2012(:,id_cols);

dname = 'WB_2013.xlsx';
Data_Table_2013 = readtable(dname);
Ty_2013 = Data_Table_2013(:,id_cols);

dname = 'WB_2014.xlsx';
Data_Table_2014 = readtable(dname);
Ty_2014 = Data_Table_2014(:,id_cols);

dname = 'WB_2015.xlsx';
Data_Table_2015 = readtable(dname);
Ty_2015 = Data_Table_2015(:,id_cols);

% Summarize results for each series
%
% For every row of Series_Table this loop
%   1. gathers the series' 2006-2015 values into x (nc countries x 10 years),
%   2. chooses a transformation (level or log),
%   3. removes country means, flags outliers with adjout_a, and screens the
%      pooled deviations on kurtosis,
%   4. blanks countries that no longer have all 10 years, and
%   5. appends the result to Ty_2006..Ty_2015 when at least 100 countries
%      remain, recording the series' metadata alongside.
ns = size(Series_Table,1);
kk = 0;

% Create the metadata columns up front so the summary table built after the
% loop exists (with zero rows) even when no series passes the screens.
SeriesCode = cell(0,1);
Topic = cell(0,1);
IndicatorName = cell(0,1);
LongDefinition = cell(0,1);
Tcode = cell(0,1);
llcode = cell(0,1);

for is = 1:ns
    sname = Series_Table{is,'SeriesCode'};
    sname_str = char(sname);
    % Yearly tables are indexed with '_' in place of '.' in the series code
    sname2_str = strrep(sname_str,'.','_');
    iname_str = char(Series_Table{is,'IndicatorName'});
    iname_str = strrep(iname_str,'%','Percent');

    % One column per year, 2006..2015
    x = NaN(nc,10);
    x(:,1) = Data_Table_2006{:,sname2_str};
    x(:,2) = Data_Table_2007{:,sname2_str};
    x(:,3) = Data_Table_2008{:,sname2_str};
    x(:,4) = Data_Table_2009{:,sname2_str};
    x(:,5) = Data_Table_2010{:,sname2_str};
    x(:,6) = Data_Table_2011{:,sname2_str};
    x(:,7) = Data_Table_2012{:,sname2_str};
    x(:,8) = Data_Table_2013{:,sname2_str};
    x(:,9) = Data_Table_2014{:,sname2_str};
    x(:,10) = Data_Table_2015{:,sname2_str};

    % true for a country with all 10 years of data
    complete_row = ~isnan(sum(x,2));
    nc_x = sum(complete_row);
    fprintf('%3i of %3i \n',[is ns]);
    % Pass the names as arguments, not as the format string, so characters
    % such as '\' in the data are printed literally instead of interpreted.
    fprintf('Series: %s  Name: %s\n',sname_str,iname_str);
    fprintf('   Number of countries: %3i \n',nc_x);

    % Check to see if '%' is in title
    pct_ind = contains(Series_Table{is,'IndicatorName'},'%');
    % Check to see if 'index' is in title (case-sensitive match)
    index_ind = contains(Series_Table{is,'IndicatorName'},'index');

    % Determine log or level
    if pct_ind == 1
        y = x;
        tcode = 'level';
    elseif index_ind == 1
        y = x;
        tcode = 'level';
    elseif min(min((packr([x])))) <= 0
        y = x;
        tcode = 'level';
    else
        % The positivity check above only looks at packr(x), while rows with
        % a missing year end up all-NaN after demeaning anyway. Blank those
        % rows before taking logs so a non-positive value in an incomplete
        % row cannot inject complex or -Inf entries into the statistics.
        x_complete = x;
        x_complete(~complete_row,:) = NaN;
        y = log(x_complete);
        tcode = 'log';
    end

    % Form country specific demeaned series
    % (mean_y is NaN for any country with a missing year, so that whole row
    % of ydm is NaN)
    mean_y = mean(y,2);
    ydm = y - repmat(mean_y,1,10);

    % Eliminate series in which 10 or fewer countries show any variation:
    % ll marks countries whose mean absolute deviation exceeds 'small'
    jj = mean(abs(ydm),2);
    ii = packr(jj);
    ll = ii > small;
    if sum(ll) <= 10
        ydm = NaN(size(ydm));
    end

    % Check for outliers .. replace with missing values
    ydm_vec = ydm(:);
    yoa = adjout_a(ydm_vec,5);
    noa = sum(isnan(yoa))-sum(isnan(ydm_vec));   % NaNs added by adjout_a
    fprintf(['Transformation: ' tcode '\n']);
    fprintf('Number of outliers: %3i \n',noa);
    nyoa = sum(isnan(yoa)==0);
    fprintf('Number of obs in yoa: %3i \n',nyoa);

    % Kurtosis of the pooled, outlier-adjusted deviations; a series with
    % kurtosis above 20 is dropped entirely
    ym = yoa - mean(packr(yoa));
    ykur = mean(packr(ym).^4)/(mean(packr(ym).^2)^2);
    fprintf('   Kurtosis %5.1f \n',ykur);
    if ykur > 20
        yoa = NaN(size(yoa));
    end

    % Back to countries x years, with the country means added back
    yoa_mat = reshape(yoa,[],10) + repmat(mean_y,1,10);
    % Replace with missing values if unbalanced:
    % jj is true for a country that is missing at least one year
    jj = isnan(sum(yoa_mat,2));
    yoa_mat(jj,:) = NaN;

    % Disabled diagnostic plots. NOTE(review): x_2015 and x_2005 are not
    % defined anywhere in the visible script, so this would need updating
    % before it could be re-enabled.
% %     if ykur > 10
% %      ykur
% %      fig = figure;
% %      subplot(2,2,1);
% %      histogram(x_2015);
% %      subplot(2,2,2);
% %      histogram(x_2005);
% %      subplot(2,2,3);
% %      histogram(y);
% %      subplot(2,2,4);
% %      histogram(yoa);
% %      waitforbuttonpress;
% %      close(fig);
% %     end
%
    % Keep the series only if at least 100 countries have a full 10 years
    n_balanced = sum(~isnan(sum(yoa_mat,2)));
    if n_balanced >= 100
        kk = kk+1;
        SeriesCode{kk,1} = sname2_str;
        Topic{kk,1} = Series_Table.Topic{is};
        IndicatorName{kk,1} = Series_Table.IndicatorName{is};
        LongDefinition{kk,1} = Series_Table.LongDefinition{is};
        Tcode{kk,1} = tcode;
        llcode{kk,1} = sum(ll);
        Ty_2006 = [Ty_2006 table(yoa_mat(:,1),'VariableNames',{sname2_str})];
        Ty_2007 = [Ty_2007 table(yoa_mat(:,2),'VariableNames',{sname2_str})];
        Ty_2008 = [Ty_2008 table(yoa_mat(:,3),'VariableNames',{sname2_str})];
        Ty_2009 = [Ty_2009 table(yoa_mat(:,4),'VariableNames',{sname2_str})];
        Ty_2010 = [Ty_2010 table(yoa_mat(:,5),'VariableNames',{sname2_str})];
        Ty_2011 = [Ty_2011 table(yoa_mat(:,6),'VariableNames',{sname2_str})];
        Ty_2012 = [Ty_2012 table(yoa_mat(:,7),'VariableNames',{sname2_str})];
        Ty_2013 = [Ty_2013 table(yoa_mat(:,8),'VariableNames',{sname2_str})];
        Ty_2014 = [Ty_2014 table(yoa_mat(:,9),'VariableNames',{sname2_str})];
        Ty_2015 = [Ty_2015 table(yoa_mat(:,10),'VariableNames',{sname2_str})];
    end
end
% Write results to a single workbook: one sheet describing the retained
% series, then one data sheet per year.
out_file = 'Series_Data_Panel.xlsx';

Series_Table_Panel = table(SeriesCode,llcode,Tcode,Topic,IndicatorName,LongDefinition);
% 'replacefile' on the first write starts the workbook from scratch; the
% yearly sheets are then added to that same file.
writetable(Series_Table_Panel,out_file,'Sheet','Series_Table','Writemode','replacefile');

panel_years = 2006:2015;
panel_tables = {Ty_2006,Ty_2007,Ty_2008,Ty_2009,Ty_2010, ...
                Ty_2011,Ty_2012,Ty_2013,Ty_2014,Ty_2015};
for iy = 1:numel(panel_tables)
    % Sheet names are Data_2006 ... Data_2015
    writetable(panel_tables{iy},out_file,'Sheet',sprintf('Data_%d',panel_years(iy)));
end

% Functions 
function [yoa] = adjout_a(y,thr)
% ADJOUT_A  Replace outliers in y with missing values (NaN).
%
%   yoa = adjout_a(y,thr)
%
%   y   : data vector (sizes are taken along dimension 1, i.e. a column)
%   thr : threshold, as a multiple of the interquartile range
%   yoa : copy of y in which flagged observations are set to NaN
%
%   An observation is flagged when its distance from the median exceeds
%   thr times the interquartile range. Entries of y that are NaN, exactly 0,
%   or exactly 100 are excluded from the quartile computation and are
%   returned unchanged.
%
%   If y has fewer than 4 distinct values (after packr), it is returned
%   as is.
%
%   Relies on the helpers packr and pctile, which are defined outside this
%   file. NOTE(review): packr presumably drops NaN rows and pctile returns
%   the requested quantiles in order - confirm against their sources.

distinct_vals = unique(packr(y));
if size(distinct_vals,1) < 4
    % Too few distinct values to form meaningful quartiles
    yoa = y;
    return
end

% Entries left out of the outlier rule: missing, exact zero, exact 100
skip = isnan(y) | (y == 0) | (y == 100);
z = y(~skip);

% Quartiles of the retained values: [25th, 50th, 75th]
quartiles = pctile(z,[0.25 0.50 0.75]);
center = quartiles(2);
spread = quartiles(3) - quartiles(1);

% Keep values within thr*IQR of the median; everything else becomes NaN
within = abs(z - center) <= (thr*spread);
za = NaN(size(z,1),1);
za(within) = z(within);

% Put the adjusted values back in their original positions
yoa = y;
yoa(~skip) = za;

end


