% Estimate CSI weights on the observed data and on Monte Carlo (MC) generated data,
% and compute the MC p-value of the in-sample R2

% Reset the workspace and define two numeric constants.
% NOTE(review): small and big are not referenced again in this script;
% presumably they are read by the data scripts invoked below -- confirm
% before removing.
clear all;
small = 1.0e-10;
big = 1.0e+6;                  
rng(63761);   % fixed seed so the randn draws used below are reproducible

% Save the current search path in p1 (it is restored with path(p1) at the
% end of the script) and append the shared utilities folder.
p1 = path;
path(p1,'../m_utilities');

 % -- File directories (relative paths).  Only outdir and matdir are
 %    referenced later in this script.
  outdir = 'out/';
  figdir = 'fig/';
  matdir = 'mat/';
  
  % Label for this run; it becomes part of the output file name
  run_label = 'benchmark';
  
  % -- Read in Data --- 
  % load_data and mtoq_agg are set before the two data scripts are run.
  % NOTE(review): presumably those scripts read these flags from the
  % workspace and create the *_q, calvec_*, dnobs_*, calds_* and
  % labelvec_disagg variables used below -- confirm.
  load_data = 1;  % 1 if reloading data from Excel, etc 
  mtoq_agg = 0;   % Temporal aggregation indicator of monthly to quarterly data
  pc_comp_data_calendar_m_and_q_prices;
  pc_comp_data_calendar_m_and_q_real;
  % Data Series Used: copy the quarterly (_q / _qp / _qr) variables into
  % the generic names used by the rest of the script
  dp_agg = dp_agg_q;
  dp_agg_xfe = dp_agg_xfe_q;
  dp_agg_xe = dp_agg_xe_q;
  dp_aggs = [dp_agg dp_agg_xfe dp_agg_xe];
  namevec_aggs = {'dp_agg';'dp_agg_xfe';'dp_agg_xe'};
  dp_disagg = dp_disagg_q;
  calvec_p = calvec_qp;
  calvec_r = calvec_qr;
  dnobs_p = dnobs_qp;
  calds_p = calds_qp;
  % Exclusion indicator: an entry equal to 1 marks a component whose
  % coefficient is set to zero.  All entries are 0 here, so every component
  % is kept when columns are selected with dp_non_zero below.
  dp_disagg_zero = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]';   % Coefficients set to zero
  % The next two statements have no trailing semicolon on purpose: they
  % echo the heading and the labels of excluded components to the command window.
  'Coefficients set to zero:'
  labelvec_disagg(dp_disagg_zero==1)
  dp_non_zero = 1 - dp_disagg_zero;   % 1 = component is included
  n_p = size(dp_disagg,2);            % number of disaggregated series (columns)
  % NOTE(review): dif and ma are external helpers; the variable name suggests
  % a year-over-year transformation built from a 4-period moving average and
  % a 4-period difference -- confirm against their definitions.
  dp_disagg_yoy = dif(ma(dp_disagg,4),4);
 
  % Load the file 'cai' from matdir.
  % NOTE(review): presumably this defines the variable cai used for y below -- confirm.
  str = [matdir 'cai'];load(str);
  
  % Iteration Parameters
  nit = 150;                              % optimizer starts per sample (bound of the inner loop)
  options = optimset('Display','off');    % passed to fminsearch; suppresses its output
  
  % Estimation sample.
  % NOTE(review): dates look like [year quarter] with 4 periods per year
  % (last argument of smpl) -- confirm against smpl.
  first_date = [1984 1];
  last_date = [2019 1];
  ismpl_p = smpl(calvec_p,first_date,last_date,4);   % sample mask on the price calendar
  ismpl_r = smpl(calvec_r,first_date,last_date,4);   % sample mask on the real-activity calendar
  calvec_est = calvec_p(ismpl_p==1);

  % Step 1:  Build the dependent variable y (cai) and the regressors x
  % (included columns of dp_disagg_yoy) over the estimation sample; x is
  % demeaned column by column.  The R2 itself is computed later, inside the
  % Monte Carlo loop.
  y = cai(ismpl_r==1);
  x = dp_disagg_yoy(ismpl_p==1,dp_non_zero'==1);
  x = x - mean(x);

  % Estimate DGP for Simulations
  % Two separate models are fitted with varest and then used to simulate
  % y and x independently of each other.
  % NOTE(review): the meaning of the varest options is inferred from their
  % names and from the fields read below -- iconst = 0 presumably omits the
  % constant (both series are demeaned) and icomp = 1 presumably requests
  % the companion-form matrices (coef.Q, coef.M, coef.G) -- confirm in varest.

  % ... univariate data: model with unar lags for the demeaned y
  unar = 12;
  uvar_par.nlag = unar;
  uvar_par.icomp = 1;
  uvar_par.iconst = 0;
  ydm = y - mean(y);
  usmpl_par.calvec = calvec_r(ismpl_r==1);   % calendar matching the rows of y
  usmpl_par.nper = 4;
  usmpl_par.nfirst = first_date;
  usmpl_par.nlast = last_date;
  uvarout = varest(ydm,uvar_par,usmpl_par);
  
  % ... var data: model with vnar lags for the demeaned regressors x
  vnar = 2;
  vvar_par.nlag = vnar;
  vvar_par.icomp = 1;
  vvar_par.iconst = 0;
  % x was selected with ismpl_p on the price calendar, so the calendar passed
  % to varest must be calvec_p restricted by the same mask (the original
  % indexed calvec_r with ismpl_p, mixing the two calendars).
  vsmpl_par.calvec = calvec_p(ismpl_p==1);
  vsmpl_par.nper = 4;
  vsmpl_par.nfirst = first_date;
  vsmpl_par.nlast = last_date;
  vvarout = varest(x,vvar_par,vsmpl_par);
  
  % Generate data: nrep simulated samples, each with T rows (the length of y).
  % NOTE(review): n_initial is presumably a burn-in length discarded by the
  % generators -- confirm in generate_data_*.
  nrep = 500;
  T = size(y,1);
  ar_coef = uvarout.betahat;
  se_ar = sqrt(uvarout.seps);
  n_initial = 100;
  y_mat = generate_data_univariate_ar(T,nrep,ar_coef,se_ar,n_initial);
  
  Q = vvarout.coef.Q;
  M = vvarout.coef.M;
  G = vvarout.coef.G;
  x_mat = generate_data_VAR_companion(T,nrep,Q,M,G,n_initial);
  
  % Monte Carlo loop over nrep+1 samples.
  %   pass 1          : the observed data (y, x)
  %   passes 2..nrep+1: simulated draws y_mat(:,k), x_mat(:,:,k), k = irep-1
  % For each sample the objective myfun4 is minimised from nit starting
  % points, the best parameter vector is converted to weights with
  % logit_wght, and the squared correlation between ymc and the weighted
  % index xmc*b_logit is stored in r2_save(irep).
  r2_save = NaN(nrep+1,1);
  for irep = 1:(nrep+1)
    tic;

    % Pick the sample for this pass
    if irep == 1
      ymc = y;
      xmc = x;
    else
      ymc = y_mat(:,irep-1);
      xmc = squeeze(x_mat(:,:,irep-1));
    end

    % Demean the dependent variable and every regressor column
    ymc = ymc - mean(ymc);
    xmc = xmc - repmat(mean(xmc),size(xmc,1),1);

    % Data structure handed to the objective function
    fdata.z = xmc;
    fdata.y = ymc;

    % Best-so-far bookkeeping.  The starting benchmark is fmin = 0 with a
    % zero parameter vector; improvements are prepended, so column 1 of
    % theta_save always holds the best vector found.
    theta_max = zeros(size(xmc,2)-1,1);
    fmin = 0;
    fmin_save = fmin;
    theta_save = theta_max;

    for i = 1:nit
      % Odd i: fresh random start.  Even i: small perturbation of the
      % vector returned by the previous fminsearch call.
      if mod(i,2) == 0
        theta = theta + .02*randn(size(x,2)-1,1);
      else
        theta = 0.5*randn(size(x,2)-1,1);
      end

      theta = fminsearch(@(x) myfun4(x,fdata),theta,options);
      tmp = myfun4(theta,fdata);

      % Record the result only when it strictly improves on the best value
      if tmp < fmin
        theta_max = theta;
        fmin = tmp;
        theta_save = [theta_max theta_save];
        fmin_save = [fmin;fmin_save];
      end
    end

    % Weights implied by the best parameter vector, also placed into a
    % full-length vector with zeros for excluded components
    b_logit = logit_wght(theta_save(:,1));
    w_logit = zeros(n_p,1);
    w_logit(dp_non_zero==1) = b_logit;

    % R2 of ymc on the weighted index (both are mean zero)
    zmc = xmc*b_logit;
    szz = sum(zmc.^2);
    syy = sum(ymc.^2);
    szy = sum(zmc.*ymc);
    r2 = (szy^2)/(szz*syy);
    r2_save(irep) = r2;

    % Progress report: echo the pass number and its elapsed time
    irep
    toc
  end

% Compute p-value of in-sample r2: the share of simulated R2 values that are
% strictly larger than the R2 obtained on the observed data (first entry).
r2_data = r2_save(1);
r2_mc = r2_save(2:end);
pvalue = mean(r2_mc > r2_data);

% Print out Results
outfile_name = [outdir 'r2_mc_table_' run_label '.out'];
fileID = fopen(outfile_name,'w');
if fileID < 0
  % fopen returns -1 on failure (e.g. the output directory does not exist).
  % Restore the search path before stopping so the session is left clean.
  path(p1);
  error('Could not open output file %s for writing.',outfile_name);
end
fprintf(fileID,'In-Sample R2 (pvalue): %5.2f (%6.3f) \n',[r2_data pvalue]);
fclose(fileID);   % flush and release the file handle

% Restore the search path saved at the top of the script
path(p1);


  