clear; clc;

%% Add directory "...\ISVM_Code"
% '..' is resolved against the current folder, so run this script from its own folder
addpath('..');

%% Load the dataset
% The variables are picked by their position inside the MAT-file, not by name:
%   field 1 -> level : IV
%   field 2 -> money : Log-moneyness
%   field 3 -> mtr   : Time-to-maturity
%   field 6 -> d     : Dividend yield
%   field 7 -> date  : Dates
%   field 9 -> r     : Risk-free interest rate
fields = struct2cell(load('IV_Data_Sample.mat'));
[level, money, mtr, d, date, r] = fields{[1 2 3 6 7 9]};
clear('fields')

% Number of observation days (one entry of "date" per day)
num_days = length(date);

%% Estimating the regression coefficients beta
% For each day i, the quotes in row i of level/money/mtr/r are filtered and the
% implied volatility is regressed on polynomial terms of log-moneyness k and
% time-to-maturity tau (see equation (36)). Days that fail a filter are skipped.
% Outputs: row vectors beta_00, beta_01, beta_02, beta_10, beta_11, beta_20,
% r_valid, d_valid (one entry per retained day) and the standard deviations
% std_01, std_02, std_10, std_11, std_20 of the corresponding beta series.

% Initialization: one slot per day; slots of skipped days are removed after the loop
beta_00 = NaN(1, num_days); beta_10 = NaN(1, num_days); beta_01 = NaN(1, num_days);
beta_02 = NaN(1, num_days); beta_11 = NaN(1, num_days); beta_20 = NaN(1, num_days);
r_valid = NaN(1, num_days); d_valid = NaN(1, num_days);
is_valid = false(1, num_days); % Marks the days that pass every filter

% Data filters (see Section 5)
% 1. Only use data with 10 <= mtr <= 60 days, if at least 4 maturities are available, or use four maturities closest to this range
% 2. Only use money in the range [-vol*sqrt(\tau), vol*sqrt(\tau)]
min_mtr = 10/360; max_mtr = 60/360; % The first criterion is to collect maturities in [min_mtr, max_mtr]
min_ext = 8/360; max_ext = 160/360; % Widest maturities accepted when topping up from outside [min_mtr, max_mtr]
num_mtr = 4;                        % Number of distinct maturities required per day

for i = 1 : num_days
    %%%% Begin data filtering %%%%
    t_lst = mtr(i,:)/360; % Maturities in years (360-day convention)
    k_lst = money(i,:);
    Regdata = level(i,:);

    % Shortest maturity no less than min_mtr (NaN entries fail the comparison and are ignored)
    first_t = min(t_lst(t_lst >= min_mtr));
    if isempty(first_t)
        % No usable maturity on this day. Comparing t_lst with an empty value
        % would raise a size-mismatch error, and the day could not pass the
        % maturity filter below anyway, so skip it.
        continue
    end
    a = find(t_lst == first_t); % Positions of that shortest maturity
    b = a(abs(k_lst(a)) == min(abs(k_lst(a)))); % Find the position closest to ATM; b may not be a scalar
    k_boundary = mean(Regdata(b))*sqrt(t_lst); % Boundary of k: ATM vol times sqrt(tau), one value per quote

    % Keep quotes with |k| inside the boundary; NaN entries fail the comparison and drop out
    index1 = find(abs(k_lst) <= k_boundary);
    t_lst = t_lst(index1);
    k_lst = k_lst(index1);
    Regdata = Regdata(index1);
    r_lst = r(i, index1)/100; % presumably quoted in percent - TODO confirm

    % Filter the data with maturities close to [min_mtr, max_mtr]
    uni_mtr = unique(t_lst);

    if length(uni_mtr) < num_mtr % Skip this day
        continue
    end

    in_range = uni_mtr >= min_mtr & uni_mtr <= max_mtr;
    n_missing = num_mtr - sum(in_range); % Maturities still needed from outside the range

    if n_missing <= 0
        % Enough maturities inside the range: use all of them
        target_t = uni_mtr(in_range);
    elseif n_missing == num_mtr
        % No maturity inside the range: skip this day
        continue
    else
        % Top up with the n_missing candidates closest to [min_mtr, max_mtr]
        l_candi = uni_mtr(uni_mtr < min_mtr & uni_mtr >= min_ext);
        r_candi = uni_mtr(uni_mtr > max_mtr & uni_mtr <= max_ext);
        combine_candi = [l_candi, r_candi];

        if length(combine_candi) < n_missing
            continue % Not enough candidates: skip this day
        elseif length(combine_candi) > n_missing
            % Distance of every candidate to the range. Candidates tied with
            % the n_missing-th smallest distance are all kept, so a tie can
            % admit more than num_mtr maturities.
            dist_candi = [min_mtr - l_candi, r_candi - max_mtr];
            sort_dist = sort(dist_candi);
            combine_candi = combine_candi(dist_candi <= sort_dist(n_missing));
        end
        target_t = sort([uni_mtr(in_range), combine_candi]);
    end

    index2 = ismember(t_lst, target_t); % Check whether the entries of t_lst belong to the target maturity set

    % Column vectors for the regression
    t_lst = t_lst(index2)';
    k_lst = k_lst(index2)';
    Regdata = Regdata(index2)';
    r_lst = r_lst(index2);
    %%%% End data filtering %%%%

    % Bivariate regressions
    % Regressors: 1, k, k^2, tau, k*tau, tau^2, k*tau^2. The coefficient on the
    % last regressor, coef(7), is estimated but not stored.
    k_matrix = [ones(sum(index2), 1), k_lst, k_lst.^2, t_lst, k_lst.*t_lst, t_lst.^2, (t_lst.^2).*k_lst]; % See equation (36)
    coef = regress(Regdata, k_matrix);

    is_valid(i) = true;
    r_valid(i) = mean(r_lst);
    d_valid(i) = d(i,1)/100; % presumably quoted in percent - TODO confirm
    beta_00(i) = coef(1); % beta_00 coincides with the spot vol (see equation (8))
    beta_01(i) = coef(2); beta_02(i) = coef(3); beta_10(i) = coef(4);
    beta_11(i) = coef(5); beta_20(i) = coef(6);
end

% Keep only the days that passed all filters
beta_00 = beta_00(is_valid); beta_01 = beta_01(is_valid); beta_02 = beta_02(is_valid);
beta_10 = beta_10(is_valid); beta_11 = beta_11(is_valid); beta_20 = beta_20(is_valid);
r_valid = r_valid(is_valid); d_valid = d_valid(is_valid);

std_01 = std(beta_01); std_02 = std(beta_02); std_10 = std(beta_10);
std_11 = std(beta_11); std_20 = std(beta_20);

%% Parametric estimation
% Two GMM set-ups are estimated:
% Case 1 (Exact identification): moment conditions built from beta_01, beta_02, beta_10, beta_11
% Case 2 (Over-identification): moment conditions built from beta_01, beta_02, beta_10, beta_11, and beta_20
param_initial = [2 0.0625 0.25 -0.7]; % Starting point for the numerical optimization

% Trailing arguments shared by moment_fun, asy_var, and TwoStepGMM, in the order these functions are called with
reg_inputs = {beta_00, beta_01, beta_10, beta_11, beta_02, beta_20, ...
              std_01, std_10, std_11, std_02, std_20};

% ---- Case 1 (Exact identification) ----
% Solver settings for numerically solving g_n = 0
options = optimoptions('fsolve', 'Algorithm', 'Levenberg-Marquardt', 'MaxFunctionEvaluations', 60000, 'MaxIterations', 6000);
% Objective function: the four parameters are passed on as a row vector
fun = @(params) moment_fun('exact_gn', r_valid, d_valid, params(1:4), reg_inputs{:});
% Point estimates; this result corresponds to the first column of Table 1 of the code guide file
param_exact = fsolve(fun, param_initial, options);
% Asymptotic standard deviations; this result corresponds to the second column of Table 1 of the code guide file
var_exact = asy_var('exact', r_valid, d_valid, param_exact, reg_inputs{:});
std_exact = diag(var_exact).^(1/2);

% ---- Case 2 (Over-identification) ----
% Point estimates; this result corresponds to the third column of Table 1 of the code guide file
param_over = TwoStepGMM(r_valid, d_valid, param_initial, reg_inputs{:});
% Asymptotic standard deviations; this result corresponds to the fourth column of Table 1 of the code guide file
var_over = asy_var('over', r_valid, d_valid, param_over, reg_inputs{:});
std_over = diag(var_over).^(1/2);