% Bootstrap_TwoPassGMM_JFQA_v2.m
% ========================================

% Bootstrapping the "Creative Destruction and Asset Pricing" results (second
% revision) along the lines of Maio and Santa-Clara (2012); see the Appendix
% of the paper

% We bootstrap the cross-sectional R-squared, the MAE, and the t-stats of
% the lambda and gamma estimates

% version: November 29, 2014
% ========================================
clear
clc

% 1. Configuration: choose the model, the test assets and the sampling
%    frequency for which the bootstrap is run
model=3;            % relevant choices:
                    % 1  = CAPM
                    % 2  = Fama French model
                    % 3  = CDRM (referred to as PAG in the code)
                    % 6  = MP-CDRM three-pass regression using GMM results,
                    %     including Lewellen/Wald test
                    % 8  = extended CDRM with orthogonalized HML and SMB
                    %     (w.r.t. patenting growth)
                    % 10 = CD hedge portfolio model
testassets=1;       % 1  = 25 size-B/M, vw
                    % 2  = 25 size-B/M, ew
                    % 3  = 25sb+ind17
                    % 6  = B/M single sort
freq=1;             % 1  = annual
                    % 2  = quarterly

% Combinations model-testassets-freq for which bootstrap results are
% reported in the paper:
% 1-1-1
% 1-1-2
% 1-3-1
% 3-1-1
% 3-1-2
% 3-3-1
% 2-1-1
% 2-3-1
% 6-1-1
% 6-1-2
% 6-3-1

% Names of the bootstrap output files. Every name has the form
%   <prefix><model>_freq_<freq>_assets_<testassets>.mat
fr='_freq_';
as='_assets_';
ma='.mat';
% tail shared by all file names (identifies model, frequency, test assets)
file_tail=[num2str(model),fr,num2str(freq),as,num2str(testassets),ma];

% cross-sectional R-squared
bn1='bootstrap_r2_model_';
filename_r2=[bn1,file_tail];
% t-statistics of the lambda estimates
bn2='bootstrap_lambda_tstats_model_';
filename_lambda_t=[bn2,file_tail];
% t-statistics of the implied gammas (=SDF-b t-statistics)
bn3='bootstrap_gamma_tstats_model_';
filename_gamma_t=[bn3,file_tail];
% mean absolute pricing error (MAE)
bn4='bootstrap_MAE_model_';
filename_MAE=[bn4,file_tail];

% implied ICAPM parameters: the rra t-stat file is used for models 2, 3, 4
% and 6, the gamma_N t-stat file only for models 3, 4 and 6
if any(model==[2 3 4 6])
    bn5='bootstrap_rra_tstat_model_';
    filename_rra_t=[bn5,file_tail];
end
if any(model==[3 4 6])
    bn6='bootstrap_gamma_N_tstat_model_';
    filename_gamma_N_t=[bn6,file_tail];
end

% ========================================
no_samples=4999;        % number of bootstrap replications
run_boot=1;             % 1 = run the bootstrap and write the output files
                        % 0 = skip the bootstrap; the output files must
                        %     already exist
% ========================================

if run_boot==1
   % Bootstrap driver: reads the data, re-estimates the model on
   % no_samples pseudo samples plus once on the actual data (stored in the
   % last row, no_samples+1, of every result array), and saves the result
   % arrays to the .mat files named in the setup section.
   alreadyin=0;
   % data input
   [ excess_returns, factors, mim_base_assets, mim_proj_var, pag, pagm ] = data_input( freq, alreadyin, testassets, model );
   
   % for convenience, define
   T=size(excess_returns,1);    % rows of the return matrix (resampled below)
   N=size(excess_returns,2);    % columns of the return matrix
   K=size(factors,2);           % columns of the factor matrix
   c_fac=[];

   rng(1112);           % fixed seed so that the results can be replicated
   
   % initialize bootstrap files
   maio_r2=zeros(no_samples+1,1);
   MAE_vec=zeros(no_samples+1,1);
   tstats_lambda=zeros(no_samples+1,K);
   tstats_gamma=zeros(no_samples+1,K);
   if model==2 || model==3 || model==4 || model==6 || model==8 || model==10
      rra_t_vec=zeros(no_samples+1,1);      
                        % t-stat of rra coefficient implied by the beta rep
                        % NOTE(review): filled for models 8 and 10 as well,
                        % but only saved for models 2, 3, 4 and 6 below
      if model==3 || model==4 || model==6
         gamma_N_t_vec=zeros(no_samples+1,1);
                        % t-stat of gamma_N for CDRM and MP_CDRM
      end  
   end
   
   % keep the original data
   excess_returns_keep=excess_returns;
   factors_keep=factors;
   
   if model==6          % mimicking portfolio two-pass GMM
      mim_proj_var_keep=mim_proj_var;
      mim_base_assets_keep=mim_base_assets;
      
      % for model 6 the result arrays have K+1 columns (two lambdas /
      % gammas are stored per replication below)
      tstats_lambda=zeros(no_samples+1,K+1);
      % NOTE(review): lambdas is allocated but never filled or saved in
      % this script
      lambdas=zeros(no_samples+1,K+1);
      tstats_gamma=zeros(no_samples+1,K+1);                  
   end
   
   % 2. resample the data
   
   for j=1:no_samples+1
       % bootstrap indices for returns (rows drawn with replacement)
       indexvec_er=ceil(rand(T,1)*T);
       excess_returns=excess_returns_keep(indexvec_er,:);
       % bootstrap indices for factors: drawn separately from the return
       % indices, so the pairing of returns and factors in time is not
       % preserved in the pseudo samples
       indexvec_f=ceil(rand(T,1)*T);
       factors=factors_keep(indexvec_f,:);
       
       if model==6
          % projection variables and base assets are resampled with the
          % same indices as the factors
          mim_proj_var=mim_proj_var_keep(indexvec_f,:);
          mim_base_assets=mim_base_assets_keep(indexvec_f,:);
       end
       
       if j==no_samples+1   % for the final estimation use the real data
          excess_returns=excess_returns_keep;
          factors=factors_keep;
          if model==6
              mim_proj_var=mim_proj_var_keep;
              mim_base_assets=mim_base_assets_keep;
          end
       end
       
       % 3. run the two-pass regression as usual
       if model<=4 || model==8 || model==10
          % in order to run the two-pass regression/GMM all model 1-4 and 8
          % and 10 use this code block:
          
          % run the script two_pass_regression_GMM in this workspace;
          % presumably it provides param_est_OLS, param_se_OLS, par_var and
          % model_implied_expected_return used below -- they are not
          % defined anywhere in this file (TODO confirm)
          two_pass_regression_GMM;        
                                
          % t-stats for the estimates have not yet been computed
          t=param_est_OLS./param_se_OLS;
          % the lambdas are the entries after the first N*(K+1) parameters
          lambda_idx=N*(K+1)+1:size(param_est_OLS,1);
          lambdas_mat=param_est_OLS(lambda_idx);
          lambdas_t_mat=t(lambda_idx);
          % enter the bootstrap matrices
          tstats_lambda(j,:)=lambdas_t_mat';
          
          % report results
          display('*************************************************');
          display(' Estimated lambdas                               ');
          display(lambdas_mat);
          display(' lambdas t-stats                                 ');
          disp(lambdas_t_mat);
          display(' lambdas p-values in percent                     ');
          disp((1-normcdf(abs(lambdas_t_mat)))*100*2);
          display('*************************************************');
          
          % R-squared along the lines of Maio Santa-Clara 2012 and MAE
          mexret_fit=model_implied_expected_return;
          mexret=mean(excess_returns)';
          alpha_hat=mexret-mexret_fit;
          alpha_hat_demeaned=alpha_hat-mean(alpha_hat)';
          mexret_demeaned=mexret-mean(mexret)';
          
          rsq=1-((sum(alpha_hat_demeaned.^2))/(sum(mexret_demeaned.^2)));
          MAE=mean(abs(alpha_hat));
          
          display('*************************************************');
          display(' Mean absolute pricing error x 100                ');
          display(MAE*100);
          display('*************************************************');
          
          if model==2 || model==3 || model==4|| model==8 || model==10
             % ICAPM consistency based on beta-representation
             display(' Plausibility of the implied risk aversion coefficients');
             display(' of the ICAPM (Maio and Santa-Clara(2012))             ');
             % compare to equation 12 in CDRM paper
             % inverse of the sample covariance matrix of the factors;
             % computed once per replication and reused for gamma_vec and
             % on both sides of vc_gamma
             inv_factor_cov=inv(1/T*(factors'*factors)-(mean(factors)')*mean(factors));
             gamma_vec=inv_factor_cov*lambdas_mat;
             display(' rra estimate computed from beta_representation        ');
             rra=gamma_vec(1,1);
             disp(rra);
             
             % lower-right K x K block of par_var
             last_K=size(par_var,1)-K+1:size(par_var,1);
             vc_lambda=par_var(last_K,last_K);
             vc_gamma=inv_factor_cov*(vc_lambda/T)*inv_factor_cov;
             se_gamma=sqrt(diag(vc_gamma));
             t_gamma=gamma_vec./se_gamma;
             
             display('*************************************************');
             rra_t=rra/se_gamma(1,1);
             display(' rra estimates t-stat                            ');
             display(rra_t);
             display(' rra estimates s.e.                              ');
             display(se_gamma(1,1));             
             display('*************************************************');
             
             % write in bootstrap files
             rra_t_vec(j,:)=rra_t;
             tstats_gamma(j,:)=t_gamma';
             if model==3 || model==4    % only for CDRM and MP-CDRM
                 gamma_N=gamma_vec(2,1);
                 gamma_N_t=gamma_vec(2,1)/se_gamma(2,1);
                 display(' gamma_N implied by beta representations ');
                 % bracket concatenation instead of strcat: strcat strips
                 % the trailing blanks of the label
                 disp([' gamma_N        =  ',num2str(gamma_N)]);
                 disp([' gamma_N s.e.   =  ',num2str(se_gamma(2,1))]);
                 disp([' gamma_N t-stat =  ',num2str(gamma_N_t)]);
                 %write in bootstrap file
                 gamma_N_t_vec(j,:)=gamma_N_t;
             end
          end
          
       elseif model==6
           analysis=7;
           c_fac=K+size(mim_proj_var,2);
           
           % estimates by OLS mimicking weights, betas, and lambdas
           [ startvals ] = get_parameters( mim_base_assets, mim_proj_var, analysis, excess_returns, factors );
           param=startvals;
           % NOTE(review): factor_type is not assigned anywhere in this
           % file; it must resolve to a function or variable provided
           % elsewhere, otherwise this call fails -- confirm
           [ Avar_g_T,Avar_param ] = var_GMM( param, analysis, excess_returns, factors, factor_type, c_fac, mim_proj_var, mim_base_assets );
           [ betalambda ] = get_beta_lambda( param, analysis, excess_returns, factors, c_fac );
           % needed for implied expected return
           t_stat=param./sqrt(diag(Avar_param)/size(mim_proj_var,1));
           % positions of the two lambdas in the parameter vector: the two
           % entries after the first N*(1+c_fac) parameters
           lambda_idx=N*(1+c_fac)+(1:2);
           lambda_cs_market_tstat=t_stat(lambda_idx(1));
           lambda_cs_pagm_tstat=t_stat(lambda_idx(2));
           
           % extract lambda_W and lambda_N from the parameter vector, call
           % it lambdas_mat
           lambdas_mat=param(lambda_idx);
           % extract the mimicking portfolio parameters
           mim_offset=N*(1+c_fac)+2*c_fac+size(mim_proj_var,2);
           mim_par=param(mim_offset+1:mim_offset+size(mim_base_assets,2));
           % construct the vector of factors: market return and mimicking
           % portfolio, call it factors_mat
           mim_pag=mim_base_assets*mim_par;
           factors_mat=[factors,mim_pag];
           % extract the variance covariance sub-matrix of the lambdas
           vc_lambda=Avar_param(lambda_idx,lambda_idx);
           
           % compute implied gamma from beta representation, see Eqn (12)
           % in the paper
           display(' RRA and gamma_N recovered from beta estimates ');
           % inverse of the sample covariance matrix of factors_mat;
           % computed once per replication and reused below
           inv_factor_cov=inv(1/T*(factors_mat'*factors_mat)-(mean(factors_mat)')*mean(factors_mat));
           gamma_vec=inv_factor_cov*lambdas_mat;
           % VC matrix of gamma estimates: see Appendix A-1 in the paper
           vc_gamma=inv_factor_cov*(vc_lambda/T)*inv_factor_cov;
           se_gamma=sqrt(diag(vc_gamma));
           t_gamma=gamma_vec./se_gamma;
           
           display('*************************************************');
           display(' rra estimate computed from beta representation  ');
           rra=gamma_vec(1,1);
           display(rra);
           display(' rra estimates t-stat                            ');
           rra_t=rra/se_gamma(1,1);
           display(rra_t);                        
           display(' rra estimates s.e.                              ');
           display(se_gamma(1,1));             
           display('*************************************************');
           
           % write to bootstrap file
           rra_t_vec(j,:)=rra_t;
           tstats_gamma(j,:)=t_gamma';
           gamma_N_t=gamma_vec(2,1)/se_gamma(2,1);
           
           display(' gamma_N implied by beta representations        ');
           % bracket concatenation instead of strcat: strcat strips the
           % trailing blanks of the label
           disp([' gamma_N        =  ',num2str(gamma_vec(2,1))]);
           disp([' gamma_N s.e.   =  ',num2str(se_gamma(2,1))]);
           disp([' gamma_N t-stat =  ',num2str(gamma_N_t)]);
           
           % write to bootstrap file
           gamma_N_t_vec(j,:)=gamma_N_t;
           
           % compute mean realized excess returns and average predicted
           % excess returns
           % NOTE(review): sum() over the first dimension returns a row
           % vector for a matrix input, whereas mexret below is a column;
           % confirm that the shape of betalambda makes the subtraction
           % elementwise
           model_implied_expected_return=sum(betalambda);
           mexret_fit=model_implied_expected_return;
           % R-squared along the lines of Maio Santa-Clara 2012 and MAE
           mexret=mean(excess_returns)';
           alpha_hat=mexret-mexret_fit;
           alpha_hat_demeaned=alpha_hat-mean(alpha_hat)';
           mexret_demeaned=mexret-mean(mexret)';
          
           rsq=1-((sum(alpha_hat_demeaned.^2))/(sum(mexret_demeaned.^2)));
           MAE=mean(abs(alpha_hat));
           
           display('*************************************************');
           display(' Mean absolute pricing error x 100               ');
           display(MAE*100);
           display('*************************************************');
           
           % compute a J-type/GRS statistic for the validity of the asset
           % pricing model
           [ J_last_N, p_value_J_last_N, g_T_last_N, t_stats_last_N, corr_g_T, Avar_g_T_last_N ] = Jstat_GRS( param, analysis, excess_returns, factors, factor_type, c_fac, mim_proj_var, mim_base_assets );
           
           display('*************************************************');
           display(' J/Shanken statistic from GMM                    ');
           display(J_last_N);
           disp([' p-value =  ',num2str(p_value_J_last_N)]);
           display('*************************************************');
           display(' Avg. pricing errors (g_T) and t-stats           ');
           disp([g_T_last_N,t_stats_last_N]);
           display('*************************************************');
           
           % write bootstrapped lambdas and t-stats
           tstats_lambda(j,:)=[lambda_cs_market_tstat,lambda_cs_pagm_tstat];
       end
       
       maio_r2(j)=rsq;      % bootstrapped R^2_cs
       MAE_vec(j)=MAE;      % bootstrapped MAE
       
   end
   
   % 4. save the data
   
   save(filename_r2,'maio_r2');
   save(filename_MAE,'MAE_vec');
   save(filename_lambda_t,'tstats_lambda');
   save(filename_gamma_t,'tstats_gamma');
   
   % for Fama-French, CDRM and MP-CDRM: implied ICAPM parameters
   if model==2 || model==3 || model==4 || model==6
      save(filename_rra_t,'rra_t_vec'); 
      if model==3 || model==4 || model==6
         save(filename_gamma_N_t,'gamma_N_t_vec'); 
      end
   end
   
end

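% From here on the code can be run separately (run_boot=0), provided the
% setup section above has been executed (it defines the file names, model,
% testassets, freq and no_samples) and the bootstrap output files exist.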

% Analysis of the saved bootstrap results: loads the result arrays and
% prints bootstrap p-values (in percent). In every array the last row holds
% the estimate from the actual data and the rows above it hold the pseudo
% samples; a p-value is the share of pseudo samples whose statistic exceeds
% the one from the actual data.
display('*************************************************');
display(' Analysis of bootstrap results                   ');
% bracket concatenation instead of strcat: strcat strips the trailing
% blanks of the label
disp([' model      =  ',num2str(model)]);
disp([' testassets =  ',num2str(testassets)]);
disp([' frequency  =  ',num2str(freq)]);
display('*************************************************');

% load only the expected variable from each file
load(filename_r2,'maio_r2');
load(filename_lambda_t,'tstats_lambda');
load(filename_gamma_t,'tstats_gamma');
load(filename_MAE,'MAE_vec');

% for Fama-French, CDRM and MP-CDRM: implied ICAPM parameters
if model==2 || model==3 || model==4 || model==6
  load(filename_rra_t,'rra_t_vec'); 
  if model==3 || model==4 || model==6
     load(filename_gamma_N_t,'gamma_N_t_vec'); 
  end
end

% The number of replications is taken from the loaded arrays, so that the
% p-values stay correct when the files were produced with a different
% no_samples than the one currently set above.
if size(maio_r2,1)~=no_samples+1
   warning('Loaded bootstrap files contain %d replications, but no_samples is set to %d; using the number found in the files.',size(maio_r2,1)-1,no_samples);
end

% indicator: statistic of the row exceeds the statistic of the last row
% (actual data), column by column
exceeds_actual=@(x) x>repmat(x(end,:),[size(x,1),1]);
% share of exceedances among the pseudo samples, in percent (the last row
% never exceeds itself, so it contributes nothing to the sum)
pval_percent=@(ind) sum(ind,1)/(size(ind,1)-1)*100;

% bootstrap p-values for the lambdas
out1=exceeds_actual(abs(tstats_lambda));
pval_tstat_lambda=pval_percent(out1);

display('*************************************************');
display(' bootstrap p-values in percent for lambdas       ');
disp(pval_tstat_lambda);
display('*************************************************');

% bootstrap p-values for the implied gammas
out1=exceeds_actual(abs(tstats_gamma));
pval_tstat_gamma=pval_percent(out1);

display('*************************************************');
display(' bootstrap p-values in percent for model implied gammas  ');
disp(pval_tstat_gamma);
display('*************************************************');

% bootstrap for RRA and gamma_N t-stats as implied by beta-representation
if model==2 || model==3 || model==4 || model==6
   out2=exceeds_actual(abs(rra_t_vec));
   pval_rra_t=pval_percent(out2);
   
   display('*************************************************');
   display(' bootstrap p-values in percent for model implied RRA  ');
   disp(pval_rra_t);
   display('*************************************************');
   
   if model==3 || model==4 || model==6
      out3=exceeds_actual(abs(gamma_N_t_vec));
      pval_gamma_N_t=pval_percent(out3);
      
      display('*************************************************');
      display(' bootstrap p-values in percent for model implied gamma_N  ');
      disp(pval_gamma_N_t);
      display('*************************************************');
   end
end

% p-value for R^2 in percent
out4=exceeds_actual(maio_r2);
pval_maio_r2=pval_percent(out4);
display('*************************************************');
display(' bootstrapped p-value of R^2 in percent          ');
disp(pval_maio_r2);
display('*************************************************');

% p-value for MAE in percent
% NOTE(review): this is the share of pseudo samples with a LARGER MAE than
% the actual data; confirm that this is the intended direction
out5=exceeds_actual(MAE_vec);
pval_MAE=pval_percent(out5);
display('*************************************************');
display(' bootstrapped p-value of MAE in percent          ');
disp(pval_MAE);
display('*************************************************');