 /* Bootstrap_TwoPassGMM_JFQApercemo.prg
    =============================
Bootstrapping the "Creative Destruction and Asset Pricing" results
(second revision) along the lines of Maio and Santa Clara 2012;
see Appendix A.2 in the paper.
We bootstrap the cross-sectional R-squared, the MAE, and the t-stats of the lambda and gamma estimates.
Results are printed on the screen.
This version: November 29, 2014
*/

/***********************************************************************/
library pgraph; graphset; //A base GAUSS version is sufficient
#include GMM_NCB_Collection_JFQA_v2.src; //include procedures file
/***********************************************************************/
//1. Choose the model, the test assets and the sampling frequency
/* model -- relevant choices are:
      1 = CAPM
      2 = Fama French model
      3 = CDRM (referred to as PAG in the code)
      6 = MP-CDRM three-pass regression using GMM results, including Lewellen/Wald test
      8 = extended CDRM with orthogonalized HML and SMB (w.r.t patenting growth)
     10 = CD hedge portfolio model */
model = 3;
/***********************************************************************/
/* testassets: 1 = 25 size-B/M, vw;  2 = 25 size-B/M, ew;  3 = 25sb+ind17;  6 = B/M single sort */
testassets = 1;
/* freq: 1 = annual, 2 = quarterly */
freq = 1;
/***********************************************************************/
/* In the paper we report bootstrap results for model-testassets-freq
3-1-1
3-1-2
3-3-1
2-1-1
2-1-2
2-3-1
6-1-1
6-1-2
6-3-1
1-1-1
1-1-2
1-3-1
*/

//Bootstrap filenames
//Every output file is named <prefix>_model_<model>_freq_<freq>_assets_<testassets>.
//The two pieces shared by all names are built once and reused below.
fn_model_tag  = ftos(model,"%*.*lf",1,0);
fn_sample_tag = "_freq_" $+ ftos(freq,"%*.*lf",1,0) $+ "_assets_" $+ ftos(testassets,"%*.*lf",1,0);

//cross-sectional R2
filename_r2 = "bootstrap_r2_model_" $+ fn_model_tag $+ fn_sample_tag;
//t-statistics of the lambdas
filename_lambda_t = "bootstrap_lambda_tstats_model_" $+ fn_model_tag $+ fn_sample_tag;
//t-statistics of the implied gammas (=- SDF-b t-statistics)
filename_gamma_t = "bootstrap_gamma_tstats_model_" $+ fn_model_tag $+ fn_sample_tag;
//mean absolute pricing error
filename_MAE = "bootstrap_MAE_model_" $+ fn_model_tag $+ fn_sample_tag;

//implied ICAPM parameters: rra t-stat for Fama-French, CDRM and MP-CDRM ...
if model==2 or model==3 or model==4 or model==6;
    filename_rra_t = "bootstrap_rra_tstat_model_" $+ fn_model_tag $+ fn_sample_tag;
endif;
//... and the gamma_N t-stat only for CDRM and MP_CDRM
if model==3 or model==4 or model==6;
    filename_gamma_N_t = "bootstrap_gamma_N_tstat_model_" $+ fn_model_tag $+ fn_sample_tag;
endif;

no_samples =4999; //number of bootstrap replications
run_boot=1; //set to one if you want to run the bootstrap, set to zero if output files are already there

/***********************************************************************/
// Bootstrap driver.
// When run_boot==1 this section loops no_samples+1 times. In iterations 1..no_samples the
// rows of the returns and of the factors are redrawn with replacement and the selected
// model is re-estimated; iteration no_samples+1 uses the original data. Row j of each
// result matrix receives the statistics of iteration j, so the last row always holds the
// statistics of the actual sample. Afterwards the result matrices are saved to disk under
// the names held in the filename_ string variables.
if run_boot==1;//if you want to run the bootstrap
    nlsys_dummy=0; //do not change
    #include data_input_GMM_all.run;//include data input procedure
    // NOTE(review): excess_returns, factors and (for model 6) mim_proj_var and mim_base_assets
    // are used below without being assigned in this file; presumably the include above
    // defines them -- confirm.
    seed=1112; //random generator seed to replicate results
    
    // Initialize bootstrap files
    // (each has no_samples+1 rows: one per replication plus a final row for the original data)
    maio_r2=zeros(no_samples+1,1);
    MAE_vec=zeros(no_samples+1,1);
    tstats_lambda=zeros(no_samples+1,cols(factors));
    if model==2 or model==3 or model==4 or model==6 or model==8 or model==9 or model==10;
        rra_t_vec=zeros(no_samples+1,1); //t-stat of rra coefficient implied by beta rep
        if model==3 or model==4 or model==6;
            gamma_N_t_vec=zeros(no_samples+1,1); //and t-stat of gamma_N for CDRM and MP-CDRM
        endif;
    endif;
    tstats_gamma=zeros(no_samples+1,cols(factors));
    
    excess_returns_keep=excess_returns; //keep the original data
    factors_keep=factors;
    if model==6;//mimicking portfolio two-pass GMM
        mim_proj_var_keep=mim_proj_var; 
        mim_base_assets_keep=mim_base_assets;
        // model 6 needs one extra column in the result matrices (see the comment on the next line)
        tstats_lambda=zeros(no_samples+1,cols(factors)+1);
        // NOTE(review): lambdas is not referenced again in this file; it may be read by included code -- confirm before removing
        lambdas=zeros(no_samples+1,cols(factors)+1);//projection factor is not in factors
        tstats_gamma=zeros(no_samples+1,cols(factors)+1);//same for implied gammas
    endif;
    // Resample the data
    // (the loop body below is not indented, but everything up to "endfor" belongs to it)
    for j(1,no_samples+1,1);
    //bootstrap indices for returns: ceil(u*T) turns uniform draws u into row numbers 1..T
    // NOTE(review): assumes rndus never returns exactly 0 (that would give row index 0) and
    // that it advances seed between calls, so that each draw differs -- confirm in the GAUSS docs
    indexvec_er=ceil(rndus(rows(excess_returns_keep),1,seed)*rows(excess_returns_keep));
    excess_returns=excess_returns_keep[indexvec_er,.];
    //bootstrap indices for factors (a separate draw from the one used for the returns)
    indexvec_f=ceil(rndus(rows(factors_keep),1,seed)*rows(factors_keep));
    factors=factors_keep[indexvec_f,.];

    if model ==6; //for mimicking pf with GMM
        // the mimicking-portfolio inputs are resampled with the same row indices as the factors
        mim_proj_var=mim_proj_var_keep[indexvec_f,.];
        mim_base_assets=mim_base_assets_keep[indexvec_f,.];
    endif;

    if j==no_samples+1; //for the final estimation use the real data
        excess_returns=excess_returns_keep;
        factors=factors_keep;
        if model==6;
            mim_proj_var=mim_proj_var_keep; 
            mim_base_assets=mim_base_assets_keep;
        endif;
    endif;

    //now run the two-pass regression as usual
    if model .le 4 or model==8 or model==9 or model==10;
        // run the two-pass regression/GMM; models 1-4 and 8, 9, 10 use this
        // NOTE(review): param_est_ols, param_se_OLS, par_var and model_implied_expected_return
        // are read below but not assigned in this file; presumably the include sets them -- confirm.
        #include Two_Pass_Regression_GMM_JFQA.run; // code block for all models
        N=cols(excess_returns); //Define for convenience
        K=cols(factors);
        // t-stats for estimates have not yet been computed we do it here ..
        t=param_est_ols./param_se_OLS;//all t-stats
        //lambdas: the entries of the parameter vector after the first N*(K+1) elements
        lambdas_mat=param_est_ols[cols(excess_returns)*(cols(factors)+1)+1:rows(param_est_ols)];
        lambdas_t_mat=t[cols(excess_returns)*(cols(factors)+1)+1:rows(param_est_ols)];
        //Enter the bootstrap matrices
        tstats_lambda[j,.]=lambdas_t_mat';
        //Report results
        "******************";
        "Estimated lambdas";;
        lambdas_mat;
        "Lambdas t-stats";;
        lambdas_t_mat;
        "Lambdas p-values in %";;
        (1-cdfn(abs(lambdas_t_mat)))*100*2; //two-sided p-values from the standard normal cdf
        "******************";
    
        //R-squared along the lines of Maio Santa-Clara 2012 and MAE
        mexret_fit = model_implied_expected_return;
        mexret = meanc(excess_returns);
        alpha_hat=mexret-mexret_fit; //pricing errors: mean realized minus model-implied return
        alpha_hat_demeaned=alpha_hat-meanc(alpha_hat);
        mexret_demeaned=mexret-meanc(mexret);
        //one minus the ratio of the sums of squares of demeaned pricing errors and demeaned mean returns
        rsq=1-((sumc(alpha_hat_demeaned.^2))/(sumc(mexret_demeaned.^2)));
        MAE=meanc(abs(alpha_hat));
	"****************************************************";
	"Mean absolute pricing error x 100 :  ";MAE*100;
	"****************************************************";
          
        if model==2 or model==3 or model==4 or model == 8 or model == 9 or model == 10; // ICAPM consistency based on beta-representation
            "Plausibility of the implied risk aversion coefficients of the ICAPM (Maio and Santa Clara 2012)";
            //implied gammas: inverse of (factors'factors/T - mean*mean') times the lambdas
            gamma_vec=invpd(1/rows(factors)*(factors'*factors)-meanc(factors)*(meanc(factors))')*lambdas_mat; 
            //compare previous line to eqn 12 in CDRM paper!!!
            "rra estimate computed from beta-representation ";; gamma_vec[1];
            //lower-right K x K block of par_var, used as the covariance matrix of the lambdas
            vc_lambda=par_var[rows(par_var)-cols(factors)+1:rows(par_var),rows(par_var)-cols(factors)+1:rows(par_var)];
            //sandwich form: the same inverse on both sides of vc_lambda/T
            vc_gamma=invpd(1/rows(factors)*(factors'*factors)-meanc(factors)*(meanc(factors))')*
            (vc_lambda/(rows(factors)))*invpd(1/rows(factors)*(factors'*factors)-meanc(factors)*(meanc(factors))');
            se_gamma=sqrt(diag(vc_gamma));
            t_gamma=gamma_vec./se_gamma;//gamma t-statistics
            "******************************************************";
            rra=gamma_vec[1]; "rra ";rra;
            "rra estimate's t-stat. "; rra_t=rra/se_gamma[1]; rra_t;
            "rra estimate's s.e. "; se_gamma[1];
            //Write in bootstrap files
            rra_t_vec[j,.]=rra_t;
            tstats_gamma[j,.]=t_gamma';
            if model==3 or model==4; //only for CDRM and MP-CDRM
                gamma_N=gamma_vec[2];
                gamma_N_t=gamma_vec[2]/se_gamma[2];
                "gamma_N implied by beta representations";
                "gamma_N ";; gamma_vec[2];
                "gamma_N s.e.  ";;se_gamma[2];;"gamma_N t-stat ";;gamma_vec[2]/se_gamma[2];
                //Write in bootstrap files
                gamma_N_t_vec[j,.]=gamma_N_t;
                ;
            endif; // model 3 or 4
        endif; //model 2,3,4,8,9 10     
 
    elseif model==6;
        analysis=7; //do not change
        GLS=0; // do not change
        c_fac=cols(factors)+cols(mim_proj_var);
        // NOTE(review): get_params, var_GMM, beta_lambda and Jstat_GRS are not defined in this
        // file; presumably they come from the included procedures file -- confirm.
        {startvals}=get_params; // Estimates by OLS mimicking weights, betas, and lambdas
        param=startvals;
        {Avar_g_T,Avar_param}=var_GMM(param);        
        {betalambda}=beta_lambda(param);//needed for implied expected return
        t_stat=param./sqrt(diag(Avar_param)/rows(mim_proj_var));
        //t-stats of the two lambdas: first and second entry after the first N*(1+c_fac) parameters
        lambda_cs_market_tstat=t_stat[cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+1:cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+1];
        lambda_cs_pagm_tstat=t_stat[cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+2:cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+2];
      
        //extract lambda_W and lambda_~N from parameter vector, call it lambdas_mat
        lambdas_mat=param[cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+1:cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+2];
        //extract the mimicking portfolio parameters 
        mim_par=param[cols(excess_returns)*(c_fac+1)+2*c_fac+cols(mim_proj_var)+1:cols(excess_returns)*(c_fac+1)+2*c_fac+cols(mim_proj_var)+cols(mim_base_assets)];
        //construct the matrix of factors: market return and mimicking portfolio, call it factors_mat
        mim_pag=mim_base_assets*mim_par;
        factors_mat=factors~mim_pag;
        //extract the variance covariance sub-matrix of the lambdas
        vc_lambda=Avar_param[cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+1:cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+2, cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+1:cols(excess_returns)*(1+cols(factors)+cols(mim_proj_var))+2];
        //compute implied gamma from beta representation see Eqn (12) in the paper
        "RRA and gamma_N recovered from beta estimates";
        gamma_vec=invpd(1/rows(factors_mat)*(factors_mat'*factors_mat)-meanc(factors_mat)*(meanc(factors_mat))')*lambdas_mat; 
        //VC matrix of gamma estimates: see appendix A-1 in the paper
        vc_gamma=invpd(1/rows(factors_mat)*(factors_mat'*factors_mat)-meanc(factors_mat)*(meanc(factors_mat))')*
        (vc_lambda/(rows(factors_mat)))*invpd(1/rows(factors_mat)*(factors_mat'*factors_mat)-meanc(factors_mat)*(meanc(factors_mat))');
        se_gamma=sqrt(diag(vc_gamma));
        t_gamma=gamma_vec./se_gamma;
        "******************************************************";
        "rra estimate computed from beta-representation ";; rra=gamma_vec[1]; rra;
        "rra estimate's t-stat. "; rra_t=rra/se_gamma[1]; rra_t;
        "rra estimate's s.e. "; se_gamma[1];
        //Write out for bootstrap
        rra_t_vec[j,.]=rra_t;
        tstats_gamma[j,.]=t_gamma';//all gammas t-statistics
        gamma_N_t=gamma_vec[2]/se_gamma[2];//gamma_N t-stat
        "gamma_N implied by beta representations";
        "gamma_N ";;gamma_vec[2];
        "gamma_N s.e.  ";;se_gamma[2];;"gamma_N t-stat ";;gamma_vec[2]/se_gamma[2];
        //Write to bootstrap file
        gamma_N_t_vec[j,.]=gamma_N_t;
                
        // Compute mean realized excess returns and average predicted excess returns
        mexret=meanc(excess_returns);
        model_implied_expected_return=sumc(betalambda);
        //R-squared along the lines of Maio Santa-Clara 2012
        //(same formulas as in the branch for the other models)
        mexret_fit = model_implied_expected_return;
        mexret = meanc(excess_returns); //recomputed; same value as a few lines above
        alpha_hat=mexret-mexret_fit;
        alpha_hat_demeaned=alpha_hat-meanc(alpha_hat);
        mexret_demeaned=mexret-meanc(mexret);
        rsq=1-((sumc(alpha_hat_demeaned.^2))/(sumc(mexret_demeaned.^2)));
        MAE=meanc(abs(alpha_hat));
	"****************************************************";
	"Mean absolute pricing error x 100 :  ";MAE*100;
	"****************************************************";
        /* Compute a J-type/GRS statistic for the validity of the asset pricing model */
        {J_last_N, p_value_J_last_N,g_T_last_N,t_stat_last_N,corr_g_T,var_g_T}=Jstat_GRS(param);
        "****************************************";
        "J/Shanken-statistic from GMM"; J_last_N;
        "p-value"; p_value_J_last_N;
        "****************************************";
        "Avg. pricing errors (g_T)and t-stats"; g_T_last_N~t_stat_last_N;   
        "****************************************";
        //write bootstrapped lambda t-stats (market first, mimicking portfolio second)
        tstats_lambda[j,.]=lambda_cs_market_tstat~lambda_cs_pagm_tstat;
    endif; /* ends model distinction */
    "******************************************************";
    // rsq and MAE are set in either model branch above; store them for iteration j
    maio_r2[j]=rsq; //bootstrapped R^2_cs write out
    MAE_vec[j]=MAE; //bootstrapped MAE write out
endfor; //bootstrap loop

 // Save the result matrices under the names held in the filename_ string variables
 save path=.\ ^filename_r2=maio_r2; 
 save path=.\ ^filename_MAE=MAE_vec;
 save path=.\ ^filename_lambda_t=tstats_lambda;
 save path=.\ ^filename_gamma_t=tstats_gamma;
 
//for Fama-French, CDRM and MP-CDRM: implied ICAPM parameters
//bit redundant as these are the gamma tstats..
//(rra_t_vec is also filled for models 8, 9 and 10 above, but it is only saved for models 2, 3, 4 and 6)
 if model==2 or model==3 or model==4 or model == 6;
    save path=.\ ^filename_rra_t=rra_t_vec;
    if model==3 or model==4 or model==6;//only for CDRM and MP_CDRM
        save path=.\ ^filename_gamma_N_t=gamma_N_t_vec;
    endif;
 endif;
endif; //if run_boot=1

// From here on the program could be run separately if needed:
// print a short header and read the stored bootstrap matrices back in.
print "*******************Analyse bootstrap results***************";
print "Model: ";; print model;
print "Testassets: ";; print testassets;
print "Frequency: 1=annual, 2=quarterly "; print freq;
print "****************************************";

// Matrices that exist for every model
loadm MAE_vec=^filename_MAE;
loadm maio_r2 = ^filename_r2;
loadm tstats_gamma = ^filename_gamma_t;
loadm tstats_lambda = ^filename_lambda_t;

// rra t-stats exist for Fama-French, CDRM and MP-CDRM
if model==2 or model==3 or model==4 or model==6;
    loadm rra_t_vec=^filename_rra_t;
endif;
// gamma_N t-stats exist only for CDRM and MP_CDRM
if model==3 or model==4 or model==6;
    loadm gamma_N_t_vec=^filename_gamma_N_t;
endif;

//Bootstrap p-values (in percent) for the t-statistics.
//Row no_samples+1 of each matrix holds the t-stats obtained from the original data; the
//p-value is the share of the no_samples bootstrap replications whose absolute t-stat is
//strictly larger. The last row never exceeds itself, so it adds nothing to the count.

//bootstrap p-values for the lambdas
out1=abs(tstats_lambda).>abs(tstats_lambda[no_samples+1,.]);
pval_tstat_lambda=sumc(out1)/no_samples*100;
"****************************************";
"bootstrap P-values in percent for lambdas";
pval_tstat_lambda;
"****************************************";
//bootstrap p-values for the implied gammas
//Only models 2, 3, 4, 6, 8, 9 and 10 fill tstats_gamma in the bootstrap loop. For any other
//model (e.g. the CAPM) the matrix is all zeros, and the comparison below would print a
//p-value of 0 that looks like strong significance; the section is therefore skipped.
if model==2 or model==3 or model==4 or model==6 or model==8 or model==9 or model==10;
    out1=abs(tstats_gamma).>abs(tstats_gamma[no_samples+1,.]);
    pval_tstat_gamma=sumc(out1)/no_samples*100;
    "****************************************";
    "bootstrap P-values in percent for model-implied gammas";
    pval_tstat_gamma;
    "****************************************";
endif;
//bootstrap for RRA and gamma_N t-stats as implied by beta-representation
if model==2 or model==3 or model==4 or model == 6;//Fama-French, CDRM and MP-CDRM
    out2=abs(rra_t_vec).>abs(rra_t_vec[no_samples+1,.]);
    pval_rra_t=sumc(out2)/no_samples*100;
    "****************************************";
    "bootstrap P-val. in percent for model-implied RRA:";
    pval_rra_t;
    "****************************************";
    if model==3 or model==4 or model==6;//for CDRM and MP_CDRM
        out3=abs(gamma_N_t_vec).>abs(gamma_N_t_vec[no_samples+1,.]);
        pval_gamma_N_t=sumc(out3)/no_samples*100;
        "****************************************";
        "bootstrap P-val. in percent for model-implied gamma_N:";
        pval_gamma_N_t;
        "****************************************";
    endif;
endif;

// Bootstrap p-values (in percent) for the cross-sectional R2 and the MAE.
// Element no_samples+1 of each vector is the statistic of the original data. That element
// can never be strictly larger/smaller than itself, so it contributes nothing to the
// count; the count is divided by no_samples (the number of bootstrap replications), the
// same denominator that is used for the t-stat p-values, instead of averaging over all
// no_samples+1 rows.

// p-value for R2 in percent: share of replications with a larger R2 than the actual one
out4=maio_r2.>maio_r2[no_samples+1];
"bootstrapped p-value of R2 in percent";
sumc(out4)/no_samples*100;
"****************************************";
// p-value for MAE in percent: share of replications with a smaller MAE than the actual one
out6=MAE_vec.<MAE_vec[no_samples+1];
"bootstrapped p-value of MAE in percent";
sumc(out6)/no_samples*100;
"****************************************";
