function [ excess_returns, factors, mim_base_assets, mim_proj_var, pag, pagm ] = data_input( freq, alreadyin, testassets, model )
% DATA_INPUT  Read the annual or quarterly data base and assemble the test
% asset excess returns and the factor matrix for the requested model.
%
% Inputs
%   freq        1 = annual data    (Complete_A),
%               2 = quarterly data (Complete_Q)
%   alreadyin   0 = read the data anew from the .xls file and cache the
%                   data matrix in the corresponding .mat file,
%               1 = use the matrix pre-stored in the .mat file
%   testassets  1 = value-weighted 25 size-B/M sorts
%               2 = equal-weighted 25 size-B/M sorts
%               3 = value-weighted 25 size-B/M sorts augmented by the 17
%                   equal-weighted industry portfolios
%               4 = [SMB, HML]
%               5 = 10 size sorted portfolios (value-weighted)
%               6 = 10 B/M sorted portfolios  (value-weighted)
%   model       1 = CAPM, 2 = Fama-French three factor model, 3 = CDRM,
%               4 = MP-CDRM with the mimicking portfolio excess return,
%               6, 7 = market factor only; the base assets and the variable
%                   to be projected are returned for a mimicking portfolio
%                   estimation outside of this function,
%               8 = extended CDRM, HML and SMB orthogonalized w.r.t. the
%                   market return and pag,
%               9 = extended CDRM, HML and SMB orthogonalized w.r.t. the
%                   market return and pagm,
%               10 = market return and long-short hedge portfolio (CDH)
%
% Outputs
%   excess_returns   T x N matrix (N = number of test assets)
%   factors          T x K matrix (K = number of factors)
%   mim_base_assets  the base assets (in excess of rf) onto which pag is
%                    projected; [] unless model is 6 or 7
%   mim_proj_var     the patent growth variable to be projected;
%                    [] unless model is 6 or 7
%   pag              invention activity proxy (patent growth)
%   pagm             excess return of the mimicking portfolio: read from the
%                    data base for annual data, estimated here by OLS for
%                    quarterly data
%
% Data inputs are Complete_A and Complete_Q, the annual and quarterly data
% bases (supplied in .xls, .fmt and .mat format).
%
% Descriptive statistics of the factors are printed to the command window.

mim_base_assets=[];
mim_proj_var=[];

%% 1.) read in data annually or quarterly

switch freq
    case 1                  % annual data

        Complete_A=load_database('Complete_A','A2:EX83',alreadyin);

        mktrf=Complete_A(:,4);  % market return - risk-free rate  (net return)
        rf=Complete_A(:,3);     % risk-free rate                  (net return)
        smb=Complete_A(:,6);    % SMB                             (net return)
        hml=Complete_A(:,5);    % HML                             (net return)
        pag=Complete_A(:,8);    % invention activity proxy: pg
        pagm=Complete_A(:,9);   % excess return of mimicking portfolio
                                % estimated upfront
        base_assets=Complete_A(:,10:15);
                                % FF base asset returns as net returns, not
                                % in excess of rf
        sb25=Complete_A(:,16:40);
                                % value-weighted 25 B/M sorted portfolios
                                %                           (excess returns)
        sb25ew=Complete_A(:,41:65);
                                % equally-weighted 25 B/M sorts
                                %                           (excess returns)
        ind17ew=Complete_A(:,103:119);
                                % 17 ew industry portfolios (excess returns)
        s10=Complete_A(:,135:144);
                                % 10 size sorted portfolios (value-weighted)
        b10=Complete_A(:,145:154);
                                % 10 B/M sorted portfolios  (value-weighted)

    case 2                  % quarterly data

        Complete_Q=load_database('Complete_Q','A2:EN237',alreadyin);

        mktrf=Complete_Q(:,5);  % market return - risk-free rate  (net return)
        rf=Complete_Q(:,4);     % risk-free rate                  (net return)
        smb=Complete_Q(:,7);    % SMB                             (net return)
        hml=Complete_Q(:,6);    % HML                             (net return)
        pag=Complete_Q(:,8);    % invention activity proxy: pg

        base_assets=Complete_Q(:,9:14);
                                % FF base asset returns as net returns, not
                                % in excess of rf
        sb25=Complete_Q(:,15:39);
                                % value-weighted 25 B/M sorted portfolios
                                %                           (excess returns)
        sb25ew=Complete_Q(:,40:64);
                                % equally-weighted 25 B/M sorts
                                %                           (excess returns)
        ind17ew=Complete_Q(:,102:118);
                                % 17 ew industry portfolios (excess returns)
        s10=Complete_Q(:,125:134);
                                % 10 size sorted portfolios (value-weighted)
        b10=Complete_Q(:,135:144);
                                % 10 B/M sorted portfolios  (value-weighted)

        % Mimicking portfolio computation: OLS of pag on a constant and the
        % base asset excess returns; the slope coefficients (constant
        % dropped) are the portfolio weights.
        ex_base=base_assets-repmat(rf,[1,size(base_assets,2)]);
        xmat=[ones(size(ex_base,1),1),ex_base];
        mpweights=(xmat'*xmat)\(xmat'*pag);
        pagm=ex_base*mpweights(2:end);

    otherwise
        error('data_input:invalidFreq', ...
              'freq must be 1 (annual) or 2 (quarterly).');
end

% For CAPM and CDRM, we want the market (net) return, not the excess return
mkt=mktrf+rf;

%% 2.) type of test assets

switch testassets
    case 1                  % value-weighted size-B/M sorts
        exretmat=sb25;
    case 2                  % equal-weighted size-B/M sorts
        exretmat=sb25ew;
    case 3                  % industry augmented
        exretmat=[sb25,ind17ew];
    case 4                  % the SMB and HML factors themselves
        exretmat=[smb,hml];
    case 5                  % single sort size  (value weighted)
        exretmat=s10;
    case 6                  % single sort B/M   (value weighted)
        exretmat=b10;
    otherwise
        error('data_input:invalidTestassets', ...
              'testassets must be an integer between 1 and 6.');
end

%% 3.) type of model

switch model
    case 1                  % CAPM
        facmat0=mkt;                  % market return (not excess)
    case 2                  % Fama-French three factors model
        facmat0=[mktrf,hml,smb];      % excess market return
    case 3                  % CDRM
        facmat0=[mkt,pag];            % market return (not excess) and pag
    case 4                  % MP-CDRM with estimated mimicking portfolio
                            % excess returns
        facmat0=[mkt,pagm];
    case {6,7}
        mim_base_assets=base_assets-repmat(rf,[1 size(base_assets,2)]);
                                      % base assets onto which pag is
                                      % projected
        mim_proj_var=pag;             % patent growth variable to be
                                      % projected
        facmat0=mkt;                  % market (net) return
    case 8                  % an extended CDRM model with orthogonalized
                            % HML and SMB w.r.t. the market return and PAG
        HML_ortho=orthogonalize(hml,[mkt,pag]);
        SMB_ortho=orthogonalize(smb,[mkt,pag]);

        facmat0=[mkt,pag,HML_ortho,SMB_ortho];
                                      % with orthogonalized HML and SMB
    case 9                  % an extended CDRM model with orthogonalized
                            % HML and SMB w.r.t. the market return and PAGM
        HML_ortho=orthogonalize(hml,[mkt,pagm]);
        SMB_ortho=orthogonalize(smb,[mkt,pagm]);

        % NOTE(review): the second factor is pag although HML and SMB are
        % orthogonalized w.r.t. pagm; kept as in the original code -
        % confirm whether pagm was intended here.
        facmat0=[mkt,pag,HML_ortho,SMB_ortho];
                                      % with orthogonalized HML and SMB
    case 10                 % model with long-short high and low patenting
                            % activity beta
        if size(exretmat,2)<21
            error('data_input:incompatibleTestassets', ...
                  'model 10 needs test assets with at least 21 columns.');
        end
        CDH=exretmat(:,21)-exretmat(:,5); % Creative Destruction hedge
                                          % portfolio model
        % it is long in large growth stocks (negative invention beta) and
        % short in small value stocks (large positive invention beta)

        facmat0=[mkt,CDH];
    otherwise
        error('data_input:invalidModel', ...
              'model must be one of 1, 2, 3, 4, 6, 7, 8, 9 or 10.');
end


%% 4.) Descriptive statistics

display('*************************************************');
display('* mean - stdc - AC(1)*100 patenting growth (pg) *');
display('*************************************************');
display(moments_row(pag));
display('*************************************************');
display('* mean - stdc - AC(1)*100 SMB                   *');
display('*************************************************');
display(moments_row(smb));
display('*************************************************');
display('* mean - stdc - AC(1)*100 HML                   *');
display('*************************************************');
display(moments_row(hml));
display('*************************************************');
display('* mean - stdc - AC(1)*100 r^W                   *');
display('*************************************************');
display(moments_row(mkt));

if freq==1
    display('* mean - stdc - AC(1)*100 Mimicking Portfolio excess return *');
    display(moments_row(pagm));
    display('*************************************************');
    display('* correlation matrix risk factors * 100         *');
    display('*************************************************');
    display('* r^W, SMB, HML, patenting growth, mimicking portfolio  *');
    display(corr([mkt,smb,hml,pag,pagm])*100);
elseif freq==2
    display('* correlation matrix risk factors * 100         *');
    display('*************************************************');
    display('* r^W, SMB, HML, patenting growth               *');
    display(corr([mkt,smb,hml,pag])*100);

end



%% 5.) fill excess returns and factors

excess_returns=exretmat;

factors=facmat0;

end


function data = load_database( varname, xlsrange, alreadyin )
% Return the data matrix VARNAME, either read anew from [VARNAME '.xls']
% (alreadyin==0) or taken from the cache file [VARNAME '.mat'] (alreadyin==1).
%
% Only the data matrix itself is written to and read from the .mat file.
% Saving or loading the complete workspace would store and later restore
% the input arguments of data_input (freq, alreadyin, testassets, model)
% and thereby silently override the values passed by the caller.

matname=[varname '.mat'];

if alreadyin==0             % reads in the data anew
    data=xlsread([varname '.xls'],1,xlsrange);
    stored.(varname)=data;
    save(matname,'-struct','stored');
elseif alreadyin==1         % uses matrix pre-stored in .mat format
    stored=load(matname,varname);
    if ~isfield(stored,varname)
        error('data_input:missingVariable', ...
              'File %s does not contain the variable %s.',matname,varname);
    end
    data=stored.(varname);
else
    error('data_input:invalidAlreadyin', ...
          'alreadyin must be 0 (read .xls anew) or 1 (use stored .mat).');
end

end


function y_ortho = orthogonalize( y, regressors )
% Regress the column vector y on a constant and the columns of REGRESSORS
% by OLS and return the residual plus the estimated intercept.

x=[ones(size(y,1),1),regressors];
b=(x'*x)\(x'*y);
y_ortho=(y-x*b)+b(1,1);

end


function row = moments_row( x )
% Return [mean, standard deviation, first-order autocorrelation] of the
% column vector x, each multiplied by 100.

lagged=lagmatrix(x,1);      % first row is NaN and is dropped below
last=size(x,1);
row=[mean(x)*100,std(x)*100,corr(x(2:last),lagged(2:last))*100];

end

