%macro specreg(data=,depvar=,specreg=,regendo=,regexo=,instr=,density=,trim=,het=,hetvar=,ilpm=,nboot=);
/* Author: Nicolas Moreau, University of Reunion Island, January 2017 */

options nodate nonumber; /* To remove the default SAS date and page number */
ods noproctitle;         /* To remove SAS procedure title */

data begtime;
begtime=time();          /* Computation time */

/************************************************************************/
/*                       Descriptive statistics                         */
/************************************************************************/

title "Desccriptive statistics";
proc means data=&data N Nmiss min P10 Q1 Median Mean Std Q3 P90 max maxdec=2;
var &depvar &specreg &regendo &regexo &instr;
run;
title ;

proc iml;
 
/*************************************************************************/
/*                     Error messages and warnings                       */
/*************************************************************************/

error=0;
%if &depvar= %then %do;
 print "ERROR: no dependent variable specified, estimation procedure stopped";
 error=error+1;
%end;
%else %do;
 use &data;
 read all var {&depvar} into d;
 close &data;
 call sortndx(ndx,d,1);
 nbvalues_d=nrow(uniqueby(d,1,ndx));
 min_d=min(d);
 max_d=max(d);
 pb=(nbvalues_d ^=2 | min_d ^=0 | max_d ^=1);
 error=error+pb;
 if pb=1 then print "ERROR: the dependent variable &depvar is not binary 0 1, estimation procedure stopped";
%end;
%if &specreg= %then %do;
 print "ERROR: no special regressor specified, estimation procedure stopped";
 error=error+1;
%end;
%if (&regexo= & &regendo=) %then %do;
 print "ERROR: no regressors specified, estimation procedure stopped";
 error=error+1;
%end;
%if (&regexo^= & &regendo=) %then %do;
 print "REMARK: no endogenous regressors specified, OLS estimation instead";
%end;
%if &instr= %then %do;
 print "REMARK: no instruments specified";
%end;
%if (&instr^= & &regendo^=) %then %do;
 %let nbz=%sysfunc(countw(&instr)); 
 %let nbxendo=%sysfunc(countw(&regendo)); 
 %put There are &nbz excluded instruments specified;
 %put There are &nbxendo endogenous regressors specified;
 if (&nbz<&nbxendo) then do;
  print "ERROR: the number of instruments is less than the number of endogenous regressors, estimation procedure stopped"; 
  error=error+1;
 end;
%end;
%if (&instr= & &regendo^=) %then %do;
 error=error+1;
 print "ERROR: the number of endogenous regressors is positive but not any instruments specified, estimation procedure stopped"; 
%end;
call symputx('error',error);
%put The number of specification errors detected is &error;

/***********************************************************************************************/
/* Estimation of "bhat", the estimated coefficients of S in an OLS linear regression of V on S */
/* + construction of the vector of residuals "uhat" from this regression                       */        
/***********************************************************************************************/

start uhat_;
N=nrow(v);
ncolS=ncol(S);                  /* Number of colums */
ss=J(ncolS+1,ncolS+1,0);        /* Add the constant term not included yet */
sv=J(ncolS+1,1,0);
do i=1 to N;
 ss=ss+(1||s[i,])`*(1||s[i,]);  /* Include the constant term in S */
 sv=sv+(1||s[i,])`*v[i];
end;
bhat=inv(ss)*sv; 
uhat=J(N,1,0);                  /* The residuals "uhat" from this regression */ 
do i=1 to N;
 uhat[i]=v[i]-(1||s[i,])*bhat;
end;
finish;

/*********************************************************/
/* Evaluate fhat(i) the nonparametric density of uhat(i) */
/*********************************************************/

 /* With density=1 and density=2, direct evaluation of the kernel density at any point "i" involves N kernel evaluations, requiring N*N kernel evaluations.
    With density=3 and density=4, direct evaluation of the kernel density at any point "i" involves 401 kernel evaluations from a grid, requiring N*401 
    kernel evaluations. It significantly decreases calculation time for large N. */ 

/* density= 1 and density=3: Standard Normal Kernel (mean=0, standard deviation=1) with Silverman's rule of thumb for bandwidth selection  
   density= 2 and density=4: Epanechnikov Kernel with Silverman's rule of thumb for bandwidth selection
   The default is density=, the sorted data density estimator of Lewbel (2000) and Lewbel and Schennach (2007) */

start fhat_;
%if &density=1 %then %do;
 quartile=quartile(uhat);
 IQ=(quartile[4]-quartile[2]);        /* Interquartile range */
 free quartile;
 std=std(uhat);                       /* Standard deviation */
 h=0.9#min(std,IQ/1.34)#N##(-0.2);    /* Silverman's rule of thumb */
 fhat=0*N;
 do i=1 to N;
  fhat=fhat+PDF('NORMAL',(uhat[i]-uhat)/h,0,1)/(N#h); /* Standard nonparametric kernel density estimator */
 end;
%end;
%else %if &density=2 %then %do;
 quartile=quartile(uhat);
 IQ=(quartile[4]-quartile[2]); /* Interquartile range */
 free quartile;
 std=std(uhat);
 fhat=0*N;
 h=1.159#(15##0.2)#min(std,IQ/1.34)#N##(-0.2);     /* Silverman's rule of thumb */
 do i=1 to N;
  fhat=fhat+0.75#(1-((uhat[i]-uhat)/h)##2)/(N#h)#(abs((uhat[i]-uhat)/h)<1); /* Epanechnikov (quadratic) nonparametric kernel density estimator */
 end;
%end;
%else %if &density=3 %then %do;
 fhat=J(N,1,0);
 quartile=quartile(uhat);
 IQ=(quartile[4]-quartile[2]);           /* Interquartile range */
 free quartile;
 std=std(uhat);
 h=0.9#min(std,IQ/1.34)#N##(-0.2);       /* Silverman's rule of thumb */
 gridl=min(uhat);                        /* The lowest value of the grid is the minimum observed value for uhat */
 gridu=max(uhat);                        /* The highest value of the grid is the maximum observed value for uhat */
 grid=do(gridl,gridu,(gridu-gridl)/400); /* Grid with equally 401 spaced bin centers. at */ 
 loc=1:ncol(grid); 
 mindiff=J(N,ncol(grid),0);
 do i=1 to N;
  difference=abs(uhat[i]-grid);            /* Evaluate for uhat(i) the 401 differences with the grid points */
  mindiffi=(difference=difference[,><]);   /* Assign uhat(i) to the nearest grid point (the min distance) */ 
  randomloc=sample(loc(mindiffi=1),1);     /* Ties are randomly assigned to one and only one grid point */
  mindiff[i,]=mindiffi#(loc=randomloc);
 end;
 count=mindiff[+,];                         /* Calculate the number of observations assigned to each grid point (bin counts) */ 
 do i=1 to N;
  kde=count#PDF('NORMAL',(uhat[i]-grid)/h,0,1)/(N#h);
  fhati=kde[,+];
  fhat[i]=fhati;
 end;
%end;
%else %if &density=4 %then %do;
 fhat=J(N,1,0);
 quartile=quartile(uhat);
 IQ=(quartile[4]-quartile[2]); /* Interquartile range */
 free quartile;
 std=std(uhat);
 h=1.159#(15##0.2)#min(std,IQ/1.34)#N##(-0.2);   
 gridl=min(uhat);
 gridu=max(uhat);
 grid=do(gridl,gridu,(gridu-gridl)/400);
 loc=1:ncol(grid); 
 mindiff=J(N,ncol(grid),0);
 do i=1 to N;
  difference=abs(uhat[i]-grid);            /* Evaluate for uhat(i) the 401 differences with the grid points */
  mindiffi=(difference=difference[,><]);   /* Assign uhat(i) to the nearest grid point */ 
  randomloc=sample(loc(mindiffi=1),1);     /* Ties are randomly assigned to one and only one grid point */
  mindiff[i,]=mindiffi#(loc=randomloc);
 end;
 count=mindiff[+,];                        /* Calculate the number of observations assigned to each grid point (bin counts) */ 
 do i=1 to N;
  kde=count#0.75#(1-((uhat[i]-grid)/h)##2)/(N#h)#(abs((uhat[i]-grid)/h)<1); 
  fhati=kde[,+];
  fhat[i]=fhati; 
 end;
%end;
%else %if &density= %then %do;
  /* To compute the sorted data density estimator, we need to sort the data set from lowest to highest values of uhat
     and evaluate for each i, the difference between uhata(i) and uhatb(i), the values that come immediately after and before 
     uhat(i), respectively. Before doing this, we must remove any ties (see Dong and Lewbell, 2015); */   
 indx=(1:N)`;                        /* Internal index number */
 dataset=uhat||indx;  
 call sort(dataset,1);               /* Sort the data set from lowest to highest values of uhat (uhat: first column of matrix "dataset") */
 uniq=uniqueby(dataset[,1],1);       /* Remove ties. Identifying rows with first occurrence of each value in sorted uhat */
 uniqval=dataset[uniq,1];            /* Remove ties. Keeping first occurrence of each value in sorted uhat */
 uhatb=lag(uniqval,1);               /* Create uhat-, the value that comes immediately before uhat(i) */
 uhata=lag(uniqval,-1);              /* Create uhat+, the value that comes immediately after uhat(i) */
 minu=min(uniqval);                  /* Minimum value of uhat */
 maxu=max(uniqval);                  /* Maximum value of uhat */ 
 uhatb[loc(uhatb=.)]=minu;           /* Endpoints: if no uhat in the data is smaller than uhat(i), then let uhatb(i)=uhat(i). See Dong and Lewbell (2015), footnote 1 */
 uhata[loc(uhata=.)]=maxu;           /* Endpoints: if no uhat in the data is larger than uhat(i), then let uhata(i)=uhat(i). See Dong and Lewbell (2015), foornote 1 */
 fhat_=2/(N#(uhata-uhatb));          /* Density estimator for unique values of uhat */
 uniq=uniq//N+1;                     /* Add a row with value N+1 */
 do i=1 to nrow(uniq)-1;             /* Assign density estimates to all observations, including ties */
  idx = uniq[i]:(uniq[i+1]-1);
  dataset[idx,1]=fhat_[i];           /* Fhat is replacing uhat in matrix "dataset" */
 end;
 free fhat_ uniq uniqval uhatb uhata minu maxu;
 call sort(dataset,2);               /* Sort again by original order of indx */ 
 fhat=dataset[,1];
 free dataset;
%end;
free uhat;
finish;

/******************/
/* Construct That */ 
/******************/

start That_;
N0=N;
/* Division by 0 not possible ! Observations with fhat=0 are excluded */
selec=loc(fhat>0);
d=d[selec,];
v=v[selec,];
fhat=fhat[selec,];
That=(d-(v>=0))/fhat;                           /* Estimator without heteroscedasticity */
X=X[selec,];  
Z=Z[selec,];
N=nrow(d);
div0=N0-N; 
free fhat N0;
finish;

/*****************/
/* Trim the data */
/*****************/

start trim_;
ntrim=&trim+0;                         /* Trick to avoid problems with &trim#N/100 when &trim is empty */ 
nbobs=ceil(ntrim#N/100); 
r=rank(That);  
keeptrim=(nbobs<r & r<=N-nbobs); 
That=That[loc(keeptrim=1),];           /* Keep the observations with lowest percentile< That(i) <= highest percentile */
X=X[loc(keeptrim=1),];                 /* Select the corresponding observations in X */ 
Z=Z[loc(keeptrim=1),];                 /* Select the corresponding observations in Z */
d=d[loc(keeptrim=1),];                 /* Select the corresponding observations in d */
v=v[loc(keeptrim=1),];                 /* Select the corresponding observations in v */ 
idx=t(1:N);
discarded_obs=idx[loc(keeptrim=0)];    /* Identify the deleted observations */
nbdiscarded=nrow(discarded_obs);  
call symputx("discarded",nbdiscarded);
free idx nbdiscarded;
N=nrow(d); 
finish;

/***************************************************************************************************/
/* Estimate bvihat, the estimated coefficients of X in D=I(X*beta+V+e>=0) from the IV regression */  
/* of That on X using instruments Z                                                                */
/***************************************************************************************************/

start estimates_;
k=ncol(x)+1;     /* Add the constant term */
kz=ncol(z)+1;    /* Add the constant term */
zz=J(kz,kz,0);
xz=J(k,kz,0);
zt=J(kz,1,0);
do i=1 to N;
 zz=zz+(1||z[i,])`*(1||z[i,]); /* Add the constant term */
 xz=xz+(1||x[i,])`*(1||z[i,]);
 zt=zt+(1||z[i,])`*That[i,];
end; 
izz=inv(zz);
betahat=inv(xz*izz*xz`)*xz*izz*zt;
xbhat=(J(N,1,1)||x)*betahat;   /* Fitted values */
finish;

/**************************************************************************/
/* Marginal effects, see Lewbel, Dong and Yang (2012), p 826 for formulae */
/**************************************************************************/
    
start meffects_;
xbhat=v+xbhat;
quartile=quartile(xbhat);
IQ=(quartile[4]-quartile[2]); /* Interquartile range */
free quartile;
std=std(xbhat);
der_AIF=J(N,1,0);             /* AIF: Average Index Function, der_IAF: first derivatives of AIF w.r.t X */   
%if &density=1 or &density=3 %then %do;
  h=0.9#min(std,IQ/1.34)#N##(-0.2);
  do i=1 to N;
   arg=(xbhat[i]-xbhat)/h;
   kerndens=PDF('NORMAL',arg,0,1);                        /* Individual contributions to kernel density estimate for observation i */
   sum_kern=sum(kerndens);                                /* Kernel density estimate for observation i */
   AIF=sum(d#kerndens)/sum_kern;                          /* Average Index Function fo observation i */
   der_AIF[i]=sum( -(d-AIF)#arg#kerndens )/(h#sum_kern);  /* First derivative of Standard normal Kernel is -arg*kerndens */ 
  end;
%end;
%else %do;
 h=1.159#(15##0.2)#min(std,IQ/1.34)#N##(-0.2);   
 do i=1 to N;
  arg=(xbhat[i]-xbhat)/h;
  kerndens=0.75#(1-arg##2)#(abs(arg)<1);                         /* Individual contributions to kernel density estimate for observation i */
  sum_kern=sum(kerndens);                                        /* Kernel density estimate for observation i */
  AIF=sum(d#kerndens)/sum_kern;                                  /* Average Index Function fo observation i */
  der_AIF[i]=sum( -(d-AIF)#1.5#arg#(abs(arg)<1) )/(h#sum_kern);  /* First derivative of Epanechikov Kernel is -1.5*arg if abs(arg)<1, 0 otherwise */ 
 end;
%end;
free arg kerndens sum_kern AIF;
mean_meffects=mean(der_AIF)*(betahat//1);           /* Marginal effects at the mean, marginal effect of the special regressor added */                
free der_AIF;
finish;

%if &error=0 %then %do;

/***********************************************************************************************/
/***                           Estimation on the original data set                           ***/
/***********************************************************************************************/
 
/*       Read the data set   */

 use &data;
 read all var {&depvar} into d;
 read all var {&specreg} into v;
 %if (&regexo^= & &regendo^=  ) %then %do;
  read all var {&regendo &regexo} into X; 
  read all var {&regexo &instr} into Z;
  titlename={Intercept &specreg &regendo &regexo};   /* Used later to edit the results */
 %end;
 %else %if (&regexo= & &regendo^=  ) %then %do;
  read all var {&regendo} into X; 
  read all var {&instr} into Z;  
  titlename={Intercept &specreg &regexo};
 %end;
 %else %if (&regexo^=  & &regendo= ) %then %do;
  read all var {&regexo} into X;
  read all var {&regexo} into Z;
  titlename={Intercept &specreg &regendo};
 %end;
 close &data;

/*   Evaluate the sign of the coefficient of V on D from a preliminary instrumental linear probability model */
 slope=1;
 %if ilpm ^= %then %do; 
  zz=0;xz=0;zd=0;
  N=nrow(d);
  do i=1 to N;
   zz=zz+(1||v[i,]||z[i,])`*(1||v[i,]||z[i,]);         /* Add the constant term and the special regressor to the list of instruments */
   xz=xz+(1||v[i,]||x[i,])`*(1||v[i,]||z[i,]);         /* Add the constant term and the special regressor to the list of regressors */
   zd=zd+(1||v[i,]||z[i,])`*d[i,];
  end; 
  izz=inv(zz);
  ixPzx=inv(xz*izz*xz`);
  bivlpm=inv(xz*izz*xz`)*xz*izz*zd;

  zoz=0;
  do i=1 to N;
   uhati2=(d[i]-(1||v[i,]||x[i,])*bivlpm)##2;
   zoz=zoz+(N/(N-ncol(x+2)))#uhati2#(1||v[i,]||z[i,])`*(1||v[i,]||z[i,]);
  end;
  var_h=ixPzx*xz*izz*zoz*izz*xz`*ixPzx;   /* Variance covariance estimator robust to heteroscedasticity of unknown form */
  se_h=sqrt(vecdiag(var_h));
  tstat=bivlpm/se_h;
  pvalue=round(2#(1-probnorm(abs(tstat))),.001);

  resultIVLPB=round(bivlpm,.001)||round(se_h,.001)||round(tstat,.001)||pvalue;
  titlecol={"Parameter Estimate" "Standard Error" "T-statistic" "P-value"};
  titlename=t(titlename);
  print resultIVLPB [rowname=titlename colname=titlecol label="Preliminary estimation with the instrumental variables linear probability model"];

  slope=(bivlpm[2,]>0);    /* If the estimate for the special regressor is positive then slope=1, 0 otherwise */
  free xz zz zd zoz ixPzx bivlpm uhati2 var_h se_h tstat pvalue;
 
 /* The special regressor "V" must have a positive effect on the binary dependent variable "D" */

  if (slope=0) then v=-v;
 %end;
 /* The special regressor "V" must have mean zero */

 descriptivev=t(quartile(v));
 meanv=mean(v);
 stdv=std(v);

 vbar=mean(v);
 if (vbar ^= 0) then v=v-vbar;

/* Residuals from the OLS regression of "V" on "S". S" is the union of all the elements of X and Z  */

 use &data;
 %if (&regexo^= & &regendo^=  ) %then %do;
  read all var {&regendo &regexo &instr} into S;
 %end;
 %else %if (&regexo= & &regendo^=  ) %then %do;
  read all var {&regendo &instr} into S;   
 %end;
 %else %if (&regexo^=  & &regendo= ) %then %do;
  read all var {&regexo} into S;
 %end;
 close &data;

 run uhat_; create uhat var {uhat};append;close;
 
/* Check homoskedasticity of the special regressor "V": White's test for heteroskedasticity  */

 uhat2=uhat##2;  /* Squared uhat  */

/* Stild is the matrix that consists of all the elements of S, and all the squares and cross products of all the elements of S */
 %if &hetvar= %then %do;
  Stild=S;                     /* Initialize Stild */
  do i=1 to ncolS;
   do j=i to ncolS;
    Stild=Stild||S[,i]#S[,j];  /* Squares and cross products */
   end;
  end;

 /* Check for duplicate variables in Stild that need to be deleted.     
    For instance, with S=(dummy var1 var1*var), Stild includes dummy, dummy*dummy (=dummy),
    var1, var1*var1, var1*var1 and var1*var1*var1*var1. Stild must include one and only one occurence of dummy and var1*var1. */

  col0=(Stild=0);
  sum_col0=col0[+,];                     /* Multiply complementary dummy variables generates columns of 0 in Stild */ 
  Stild=Stild[,loc(sum_col0<N)];         /* These columns are deleted */ 
  corr=corr(Stild); 
  duplicate=J(ncol(Stild),ncol(Stild),0);
  do i=1 to ncol(Stild)-1;
   do j=i+1 to ncol(Stild);
    duplicate[i,j]=(abs(corr[i,j])>0.9999); 
   end;
  end; 
  selec=loc(duplicate[+,]=0);            /* If the sample correlation between columns A and B of Stild is nearly 1,column B is deleted */ 
  nselec=ncol(selec); 
  if nselec^=ncol(Stild) then print "Message: duplicate variables in Stild were found and deleted",
                                     "The number of distinct columns in stild (not including the constant term) is" nselec[label=" "];  
  Stild=Stild[,selec];  
 %end;
 %else %do;
  use &data;
  read all var {&hetvar} into hetV;
  close &data;
  Stild=S||hetV;
 %end; 
 ncStild=ncol(Stild);                                 /* Number of columns */              
 stildstild=J(ncStild+1,ncStild+1,0);                 /* Account for the constant term which is not included yet */
 stilduhat2=J(ncStild+1,1,0);
  /* White's test */
 do i=1 to N;
  stildstild=stildstild+(1||stild[i,])`*(1||stild[i,]);  /* Add the constant term to Stild */
  stilduhat2=stilduhat2+(1||stild[i,])`*uhat2[i];
 end;
 chat=inv(stildstild)*stilduhat2; 
 free Stilduhat2 stildstild selec;

 RSS=0;
 TSS=0;
 uhat2bar=mean(uhat2);
 Stildchat=J(N,1,0);
 do i=1 to N;
  Stildchati=(1||stild[i,])*chat;
  RSS=RSS+(uhat2[i]-Stildchati)##2;
  TSS=TSS+(uhat2[i]-uhat2bar)##2;
  Stildchat[i,]=Stildchati;
 end;
 R2=1-RSS/TSS; /* centered R2 */
 White=N*R2;
 pvalueW=1-probchi(White,ncStild);
 WhiteTest=White||pvalueW;
 call symputx('pvalueW',pvalueW);

 Title={"Test statistic" "P-value"};
 print "White's test for heteroscedasticity of the special regressor &specreg",
       WhiteTest [colname=Title label="White's test"];

 %if (&het=  & &pvalueW<0.05) %then %do;
  uhat=uhat/sqrt(abs(Stildchat)); 
 %end; 

 /* Evaluate the nonparametric density of uhat */

 run fhat_;

/* Construct That */

 run That_;
 if div0>0 then print "Warning!" div0[label= " "] "observation(s) was(were) deleted due to density estimate(s) equal(s) to 0";
 free div0;
 %if (&het=   & &pvalueW<0.05) %then %do; 
  Stildchat=Stildchat[selec,];
  That=That#sqrt(abs(Stildchat));     /* Estimator with heteroscedasticity */
 %end;
 free Stildchat selec;

 /* Trim the data by deleting the bottom and top values of That */

 %if &trim ^= %then %do;
  run Trim_;
  create discarded_obs var {discarded_obs};append;close;  /* Create a dataset that includes the deleted observations */
  free discarded_obs;
 %end;

 /* Validity checks */

 var_V=round(var(v),.001); 
 kurtosis_V=kurtosis(v);
 prob={1,5,10,25,50,75,90,95,99}/100;   /* Percentiles */
 call qntl(pctl_v,V,prob);   

/* Estimate bvihat, the estimated coefficients of X in D=I(X*beta+V+e>=0) from the IV regression */  
/* of That on X using instruments Z                                                                */

 run estimates_;
 betahat0=betahat;               /* Save the estimates in betahat0 */ 

 /* Validity checks */

 var_xbhat=round(var(xbhat),.001);
 call qntl(pctl_xbhat,xbhat,prob);          

 spread=(var_V||t(round(pctl_v,.001)))//(var_xbhat||t(round(pctl_xbhat,.001)));
 free var_V pctl_v var_xbhat pctl_xbhat;

/*** Marginal effects ***/

 run meffects_;
 mean_meffects0=mean_meffects;   /* Save the estimates in mean_meffects0 */

/*********************************************************************************/
/***                         Bootstrap statistical inference                   ***/
/*********************************************************************************/

 %if &Nboot^= %then %do; 
  use &data;
  read all var {&specreg} into v_;
  read all var {&depvar} into d_;
  %if (&regexo^= & &regendo^=  ) %then %do;
   read all var {&regendo &regexo} into X_; 
   read all var {&regexo &instr} into Z_; 
  %end;
  %else %if (&regexo= & &regendo^=  ) %then %do;
   read all var {&regendo} into X_; 
   read all var {&instr} into Z_; 
  %end;
  %else %if (&regexo^=  & &regendo= ) %then %do;
   read all var {&regexo} into X_;
   read all var {&regexo} into Z_; 
  %end;
  close &data;
  s_=s;
  %if (&het=    & &pvalueW<0.05) %then %do;
   stild_=stild;
  %end;
  free s stild;
  
  call randseed(12345);
  bootselec=sample(1:N,&Nboot//N,"Replace");    /* Create Nboot sets of index numbers randomly chosen with replacement. Each set includes N elements. */
  create bootselec from bootselec;              /* Create the dataset that includes the randomly chosen observations */ 
  append from bootselec;
  close; 

  bvihat_=J(&Nboot,k,.);       /* Initialize the matrix of estimates. Row "j" of this matrix will correspond to the "jth" bootstrap replication estimates of the parameters */
  mean_meffects_=J(&Nboot,k+1,.);/* Initialize the matrix of estimates for marginal effects. Row "j" will correspond to the "jth" bootstrap replication estimates of marginal effects */
  do j=1 to &Nboot;                                                                             
   z=z_[bootselec[,j],];       /* Select the corresponding observations in z */
   v=v_[bootselec[,j],];       /* Select the corresponding observations in v */
   s=s_[bootselec[,j],];       /* Select the corresponding observations in s */
   d=d_[bootselec[,j],];       /* Select the corresponding observations in d */
   x=x_[bootselec[,j],];       /* Select the corresponding observations in x */

   /* The special regressor "V" must have mean 0 */

   descriptivev=t(quartile(v));
   meanv=mean(v);
   stdv=std(v);
   vbar=mean(v);
   if (vbar ^= 0) then v=v-vbar;

   /* Residuals from the OLS regression of "V" on "S" */
   run uhat_;

   %if (&het=    & &pvalueW<0.05) %then %do;
    Stild=Stild_[bootselec[,j],];
    stildstild=J(ncStild+1,ncStild+1,0);   /* Account for the constant term which is not included yet */
    stilduhat2=J(ncStild+1,1,0);
    do i=1 to N;
     stildstild=stildstild+(1||stild[i,])`*(1||stild[i,]); /* Add the constant term to Stild */
     stilduhat2=stilduhat2+(1||stild[i,])`*uhat[i]##2;
    end;  
    chat=inv(stildstild)*stilduhat2; 
    free Stilduhat2 stildstild;
    Stildchat=(J(N,1,1)||stild)*chat;
    uhat=uhat/sqrt(abs(Stildchat));
    free stild chat;
   %end;
   /* Evaluate the nonparametric density of uhat */
   run fhat_;
   /* Construct That */
   run That_;
   if div0>0 then print "Warning!" div0[label=" "] "observation(s) was(were) deleted due to density estimate(s) equal(s) to 0","bootstrap sample" j[label=" "];
   free div0;
   %if (&het=   & &pvalueW<0.05) %then %do; 
    Stildchat=Stildchat[selec,];
    That=That#sqrt(abs(Stildchat));     /* Estimator with heteroscedasticity */
   %end;
   free Stildchat selec;

   /* Trim the data */
   %if &trim ^= %then %do;
    run Trim_;
   %end;
   /* Estimate bvihat, the estimated coefficients of X in D=I(X*beta+V+e>=0) from the IV regression */ 
   /* of That on X using instruments Z                                                                */
   run estimates_;
   bvihat_[j,1:k]=betahat`;
   /* Marginal effects */
   run meffects_;
   mean_meffects_[j,]=mean_meffects`;
  end;
  create bviboot from bvihat_; /* Create the dataset that includes the bootstrap replication estimates of the model parameters */
  append from bvihat_;
  close;  
  create mean_meffects_boot from mean_meffects_;
  append from mean_meffects_;  /* Create the dataset that includes the bootstrap replication estimates of the marginal effects */
  close; 

  /* Bootstrapped estimate of standard errors and hypothesis testing */
 
  se=std(bvihat_[,k]); 
  se_boot=t(std(bvihat_));
  tstat_boot=betahat0/se_boot;
  pvalue_boot=2#(1-probnorm(abs(tstat_boot)));

  se_me_boot=t(std(mean_meffects_));
  tstat_me_boot=mean_meffects0/se_me_boot;
  pvalue_me_boot=2#(1-probnorm(abs(tstat_me_boot)));

 /* Hypothesis testing and confidence intervals with the percentile method */

  prob={0.5,2.5,5,95,97.5,99.5}/100;
  call qntl(pctl_b_,bvihat_,prob);          /* Percentiles for 99%, 95% and 90% CIs, respectively */
  call qntl(pctl_me_,mean_meffects_,prob);  /* Percentiles for 99%, 95% and 90% CIs, respectively */
 
  pctl_b=t(round(pctl_b_,.001));            /* Rounded values */
  pctl_me=t(round(pctl_me_,.001));          /* Rounded values */
 %end;

/***********************/
/** Edit the results ***/
/***********************/

 %if (&regexo ^= & &regendo ^=) %then %do;
  titlename={Intercept &regendo &regexo};
 %end;
 %if (&regexo ^= & &regendo= ) %then %do;
  titlename={Intercept &regexo};
 %end;
 %if (&regexo= & &regendo^= ) %then %do;
  titlename={Intercept &regendo};
 %end;
 %if &trim^= %then %do;
  print "Trimming applied: the left-most and right-most &trim.% of observations are excluded, &discarded observations are discarded";
 %end;
 if slope=0 then do;
  print "Negative effect of &specreg on &depvar detected : the negative of &specreg used instead";
 end;
 %if (&het=  & &pvalueW<0.05) %then %do;
  print "Heteroscedasticity in &specreg detected: the estimator that accounts for heteroscedasticity is used";
 %end;

 Print "Excess kurtosis for special regressor &specreg is" kurtosis_V [label=" "],
       "Caution! Negative value for excess kurtosis may weaken the results";
           
 rowvalidity={"Special regressor","Xbhat"};
 colvalidity={"Variance" "1%" "5%" "10" "25%" "50%" "75%" "90%" "95%" "99%"}; 

 print spread [rowname=rowvalidity colname=colvalidity label="Comparing measures of spread of the special regressor to those of the fitted values Xbhat"],
       "Caution! The spread of the special regressor is requested to be comparable or larger";
 
 titlename=t(titlename);
 titlename_=titlename//{&specreg};

 %if &nboot^= %then %do;
  result=betahat0||se_boot||tstat_boot||pvalue_boot;
  result_me=mean_meffects0||se_me_boot||tstat_me_boot||pvalue_me_boot;

  titlecol={"Parameter estimate" "Standard error" "T-statistic" "P-value"};
  titlecol_={"0.5%" "2.5%" "5%" "95%" "97.5%" "99.5%"};

  print result [rowname=titlename colname=titlecol label="Estimates and Statistical Inference with bootstrap standard errors"];
  print pctl_b [rowname=titlename colname=titlecol_ label="Bootstrap Confidence intervals for the model parameters with the percentile method"];

  print result_me [rowname=titlename_ colname=titlecol label="Marginal effects at the mean and Statistical Inference with bootstrap standard errors"];
  print pctl_me [rowname=titlename_ colname=titlecol_ label="Bootstrap Confidence Intervals for marginal effects at the mean with the percentile method"];
 %end;
 %else %do;
  titlecol={"Parameter estimate, original sample"};
  titlecol_={"Estimates of marginal effects at the mean, original sample"};
  print betahat0 [rowname=titlename colname=titlecol label="Estimates"];
  print mean_meffects0 [rowname=titlename_ colname=titlecol_ label="Marginal effects at the mean"];
 %end;
 quit;
%end;

/* Computation time */

data endtime;
endtime=time(); 
 
data _null_;
set begtime;set endtime;
duration=endtime-begtime;
hours_=duration/3600;
hours=int(hours_);
minutes_=(hours_-hours)*60;
minutes=int(minutes_);
seconds=(minutes_-minutes)*60;
call symputx('hours',hours);
call symputx('minutes',minutes);
call symputx('seconds',seconds);
run;
%put Computation time for SPECREG procedure is &hours hours, &minutes minutes and &seconds seconds. ;
%mend specreg;