/**********************************************************************
 * Example of Non-Linear Decomposition Technique for Logit Model
 * Simplified Version without Standard Errors
 * Updated on 7/3/13
 * Used in:
 *   Fairlie, Robert W. 1999 "The Absence of the African-American Owned
 *   Business: An Analysis of the Dynamics of Self-Employment,"
 *   Journal of Labor Economics, 17(1): 80-108.
 *   Fairlie, Robert W. 2005. "An Extension of the Blinder-Oaxaca
 *   Decomposition Technique to Logit and Probit Models," Journal of
 *   Economic and Social Measurement, 30(4): 305-316.
 **********************************************************************/

/* Library holding the input data set, plus session-wide output options. */
libname sasdata 'c:\temp\comp\decomp\examples';
options obs=max nolabel ls=75 ps=140;
title 'Home Computer - Pooled 2 (All Races) Coefficients';

/* Number of random white subsamples drawn by the simulation loop.
   NOTE: change this to 1000 for the final run. */
%let numsims=10;

/* Race dummies: these enter only the pooled logit and are dropped from
   the coefficient vector before the decomposition. r is their count. */
%let r=4;
%let racevars=black latino natamer asian;

/* Number of independent variables; must equal the total number of names
   listed in group1-group6 below, because the arrays are sized with it. */
%let k=31;

/* Independent variables, split into the categories whose contributions
   are reported separately by the decomposition. */
%let group1= female age;
%let group2= married prevmar children chld617;
%let group3= hsgrad somcol college gradsch;
%let group4= inc1015 inc1520 inc2025 inc2530 inc3035 inc3540 inc4050 inc5060 inc6075 incgt75;
%let group5= midatlan encent wncent satlan escent wscent mountain pacific;
%let group6= notcc notmsa notid;

/* Same variables, renamed with an "m" prefix for the minority sample. */
%let mgroup1= mfemale mage;
%let mgroup2= mmarried mprevmar mchildre mchld617;
%let mgroup3= mhsgrad msomcol mcollege mgradsch;
%let mgroup4= minc1015 minc1520 minc2025 minc2530 minc3035 minc3540 minc4050 minc5060 minc6075 mincgt75;
%let mgroup5= mmidatla mencent mwncent msatlan mescent mwscent mmountai mpacific;
%let mgroup6= mnotcc mnotmsa mnotid;

/* Same variables, renamed with a "w" prefix for the white sample
   (groups 1-3 here; the list continues below). */
%let wgroup1= wfemale wage;
%let wgroup2= wmarried wprevmar wchildre wchld617;
%let wgroup3= whsgrad wsomcol wcollege wgradsch;
/* White-prefixed variable names, groups 4-6. */
%let wgroup4= winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75;
%let wgroup5= wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific;
%let wgroup6= wnotcc wnotmsa wnotid;

/* Combine the groups into the full variable lists. The order of names
   in vars, mvars and wvars must correspond position by position,
   because the arrays below copy element i of one into element i of
   another. */
%let vars= &group1 &group2 &group3 &group4 &group5 &group6;
%let mvars= &mgroup1 &mgroup2 &mgroup3 &mgroup4 &mgroup5 &mgroup6;
%let wvars= &wgroup1 &wgroup2 &wgroup3 &wgroup4 &wgroup5 &wgroup6;

/* Prepare original data for the program. */
data temp;
  set sasdata.finaldecomp00;

  /* constant key used later to attach the single row of coefficients
     to every observation */
  mergeobs=1;

  /* define dependent variable */
  y=hcomp;

  /* Delete observations with ANY missing value among the dependent
     variable, the race dummies or the independent variables. The logit
     drops such observations anyway, so filtering on the complete list
     keeps the estimation sample and the minority/white samples used
     for matching consistent with each other. */
  if nmiss(of y &racevars &vars) > 0 then delete;

  /* define sample - e.g. only keep working-age adults for this run */
  if age<25 or age>55 then delete;
run;

/* Subset of data used for estimating coefficients. Restrict this step
   to estimate coefficients from only one group (e.g. white
   coefficients). Currently set to use all groups (pooled sample) to
   estimate coefficients. */
data temp2;
  set temp;
run;

/* Minority sample with minority variable names.
   The final IF defines which minority group is used. */
data minority (keep=ym &mvars mergeobs);
  set temp;
  array varsa(&k) &vars;
  array mvarsa(&k) &mvars;
  ym=y;
  do i=1 to &k;
    mvarsa(i)=varsa(i);
  end;
  if black=1 then output;
run;

/* Full white sample with white variable names. */
data white (keep=yw &wvars mergeobs);
  set temp;
  array varsa(&k) &vars;
  array wvarsa(&k) &wvars;
  yw=y;
  do i=1 to &k;
    wvarsa(i)=varsa(i);
  end;
  if white=1 then output;
run;

/* Print out full sample means. */
proc means data=minority;
  title2 'Minority Means';
run;

proc means data=white;
  title2 'White Means - Full Sample';
run;

/* Means of the dependent variable for each full sample; these values
   are used to calculate the total gap in the decomposition. */
proc means data=minority noprint;
  var ym;
  output out=ymdata mean=ymfull;
run;

proc means data=white noprint;
  var yw;
  output out=ywdata mean=ywfull;
run;

/* Next: estimate logit model to obtain coefficients
   (pooled or group-specific sample is chosen in data temp2 above). */
/* Estimate the logit model to obtain coefficients. The estimation
   sample (pooled or a specific group) is whatever temp2 contains. */
proc logistic data=temp2 outest=orgcoefs covout descending;
  model y=&racevars &vars / link=logit;
  title2 'Logit for Coefficients';
run;

/* Keep only the coefficient row and remove the race dummies from it
   (the race dummies are only needed for pooled estimates). */
data coefs (drop=&racevars _link_ _type_ _status_ _name_ _lnlike_);
  set orgcoefs;
  mergeobs=1;
  if _n_=1; /* coefs are in first row */
run;

/* Calculate predicted probabilities for both samples. The single row of
   coefficients is attached to every observation through mergeobs; the
   coefficient variables carry the names in vars. */
data white;
  merge white coefs;
  by mergeobs;
  array coefsa(&k) &vars;
  array wvarsa(&k) &wvars;
  xbeta=intercept;
  do i=1 to &k;
    xbeta=xbeta+wvarsa(i)*coefsa(i);
  end;
  wordprob=exp(xbeta)/(1+exp(xbeta));
run;

data minority;
  merge minority coefs;
  by mergeobs;
  array coefsa(&k) &vars;
  array mvarsa(&k) &mvars;
  xbeta=intercept;
  do i=1 to &k;
    xbeta=xbeta+mvarsa(i)*coefsa(i);
  end;
  mordprob=exp(xbeta)/(1+exp(xbeta));
run;

/* Sort minority data by predicted probabilities for later matching. */
proc sort data=minority;
  by mordprob;
run;

/* Create empty starting dataset that accumulates simulation results. */
data means2;
  set _null_;
run;

/* One pass per simulation: draw a random white subsample the size of
   the minority sample, match the two samples by rank of predicted
   probability, switch variable groups from white to minority values one
   group at a time, and append the mean predicted probabilities to
   means2. Block comments are used inside the macro so that comment text
   is never handed to the macro processor. */
%macro simulate;
  %local i; /* keep the loop counter from touching any outer macro variable i */
  %do i=1 %to &numsims;

    /* first, delete white observations to match black sample size;
       the simulation number is used as the random seed */
    data white1;
      set white;
      random1=ranuni(&i);
    run;

    proc sort data=white1;
      by random1;
    run;

    data white2 (drop=ym);
      merge minority (keep=ym) white1;
      if ym=. then delete; /* deletes extra white observations */
    run;

    /* second, reorder random white subsample by predicted probabilities */
    proc sort data=white2;
      by wordprob;
    run;

    /* third, merge datasets together for matching (row-by-row, no BY) */
    data combined;
      merge white2 minority;
    run;

    /* calculate decomposition components */
    data combined;
      set combined;
      one=1;
      array coefsa(&k) &vars;

      /* distribution switches: x0a is all white values, each following
         array replaces one more group with the minority values, and
         x6a is all minority values */
      array x0a(&k) &wgroup1 &wgroup2 &wgroup3 &wgroup4 &wgroup5 &wgroup6;
      array x1a(&k) &mgroup1 &wgroup2 &wgroup3 &wgroup4 &wgroup5 &wgroup6;
      array x2a(&k) &mgroup1 &mgroup2 &wgroup3 &wgroup4 &wgroup5 &wgroup6;
      array x3a(&k) &mgroup1 &mgroup2 &mgroup3 &wgroup4 &wgroup5 &wgroup6;
      array x4a(&k) &mgroup1 &mgroup2 &mgroup3 &mgroup4 &wgroup5 &wgroup6;
      array x5a(&k) &mgroup1 &mgroup2 &mgroup3 &mgroup4 &mgroup5 &wgroup6;
      array x6a(&k) &mgroup1 &mgroup2 &mgroup3 &mgroup4 &mgroup5 &mgroup6;

      xb0=intercept;
      xb1=intercept;
      xb2=intercept;
      xb3=intercept;
      xb4=intercept;
      xb5=intercept;
      xb6=intercept;

      /* perform white to black variable distribution switches */
      do i=1 to &k;
        xb0=xb0+x0a(i)*coefsa(i);
        xb1=xb1+x1a(i)*coefsa(i);
        xb2=xb2+x2a(i)*coefsa(i);
        xb3=xb3+x3a(i)*coefsa(i);
        xb4=xb4+x4a(i)*coefsa(i);
        xb5=xb5+x5a(i)*coefsa(i);
        xb6=xb6+x6a(i)*coefsa(i);
      end;

      /* calculate various predicted probabilities */
      pred0=exp(xb0)/(1+exp(xb0));
      pred1=exp(xb1)/(1+exp(xb1));
      pred2=exp(xb2)/(1+exp(xb2));
      pred3=exp(xb3)/(1+exp(xb3));
      pred4=exp(xb4)/(1+exp(xb4));
      pred5=exp(xb5)/(1+exp(xb5));
      pred6=exp(xb6)/(1+exp(xb6));
    run;

    /* calculate decomposition estimates for this simulation */
    proc means data=combined noprint;
      var yw ym pred0-pred6;
      output out=means1 mean=;
    run;

    /* append latest simulation results to all previous simulation results */
    data means2;
      set means2 means1;
    run;

  %end;
%mend simulate;

/* Turn off notes because the macro generates a lot of log output;
   remove this option for debugging. */
options nonotes;

/* Run simulation - the loop inside the macro runs numsims times. */
%simulate;
run;

/* Restore notes so the remaining steps are logged normally. */
options notes;

/* Calculate contribution estimates from changes in predicted
   probabilities. There are six variable groups, hence pred0-pred6 and
   six contributions. */
data means2;
  set means2;
  cont1=pred0-pred1;
  cont2=pred1-pred2;
  cont3=pred2-pred3;
  cont4=pred3-pred4;
  cont5=pred4-pred5;
  cont6=pred5-pred6;
run;

/* Calculate means of decomposition runs. */
proc means data=means2;
  title2 'Mean Values of Contribution Estimates from Simulations';
  var yw pred0 pred1-pred6 ym cont1-cont6;
  output out=meandecomp mean=;
run;

/* Append the full sample means for the ys and calculate percent
   contributions; the dataset now has only one obs for the means.
   Only ywfull and ymfull are taken from the side datasets so that their
   _TYPE_ and _FREQ_ columns do not overwrite those of meandecomp. */
data meandecomp;
  merge meandecomp ywdata (keep=ywfull) ymdata (keep=ymfull);
  gap=ywfull-ymfull;
  perc1=cont1/gap;
  perc2=cont2/gap;
  perc3=cont3/gap;
  perc4=cont4/gap;
  perc5=cont5/gap;
  perc6=cont6/gap;
run;

/* Format output for the final decomposition table: contribution
   estimates and their share of the gap (this simplified version does
   not compute standard errors). */
proc means data=meandecomp;
  title2 'Final Output for Table - Mean Values of Decomposition Runs';
  var ywfull ymfull gap cont1 perc1 cont2 perc2 cont3 perc3
      cont4 perc4 cont5 perc5 cont6 perc6;
run;

/* Report the mean outcome and mean predicted probability for each
   full sample. */
proc means data=white;
  var yw wordprob;
  title2 'Mean Predicted Probability for White Full Sample';
run;

proc means data=minority;
  var ym mordprob;
  title2 'Mean Predicted Probability for Minority Full Sample';
run;