/*****************************************************************
 * Example of Non-Linear Decomposition Technique for Logit Model
 * Simpler Version without Standard Errors
 * Updated on 11/6/06
 *
 * Used in:
 *   Fairlie, Robert W. 1999 "The Absence of the African-American
 *   Owned Business: An Analysis of the Dynamics of Self-Employment,"
 *   Journal of Labor Economics, 17(1): 80-108., and
 *   Fairlie, Robert W. 2005. "An Extension of the Blinder-Oaxaca
 *   Decomposition Technique to Logit and Probit Models,"
 *   Journal of Economic and Social Measurement, 30(4): 305-316.
 *
 * Program outline:
 *   1. Build the analysis sample and split it into a minority (black)
 *      sample and a white sample with prefixed variable names.
 *   2. Estimate a pooled logit (race dummies + regressors) and keep the
 *      coefficients on the non-race regressors.
 *   3. For each simulation: draw a random white subsample, match it
 *      one-to-one with the minority sample by rank of predicted
 *      probability, and switch groups of regressors from white to
 *      minority values one group at a time.
 *   4. Average the resulting contributions over all simulations.
 *****************************************************************/

* Main Decomposition for Logit;
* Uses Pooled 2 (All Races) Coefficients;

libname sasdata '/j/comp/decomp/examples';
options obs=max;
options nolabel ls=75 ps=140;
title 'Home Computer - Pooled 2 (All Races) Coefficients';

* specify number of simulations;
* NOTE: change this to 1000 for final run;
%let numsims=10;

* define race variables to be included only in pooled logits;
%let r=5;
%let racevars=black mexican othhisp natamer asian;

* specify number and names of independent variables;
* vars, mvars and wvars must list the same &k variables in the same order;
%let k=31;
%let vars= female age married prevmar children chld617
  hsgrad somcol college gradsch
  inc1015 inc1520 inc2025 inc2530 inc3035 inc3540 inc4050 inc5060 inc6075 incgt75
  midatlan encent wncent satlan escent wscent mountain pacific
  notcc notmsa notid;
%let mvars= mfemale mage mmarried mprevmar mchildre mchld617
  mhsgrad msomcol mcollege mgradsch
  minc1015 minc1520 minc2025 minc2530 minc3035 minc3540 minc4050 minc5060 minc6075 mincgt75
  mmidatla mencent mwncent msatlan mescent mwscent mmountai mpacific
  mnotcc mnotmsa mnotid;
%let wvars= wfemale wage wmarried wprevmar wchildre wchld617
  whsgrad wsomcol wcollege wgradsch
  winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75
  wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
  wnotcc wnotmsa wnotid;

* prepare original data for program;
data temp;
  set sasdata.finaldecomp00;

  * constant key used later to attach the single row of coefficients;
  mergeobs=1;

  * define dependent variable;
  y=hcomp;

  * delete observations with missing values for dep or indep vars;
  * NOTE(review): only y, hsgrad and inc1015 are tested here - presumably
    they stand in for the remaining regressors, confirm against the data;
  if y=. or hsgrad=. or inc1015=. then delete;

  * create extra ethnic/racial group variables;
  if racecode=3 and spnnum in (1,2,3) then mexican=1; else mexican=0;
  if racecode=3 and spnnum>=4 then othhisp=1; else othhisp=0;

  * only keep working-age adults;
  if age<25 or age>55 then delete;
run;

* define subset of data for estimating coefficients;
* (pooled version: the full sample is used);
data temp2;
  set temp;
run;

* create minority sample with minority variable names;
* define which minority group is used (the condition on the output statement);
data minority (keep=ym &mvars mergeobs);
  set temp;
  array varsa(&k) &vars;
  array mvarsa(&k) &mvars;
  ym=y;
  do i=1 to &k;
    mvarsa(i)=varsa(i);
  end;
  if black=1 then output;
run;

* create full white sample with white variable names;
data white (keep=yw &wvars mergeobs);
  set temp;
  array varsa(&k) &vars;
  array wvarsa(&k) &wvars;
  yw=y;
  do i=1 to &k;
    wvarsa(i)=varsa(i);
  end;
  if white=1 then output;
run;

* print out full sample means;
proc means data=minority;
  title2 'Minority Means';
run;

proc means data=white;
  title2 'White Means - Full Sample';
run;

* calculate means of dependent variables for full sample;
* these values are used to calculate the total gap in the decomposition;
proc means data=minority noprint;
  var ym;
  output out=ymdata mean=ymfull;
run;

proc means data=white noprint;
  var yw;
  output out=ywdata mean=ywfull;
run;

* estimate logit model to obtain pooled coefficients;
proc logistic data=temp2 outest=orgcoefs covout descending;
  model y=&racevars &vars / link=logit;
  title2 'Logit for Coefficients';
run;

* remove race dummies from coefficient dataset;
* only need this for pooled estimates;
* after this step the variables named in &vars hold coefficients, not data;
data coefs (drop=&racevars _link_ _type_ _status_ _name_ _lnlike_);
  set orgcoefs;
  mergeobs=1;
  if _n_=1; /* coefs are in first row */
run;

* calculate predicted probabilities for both samples;
data white;
  merge white coefs;
  by mergeobs;
  array coefsa(&k) &vars;
  array wvarsa(&k) &wvars;
  xbeta=intercept;
  do i=1 to &k;
    xbeta=xbeta+wvarsa(i)*coefsa(i);
  end;
  wordprob=exp(xbeta)/(1+exp(xbeta));
run;

data minority;
  merge minority coefs;
  by mergeobs;
  array coefsa(&k) &vars;
  array mvarsa(&k) &mvars;
  xbeta=intercept;
  do i=1 to &k;
    xbeta=xbeta+mvarsa(i)*coefsa(i);
  end;
  mordprob=exp(xbeta)/(1+exp(xbeta));
run;

* sort minority data by predicted probabilities for later matching;
proc sort data=minority;
  by mordprob;
run;

* create empty starting dataset for simulations;
data means2;
  set _null_;
run;

* create macro for simulations;
* each iteration appends one row of means (yw ym pred0-pred6) to means2;
%macro simulate;
  %do i=1 %to &numsims;

    /* first, delete white observations to match black sample size */
    /* the iteration number is used as the random number seed */
    data white1;
      set white;
      random1=ranuni(&i);
    run;

    proc sort data=white1;
      by random1;
    run;

    /* one-to-one merge (no BY): white rows beyond the minority row count */
    /* have ym missing and are dropped */
    data white2 (drop=ym);
      merge minority (keep=ym) white1;
      if ym=. then delete; /* deletes extra white observations */
    run;

    /* second, reorder random white subsample by predicted probabilities */
    proc sort data=white2;
      by wordprob;
    run;

    /* third, merge datasets together for matching */
    /* one-to-one merge: rows are paired by rank of predicted probability */
    data combined;
      merge white2 minority;
    run;

    /* calculate decomposition components */
    data combined;
      set combined;
      one=1;
      array coefsa(&k) &vars;

      /* define distribution switches as arrays */
      /* x0a is all white values, x6a is all minority values; each step */
      /* in between switches one more group of variables to minority values */
      array x0a(&k)
        wfemale wage
        wmarried wprevmar wchildre wchld617
        whsgrad wsomcol wcollege wgradsch
        winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75
        wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
        wnotcc wnotmsa wnotid;
      array x1a(&k)
        mfemale mage
        wmarried wprevmar wchildre wchld617
        whsgrad wsomcol wcollege wgradsch
        winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75
        wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
        wnotcc wnotmsa wnotid;
      array x2a(&k)
        mfemale mage
        mmarried mprevmar mchildre mchld617
        whsgrad wsomcol wcollege wgradsch
        winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75
        wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
        wnotcc wnotmsa wnotid;
      array x3a(&k)
        mfemale mage
        mmarried mprevmar mchildre mchld617
        mhsgrad msomcol mcollege mgradsch
        winc1015 winc1520 winc2025 winc2530 winc3035 winc3540 winc4050 winc5060 winc6075 wincgt75
        wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
        wnotcc wnotmsa wnotid;
      array x4a(&k)
        mfemale mage
        mmarried mprevmar mchildre mchld617
        mhsgrad msomcol mcollege mgradsch
        minc1015 minc1520 minc2025 minc2530 minc3035 minc3540 minc4050 minc5060 minc6075 mincgt75
        wmidatla wencent wwncent wsatlan wescent wwscent wmountai wpacific
        wnotcc wnotmsa wnotid;
      array x5a(&k)
        mfemale mage
        mmarried mprevmar mchildre mchld617
        mhsgrad msomcol mcollege mgradsch
        minc1015 minc1520 minc2025 minc2530 minc3035 minc3540 minc4050 minc5060 minc6075 mincgt75
        mmidatla mencent mwncent msatlan mescent mwscent mmountai mpacific
        wnotcc wnotmsa wnotid;
      array x6a(&k)
        mfemale mage
        mmarried mprevmar mchildre mchld617
        mhsgrad msomcol mcollege mgradsch
        minc1015 minc1520 minc2025 minc2530 minc3035 minc3540 minc4050 minc5060 minc6075 mincgt75
        mmidatla mencent mwncent msatlan mescent mwscent mmountai mpacific
        mnotcc mnotmsa mnotid;

      xb0=intercept;
      xb1=intercept;
      xb2=intercept;
      xb3=intercept;
      xb4=intercept;
      xb5=intercept;
      xb6=intercept;

      /* perform white to black variable distribution switches */
      do i=1 to &k;
        xb0=xb0+x0a(i)*coefsa(i);
        xb1=xb1+x1a(i)*coefsa(i);
        xb2=xb2+x2a(i)*coefsa(i);
        xb3=xb3+x3a(i)*coefsa(i);
        xb4=xb4+x4a(i)*coefsa(i);
        xb5=xb5+x5a(i)*coefsa(i);
        xb6=xb6+x6a(i)*coefsa(i);
      end;

      /* calculate various predicted probabilities */
      pred0=exp(xb0)/(1+exp(xb0));
      pred1=exp(xb1)/(1+exp(xb1));
      pred2=exp(xb2)/(1+exp(xb2));
      pred3=exp(xb3)/(1+exp(xb3));
      pred4=exp(xb4)/(1+exp(xb4));
      pred5=exp(xb5)/(1+exp(xb5));
      pred6=exp(xb6)/(1+exp(xb6));
    run;

    /* calculate decomposition estimates for this simulation */
    proc means data=combined noprint;
      var yw ym pred0-pred6;
      output out=means1 mean=;
    run;

    /* append latest simulation results to all previous simulation results */
    data means2;
      set means2 means1;
    run;

  %end;
%mend;

* turn off notes because macro generates a lot of information;
* remove this option for debugging;
options nonotes;

* run simulation - note that it runs numsims times because of do loop above;
%simulate;
run;

* calculate contribution estimates from changes in predicted probabilities;
* there are six variable groups, so pred0-pred6 yield cont1-cont6;
data means2;
  set means2;
  cont1=pred0-pred1;
  cont2=pred1-pred2;
  cont3=pred2-pred3;
  cont4=pred3-pred4;
  cont5=pred4-pred5;
  cont6=pred5-pred6;
run;

* calculate means of decomposition runs;
proc means data=means2;
  title2 'Mean Values of Contribution Estimates from Simulations';
  var yw pred0 pred1-pred6 ym cont1-cont6;
  output out=meandecomp mean=;
run;

* append the full sample means for ys and calculate percent contributions;
* dataset now has only one obs for the means;
data meandecomp;
  merge meandecomp ywdata ymdata;
  gap=ywfull-ymfull;
  perc1=cont1/gap;
  perc2=cont2/gap;
  perc3=cont3/gap;
  perc4=cont4/gap;
  perc5=cont5/gap;
  perc6=cont6/gap;
run;

* format output for final decomposition table;
* outputs contribution estimates and gap percents;
* (this simpler version does not compute standard errors);
proc means data=meandecomp;
  title2 'Final Output for Table - Mean Values of Decomposition Runs';
  var ywfull ymfull gap
      cont1 perc1 cont2 perc2 cont3 perc3
      cont4 perc4 cont5 perc5 cont6 perc6;
run;

* print mean outcomes and mean predicted probabilities for the full samples;
proc means data=white;
  var yw wordprob;
  title2 'Mean Predicted Probability for White Full Sample';
run;

proc means data=minority;
  var ym mordprob;
  title2 'Mean Predicted Probability for Minority Full Sample';
run;

endsas;