$ontext DEA bootstrapping example Erwin Kalvelagen, october 2004 References: Mei Xue, Patrick T. Harker "Overcoming the Inherent Dependency of DEA Efficiency Scores: A Bootstrap Approach", Tech. Report, Department of Operations and Information Management, The Wharton School, University of Pennsylvania, April 1999 http://opim.wharton.upenn.edu/~harker/DEAboot.pdf $offtext sets i 'hospital (DMU)' /h1*h100/ j 'inputs and outputs' / FTE 'The number of full time employees in the hospital in FY 1994-95' Costs 'The expenses of the hospital ($million) in FY 1994-95' PTDAYS 'The number of the patient days produced by the hospital in FY 1994-95' DISCH 'The number of patient discharges produced by the hospital in FY 1994-95' BEDS 'The number of patient beds in the hospital in FY 1994-95' FORPROF 'Dummy variable, one if it is for-profit hospital, zero otherwise' TEACH 'Dummy variable, one if it is teaching hospital, zero otherwise' RES 'The number of the residents in the hospital in FY 1994-95' CONST 'Constant term in regression model' / inp(j) 'inputs' /FTE,Costs/ outp(j) 'outputs' /PTDAYS,DISCH/ ; table data(i,j) FTE Costs PTDAYS DISCH BEDS FORPROF TEACH RES h1 1571.86 174 71986 12665 365 h2 816.54 69.9 53081 5861 224 h3 533.74 61.7 25030 4951 286 1 h4 805.2 75.4 34163 11877 256 h5 3908.1 396 187462 42735 829 1 136.8 h6 727.72 63.9 31330 8402 194 h7 2571.75 220 130077 26877 620 1 42.81 h8 521 89.1 43390 8598 290 1 h9 718 50 27896 6113 150 1 23.21 h10 1504.85 121 75941 16427 393 h11 1234.49 84.6 57080 14180 317 h12 873 68.8 48932 12060 281 h13 1067.17 85.8 50436 11317 278 h14 668 47.5 67909 6235 244 h15 452.35 36.4 25200 6860 155 1 1 13.31 h16 1523 97.4 59809 13180 394 h17 3152 198 108631 22071 578 1 195.67 h18 871.96 30.7 17925 4605 160 h19 2901.86 290 130004 24133 549 1 126.89 h20 902.4 78.2 35743 8664 236 1 12.08 h21 194.69 10.9 15555 1530 132 h22 713.51 62.6 32558 8966 138 h23 557.36 23.8 12728 2291 276 1 h24 2259.2 120 74061 12942 348 1 14.52 h25 462.22 32.4 28886 6101 134 h26 1212.1 97.3 74194 12681 342 h27 2391.94 192 89843 18396 336 1 229.19 h28 1637 162 80468 21345 415 h29 501 37.9 26813 4594 166 1 h30 412.1 40.2 23217 6044 160 1 h31 738.56 27 11514 3052 144 1 h32 414.1 35.7 55611 4354 200 h33 1097 105 59443 13101 242 1 26.32 h34 742 62.8 42542 8739 172 h35 1010 97.1 47246 12073 269 1 1.1 h36 440.6 34.2 30773 4305 201 h37 1203.3 85.4 50710 11470 247 1 13.82 h38 2558.01 195 128450 20441 571 1 5.42 h39 215.45 8.409936 65743 578 238 h40 599.3 30.4 23299 5338 173 h41 480.55 29.5 34279 6560 169 1 h42 634.51 29.9 27157 5198 141 h43 1211.9 91.4 90008 17666 320 1 6.25 h44 285.5 23.9 16473 2873 135 h45 1030.36 67.1 43486 9467 235 1 6.44 h46 1374.81 95.5 74279 11862 284 h47 953.56 49.8 47934 10553 207 h48 561.11 41.7 24800 5498 132 h49 644 57.1 39663 8604 260 h50 376.55 19.6 22003 4759 143 h51 404.79 32.8 27566 7871 190 1 h52 397.9 29.4 26072 4248 170 h53 374.2 3.944649 4179 819 156 h54 1702 100 114603 17235 438 1 11.81 h55 148.09 5.013379 51660 771 172 h56 253.48 16.9 17599 4044 178 h57 1445.68 99.3 81041 12912 475 1 17.53 h58 414.1 26.5 20432 4068 129 h59 642.58 48.5 42733 5983 181 1 h60 203.75 13 16923 3467 146 1 h61 421.8 18.3 16179 2840 160 h62 320.62 17.3 18882 3370 160 h63 679.79 25.6 27561 4447 308 1 11.33 h64 2382 226 166559 26019 787 1 7.08 h65 559.29 58.1 40534 8806 342 1 h66 568.15 35 37120 7242 158 h67 2408.04 155 70392 9538 266 1 111.33 h68 632.34 54.6 37228 6359 175 h69 917.22 55.2 42135 7294 215 h70 554.34 56.9 32352 3320 205 1 1 h71 780 75.9 39213 7154 172 h72 663.82 56.9 34180 5284 200 h73 1424 146 107457 18198 432 1 2.75 h74 313 20.7 20110 5967 165 1 h75 778 78.4 51496 12302 390 h76 863.37 62 50957 10557 228 h77 3509.12 290 109673 19213 469 1 290.53 h78 1593.82 152 82400 17707 474 1 11.64 h79 466 40.1 30647 7265 164 1 h80 666.38 48.2 28048 5182 153 h81 998.8 121 45513 6855 238 1 88.86 h82 1018 98.2 61176 11386 350 h83 3238.28 326 122118 19068 514 1 146.33 h84 1431.1 107 48900 10623 208 h85 1735.99 273 84118 16458 278 1 158.4 h86 1769 190 105741 19256 478 1 0.93 h87 484.56 36.2 24070 6464 125 h88 204.7 13.9 28137 1615 135 1 h89 1706.58 287 75153 13465 367 1 91.56 h90 1029.11 71.9 49993 6690 252 1 4 h91 1167.2 111 75004 21334 350 h92 1657.58 116 77753 17528 413 h93 1017.16 88.5 64147 11135 316 h94 1532.7 153 99998 17391 395 1 4.8 h95 1462 113 119107 16053 484 1 0.5 h96 1133.8 109 55540 15566 355 1 8.51 h97 609 48.2 71817 5639 376 1 1 h98 301.31 20.2 43214 2153 141 h99 1930.08 201 87197 19315 418 h100 1573.3 177 88124 19661 458 1 69.71 ; data(i,'CONST') = 1; *------------------------------------------------------------------------------- * PHASE 1: Estimation of b(j) * * Run standard Constant Returns to Scale (CCR) Input-oriented DEA model * followed by linear regression OLS estimation *------------------------------------------------------------------------------- * * this is the standard DEA model * instead of 100 small models we solve one big model, see * http://www.gams.com/~erwin/dea/dea.pdf * parameter x(inp,i) 'inputs of DMU i' y(outp,i) 'outputs of DMU i' ; alias(i,j0); positive variables v(inp,j0) 'input weights' u(outp,j0) 'output weights' ; variable eff(j0) 'efficiency' z 'objective variable' ; equations objective(j0) 'objective function: maximize efficiency' normalize(j0) 'normalize input weights' limit(i,j0) "limit other DMU's efficiency" totalobj ; totalobj.. z =e= sum(j0, eff(j0)); objective(j0).. eff(j0) =e= sum(outp, u(outp,j0)*y(outp,j0)); normalize(j0).. sum(inp, v(inp,j0)*x(inp,j0)) =e= 1; limit(i,j0).. sum(outp, u(outp,j0)*y(outp,i)) =l= sum(inp, v(inp,j0)*x(inp,i)); model dea /totalobj,objective, normalize, limit/; alias (i,iter); x(inp,i) = data(i,inp); y(outp,i) = data(i,outp); option limrow=0; option limcol=0; dea.solprint=2; dea.solvelink=2; option lp=cplex; solve dea using lp maximizing z; abort$(dea.modelstat<>1) "LP was not optimal"; display "------------------------------------ DEA MODEL ------------------------", eff.l; * * now solve the regression problem * efficiency = b0 + b1*BEDS + b2*FORPROF + b3*TEACH + b4*RES * See http://www.gams.com/~erwin/regression/regression.pdf * set e(j) 'explanatory variables' /BEDS,FORPROF,TEACH,RES,CONST/; parameter Xmat(i,e) 'regression data matrix'; Xmat(i,e) = data(i,e); variable sse 'sum of squared errors' b(e) 'coefficients to be estimated' ; equation sumsq 'dummy objective function' fit(i) 'regression equation' ; sumsq.. sse =n= 0; fit(i).. eff.l(i) =e= sum(e, Xmat(i,e)*b(e)); model regression /sumsq,fit/; regression.solprint=0; regression.solvelink=2; option lp=ls; solve regression using lp minimizing sse; * * standard errors and pvalues * parameters bhat(e) 'estimates' se(e) 'standard error' tval(e) 't-values' pval(e) 'p-values: Pr(>|t|)' ; bhat(e) = b.l(e); se(e) = b.m(e); execute_load 'ls.gdx',tval,pval; parameter ols(e,*); ols(e,'estimates') = bhat(e); ols(e,'std.error') = se(e); ols(e,'t value') = tval(e); ols(e,'p value') = pval(e); display "------------------------------------ OLS MODEL ------------------------", ols; *------------------------------------------------------------------------------- * PHASE 2: BOOTSTRAP algorithm *------------------------------------------------------------------------------- set s 'sample' /sample1*sample2000/; parameter bs(s,i) 'bootstrap sample'; bs(s,i) = trunc( uniform(1,card(i)+0.999999999) ); *display bs; * sanity check: loop((s,i), abort$(bs(s,i)<1) "Check bs for entries < 1"; abort$(bs(s,i)>card(i)) "Check bs for entries > card(i)"; ); alias(i,ii); set mapbs(s,i,ii); mapbs(s,i,ii)$(bs(s,i) = ord(ii)) = yes; * this mapping says the i'th sample data record is the ii'th record * in the original data (for sample s) loop((s,i), abort$(sum(mapbs(s,i,ii),1)<>1) "mapbs is not unique"; ); parameter data_sample(i,j); parameter sb(s,e) 'b(e) for each sample s'; * reduce printing to listing file: regression.solprint=2; loop(s, * * solve dea model * data_sample(i,j) = sum(mapbs(s,i,ii),data(ii,j)); x(inp,i) = data_sample(i,inp); y(outp,i) = data_sample(i,outp); option lp=cplex; solve dea using lp maximizing z; abort$(dea.modelstat<>1) "LP was not optimal"; * * solve OLS model * Xmat(i,e) = data_sample(i,e); option lp=ls; solve regression using lp minimizing sse; sb(s,e) = b.l(e); ); * * get statistics * parameter bbar(e) "Averaged estimates"; bbar(e) = sum(s, sb(s,e)) / card(s); parameter sehat(e) "Standard errors of bootstrap algorithm"; sehat(e) = sqrt(sum(s, sqr(sb(s,e)-bbar(e)))/(card(s)-1)); parameter tbootstrap(e) "t statistic for bootstrap"; tbootstrap(e) = bhat(e)/sehat(e); scalar df 'degrees of freedom'; df = card(i) - (card(e) - 1) - 1; parameter pbootstrap(e) "p-values for bootstrap"; * * pvalue = 2 * pt( abs(tvalue), df) * = 2 * 0.5 * pbeta( df / (df + sqr(abs(tvalue))), df/2, 0.5) * = betareg( df / (df+sqr(tvalue)), df/2, 0.5) * pbootstrap(e) = betareg( df / (df+sqr(tbootstrap(e))), df/2, 0.5); parameter bootstrap(e,*); bootstrap(e,'estimates') = bhat(e); bootstrap(e,'std.error') = sehat(e); bootstrap(e,'t value') = tbootstrap(e); bootstrap(e,'p value') = pbootstrap(e); display "------------------------------------ BOOTSTRAP MODEL ------------------------", bootstrap;