|
Sample Balancing for
Extremely Small Population Response Rates Bruce Ratner, Ph.D.

/* Build a synthetic population of 1,500,000 rows in which the first 10,000
   rows are responders (RESPONSE=1) and the rest are non-responders. */
DATA sample_data;
  do j = 1 to 1500000;
    if j le 10000 then RESPONSE=1;
    else RESPONSE=0;
    output;
  end;
RUN;

title1' sample_data RESPONSE rate ';
/* Name the input explicitly instead of relying on the implicit _LAST_ data set. */
PROC FREQ data=sample_data;
  table RESPONSE;
RUN;

/* One-row summary: pop_RespRate = mean of RESPONSE, sam_RR = number of responders. */
PROC MEANS data=sample_data;
  var RESPONSE;
  output out=sam_CNTS (drop=_TYPE_ _FREQ_) mean=pop_RespRate sum=sam_RR;
RUN;

/* Constant key m=1 so the summary row can be match-merged onto every detail row. */
DATA sam_CNTS;
  set sam_CNTS;
  m=1;
RUN;

/* Split the population into non-responders (nr) and responders (rr).
   uni is a seeded (12345) uniform random number used below to shuffle nr. */
DATA nr rr;
  set sample_data;
  uni=uniform(12345);
  m=1;
  if RESPONSE=0 then output nr;
  if RESPONSE=1 then output rr;
RUN;

/* Random order for the non-responders. */
PROC SORT data=nr;
  by uni;
RUN;

/* Attach the summary counts to every non-responder and keep only the first
   sam_NR = 5 * sam_RR rows of the shuffled file (a 5:1 down-sample). */
DATA nr;
  merge nr sam_CNTS;
  by m;
  sam_NR=sam_RR*5;
  if _n_ le (sam_NR);
RUN;

/* Stack sampled non-responders and all responders.
   The population/sample columns are only populated on the non-responder rows,
   because rr was never merged with sam_CNTS. */
DATA nr_rr;
  set nr rr;
  pop_NRespRate=1-pop_RespRate;
  /* Scaled-integer copies of the two rates (digits of the 8.7 rendering with
     the decimal point removed). WT below no longer depends on them; they are
     still written so the output columns stay the same. */
  _r=compress(put(pop_RespRate,8.7),'.');
  _n=compress(put(pop_NRespRate,8.7),'.');
  pop_RR=input(_r,10.);
  pop_NR=input(_n,10.);
  m=1;
RUN;

/* Assign the balancing weight.
   nr_rr already holds every column of nr in the same row order, so a plain SET
   replaces the former many-to-many "merge nr_rr nr; by m;" with the same rows
   and without the repeated-BY-values NOTE in the log.
   Responders keep WT=1. Non-responders get
       (sample odds of response) / (population odds of response),
   computed from the unrounded rates rather than their 7-decimal string
   round-trip, so rounding cannot push int() across an integer boundary.
   NOTE(review): int() truncates the weight so it can be used on a FREQ
   statement; for this data 29.8 becomes 29, which leaves the weighted response
   rate slightly above the population rate. Confirm truncation is intended. */
DATA sample_down;
  set nr_rr;
  WT=1;
  if RESPONSE=0 then
    WT=int( (sam_RR/sam_NR)/(pop_RespRate/pop_NRespRate) );
RUN;

title1' sample_down RESPONSE rate';
/* Weighted response rate of the down-sampled file. */
PROC MEANS data=sample_down;
  var RESPONSE;
  freq WT;
RUN;

1 800 DM STAT-1, or e-mail at br@dmstat1.com. |
|