vandeWiel.c

Go to the documentation of this file.
00001 
00015 #include <R.h>
00016 #include <Rmath.h>
00017 #include <Rdefines.h>
00018                     
00019 
00031 typedef struct {
00032     long length;
00033     double *c;
00034     double *x;
00035 } celW;
00036 
00037 double binomi(int m, int n) { 
00038 
00039     double bin = 1;
00040     double bin1 = 1;
00041     double bin2 = 1;
00042     int i, j;
00043         
00044     for (i = 1; i <= n; i++) bin1 = bin1 * (m + 1 -i);
00045     for (j = 1; j <= n; j++) bin2 = bin2 * j;
00046     bin = bin1/bin2;
00047     
00048     return(bin);
00049 }
00050 
00051 celW** reserveW(int a, int b)
00052 {
00053     long res = 0;
00054     int i, j;
00055     celW** W;
00056     
00057     /* <FIXME>: need to free memory in case Calloc barfs
00058        Writing R Extension advertises .on.exit but
00059        I still need a pointer to the memory.
00060        </FIXME>
00061     */
00062 
00063     W = Calloc(a + 1, celW*);
00064 
00065     for (i = 0; i <= a; i++)
00066         W[i] = Calloc(b + 1, celW);
00067         
00068     for (i = 0; i <= a; i++) {
00069         for (j = i; j <= b; j++) {
00070             res = (long) binomi(j,i);
00071             /* the majority of memory is freed on exit and error
00072                thanks to S_alloc */
00073             W[i][j].c = (double *) S_alloc(res, sizeof(double));
00074             W[i][j].x = (double *) S_alloc(res, sizeof(double));
00075         }
00076         R_CheckUserInterrupt();
00077     }
00078     return(W);
00079 }
00080 
00081 void FreeW(int a, celW **W)
00082 {
00083      int i;
00084  
00085      for (i = a; i >= 0; i--)
00086          Free(W[i]);
00087          
00088      Free(W);
00089 }
00090 
00091 void initW(int a, int b, celW **W) {
00092 
00093     int i, j;
00094 
00095     for (i = 1; i <= a; i++)
00096     for (j = 0; j <= b; j++) {
00097         W[i][j].length = 0;
00098     }
00099     for (j = 0; j <= b; j++) {
00100         W[0][j].length = 1;
00101         W[0][j].c[0] = 1;  
00102         W[0][j].x[0] = 0;  
00103     }
00104 }
00105 
00106 void mult(celW *tem, int a, int b, int rank, double *rs) {
00107 
00108     /*
00109 
00110     mult multiplies the polynomial c_i*x^(l_i) by x^(rs[rank]), 
00111     which means adding the exponents
00112 
00113     */
00114 
00115     int j;
00116     for (j = 0; j < tem[0].length; j++)
00117         tem[0].x[j] += rs[rank];
00118 }
00119 
00120 void plus(celW **W, celW *tempie, int a, int b) {
00121 
00122     /*
00123 
00124     plus adds terms with the same exponents after multiplication 
00125     with 1 + x^(rs[rank]), so c1*x^j + c2*x^j becomes (c1+c2)*x^j
00126 
00127     */
00128 
00129     int elep = 0;
00130     int k = 0;
00131     int test = 1;
00132     int i, j;
00133     
00134     for (i = 0; i < W[a][b-1].length; i++) {
00135 
00136         test = 1;
00137         
00138         for (j = elep; j < tempie[0].length && test==1; j++) {
00139         
00140             if (tempie[0].x[j] - 0.000001 <= W[a][b-1].x[i]
00141                 && W[a][b-1].x[i] <= tempie[0].x[j] + 0.000001) {
00142 
00143                 tempie[0].c[j] += W[a][b-1].c[i];
00144                 test = 0;
00145                 elep = j;             
00146             }
00147         }
00148          
00149         if (test == 1) {
00150             tempie[0].c[tempie[0].length + k] = W[a][b-1].c[i];
00151             tempie[0].x[tempie[0].length + k] = W[a][b-1].x[i];
00152             k++;
00153         }
00154         R_CheckUserInterrupt();
00155     }
00156     tempie[0].length += k;
00157 }
00158 
00159 void mymergesort(celW temptw, long tijd)
00160 {
00161 
00162     /*
00163     
00164     mymergesort composes one sorted list (increasing exponents of 
00165     the polynomial) from two separately sorted lists. c1*x^3 + c2*x^5 
00166     and  c3*x^4 + c4*x^7  becomes  c1*x^3 + c3*x^4 + c2*x^5 + c4*x^7.
00167 
00168     */
00169 
00170     celW copiep;
00171     int t1 = 0; 
00172     int t2 = 0;
00173     int i, j;
00174 
00175     copiep.c = Calloc(temptw.length, double);
00176     copiep.x = Calloc(temptw.length, double);
00177         
00178     for (i = 0; i < temptw.length; i++) {
00179         copiep.c[i] = temptw.c[i];
00180         copiep.x[i] = temptw.x[i];
00181     }
00182     
00183     for (j = 0; j < temptw.length; j++) {
00184         if (t1 <= tijd-1 && t2 <= temptw.length - tijd - 1) {
00185             if (copiep.x[t1] < copiep.x[tijd + t2]) {
00186                 temptw.x[j] = copiep.x[t1];
00187                 temptw.c[j] = copiep.c[t1];
00188                 t1++;
00189             } else {
00190                 temptw.x[j] = copiep.x[tijd + t2];
00191                 temptw.c[j] = copiep.c[tijd + t2];
00192                 t2++;
00193             }
00194         } else {
00195             if (t1 > tijd - 1) {
00196                 temptw.x[j] = copiep.x[tijd + t2];
00197                 temptw.c[j] = copiep.c[tijd + t2];
00198                 t2++; 
00199             } else {   
00200                 temptw.x[j] = copiep.x[t1];
00201                 temptw.c[j] = copiep.c[t1];
00202                 t1++;
00203             }
00204         }   
00205         R_CheckUserInterrupt();       
00206     } 
00207     Free(copiep.c);
00208     Free(copiep.x);
00209 }
00210 
00211 void fillcell(celW **W, int i1, int j1, int r, double *rs) {
00212     
00213     /*
00214 
00215     fillcell makes the new recursive polynomial W[i1][j1] from 
00216     W[i1 - 1][j1 - 1] and W[i1][j1 - 1]. j1 is the total number of 
00217     rank scores assigned so far to either of the two groups, 
00218     i1 is the number of rank scores assigned to the smallest sample.
00219 
00220     */
00221 
00222     long tijd;
00223     celW temp2;
00224     int j, j2;
00225 
00226     temp2.c = Calloc(W[i1 - 1][j1 - 1].length + 
00227                      W[i1][j1 - 1].length, double);
00228     temp2.x = Calloc(W[i1 - 1][j1 - 1].length + 
00229                      W[i1][j1 - 1].length, double);
00230     temp2.length = W[i1 - 1][j1 - 1].length;
00231 
00232     for (j = 0; j < temp2.length; j++) {
00233        temp2.c[j] = W[i1 - 1][j1 - 1].c[j];
00234        temp2.x[j] = W[i1 - 1][j1 - 1].x[j];
00235     }
00236 
00237     if (i1 == j1) {       
00238         mult(&temp2, i1 - 1, j1 - 1, r, rs); 
00239     } else {           
00240         mult(&temp2, i1 - 1, j1 - 1, r, rs);                        
00241         tijd = temp2.length;                                
00242         plus(W, &temp2, i1, j1);                            
00243         mymergesort(temp2, tijd);                              
00244     }
00245 
00246     W[i1][j1].length = temp2.length;
00247 
00248     for (j2 = 0; j2 < temp2.length; j2++) {
00249         W[i1][j1].c[j2] = temp2.c[j2];
00250         W[i1][j1].x[j2] = temp2.x[j2];
00251     }          
00252 
00253     Free(temp2.c);
00254     Free(temp2.x);
00255 }
00256 
00257 void mirrorW(celW **W,int ce, int bep, int start, double *rs) {   
00258 
00259     /* 
00260 
00261     mirrorW contains a trick to speed op computations considerably. 
00262     By symmetry arguments it is easy to find W[i][tot] from W[tot-i][tot]
00263 
00264     */
00265 
00266     double totsum = 0;
00267     long len;
00268     int r, h;
00269     
00270     for (r = 0; r < bep; r++) totsum += rs[start + r];
00271     
00272     len = W[bep-ce][bep].length;
00273         
00274     for (h = 0; h < len; h++) {
00275         W[ce][bep].length = W[bep-ce][bep].length;
00276         W[ce][bep].c[len-1-h] = W[bep-ce][bep].c[h];
00277         W[ce][bep].x[len-1-h] = totsum - W[bep-ce][bep].x[h];
00278     }
00279 }
00280 
00281 void makeW(celW **W, int a, int b, int start, double *rs) {
00282 
00283 
00284     /* 
00285 
00286     makeW simply determines whether a new polynomial W[i][j] 
00287     can be found from mirrorW (if W[j-i][j] is available) or needs 
00288     to be constructed via multiplication etc.
00289 
00290     */
00291 
00292     long i,j;
00293     int rank;
00294     int hulp;
00295 
00296     for (j = 1; j <= b; j++) {  /* verander naar 0!! */
00297 
00298         if (j < a) {
00299             hulp = j; 
00300         } else {
00301             hulp = a;
00302         }
00303 
00304         for (i=1; i <= hulp; i++) {   
00305             if (i <= j/2 || j == 1) {
00306                 rank = start+j;
00307                 fillcell(W, i, j, rank - 1, rs);
00308             } else {
00309                 mirrorW(W, i, j, start, rs);
00310             }                               
00311             R_CheckUserInterrupt();
00312         }
00313     }
00314 }
00315 
00316 void cumulcoef(celW **W, int i1, int j1) {
00317 
00318 
00319     /*
00320 
00321     cumulcoef recursively adds the coefficients of the 
00322     sorted polynomial. So, 3*x^4 + 4*x^6 + 2*x^7  becomes  
00323     3*x^4 + 7*x^6 + 9*x^7.  
00324 
00325     */
00326     double coef = 0;
00327     int i;
00328      
00329     for(i = 0; i < W[i1][j1].length; i++) {
00330         W[i1][j1].c[i] += coef;
00331         coef = W[i1][j1].c[i];
00332     }
00333 }
00334 
00335 double numbersmall(int c, int b, double ob, celW **W1, celW **W2) {
00336 
00337 
00338     /*
00339 
00340     numbersmall is the core of the split-up algorithm. 
00341     
00342     It efficiently combines two polynomials which have used 
00343     complementary sets of rank scores and computes their contribution 
00344     to the tail-probability. 
00345 
00346     */
00347 
00348     double tot = 0;
00349     long le;
00350     int test = 1;
00351     int be = b/2;
00352     int bp = (b+1)/2;
00353     int tempel = 0;
00354     int i, j, h;
00355     
00356     for (h = 0; h <= c; h++) {
00357 
00358         tempel = 0;
00359         le = W2[c-h][bp].length;
00360         
00361         for (i = 0; i < W1[h][be].length; i++) {
00362             test = 1;
00363             for (j = tempel; j < le && test == 1; j++) {
00364                 if (W1[h][be].x[i] + W2[c-h][bp].x[le-j-1] <= ob) {
00365                     tot += W1[h][be].c[i] * W2[c - h][bp].c[le - j -1];
00366                     tempel = j;
00367                     test = 0;
00368                 }
00369             }
00370         }
00371     }
00372     return(tot);
00373 }
00374        
00375 SEXP R_split_up_2sample(SEXP scores, SEXP m, SEXP obs) {
00376         
00377     /*
00378 
00379     R interface to the split-up algorithm. 
00380 
00381     `scores' is a REAL vector giving the scores of the total sample 
00382     and `m' is a scalar integer with the sample size of one group.
00383     `obs' is the scalar observed test statistic, namely the
00384     sum of the `m' scores measured in one group.
00385 
00386     */
00387 
00388     int b, c, d, u;
00389     double tot, bino, prob;
00390     double ob;  
00391     SEXP ans;
00392 
00393     celW **W1;
00394     celW **W2;
00395     double *rs;
00396 
00397     b = LENGTH(scores);
00398     rs = REAL(scores);
00399     c = INTEGER(m)[0];
00400     d = b - INTEGER(m)[0];
00401     ob = REAL(obs)[0];
00402 
00403     /* total number of possible permutations */
00404     bino = binomi(b, c);
00405 
00406     /* allocate and initialise memory */
00407     W1 = reserveW(c, (b+1)/2);
00408     initW(c, (b+1)/2, W1);
00409     W2 = reserveW(c, (b+1)/2);
00410     initW(c, (b+1)/2, W2);
00411     
00412     makeW(W1, c, b/2, 0, rs);  
00413     makeW(W2, c, (b+1)/2, b/2, rs);
00414 
00415     for (u = 0; u <= c; u++) cumulcoef(W2, u, (b+1)/2);
00416 
00417     /* number of permutations <= ob */
00418     tot = numbersmall(c, b, ob, W1, W2);
00419     
00420     /* probability */
00421     prob = tot/bino; 
00422     
00423     /* free memory: this will _not_ take place 
00424        in case of an error */
00425     FreeW(c, W1);
00426     FreeW(c, W2);
00427 
00428     /* return to R */
00429     PROTECT(ans = allocVector(REALSXP, 1));
00430     REAL(ans)[0] = prob;
00431     UNPROTECT(1);
00432     return(ans);
00433 }

Generated on Tue Apr 24 16:56:51 2007 for coin by  doxygen 1.4.6