Utils.c

Go to the documentation of this file.
00001 
00009 #include "party.h"
00010                 
00011                 
00023 void C_kronecker (const double *A, const int m, const int n,
00024                   const double *B, const int r, const int s,
00025                   double *ans) {
00026 
00027     int i, j, k, l, mr, js, ir;
00028     double y;
00029 
00030     mr = m * r;
00031     for (i = 0; i < m; i++) {
00032         ir = i * r;
00033         for (j = 0; j < n; j++) {
00034             js = j * s;
00035             y = A[j*m + i];
00036             for (k = 0; k < r; k++) {
00037                 for (l = 0; l < s; l++) {
00038                     ans[(js + l) * mr + ir + k] = y * B[l * r + k];
00039                 }
00040             }
00041         }
00042     }
00043 }  
00044 
00045 
00052 SEXP R_kronecker (SEXP A, SEXP B) {
00053 
00054     /*  The Kronecker product, a real (mr x ns) matrix */
00055     SEXP ans; 
00056     int *adim, *bdim;
00057 
00058     if (!isReal(A) || !isReal(B)) 
00059         error("R_kronecker: A and B are not of type REALSXP");
00060 
00061     if (isMatrix(A)) {
00062         adim = INTEGER(getAttrib(A, R_DimSymbol));
00063     } else {
00064         /* assume row vectors */
00065         adim = Calloc(2, int);
00066         adim[0] = 1;
00067         adim[1] = LENGTH(A);
00068     }
00069     
00070     if (isMatrix(B)) {
00071         bdim = INTEGER(getAttrib(B, R_DimSymbol));
00072     } else {
00073         /* assume row vectors */
00074         bdim = Calloc(2, int);
00075         bdim[0] = 1;
00076         bdim[1] = LENGTH(B);
00077     }
00078 
00079     PROTECT(ans = allocMatrix(REALSXP, 
00080                               adim[0] * bdim[0], 
00081                               adim[1] * bdim[1]));
00082     C_kronecker(REAL(A), adim[0], adim[1], 
00083                 REAL(B), bdim[0], bdim[1], REAL(ans));
00084     if (!isMatrix(A)) Free(adim); 
00085     if (!isMatrix(B)) Free(bdim);
00086     UNPROTECT(1);
00087     return(ans);
00088 }
00089 
00090 
00102 void CR_La_svd(SEXP jobu, SEXP jobv, SEXP x, SEXP s, SEXP u, SEXP v,
00103                SEXP method)
00104 {
00105     int *xdims, n, p, lwork, info = 0;
00106     double *work, *xvals, tmp;
00107     /* const char * meth; not used*/
00108 
00109     if (!(isString(jobu) && isString(jobv)))
00110         error(("'jobu' and 'jobv' must be character strings"));
00111     if (!isString(method))
00112         error(("'method' must be a character string"));
00113     /* meth = CHAR(STRING_ELT(method, 0)); not used */
00114     xdims = INTEGER(coerceVector(getAttrib(x, R_DimSymbol), INTSXP));
00115     n = xdims[0]; p = xdims[1];
00116     xvals = Calloc(n * p, double);
00117     /* work on a copy of x */
00118     Memcpy(xvals, REAL(x), (size_t) (n * p));
00119 
00120     {
00121         int ldu = INTEGER(getAttrib(u, R_DimSymbol))[0],
00122             ldvt = INTEGER(getAttrib(v, R_DimSymbol))[0];
00123         int *iwork= (int *) R_alloc(8*(n<p ? n : p), sizeof(int));
00124 
00125         /* ask for optimal size of work array */
00126         lwork = -1;
00127         F77_CALL(dgesdd)(CHAR(STRING_ELT(jobu, 0)),
00128                          &n, &p, xvals, &n, REAL(s),
00129                          REAL(u), &ldu,
00130                          REAL(v), &ldvt,
00131                          &tmp, &lwork, iwork, &info);
00132         if (info != 0)
00133             error(("error code %d from Lapack routine '%s'"), info, "dgesdd");
00134         lwork = (int) tmp;
00135         work = Calloc(lwork, double);
00136         F77_CALL(dgesdd)(CHAR(STRING_ELT(jobu, 0)),
00137                          &n, &p, xvals, &n, REAL(s),
00138                          REAL(u), &ldu,
00139                          REAL(v), &ldvt,
00140                          work, &lwork, iwork, &info);
00141         if (info != 0)
00142             error(("error code %d from Lapack routine '%s'"), info, "dgesdd");
00143     }
00144     Free(work); Free(xvals);
00145 }
00146 
00153 void C_svd (SEXP x, SEXP svdmem) {
00154 
00155     int p, i;
00156     double *du, *dv;
00157 
00158     if (!isMatrix(x) || !isReal(x))
00159         error("x is not a real matrix");
00160 
00161     du = REAL(GET_SLOT(svdmem, PL2_uSym));
00162     dv = REAL(GET_SLOT(svdmem, PL2_vSym));
00163     p = INTEGER(GET_SLOT(svdmem, PL2_pSym))[0];
00164     if (nrow(x) != p) error("svd p x error");
00165     for (i = 0; i < p*p; i++) {
00166         du[i] = 0.0;
00167         dv[i] = 0.0;
00168     }
00169     CR_La_svd(GET_SLOT(svdmem, PL2_jobuSym), 
00170         GET_SLOT(svdmem, PL2_jobvSym), x, GET_SLOT(svdmem, PL2_sSym), 
00171         GET_SLOT(svdmem, PL2_uSym), GET_SLOT(svdmem, PL2_vSym), 
00172         GET_SLOT(svdmem, PL2_methodSym));
00173     /* return(R_NilValue); */
00174 }
00175 
00182 SEXP R_svd (SEXP x, SEXP svdmem) {
00183 
00184     C_svd(x, svdmem);
00185     return(R_NilValue);
00186 }
00187 
00188 
00197 void C_MPinv (SEXP x, double tol, SEXP svdmem, SEXP ans) {
00198 
00199     SEXP d, u, vt;
00200     int i, j, p, k, *positive;
00201     double *dd, *du, *dvt, *dMPinv;
00202     double *drank;
00203     
00204     drank = REAL(GET_SLOT(ans, PL2_rankSym));
00205     dMPinv = REAL(GET_SLOT(ans, PL2_MPinvSym));
00206 
00207     C_svd(x, svdmem);
00208     d = GET_SLOT(svdmem, PL2_sSym);
00209     dd = REAL(d);
00210     u = GET_SLOT(svdmem, PL2_uSym);
00211     du = REAL(u);
00212     vt = GET_SLOT(svdmem, PL2_vSym);
00213     dvt = REAL(vt);
00214     p = LENGTH(d);
00215 
00216     if (tol * dd[0] > tol) tol = tol * dd[0];
00217 
00218     positive = Calloc(p, int); 
00219     
00220     drank[0] = 0.0;
00221     for (i = 0; i < p; i++) {
00222         if (dd[i] > tol) {
00223             positive[i] = 1;
00224             drank[0] += 1.0;
00225         } 
00226     }
00227     
00228     for (j = 0; j < p; j++) {
00229         if (positive[j]) {
00230             for (i = 0; i < p; i++)
00231                 du[j * p + i] *= (1 / dd[j]);
00232         }
00233     }
00234     
00235     for (i = 0; i < p; i++) {
00236         for (j = 0; j < p; j++) {
00237             dMPinv[j * p + i] = 0.0;
00238             for (k = 0; k < p; k++) {
00239                 if (positive[k])
00240                     dMPinv[j * p + i] += dvt[i * p + k] * du[p * k + j]; 
00241             }
00242         }
00243     }
00244 
00245     Free(positive);
00246 }
00247 
00255 SEXP R_MPinv (SEXP x, SEXP tol, SEXP svdmem) {
00256 
00257     SEXP ans;
00258     int p;
00259 
00260     if (!isMatrix(x) || !isReal(x))
00261         error("R_MPinv: x is not a real matrix");
00262 
00263     if (nrow(x) != ncol(x)) 
00264         error("R_MPinv: x is not a square matrix");
00265 
00266     if (!isReal(tol) || LENGTH(tol) != 1)
00267         error("R_MPinv: tol is not a scalar real");
00268     
00269     p = nrow(x);
00270     if (p != INTEGER(GET_SLOT(svdmem, PL2_pSym))[0])
00271         error("R_MPinv: dimensions don't match");
00272 
00273     PROTECT(ans = NEW_OBJECT(MAKE_CLASS("LinStatExpectCovarMPinv")));
00274     SET_SLOT(ans, PL2_MPinvSym, PROTECT(allocMatrix(REALSXP, p, p)));
00275     SET_SLOT(ans, PL2_rankSym, PROTECT(allocVector(REALSXP, 1)));
00276     
00277     C_MPinv(x, REAL(tol)[0], svdmem, ans);
00278     
00279     UNPROTECT(3);
00280     return(ans);
00281 }
00282 
00290 double C_max(const double *x, const int n) {
00291    double tmp = 0.0;
00292    int i;
00293    
00294    for (i = 0; i < n; i++) {
00295        if (x[i] > tmp) tmp = x[i];
00296    }
00297    return(tmp);
00298 }
00299 
00300 
00306 SEXP R_max(SEXP x) {
00307 
00308     SEXP ans;
00309     int n;
00310     
00311     if (!isReal(x)) 
00312         error("R_max: x is not of type REALSXP");
00313     n = LENGTH(x);
00314     PROTECT(ans = allocVector(REALSXP, 1));
00315     REAL(ans)[0] = C_max(REAL(x), n);
00316     UNPROTECT(1);
00317     return(ans);
00318 }
00319 
00320 
00327 void C_abs(double *x, int n) {
00328 
00329     int i;
00330     for (i = 0; i < n; i++) x[i] = fabs(x[i]);
00331 }
00332 
00333 
00339 SEXP R_abs(SEXP x) {
00340 
00341     SEXP ans;
00342     int n;
00343     
00344     if (!isReal(x)) 
00345         error("R_max: x is not of type REALSXP");
00346     n = LENGTH(x);
00347     PROTECT(ans = duplicate(x));
00348     C_abs(REAL(ans), n);
00349     UNPROTECT(1);
00350     return(ans);
00351 }
00352 
00353 
00365 void C_matprod(double *x, int nrx, int ncx,
00366                double *y, int nry, int ncy, double *z)
00367 {
00368     char *transa = "N", *transb = "N";
00369     double one = 1.0, zero = 0.0;
00370     int i;
00371 
00372     if (nrx > 0 && ncx > 0 && nry > 0 && ncy > 0) {
00373         F77_CALL(dgemm)(transa, transb, &nrx, &ncy, &ncx, &one,
00374                         x, &nrx, y, &nry, &zero, z, &nrx);
00375     } else /* zero-extent operations should return zeroes */
00376         for(i = 0; i < nrx*ncy; i++) z[i] = 0;
00377 }
00378 
00379 
00386 SEXP R_matprod(SEXP x, SEXP y) {
00387 
00388     SEXP ans;
00389     
00390     int nrx, ncx, nry, ncy;
00391     
00392     nrx = nrow(x);
00393     ncx = ncol(x);
00394     nry = nrow(y);
00395     ncy = ncol(y);
00396 
00397     if (ncx != nry)
00398         error("R_matprod: dimensions don't match");
00399     PROTECT(ans = allocMatrix(REALSXP, nrx, ncy));
00400     C_matprod(REAL(x), nrx, ncx, REAL(y), nry, ncy, REAL(ans));
00401     UNPROTECT(1);
00402     return(ans);
00403 }
00404 
00405 
00417 void C_matprodT(double *x, int nrx, int ncx,
00418                 double *y, int nry, int ncy, double *z)
00419 {
00420     char *transa = "N", *transb = "T";
00421     double one = 1.0, zero = 0.0;
00422     int i;
00423 
00424     if (nrx > 0 && ncx > 0 && nry > 0 && ncy > 0) {
00425         F77_CALL(dgemm)(transa, transb, &nrx, &nry, &ncy, &one,
00426                         x, &nrx, y, &nry, &zero, z, &nrx);
00427     } else /* zero-extent operations should return zeroes */
00428         for(i = 0; i < nrx*nry; i++) z[i] = 0;
00429 }
00430 
00431 
00438 SEXP R_matprodT(SEXP x, SEXP y) {
00439 
00440     SEXP ans;
00441     int nrx, ncx, nry, ncy;
00442     
00443     nrx = nrow(x);
00444     ncx = ncol(x);
00445     nry = nrow(y);
00446     ncy = ncol(y);
00447 
00448     if (ncx != ncy)
00449         error("R_matprod: dimensions don't match");
00450     PROTECT(ans = allocMatrix(REALSXP, nrx, nry));
00451     C_matprodT(REAL(x), nrx, ncx, REAL(y), nry, ncy, REAL(ans));
00452     UNPROTECT(1);
00453     return(ans);
00454 }
00455 
00456 
00465 void C_SampleNoReplace(int *x, int m, int k, int *ans) {
00466      
00467     int i, j, n = m;
00468 
00469     for (i = 0; i < m; i++)
00470         x[i] = i;
00471     for (i = 0; i < k; i++) {
00472         j = n * unif_rand(); 
00473         ans[i] = x[j];
00474         x[j] = x[--n];  
00475     }
00476 }
00477 
00478 
00484 SEXP R_permute(SEXP m) {
00485     
00486     SEXP x, ans;
00487     int n;
00488     
00489     n = INTEGER(m)[0];
00490     PROTECT(x = allocVector(INTSXP, n));
00491     PROTECT(ans = allocVector(INTSXP, n));
00492     C_SampleNoReplace(INTEGER(x), n, n, INTEGER(ans));
00493     UNPROTECT(2);
00494     return(ans);
00495 }
00496 
00497 
00504 SEXP R_rsubset(SEXP m, SEXP k) {
00505     
00506     SEXP x, ans;
00507     int n, j;
00508     
00509     n = INTEGER(m)[0];
00510     j = INTEGER(k)[0];
00511     PROTECT(x = allocVector(INTSXP, n));
00512     PROTECT(ans = allocVector(INTSXP, j));
00513     C_SampleNoReplace(INTEGER(x), n, j, INTEGER(ans));
00514     UNPROTECT(2);
00515     return(ans);
00516 }
00517 
00518 /* Unequal probability sampling; without-replacement case */
00519 
00520 void C_ProbSampleNoReplace(int n, double *p, int *perm,
00521                            int nans, int *ans)
00522 {
00523     double rT, mass, totalmass;
00524     int i, j, k, n1;
00525 
00526     /* Record element identities */
00527     for (i = 0; i < n; i++)
00528         perm[i] = i + 1;
00529 
00530     /* Sort probabilities into descending order */
00531     /* Order element identities in parallel */
00532     revsort(p, perm, n);
00533 
00534     /* Compute the sample */
00535     totalmass = 1;
00536     for (i = 0, n1 = n-1; i < nans; i++, n1--) {
00537         rT = totalmass * unif_rand();
00538         mass = 0;
00539         for (j = 0; j < n1; j++) {
00540             mass += p[j];
00541             if (rT <= mass)
00542                 break;
00543         }
00544         ans[i] = perm[j];
00545         totalmass -= p[j];
00546         for(k = j; k < n1; k++) {
00547             p[k] = p[k + 1];
00548             perm[k] = perm[k + 1];
00549         }
00550     }
00551 }
00552 
00553 
00561 int i_in_set(int i, int *iset, int p) {
00562 
00563     int j, is = 0;
00564         
00565     if (p == 0) return(0);
00566                     
00567     for (j = 0; j < p; j++) {
00568         if (iset[j] == i) {  
00569             is = 1;
00570             break; 
00571         }
00572     }
00573     return(is);
00574 }
00575 
00576 int C_i_in_set(int i, SEXP set) {
00577     if (LENGTH(set) > 0)
00578         return(i_in_set(i, INTEGER(set), LENGTH(set)));
00579     else 
00580         return(0);
00581 }
00582     
00583 int nrow(SEXP x) {
00584     return(INTEGER(getAttrib(x, R_DimSymbol))[0]);
00585 }
00586 
00587 int ncol(SEXP x) {
00588     return(INTEGER(getAttrib(x, R_DimSymbol))[1]);
00589 }
00590 
00591 /* compute index of variable with smallest p-value 
00592    (and largest test statistic in case two or more p-values coincide -- 
00593     should not happen anymore since we use 1 - (1 - p)^k for Bonferroni adjustment)
00594 */
00595 int C_whichmax(double *pvalue, double *teststat, int ninputs) {
00596 
00597     int ans = -1, j;
00598     double tmppval = 0.0, tmptstat = 0.0;
00599        
00600     /* <FIXME> can we switch to the log scale here? </FIXME> */
00601 
00602     tmppval = 0.0;
00603     tmptstat = 0.0;
00604     for (j = 0; j < ninputs; j++) {
00605         if (pvalue[j] > tmppval) {
00606             ans = j;
00607             tmppval = pvalue[j];
00608             tmptstat = teststat[j];
00609         } else {
00610             if (pvalue[j] == tmppval && teststat[j] > tmptstat) {  
00611                 ans = j;
00612                 tmppval = pvalue[j];
00613                 tmptstat = teststat[j];
00614             }
00615         }
00616     }
00617     return(ans);
00618 }
00619 
00620 SEXP R_whichmax(SEXP x, SEXP y) {
00621     SEXP ans;
00622     
00623     if (LENGTH(x) != LENGTH(y)) error("different length");
00624     PROTECT(ans = allocVector(INTSXP, 1));
00625     INTEGER(ans)[0] = C_whichmax(REAL(x), REAL(y), LENGTH(x));
00626     UNPROTECT(1);
00627     return(ans);
00628 }
00629 
00630 SEXP R_listplus(SEXP a, SEXP b, SEXP which) {
00631 
00632     int na, nb, i, j, *iwhich;
00633     double *dae, *dbe;
00634     SEXP ae, be;
00635 
00636     na = LENGTH(a);
00637     nb = LENGTH(b);
00638     if (na != nb) error("a and b are of different length");
00639     
00640     iwhich = LOGICAL(which);
00641     
00642     for (i = 0; i < na; i++) {
00643         if (iwhich[i]) continue;
00644         
00645         ae = VECTOR_ELT(a, i);
00646         be = VECTOR_ELT(b, i);
00647 
00648         if (LENGTH(ae) != LENGTH(be)) 
00649             error("elements %d are of different length", i);
00650             
00651         if (!isReal(ae) || !isReal(be))
00652             error("elements %d are not of type double", i);
00653             
00654         dae = REAL(ae);
00655         dbe = REAL(be);
00656         for (j = 0; j < LENGTH(ae); j++) 
00657             dae[j] += dbe[j];
00658     }
00659     return(a);
00660 }
00661 
00662 SEXP R_modify_response(SEXP x, SEXP vf) {
00663 
00664     double *src, *tar;
00665     int i, n;
00666     
00667     src = REAL(x);
00668     n = LENGTH(x);
00669 
00670     tar = REAL(get_transformation(vf, 1));
00671     for (i = 0; i < n; i++)
00672         tar[i] = src[i];
00673 
00674     tar = REAL(get_test_trafo(vf));
00675     for (i = 0; i < n; i++)
00676         tar[i] = src[i];
00677 
00678     tar = REAL(get_predict_trafo(vf));
00679     for (i = 0; i < n; i++)
00680         tar[i] = src[i];
00681 
00682     tar = REAL(get_variable(vf, 1));
00683     for (i = 0; i < n; i++)
00684         tar[i] = src[i];
00685                                           
00686     return(R_NilValue);
00687 }
00688 
00689 double F77_SUB(unifrnd)(void) { return unif_rand(); }
00690 
00691 void C_SampleSplitting(int n, double *prob, int *weights, int k) {
00692 
00693     int i;
00694     double *tmpprob;
00695     int *ans, *perm;
00696 
00697     tmpprob = Calloc(n, double);
00698     perm = Calloc(n, int);
00699     ans = Calloc(k, int);
00700     for (i = 0; i < n; i++) tmpprob[i] = prob[i];
00701 
00702     C_ProbSampleNoReplace(n, tmpprob, perm, k, ans);
00703     for (i = 0; i < n; i++) weights[i] = 0;
00704     for (i = 0; i < k; i++)
00705         weights[ans[i] - 1] = 1;
00706     Free(tmpprob); Free(perm); Free(ans);
00707 }
00708 
00714 void C_remove_weights(SEXP subtree, int removestats) {
00715 
00716     SET_VECTOR_ELT(subtree, S3_WEIGHTS, R_NilValue);
00717     
00718     if (!S3get_nodeterminal(subtree)) {
00719         if (removestats) {
00720             SET_VECTOR_ELT(VECTOR_ELT(subtree, S3_CRITERION), 
00721                            S3_iCRITERION, R_NilValue);
00722             SET_VECTOR_ELT(VECTOR_ELT(subtree, S3_CRITERION), 
00723                            S3_STATISTICS, R_NilValue);
00724         }
00725         C_remove_weights(S3get_leftnode(subtree), removestats);
00726         C_remove_weights(S3get_rightnode(subtree), removestats);
00727     }
00728 }
00729 
00730 SEXP R_remove_weights(SEXP subtree, SEXP removestats) {
00731 
00732     C_remove_weights(subtree, LOGICAL(removestats)[0]);
00733     return(R_NilValue);
00734 }
00735 
00736 double* C_tempweights(int j, SEXP weights, SEXP fitmem, SEXP inputs) {
00737 
00738     int nobs, *iNAs, i, k;
00739     double *dw, *dweights;
00740     SEXP NAs;
00741     
00742     dw = REAL(get_weights(fitmem));
00743     nobs = LENGTH(weights);
00744     dweights = REAL(weights);
00745     NAs = get_missings(inputs, j);
00746     iNAs = INTEGER(NAs);
00747     if (length(NAs) == 0) return(dw);
00748     for (i = 0; i < nobs; i++) dw[i] = dweights[i];
00749     for (k = 0; k < LENGTH(NAs); k++)
00750         dw[iNAs[k] - 1] = 0.0;
00751     
00752     return(dw);
00753 }