RandomForest.c

Go to the documentation of this file.
00001 
00009 #include "party.h"
00010 
00011 
00021 SEXP R_Ensemble(SEXP learnsample, SEXP weights, SEXP fitmem, SEXP controls) {
00022             
00023      SEXP nweights, tree, where, ans;
00024      double *dnweights, *dweights, sw = 0.0, *prob, tmp;
00025      int nobs, i, b, B , nodenum = 1, *iweights, *iweightstmp, 
00026          *iwhere, replace, fraction, wgrzero = 0, realweights = 0;
00027      
00028      B = get_ntree(controls);
00029      nobs = get_nobs(learnsample);
00030      
00031      PROTECT(ans = allocVector(VECSXP, B));
00032 
00033      iweights = Calloc(nobs, int);
00034      iweightstmp = Calloc(nobs, int);
00035      prob = Calloc(nobs, double);
00036      dweights = REAL(weights);
00037 
00038      for (i = 0; i < nobs; i++) {
00039          /* sum of weights */
00040          sw += dweights[i];
00041          /* number of weights > 0 */
00042          if (dweights[i] > 0) wgrzero++;
00043          /* case weights or real weights? */
00044          if (dweights[i] - ftrunc(dweights[i]) > 0) 
00045              realweights = 1;
00046      }
00047      for (i = 0; i < nobs; i++)
00048          prob[i] = dweights[i]/sw;
00049 
00050      replace = get_replace(controls);
00051      /* fraction of number of obs with weight > 0 */
00052      if (realweights) {
00053          /* fraction of number of obs with weight > 0 for real weights*/
00054          tmp = (get_fraction(controls) * wgrzero);
00055      } else {
00056          /* fraction of sum of weights for case weights */
00057          tmp = (get_fraction(controls) * sw);
00058      }
00059      fraction = (int) ftrunc(tmp);
00060      if (ftrunc(tmp) < tmp) fraction++;
00061 
00062      if (!replace) {
00063          if (fraction < 10)
00064              error("fraction of %f is too small", fraction);
00065      }
00066 
00067      /* <FIXME> can we call those guys ONCE? what about the deeper
00068          calls??? </FIXME> */
00069      GetRNGstate();
00070   
00071      for (b  = 0; b < B; b++) {
00072          SET_VECTOR_ELT(ans, b, tree = allocVector(VECSXP, NODE_LENGTH + 1));
00073          SET_VECTOR_ELT(tree, NODE_LENGTH, where = allocVector(INTSXP, nobs));
00074          iwhere = INTEGER(where);
00075          for (i = 0; i < nobs; i++) iwhere[i] = 0;
00076      
00077          C_init_node(tree, nobs, get_ninputs(learnsample), 
00078                      get_maxsurrogate(get_splitctrl(controls)),
00079                      ncol(get_predict_trafo(GET_SLOT(learnsample, 
00080                                                    PL2_responsesSym))));
00081 
00082          /* generate altered weights for perturbation */
00083          if (replace) {
00084              /* weights for a bootstrap sample */
00085              rmultinom((int) sw, prob, nobs, iweights);
00086          } else {
00087              /* weights for sample splitting */
00088              C_SampleSplitting(nobs, prob, iweights, fraction);
00089          }
00090 
00091          nweights = S3get_nodeweights(tree);
00092          dnweights = REAL(nweights);
00093          for (i = 0; i < nobs; i++) dnweights[i] = (double) iweights[i];
00094      
00095          C_TreeGrow(tree, learnsample, fitmem, controls, iwhere, &nodenum, 1);
00096          nodenum = 1;
00097      }
00098 
00099      PutRNGstate();
00100 
00101      Free(prob); Free(iweights); Free(iweightstmp);
00102      UNPROTECT(1);
00103      return(ans);
00104 }

Generated on Wed Jun 20 15:55:33 2007 for party by  doxygen 1.4.6