Main Page | Directories | File List | File Members | Related Pages

IndependenceTest.c

Go to the documentation of this file.
00001 
00009 #include "party.h"
00010 
00011 
00021 void C_TeststatPvalue(const SEXP linexpcov, const SEXP varctrl, 
00022                       double *ans_teststat, double *ans_pvalue) {
00023     
00024     double releps, abseps, tol;
00025     int maxpts;
00026     
00027     maxpts = get_maxpts(varctrl);
00028     tol = get_tol(varctrl);
00029     abseps = get_abseps(varctrl);
00030     releps = get_releps(varctrl);
00031     
00032     /* compute the test statistic */
00033     ans_teststat[0] = C_TestStatistic(linexpcov, get_teststattype(varctrl), 
00034                                   get_tol(varctrl));
00035 
00036     /* compute the p-value if requested */                                  
00037     if (get_pvalue(varctrl))
00038         ans_pvalue[0] =  C_ConditionalPvalue(ans_teststat[0], linexpcov, 
00039                                          get_teststattype(varctrl),
00040                                          tol, &maxpts, &releps, &abseps);
00041     else
00042         ans_pvalue[0] = 1.0;
00043 }
00044 
00053 void C_TeststatCriterion(const SEXP linexpcov, const SEXP varctrl, 
00054                          double *ans_teststat, double *ans_criterion) {
00055     
00056     C_TeststatPvalue(linexpcov, varctrl, ans_teststat, ans_criterion);
00057     
00058     /* the node criterion is to be MAXIMISED, 
00059        i.e. 1-pvalue or test statistic \in \[0, \infty\] */
00060     if (get_pvalue(varctrl))
00061         ans_criterion[0] = 1 - ans_criterion[0];
00062     else
00063         ans_criterion[0] = ans_teststat[0];
00064     
00065 }
00066 
00067 
00081 void C_IndependenceTest(const SEXP x, const SEXP y, const SEXP weights, 
00082                         const SEXP ScoreMatrix, SEXP Mlinexpcov, 
00083                         const int ORDERED, SEXP linexpcov, SEXP varctrl, 
00084                         SEXP ans) {
00085     
00086     /* compute linear statistic and its conditional expectation and
00087        covariance
00088     */
00089     C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y), 
00090                     REAL(weights), nrow(x), 1, 
00091                     GET_SLOT(linexpcov, PL2_expcovinfSym), linexpcov);
00092 
00093     /* if one (or both) variables are ordered multiply linear statistic,
00094        expectation and covariance
00095      */
00096     if (ORDERED) {
00097         C_MLinearStatistic(linexpcov, ScoreMatrix, Mlinexpcov);
00098         
00099         /* for quadform type test statistics, compute the Moore-Penrose inverse
00100            of the covariance matrix
00101          */
00102         if (get_teststattype(varctrl) == 2) 
00103             C_LinStatExpCovMPinv(Mlinexpcov, get_tol(varctrl));
00104             
00105         /* compute test statistic and pvalue */
00106         C_TeststatPvalue(Mlinexpcov, varctrl, &REAL(ans)[0], &REAL(ans)[1]);
00107     } else {
00108         /* for unordered variables */
00109         if(get_teststattype(varctrl) == 2) 
00110             C_LinStatExpCovMPinv(linexpcov, get_tol(varctrl));
00111         C_TeststatPvalue(linexpcov, varctrl, &REAL(ans)[0], &REAL(ans)[1]);
00112     }
00113 }
00114 
00115 
00127 SEXP R_IndependenceTest(SEXP x, SEXP y, SEXP weights, SEXP ScoreMatrix, 
00128                         SEXP Mlinexpcov, SEXP linexpcov, SEXP varctrl) {
00129                         
00130     SEXP ans;
00131     int scores = 0;
00132     
00133     PROTECT(ans = allocVector(REALSXP, 2));
00134     if (ScoreMatrix != R_NilValue && Mlinexpcov != R_NilValue)
00135         scores = 1;
00136         
00137     C_IndependenceTest(x, y, weights, ScoreMatrix, Mlinexpcov, scores, 
00138                        linexpcov, varctrl, ans);
00139     UNPROTECT(1);
00140     return(ans);
00141 }
00142 
00143 
00157 void C_GlobalTest(const SEXP learnsample, const SEXP weights, 
00158                   SEXP fitmem, const SEXP varctrl, 
00159                   const SEXP gtctrl, const double minsplit, 
00160                   double *ans_teststat, double *ans_criterion) {
00161 
00162     int ninputs, nobs, yORDERED, xORDERED, j, i, k, RECALC = 1, type;
00163     SEXP responses, inputs, y, x, xmem, Mxmem, expcovinf;
00164     SEXP thiswhichNA;
00165     double *thisweights, *dweights, *pvaltmp;
00166     int *ithiswhichNA, RANDOM, mtry, *randomvar, *index;
00167     int *dontuse, *dontusetmp;
00168     
00169     ninputs = get_ninputs(learnsample);
00170     nobs = get_nobs(learnsample);
00171     responses = GET_SLOT(learnsample, PL2_responsesSym);
00172     inputs = GET_SLOT(learnsample, PL2_inputsSym);
00173     dweights = REAL(weights);
00174     
00175     yORDERED = is_ordinal(responses, 1);
00176     y = get_transformation(responses, 1);
00177     
00178     expcovinf = GET_SLOT(fitmem, PL2_expcovinfSym);
00179     C_ExpectCovarInfluence(REAL(y), ncol(y), REAL(weights), 
00180                            nobs, expcovinf);
00181     
00182     if (REAL(GET_SLOT(expcovinf, PL2_sumweightsSym))[0] < minsplit) {
00183         for (j = 0; j < ninputs; j++) {
00184             ans_teststat[j] = 0.0;
00185             ans_criterion[j] = 0.0;
00186         }
00187     } else {
00188 
00189         dontuse = INTEGER(get_dontuse(fitmem));
00190         dontusetmp = INTEGER(get_dontusetmp(fitmem));
00191     
00192         for (j = 0; j < ninputs; j++) dontusetmp[j] = !dontuse[j];
00193     
00194         /* random forest */
00195         RANDOM = get_randomsplits(gtctrl);
00196         mtry = get_mtry(gtctrl);
00197         if (RANDOM & (mtry > ninputs)) {
00198             warning("mtry is larger than ninputs, using mtry = inputs");
00199             mtry = ninputs;
00200             RANDOM = 0;
00201         }
00202         if (RANDOM) {
00203             index = Calloc(ninputs, int);
00204             randomvar = Calloc(mtry, int);
00205             GetRNGstate();
00206             C_SampleNoReplace(index, ninputs, mtry, randomvar);
00207             PutRNGstate();
00208             j = 0;
00209             for (k = 0; k < mtry; k++) {
00210                 j = randomvar[k];
00211                 while(dontuse[j] && j < ninputs) j++;
00212                 if (j == ninputs) 
00213                     error("not enough variables to sample from");
00214                 dontusetmp[j] = 0;
00215             }
00216             Free(index);
00217             Free(randomvar);
00218         }
00219 
00220         for (j = 1; j <= ninputs; j++) {
00221 
00222             if ((RANDOM && dontusetmp[j - 1]) || dontuse[j - 1]) {
00223                 ans_teststat[j - 1] = 0.0;
00224                 ans_criterion[j - 1] = 0.0;
00225                 continue; 
00226             }
00227         
00228             x = get_transformation(inputs, j);
00229             xORDERED = is_ordinal(inputs, j);
00230 
00231             xmem = get_varmemory(fitmem, j);
00232             if (!has_missings(inputs, j)) {
00233                 C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y),
00234                                 REAL(weights), nrow(x), !RECALC, expcovinf,
00235                                 xmem);
00236             } else {
00237                 thisweights = REAL(get_weights(fitmem, j));
00238                 thiswhichNA = get_missings(inputs, j);
00239                 ithiswhichNA = INTEGER(thiswhichNA);
00240                 for (i = 0; i < nobs; i++) thisweights[i] = dweights[i];
00241                 for (k = 0; k < LENGTH(thiswhichNA); k++)
00242                     thisweights[ithiswhichNA[k] - 1] = 0.0;
00243                 C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y),
00244                                 thisweights, nrow(x), RECALC, 
00245                                 GET_SLOT(xmem, PL2_expcovinfSym),
00246                                 xmem);
00247             }
00248             if (yORDERED || xORDERED) {
00249                 Mxmem = get_varMmemory(fitmem, j);
00250                 C_MLinearStatistic(xmem, get_Mscorematrix(fitmem, j), Mxmem);
00251                 if (get_teststattype(varctrl) == 2)
00252                     C_LinStatExpCovMPinv(Mxmem, get_tol(varctrl));
00253                     C_TeststatCriterion(Mxmem, varctrl, &ans_teststat[j - 1], 
00254                                         &ans_criterion[j - 1]);
00255             } else {
00256                 if(get_teststattype(varctrl) == 2)
00257                     C_LinStatExpCovMPinv(xmem, get_tol(varctrl));
00258                 C_TeststatCriterion(xmem, varctrl, &ans_teststat[j - 1], 
00259                                     &ans_criterion[j - 1]);
00260             }
00261         }                
00262 
00263         type = get_testtype(gtctrl);
00264         switch(type) {
00265             /* Bonferroni */
00266             case BONFERRONI: 
00267                     for (j = 0; j < ninputs; j++) {
00268                         ans_criterion[j] = 1 - (1 - ans_criterion[j])*ninputs;
00269                         if (ans_criterion[j] < 0) 
00270                             ans_criterion[j] = 0.0;
00271                     }
00272                     break;
00273             /* Monte-Carlo */
00274             case MONTECARLO: 
00275                     pvaltmp = Calloc(ninputs, double);
00276                     C_MonteCarlo(ans_criterion, learnsample, weights, fitmem, 
00277                                  varctrl, gtctrl, pvaltmp);
00278                     for (j = 0; j < ninputs; j++)
00279                         ans_criterion[j] = 1 - pvaltmp[j];
00280                     Free(pvaltmp);
00281                     break;
00282             /* aggregated */
00283             case AGGREGATED: 
00284                     error("C_GlobalTest: aggregated global test not yet implemented");
00285                     break;
00286             /* raw */
00287             case RAW: break;
00288             default: error("C_GlobalTest: undefined value for type argument");
00289                      break;
00290         }
00291     }
00292 }
00293 
00294 
00304 SEXP R_GlobalTest(SEXP learnsample, SEXP weights, SEXP fitmem, 
00305                   SEXP varctrl, SEXP gtctrl) {
00306 
00307     SEXP ans, teststat, criterion;
00308     
00309     PROTECT(ans = allocVector(VECSXP, 2));
00310     SET_VECTOR_ELT(ans, 0, 
00311         teststat = allocVector(REALSXP, get_ninputs(learnsample)));
00312     SET_VECTOR_ELT(ans, 1, 
00313         criterion = allocVector(REALSXP, get_ninputs(learnsample)));
00314 
00315     C_GlobalTest(learnsample, weights, fitmem, varctrl, gtctrl, 0, 
00316                  REAL(teststat), REAL(criterion));
00317     
00318     UNPROTECT(1);
00319     return(ans);
00320 }

Generated on Thu Feb 16 09:11:58 2006 for party by  doxygen 1.4.2