Ring et al. 2017 Hematocrit spline fits and residuals

Caroline Ring

2018-01-23

library(httk)
library(survey)
library(data.table)
library(ggplot2)
all.reth <- levels(nhanes_mec_svy$variables[, ridreth1])
all.gendr <- levels(nhanes_mec_svy$variables[, riagendr])

Plot hematocrit vs. age with spline fits.

par(mfcol=c(5,2),
    mar=c(2,2,2.5,2)+0.1,
    oma=c(4,4,4,0),
    mgp=c(1,1,0))

for (gendr in all.gendr){
  for (r in all.reth){
    grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & is.finite(loglbxhct))
    svyplot(loglbxhct~ridexagm, grsub, style="transparent", 
            basecol="gray50", xlab=NA, ylab=NA, cex.axis=1.5)
    lines(sort(unique(grsub$variables[,ridexagm])), 
          predict(spline_hematocrit[gender==gendr & reth==r,
                                    hct_spline[[1]]],
                  sort(unique(grsub$variables[,ridexagm])))$y)
    title(paste(gendr, r), line=0.5, cex.main=1.5)
    }
  }
title(xlab="Age, months",
      ylab="Log hematocrit, log %",
      outer=TRUE,
      line=2, cex.lab=2)
title(main="Hematocrit vs. age, with spline fits",
      outer=TRUE,
      line=1, cex.main=2)

Plot log hematocrit residuals vs. age.

par(mfcol=c(5,2),
    mar=c(2,2,2.5,2)+0.1,
    oma=c(4,4,4,0),
    mgp=c(1,1,0))

for (gendr in all.gendr){
  for (r in all.reth){
    grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(loglbxhct))
    grsub <- update(grsub, loglbxhct_resid=loglbxhct- 
                      predict(spline_hematocrit[gender==gendr & 
                                                    reth==r,
                                                  hct_spline[[1]]],
                              grsub$variables[,ridexagm])$y)
    svyplot(loglbxhct_resid~ridexagm, grsub, style="transparent", 
            xlab=NA, ylab=NA, cex.axis=1.5)
    title(paste(gendr, r), line=0.5, cex.main=1.5)
    }
  }
title(xlab="Age, months",
      ylab="Log hematocrit residual, log %", 
      outer=TRUE,
      line=2, cex.lab=2)
title(main = "Hematocrit residuals vs. age",
      outer=TRUE, line=1, cex.main=2)

Make QQ plots of log hematocrit residual quantiles vs. normal quantiles.

par(mfcol=c(5,2),
    mar=c(2,2,2.5,2)+0.1,
    oma=c(4,4,4,0),
    mgp=c(1,1,0))

for (gendr in all.gendr){
  for (r in all.reth){
    grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(loglbxhct))
    grsub <- update(grsub, loghctresid=loglbxhct- 
                      predict(spline_hematocrit[gender==gendr & 
                                                    reth==r,
                                                  hct_spline[[1]]],
                              grsub$variables[,ridexagm])$y)
qprobs <- ppoints(n=length(grsub$prob))
norm.q <- qnorm(p=qprobs)
loghctresid.q <- svyquantile(~loghctresid, 
                            design=grsub,
                            quantiles=qprobs)
plot(y=loghctresid.q,
     x=norm.q, #Plot weighted points
     type='p',
     col="gray70",
     xlab=NA, ylab=NA)
#Add qqline: passing through first and third quartiles
x <- qnorm(p=c(0.25, 0.75))
y <- as.vector(svyquantile(~loghctresid, 
                           design=grsub,
                           quantiles=c(0.25, 0.75)))
m <- diff(y)/diff(x)
intercept <- -m*x[1]+y[1]
abline(a=intercept, b=m)
title(paste(gendr, r), line=0.5, cex.main=1.5)
}
}
title(xlab='Normal quantiles',
     ylab='log Hct resid quantiles',
     outer=TRUE,
     line=2,
     cex.lab=2)

Clearly, the log residuals aren’t normally distributed for most combinations of race and gender. For this reason, I decided to use a kernel density estimate of the residual distribution as a way to non-parametrically estimate the residual distribution, rather than assuming a particular theoretical parametric distribution.

KDE for residuals

Here are the KDEs for the log hematocrit residuals for each gender and race/ethnicity combination.

par(mfcol=c(5,2),
    mar=c(2,2,2.5,2)+0.1,
    oma=c(4,4,4,0),
    mgp=c(1,1,0))

for (gendr in all.gendr){
  for (r in all.reth){
    grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(loglbxhct))
    grsub <- update(grsub, loghctresid=loglbxhct- 
                      predict(spline_hematocrit[gender==gendr & 
                                                    reth==r,
                                                  hct_spline[[1]]],
                              grsub$variables[,ridexagm])$y)
    tmp.kde <- ks::kde(x=grsub$variables[, loghctresid],
                   w=grsub$variables[, wtmec6yr])
    plot(tmp.kde, xlab=NA, ylab=NA, xlim=c(-0.65,0.4), drawpoints=TRUE, col.pt="black")
    title(paste(gendr, r), line=0.5, cex.main=1.5)
    }
  }
title(xlab='Log hematocrit residual',
     ylab='KDE density',
     outer=TRUE,
     line=2,
     cex.lab=2)