This vignette generates the data and figures from the partition coefficient evaluation paper.
library(httk)
library(gdata)
library(ggplot2)
library(viridis)
library(CensRegMod)
library(gmodels)
library(gplots)
library(scales)
library(colorspace)
We first filter the measured rat Kp data, pc.data. Then the old and new Kp predictions are made, along with error and improvement measures, and these are all consolidated into a table for analysis and plotting. Note that the final table contains log10-transformed values and error and improvements derived from subtracting these values. Only relevant rat values are used. Compounds with Funbound.plasma and partition coefficients of zero are removed as well as compounds with approximated Funbound.plasma values.
pc.table <- NULL
pc.data <- subset(pc.data,fu != 0 & Exp_PC != 0 & Tissue %in% c("Adipose","Bone","Brain","Gut",
"Heart","Kidney","Liver","Lung","Muscle","Skin","Spleen","Blood Cells") &
tolower(Species) == 'rat' & !CAS %in% c('10457-90-6','5786-21-0','17617-23-1','69-23-8','2898-12-6',
'57562-99-9','59-99-4','2955-38-6','155-97-5','41903-57-5','58-55-9','77-32-7','59-05-2','60-54-8'))
cas.list <- get_cheminfo(model='schmitt',species='rat')
cas.list <- cas.list[cas.list %in% pc.data[,'CAS']]
ma.data.list <- subset(chem.physical_and_invitro.data,!is.na(logMA))[,'CAS']
for(this.cas in cas.list){
parameters <- parameterize_schmitt(chem.cas=this.cas,species='rat')
init.parameters <- parameters
charge <- calc_ionization(chem.cas=this.cas,pH=7.4)$fraction_charged
if(!this.cas %in% ma.data.list){
init.parameters$MA <- 10^(0.999831 - 0.016578*38.7 + 0.881721 * log10(parameters$Pow))
}
pcs <- predict_partitioning_schmitt(parameters=parameters,species='rat',regression=F)
init.pcs <- predict_partitioning_schmitt(parameters=init.parameters,species='rat',regression=F)
for(this.tissue in subset(pc.data,CAS==this.cas)[,'Tissue']){
if(this.tissue == 'Blood Cells') this.pc <- 'rbc'
else this.pc <- this.tissue
pc.table <- rbind(pc.table,cbind(as.data.frame(this.cas),as.data.frame(this.tissue),
as.data.frame(log10(init.pcs[[which(substr(names(init.pcs),2,nchar(names(init.pcs))-3)
== tolower(this.pc))]] * init.parameters$Funbound.plasma)),
as.data.frame(log10(pcs[[which(substr(names(pcs),2,nchar(names(pcs))-3)
== tolower(this.pc))]] * parameters$unadjusted.Funbound.plasma)),
as.data.frame(log10(init.pcs[[which(substr(names(init.pcs),2,nchar(names(init.pcs))-3)
== tolower(this.pc))]] * init.parameters$unadjusted.Funbound.plasma)),
as.data.frame(log10(pcs[[which(substr(names(pcs),2,nchar(names(pcs))-3)
== tolower(this.pc))]] * parameters$Funbound.plasma)),
as.data.frame(log10(subset(pc.data,CAS==this.cas & Tissue==this.tissue)[,'Exp_PC'])),
as.data.frame(subset(pc.data,CAS==this.cas & Tissue==this.tissue)[,'LogP']),as.data.frame(charge),
as.data.frame(as.character(subset(pc.data,CAS == this.cas)[1,'A.B.N'])),
as.data.frame(subset(pc.data,CAS == this.cas)[1,'fu'])))
}
}
colnames(pc.table) <- c('CAS','Tissue','fup.adjustment','ma.adjustment','init.Predicted',
'Predicted','Experimental','logP','charge','type','fup')
init.error <- pc.table[,'Experimental'] - pc.table[,'init.Predicted']
fup.error <- pc.table[,'Experimental'] - pc.table[,'fup.adjustment']
ma.error <- pc.table[,'Experimental'] - pc.table[,'ma.adjustment']
final.error <- pc.table[,'Experimental'] - pc.table[,'Predicted']
fup.improvement <- abs(init.error) - abs(fup.error)
ma.improvement <- abs(init.error) - abs(ma.error)
final.improvement <- abs(init.error) - abs(final.error)
pc.table <- cbind(pc.table,fup.improvement,ma.improvement, final.improvement,
final.error,init.error,ma.error,fup.error)
scientific_10 <- function(x) {
out <- gsub("1e", "10^", scientific_format()(x))
out <- gsub("\\+","",out)
out <- gsub("10\\^01","10",out)
out <- parse(text=gsub("10\\^00","1",out))
}
init.plot <- ggplot() + geom_point(data=pc.table,aes(10^(init.Predicted),10^(Experimental))) +
geom_abline() + labs(y=expression(paste("Measured ",K[p])), x=expression(paste("Predicted ",K[p]))) +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
plot.title=element_text(size=18,hjust = 0.5)) + scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) + ggtitle('(A)')
final.plot <- ggplot() + geom_point(data=pc.table,aes(10^(Predicted),10^(Experimental))) +
geom_abline() + labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p]))) +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
plot.title=element_text(size=18,hjust=0.5)) + scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) + ggtitle('(B)')
fup.change.plot <- ggplot() +
geom_point(data=pc.table[order(pc.table[,'fup.improvement'],decreasing=F),],
aes(10^(fup.adjustment),10^(Experimental),color=fup.improvement)) + geom_abline() +
labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p])),color='Improvement') +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16)) +
scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_color_viridis(direction=-1,option='inferno')
ma.subset <- subset(pc.table,!CAS %in% ma.data.list)
ma.change.plot <- ggplot() +
geom_point(data=ma.subset[order(ma.subset[,'ma.improvement'],decreasing=F),]
,aes(10^(ma.adjustment),10^(Experimental),color=ma.improvement)) + geom_abline() +
labs(y=expression(paste("Measured ",K[p])),x=expression(paste("Predicted ",K[p])),color='Improvement') +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16)) +
scale_x_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_y_log10(label=scientific_10,limits=c(0.01,10^4.5)) +
scale_color_viridis(direction=-1,option='inferno')
Now we calculate and plot the regressions for all tissues, together with their 95% confidence intervals.
regressions <- NULL
for(tissue in as.character(unique(pc.table[,'Tissue']))){
fit <- lm(Experimental ~ Predicted ,data=subset(pc.table,Tissue==tissue))
smry <- summary(fit)
est <- estimable(fit, cm=diag(2), beta0=c(0,1), joint.test=TRUE)
regressions <- rbind(regressions,cbind(tissue,as.data.frame(fit$coefficients[['(Intercept)']]),
as.data.frame(fit$coefficients[['Predicted']]),
as.data.frame(smry$coefficients[['Predicted','Pr(>|t|)']]),
as.data.frame(smry$sigma),as.data.frame(smry$r.squared),
as.data.frame(smry[[11]][1,1]),as.data.frame(smry[[11]][2,2]),
as.data.frame(smry[[11]][1,2]),as.data.frame(smry$df[2]),as.data.frame(est[[3]])))
}
colnames(regressions) <- c('Tissue','Intercept','Slope','P-value','SE','R-squared',
'Int Var','Slp Var','Cov','df','estimable')
x.cf <- seq(-2,3.5,.01)
for(tissue in as.character(unique(pc.table[,'Tissue']))){
conf <- qt(0.975,df=subset(regressions,Tissue==tissue)[['df']]+1) *
subset(regressions,Tissue==tissue)[['SE']] *
sqrt(subset(regressions,Tissue==tissue)[['Int Var']] +
x.cf^2 * subset(regressions,Tissue==tissue)[['Slp Var']] +
2 * x.cf * subset(regressions,Tissue==tissue)[['Cov']] + 1)
line <- subset(regressions,Tissue==tissue)[['Intercept']] +
x.cf * subset(regressions,Tissue==tissue)[['Slope']]
y.cf <- line + conf
y.ncf <- line - conf
cf <- cbind(as.data.frame(x.cf),as.data.frame(y.cf),as.data.frame(y.ncf))
if(tissue == 'Blood Cells'){
eval(parse(text= paste('Blood <- ggplot() + labs(y=expression(paste("Inferred ",K[p])),
x=expression(paste("Predicted ",K[p]))) + geom_abline(linetype = "dashed") +
geom_point(data=subset(pc.table,Tissue == \'',tissue,'\'),aes(10^(Predicted),
10^(Experimental))) + theme(axis.text=element_text(size=14),
axis.title=element_text(size=14),plot.title=element_text(size=14,hjust=0.5)) +
scale_x_log10(label=scientific_10,limits=c(0.01,1000)) +
scale_y_log10(label=scientific_10,limits=c(0.01,1000)) +
geom_line(data=cf,aes(10^(x.cf),10^(y.cf))) +
geom_line(data=cf,aes(10^(x.cf),10^(y.ncf))) +
geom_abline(intercept=subset(regressions,Tissue==tissue)[[\'Intercept\']],
slope=subset(regressions,Tissue==tissue)[[\'Slope\']]) +
ggtitle(\'Red Blood Cells\')',sep='')))
}else{
eval(parse(text= paste(tissue,' <- ggplot() + labs(y=expression(paste("Measured ",K[p]))
,x=expression(paste("Predicted ",K[p]))) + geom_abline(linetype = "dashed") +
geom_point(data=subset(pc.table,Tissue == \'',tissue,'\'),
aes(10^(Predicted),10^(Experimental))) + theme(axis.text=element_text(size=14),
axis.title=element_text(size=14),plot.title=element_text(size=14,hjust=0.5)) +
scale_x_log10(label=scientific_10,limits=c(0.01,1000)) +
scale_y_log10(label=scientific_10,limits=c(0.01,1000)) +
geom_line(data=cf,aes(10^(x.cf),10^(y.cf))) +
geom_line(data=cf,aes(10^(x.cf),10^(y.ncf))) +
geom_abline(intercept=subset(regressions,Tissue==tissue)[[\'Intercept\']],
slope=subset(regressions,Tissue==tissue)[[\'Slope\']]) +
ggtitle(\'',tissue,'\')',sep='')))
}
}
In vivo volume of distribution data are compared with the predictions, and errors are calculated. Regressions and improvements are calculated and plotted.
obach <- subset(Obach2008,CAS %in% get_cheminfo(model='schmitt'))
vd.table <- NULL
for(this.cas in obach[,'CAS']){
parameters <- parameterize_schmitt(chem.cas=this.cas)
init.parameters <- parameters
if(!this.cas %in% ma.data.list){
init.parameters$MA <- 10^(0.999831 - 0.016578*37 + 0.881721 * log10(parameters$Pow))
}
pcs <- predict_partitioning_schmitt(parameters=parameters,regression=F)
init.pcs <- predict_partitioning_schmitt(parameters=init.parameters,regression=F)
reg.pcs <- predict_partitioning_schmitt(parameters=parameters,regression=T)
vdist <- calc_vdist(parameters=c(pcs,Funbound.plasma=parameters$Funbound.plasma))
init.vdist <- calc_vdist(parameters=c(init.pcs,Funbound.plasma=parameters$unadjusted.Funbound.plasma))
reg.vdist <- calc_vdist(parameters=c(reg.pcs,Funbound.plasma=parameters$Funbound.plasma))
vd.table <- rbind(vd.table,cbind(as.data.frame(this.cas),as.data.frame(log10(init.vdist)),
as.data.frame(log10(vdist)),as.data.frame(log10(reg.vdist)),
as.data.frame(log10(subset(obach,CAS==this.cas)[['VDss (L/kg)']]))))
}
colnames(vd.table) <- c('CAS','init.vdist','adjusted.vdist','calibrated.vdist','Experimental')
init.error <- vd.table[,'Experimental'] - vd.table[,'init.vdist']
adjustment.error <- vd.table[,'Experimental'] - vd.table[,'adjusted.vdist']
calibration.error <- vd.table[,'Experimental'] - vd.table[,'calibrated.vdist']
adjustment.improvement <- abs(init.error) - abs(adjustment.error)
calibration.improvement <- abs(adjustment.error) - abs(calibration.error)
vd.table <- cbind(vd.table,adjustment.improvement,calibration.improvement,
init.error,adjustment.error,calibration.error)
fit <- lm(Experimental ~ calibrated.vdist,data=vd.table)
smry <- summary(fit)
calibrated.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
as.data.frame(fit$coefficients['calibrated.vdist']),
as.data.frame(smry$coefficients['calibrated.vdist','Pr(>|t|)']),
as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
fit <- lm(Experimental ~ init.vdist,data=vd.table)
smry <- summary(fit)
init.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
as.data.frame(fit$coefficients['init.vdist']),
as.data.frame(smry$coefficients['init.vdist','Pr(>|t|)']),
as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
fit <- lm(Experimental ~ adjusted.vdist,data=vd.table)
smry <- summary(fit)
adjustment.reg <- cbind(as.data.frame(fit$coefficients['(Intercept)']),
as.data.frame(fit$coefficients['adjusted.vdist']),
as.data.frame(smry$coefficients['adjusted.vdist','Pr(>|t|)']),
as.data.frame(smry$sigma),as.data.frame(smry$r.squared))
colnames(init.reg) <- colnames(adjustment.reg) <-
colnames(calibrated.reg) <- c('Intercept','Slope','P-value','Std Err','R-squared')
init.vd.plot <- ggplot(vd.table,aes(10^(init.vdist),10^(Experimental))) + geom_point() +
geom_abline(intercept = init.reg[['Intercept']], slope = init.reg[["Slope"]]) +
geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
ylab("Measured Volume of Distribution") + theme(axis.text=element_text(size=16),
axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
scale_x_log10(label=scientific_10,limits=c(10^(-1.5),10^(8.5))) +
scale_y_log10(label=scientific_10,limits=c(10^(-1.5),10^(8.5))) +
ggtitle('(A)')
adjustment.plot <- ggplot() +
geom_point(data=vd.table[order(vd.table[,'adjustment.improvement'],decreasing=F),],
aes(10^(adjusted.vdist),10^(Experimental),color=adjustment.improvement)) +
geom_abline(intercept = adjustment.reg[['Intercept']],slope = adjustment.reg[["Slope"]]) +
geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
ylab("Measured Volume of Distribution") + theme(legend.position = c(.95, .95),
legend.justification = c("right", "top"),legend.box.just = "right",
legend.margin = margin(6, 6, 6, 6),axis.text=element_text(size=16),
axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
scale_x_log10(limits=c(10^(-1.5),10^(3))) + scale_y_log10(limits=c(10^(-1.5),10^(3))) +
ggtitle('(B)') + scale_color_viridis(direction=-1,option='inferno')
calibration.plot <- ggplot() +
geom_point(data=vd.table[order(vd.table[,'calibration.improvement'],decreasing=F),],
aes(10^(calibrated.vdist),10^(Experimental),color=calibration.improvement)) +
geom_abline(intercept = calibrated.reg[['Intercept']],slope = calibrated.reg[["Slope"]]) +
geom_abline(linetype = "dashed") + xlab("Predicted Volume of Distribution") +
ylab("Measured Volume of Distribution") + theme(legend.position = c(.95, .95),
legend.justification = c("right", "top"),legend.box.just = "right",
legend.margin = margin(6, 6, 6, 6),axis.text=element_text(size=16),
axis.title=element_text(size=16),plot.title=element_text(size=18,hjust = 0.5)) +
scale_x_log10(limits=c(10^(-1.5),10^(3))) + scale_y_log10(limits=c(10^(-1.5),10^(3))) +
ggtitle('(C)') + scale_color_viridis(direction=-1,option='inferno')
Now we pull in the in vivo blood to plasma ratios, use these to calculate the inferred red blood cell to plasma ratios, and then make predictions for these values. A censored regressions is performed, and predictions are plotted against errors.
rb2p.data <- subset(chem.physical_and_invitro.data,!is.na(Human.Rblood2plasma))
measured.rb2p <- NULL
measured.krbc <- NULL
predicted.rb2p <- NULL
predicted.krbc <- NULL
cas <- NULL
charge <- NULL
fup <- NULL
logP <- NULL
pka_donor <- NULL
pka_accept <- NULL
for(this.cas in rb2p.data[rb2p.data[,'CAS'] %in% get_cheminfo(model='schmitt'),'CAS']){
rb2p <- get_rblood2plasma(chem.cas=this.cas)
krbc <- (rb2p + .44 - 1) / 0.44
measured.rb2p <- c(measured.rb2p,rb2p)
measured.krbc <- c(measured.krbc,krbc)
parameters <- parameterize_schmitt(chem.cas=this.cas)
pcs <- predict_partitioning_schmitt(parameters=parameters)
predicted.krbc <- c(predicted.krbc,pcs[['Krbc2pu']] * parameters$Funbound.plasma)
cas <- c(cas,this.cas)
charge <- c(charge,calc_ionization(chem.cas=this.cas,pH=7.4)$fraction_charged)
fup <- c(fup,parameters$unadjusted.Funbound.plasma)
logP <- c(logP,log10(parameters$Pow))
pka_donor <- c(pka_donor,paste(parameters$pKa_Donor,collapse=','))
pka_accept <- c(pka_accept,paste(parameters$pKa_Accept,collapse=','))
}
predicted.rb2p <- 1 - 0.44 + 0.44 * predicted.krbc
rb2p.table <- cbind(as.data.frame(cas),as.data.frame(predicted.rb2p),as.data.frame(measured.rb2p))
colnames(rb2p.table) <- c('cas','predicted.rb2p','measured.rb2p')
error <- log10(rb2p.table[,'measured.rb2p']) - log10(rb2p.table[,'predicted.rb2p'])
rb2p.table <- cbind(rb2p.table,error,charge,fup,logP)
error <- log10(measured.krbc) - log10(predicted.krbc)
krbc.table <- cbind(as.data.frame(cas),as.data.frame(predicted.krbc),as.data.frame(measured.krbc),
as.data.frame(error),charge,fup,logP,pka_donor,pka_accept)
pdta <- data.frame(x = predicted.krbc,
y = measured.krbc)
pdta$y[pdta$y <= 0.1] <- 0.1
pdta$Censoring <- factor(c("Not Censored","Censored")[as.numeric(pdta$y <= 0.1) + 1])
y <- measured.krbc
x <- cbind(rep(1, length(y)),-1 * log10(predicted.krbc))
colnames(x) <- c("Intercept","Predicted")
cc <- as.numeric(y <= 0.1)
y[y < 0.1] <- 0.1
y <- -log10(y)
out <- em.cens(cc, x, y, dist="Normal")
censored.regression <- ggplot() +
geom_point(data=pdta,aes(x=x,y=y, color=Censoring)) +
scale_x_log10(limits=c(.0009,40)) + scale_y_log10(limits=c(.1,4),breaks=c(.1,.5,2.5)) +
labs(y=expression(paste("Inferred ",K[p])),x=expression(paste("Predicted ",K[p]))) +
geom_abline(intercept=0, slope=1, linetype='dashed') +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
plot.title=element_text(size=18,hjust=0.5),legend.position = c(0.11, .8)) +
geom_abline(slope=out$betas[2],intercept=-out$betas[1]) + ggtitle('(B)')
rb2p.plot <- ggplot(rb2p.table,aes(predicted.rb2p,measured.rb2p)) +
geom_point() + scale_x_log10(lim=c(.52,18)) +
scale_y_log10(lim=c(.52,2.5),breaks=c(0.5,1,2)) + geom_abline(linetype='dashed') +
labs(y="Measured B:P Ratio ",
x="Predicted B:P Ratio") +
theme(axis.text=element_text(size=16),axis.title=element_text(size=16),
plot.title=element_text(size=18,hjust=0.5)) + ggtitle('(A)')
Lastly, we make the heatmap.
heatmap.table <- NULL
for(this.cas in get_cheminfo(model='schmitt')){
parms <- parameterize_schmitt(chem.cas=this.cas)
pcs <- predict_partitioning_schmitt(parameters=parms)
heatmap.table <- cbind(heatmap.table,log10(unlist(pcs)[1:11]*parms$Funbound.plasma))
}
rownames(heatmap.table) <- c('Adipose','Bone','Brain','Gut','Heart',
'Kidney','Liver','Lung','Muscle','Skin','Spleen')
colnames(heatmap.table) <- rep("",dim(heatmap.table)[2])
pal <- function (n, h = c(260, -328), c = 80, l = c(30, 100), power = 1.5,
fixup = TRUE, gamma = NULL, alpha = 1, ...)
{
if (!is.null(gamma))
warning("'gamma' is deprecated and has no effect")
if (n < 1L)
return(character(0L))
h <- rep(h, length.out = 2L)
c <- c[1L]
l <- rep(l, length.out = 2L)
power <- rep(power, length.out = 2L)
rval <- seq(1, -1, length = n)
rval <- hex(polarLUV(L = l[2L] - diff(l) * abs(rval)^power[2L],
C = c * abs(rval)^power[1L], H = ifelse(rval > 0, h[1L],
h[2L])), fixup = fixup, ...)
if (!missing(alpha)) {
alpha <- pmax(pmin(alpha, 1), 0)
alpha <- format(as.hexmode(round(alpha * 255 + 1e-04)),
width = 2L, upper.case = TRUE)
rval <- paste(rval, alpha, sep = "")
}
return(rval)
}
hclust.ave <- function(x) hclust(x, method="ward.D2")
par(cex.main=1.5,cex.lab=2)
lhei <- c(1,4)
lwid <- c(1.25,2)
lmat <- rbind(c(2,3),c(4,1))
heatmap.2(heatmap.table,dendrogram='column',col=pal,trace="none",
hclustfun=hclust.ave,key.xlab=expression(paste(log[10],K[p]," Value")),
key.ylab=expression(paste("Number of ",K[p])),
key.title="Partition Coefficients",xlab="Chemicals",cex.lab=2,
lmat=lmat,margins=c(2,5),lwid=lwid,lhei=lhei)