1 Invitrodb version of IUF data

  • Recently added chemicals that were not sourced by ToxCast.
  • Are these assays really interpretable in the "up" direction?
# asid = 30 is IUF, 27 = UKN
iuf <- tcplLoadAeid(val=30, fld='asid',add.fld='acid')

# load and filter the mc5 using coarse filters
mc5 <- tcplPrepOtpt((tcplLoadData(lvl=5, fld='aeid', val=iuf[,aeid], type='mc')))
mc6 <- tcplPrepOtpt(tcplLoadData(lvl=6, fld='m4id', val=mc5$m4id, type='mc'))
setDT(mc6)
mc6_mthds <- mc6[ , .( mc6_mthd_id = paste(mc6_mthd_id, collapse=",")), by = m4id]
mc6_flags <- mc6[ , .( flag = paste(flag, collapse=";")), by = m4id]
mc5$mc6_flags <- mc6_mthds$mc6_mthd_id[match(mc5$m4id, mc6_mthds$m4id)]
mc5[, flag.length := ifelse(!is.na(mc6_flags), count.fields(textConnection(mc6_flags), sep =','), NA)]

# filter the dataset, with coarse filters
#mc5[hitc==1 & flag.length < 3, use.me := 1]
#mc5[hitc==1 & is.na(flag.length), use.me := 1]
#mc5[hitc==1 & flag.length >= 3, use.me := 0]
#mc5[fitc %in% c(36,45), use.me := 0]
#mc5[hitc==-1, use.me := 0] # make hitc interpretable as a positive sum
#mc5[use.me==0, modl_ga := as.numeric(NA)]
#mc5[use.me==0, hitc := 0]
#mc5[hitc==0, modl_ga := as.numeric(NA)]

mc5[hitc==1,ac50_uM := ifelse(!is.na(modl_ga), 10^modl_ga, NA)]
  • 123 unique substances now included
length(unique(mc5$dsstox_substance_id)) #123
## [1] 123
mc2.mthd <- tcplMthdLoad(id=iuf$acid, lvl=2, type='mc')
#mc2.mthd <- merge.data.table(iuf, mc2.mthd, by='acid')

mc3.mthd <- tcplMthdLoad(id=iuf$aeid,lvl=3, type='mc')
#mc3.mthd <- merge.data.table(iuf, mc3.mthd, by='aeid')

mc4.mthd <- tcplMthdLoad(id=iuf$aeid, lvl=4, type='mc')

mc5.mthd <- tcplMthdLoad(id=iuf$aeid, lvl=5, type='mc')


mc2.mthd[, mc2.mthds := paste0(mthd, collapse = ', '), by=list(acid)]
mc3.mthd[, mc3.mthds := paste0(mthd, collapse = ', '), by=list(aeid)]
mc4.mthd[, mc4.mthds := paste0(mthd, collapse = ', '), by=list(aeid)]
mc5.mthd[, mc5.mthds := paste0(mthd, collapse = ', '), by=list(aeid)]

iuf[mc2.mthd, mc2.mthds := mc2.mthds, on='acid']
iuf[mc3.mthd, mc3.mthds := mc3.mthds, on='aeid']
iuf[mc4.mthd, mc4.mthds := mc4.mthds, on='aeid']
iuf[mc5.mthd, mc5.mthds := mc5.mthds, on='aeid']

1.1 Examine the mc5 cutoff and bmad.

datatable(unique(mc5[,c('aeid','aenm', 'coff','bmad')]), 
          filter='top', 
          options=list(pagelength=25, autoWidth=FALSE,  scrollX=TRUE, initComplete = JS(
    "function(settings, json) {",
    "$('body').css({'font-family': 'Calibri'});",
    "}"
  )))

1.2 Examine all methods across the suite.

datatable(iuf, 
          filter='top', 
          options=list(pagelength=25, autoWidth=FALSE,  scrollX=TRUE, initComplete = JS(
    "function(settings, json) {",
    "$('body').css({'font-family': 'Calibri'});",
    "}"
  )))

2 Comparison to NPC data from EFSA Appendix A

  • Here is a table of the NPC1-5 related assays from the EFSA Report Appendix A.
  • Manual mapping was a bit complex:
  • We only seemed to have cytotoxicity and viability at 120 hr for BPC2-5 (not 72 hr);
  • Not sure of the difference between cytotoxicity and viability at 120 hr; and,
  • the biggest point, was not sure if the potency values from the EFSA appendix were in the "up" or "down" direction.

  • I assumed they were all in the "down" direction.

efsa.app <- as.data.table(read.xlsx('./EFSA_Appendix A_comparison_table.xlsx', 
                                      sheet = 'A1', colNames = TRUE))

#mc5 <- as.data.table(read.xlsx('./mc5_mc6_IUF_NPC2-5_15OCT2020.xlsx',sheet=1,colNames=TRUE))

npc.col <- grep('NPC',colnames(efsa.app[,c(1:50)]))
npc.data <- melt.data.table(efsa.app[,c(1:50)], 
                            id.vars = c("DTXSID","compound.name","CAS.#"),
                            measure.vars = npc.col,
                            value.name = c('potency'),
                            variable.name = c('aenm_efsa')
)

npc.data[, BMR := tstrsplit(aenm_efsa, ".", fixed=TRUE, keep=c(1))]

#npc.data <- npc.data[!aenm_efsa %in% c("BMC30.proliferation.BrdU.(NPC1b)",
#                                       "BMC30.proliferation.area.(NPC1a)", 
#                                       "BMC30.viability.(NPC1)",
#                                       "BMC10.cytotoxicity.(NPC1)")
#                     ]
unique(npc.data$aenm_efsa)
##  [1] BMC30.proliferation.BrdU.(NPC1b)                  
##  [2] BMC30.proliferation.area.(NPC1a)                  
##  [3] BMC30.viability.(NPC1)                            
##  [4] BMC10.cytotoxicity.(NPC1)                         
##  [5] BMC10.migration.distance.radial.glia.(NPC2a;.72h) 
##  [6] BMC10.migration.distance.radial.glia.(NPC2a;.120h)
##  [7] BMC30.migration.distance.neurons.(NPC2b)          
##  [8] BMC30.migration.distance.oligodendrocytes.(NPC2c) 
##  [9] BMC30.neuronal.differentiation.(NPC3)             
## [10] BMC30.neurite.length.(NPC4)                       
## [11] BMC30.neurite.area.(NPC4)                         
## [12] BMC30.oligodendrocyte.differentiation.(NPC5)      
## [13] BMC30.cell.number.(NPC2-5)                        
## [14] BMC10.cytotoxicity.(72.NPC2-5)                    
## [15] BMC10.cytotoxicity.(120h;.NPC2-5)                 
## [16] BMC30.viabiltiy.(120h;.NPC2-5)                    
## 16 Levels: BMC30.proliferation.BrdU.(NPC1b) ...
  • In this code chunk, I manually assigned aeid numbers.
# this manual mapping is very imperfect. I'm not sure what the directionality of the EFSA BMRs are. Maybe some of these go up?
# for tcpl, we created up and down.

# adding in the NPC1 data
npc.data[aenm_efsa=='BMC30.proliferation.BrdU.(NPC1b)', aeid := 2771]
npc.data[aenm_efsa=='BMC30.proliferation.area.(NPC1a)', aeid := 2773]
npc.data[aenm_efsa=='BMC30.viability.(NPC1)', aeid := 2775] # not sure the difference between cytotoxicity and viability endpoint. All we have in ToxCast is viability.

# NPC2-5
npc.data[aenm_efsa=='BMC10.migration.distance.radial.glia.(NPC2a;.72h)', aeid := 2938]
npc.data[aenm_efsa=='BMC10.migration.distance.radial.glia.(NPC2a;.120h)', aeid := 2940]
npc.data[aenm_efsa=='BMC30.migration.distance.neurons.(NPC2b)', aeid := 2942]
npc.data[aenm_efsa=='BMC30.migration.distance.oligodendrocytes.(NPC2c)', aeid := 2944]
npc.data[aenm_efsa=='BMC30.neuronal.differentiation.(NPC3)', aeid := 2946]
npc.data[aenm_efsa=='BMC30.neurite.length.(NPC4)', aeid := 2948]
npc.data[aenm_efsa=='BMC30.neurite.area.(NPC4)', aeid := 2950]
npc.data[aenm_efsa=='BMC30.oligodendrocyte.differentiation.(NPC5)', aeid := 2952]
npc.data[aenm_efsa=='BMC10.cytotoxicity.(72.NPC2-5)', aeid := 2954]
npc.data[aenm_efsa=='BMC10.cytotoxicity.(120h;.NPC2-5)', aeid := 2956]
npc.data[aenm_efsa=='BMC30.cell.number.(NPC2-5)', aeid := 2958]
npc.data[aenm_efsa=='BMC30.viabiltiy.(120h;.NPC2-5)', aeid := 2960]

#str(npc.data)
col.num <- c('potency')
npc.data[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
#npc.data[is.na(potency)]
col.chr <- c('aenm_efsa')
npc.data[, (col.chr) := lapply (.SD, as.character), .SDcols = col.chr]
  • In this chunk, I prepare the comparison data.
aeid.tbl <- unique(mc5[,c('aeid','aenm')])

npc.data.comp <- npc.data[DTXSID %in% mc5[,dsstox_substance_id]]
npc.data.comp <- npc.data.comp[!is.na(DTXSID)]
npc.data.comp <- npc.data.comp[!is.na(aeid)]
#duplicated(npc.data.comp, by=c('DTXSID','aeid'))

mc5.comp <- mc5[aeid %in% npc.data.comp[,aeid]]
npc.data.comp[,dtxsid_aeid := paste0(DTXSID,'_',aeid)]
mc5.comp[,dtxsid_aeid := paste0(dsstox_substance_id,'_',aeid)]

mc5.comp <- merge.data.table(npc.data.comp[,c('DTXSID', 'dtxsid_aeid', 'compound.name', 
                                              'potency', 'BMR', 'aenm_efsa')],
                             mc5.comp[,c('dsstox_substance_id','dtxsid_aeid', 'm4id','chnm','aeid','aenm',
                                         'modl_acc','modl_acb','modl_ga','bmad','coff','hitc')],
                             by='dtxsid_aeid')

mc5.comp[,log10.potency := log10(potency)]
  • Note that the thresholds for positives differ slightly.
  • For NPC1 endpoints, do we want coff=30?
  • How do we understand baseline across endpoints and the difference from baseline needed for a true signal?
datatable(unique(mc5.comp[,c('aeid','aenm','coff','bmad','BMR')]), 
          filter='top', 
          options=list(pagelength=25, autoWidth=FALSE,  scrollX=TRUE, initComplete = JS(
    "function(settings, json) {",
    "$('body').css({'font-family': 'Calibri'});",
    "}"
  )))

2.1 Comparison of BMD and ACC

  • these values are in log10-micromolar units
  • solid line is unity, dotted lines indicate +/- 0.5 log10-micromolar
  • note the summary of the fit of modl_acc to log10-potency (EFSA BMDs). They are very close, with IQR of the residuals being about +/- 0.1.
fit <- lm(modl_acc ~ log10.potency,
          data=mc5.comp)
summary(fit)
## 
## Call:
## lm(formula = modl_acc ~ log10.potency, data = mc5.comp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.49300 -0.10018  0.09331  0.22165  1.21685 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.25271    0.02933  -8.616 1.57e-15 ***
## log10.potency  1.00934    0.02633  38.329  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4036 on 213 degrees of freedom
##   (1579 observations deleted due to missingness)
## Multiple R-squared:  0.8734, Adjusted R-squared:  0.8728 
## F-statistic:  1469 on 1 and 213 DF,  p-value: < 2.2e-16

  • Rotenone looks really different in several assay endpoints in terms of potency.
rotenone <- mc5.comp[chnm=='Rotenone' & aeid %in% c(2938,2946,2948), m4id]

tcplPlotM4ID(rotenone[1], lvl=6)

tcplPlotM4ID(rotenone[2], lvl=6)

tcplPlotM4ID(rotenone[3], lvl=6)

2.2 Comparison of ToxCast AC50 and EFSA BMDs

  • ACC appears just very slightly closer to the EFSA BMDs than AC50 (see below).
fit <- lm(modl_ga ~ log10.potency,
          data=mc5.comp)
summary(fit)
## 
## Call:
## lm(formula = modl_ga ~ log10.potency, data = mc5.comp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.80135 -0.16775  0.02092  0.20890  1.70376 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.05809    0.03118   1.863   0.0638 .  
## log10.potency  0.98067    0.02803  34.985   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4308 on 216 degrees of freedom
##   (1576 observations deleted due to missingness)
## Multiple R-squared:   0.85,  Adjusted R-squared:  0.8493 
## F-statistic:  1224 on 1 and 216 DF,  p-value: < 2.2e-16

2.3 Comparison of ToxCast AC50 and ACC (concentration at cut-off)

2.4 Comparison of ToxCast ACC (concentration at cut-off) and ACB (concentration at upper end of baseline)

  • Note you can see here that the ToxCast ACB > ACC for NPC5, which has the largest BMAD.
  • Currently, NPC5 cutoff is the greater of 30% and 2BMAD (which is about ~60%) and 30%.

3 Hitcall comparison

  • here is the full table.
datatable(mc5.comp[,c('chnm','aenm_efsa','aenm','potency','log10.potency','hitc','modl_acc')], 
          filter='top', 
          options=list(pagelength=25, autoWidth=FALSE,  scrollX=TRUE, initComplete = JS(
    "function(settings, json) {",
    "$('body').css({'font-family': 'Calibri'});",
    "}"
  )))
  • The following comparisons may be flawed because I assumed that all effects for EFSA Appendix A are in the "down" direction, and I was not sure that is true.
  • Instances where we get a hitcall==1 in ToxCast and no potency value in EFSA Appendix A.
mc5.comp[is.na(log10.potency) & hitc==1]
  • Instances where we get a hitcall==0 in ToxCast and a potency value in EFSA Appendix A.
  • A majority of these are in NPC5 - likely because the only way we could approximate baseline noise leads to a BMAD that is approximately 30%.
mc5.comp[hitc==0 & !is.na(potency)]
  • the number of instances where ToxCast hitc==0 and there is a non-NA potency in the EFSA report:
table(mc5.comp[hitc==0 & !is.na(potency)]$aenm)
## 
##                    IUF_NPC2-5_count_120hr_dn 
##                                            2 
##                IUF_NPC2-5_cytotoxicity_120hr 
##                                            2 
##                 IUF_NPC2-5_cytotoxicity_72hr 
##                                            1 
##           IUF_NPC2a_ glia_migration_120hr_dn 
##                                           14 
##             IUF_NPC2a_glia_migration_72hr_dn 
##                                            5 
##          IUF_NPC2b_neuron_migration_120hr_dn 
##                                            4 
## IUF_NPC2c_oligodendrocyte_migration_120hr_dn 
##                                            5 
##                IUF_NPC3_neuron_diff_120hr_dn 
##                                            8 
##               IUF_NPC4_neurite_area_120hr_dn 
##                                            8 
##             IUF_NPC4_neurite_length_120hr_dn 
##                                           11 
##       IUF_NPC5_oligodendrocyte_diff_120hr_dn 
##                                           37 
##                IUF-NPC2-5_viability_120hr_dn 
##                                            1
  • A PDF was created that contains the curves that were hitc==0 in ToxCast and positive in EFSA report.
  • Note that I dropped curve-filtering so that we do not remove curves that we would remove from the MEA NFA set (3+ flags on fitting and/or ac50 value below the conc range screened).
m4id.list <- mc5.comp[hitc==0 & !is.na(potency)]$m4id
mc5.comp2 <- mc5.comp[hitc==0 & !is.na(potency)]

graphics.off()
  pdf(file=paste('hitc0_efsa1',
                 format(Sys.Date(),
                        "%y%m%d.pdf"),
                 sep="_"),
      height=6,
      width=10,
      pointsize=10)
  tcplPlotM4ID(mc5.comp2$m4id, lvl=6)

graphics.off()
  • Another PDF was created that contains the curves that were hitc==1 in ToxCast and not positive in the EFSA report.
mc5.comp3 <- mc5.comp[hitc==1 & is.na(potency)]

graphics.off()
  pdf(file=paste('hitc1_efsa0',
                 format(Sys.Date(),
                        "%y%m%d.pdf"),
                 sep="_"),
      height=6,
      width=10,
      pointsize=10)
  tcplPlotM4ID(mc5.comp3$m4id, lvl=6)

graphics.off()