mn.cec <- as.data.table(read.xlsx('./source/mn_CEC_full_list_opera_dashboard.xlsx', sheet = 1, colNames = TRUE))
mn.guid <- as.data.table(read.xlsx('./source/mdh_guidance_list_opera_test_dashboard.xlsx', sheet=1, colNames = TRUE))
mn.cand <- as.data.table(read.xlsx('./source/mdh_candidate_pool_opera_test_dashboard.xlsx', sheet=1, colNames = TRUE))
mn.cec[, list_membership := paste('CEC_283')]
mn.guid[, list_membership := paste('Guidance_List')]
mn.cand[, list_membership := paste('Candidate_Pool')]
# make mn.chem union list
mn.chem <- rbind(mn.cec, mn.guid, mn.cand)
mn.chem[, list_memberships := paste0(unique(list_membership), collapse=", "), by=list(DTXSID)]
#unique(mn.chem$list_memberships)
mn.chem[,c('list_membership','INPUT','FOUND_BY'):= NULL]
mn.chem <- unique(mn.chem)
mn.dtxsid <- unique(mn.chem$DTXSID)#283
chems <- tcplLoadChem(field='dsstox_substance_id',val=unique(mn.chem[,DTXSID]))
mn.chem[,in.toxcast := 'no']
mn.chem[DTXSID %in% chems[,dsstox_substance_id], in.toxcast := 'yes']
datatable(mn.chem[,c('DTXSID','in.toxcast')], filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE, initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
)))
#nrow(mn.chem[in.toxcast=='yes']) #270
mc5 <- tcplPrepOtpt(tcplLoadData(val=chems$spid, lvl=5, type = 'mc',fld='spid'))
mc6 <- tcplPrepOtpt(tcplLoadData(lvl=6, fld='m4id', val=mc5$m4id, type='mc'))
setDT(mc6)
mc6_mthds <- mc6[ , .( mc6_mthd_id = paste(mc6_mthd_id, collapse=",")), by = m4id]
mc6_flags <- mc6[ , .( flag = paste(flag, collapse=";")), by = m4id]
mc5$mc6_flags <- mc6_mthds$mc6_mthd_id[match(mc5$m4id, mc6_mthds$m4id)]
mc5[, flag.length := ifelse(!is.na(mc6_flags), count.fields(textConnection(mc6_flags), sep =','), NA)]
# filter the dataset, with coarse filters
mc5[hitc==1 & flag.length < 3, use.me := 1]
mc5[hitc==1 & is.na(flag.length), use.me := 1]
mc5[hitc==1 & flag.length >= 3, use.me := 0]
mc5[fitc %in% c(36,45), use.me := 0]
mc5[hitc==-1, use.me := 0] # make hitc interpretable as a positive sum
mc5[use.me==0, modl_ga := as.numeric(NA)]
mc5[use.me==0, hitc := 0]
mc5[hitc==0, modl_ga := as.numeric(NA)]
mc5[hitc==1,ac50_uM := ifelse(!is.na(modl_ga), 10^modl_ga, NA)]
mc5.hitc1 <- mc5[hitc==1]
cerapp <- as.data.table(dbGetQuery(con, "select model_generic_chemical_cerapp_score.*
from prod_internal_invitrodb_v3_3.model_generic_chemical_cerapp_score;"))
compara <- as.data.table(dbGetQuery(con, "select model_generic_chemical_compara_scores.*
from prod_internal_invitrodb_v3_3.model_generic_chemical_compara_scores;"))
er <- as.data.table(dbGetQuery(con, "select * from prod_internal_invitrodb_v3_3.model_generic_chemical_er_scores;"))
ar <- as.data.table(dbGetQuery(con, "select * from prod_internal_invitrodb_v3_3.model_generic_chemical_ar_scores;"))
It can be helpful to take a look at hit-rate in ToxCast/Tox21. A low hit-rate can indicate a number of potential issues: lack of bioactivity; lack of in vitro bioavailability and/or lack of amenability of the physicochemical properties to HTS; chemical degradation in sample or assay; limited screening of the biological space (i.e., biological target is absent); requirement for metabolism.
What we see in examination of hit-rate for this set are some chemicals that may require manual selection of a reasonable concentration value as a POD.
We could substitute the highest concentration screened (100 uM) or take the therapeutic range (10-25 mg/L, with 10 mg/L equivalent to 46 uM). In latter part of this work (for AED calculation), we use 46 uM.
We also see some disinfection by-products where we may want to select bioactivity values from the in vitro genotoxicity literature (see DBP example table based on Plewa et al. 2010). For now, these are just listed here, but we could do more using these values. We might have to mine the literature for an appropriate HTTK dataset for DBPs.
load(file='./output/mn_mc5.RData')
hit.rates <- mc5[ , list(
active.percent = round((lw(hitc==1)/.N)*100,2), #active percent
inactive.percent = round((lw(hitc==0)/.N)*100,2), #inactive percent
total.assay.screened = .N, #total number of aeids tested in mc
active.assay.count = as.double(lw(hitc==1)), # active count
inactive.assay.count = as.double(lw(hitc==0)) #inactive count
), by = list(chnm, casn, dsstox_substance_id)]
#range(hit.rates$total.assay.screened) #235 to 1993
#median(hit.rates$total.assay.screened) #395.5
hit.rates <- hit.rates[order(active.percent)]
datatable(hit.rates, filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE, initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
)))
What if some chemicals fall on the extremes for domain of applicability for the current HTS?
mn.chem[,log10VP := log10(as.numeric(VAPOR_PRESSURE_MMHG_OPERA_PRED))]
col.num <- c('AVERAGE_MASS','OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED')
mn.chem[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
hit.rate.opera <- merge.data.table(mn.chem,
hit.rates,
by.x='DTXSID',
by.y='dsstox_substance_id')
hit.rate.opera[,log10VP.caution := ifelse(log10VP > 1, "Caution - High VP", NA)]
hit.rate.opera[,logP.caution := NA]
hit.rate.opera[OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED < -2|OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED > 7,logP.caution := "Caution - Bioavailability based on LogP"]
#hit.rate.opera[!is.na(log10VP.caution)]
datatable(hit.rate.opera[active.percent < 2, c('PREFERRED_NAME','DTXSID','log10VP','AVERAGE_MASS','OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED',
'active.percent','total.assay.screened','active.assay.count')],
filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE,
initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
))) %>% formatStyle('log10VP',
backgroundColor = styleInterval(c(-1,1),
c('green','yellow','red')),
fontWeight = 'bold') %>% formatStyle('OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED',
backgroundColor = styleInterval(c(-2,1,7,11), c('red','yellow','green','yellow','red')),
fontWeight = 'bold')
datatable(hit.rate.opera[active.percent < 0.5, c('DTXSID','PREFERRED_NAME','AVERAGE_MASS','log10VP','OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED','logP.caution','log10VP.caution','active.percent','total.assay.screened')],
filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE,
initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
))) %>% formatStyle('log10VP',
backgroundColor = styleInterval(c(-1,1),
c('green','yellow','red')),
fontWeight = 'bold') %>% formatStyle('OCTANOL_WATER_PARTITION_LOGP_OPERA_PRED',
backgroundColor = styleInterval(c(-2,1,7,11), c('red','yellow','green','yellow','red')),
fontWeight = 'bold')
For a first pass, get a bioactivity threshold (5th percentile). We've used this before in APCRA and TSCA work.
mc5[,ac50_5p := quantile(ac50_uM, probs=c(0.05),na.rm=TRUE), by=list(dsstox_substance_id)]
mc5.dist <- unique(mc5[,c('casn','dsstox_substance_id','chnm','ac50_5p')])
mc5.dist[chnm=='Meprobamate', ac50_5p := 45.8]
datatable(mc5.dist, filter='top',
options=list(pagelength=25, autoWidth=FALSE, scrollX=TRUE, initComplete = JS(
"function(settings, json) {",
"$('body').css({'font-family': 'Calibri'});",
"}"
)))
should we consider POD-QSAR approaches for repeat dose since the data landscape is sparse for empirical BER?
The circles represent the AED based on the 5th percentile AC50.
# 5th percentile value
mc5.dist.df <- as.data.frame(subset(mc5.dist, casn %in% get_cheminfo(species='Human'))) #only 127/264 covered by empirical
load_sipes2017() # load in silico data
## Loading predictions from Sipes et al. (2017) for 8758 chemicals.
## Existing data are not being overwritten.
## Please wait...
mc5.dist.df <- as.data.frame(subset(mc5.dist, casn %in% get_cheminfo(species='Human'))) # only 260/264 - much better
We get much better coverage of the chemical space if we include the Sipes et al 2017 in silico HTTK dataset.
mc5.dist[chnm=='Meprobamate', ac50_5p := 45.8]
new.row <- mc5[chnm=='Meprobamate'& aeid==759]
new.row <- new.row[c(1),]
new.row[,ac50_uM := 45.8]
new.row[,c('ac50_5p'):= NULL]
mc5.hitc1 <- rbind(mc5.hitc1, new.row, fill=TRUE)
# 5th percentile value
mc5.dist.df <- as.data.frame(subset(mc5.dist, casn %in% get_cheminfo(species='Human', model='pbtk'))) #257 substances
aed.hu.css.pbtk.df=data.frame()
for (i in 1:nrow (mc5.dist.df))
{
aed.hu.pbtk.95 <-(calc_mc_oral_equiv(conc=mc5.dist.df$ac50_5p[i],
chem.cas=mc5.dist.df$casn[i],
which.quantile=c(0.95),
model = 'pbtk',
species='Human',
restrictive.clearance=T,
output.units='mgpkgpday'))
aed.hu.pbtk.50 <-(calc_mc_oral_equiv(conc=mc5.dist.df$ac50_5p[i],
chem.cas=mc5.dist.df$casn[i],
which.quantile=c(0.50),
model = 'pbtk',
species='Human',
restrictive.clearance=T,
output.units='mgpkgpday'))
aed.hu.css.95 <-(calc_mc_oral_equiv(conc=mc5.dist.df$ac50_5p[i],
chem.cas=mc5.dist.df$casn[i],
which.quantile=c(0.95),
model = '3compartmentss',
species='Human',
restrictive.clearance=T,
output.units='mgpkgpday'))
aed.hu.css.50 <-(calc_mc_oral_equiv(conc=mc5.dist.df$ac50_5p[i],
chem.cas=mc5.dist.df$casn[i],
which.quantile=c(0.50),
model = '3compartmentss',
species='Human',
restrictive.clearance=T,
output.units='mgpkgpday'))
aed.hu.css.pbtk.df<-rbind(aed.hu.css.pbtk.df,
cbind(
mc5.dist.df$dsstox_substance_id[i],
mc5.dist.df$casn[i],
mc5.dist.df$chnm[i],
mc5.dist.df$ac50_5p[i],
aed.hu.pbtk.95,
aed.hu.pbtk.50,
aed.hu.css.95,
aed.hu.css.50))
}
# all ac50 values
mc5.df <- as.data.frame(subset(mc5.hitc1, casn %in% get_cheminfo(species='Human', model='pbtk')))
all.aed.hu.css.df=data.frame()
for (i in 1:nrow (mc5.df))
{
aed.ac50.hu.css.95 <-(calc_mc_oral_equiv(conc=mc5.df$ac50_uM[i],
chem.cas=mc5.df$casn[i],
which.quantile=c(0.95),
species='Human',
restrictive.clearance=T,
output.units='mgpkgpday',
model='3compartmentss'))
all.aed.hu.css.df<-rbind(all.aed.hu.css.df,
cbind(
mc5.df$dsstox_substance_id[i],
mc5.df$casn[i],
mc5.df$chnm[i],
mc5.df$ac50_uM[i],
aed.ac50.hu.css.95))
}
save(all.aed.hu.css.df,
aed.hu.css.pbtk.df,
file='./output/aed_from_hu_httk_insilico_21jan2021.RData')
load('./output/aed_from_hu_httk_insilico_21jan2021.RData')
aed.hu.css.pbtk.dt <- as.data.table(aed.hu.css.pbtk.df)
all.aed.hu.css.dt <- as.data.table(all.aed.hu.css.df)
setnames(all.aed.hu.css.dt,
c('V1','V2','V3','V4'),
c('dsstox_substance_id','casn','chnm', 'ac50_uM'))
setnames(aed.hu.css.pbtk.dt,
c('V1','V2','V3','V4'),
c('dsstox_substance_id','casn','chnm', 'ac50_5p'))
col.num <- c('ac50_5p', 'aed.hu.css.95', 'aed.hu.css.50','aed.hu.pbtk.95','aed.hu.pbtk.50')
aed.hu.css.pbtk.dt[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
aed.hu.css.pbtk.dt <- aed.hu.css.pbtk.dt[order(-aed.hu.css.95)]
col.num <- c('ac50_uM', 'aed.ac50.hu.css.95')
all.aed.hu.css.dt[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
all.aed.hu.css.dt <- all.aed.hu.css.dt[order(-aed.ac50.hu.css.95)]
all.aed.hu.css.dt$AED_5p <- aed.hu.css.pbtk.dt$aed.hu.css.95[match(all.aed.hu.css.dt$dsstox_substance_id,
aed.hu.css.pbtk.dt$dsstox_substance_id)]
hit.rates$AED_5p <- aed.hu.css.pbtk.dt$aed.hu.css.95[match(hit.rates$dsstox_substance_id,
aed.hu.css.pbtk.dt$dsstox_substance_id)]
p <- ggplot(data=all.aed.hu.css.dt,
aes(x=reorder(factor(chnm),-AED_5p),
y=aed.ac50.hu.css.95, text = paste(chnm,', ',aed.ac50.hu.css.95, 'mg/kg/day')))+
geom_boxplot(outlier.shape=4)+
geom_point(aes(x=reorder(factor(chnm),-AED_5p),
y=AED_5p), color="#95D840FF", shape=17, size = 2) +
xlab('')+
ylab('log10-mg/kg/day')+
theme_bw() +
theme(legend.position="none",
legend.title = element_blank(),
axis.text.x = element_text(angle=45, vjust=0.5),
plot.title = element_text(hjust=0.5))+
scale_y_continuous(trans='log10',
breaks= c(0.000000001,
0.00000001,
0.0000001,
0.000001,
0.00001,
0.0001,
0.001,
0.01,
0.1,
1,
10,
100,
1000),
labels = c(0.000000001,
0.00000001,
0.0000001,
0.000001,
0.00001,
0.0001,
0.001,
0.01,
0.1,
1,
10,
100,
1000
))+
coord_flip(ylim=c(0.000000001,1000))+ ggtitle('AEDs using HTTK in silico')
fig <- ggplotly(p, tooltip=c('text'), height=2500, width=700)
fig
seem3 <- fread('./source/SupTable1-all.chem.preds-2018-04-23.txt')
seem.mn <- seem3[dsstox_substance_id %in% aed.hu.css.pbtk.dt[,dsstox_substance_id]]
seem.mn$chnm <- aed.hu.css.pbtk.dt$chnm[match(seem.mn$dsstox_substance_id,
aed.hu.css.pbtk.dt$dsstox_substance_id)]
seem.mn$AED_5p <- all.aed.hu.css.dt$AED_5p[match(seem.mn$dsstox_substance_id,
all.aed.hu.css.dt$dsstox_substance_id)]
seem.mn[,seem3.num := 10^seem3]
seem.mn[,seem3.u95.num := 10^seem3.u95]
seem.mn[,ber := AED_5p/seem3.u95.num]
all.aed.hu.css.dt$ber <- seem.mn$ber[match(all.aed.hu.css.dt$dsstox_substance_id,
seem.mn$dsstox_substance_id)]
all.aed.hu.css.dt[is.na(ber), ber := 1e10]
all.aed.hu.css.dt$seem3.num <- seem.mn$seem3.num[match(all.aed.hu.css.dt$dsstox_substance_id,
seem.mn$dsstox_substance_id)]
all.aed.hu.css.dt$seem3.u95.num <- seem.mn$seem3.u95.num[match(all.aed.hu.css.dt$dsstox_substance_id,
seem.mn$dsstox_substance_id)]
r <- ggplot(data=all.aed.hu.css.dt[ber < 10000],
aes(x=reorder(factor(chnm),-ber),
y=aed.ac50.hu.css.95,
text = paste(chnm,
', ','POD-NAM =',aed.ac50.hu.css.95, 'mg/kg/day', ',', 'SEEM3.upper95 = ', seem3.u95.num, 'mg/kg/day')))+
#geom_boxplot(outlier.shape=4)+
geom_point(aes(x=reorder(factor(chnm),-ber),
y=AED_5p), color="#95D840FF", shape=17, size = 2) +
geom_linerange(
aes(x=reorder(factor(chnm),-ber),
ymin=seem3.num,
ymax=seem3.u95.num), color="#1F968BFF", lty='dashed')+
xlab('')+
ylab('log10-mg/kg/day')+
theme_bw() +
theme(
legend.title = element_blank(),
axis.text.x = element_text(angle=45, vjust=0.5),
plot.title = element_text(hjust=0.5))+
scale_y_continuous(trans='log10',
breaks= c(0.000000001,
0.00000001,
0.0000001,
0.000001,
0.00001,
0.0001,
0.001,
0.01,
0.1,
1,
10,
100,
1000),
labels = c(0.000000001,
0.00000001,
0.0000001,
0.000001,
0.00001,
0.0001,
0.001,
0.01,
0.1,
1,
10,
100,
1000
))+
coord_flip(ylim=c(0.000000001,1000))+ ggtitle('Bioactivity:exposure ratio plot')
fig3 <- ggplotly(r, tooltip=c('text','seem3.upper95'), width=800, height = 1500)
fig3
er.ar <- merge.data.table(mn.chem[,c('DTXSID','PREFERRED_NAME','CASRN')],
cerapp[,c('CASRN','CHEMICAL_NAME','input_SMILES','InChI_Key',
'Potency_class_2_binding', 'Potency_class_2_agonist', 'Potency_class_2_antagonist','consensus_2_binding', 'consensus_2_agonist','consensus_2_antagonist')],
by = 'CASRN',
all.x=TRUE,
all.y=FALSE)
er.ar <- merge.data.table(er.ar,
compara[, c('dsstox_substance_id','consensus_binding', 'consensus_agonist','consensus_antagonist')],
by.x='DTXSID',
by.y='dsstox_substance_id',
all.x=TRUE,
all.y=FALSE)
colnames(er) <- paste0('er_model_', colnames(er))
er.ar <- merge.data.table(er.ar,
er,
by.x='CASRN',
by.y='er_model_CASRN',
all.x=TRUE,
all.y=FALSE)
colnames(ar) <- paste0('ar_model_', colnames(ar))
er.ar <- merge.data.table(er.ar,
ar,
by.x='CASRN',
by.y='ar_model_CASRN',
all.x=TRUE,
all.y=FALSE)
col.num <- c('consensus_2_binding', 'consensus_2_agonist', 'consensus_2_antagonist')
er.ar[, (col.num) := lapply (.SD, as.numeric), .SDcols = col.num ]
# indicate any active result from compara consensus qsar
er.ar[is.na(consensus_binding & consensus_agonist & consensus_antagonist),compara.flag := 2] # if data not available
er.ar[consensus_binding==0 & consensus_agonist==0 & consensus_antagonist==0, compara.flag := 0] # if data are all negative
er.ar[consensus_binding==1|consensus_agonist==1|consensus_antagonist==1, compara.flag := 1] # if any mode is positive
# indicate any active result from cerapp consensus qsar
er.ar[is.na(consensus_2_agonist & consensus_2_binding & consensus_2_antagonist),cerapp.flag := 2]
er.ar[consensus_2_agonist==0 & consensus_2_binding==0 & consensus_2_antagonist==0, cerapp.flag := 0]
er.ar[consensus_2_agonist==1|consensus_2_binding==1|consensus_2_antagonist==1, cerapp.flag := 1]
# create positive, equivocal, and negative code on ToxCast AR model
er.ar[is.na(ar_model_Agonist_AUC & ar_model_Antagonist_AUC), toxcast.ar.flag := 2] # data not available
er.ar[ar_model_Agonist_AUC < 0.1 & ar_model_Antagonist_AUC < 0.1|ar_model_Antagonist_AUC > 0.1 & ar_model_Antagonist_Confidence_Score <= 2, toxcast.ar.flag := 0] # grouped equivocals into the negative space
er.ar[ar_model_Agonist_AUC >= 0.1|ar_model_Antagonist_AUC >= 0.1 & ar_model_Antagonist_Confidence_Score >2,
toxcast.ar.flag := 1]
# create positive, equivocal, and negative code on ToxCast ER model
er.ar[is.na(er_model_Agonist_AUC & er_model_Antagonist_AUC), toxcast.er.flag := 2] # no data available
er.ar[er_model_Agonist_AUC< 0.1 & er_model_Antagonist_AUC < 0.1, toxcast.er.flag := 0] # grouped equivocals into the negative space
er.ar[er_model_Agonist_AUC >= 0.1|er_model_Antagonist_AUC >= 0.1, toxcast.er.flag := 1]
# require toxcast er model data to trump cerapp call
er.ar[toxcast.er.flag ==2 & cerapp.flag==2, final.er.flag :=0.1] # no data
er.ar[toxcast.er.flag ==2 & cerapp.flag==0, final.er.flag :=0] # only cerapp data and it's negative
er.ar[toxcast.er.flag ==2 & cerapp.flag==1, final.er.flag :=0.5] # only cerapp data and it's positive
er.ar[toxcast.er.flag ==0 & cerapp.flag %in% c(0,1,2), final.er.flag := 0] # er model available and negative
er.ar[toxcast.er.flag ==1 & cerapp.flag %in% c(0,1,2), final.er.flag := 1] # er model available and positive
# require toxcast ar model data to trump compara call
er.ar[toxcast.ar.flag ==2 & compara.flag==2, final.ar.flag :=0.1] # no data
er.ar[toxcast.ar.flag ==2 & compara.flag==0, final.ar.flag :=0] # only cerapp data and it's negative
er.ar[toxcast.ar.flag ==2 & compara.flag==1, final.ar.flag :=0.5] # only cerapp data and it's positive
er.ar[toxcast.ar.flag ==0 & compara.flag %in% c(0,1,2), final.ar.flag := 0] # er model available and negative
er.ar[toxcast.ar.flag ==1 & compara.flag %in% c(0,1,2), final.ar.flag := 1] # er model available and positive
er.ar[, ed.flag.sum := sum(final.er.flag, final.ar.flag), by=list(DTXSID)]
er.ar.long <- melt.data.table(er.ar,
id.vars = c('DTXSID', 'CASRN','PREFERRED_NAME', 'ed.flag.sum',
'er_model_pseudo_AC50_median','er_model_pseudo_AC50_min','er_model_Agonist_AUC',
'er_model_Antagonist_AUC',
'ar_model_pseudo_AC50_median','ar_model_pseudo_AC50_min',
'ar_model_Agonist_AUC','ar_model_Antagonist_AUC'),
measure.vars = c('cerapp.flag',
'compara.flag',
'toxcast.er.flag',
'toxcast.ar.flag',
'final.ar.flag',
'final.er.flag'),
variable.name = 'flags',
value.name = 'flag.values')
er.ar.long2 <- melt.data.table(er.ar.long,
id.vars = c('DTXSID','CASRN','PREFERRED_NAME','ed.flag.sum'),
measure.vars = c('er_model_pseudo_AC50_median', 'er_model_pseudo_AC50_min',
'er_model_Agonist_AUC','er_model_Antagonist_AUC',
'ar_model_pseudo_AC50_median','ar_model_pseudo_AC50_min',
'ar_model_Agonist_AUC','ar_model_Antagonist_AUC'),
variable.name = 'toxcast_models',
value.name = 'toxcast_model_values')
er.ar.long <- er.ar.long[order(-ed.flag.sum)]
er.ar.long2 <- unique(er.ar.long2) # this is for potency values. Maybe return to this later.
sc2.stm <- tcplPrepOtpt(tcplLoadData(lvl=2,type='sc',fld='aeid',val=c(1691,1858)))
#write.csv(sc2.stm, './source/sc2_4todd_stm_newmthd_28jul2020.csv')
mc5.stm <- tcplPrepOtpt(tcplLoadData(lvl=5,type='mc',fld='aeid',val=c(1691,1858)))
sc <- sc2.stm$dsstox_substance_id
mc <- mc5.stm$dsstox_substance_id
stm.tested <- c(sc,mc)
stm.dtxsids <- unique(stm.tested)
flags <- as.data.table(er.ar)
flags <- flags[,c('CASRN','DTXSID','PREFERRED_NAME','compara.flag', 'cerapp.flag', 'toxcast.ar.flag', 'toxcast.er.flag', 'final.er.flag', 'final.ar.flag', 'ed.flag.sum')]
flags[,stm.tested := 0]
flags[DTXSID %in% stm.dtxsids, stm.tested := 1]
stm.mc6 <- tcplPrepOtpt(tcplLoadData(lvl=6, fld='m4id', val=mc5.stm$m4id, type='mc'))
setDT(stm.mc6)
mc6_mthds <- stm.mc6[ , .( mc6_mthd_id = paste(mc6_mthd_id, collapse=",")), by = m4id]
mc6_flags <- stm.mc6[ , .( flag = paste(flag, collapse=";")), by = m4id]
mc5.stm$mc6_flags <- mc6_mthds$mc6_mthd_id[match(mc5.stm$m4id, mc6_mthds$m4id)]
mc5.stm[, flag.length := ifelse(!is.na(mc6_flags), count.fields(textConnection(mc6_flags), sep =','), NA)]
# filter the dataset, with coarse filters
mc5.stm[hitc==1 & flag.length < 3, use.me := 1]
mc5.stm[hitc==1 & is.na(flag.length), use.me := 1]
mc5.stm[hitc==1 & flag.length >= 3, use.me := 0]
mc5.stm[fitc %in% c(36,45), use.me := 0]
mc5.stm[hitc==-1, use.me := 0] # make hitc interpretable as a positive sum
mc5.stm[use.me==0, modl_ga := as.numeric(NA)]
mc5.stm[use.me==0, hitc := 0]
mc5.stm[hitc==0, modl_ga := as.numeric(NA)]
dev.flag <- dcast.data.table(mc5.stm, dsstox_substance_id + chnm ~ aenm,
value.var = 'modl_ga',
fun.aggregate= max)
dev.flag[,stm.cyto.dist := ifelse(!is.na(STM_H9_Viability_norm), STM_H9_Viability_norm - STM_H9_OrnCyssISnorm_ratio_dn, 3 - STM_H9_OrnCyssISnorm_ratio_dn)]
dev.flag[,dev.flag := 0]
dev.flag[!is.na(STM_H9_OrnCyssISnorm_ratio_dn), dev.flag := 1]
dev.flag[,dev.flag.specific := 0]
dev.flag[stm.cyto.dist >0.25, dev.flag.specific := 1]
flags <- merge.data.table(flags,
dev.flag,
by.x='DTXSID',
by.y='dsstox_substance_id',
all.x=TRUE,
all.y=FALSE)
flags[stm.tested==1 & is.na(dev.flag), dev.flag := 0]
flags[stm.tested==1 & is.na(dev.flag.specific), dev.flag.specific := 0]
flags <- flags[,c('chnm') := NULL]
flags$ber <- all.aed.hu.css.dt$ber[match(flags$DTXSID,all.aed.hu.css.dt$dsstox_substance_id)]
setnames(flags, c('final.ar.flag','final.er.flag','dev.flag','dev.flag.specific'), c('AR','ER','DEV','DEV-S'))
flags[stm.tested==0 & is.na(DEV), DEV := 0.1]
flags[stm.tested==0 & is.na(`DEV-S`), `DEV-S` := 0.1]
aed.tbl.ed <- melt.data.table(flags,
id.vars = c('DTXSID',
'CASRN',
'PREFERRED_NAME',
'ber'),
measure.vars = c(
'AR',
'ER',
'DEV',
'DEV-S'),
variable.name = 'cats',
value.name = 'cat_values')
aed.tbl.ed <- aed.tbl.ed[order(-ber)]
chnm.order <- factor(unique(aed.tbl.ed$PREFERRED_NAME), levels = unique(aed.tbl.ed$PREFERRED_NAME), ordered=TRUE)
aed.tbl.ed[,chnm.factor := factor(PREFERRED_NAME) ]