Aims
To aid the cosmetic industry in the EU in developing predictive tools, we have leveraged the information available in the EPA ToxCast database (DB) and ToxRefDB to evaluate the potential systemic toxicity of 58 chemicals that are identified as either ingredients or traces of contaminates in personal care products, fragrances and cosmetics based on the personal care products council (PCPC).

Setting up the Necessary Libaries

sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 16299)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tcpl_2.0.2        httk_1.9.2        gridExtra_2.3    
##  [4] ggthemes_4.2.0    ggplot2_3.1.1     DT_0.6           
##  [7] dplyr_0.8.1       RMySQL_0.10.17    DBI_1.0.0        
## [10] data.table_1.12.2 magrittr_1.5     
## 
## loaded via a namespace (and not attached):
##  [1] deSolve_1.21       tidyselect_0.2.5   xfun_0.7          
##  [4] purrr_0.3.2        mitools_2.4        splines_3.5.3     
##  [7] lattice_0.20-38    colorspace_1.4-1   expm_0.999-4      
## [10] htmltools_0.3.6    yaml_2.2.0         chron_2.3-53      
## [13] blob_1.1.1         survival_2.44-1.1  rlang_0.3.4       
## [16] pillar_1.4.1       glue_1.3.1         withr_2.1.2       
## [19] bit64_0.9-7        RColorBrewer_1.1-2 gsubfn_0.7        
## [22] plyr_1.8.4         stringr_1.4.0      munsell_0.5.0     
## [25] gtable_0.3.0       htmlwidgets_1.3    mvtnorm_1.0-10    
## [28] memoise_1.1.0      evaluate_0.14      knitr_1.23        
## [31] parallel_3.5.3     proto_1.0.0        Rcpp_1.0.1        
## [34] scales_1.0.0       truncnorm_1.0-8    bit_1.1-14        
## [37] digest_0.6.19      stringi_1.4.3      msm_1.6.7         
## [40] survey_3.36        numDeriv_2016.8-1  tools_3.5.3       
## [43] sqldf_0.4-11       RSQLite_2.1.1      lazyeval_0.2.2    
## [46] tibble_2.1.1       crayon_1.3.4       pkgconfig_2.0.2   
## [49] Matrix_1.2-17      assertthat_0.2.1   rmarkdown_1.13    
## [52] R6_2.4.0           compiler_3.5.3

0.1 Data Set-up

This script is to create cosmetics target organ & counts and the LEL from the study and what study type

Reading in Data from database and cosmetic chemicals

#con <- dbConnect(drv=RMySQL::MySQL(), group="toxrefdb_1_3")

## To query ToxRefDBv1
#tr_all<-dbGetQuery(con, "SELECT chemical.*, study.*,dose.*, tg.*, dtg.*,dtg_effect.*,effect.*, endpoint.*
#                   FROM
#                   dev_toxrefdb_1_3.chemical INNER JOIN
#                   (dev_toxrefdb_1_3.study INNER JOIN
#                   (dev_toxrefdb_1_3.dose INNER JOIN
#                   (dev_toxrefdb_1_3.tg INNER JOIN
#                   (dev_toxrefdb_1_3.dtg INNER JOIN
#                   (dev_toxrefdb_1_3.dtg_effect INNER JOIN
#                   (dev_toxrefdb_1_3.endpoint INNER JOIN
#                   dev_toxrefdb_1_3.effect 
#                   ON endpoint.endpoint_id=effect.endpoint_id)
#                   ON dtg_effect.effect_id=effect.effect_id)
#                   ON dtg_effect.dtg_id=dtg.dtg_id)
#                   ON tg.tg_id=dtg.tg_id AND tg.tg_id=effect.tg_id)
#                   ON dose.dose_id=dtg.dose_id)
#                   ON study.study_id=tg.study_id AND study.study_id=dose.study_id)
#                   ON chemical.chemical_id=study.chemical_id") %>%
#  data.table()

## Load cosmetic-relevant chemicals (CRCs)
cosmetic_chem  <- read.csv("Cosmetic_60_chems_LELs_2018MAR28.csv", header=TRUE, as.is = T, check.names=F) %>% 
  data.table()

0.1.1 How many of the CRCs are in toxRef

Function that corrects some of the ToxRefDBv1

trCorrections <- function(dat){

  ## Remove studies that were entered twice
  rep <- c("3695", "3696","3681","3684","3688","3687", "3689", "3692", "3694") ##studies that were entered twice
  dat[study_id %in% rep, study_id := 0]
  dat <- dat[study_id != 0]

  ##Correcting Errors
  dat[study_year == "19987", study_year := 1987]
  dat[study_year == "9184", study_year := 1984]
  dat[substance_purity == "8805", substance_purity := 88.05]
  dat[substance_purity > 100, substance_purity := 100]

  ##Putting missing data under the same terminology
  dat[study_source == "unassigned", study_source := "unknown"]
  dat[is.na(strain_group), strain_group := "other"]
  dat[strain_group == "other", strain_group := species] ## All strain other ==> species
  dat[admin_method =="[Not Specified]", admin_method := admin_route] ## All admin_method[Not Specified] to admin method

}

Allometric scaling of all doses and filter data

tr_all[!is.na(dose_level), dose_no := max(dose_level, na.rm = TRUE) , by = study_id]
tr_all[ species == "mouse", HED := dose_adjusted * 0.081]
tr_all[ species == "rat", HED := dose_adjusted * 0.162]
tr_all[ species == "guinea_pig", HED := dose_adjusted * 0.216]
tr_all[ species == "rabbit", HED := dose_adjusted * 0.324]
tr_all[ species == "dog", HED := dose_adjusted * 0.541]
tr_all[ species == "hamster", HED := dose_adjusted * 0.135]
tr_all[ species == "primate", HED := dose_adjusted]

tr_all[!is.na(dose_level), dose_no := max(dose_level, na.rm=TRUE) , by = study_id]
tr_all[ HED > 0, log_dose_adjusted := round(log10(HED), 1)]
tr_all[!is.na(log_dose_adjusted), ldt := min(log_dose_adjusted), by = study_id]
tr_all[!is.na(log_dose_adjusted), hdt := max(log_dose_adjusted), by = study_id]
tr_all[ , dose_spacing := (hdt-ldt)/(dose_no-1)]

tr_all2 <- trCorrections(tr_all)

tr_all2[ chemical_casrn %in% cosmetic_chem$TestSubstance_CASRN, group:="cosmetic_chem"]
tr_all2[is.na(group) , group:="toxref"]

ar <- c("Oral")
sp <- c("mouse", "rat", "dog", "rabbit")
st <- c("SUB","CHR", "DEV", "MGR", "SAC" , "DNT" )
ec <- c('systemic')
ls <- c('adult')
gn <- c('F0')

tr_org <- data.table(tr_all2) %>% .[data_entry_level == 'all effects' &
                              data_usability == 'acceptable' &
                              data_entry_status == 'complete'&
                              dose_adjusted > 0 &
                              dose_adjusted_unit == 'mg/kg/day' & 
                              dose_level > 0 &
                              admin_route %in% ar &
                              study_type %in% st &
                              species %in% sp &
                              effect_category %in% ec &
                              life_stage %in% ls &
                              generation %in% gn & 
                              effect_target_class == "organ" &
                              dose_no > 1] %>%
  .[ , list(chemical_id, chemical_id_type, chemical_casrn, chemical_name,
            study_id,
            study_source,
            study_type,
            species,
            admin_route,
            admin_method,
            effect_category,
            effect_type,
            effect_target,
            effect_target_class,
            treatment_related,
            dose_no,
            HED,
            log_dose_adjusted,
            ldt,
            hdt,
            dose_spacing)] %>%
  unique()


tr_lel <- data.table(tr_all2) %>% .[data_entry_level == 'all effects' &
                              data_usability == 'acceptable' &
                              data_entry_status == 'complete'&
                              dose_adjusted > 0 &
                              dose_adjusted_unit == 'mg/kg/day' & 
                              dose_level > 0 &
                              admin_route %in% ar &
                              study_type %in% st &
                              species %in% sp &
                              effect_category %in% ec &
                              life_stage %in% ls &
                              generation %in% gn & 
                              dose_no > 1] %>%
  .[ , list(chemical_id, chemical_id_type, chemical_casrn, chemical_name,
            study_id,
            study_source,
            study_type,
            species,
            admin_route,
            admin_method,
            effect_category,
            effect_type,
            effect_target,
            effect_target_class,
            treatment_related,
            dose_no,
            HED,
            log_dose_adjusted,
            ldt,
            hdt,
            dose_spacing)] %>%
  unique()
#tr_lel_pub <- tr_lel[ , c("treatment_related", "dose_no", "HED", "log_dose_adjusted", "ldt", "hdt", "dose_spacing"):=NULL]

#write.csv(tr_lel_pub, file="tr_lel.csv")

Calculate the organ LELs by chemicals in both log10 and non transformed dose

tr_lel[ effect_target_class=="organ", tr_eff := max(treatment_related, na.rm = TRUE), 
    by = c("study_id", "effect_target")]

tr_lel[ tr_eff==1, orglel_chem:= min(log10(HED[treatment_related==1]), na.rm = TRUE), 
    by = c("chemical_id", "effect_target")]

tr_lel[ , tr:= max(treatment_related, na.rm = TRUE), 
    by = c("study_id")]

tr_lel[ tr==1, lel_chem:=min(log10(HED[treatment_related==1]), na.rm = TRUE), by = chemical_id]

tr_lel[ tr==1, lel_st:=min(log10(HED[treatment_related==1]), na.rm = TRUE), by = c("chemical_id", "study_id")]

Grouping Cosmetic and ToxRef Chemicals

tr_lel[ chemical_casrn %in% cosmetic_chem$TS_CASRN, group:="cosmetic_chem"]
tr_lel[is.na(group) , group:="toxref"]
tr_lel[ , chem_group:=uniqueN(chemical_id), by=group]
tr_lel[ , st_mean:=round(mean(lel_st, na.rm = TRUE), 2), by=c("study_type", "group")]

tr_supp <- tr_lel[treatment_related==1] # supplamental table

tr_lel2 <- tr_lel[ , list(chemical_casrn, chemical_name, effect_target, orglel_chem, group)] %>%
  unique() %>% 
  .[ , organ_mean:=round(mean(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>% 
  .[ , organ_sd:=round(sd(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>%
  .[ , organ_median:=round(median(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>%
  .[ , organ_iqr:=IQR(orglel_chem, na.rm = TRUE), by=c("effect_target", "group")] %>%
  .[ orglel_chem != "NA", org_count:=.N, by=c( "effect_target", "group")] %>%
  .[ group == "cosmetic_chem", per_org_count:=round(((org_count/58)*100), 2)] %>%
  .[ group == "toxref", per_org_count:=round(((org_count/870)*100), 2)] %>%
  .[ , num.org.target:=.N, by=chemical_name]
tr_lel.ct<-tr_lel2[ , list(chemical_casrn, chemical_name, group, num.org.target)] %>% unique()

write.csv(tr_lel.ct, file="tr_num_org_target.csv")

How many CRCs are in ToxRefDB

CRC_tr <- tr_lel[ , list(chemical_casrn, chemical_name, admin_route, group)] %>% unique() %>%
  .[ group=="cosmetic_chem"]

ACTOR Chemical Functional Use Categories (Figure 1)

actor <- data.table(NewUseTable)

actor_crc <- actor[ CASRN %in% CRC_tr$chemical_casrn ] 

actor_crc2 <- reshape2::melt(actor_crc, id.vars=c("CASRN", "COMMON.NAME", "TXCODE")) %>% data.table() %>%
  .[ , COMMON.NAME:=tolower(COMMON.NAME)] %>%
  .[ , variable:=tolower(variable)] 
## Warning in melt.data.table(actor_crc, id.vars = c("CASRN", "COMMON.NAME", :
## 'measure.vars' [ANTIMICROBIAL, COLORANT, FERTILIZER, FOOD.ADDITIVE, ...]
## are not all of the same type. By order of hierarchy, the molten data
## value column will be of type 'double'. All measure variables not of type
## 'double' will be coerced too. Check DETAILS in ?melt.data.table for more on
## coercion.
setnames(actor_crc2, "CASRN", "chemical_casrn" )

actor_crc2 <- merge(CRC_tr, actor_crc2, by="chemical_casrn") %>%
  .[ value > 0] %>%
  .[ , var_ct:=.N, by=variable]
acp1 <- ggplot(actor_crc2, aes(variable)) +
  geom_text(stat='count', aes(label=..count..), y=2, vjust=-.2) +
  theme_classic() +
  theme(panel.border = element_blank(),
        axis.line = element_blank(),
        plot.margin = rep(unit(0,"null"),4),
        panel.spacing = unit(0,"null"),
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  ylab("") 

acp2 <- ggplot(actor_crc2, aes(chemical_name, variable)) +
  geom_tile(aes(fill=factor(value))) +
  theme_classic() +
  scale_fill_grey() +
  coord_flip() +
  theme(plot.margin = rep(unit(0,"null"),4),
        panel.spacing = unit(0,"null"),
    axis.text.x = element_text( angle=90, hjust=0.95, vjust=0.2),
        axis.text.y = element_text( size = 7), legend.position="none") +
  xlab("Chemicals") +
  ylab(" General Use Categories") 

acp1.1 <- ggplot_gtable(ggplot_build(acp1))
acp2.1<- ggplot_gtable(ggplot_build(acp2))

acp1.1$widths <- acp2.1$widths

gridExtra::grid.arrange(acp1.1, acp2.1, ncol=1, heights=c(1,10))

0.2 In Vivo Analysis

The percentage of chemicals with effects by organs (Figure 2)

chem_affected <- tr_lel2[ orglel_chem != "NA", list(effect_target, org_count, per_org_count, group)] %>% unique() %>%
  .[order(-per_org_count)] %>%
  .[ !(.$effect_target=='[other]') , ]

chem_affected <- chem_affected[!(effect_target %in% c('tissue nos','uncertain primary site'))]
length(unique(chem_affected$effect_target)) #68
## [1] 68
tissues <- chem_affected[group=='cosmetic_chem' & per_org_count > 1]$effect_target
chem_affected_plot <- chem_affected[effect_target %in% tissues]

organ_plot <- chem_affected_plot[ group == "toxref"] %>%
  .[ , list(effect_target, org_count)] %>% unique() %>%
  .[order(-org_count)]

ggplot() +
  geom_col(data=chem_affected_plot, 
           aes(x=effect_target, y=per_org_count, fill=group),position_dodge2(width=1,preserve='single'))  +
  coord_flip() +
  scale_x_discrete(limits=organ_plot$effect_target) +
  ylab("Percentage of Chemicals with Effects") +
  xlab("Organs") +
  scale_fill_grey(name='Group',breaks=c('toxref', 'cosmetic_chem'),labels=c('ToxRef', 'CRC')) +
  theme_bw() +
  theme(panel.border=element_rect(size=2), legend.position = "bottom")

The number of study types per chemical

chem_st <- tr_lel[group=="cosmetic_chem"] %>%
 .[ , list(chemical_casrn, study_id, chemical_name, study_type)] %>% 
  unique() %>% 
  dcast( ., chemical_casrn + chemical_name~study_type)
## Using 'study_type' as value column. Use 'value.var' to override
## Aggregate function missing, defaulting to 'length'
datatable(chem_st)
#write.csv(chem_st, file = "CRC_study_type.csv")

Table with mean and sd

tr_stat <- tr_lel2[group=="toxref"] %>% 
  .[orglel_chem != "NA"] %>% 
  .[ , c("orglel_chem", "chemical_casrn", "chemical_name"):=NULL] %>%
  .[ , group:=NULL] %>%
  unique()
setnames(tr_stat, c("organ_mean", "organ_sd", "org_count", "per_org_count"), 
         c("toxrefdb_mean", "toxrefdb_sd", "toxrefdb_count", "toxrefdb_percent"))

#datatable(tr_stat)

#write.csv(tr_stat, file = "ToxRef_SummaryStat.csv")

cc_stat <- tr_lel2[group=="cosmetic_chem"] %>% 
  .[orglel_chem != "NA"] %>% 
  .[ , c("orglel_chem", "chemical_casrn", "chemical_name"):=NULL] %>%
  .[ , group:=NULL] %>%
  unique()
setnames(cc_stat, c("organ_mean", "organ_sd", "org_count", "per_org_count"), 
         c("cosmetic_mean", "cosmetic_sd", "cosmetic_count", "cosmetic_percent"))

#datatable(tr_stat)

#write.csv(cc_stat, file = "CRC_SummaryStat.csv")
org_compar <- merge(tr_stat, cc_stat)

0.2.1 Analysis A: by tissue, compressing study_types (Figure 3)

organ_plot <- chem_affected[ group == "cosmetic_chem"] %>%
  .[ , list(effect_target, org_count)] %>% 
  unique() %>%
  .[order(-org_count)]

plot_org <- c("liver",
              "kidney",
              "stomach",
              "spleen",
              "testes",
              "uterus",
              "thymus",
              "adrenal gland",
              "lung",
              "brain",
              "heart",
              "bone marrow", 
              "lymph node",
              "thyroid gland")

dat_hit <-tr_lel[effect_target %in% plot_org] %>% unique() 

dat_hit$group <- as.factor(dat_hit$group)
organ.kruskal <- dat_hit %>%
  group_by(effect_target) %>%
  summarize(kruskal_p.value = kruskal.test(orglel_chem ~group)$p.value) %>%
  data.table()

# get information from Figure 3
fig3_organ_summary <-tr_lel2[ , list(group, effect_target, org_count, organ_mean, organ_sd, organ_median, organ_iqr)] %>% unique()

fig3_organ_summary <- fig3_organ_summary[order(-org_count,group)]

#write.csv(fig3_organ_summary, file="Fig3_organ_summary.csv")


ggplot(dat_hit) +
  geom_boxplot(aes(x=effect_target, y=orglel_chem, fill=group)) +
  theme_igray() +
  scale_fill_grey(start = 0.9, end = 0.6) +
  theme(axis.text.y = element_text(size = rel(1.4)),
        axis.title.y = element_text(size= rel(1.4)),
        axis.text.x = element_text(size = rel(1.4)),
        axis.title.x = element_text(size= rel(1.4))) +
  coord_flip() +
  ylab("LEL log10(mg/kg/day)") +
  xlab("Effected Organ") +
  theme(legend.position = "bottom")

0.2.2 Analysis B: by study type, compressing tissue (Figure 4)

tr_lel2 <- tr_lel[ , list(chemical_casrn, chemical_name, effect_target, orglel_chem, group)] %>%
  unique() %>% 
  .[ , organ_mean:=round(mean(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>% 
  .[ , organ_sd:=round(sd(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>%
  .[ orglel_chem != "NA", org_count:=.N, by=c( "effect_target", "group")] %>%
  .[ group == "cosmetic_chem", per_org_count:=round(((org_count/58)*100), 2)] %>%
  .[ group == "toxref", per_org_count:=round(((org_count/870)*100), 2)] %>%
  .[ , num.org.target:=.N, by=chemical_name]

lelbystudy <- tr_lel[ , list(chemical_casrn, chemical_name, study_type, lel_chem, group)] %>%
  unique() %>% 
  .[ , study_type_mean:=round(mean(lel_chem, na.rm = TRUE), 2), by=c("study_type", "group")] %>% 
  .[ , study_type_sd:=round(sd(lel_chem, na.rm = TRUE), 2), by=c("study_type", "group")] %>%
  .[ , study_type_median:=round(median(lel_chem, na.rm = TRUE), 2), by=c("study_type", "group")] %>%
  .[ , study_type_iqr:=(IQR(lel_chem, na.rm = TRUE)), by=c("study_type", "group")] %>%
  .[ lel_chem != "NA", study_count:=.N, by=c( "study_type", "group")]

uniqlelbystudy <- unique(lelbystudy[,c('group', 
                                       'study_type', 
                                       'study_type_mean', 
                                       'study_type_sd', 
                                       'study_type_median',
                                       'study_type_iqr',
                                       'study_count')])
uniqlelbystudy <-uniqlelbystudy[order(study_type, group)]

write.csv(uniqlelbystudy, file = "Fig4_study_type_stats.csv")

cos_chem_st <- tr_lel[group=="cosmetic_chem"] %>%
  .[ , list(chemical_name, study_type )] %>%
  unique() %>%
  .[ , cosmetic_chem:=.N, by=study_type] %>%
  .[ , chemical_name:=NULL] %>%
  unique()

tr_chem_st <- tr_lel[group=="toxref"] %>%
  .[ , list(chemical_name, study_type )] %>%
  unique() %>%
  .[ , toxref:=.N, by=study_type] %>%
  .[ , chemical_name:=NULL] %>%
  unique()

st_summary <- merge(cos_chem_st, tr_chem_st) %>% 
  melt(., id.vars= "study_type") 
st_boxplot <- ggplot(tr_lel) +
  geom_boxplot(aes(x=factor(study_type, level=c('MGR', 'DNT', 'DEV', 'CHR', 'SUB', 'SAC')), 
                   y=lel_chem, 
                   fill=group)) +
  #geom_point(aes(x=factor(study_type, level=c('MGR', 'DNT', 'DEV', 'CHR', 'SUB', 'SAC')),
  #               y=st_mean, shape=group), size=3)+
  geom_text(data=st_summary , 
            aes(x=factor(study_type, level=c('MGR', 'DNT', 'DEV', 'CHR', 'SUB', 'SAC')), 
                y= 4, 
                label=value, 
                group=variable),  size=5,
            position = position_dodge(1)) +
  theme_igray() +
  scale_fill_grey(start = 0.9, end = 0.6, name='Group',breaks=c('toxref', 'cosmetic_chem'),labels=c('ToxRef', 'CRC')) +
  #scale_shape(name='Group',breaks=c('toxref', 'cosmetic_chem'),labels=c('ToxRef', 'CRC'))+
  theme(axis.text.y = element_text(size = rel(1.4)),
        axis.title.y = element_text(size= rel(1.4)),
        axis.text.x = element_text(size = rel(1.4)),
        axis.title.x = element_text(size= rel(1.4))) +
  coord_flip() +
  ylab("LEL log10(mg/kg/day)") +
  xlab("Study Type") +
  theme(legend.position = "bottom")

st_boxplot

0.3 In Vitro Analysis

0.3.1 What the AC50 spread and burst dose for the 58 chemicals in toxcast (Figure 5)

mc5 <- tcplPrepOtpt(tcplLoadData(lvl=5, type='mc'))
colnames(mc5)
mc5 <- na.omit(mc5, cols='casn')
cosmetics.mc5 <- mc5[casn %in% chem_st$chemical_casrn]

unique(cosmetics.mc5$casn)
#---------------------------------------------------------------------#
# Prepare all cyto burst information for cosmetics
#---------------------------------------------------------------------#

#-----get max and min log10(AC50) for each cosmetic
colnames(cosmetics.mc5)
cosmetics.mc5[,max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[,min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]


#-----Load all chemical IDs
chems <- tcplLoadChem()
chids <- chems[casn %in% unique(cosmetics.mc5[, casn]),]

#-----Generate the cyto burst data for HT-H295R mc chemicals
cyto.cosmetics <- tcplCytoPt(chid = unique(chids[, chid]), min.test = 3)

#-----Add cyto burst values to mc5

toxcast_dat <- merge(cosmetics.mc5, cyto.cosmetics[, c("casn", "cyto_pt_um")], by = "casn")

#-----log the data for plotting

toxcast_dat[, log_cyto_pt_um := log10(cyto_pt_um)]

#-----Add selectivity metric

toxcast_dat[, log_selectivity := .(pmin(log_cyto_pt_um) - min.modlga.by.chem)] 

Getting the hit rate of accross ToxCast Assay by Chemical

hit.rates <- toxcast_dat[ , list(
  total.cnt  = .N, #total number of aeids tested in mc
  active.cnt  = as.double(lw(hitc==1)),  # active count
  inactive.cnt  = as.double(lw(hitc==0)),  #inactive count
  active.pct = round((lw(hitc==1)/.N)*100,2), #active percent
  inactive.pct = round((lw(hitc==0)/.N)*100,2) #inactive percent
), by = list(chid, casn, chnm)]

hit.rates<-hit.rates[order(-active.pct)]

# datatable(hit.rates)

#write.csv(hit.rates, file="CRC_hit_rates.csv")

Plote AC50 spread with burst and active assay percentage

toxcast_dat2 <- toxcast_dat[ , list(casn, chnm, modl_ga, log_cyto_pt_um)] %>% 
  unique() %>% 
  .[complete.cases(.),]

dat2_hit <- merge(toxcast_dat2, hit.rates, by="chnm")
setnames(dat2_hit, "casn.x", "TS_CASRN")
dat3 <- merge(dat2_hit, cosmetic_chem, by="TS_CASRN")
dat4 <- dat3[ , list(chnm, active.pct)] %>% unique()

p1 <- ggplot(dat3) +
  geom_violin(aes(chnm, modl_ga)) +
  geom_point(aes(chnm, log_cyto_pt_um), shape = 21,   size=4) +
  scale_fill_manual(values=c("white", "black")) + # white = 0 , black = 1
  scale_y_continuous(breaks=seq(-6, 4, 1), limits = c(-6,4)) +
  coord_flip() +
  scale_color_grey() +
  theme_classic() +
  theme(legend.position="none") +
  ylab("log10(AC50) (uM)") +
  xlab("Cosmetic Chemicals")

p2 <- ggplot(dat4, aes(chnm, active.pct)) +
  geom_col() +
  coord_flip() +
  scale_color_grey() + 
  theme_minimal() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  ylab("Active Hit Rates")

gridExtra::grid.arrange(p1, p2, ncol=2, widths=2:1)

0.4 TK Analysis

Linking in vivo with in vitro using httk

Calculate the HUMAN-based administered dose equivalents (ADEs) using httk

How many of the CRCs can we do TK on

#-----get max and min log10(AC50) for each cosmetic
colnames(cosmetics.mc5)
cosmetics.mc5[ , max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[ , min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[ , min.modlga.chem.unlog := 10^min.modlga.by.chem]
cosmetics.mc5[ , max.modlga.chem.unlog := 10^max.modlga.by.chem]
cosmetics.mc5.subset <- cosmetics.mc5[ , list(casn, 
                                              max.modlga.by.chem, 
                                              min.modlga.by.chem, 
                                              min.modlga.chem.unlog, 
                                              max.modlga.chem.unlog )] %>% 
  unique()

setnames(cosmetics.mc5.subset, "casn", "chemcas")
#need to first confirm which casn's have httk information otherwise a loop will fail
ac50pct.subset<-as.data.frame(subset(cosmetics.mc5.subset, chemcas %in% get_cheminfo(species='Human')))

colnames(ac50pct.subset)
dim(ac50pct.subset)
length(unique(ac50pct.subset$chemcas)) # 19 chemicals total

Calculate the ADE for the minimum AC50 of cosmetic chemicals

ade.df <- data.table()

for (casn in ac50pct.subset[,'chemcas']) {
  aed_50 <- (calc_mc_oral_equiv(conc=subset(ac50pct.subset,chemcas==casn)[['min.modlga.chem.unlog']], chem.cas=casn, 
                                which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=F))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=F))
  ade.df <- rbind(ade.df,cbind(casn, aed_50, max_aed))
}


ade.df[ , aed_50:=as.numeric(aed_50)]
ade.df[ , log_aed_50:=log10(aed_50)]
ade.df[ , ac50:="min"]

colnames(ade.df)[1] <- "chemical_casrn"
setkey(ade.df, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_min <- ade.df[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_min[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Calculate the ADE for the maximum AC50 of cosmetic chemicals

ade.df.max <- data.table()

for (casn in ac50pct.subset[,'chemcas']) {
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset,chemcas==casn)[['max.modlga.chem.unlog']], chem.cas=casn, 
                              which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=F))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=F))
  ade.df.max<-rbind(ade.df.max,cbind(casn, aed_50, max_aed))
}

ade.df.max[ , aed_50:=as.numeric(aed_50)]
ade.df.max[ , log_aed_50:=log10(aed_50)]
ade.df.max[ , ac50:="max"]

colnames(ade.df.max)[1] <- "chemical_casrn"
setkey(ade.df.max, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_max <- ade.df.max[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_max[ , lel_chem:=min(lel_st), by="chemical_casrn"]

0.5 COSMOS TTC dataset

TTC_cramer.class <- read.delim("TTC_cramer_class.txt")

ttc <- data.table(TTC_cramer.class) %>% 
  .[ Cramer.Class.ToxTree..v2.6.0. == "Low (Class I)", federated_ttc:=log10(46/1000) ] %>%
  .[ Cramer.Class.ToxTree..v2.6.0. == "Intermediate (Class II)", federated_ttc:=log10(6.2/1000) ] %>%
  .[ Cramer.Class.ToxTree..v2.6.0. == "High (Class III)", federated_ttc:=log10(2.3/1000) ]

setnames(ttc, "CAS.RN", "chemical_casrn")

ttc_crc <- ttc[ `chemical_casrn` %in% CRC_tr$chemical_casrn ] 
ttc_tr <- merge(ttc_crc, tr_lel, by="chemical_casrn") 

tp1<-ggplot(ttc_tr) +
  geom_point(aes(chemical_name, federated_ttc)) +
  geom_line(aes(chemical_name, lel_st), color="blue")+
  coord_flip() +
  xlab("lel by study")

Expocast Data

expocast 2014 numbers from Wambaugh et al

expo <- read.csv("SupplementalTable1-070314.txt.csv") %>% data.table()

setnames(expo, "CASRN", "chemical_casrn")

Create spread plot for cosmetic chemcials

ade_plot <- rbind(httk_compare_min, httk_compare_max)

ade.ttc <- merge(ade_plot, ttc_crc)
ade.ttc.ex <- merge(ade.ttc, expo, by="chemical_casrn" )
ade.ttc.ex[ , Total.median:=log10(Total.median)]

cos_plot <- ggplot(ade.ttc.ex) +
  geom_line(aes(chemical_name, log_aed_50), color="blue", size=1.5, alpha=0.5) + 
  geom_line(aes(chemical_name, lel_st), size=1.5) +
  geom_point(aes(chemical_name, lel_chem), size=3) +
  geom_point(aes(chemical_name, federated_ttc), size=3, color="red", shape=2) +
  geom_point(aes(chemical_name,  Total.median), size=3, color="green4", shape=8) +
  theme_igray() +
  coord_flip() +
  scale_y_continuous(limits=c(-10, 4), breaks=seq(-10, 4))  +
  xlab("CRC") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank())

Calculate the ADE for the minimum AC50 for ToxRef chemicals

#-----------------------------------------------------------------------------------#
#Calculate the HUMAN-based administered dose equivalents (ADEs) using httk
#-----------------------------------------------------------------------------------#

tr_httk <- tr_lel[ group=="toxref"]
tr_mc5<- mc5[casn %in% tr_httk$chemical_casrn ]



#---------------------------------------------------------------------#
# Prepare all cyto burst information for cosmetics
#---------------------------------------------------------------------#

#-----get max and min log10(AC50) for each cosmetic
colnames(tr_mc5)
tr_mc5[,max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
tr_mc5[,min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]
tr_mc5[,min.modlga.chem.unlog := 10^min.modlga.by.chem]
tr_mc5[,max.modlga.chem.unlog := 10^max.modlga.by.chem]
tr_mc5.subset <- tr_mc5[ , list(casn, max.modlga.by.chem, min.modlga.by.chem, min.modlga.chem.unlog, max.modlga.chem.unlog )] %>% unique()
#-----------------------------------------------------------------------------------#
#Calculate the HUMAN-based administered dose equivalents (ADEs) using httk
#-----------------------------------------------------------------------------------#

setnames(tr_mc5.subset, "casn", "chemcas")
#need to first confirm which casn's have httk information otherwise a loop will fail
ac50pct.subset.tr<-as.data.frame(subset(tr_mc5.subset, chemcas %in% get_cheminfo(species='Human')))

colnames(ac50pct.subset.tr)
dim(ac50pct.subset.tr)
length(unique(ac50pct.subset.tr$chemcas)) # 281 chemicals total

ade.tr=data.table()
for (casn in ac50pct.subset.tr[,'chemcas'])
{
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset.tr,chemcas==casn)[['min.modlga.chem.unlog']], chem.cas=casn, 
                              which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=F))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=F))
  ade.tr<-rbind(ade.tr,cbind(casn, aed_50, max_aed))
}

ade.tr[ , aed_50:=as.numeric(aed_50)]
ade.tr[ , log_aed_50:=log10(aed_50)]

colnames(ade.tr)[1] <- "chemical_casrn"
ade.tr <- data.table(ade.tr)
setkey(ade.tr, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_tr <- ade.tr[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_tr[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Calculate the AED for the maximum AC50 of ToxRef chemicals

ade.tr.max=data.table()
for (casn in ac50pct.subset.tr[,'chemcas'])
{
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset.tr,chemcas==casn)[['max.modlga.chem.unlog']], 
                              chem.cas=casn, 
                              which.quantile=c(0.5), 
                              species='Human', 
                              method='dr', 
                              well.stirred.correction=F))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=F))
   ade.tr.max<-rbind(ade.tr.max,cbind(casn, aed_50, max_aed))
}


ade.tr.max[ , aed_50:=as.numeric(aed_50)]
ade.tr.max[ , log_aed_50:=log10(aed_50)]

colnames(ade.tr.max)[1] <- "chemical_casrn"
ade.tr.max <- data.table(ade.tr.max)
setkey(ade.tr.max, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_tr_max <- ade.tr.max[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_tr_max[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Create Density plot for ToxRefDB

httk_tr_plot <- httk_compare_tr[ , list(chemical_casrn, 
                                        chemical_name, 
                                        study_id, 
                                        lel_chem, 
                                        lel_st)] %>% 
  unique()

write.csv(httk_tr_plot, file="LEL_non_CRC.csv")

httk_ac50min_plot <- httk_compare_tr[ , list(chemical_casrn, 
                                             chemical_name, 
                                             log_aed_50)] %>% 
  unique() %>%
  .[ , ac50:="min"]

httk_ac50max_plot <- httk_compare_tr_max[ , list(chemical_casrn, 
                                                 chemical_name, 
                                                 log_aed_50)] %>% 
  unique() %>%
  .[ , ac50:="max"]

tr_plot <- ggplot() +
  geom_density(data=httk_tr_plot, aes(lel_st), size=1) +
  geom_density(data=httk_ac50min_plot, aes(log_aed_50), color="steelblue3", size=1) +
  geom_density(data=httk_ac50max_plot, aes(log_aed_50), color="blue3", size=1, linetype="dashed") +
  theme_igray() +
  scale_x_continuous(limits=c(-10, 4), breaks=seq(-10, 4)) +
  xlab("log10(mg/kg/day)") +
  ylab("305 non-CRC")

Combinding cosmetic range plot with toxrefdb range density plot

httkp1 <- ggplot_gtable(ggplot_build(cos_plot))
httkp2 <- ggplot_gtable(ggplot_build(tr_plot))
maxWidth = unit.pmax(httkp1$widths[2:3], httkp2$widths[2:3])
httkp1$widths[2:3] <- maxWidth
httkp2$widths[2:3] <- maxWidth

gridExtra::grid.arrange(httkp1, httkp2, ncol=1, heights=2:1)