Aims
To develop predictive tools that can be applied towards producing safer chemicals, we focused on chemicals used in products subject to in vivo testing restrictions in the EU. We surveyed information available in developing predictive tools, we have surveyed the information available in the EPA ToxCast database (invitrodb version 2, and ToxRefDB version 1 to evaluate the potential in vitro and in vivo activity of 58 chemicals that are identified as either ingredients or contaminates in personal care products, fragrances, and cosmetics based on information from the Personal Care Products Council (PCPC).

Loading libraries

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 16299)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tcpl_2.0.1        knitr_1.21        httk_1.8         
##  [4] gridExtra_2.3     ggthemes_4.0.1    ggplot2_3.1.0    
##  [7] DT_0.5            dplyr_0.7.8       RMySQL_0.10.15   
## [10] DBI_1.0.0         data.table_1.11.8 magrittr_1.5     
## 
## loaded via a namespace (and not attached):
##  [1] deSolve_1.21       tidyselect_0.2.5   xfun_0.4          
##  [4] purrr_0.2.5        splines_3.5.1      lattice_0.20-35   
##  [7] colorspace_1.3-2   expm_0.999-3       htmltools_0.3.6   
## [10] yaml_2.2.0         chron_2.3-53       blob_1.1.1        
## [13] survival_2.42-3    rlang_0.3.0.1      pillar_1.3.0      
## [16] glue_1.3.0         withr_2.1.2        bit64_0.9-7       
## [19] RColorBrewer_1.1-2 gsubfn_0.7         bindrcpp_0.2.2    
## [22] bindr_0.1.1        plyr_1.8.4         stringr_1.3.1     
## [25] munsell_0.5.0      gtable_0.2.0       htmlwidgets_1.3   
## [28] mvtnorm_1.0-8      memoise_1.1.0      evaluate_0.12     
## [31] parallel_3.5.1     proto_1.0.0        Rcpp_1.0.0        
## [34] scales_1.0.0       truncnorm_1.0-8    bit_1.1-14        
## [37] digest_0.6.18      stringi_1.2.4      msm_1.6.6         
## [40] survey_3.34        numDeriv_2016.8-1  tools_3.5.1       
## [43] sqldf_0.4-11       RSQLite_2.1.1      lazyeval_0.2.1    
## [46] tibble_1.4.2       crayon_1.3.4       pkgconfig_2.0.2   
## [49] Matrix_1.2-14      assertthat_0.2.0   rmarkdown_1.11    
## [52] R6_2.3.0           compiler_3.5.1

0.1 Data Extraction

This script is to create cosmetics target organ & counts and the LEL from the study and what study type

Reading in Data from database and cosmetic chemicals

con <- dbConnect(drv=RMySQL::MySQL(), group="toxrefdb_1_3")

## To query ToxRefDBv1
tr_all<-dbGetQuery(con, "SELECT chemical.*, study.*,dose.*, tg.*, dtg.*,dtg_effect.*,effect.*, endpoint.*
                   FROM
                   dev_toxrefdb_1_3.chemical INNER JOIN
                   (dev_toxrefdb_1_3.study INNER JOIN
                   (dev_toxrefdb_1_3.dose INNER JOIN
                   (dev_toxrefdb_1_3.tg INNER JOIN
                   (dev_toxrefdb_1_3.dtg INNER JOIN
                   (dev_toxrefdb_1_3.dtg_effect INNER JOIN
                   (dev_toxrefdb_1_3.endpoint INNER JOIN
                   dev_toxrefdb_1_3.effect 
                   ON endpoint.endpoint_id=effect.endpoint_id)
                   ON dtg_effect.effect_id=effect.effect_id)
                   ON dtg_effect.dtg_id=dtg.dtg_id)
                   ON tg.tg_id=dtg.tg_id AND tg.tg_id=effect.tg_id)
                   ON dose.dose_id=dtg.dose_id)
                   ON study.study_id=tg.study_id AND study.study_id=dose.study_id)
                   ON chemical.chemical_id=study.chemical_id") %>%
  data.table()

## Load cosmetic-relevant chemicals (CRCs)
cosmetic_chem  <- read.csv("Cosmetic_60_chems_LELs_2018MAR28.csv", header=TRUE, as.is = T, check.names=F) %>% 
  data.table()

0.1.1 How many of the CRCs are in ToxRefDBv1

Function that corrects some known errors in the ToxRefDBv1

trCorrections <- function(dat){

  ## Remove studies that were entered twice
  rep <- c("3695", "3696","3681","3684","3688","3687", "3689", "3692", "3694") ##studies that were entered twice
  dat[study_id %in% rep, study_id := 0]
  dat <- dat[study_id != 0]

  ##Correcting Errors
  dat[study_year == "19987", study_year := 1987]
  dat[study_year == "9184", study_year := 1984]
  dat[substance_purity == "8805", substance_purity := 88.05]
  dat[substance_purity > 100, substance_purity := 100]

  ##Putting missing data under the same terminology
  dat[study_source == "unassigned", study_source := "unknown"]
  dat[is.na(strain_group), strain_group := "other"]
  dat[strain_group == "other", strain_group := species] ## All strain other ==> species
  dat[admin_method =="[Not Specified]", admin_method := admin_route] ## All admin_method[Not Specified] to admin method

}

Allometric scaling of all doses and filter data

tr_all[!is.na(dose_level), dose_no := max(dose_level, na.rm = TRUE) , by = study_id]
tr_all[ species == "mouse", HED := dose_adjusted * 0.081]
tr_all[ species == "rat", HED := dose_adjusted * 0.162]
tr_all[ species == "guinea_pig", HED := dose_adjusted * 0.216]
tr_all[ species == "rabbit", HED := dose_adjusted * 0.324]
tr_all[ species == "dog", HED := dose_adjusted * 0.541]
tr_all[ species == "hamster", HED := dose_adjusted * 0.135]
tr_all[ species == "primate", HED := dose_adjusted]

tr_all[!is.na(dose_level), dose_no := max(dose_level, na.rm=TRUE) , by = study_id]
tr_all[ HED > 0, log_dose_adjusted := round(log10(HED), 1)]
tr_all[!is.na(log_dose_adjusted), ldt := min(log_dose_adjusted), by = study_id]
tr_all[!is.na(log_dose_adjusted), hdt := max(log_dose_adjusted), by = study_id]
tr_all[ , dose_spacing := (hdt-ldt)/(dose_no-1)]

tr_all2 <- trCorrections(tr_all)

tr_all2[ chemical_casrn %in% cosmetic_chem$TestSubstance_CASRN, group:="cosmetic_chem"]
tr_all2[is.na(group) , group:="toxref"]

ar <- c("Oral")
sp <- c("mouse", "rat", "dog", "rabbit")
st <- c("SUB","CHR", "DEV", "MGR", "SAC" , "DNT" )
ec <- c('systemic')
ls <- c('adult')
gn <- c('F0')

tr_org <- data.table(tr_all2) %>% .[data_entry_level == 'all effects' &
                              data_usability == 'acceptable' &
                              data_entry_status == 'complete'&
                              dose_adjusted > 0 &
                              dose_adjusted_unit == 'mg/kg/day' & 
                              dose_level > 0 &
                              admin_route %in% ar &
                              study_type %in% st &
                              species %in% sp &
                              effect_category %in% ec &
                              life_stage %in% ls &
                              generation %in% gn & 
                              effect_target_class == "organ" &
                              dose_no > 1] %>%
  .[ , list(chemical_id, chemical_id_type, chemical_casrn, chemical_name,
            study_id,
            study_source,
            study_type,
            species,
            admin_route,
            admin_method,
            effect_category,
            effect_type,
            effect_target,
            effect_target_class,
            treatment_related,
            dose_no,
            HED,
            log_dose_adjusted,
            ldt,
            hdt,
            dose_spacing)] %>%
  unique()


tr_lel <- data.table(tr_all2) %>% .[data_entry_level == 'all effects' &
                              data_usability == 'acceptable' &
                              data_entry_status == 'complete'&
                              dose_adjusted > 0 &
                              dose_adjusted_unit == 'mg/kg/day' & 
                              dose_level > 0 &
                              admin_route %in% ar &
                              study_type %in% st &
                              species %in% sp &
                              effect_category %in% ec &
                              life_stage %in% ls &
                              generation %in% gn & 
                              dose_no > 1] %>%
  .[ , list(chemical_id, chemical_id_type, chemical_casrn, chemical_name,
            study_id,
            study_source,
            study_type,
            species,
            admin_route,
            admin_method,
            effect_category,
            effect_type,
            effect_target,
            effect_target_class,
            treatment_related,
            dose_no,
            HED,
            log_dose_adjusted,
            ldt,
            hdt,
            dose_spacing)] %>%
  unique()

Calculate the organ LELs by chemicals in both log10 and non transformed dose

tr_lel[ effect_target_class=="organ", tr_eff := max(treatment_related, na.rm = TRUE), 
    by = c("study_id", "effect_target")]

tr_lel[ tr_eff==1, orglel_chem:= min(log10(HED[treatment_related==1]), na.rm = TRUE), 
    by = c("chemical_id", "effect_target")]

tr_lel[ , tr:= max(treatment_related, na.rm = TRUE), 
    by = c("study_id")]

tr_lel[ tr==1, lel_chem:=min(log10(HED[treatment_related==1]), na.rm = TRUE), by = chemical_id]

tr_lel[ tr==1, lel_st:=min(log10(HED[treatment_related==1]), na.rm = TRUE), by = c("chemical_id", "study_id")]

Grouping Cosmetic and ToxRef Chemicals

tr_lel[ chemical_casrn %in% cosmetic_chem$TS_CASRN, group:="cosmetic_chem"]
tr_lel[is.na(group) , group:="toxref"]
tr_lel[ , chem_group:=uniqueN(chemical_id), by=group]
tr_lel[ , st_mean:=round(mean(lel_st, na.rm = TRUE), 2), by=c("study_type", "group")]

tr_supp <- tr_lel[treatment_related==1] # supplemental table

tr_lel2 <- tr_lel[ , list(chemical_casrn, chemical_name, effect_target, orglel_chem, group)] %>%
  unique() %>% 
  .[ , organ_mean:=round(mean(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>% 
  .[ , organ_med:=round(median(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>% 
  .[ , organ_sd:=round(sd(orglel_chem, na.rm = TRUE), 2), by=c("effect_target", "group")] %>%
  .[ orglel_chem != "NA", org_count:=.N, by=c( "effect_target", "group")] %>%
  .[ group == "cosmetic_chem", per_org_count:=round(((org_count/58)*100), 2)] %>%
  .[ group == "toxref", per_org_count:=round(((org_count/870)*100), 2)] %>%
  .[ , num.org.target:=.N, by=chemical_name]

#write.csv(tr_lel2, file="organ_summarystats_by_group_29Jan2019.csv")

How many CRCs are in ToxRefDB

CRC_tr <- tr_lel[ , list(chemical_casrn, chemical_name, admin_route, group)] %>% unique() %>%
  .[ group=="cosmetic_chem"]

ACTOR

CRC general use

actor <- data.table(NewUseTable)
actor[CASRN=='52645-53-1',FLAME.RETARDANT := 0]

actor_crc <- actor[ CASRN %in% CRC_tr$chemical_casrn ] 

actor_crc2 <- reshape2::melt(actor_crc, id.vars=c("CASRN", "COMMON.NAME", "TXCODE")) %>% data.table() %>%
  .[ , COMMON.NAME:=tolower(COMMON.NAME)] %>%
  .[ , variable:=tolower(variable)] 

setnames(actor_crc2, "CASRN", "chemical_casrn" )

actor_crc2 <- merge(CRC_tr, actor_crc2, by="chemical_casrn") %>%
  .[ value > 0] %>%
  .[ , var_ct:=.N, by=variable]
acp1 <- ggplot(actor_crc2, aes(variable)) +
  geom_text(stat='count', aes(label=..count..), y=2, vjust=-.2) +
  theme_classic() +
  theme(panel.border = element_blank(),
        axis.line = element_blank(),
        plot.margin = rep(unit(0,"null"),4),
        panel.spacing = unit(0,"null"),
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  ylab("") 

acp2 <- ggplot(actor_crc2, aes(chemical_name, variable)) +
  geom_tile(aes(fill=factor(value))) +
  theme_classic() +
  scale_fill_grey() +
  coord_flip() +
  theme(plot.margin = rep(unit(0,"null"),4),
        panel.spacing = unit(0,"null"),
    axis.text.x = element_text( angle=90, hjust=0.95, vjust=0.2),
        axis.text.y = element_text( size = 7), legend.position="none") +
  xlab("Chemicals") +
  ylab(" General Use Categories") 

acp1.1 <- ggplot_gtable(ggplot_build(acp1))
acp2.1<- ggplot_gtable(ggplot_build(acp2))

acp1.1$widths <- acp2.1$widths

gridExtra::grid.arrange(acp1.1, acp2.1, ncol=1, heights=c(1,10))

0.2 In Vivo Analysis

The percentage of chemicals with effects by organs

chem_affected <- tr_lel2[ orglel_chem != "NA", list(effect_target, org_count, per_org_count, group)] %>% unique() %>%
  .[order(-per_org_count)] %>%
  .[ !(.$effect_target=='[other]') , ]

organ_plot <- chem_affected[ group == "toxref"] %>%
  .[ , list(effect_target, org_count)] %>% unique() %>%
  .[order(-org_count)]

ggplot(chem_affected , aes(x=effect_target, y=per_org_count, fill= group)) +
  geom_bar(stat="identity",position = "dodge")  +
  coord_flip() +
  scale_x_discrete(limits=organ_plot$effect_target ) +
  ylab("Percentage of Chemicals with Effects") +
  xlab("Organs") +
  scale_fill_grey(name="Group",labels=c("CRC", "ToxRefDBv1")) +
  theme_bw() +
  theme(panel.border=element_rect(size=2), legend.position = "bottom")

The number of study types per chemical

chem_st <- tr_lel[group=="cosmetic_chem"] %>%
 .[ , list(chemical_casrn, study_id, chemical_name, study_type)] %>% 
  unique() %>% 
  dcast( ., chemical_casrn + chemical_name~study_type)
## Using 'study_type' as value column. Use 'value.var' to override
## Aggregate function missing, defaulting to 'length'
datatable(chem_st)
#write.csv(chem_st, file = "CRC_study_type.csv")

Table with mean and sd

tr_stat <- tr_lel2[group=="toxref"] %>% 
  .[orglel_chem != "NA"] %>% 
  .[ , c("orglel_chem", "chemical_casrn", "chemical_name"):=NULL] %>%
  .[ , group:=NULL] %>%
  unique()
setnames(tr_stat, c("organ_mean", "organ_med", "organ_sd", "org_count", "per_org_count"), 
         c("toxrefdb_mean", "toxrefdb_med", "toxrefdb_sd", "toxrefdb_count", "toxrefdb_percent"))

datatable(tr_stat)
cc_stat <- tr_lel2[group=="cosmetic_chem"] %>% 
  .[orglel_chem != "NA"] %>% 
  .[ , c("orglel_chem", "chemical_casrn", "chemical_name"):=NULL] %>%
  .[ , group:=NULL] %>%
  unique()
setnames(cc_stat, c("organ_mean", "organ_med", "organ_sd", "org_count", "per_org_count"), 
         c("cosmetic_mean", "cosmetic_med", "cosmetic_sd", "cosmetic_count", "cosmetic_percent"))

datatable(tr_stat)
org_compar <- merge(tr_stat, cc_stat)

0.2.1 Analysis A: by tissue, compressing study_types

organ_plot <- chem_affected[ group == "cosmetic_chem"] %>%
  .[ , list(effect_target, org_count)] %>% 
  unique() %>%
  .[order(-org_count)]

plot_org <- c("liver",
              "kidney",
              "stomach",
              "spleen",
              "testes",
              "uterus",
              "thymus",
              "adrenal gland",
              "lung",
              "brain",
              "heart",
              "bone marrow", 
              "lymph node",
              "thyroid gland")

dat_hit <-tr_lel[effect_target %in% plot_org] %>% unique() 

ggplot(dat_hit) +
  geom_boxplot(aes(x=effect_target, y=orglel_chem, fill=group)) +
  theme_igray() +
  scale_fill_grey(start = 0.9, end = 0.6,     
                  name="Group",
                  labels=c("CRC", "ToxRefDBv1")) +
  theme(axis.text.y = element_text(size = rel(1.4)),
        axis.title.y = element_text(size= rel(1.4)),
        axis.text.x = element_text(size = rel(1.4)),
        axis.title.x = element_text(size= rel(1.4))) +
  coord_flip() +
  ylab("LEL log10(mg/kg/day)") +
  xlab("Affected Target Organs") +
  theme(legend.position = "bottom")

Is the average LEL by organ different between CRC and ToxRefDBv1 chemicals?

dat_hit[, group := as.factor(group)]

organ.wilcox <- dat_hit %>%
  group_by(effect_target) %>%
  summarize(wilcox_p.value = wilcox.test(orglel_chem~group)$p.value) %>%
  data.table()

kable(organ.wilcox)
effect_target wilcox_p.value
adrenal gland 0.0000000
bone marrow 0.0000004
brain 0.0000002
heart 0.0053762
kidney 0.0000000
liver 0.0000000
lung 0.0029141
lymph node 0.0000000
spleen 0.0000000
stomach 0.1656585
testes 0.0000000
thymus 0.0000133
thyroid gland 0.0005481
uterus 0.0000000

Analysis B: by study type, compressing tissue

cos_chem_st <- tr_lel[group=="cosmetic_chem"] %>%
  .[ , list(chemical_name, study_type )] %>%
  unique() %>%
  .[ , cosmetic_chem:=.N, by=study_type] %>%
  .[ , chemical_name:=NULL] %>%
  unique()

tr_chem_st <- tr_lel[group=="toxref"] %>%
  .[ , list(chemical_name, study_type )] %>%
  unique() %>%
  .[ , toxref:=.N, by=study_type] %>%
  .[ , chemical_name:=NULL] %>%
  unique()

st_summary <- merge(cos_chem_st, tr_chem_st) %>% 
  melt(., id.vars= "study_type") 
ggplot(tr_lel) +
  geom_boxplot(aes(x=study_type, 
                   y=lel_chem, 
                   fill=group)) +
  geom_text(data=st_summary , 
            aes(x=study_type, 
                y= 4, 
                label=value, 
                group=variable),  size=5,
            position = position_dodge(1)) +
  theme_igray() +
  scale_fill_grey(start = 0.9, end = 0.6,
                  name="Group",
                  labels=c("CRC", "ToxRefDBv1")) +
  theme(axis.text.y = element_text(size = rel(1.4)),
        axis.title.y = element_text(size= rel(1.4)),
        axis.text.x = element_text(size = rel(1.4)),
        axis.title.x = element_text(size= rel(1.4))) +
  coord_flip() +
  ylab("LEL log10(mg/kg/day)") +
  xlab("Study Type") +
  theme(legend.position = "bottom")

Is the average LEL by study type different between CRC and ToxRefDBv1 chemicals?

study.wilcox <- tr_lel %>%
  group_by(study_type) %>%
  summarize(wilcox_p.value = wilcox.test(lel_chem~group)$p.value) %>%
  data.table()

kable(study.wilcox)
study_type wilcox_p.value
CHR 0.0000000
DEV 0.0000000
DNT 0.7445796
MGR 0.0000000
SAC 0.0000000
SUB 0.0000000

0.3 In Vitro Analysis

0.3.1 AC50 Distribution and Hit Rates for CRCs in ToxCast

mc5 <- tcplPrepOtpt(tcplLoadData(lvl=5, type='mc'))
colnames(mc5)
mc5 <- na.omit(mc5, cols='casn')
cosmetics.mc5 <- mc5[casn %in% chem_st$chemical_casrn]

unique(cosmetics.mc5$casn)
#-----get max and min log10(AC50) for each cosmetic
colnames(cosmetics.mc5)
cosmetics.mc5[,max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[,min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]

#-----Load all chemical IDs
chems <- tcplLoadChem()
chids <- chems[casn %in% unique(cosmetics.mc5[, casn]),]

#-----Generate the cyto burst data for HT-H295R mc chemicals
cyto.cosmetics <- tcplCytoPt(chid = unique(chids[, chid]), min.test = 3)

#-----Add cyto burst values to mc5

toxcast_dat <- merge(cosmetics.mc5, cyto.cosmetics[, c("casn", "cyto_pt_um")], by = "casn")

#-----log the data for plotting

toxcast_dat[, log_cyto_pt_um := log10(cyto_pt_um)]

#-----Add selectivity metric

toxcast_dat[, log_selectivity := .(pmin(log_cyto_pt_um) - min.modlga.by.chem)] 

Getting the hit rate of accross ToxCast Assay by Chemical

hit.rates <- toxcast_dat[ , list(
  total.cnt  = .N, #total number of aeids tested in mc
  active.cnt  = as.double(lw(hitc==1)),  # active count
  inactive.cnt  = as.double(lw(hitc==0)),  #inactive count
  active.pct = round((lw(hitc==1)/.N)*100,2), #active percent
  inactive.pct = round((lw(hitc==0)/.N)*100,2) #inactive percent
), by = list(chid, casn, chnm)]

hit.rates<-hit.rates[order(-active.pct)]

datatable(hit.rates)

Plot AC50 spread with burst and active assay percentage

toxcast_dat2 <- toxcast_dat[ , list(casn, chnm, modl_ga, log_cyto_pt_um)] %>% 
  unique() %>% 
  .[complete.cases(.),]

dat2_hit <- merge(toxcast_dat2, hit.rates, by="chnm")
setnames(dat2_hit, "casn.x", "TS_CASRN")
dat3 <- merge(dat2_hit, cosmetic_chem, by="TS_CASRN")
dat4 <- dat3[ , list(chnm, active.pct)] %>% unique()

p1 <- ggplot(dat3) +
  geom_violin(aes(chnm, modl_ga)) +
  geom_point(aes(chnm, log_cyto_pt_um), shape = 21,   size=4) +
  scale_fill_manual(values=c("white", "black")) + # white = 0 , black = 1
  scale_y_continuous(breaks=seq(-6, 4, 1), limits = c(-6,4)) +
  coord_flip() +
  scale_color_grey() +
  theme_classic() +
  theme(legend.position="none") +
  ylab("log10(AC50) (uM)") +
  xlab("Cosmetic Chemicals")+
  annotate(geom="text", 
           x = 60, 
           y = -6, 
           label = "A", 
           color = "black", 
           hjust = 0, 
           vjust = 1, 
           size = 4)

p2 <- ggplot(dat4, aes(chnm, active.pct)) +
  geom_col() +
  coord_flip() +
  scale_color_grey() + 
  theme_bw() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.border=element_blank()) +
  ylab("Active Hit Rates")+
  annotate(geom="text", 
           y = -8, 
           x = 60, 
           label = "B", 
           color = "black", 
           hjust = 0, 
           vjust = 1, 
           size = 4)

gridExtra::grid.arrange(p1, p2, ncol=2, widths=2:1)

#png(width = 4000, height = 4000, res=600)
#plot(arrangeGrob(p1,p2,ncol=2,widths=2:1))
#dev.off()

0.4 TK Analysis

Linking in vivo with in vitro using httk

Calculate the human-based administered dose equivalents (AEDs) using httk

How many of the CRCs have public httk information?

#-----get max and min log10(AC50) for each cosmetic
colnames(cosmetics.mc5)
cosmetics.mc5[ , max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[ , min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]
cosmetics.mc5[ , min.modlga.chem.unlog := 10^min.modlga.by.chem]
cosmetics.mc5[ , max.modlga.chem.unlog := 10^max.modlga.by.chem]
cosmetics.mc5.subset <- cosmetics.mc5[ , list(casn, 
                                              max.modlga.by.chem, 
                                              min.modlga.by.chem, 
                                              min.modlga.chem.unlog, 
                                              max.modlga.chem.unlog )] %>% 
  unique()

setnames(cosmetics.mc5.subset, "casn", "chemcas")
#need to first confirm which casn's have httk information otherwise a loop will fail
ac50pct.subset<-as.data.frame(subset(cosmetics.mc5.subset, chemcas %in% get_cheminfo(species='Human')))

colnames(ac50pct.subset)
dim(ac50pct.subset)
length(unique(ac50pct.subset$chemcas)) # 19 chemicals total

Calcualte the AED for the minimum AC50 of cosmetic chemicals

aed.df <- data.table()

for (casn in ac50pct.subset[,'chemcas']) {
  aed_50 <- (calc_mc_oral_equiv(conc=subset(ac50pct.subset,chemcas==casn)[['min.modlga.chem.unlog']], chem.cas=casn, 
                                which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=T, restrictive.clearance=T))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=T, restrictive.clearance=T))
  aed.df <- rbind(aed.df,cbind(casn, aed_50, max_aed))
}


aed.df[ , aed_50:=as.numeric(aed_50)]
aed.df[ , log_aed_50:=log10(aed_50)]
aed.df[ , ac50:="min"]

colnames(aed.df)[1] <- "chemical_casrn"
setkey(aed.df, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_min <- aed.df[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_min[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Calculate the AED for the maximum AC50 of cosmetic chemicals

aed.df.max <- data.table()

for (casn in ac50pct.subset[,'chemcas']) {
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset,chemcas==casn)[['max.modlga.chem.unlog']], chem.cas=casn, 
                              which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=T, restrictive.clearance=T))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=T, restrictive.clearance=T))
  aed.df.max<-rbind(aed.df.max,cbind(casn, aed_50, max_aed))
}

aed.df.max[ , aed_50:=as.numeric(aed_50)]
aed.df.max[ , log_aed_50:=log10(aed_50)]
aed.df.max[ , ac50:="max"]

colnames(aed.df.max)[1] <- "chemical_casrn"
setkey(aed.df.max, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_max <- aed.df.max[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_max[ , lel_chem:=min(lel_st), by="chemical_casrn"]

0.5 COSMOS TTC dataset

TTC_cramer.class <- read.delim("TTC_cramer_class.txt")

ttc <- data.table(TTC_cramer.class) %>% 
  .[ Cramer.Class.ToxTree..v2.6.0. == "Low (Class I)", federated_ttc:=log10(46/1000) ] %>%
  .[ Cramer.Class.ToxTree..v2.6.0. == "Intermediate (Class II)", federated_ttc:=log10(6.2/1000) ] %>%
  .[ Cramer.Class.ToxTree..v2.6.0. == "High (Class III)", federated_ttc:=log10(2.3/1000) ]

setnames(ttc, "CAS.RN", "chemical_casrn")

ttc_crc <- ttc[ `chemical_casrn` %in% CRC_tr$chemical_casrn ] 
ttc_tr <- merge(ttc_crc, tr_lel, by="chemical_casrn") 

tp1<-ggplot(ttc_tr) +
  geom_point(aes(chemical_name, federated_ttc)) +
  geom_line(aes(chemical_name, lel_st), color="blue")+
  coord_flip() +
  xlab("lel by study")

Expocast Data

expocast 2014 numbers from Wambaugh et al

expo <- read.csv("SupplementalTable1-070314.txt.csv") %>% data.table()

setnames(expo, "CASRN", "chemical_casrn")

Create spread plot for cosmetic chemcials

aed_plot <- rbind(httk_compare_min, httk_compare_max)

aed.ttc <- merge(aed_plot, ttc_crc)

aed.ttc.ex <- merge(aed.ttc, expo, by="chemical_casrn" )
aed.ttc.ex[ , Total.median:=log10(Total.median)]
aed.ttc.ex[ , Total.95..ile:=log10(Total.95..ile)]
aed.ttc.ex[, tmu:= (Total.median-Total.95..ile)]

cos_plot <- ggplot(aed.ttc.ex, aes(x=chemical_name)) +
  geom_line(aes(chemical_name, log_aed_50), color="blue", size=1.5, alpha=0.5) + 
  geom_line(aes(chemical_name, lel_st), size=1.5) +
  geom_point(aes(chemical_name, lel_chem), size=3) +
  geom_point(aes(chemical_name, federated_ttc), size=3, color="#E69F00", shape=17) +
  geom_point(aes(chemical_name,  Total.median), size=3, color="red", shape=8) +
  geom_errorbar( aes( ymin=Total.median, ymax=tmu), width= 0.5, size=.2, color="red")+
  theme_igray() +
  coord_flip() +
  scale_y_continuous(limits=c(-10, 4), breaks=seq(-10, 4))  +
  xlab("CRC") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank())

Calculate the AED for the minimum AC50 for ToxRef chemicals

#-----------------------------------------------------------------------------------#
#Calculate the HUMAN-based administered dose equivalents (AEDs) using httk
#-----------------------------------------------------------------------------------#

tr_httk <- tr_lel[ group=="toxref"]
tr_mc5<- mc5[casn %in% tr_httk$chemical_casrn ]

#---------------------------------------------------------------------#
# Prepare all cyto burst information for cosmetics
#---------------------------------------------------------------------#

#-----get max and min log10(AC50) for each cosmetic
colnames(tr_mc5)
tr_mc5[,max.modlga.by.chem := max(modl_ga, na.rm = TRUE), by=casn]
tr_mc5[,min.modlga.by.chem := min(modl_ga, na.rm = TRUE), by=casn]
tr_mc5[,min.modlga.chem.unlog := 10^min.modlga.by.chem]
tr_mc5[,max.modlga.chem.unlog := 10^max.modlga.by.chem]
tr_mc5.subset <- tr_mc5[ , list(casn, max.modlga.by.chem, min.modlga.by.chem, min.modlga.chem.unlog, max.modlga.chem.unlog )] %>% unique()
#-----------------------------------------------------------------------------------#
#Calculate the HUMAN-based administered dose equivalents (AEDs) using httk
#-----------------------------------------------------------------------------------#

setnames(tr_mc5.subset, "casn", "chemcas")
#need to first confirm which casn's have httk information otherwise a loop will fail
ac50pct.subset.tr<-as.data.frame(subset(tr_mc5.subset, chemcas %in% get_cheminfo(species='Human')))

colnames(ac50pct.subset.tr)
dim(ac50pct.subset.tr)
length(unique(ac50pct.subset.tr$chemcas)) # 281 chemicals total

aed.tr=data.table()
for (casn in ac50pct.subset.tr[,'chemcas'])
{
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset.tr,chemcas==casn)[['min.modlga.chem.unlog']], chem.cas=casn, 
                              which.quantile=c(0.5), species='Human', method='dr', well.stirred.correction=T,restrictive.clearance=T))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=T,restrictive.clearance=T))
  aed.tr<-rbind(aed.tr,cbind(casn, aed_50, max_aed))
}

aed.tr[ , aed_50:=as.numeric(aed_50)]
aed.tr[ , log_aed_50:=log10(aed_50)]

colnames(aed.tr)[1] <- "chemical_casrn"
aed.tr <- data.table(aed.tr)
setkey(aed.tr, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_tr <- aed.tr[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_tr[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Calculate the AED for the maximum AC50 of ToxRef chemicals

aed.tr.max=data.table()
for (casn in ac50pct.subset.tr[,'chemcas'])
{
  aed_50<-(calc_mc_oral_equiv(conc=subset(ac50pct.subset.tr,chemcas==casn)[['max.modlga.chem.unlog']], 
                              chem.cas=casn, 
                              which.quantile=c(0.5), 
                              species='Human', 
                              method='dr', 
                              well.stirred.correction=T, restrictive.clearance=T))
  max_aed <- (calc_mc_oral_equiv(conc=100, chem.cas=casn, 
                                 which.quantile=c(0.95), species='Human', method='dr', well.stirred.correction=T, restrictive.clearance=T))
   aed.tr.max<-rbind(aed.tr.max,cbind(casn, aed_50, max_aed))
}


aed.tr.max[ , aed_50:=as.numeric(aed_50)]
aed.tr.max[ , log_aed_50:=log10(aed_50)]

colnames(aed.tr.max)[1] <- "chemical_casrn"
aed.tr.max <- data.table(aed.tr.max)
setkey(aed.tr.max, "chemical_casrn")
setkey(tr_lel, "chemical_casrn")

httk_compare_tr_max <- aed.tr.max[tr_lel] %>% .[ !is.na(max_aed)]
httk_compare_tr_max[ , lel_chem:=min(lel_st), by="chemical_casrn"]

Create Density plot for ToxRefDB

httk_tr_plot <- httk_compare_tr[ , list(chemical_casrn, 
                                        chemical_name, 
                                        study_id, 
                                        lel_chem, 
                                        lel_st)] %>% 
  unique()

#write.csv(httk_tr_plot, file="LEL_non_CRC.csv")

httk_ac50min_plot <- httk_compare_tr[ , list(chemical_casrn, 
                                             chemical_name, 
                                             log_aed_50)] %>% 
  unique() %>%
  .[ , ac50:="min"]

httk_ac50max_plot <- httk_compare_tr_max[ , list(chemical_casrn, 
                                                 chemical_name, 
                                                 log_aed_50)] %>% 
  unique() %>%
  .[ , ac50:="max"]

tr_plot <- ggplot() +
  geom_density(data=httk_tr_plot, aes(lel_st), size=1) +
  geom_density(data=httk_ac50min_plot, aes(log_aed_50), color="steelblue3", size=1) +
  geom_density(data=httk_ac50max_plot, aes(log_aed_50), color="blue3", size=1, linetype="dashed") +
  theme_igray() +
  scale_x_continuous(limits=c(-10, 4), breaks=seq(-10, 4)) +
  xlab("log10(mg/kg/day)") +
  ylab("305 non-CRC")

Combining cosmetic range plot with toxrefdb range density plot

httkp1 <- ggplot_gtable(ggplot_build(cos_plot))
httkp2 <- ggplot_gtable(ggplot_build(tr_plot))

httkp2$widths <- httkp1$widths

gridExtra::grid.arrange(httkp1, httkp2, ncol=1, heights=2:1)

png(width = 4000, height = 4000, res=600)
plot(arrangeGrob(httkp1,httkp2,ncol=1,heights=2:1))
dev.off()
## png 
##   2