#--------------------------------------------------------------------------------------
#
# syndrome_V01.r - code to build syndrome models, for instance for anemia
#
# January 2016
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
options(java.parameters = "-Xmx1000m")
library(grDevices)
library(RColorBrewer)
library(stringr)
#library(deSolve)
#library(msm)
#library(httk)
library(openxlsx)
#library(e1071)

source("utils.R")
source("ToxRefDB_clean.R")
# Paths to the ToxRefDB endpoint workbook and the endpoint replacement map.
# Neither is referenced in the functions visible below; presumably they are
# used by the helpers in ToxRefDB_clean.R -- TODO confirm.
ENDPOINT.FILE <- "../ToxRefDB/toxrefdb_endpoint_2015_03_23.xlsx"
ENDPOINT.MAP.FILE <- "../ToxRefDB/endpoint_replacements.xlsx"
# Input directory and date stamps. VARMATDIR is derived from INPUTDIR and
# VARMATDATE, so change those two rather than editing the path by hand.
# NOTE(review): `<<-` is used here although plain `<-` would normally do for
# a script sourced at top level -- confirm whether this file is ever sourced
# into a local environment.
INPUTDIR <<- "../input/"
VARMATDATE <<- "151020"
VARMATDIR <<- paste(INPUTDIR,"varmats_",VARMATDATE,"_internal/",sep="")
ASSAYDEFDATE <<- "151020"

#--------------------------------------------------------------------------------------
#
# This method starts with the raw ToxRef Data and builds a variety of data files
# broken out by species and study type, including hit and LEL matrices
# These all use the "original" endpoints
#
#--------------------------------------------------------------------------------------
driver <- function(prep.A=F,prep.B=F,prep.C=F,prep.D=F,prep.E=F) {
  # Run the ToxRefDB preparation stages selected by the prep.* flags.
  #
  # prep.A: call prep.ToxRefDB once per species / study-type pair
  # prep.B: call build.endpoint.master() then build.endpoint.clean()
  # prep.C: call build.anemia.endpoints.0()
  # prep.D: call build.study.table once per species / study-type pair
  # prep.E: call prep.lel.mats once per species / study-type pair
  #
  # All flags default to FALSE, so calling driver() with no arguments only
  # prints the function banner. The worker functions are defined elsewhere
  # (utils.R / ToxRefDB_clean.R).
  print.current.function()
  # species.list[i] and type.list[i] together identify one study group
  species.list <- c("rat","rat","rat","rat","mouse","dog")
  type.list <- c("CHR","DEV","MGR","SUB","CHR","CHR")
  nstudy <- length(species.list)

  # creates files of form toxrefdb_acceptable_rat_CHR.xlsx
  if(prep.A) {
    for(i in seq_len(nstudy)) {
      # Only the first call asks prep.ToxRefDB to (re)read its input. The
      # original computed this flag but then always passed do.read=T.
      # NOTE(review): assumes prep.ToxRefDB caches what it reads when
      # do.read is TRUE -- confirm in ToxRefDB_clean.R.
      prep.ToxRefDB(do.read=(i==1),species=species.list[i],study_type=type.list[i])
    }
  }

  # produces a unique set of endpoints, which is then hand edited to do a final cleanup
  # output of this is ../ToxRefDB/toxrefdb_endpoint_{date}.xlsx
  if(prep.B) {
    build.endpoint.master()
    build.endpoint.clean()
  }

  if(prep.C) build.anemia.endpoints.0()

  if(prep.D) {
    for(i in seq_len(nstudy)) build.study.table(species=species.list[i],study_type=type.list[i])
  }

  if(prep.E) {
    for(i in seq_len(nstudy)) prep.lel.mats(species=species.list[i],study_type=type.list[i])
  }
}
#--------------------------------------------------------------------------------------
#
# Summarize the evidence for anemia
#
#--------------------------------------------------------------------------------------
anemia.summary.1 <- function() {
  # Build one row per study with anemia-related potency scores and write the
  # table to ../anemia_files/anemia_summary_1.xlsx.
  #
  # Inputs:
  #   ../anemia_files/anemia_endpoints_2016_01_28.xlsx - endpoint map; columns
  #       4:41 are class indicator columns (1 = endpoint belongs to the class)
  #       and "endpoint_final" holds the endpoint column name.
  #   ../ToxRefDB/lel_mat_{species}_{study_type}.xlsx - one row per study,
  #       one column per endpoint.
  #
  # For each study the reference dose bw is BodyWeight_Decr, or 3*hdt when
  # that is missing. A score is -log10(mean(value/bw)) over the non-missing
  # endpoint values in a group, or NA when the group has no values.
  print.current.function()
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")

  filename <- "../anemia_files/anemia_endpoints_2016_01_28.xlsx"
  map <- read.xlsx(filename)
  class.list <- names(map)[4:41]

  # columns copied straight from the LEL matrix into the output row
  copy.cols <- c("CODE","CASRN","Name","study_id","source_study_alphanumeric_id","ldt","hdt","Mortality_Incr","BodyWeight_Decr")
  # The original template listed only names(map)[4:33]; the remaining class
  # columns were appended implicitly on first assignment. Declaring all of
  # them up front yields the same columns in the same order.
  name.list <- c("CODE","CASRN","Name","species","study_type","study_id","source_study_alphanumeric_id","ldt","hdt","Mortality_Incr","BodyWeight_Decr","anemia_count","anemia_potency",class.list)
  onechem <- as.data.frame(matrix(nrow=1,ncol=length(name.list)))
  names(onechem) <- name.list

  # endpoints flagged for each class; does not depend on the study, so it is
  # computed once instead of once per study
  endpoints.by.class <- lapply(class.list,function(classk) map[is.element(map[,classk],1),"endpoint_final"])

  # -log10 of the mean value/bw ratio over the non-missing values (NA if none)
  potency <- function(values,bw) {
    values <- values[!is.na(values)]
    if(length(values)==0) return(NA)
    -log10(sum(values/bw)/length(values))
  }

  per.file <- vector("list",length(species.list))
  for(i in seq_along(species.list)) {
    species <- species.list[i]
    study_type <- type.list[i]
    file <- paste0("../ToxRefDB/lel_mat_",species,"_",study_type,".xlsx")
    dmat <- read.xlsx(file)
    # keep only endpoints that exist as columns in this matrix; duplicates in
    # the map are kept so they are counted as often as the original did.
    # An empty class simply yields an empty vector (the original 1:0 loop
    # failed on that case).
    present <- lapply(endpoints.by.class,function(ep) ep[is.element(ep,names(dmat))])
    rows <- vector("list",nrow(dmat))
    for(j in seq_len(nrow(dmat))) {
      onechem[] <- NA
      for(col in copy.cols) onechem[1,col] <- dmat[j,col]
      onechem[1,"species"] <- species
      onechem[1,"study_type"] <- study_type

      bw <- dmat[j,"BodyWeight_Decr"]
      if(is.na(bw)) bw <- 3*dmat[j,"hdt"]

      # basic anemia markers: hematocrit, hemoglobin and erythrocyte count decreases
      basic <- c(dmat[j,"Hematocrit(HCT)_Decr"],dmat[j,"Hemoglobin(HGB)_Decr"],dmat[j,"Erythrocyte_count_Decr"])
      onechem[1,"anemia_count"] <- sum(!is.na(basic))
      onechem[1,"anemia_potency"] <- potency(basic,bw)

      for(k in seq_along(class.list)) {
        values <- as.numeric(unlist(dmat[j,present[[k]]],use.names=FALSE))
        onechem[1,class.list[k]] <- potency(values,bw)
      }
      rows[[j]] <- onechem
    }
    # bind once per file instead of growing the result row by row
    per.file[[i]] <- do.call(rbind,rows)
  }
  result <- do.call(rbind,per.file)

  file <- "../anemia_files/anemia_summary_1.xlsx"
  write.xlsx(result,file)

}
#--------------------------------------------------------------------------------------
#
# Summarize the anemia calls per chemical across the species / study-type combinations
#
#--------------------------------------------------------------------------------------
anemia.summary.2 <- function() {
  # Collapse the per-study table into one row per chemical (CODE) with one
  # column per species_studytype, and write it to
  # ../anemia_files/anemia_summary_2.xlsx.
  #
  # Each study's anemia_basic value is coded 1 (> 0), 0 (<= 0) or -1 (NA).
  # When a chemical has several studies of the same kind the highest code is
  # kept. pos.lt.bw counts the columns coded 1; pos.tot counts the columns
  # coded 1 or 0.
  #
  # Four counters are printed: cells set for the first time, cells upgraded
  # by a later study, rows whose species/study type has no column, and rows
  # that did not change an already-set cell.
  print.current.function()
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  study.cols <- paste0(species.list,"_",type.list)

  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  raw <- read.xlsx(file)
  # drop DEV and MGR studies
  raw <- raw[!is.element(raw[,"study_type"],c("DEV","MGR")),]
  nraw <- nrow(raw)
  print(nraw)

  name.list <- c("CODE","CASRN","Name",study.cols,"pos.tot","pos.lt.bw")
  code.list <- sort(unique(raw[,"CODE"]))
  nchem <- length(code.list)
  dmat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  names(dmat) <- name.list
  rownames(dmat) <- code.list
  dmat[,"CODE"] <- code.list

  # 1 / 0 / -1 coding of a single anemia_basic value
  code.score <- function(x) {
    if(is.na(x)) return(-1)
    if(x>0) 1 else 0
  }

  n.first <- 0
  n.upgraded <- 0
  n.nocolumn <- 0
  n.unchanged <- 0
  print(nraw)
  for(i in seq_len(nraw)) {
    # as.character so the lookup is always by row name, never by position
    code <- as.character(raw[i,"CODE"])
    colname <- paste0(raw[i,"species"],"_",raw[i,"study_type"])
    if(!is.element(colname,study.cols)) {
      n.nocolumn <- n.nocolumn+1
      next
    }
    dmat[code,"CASRN"] <- raw[i,"CASRN"]
    dmat[code,"Name"] <- raw[i,"Name"]

    score <- code.score(raw[i,"anemia_basic"])
    state <- dmat[code,colname]
    if(is.na(state)) {
      dmat[code,colname] <- score
      n.first <- n.first+1
    }
    else if(score>state) {
      dmat[code,colname] <- score
      n.upgraded <- n.upgraded+1
    }
    else n.unchanged <- n.unchanged+1
  }
  print(n.first)
  print(n.upgraded)
  print(n.nocolumn)
  print(n.unchanged)

  calls <- dmat[,study.cols,drop=FALSE]
  count1 <- rowSums(calls==1,na.rm=TRUE)
  count0 <- rowSums(calls==0,na.rm=TRUE)
  dmat[,"pos.tot"] <- count1+count0
  dmat[,"pos.lt.bw"] <- count1

  file <- "../anemia_files/anemia_summary_2.xlsx"
  write.xlsx(dmat,file)

}
#--------------------------------------------------------------------------------------
#
# List pairs of studies of the same chemical whose highest doses are within a factor of 2
#
#--------------------------------------------------------------------------------------
anemia.summary.3 <- function() {
  # For every chemical with more than one study, emit one row per ordered
  # pair of distinct studies whose hdt values differ by less than a factor
  # of 2, and write the rows to ../anemia_files/anemia_summary_3.xlsx.
  #
  # Output columns: CODE, CASRN, Name, then for each member of the pair the
  # species_studytype label, hdt, and the anemia_basic value coded
  # 1 (> 0), 0 (<= 0) or -1 (NA). Each unordered pair therefore appears
  # twice, once in each direction.
  print.current.function()

  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  raw <- read.xlsx(file)

  # 1 / 0 / -1 coding of a single anemia_basic value
  code.score <- function(x) {
    if(is.na(x)) return(-1)
    if(x>0) 1 else 0
  }

  pairs <- list()
  code.list <- sort(unique(raw[,"CODE"]))
  for(i in seq_along(code.list)) {
    code <- code.list[i]
    print(code)
    temp <- raw[is.element(raw[,"CODE"],code),]
    nstudy <- nrow(temp)
    if(nstudy<2) next

    stype <- paste0(temp[,"species"],"_",temp[,"study_type"])
    potency <- vapply(temp[,"anemia_basic"],code.score,numeric(1))
    hdt <- temp[,"hdt"]
    for(j in seq_len(nstudy)) {
      for(k in seq_len(nstudy)) {
        if(k==j) next
        # fold the ratio so it is always >= 1 before testing the 2x window
        ratio <- hdt[j]/hdt[k]
        if(ratio<1) ratio <- 1/ratio
        if(ratio<2) {
          pairs[[length(pairs)+1]] <- data.frame(
            CODE=code,CASRN=temp[j,"CASRN"],Name=temp[j,"Name"],
            stype.1=stype[j],hdt.1=hdt[j],potency.1=potency[j],
            stype.2=stype[k],hdt.2=hdt[k],potency.2=potency[k],
            stringsAsFactors=FALSE)
        }
      }
    }
  }
  # a single bind instead of growing the data frame inside the loops;
  # NULL when no pair qualified, as before
  dmat <- do.call(rbind,pairs)
  file <- "../anemia_files/anemia_summary_3.xlsx"
  write.xlsx(dmat,file)
}
#--------------------------------------------------------------------------------------
#
# Plot anemia call fractions and highest doses tested by species / study type
#
#--------------------------------------------------------------------------------------
anemia.summary.4 <- function(to.file=F) {
  # Two-panel figure plus a statistics table for the anemia calls.
  #
  # to.file: when TRUE the plots go to ../anemia_files/anemia_summary_4.pdf;
  #          otherwise they are drawn on the current device and browser() is
  #          entered after each panel.
  #
  # Panel 1: stacked bars of the fraction of studies per species_studytype
  #          whose anemia_basic value is coded 1 (> 0), 0 (<= 0) or -1 (NA),
  #          with the study count written above each bar.
  # Table:   prop.test p-values for every pair of study groups, written to
  #          "../anemia_files/Fig 1 stats.xlsx". "LT.BW" counts codes 1 and 0
  #          as positive; "LE.BW" counts only code 1 as positive.
  # Panel 2: box plots of hdt by species, study type and code; the codes are
  #          labelled "neg" (-1), "pos=BW" (0) and "pos<BW" (1).
  #
  # Returns NULL invisibly.
  print.current.function()
  if(to.file) {
    fname <- "../anemia_files/anemia_summary_4.pdf"
    pdf(file=fname,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the device even if something below fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,1),mar=c(5,10,4,3))
  species.list <- c("mouse","rat","dog","rat")
  type.list <- c("CHR","CHR","CHR","SUB")
  study.list <- paste0(species.list,"_",type.list)
  nstudy <- length(study.list)

  # vectorised 1 / 0 / -1 coding of anemia_basic
  recode <- function(v) ifelse(is.na(v),-1,ifelse(v>0,1,0))

  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  dmat.0 <- read.xlsx(file)
  dmat <- dmat.0[is.element(dmat.0[,"study_type"],type.list),]
  dmat <- dmat[,1:14]
  dmat[,"anemia_basic"] <- recode(dmat[,"anemia_basic"])

  # tab holds fractions, itab holds raw counts; columns are codes 1, 0, -1
  tab <- as.data.frame(matrix(0,nrow=nstudy,ncol=3))
  itab <- tab
  names(tab) <- c("Pos","Pos.BW","Neg")
  rownames(tab) <- study.list
  counts <- integer(nstudy)
  for(i in seq_len(nstudy)) {
    species <- species.list[i]
    type <- type.list[i]
    sel <- is.element(dmat[,"species"],species) & is.element(dmat[,"study_type"],type)
    calls <- dmat[sel,"anemia_basic"]
    n <- length(calls)
    counts[i] <- n
    n.by.code <- c(sum(calls==1),sum(calls==0),sum(calls==-1))
    frac <- n.by.code/n
    for(col in 1:3) {
      tab[i,col] <- frac[col]
      itab[i,col] <- n.by.code[col]
    }
    cat(species,"\t",type,"\t",n,"\t",format(frac[1],digits=2),"\t",format(frac[2],digits=2),"\t",format(frac[3],digits=2),"\n",sep="")
    cat(species,"\t",type,"\t",n,"\t",format(n.by.code[1],digits=2),"\t",format(n.by.code[2],digits=2),"\t",format(n.by.code[3],digits=2),"\n",sep="")
  }
  itab <- t(itab)
  colnames(itab) <- study.list

  barplot(as.matrix(t(tab)),cex.lab=1.5,cex.axis=1.5,ylab="Fraction of Studies",names.arg=paste(species.list,type.list))
  # study counts, right-aligned over each bar
  text(-0.05+seq_len(nstudy)*1.2,0.95,counts,pos=2,cex=1.5)

  name.list <- c("study.1","study.2","outgroup","pos.1","neg.1","pos.2","neg.2","p.value")
  # one row per unordered pair of study groups and per outgroup definition
  class.all <- as.data.frame(matrix(nrow=nstudy*(nstudy-1),ncol=length(name.list)))
  names(class.all) <- name.list
  counter <- 0
  for(i in seq_len(nstudy-1)) {
    for(j in (i+1):nstudy) {
      study.i <- study.list[i]
      study.j <- study.list[j]
      for(outgroup in c("LT.BW","LE.BW")) {
        counter <- counter+1
        # rows of itab: 1 = code 1, 2 = code 0, 3 = code -1
        if(outgroup=="LT.BW") {
          pos.rows <- 1:2
          neg.rows <- 3
        }
        else {
          pos.rows <- 1
          neg.rows <- 2:3
        }
        pos.i <- sum(itab[pos.rows,study.i])
        neg.i <- sum(itab[neg.rows,study.i])
        pos.j <- sum(itab[pos.rows,study.j])
        neg.j <- sum(itab[neg.rows,study.j])
        res <- prop.test(matrix(c(pos.i,neg.i,pos.j,neg.j),nrow=2,byrow=TRUE))

        class.all[counter,"study.1"] <- study.i
        class.all[counter,"study.2"] <- study.j
        class.all[counter,"outgroup"] <- outgroup
        class.all[counter,"pos.1"] <- pos.i
        class.all[counter,"neg.1"] <- neg.i
        class.all[counter,"pos.2"] <- pos.j
        class.all[counter,"neg.2"] <- neg.j
        class.all[counter,"p.value"] <- res$p.value
      }
    }
  }
  file <- "../anemia_files/Fig 1 stats.xlsx"
  write.xlsx(class.all,file)

  if(!to.file) browser()

  # second panel uses the unfiltered table, restricted to CHR and SUB studies
  keep <- is.element(dmat.0[,"study_type"],c("CHR","SUB"))
  hdts <- dmat.0[keep,"hdt"]
  groups <- paste(dmat.0[keep,"species"],"_",dmat.0[keep,"study_type"],"_",recode(dmat.0[keep,"anemia_basic"]),sep="")

  name.list <- sort(unique(groups))
  name.list <- str_replace_all(name.list,"_"," ")
  name.list <- str_replace_all(name.list," -1"," neg")
  name.list <- str_replace_all(name.list," 0"," pos=BW")
  name.list <- str_replace_all(name.list," 1"," pos<BW")

  boxplot(hdts~groups,log="x",horizontal=T,las=1,xlab="Highest Dose Tested (mg/kg/day)",ylim=c(1,10000),names=name.list)
  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Summarize the evidence for anemia (original Table 2)
#
#--------------------------------------------------------------------------------------
anemia.summary.5 <- function() {
  # Cross-study replication table, written to
  # ../anemia_files/anemia_summary_5.xlsx.
  #
  # anemia_basic is coded 1 (> 0), 0 (<= 0) or -1 (NA). For every chemical
  # and every ordered pair of its studies with different study_id and hdt
  # values within a factor of 2, a first study coded 0 or 1 adds one to
  # rep.yes when the second study is also coded 0 or 1, and one to rep.no
  # when the second study is coded -1. Counts are kept per
  # (species_studytype of study 1, code of study 1, species_studytype of
  # study 2); rep.frac = rep.yes/(rep.yes+rep.no), NaN when both are 0.
  print.current.function()
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  study.list <- paste0(species.list,"_",type.list)

  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  dmat <- read.xlsx(file)
  dmat <- dmat[is.element(dmat[,"study_type"],type.list),]
  dmat <- dmat[,1:14]
  dmat[,"anemia_basic"] <- ifelse(is.na(dmat[,"anemia_basic"]),-1,ifelse(dmat[,"anemia_basic"]>0,1,0))

  # expand.grid varies its first argument fastest, which reproduces the
  # original row order: study 1 outermost, study 2, then activity class 0,1
  grid <- expand.grid(activity.class=0:1,species.study.2=study.list,species.study.1=study.list,stringsAsFactors=FALSE)
  tab <- data.frame(species.study.1=grid[,"species.study.1"],
                    activity.class=grid[,"activity.class"],
                    species.study.2=grid[,"species.study.2"],
                    rep.yes=0,rep.no=0,rep.frac=0,stringsAsFactors=FALSE)
  rownames(tab) <- paste0(tab[,1],tab[,2],tab[,3])

  code.list <- sort(unique(dmat[,"CODE"]))
  for(i in seq_along(code.list)) {
    temp <- dmat[is.element(dmat[,"CODE"],code.list[i]),]
    stype <- paste0(temp[,"species"],"_",temp[,"study_type"])
    nstudy <- nrow(temp)
    for(j in seq_len(nstudy)) {
      for(k in seq_len(nstudy)) {
        if(temp[j,"study_id"]!=temp[k,"study_id"]) {
          ratio <- temp[k,"hdt"]/temp[j,"hdt"]
          if(ratio<1) ratio <- 1/ratio
          if(ratio<2 && temp[j,"anemia_basic"]>-1) {
            rowname <- paste0(stype[j],temp[j,"anemia_basic"],stype[k])
            # Skip species / study-type combinations that have no row in
            # tab (e.g. a SUB study for a species other than rat); indexing
            # by an unknown row name would silently append an NA row.
            if(is.element(rowname,rownames(tab))) {
              if(temp[k,"anemia_basic"]>-1) tab[rowname,"rep.yes"] <- tab[rowname,"rep.yes"]+1
              else tab[rowname,"rep.no"] <- tab[rowname,"rep.no"]+1
            }
          }
        }
      }
    }
  }
  tab[,"rep.frac"] <- tab[,"rep.yes"]/(tab[,"rep.yes"]+tab[,"rep.no"])

  file <- "../anemia_files/anemia_summary_5.xlsx"
  write.xlsx(tab,file)

}
#--------------------------------------------------------------------------------------
#
# heatmap of the basic anemia data
#
#--------------------------------------------------------------------------------------
anemia.summary.6 <- function(to.file=F) {
  # Heatmap of the class scores for studies that have an anemia_basic value.
  #
  # to.file: when TRUE the plot goes to ../anemia_files/anemia_summary_6.pdf;
  #          otherwise it is drawn on the current device and browser() is
  #          entered afterwards.
  #
  # Columns 13:51 of the summary table are recoded to 2 (value > 0),
  # 1 (value <= 0) and 0 (missing). Rows with a missing anemia_basic are
  # dropped, a fixed set of columns is excluded, and only columns with a
  # non-missing value in more than 4 of the remaining rows are plotted.
  #
  # Returns NULL invisibly.
  print.current.function()
  if(to.file) {
    fname <- "../anemia_files/anemia_summary_6.pdf"
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the device even if reading or plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  dmat <- read.xlsx(file)
  mat <- as.matrix(dmat[,13:51])
  rownames(mat) <- paste0(dmat[,"Name"],"_",dmat[,"species"],"_",dmat[,"study_type"],"_",dmat[,"study_id"])

  # 2 = positive value, 1 = zero or negative value, 0 = missing
  mat[] <- ifelse(is.na(mat),0,ifelse(mat>0,2,1))
  # drop=FALSE keeps a matrix even if a single row or column survives
  mat <- mat[mat[,"anemia_basic"]>0,,drop=FALSE]
  excluded <- c("anemia_basic","anemia_call","liver","platelat_increase","wbc_increase","spleen","bone_marrow")
  mat <- mat[,!is.element(colnames(mat),excluded),drop=FALSE]
  # number of studies with a non-missing value in each column
  cs <- colSums(mat>0)
  mat <- mat[,cs>4,drop=FALSE]
  heatmap(t(mat),margins=c(5,10),scale="none",main="Anemia",symm=F,
          xlab="",ylab="",cexRow=0.8,cexCol=0.5,col=brewer.pal(9,"Reds"),
          hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# set up file for annotating hand curation
#
#--------------------------------------------------------------------------------------
anemia.summary.7 <- function() {
  # Write ../anemia_files/anemia_summary_7.xlsx: all studies of every
  # chemical that has at least one study with anemia_count > 0 and
  # anemia_potency > 0.
  #
  # Rows are sorted by Name and, within a name, by decreasing
  # anemia_potency. Two columns are prepended: "order", a running number
  # that increases each time Name changes, and "group", which is 1 for even
  # "order" values and 0 for odd ones (so consecutive chemicals alternate).
  print.current.function()
  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  dmat <- read.xlsx(file)

  # which() drops rows where either comparison is NA
  hit <- which(dmat[,"anemia_count"]>0 & dmat[,"anemia_potency"]>0)
  code.list <- sort(unique(dmat[hit,"CODE"]))
  temp <- dmat[is.element(dmat[,"CODE"],code.list),]
  # two-pass sort: the second order() keeps the potency ordering within ties
  temp <- temp[order(temp[,"anemia_potency"],decreasing=TRUE),]
  temp2 <- temp[order(temp[,"Name"]),]

  # Running chemical index. The original used for(i in 2:length(ol)), which
  # counts backwards and fails when there are fewer than two rows.
  cname <- temp2[,"Name"]
  n <- length(cname)
  if(n>0) ol <- cumsum(c(TRUE,cname[-1]!=cname[-n]))
  else ol <- integer(0)
  ol2 <- as.integer(ol%%2==0)

  temp2 <- cbind(ol,temp2)
  names(temp2)[1] <- "order"
  temp2 <- cbind(ol2,temp2)
  names(temp2)[1] <- "group"

  file <- "../anemia_files/anemia_summary_7.xlsx"
  write.xlsx(temp2,file)
}
#--------------------------------------------------------------------------------------
#
# compare database and manual anemia calls and tabulate cross-study replication
#
#--------------------------------------------------------------------------------------
anemia.summary.8 <- function() {
  # Three tables derived from the hand-curated file
  # "../anemia_files/anemia_summary_7 2016-02-03.xlsx":
  #
  #   anemia_summary_8a.xlsx - counts of DB_Call (rows) against Manual_Call
  #       (columns) over rows where both are present.
  #   anemia_summary_8b.xlsx - one row per chemical with more than one
  #       "Positive" Manual_Call, listing the calls per species_studytype
  #       (several studies of one kind are joined with "+").
  #   anemia_summary_8c.xlsx - replication counts: for each ordered pair of
  #       studies of one chemical with different study_id and hdt within a
  #       factor of 2, a "Positive" first study adds to rep.yes when the
  #       second is not "Negative" and to rep.no otherwise.
  #
  # Tables 8b and 8c use only rows whose Manual_Call is "Positive" or
  # "Negative". Tables 8a and 8b are also printed.
  print.current.function()
  file <- "../anemia_files/anemia_summary_7 2016-02-03.xlsx"
  curated <- read.xlsx(file)

  #---- 8a: DB call vs manual call
  db.classes <- c("Positive","Negative","Ambiguous")
  manual.classes <- c("Positive","Negative","Ambiguous","Unknown")
  result <- as.data.frame(matrix(nrow=length(db.classes),ncol=1+length(manual.classes)))
  names(result) <- c("DB class",manual.classes)
  rownames(result) <- db.classes
  result[,"DB class"] <- db.classes
  result[,manual.classes] <- 0
  for(i in seq_len(nrow(curated))) {
    classa <- curated[i,"DB_Call"]
    classb <- curated[i,"Manual_Call"]
    if(!is.na(classa) && !is.na(classb)) result[classa,classb] <- result[classa,classb]+1
  }
  file <- "../anemia_files/anemia_summary_8a.xlsx"
  write.xlsx(result,file)
  print(result)

  #---- 8b: chemicals with more than one positive study
  # the file is not read a second time; the table loaded above is reused
  dmat <- curated[is.element(curated[,"Manual_Call"],c("Positive","Negative")),]
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  study.list <- paste0(species.list,"_",type.list)
  code.list <- sort(unique(dmat[,"CODE"]))
  nchem <- length(code.list)

  name.list <- c("CODE","CASRN","Name","Positive Studies",study.list)
  result <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  names(result) <- name.list
  result[] <- ""
  result[,"Positive Studies"] <- as.integer(0)

  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- dmat[is.element(dmat[,"CODE"],code),]
    result[i,"CODE"] <- code
    result[i,"CASRN"] <- temp[1,"CASRN"]
    result[i,"Name"] <- temp[1,"Name"]
    for(j in seq_len(nrow(temp))) {
      value <- temp[j,"Manual_Call"]
      colname <- paste0(temp[j,"species"],"_",temp[j,"study_type"])
      if(result[i,colname]=="") result[i,colname] <- value
      else result[i,colname] <- paste0(result[i,colname],"+",value)
    }
    result[i,"Positive Studies"] <- sum(temp[,"Manual_Call"]=="Positive")
  }
  result <- result[result[,"Positive Studies"]>1,]
  file <- "../anemia_files/anemia_summary_8b.xlsx"
  write.xlsx(result,file)
  print(result)

  #---- 8c: replication between study types
  # expand.grid varies its first argument fastest: study 1 is the outer index
  grid <- expand.grid(species.study.2=study.list,species.study.1=study.list,stringsAsFactors=FALSE)
  tab <- data.frame(species.study.1=grid[,"species.study.1"],
                    species.study.2=grid[,"species.study.2"],
                    rep.yes=0,rep.no=0,rep.frac=0,stringsAsFactors=FALSE)
  rownames(tab) <- paste0(tab[,1],tab[,2])

  for(i in seq_len(nchem)) {
    temp <- dmat[is.element(dmat[,"CODE"],code.list[i]),]
    stype <- paste0(temp[,"species"],"_",temp[,"study_type"])
    nstudy <- nrow(temp)
    for(j in seq_len(nstudy)) {
      for(k in seq_len(nstudy)) {
        if(temp[j,"study_id"]!=temp[k,"study_id"]) {
          ratio <- temp[k,"hdt"]/temp[j,"hdt"]
          if(ratio<1) ratio <- 1/ratio
          if(ratio<2 && temp[j,"Manual_Call"]=="Positive") {
            rowname <- paste0(stype[j],stype[k])
            # ignore combinations that have no row in tab instead of
            # silently appending an NA row
            if(is.element(rowname,rownames(tab))) {
              if(temp[k,"Manual_Call"]!="Negative") tab[rowname,"rep.yes"] <- tab[rowname,"rep.yes"]+1
              else tab[rowname,"rep.no"] <- tab[rowname,"rep.no"]+1
            }
          }
        }
      }
    }
  }
  tab[,"rep.frac"] <- tab[,"rep.yes"]/(tab[,"rep.yes"]+tab[,"rep.no"])

  file <- "../anemia_files/anemia_summary_8c.xlsx"
  write.xlsx(tab,file)
  # The original ended with an unconditional browser(); it is kept for
  # interactive inspection but no longer interferes with batch runs.
  if(interactive()) browser()

}
#--------------------------------------------------------------------------------------
#
# laneplot per chemical
#
#--------------------------------------------------------------------------------------
anemia.summary.9 <- function(to.file=F,do.prep=F) {
  # Lane plot per chemical: one plot for every chemical with more than one
  # row in the summary table, with one horizontal lane per
  # species_studytype (rat CHR, rat SUB, mouse CHR, dog CHR).
  #
  # to.file: when TRUE the plots go to ../anemia_files/anemia_summary_9.pdf;
  #          otherwise browser() is entered after each chemical.
  # do.prep: when TRUE the four LEL matrices are read from ../ToxRefDB and
  #          stored in the globals RAT.CHR, RAT.SUB, DOG.CHR and MOUSE.CHR.
  #          With do.prep=FALSE those globals must already exist.
  #
  # In each lane the outlined box spans ldt..hdt, the blue bar runs from
  # BodyWeight_Decr to hdt, the red bar from Mortality_Incr to hdt, and
  # diamonds / stars / triangles mark the endpoint values read from the LEL
  # matrix.
  #
  # Returns NULL invisibly.
  print.current.function()
  if(do.prep) {
    RAT.CHR <<- read.xlsx("../ToxRefDB/lel_mat_rat_CHR.xlsx")
    RAT.SUB <<- read.xlsx("../ToxRefDB/lel_mat_rat_SUB.xlsx")
    DOG.CHR <<- read.xlsx("../ToxRefDB/lel_mat_dog_CHR.xlsx")
    MOUSE.CHR <<- read.xlsx("../ToxRefDB/lel_mat_mouse_CHR.xlsx")
  }
  if(to.file) {
    fname <- "../anemia_files/anemia_summary_9.pdf"
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the device even if a plot fails part-way through
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(3,1),mar=c(4,6,4,5))
  file <- "../anemia_files/anemia_summary_1 2016-02-03.xlsx"
  dmat <- read.xlsx(file)
  code.list <- sort(unique(dmat[,"CODE"]))
  # decade grid used to pick the x-axis limits
  clist <- c(1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1,1e1,1e2,1e3,1e4,1e5,1e6)
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  # LEL matrices in the same order as species.list / type.list
  lel.mats <- list(RAT.CHR,RAT.SUB,MOUSE.CHR,DOG.CHR)

  # endpoint columns drawn as stars, up-triangles and down-triangles;
  # constant, so built once rather than inside the innermost loop
  hemo.list <- c("Reticulocyte_Incr",
                 "Erythrocyte_Sulphhemoglobin_Incr",
                 "Erythrocyte_Anisochromia_Incr",
                 "Erythrocyte_nucleatedredbloodcell(nRBC)_Incr",
                 "Spherocytes_Incr",
                 "Erythrocyte_JollyBodies_Incr",
                 "Erythrocyte_HeinzBody_Incr",
                 "SpiculatedCells(Echinocytes/Acanthocytes)_Incr",
                 "Hemosiderosis_Kidney_Incr",
                 "Hemosiderosis_Liver_Incr",
                 "Hemosiderosis_Spleen_Incr",
                 "Hemosiderosis_Uterus_Incr",
                 "Hemosiderosis_BoneMarrow_Incr",
                 "Lacticaciddehydrogenase(LDH)_Incr",
                 "Spleen_Pigmentation_Incr",
                 "Spleen_Hyperplasia_Incr",
                 "Spleen_Hypertrophy_Incr",
                 "Spleen_Myelodysplasia_Incr",
                 "Spleen_Congestion_Incr",
                 "Leukemia_Spleen_Incr",
                 "Bilirubin_Incr",
                 "Erythrocyte_Methemoglobin_Incr",
                 "BoneMarrow_Hyperplasia_Incr")
  macro.list <- c("MeanCorpuscular(Cell)Volume(MCV)_Incr","Erythrocyte_Macrocytosis_Incr")
  micro.list <- c("Erythrocyte_Microcytosis_Incr","MeanCorpuscular(Cell)Volume(MCV)_Decr")

  # plot every non-missing value of the given columns (first row of study)
  mark.endpoints <- function(study,cols,y,pch) {
    for(col in cols) {
      val <- study[1,col]
      if(!is.na(val)) points(val,y,pch=pch,cex=2,bg="blue")
    }
  }

  for(i in seq_along(code.list)) {
    code <- code.list[i]
    temp <- dmat[is.element(dmat[,"CODE"],code),]
    if(nrow(temp)<=1) next

    cname <- temp[1,"Name"]
    print(cname)
    dmin <- min(temp[,"ldt"])
    dmax <- max(temp[,"hdt"])
    if(dmin==0) dmin <- 1e-5
    # which.min on a logical vector returns the first FALSE, i.e. the first
    # decade that is not below the dose
    xmin <- clist[which.min(clist<dmin)-1]
    xmax <- clist[which.min(clist<dmax)+1]
    if(is.na(xmin)) xmin <- 1e-5
    if(is.na(xmax)) xmax <- 1e5
    plot(1~1,xlim=c(xmin,xmax),log="x",ylim=c(0,4),main=cname,cex.lab=1.1,cex.axis=1.1,type="n",xlab="Dose (mg/kg/day)",ylab="")
    for(j in seq_along(species.list)) {
      species <- species.list[j]
      type <- type.list[j]
      ymin <- j-1
      temp2 <- lel.mats[[j]]
      temp2 <- temp2[is.element(temp2[,"CODE"],code),]
      # NOTE(review): only the first matching LEL row is plotted, even when
      # the chemical has several studies of this kind -- confirm whether the
      # row should instead be matched on study_id.
      for(k in seq_len(nrow(temp))) {
        if(temp[k,"species"]==species && temp[k,"study_type"]==type) {
          dmin <- temp[k,"ldt"]
          dmax <- temp[k,"hdt"]
          text(xmax/10,ymin+0.5,paste(species,type),pos=4)
          bwd <- temp[k,"BodyWeight_Decr"]
          mort <- temp[k,"Mortality_Incr"]
          if(!is.na(bwd)) {
            # nudge left so a bar starting at hdt still has visible width
            if(bwd==dmax) bwd <- dmax*0.95
            rect(bwd,ymin,dmax,ymin+0.2,lwd=1,col="blue",border="black")
          }
          if(!is.na(mort)) {
            if(mort==dmax) mort <- dmax*0.95
            rect(mort,ymin+0.2,dmax,ymin+0.4,lwd=1,col="red",border="black")
          }
          hgb <- temp2[1,"Hemoglobin(HGB)_Decr"]
          hct <- temp2[1,"Hematocrit(HCT)_Decr"]
          rbc <- temp2[1,"Erythrocyte_count_Decr"]
          if(!is.na(rbc)) points(rbc,ymin+0.4,pch=23,cex=2,bg="red")
          if(!is.na(hgb)) points(hgb,ymin+0.6,pch=23,cex=2,bg="green")
          if(!is.na(hct)) points(hct,ymin+0.8,pch=23,cex=2,bg="gray")

          mark.endpoints(temp2,hemo.list,ymin+0.9,8)
          mark.endpoints(temp2,macro.list,ymin+0.9,24)
          mark.endpoints(temp2,micro.list,ymin+0.9,25)
          rect(dmin,ymin,dmax,ymin+1,lwd=2)
        }
      }
    }
    if(!to.file) browser()
  }
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# set up file for filling in missing data from second round of hand curation
#
#--------------------------------------------------------------------------------------
anemia.summary.10 <- function(do.prep=F) {
  # For chemicals with more than one "Positive" Manual_Call, append each
  # study's full LEL-matrix row to the first 18 columns of the curated
  # table and write the result to ../anemia_files/anemia_summary_10.xlsx.
  #
  # do.prep: when TRUE the four LEL matrices are read from ../ToxRefDB,
  #          given study_id row names, and stored in the globals RAT.CHR,
  #          RAT.SUB, DOG.CHR and MOUSE.CHR. With do.prep=FALSE those
  #          globals must already exist with study_id row names.
  print.current.function()
  file <- "../anemia_files/anemia_summary_7 2016-02-03.xlsx"
  dmat <- read.xlsx(file)

  if(do.prep) {
    # read one LEL matrix and key its rows by study_id
    read.lel <- function(path) {
      lel <- read.xlsx(path)
      rownames(lel) <- lel[,"study_id"]
      lel
    }
    RAT.CHR <<- read.lel("../ToxRefDB/lel_mat_rat_CHR.xlsx")
    RAT.SUB <<- read.lel("../ToxRefDB/lel_mat_rat_SUB.xlsx")
    DOG.CHR <<- read.lel("../ToxRefDB/lel_mat_dog_CHR.xlsx")
    MOUSE.CHR <<- read.lel("../ToxRefDB/lel_mat_mouse_CHR.xlsx")
  }
  lel.mats <- list(rat_CHR=RAT.CHR,rat_SUB=RAT.SUB,dog_CHR=DOG.CHR,mouse_CHR=MOUSE.CHR)

  # number of "Positive" manual calls per chemical
  code.list <- sort(unique(dmat[,"CODE"]))
  is.pos <- is.element(dmat[,"Manual_Call"],"Positive")
  hit.count <- vapply(code.list,function(code) sum(is.pos & is.element(dmat[,"CODE"],code)),integer(1))
  code.list <- code.list[hit.count>1]
  dmat <- dmat[is.element(dmat[,"CODE"],code.list),1:18]
  dmat <- dmat[order(dmat[,"CODE"]),]
  cat("Chemicals with more than one positive assay: ",length(code.list),"\n")

  rows <- vector("list",nrow(dmat))
  for(i in seq_len(nrow(dmat))) {
    key <- paste0(dmat[i,"species"],"_",dmat[i,"study_type"])
    # The original silently reused the previous study's row when the
    # species / study type matched none of the four matrices.
    if(!is.element(key,names(lel.mats))) stop("no LEL matrix for ",key,call.=FALSE)
    # look the study up by row name (study_id as character)
    rows[[i]] <- lel.mats[[key]][as.character(dmat[i,"study_id"]),]
  }
  # bind once instead of growing the data frame inside the loop
  sdata <- do.call(rbind,rows)
  dmat <- cbind(dmat,sdata)
  file <- "../anemia_files/anemia_summary_10.xlsx"
  write.xlsx(dmat,file)
}
#--------------------------------------------------------------------------------------
#
# laneplot per chemical
#
#--------------------------------------------------------------------------------------
anemia.summary.11 <- function(to.file=F) {
  # Draws one lane plot per chemical (three plots per page) from the
  # anemia_summary_10 sheet. Only chemicals with more than one study row are
  # plotted. Each of the four species/study-type combinations gets a
  # horizontal lane of height 1 on a log dose axis, containing:
  #   - gray box : BodyWeight_Decr LEL up to the highest dose tested (hdt)
  #   - red box  : Mortality_Incr LEL up to hdt
  #   - diamonds : Erythrocyte_count (red), HGB (green), HCT (gray) decreases
  #   - stars    : any endpoint in hemo.list
  #   - triangles: point-up for macro.list, point-down for micro.list
  #   - outline  : the tested dose range ldt..hdt
  #
  # to.file: when TRUE plot to ../anemia_files/anemia_summary_11.pdf,
  #          otherwise plot to the current device and pause in browser()
  #          after each chemical.
  print.current.function()
  if(to.file) {
    fname <- paste("../anemia_files/anemia_summary_11.pdf",sep="")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the device even if plotting fails part-way through
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(3,1),mar=c(4,6,4,5))
  file <- "../anemia_files/anemia_summary_10 2016-02-11.xlsx"
  dmat <- read.xlsx(file)
  code.list <- sort(unique(dmat[,"CODE"]))
  # candidate axis limits: powers of ten
  clist <- c(1e-10,1e-9,1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1,1e1,1e2,1e3,1e4,1e5,1e6)
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")

  hemo.list <- c("Reticulocyte_Incr",
                 "Erythrocyte_Sulphhemoglobin_Incr",
                 "Erythrocyte_Anisochromia_Incr",
                 "Erythrocyte_nucleatedredbloodcell(nRBC)_Incr",
                 "Spherocytes_Incr",
                 "Erythrocyte_JollyBodies_Incr",
                 "Erythrocyte_HeinzBody_Incr",
                 "Hemosiderosis_Kidney_Incr",
                 "Hemosiderosis_Liver_Incr",
                 "Hemosiderosis_Spleen_Incr",
                 "Hemosiderosis_BoneMarrow_Incr",
                 "Lacticaciddehydrogenase(LDH)_Incr",
                 "Spleen_Pigmentation_Incr",
                 "Spleen_Hyperplasia_Incr",
                 "Spleen_Congestion_Incr",
                 "Bilirubin_Incr",
                 "Erythrocyte_Methemoglobin_Incr",
                 "BoneMarrow_Hyperplasia_Incr")
  macro.list <- c("MeanCorpuscular(Cell)Volume(MCV)_Incr","Erythrocyte_Macrocytosis_Incr")
  micro.list <- c("Erythrocyte_Microcytosis_Incr","MeanCorpuscular(Cell)Volume(MCV)_Decr")

  # Plot one marker at height y for every non-missing value of study.row in
  # cols, after clamping the value into the tested range [lo,hi].
  draw.markers <- function(study.row,cols,lo,hi,y,...) {
    for(col in cols) {
      val <- study.row[1,col]
      if(!is.na(val)) {
        if(val>hi) val <- hi
        if(val<lo) val <- lo
        points(val,y,...)
      }
    }
  }

  for(code in code.list) {
    temp <- dmat[is.element(dmat[,"CODE"],code),]
    if(dim(temp)[1]<=1) next
    cname <- temp[1,"Name"]
    casrn <- temp[1,"CASRN"]
    print(cname)
    dmin <- min(temp[,"ldt"])
    dmax <- max(temp[,"hdt"])
    if(dmin==0) dmin <- 1e-5
    # x axis: largest power of ten below the lowest dose up to the smallest
    # power of ten at or above the highest dose; fall back to the ends of
    # clist when the doses lie outside it
    below <- clist[clist<dmin]
    above <- clist[clist>=dmax]
    xmin <- if(length(below)>0) max(below) else clist[1]
    xmax <- if(length(above)>0) min(above) else clist[length(clist)]
    plot(1~1,xlim=c(xmin,xmax),log="x",ylim=c(0,4),main=paste(cname,":",casrn),cex.lab=1.5,cex.axis=1.5,cex.main=1.5,type="n",xlab="Dose (mg/kg/day)",ylab="")
    for(j in seq_along(species.list)) {
      species <- species.list[j]
      type <- type.list[j]
      ymin <- j-1
      for(k in seq_len(dim(temp)[1])) {
        if(temp[k,"species"]==species && temp[k,"study_type"]==type) {
          # from here on dmin/dmax are the dose range of this one study
          dmin <- temp[k,"ldt"]
          dmax <- temp[k,"hdt"]
          text(xmin,ymin+0.35,species,pos=4,cex=1.5)
          text(xmin,ymin+0.65,type,pos=4,cex=1.5)
          bwd <- temp[k,"BodyWeight_Decr"]
          mort <- temp[k,"Mortality_Incr"]
          if(!is.na(bwd)) {
            # keep the box visible when the LEL equals the top dose
            if(bwd==dmax) bwd <- dmax*0.95
            rect(bwd, ymin, dmax, ymin+1,lwd=1,col="lightgray",border="black")
          }
          if(!is.na(mort)) {
            if(mort==dmax) mort <- dmax*0.95
            rect(mort, ymin, dmax, ymin+1,lwd=1,col="red",border="black")
          }
          hgb <- temp[k,"Hemoglobin(HGB)_Decr"]
          hct <- temp[k,"Hematocrit(HCT)_Decr"]
          rbc <- temp[k,"Erythrocyte_count_Decr"]
          if(!is.na(rbc)) points(rbc,ymin+0.1,pch=23,cex=2,bg="red")
          if(!is.na(hgb)) points(hgb,ymin+0.3,pch=23,cex=2,bg="green")
          if(!is.na(hct)) points(hct,ymin+0.5,pch=23,cex=2,bg="gray")
          draw.markers(temp[k,],hemo.list,dmin,dmax,ymin+0.7,pch=8,cex=2,bg="blue",lwd=1.5)
          draw.markers(temp[k,],macro.list,dmin,dmax,ymin+0.9,pch=24,cex=2,bg="blue")
          # pch=25 (point-down) so micro markers differ from macro markers;
          # both were drawn with pch=24 before
          draw.markers(temp[k,],micro.list,dmin,dmax,ymin+0.9,pch=25,cex=2,bg="blue")
          rect(dmin, ymin, dmax, ymin+1,lwd=2)
        }
      }
    }
    if(!to.file) browser()
  }
}
#--------------------------------------------------------------------------------------
#
# build the matrix of classification values relative to BW decrease
#
#--------------------------------------------------------------------------------------
anemia.summary.12 <- function(to.file=F) {
  # Converts the per-study LEL values into class values relative to the
  # BodyWeight_Decr LEL of the same study, writes them to xlsx and draws a
  # heatmap of the result. For every column after "anemia_count":
  #   0 = no LEL (NA)
  #   1 = LEL at or above the BodyWeight_Decr LEL
  #   2 = LEL below the BodyWeight_Decr LEL
  # A missing BodyWeight_Decr LEL is treated as 1000000.
  #
  # to.file: when TRUE the heatmap goes to
  #          ../anemia_files/anemia_post_review_heatmap.pdf, otherwise it is
  #          drawn on the current device and the function pauses in browser().
  print.current.function()
  file <- "../anemia_files/Supplemental X post-review 2 by chemical-study-species LEL values 2016-02-12.xlsx"
  dmat <- read.xlsx(file)

  result <- dmat
  name.list <- names(result)
  # match() returns NA for a missing column; which.max() silently returned 1
  index <- match("anemia_count",name.list)
  if(is.na(index)) stop("anemia.summary.12: column 'anemia_count' not found in ",file)
  # thresholds are captured before any column (possibly including
  # BodyWeight_Decr itself) is overwritten with class values
  bwd <- result[,"BodyWeight_Decr"]
  bwd[is.na(bwd)] <- 1000000
  if(index<length(name.list)) {
    for(k in (index+1):length(name.list)) {
      value <- result[,k]
      result[,k] <- ifelse(is.na(value),0,ifelse(value>=bwd,1,2))
    }
  }
  file <- "../anemia_files/Supplemental X post-review 2 by chemical-study-species class values.xlsx"
  write.xlsx(result,file)

  # heatmap columns: everything from Hemoglobin(HGB)_Decr onwards except the
  # endpoints in exclude.list
  index <- match("Hemoglobin(HGB)_Decr",name.list)
  if(is.na(index)) stop("anemia.summary.12: column 'Hemoglobin(HGB)_Decr' not found")
  name.list <- name.list[index:length(name.list)]
  exclude.list <- c("Leukocyte(WBC)_Incr","Lymphocyte_Incr","Neutrophils_Incr","Eosinophils_Incr","Platelet_Incr","Alanineaminotransferase(ALT/SGPT)_Incr","Alkalinephosphatase(ALP/ALK)_Incr","Aspartateaminotransferase(AST/SGOT)_Incr")
  name.list <- name.list[!is.element(name.list,exclude.list)]

  rown <- paste(result[,"Name"],"_",result[,"species"],"_",result[,"study_type"],sep="")
  mat <- as.matrix(result[,name.list])
  rownames(mat) <- rown
  # keep endpoints whose class values sum to more than 5
  cs <- colSums(mat)
  mat <- mat[,cs>5,drop=FALSE]

  if(to.file) {
    fname <- paste("../anemia_files/anemia_post_review_heatmap.pdf",sep="")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
  }
  heatmap(t(mat),margins=c(5,10),scale="none",main="Anemia",symm=F,
          xlab="",ylab="",cexRow=0.8,cexCol=0.5,col=brewer.pal(9,"Reds"),
          hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)

  if(to.file) dev.off()
  else browser()

}
#--------------------------------------------------------------------------------------
#
# produce the final summary file
#
#--------------------------------------------------------------------------------------
anemia.summary.13 <- function() {
  # Builds one summary row per chemical from the post-review class-value
  # sheet, using only studies whose Manual_Call is "Positive" or "Negative".
  # Each species_studytype column holds the MechanismCall2 value(s) of the
  # matching studies, joined with "+". Chemicals with more than one
  # "Positive" study are written to ../anemia_files/anemia_summary_13 v2.xlsx
  # and printed.
  print.current.function()

  file <- "../anemia_files/Supplemental X post-review 2 by chemical-study-species class values 2016-03-10 v3.xlsx"
  dmat <- read.xlsx(file)
  dmat <- dmat[is.element(dmat[,"Manual_Call"],c("Positive","Negative")),]
  code.list <- sort(unique(dmat[,"CODE"]))
  nchem <- length(code.list)
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  study.cols <- paste(species.list,"_",type.list,sep="")
  name.list <- c("CODE","CASRN","Name","Positive Studies",study.cols)

  # all-character table; only "Positive Studies" is numeric
  result <- as.data.frame(matrix("",nrow=nchem,ncol=length(name.list)),stringsAsFactors=F)
  names(result) <- name.list
  result[,"Positive Studies"] <- rep(as.integer(0),nchem)

  for(i in seq_along(code.list)) {
    code <- code.list[i]
    temp <- dmat[is.element(dmat[,"CODE"],code),]
    result[i,"CODE"] <- code
    result[i,"CASRN"] <- temp[1,"CASRN"]
    result[i,"Name"] <- temp[1,"Name"]
    result[i,"Positive Studies"] <- sum(temp[,"Manual_Call"]=="Positive")
    for(j in seq_len(nrow(temp))) {
      colname <- paste(temp[j,"species"],"_",temp[j,"study_type"],sep="")
      if(!is.element(colname,study.cols)) stop("anemia.summary.13: unexpected species/study_type '",colname,"' for CODE ",code)
      call <- temp[j,"MechanismCall2"]
      # first call fills the cell, later ones are appended with "+"
      if(result[i,colname]=="") result[i,colname] <- call
      else result[i,colname] <- paste(result[i,colname],"+",call,sep="")
    }
  }
  result <- result[result[,"Positive Studies"]>1,]
  file <- "../anemia_files/anemia_summary_13 v2.xlsx"
  write.xlsx(result,file)
  print(result)
}
#--------------------------------------------------------------------------------------
#
# produce the final summary file
#
#--------------------------------------------------------------------------------------
anemia.summary.13b <- function() {
  # Builds one summary row per chemical from the manuscript S5 class-value
  # sheet, using only studies whose Manual_Call is "Positive" or "Negative".
  # Each species_studytype column holds the MechanismCall3 value(s) of the
  # matching studies, joined with "+". Chemicals with more than one
  # "Positive" study are written to
  # ../anemia_files/Table 5 anemia_summary_13 v3.xlsx and printed.
  print.current.function()

  file <- "../manuscript/v2/S5 post-review 2 by chemical-study-species class values 2016-07-06.xlsx"
  dmat <- read.xlsx(file)
  dmat <- dmat[is.element(dmat[,"Manual_Call"],c("Positive","Negative")),]
  code.list <- sort(unique(dmat[,"CODE"]))
  nchem <- length(code.list)
  species.list <- c("rat","rat","mouse","dog")
  type.list <- c("CHR","SUB","CHR","CHR")
  study.cols <- paste(species.list,"_",type.list,sep="")
  name.list <- c("CODE","CASRN","Name","Positive Studies",study.cols)

  # all-character table; only "Positive Studies" is numeric
  result <- as.data.frame(matrix("",nrow=nchem,ncol=length(name.list)),stringsAsFactors=F)
  names(result) <- name.list
  result[,"Positive Studies"] <- rep(as.integer(0),nchem)

  for(i in seq_along(code.list)) {
    code <- code.list[i]
    temp <- dmat[is.element(dmat[,"CODE"],code),]
    result[i,"CODE"] <- code
    result[i,"CASRN"] <- temp[1,"CASRN"]
    result[i,"Name"] <- temp[1,"Name"]
    result[i,"Positive Studies"] <- sum(temp[,"Manual_Call"]=="Positive")
    for(j in seq_len(nrow(temp))) {
      colname <- paste(temp[j,"species"],"_",temp[j,"study_type"],sep="")
      if(!is.element(colname,study.cols)) stop("anemia.summary.13b: unexpected species/study_type '",colname,"' for CODE ",code)
      call <- temp[j,"MechanismCall3"]
      # first call fills the cell, later ones are appended with "+"
      if(result[i,colname]=="") result[i,colname] <- call
      else result[i,colname] <- paste(result[i,colname],"+",call,sep="")
    }
  }
  result <- result[result[,"Positive Studies"]>1,]
  file <- "../anemia_files/Table 5 anemia_summary_13 v3.xlsx"
  write.xlsx(result,file)
  print(result)
}

#--------------------------------------------------------------------------------------
#
# produce the by-chemical files
#
#--------------------------------------------------------------------------------------
anemia.summary.14 <- function() {
  # Writes one transposed workbook per chemical to
  # ../anemia_files/by_chemical/<Name>.xlsx. Rows of the output are the
  # columns of the LEL sheet (minus the identifier columns in exclude.list),
  # preceded by the MechanismCall2 of each study taken from the class-value
  # sheet; output columns are the studies of that chemical. From column 13 of
  # the reduced table onwards, columns with no value in any study of the
  # chemical are dropped.
  # NOTE(review): the chemical name is used verbatim as the file name, so a
  # name containing a path separator would fail or write elsewhere - confirm
  # the names are file-system safe.
  print.current.function()

  file <- "../anemia_files/Supplemental X post-review 2 by chemical-study-species class values 2016-02-12 v2.xlsx"
  dmat1 <- read.xlsx(file)

  file <- "../anemia_files/Supplemental X post-review 2 by chemical-study-species LEL values 2016-02-12.xlsx"
  dmat2 <- read.xlsx(file)

  code.list <- sort(unique(dmat1[,"CODE"]))
  exclude.list <- c("group","order","CODE","CASRN","Name","comment.round.1")
  first.data.col <- 13   # columns before this one are always kept

  for(code in code.list) {
    temp <- dmat2[is.element(dmat2[,"CODE"],code),]
    # chemical present in the class sheet but absent from the LEL sheet
    if(nrow(temp)==0) next
    cname <- temp[1,"Name"]
    name.list <- names(temp)
    name.list <- name.list[!is.element(name.list,exclude.list)]
    temp <- temp[,name.list,drop=FALSE]
    ncol <- length(name.list)

    # drop data columns that are NA for every study of this chemical
    keep <- rep(TRUE,ncol)
    if(ncol>=first.data.col) {
      for(j in first.data.col:ncol) keep[j] <- !all(is.na(temp[,j]))
    }
    temp <- temp[,keep,drop=FALSE]

    # MechanismCall2 of the first class-sheet row with the same study_id;
    # NA when the study is not in the class sheet
    calls <- dmat1[match(temp[,"study_id"],dmat1[,"study_id"]),"MechanismCall2"]
    temp <- data.frame(MechanismCall2=calls,temp,check.names=FALSE,stringsAsFactors=FALSE)

    temp <- t(temp)
    file <- paste("../anemia_files/by_chemical/",cname,".xlsx",sep="")
    write.xlsx(temp,file,rowNames=T)
  }

}
#--------------------------------------------------------------------------------------
#
# produce table 6
#
#--------------------------------------------------------------------------------------
anemia.summary.15 <- function(to.file=F) {
  # Draws a clustered heatmap (endpoints x studies) of the class values in
  # the manuscript S5 sheet. Only studies whose Manual_Call is "Positive" or
  # "Negative" are used, the endpoint columns are taken from column 28 to the
  # last column, and only endpoints whose values sum to more than 9 are shown.
  # Row labels are shortened versions of the endpoint column names.
  #
  # to.file: when TRUE plot to ../anemia_files/anemia_summary_15.pdf,
  #          otherwise plot to the current device and pause in browser().
  print.current.function()
  if(to.file) {
    fname <- paste("../anemia_files/anemia_summary_15.pdf",sep="")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
  }

  file <- "../manuscript/v2/S5 post-review 2 by chemical-study-species class values 2016-07-06.xlsx"
  dmat <- read.xlsx(file)
  dmat <- dmat[is.element(dmat[,"Manual_Call"],c("Positive","Negative")),]

  name.list <- names(dmat)[28:dim(dmat)[2]]
  tmat <- t(as.matrix(dmat[,name.list]))
  rs <- rowSums(tmat)
  tmat <- tmat[rs>9,,drop=FALSE]
  # Labels must come from the rows that survived the filter; building them
  # from the full name.list attached the wrong endpoint names to the rows.
  namevals <- rownames(tmat)

  # pattern/replacement pairs, applied in this order
  relabel <- rbind(
    c("_"," "),
    c("Incr",""),
    c("Erythrocyte count Decr","RBC Decr"),
    c("Hematocrit\\(HCT\\) Decr","HCT Decr"),
    c("Hemoglobin\\(HGB\\) Decr","HGB Decr"),
    c("MeanCorpuscular\\(Cell\\)Volume\\(MCV\\)","MCV"),
    c("Lacticaciddehydrogenase\\(LDH\\)","LDH"),
    c("Leukocyte\\(WBC\\)","Leukocyte"),
    c("BoneMarrow","Bone Marrow"),
    c("nucleatedredbloodcell\\(nRBC\\)","Nucleated RBC"),
    c("Erythrocyte ",""),
    c("HeinzBody ","Heinz Bodies"),
    c("JollyBodies ","Jolly Bodies"))
  for(r in seq_len(nrow(relabel))) {
    namevals <- str_replace_all(namevals,relabel[r,1],relabel[r,2])
  }

  result <- heatmap(tmat,margins=c(5,10),scale="none",main="Anemia Markers",symm=F,
                    xlab="Study",ylab="",labRow=namevals,cexRow=0.9,cexCol=0.01,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)

  if(to.file) dev.off()
  else browser()

}

#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#--------------------------------------------------------------------------------------
#
# This method builds the anemia LEL matrices broken out by species and study type
#
#--------------------------------------------------------------------------------------
anemia.lel <- function() {
  # For each species/study-type pair, reads ../ToxRefDB/lel_mat_<species>_<type>.xlsx,
  # keeps the columns listed in column 1 of anemia_endpoint_index.xlsx,
  # renames them to the names in column 2 of that index, and writes the first
  # six columns, Mortality_Incr, BodyWeight_Decr and the renamed endpoint
  # columns to ../anemia_files/anemia_lel_<species>_<type>.xlsx.
  # The function pauses in browser() twice per pair for interactive
  # inspection.
  print.current.function()
  species.list <- c("rat","rat","rat","rat","mouse","dog","rabbit")
  type.list <- c("CHR","DEV","MGR","SUB","CHR","CHR","DEV")

  file <- "../anemia_files/anemia_endpoint_index.xlsx"
  anemia.endpoints <- read.xlsx(file)
  # index the map by original endpoint name so it can be used as a lookup
  rownames(anemia.endpoints) <- anemia.endpoints[,1]
  for(i in seq_along(species.list)) {
    species <- species.list[i]
    type <- type.list[i]

    file <- paste("../ToxRefDB/lel_mat_",species,"_",type,".xlsx",sep="")
    raw.lel <- read.xlsx(file)

    # drop=FALSE keeps a data frame even when only one endpoint matches
    temp <- raw.lel[,is.element(names(raw.lel),anemia.endpoints[,1]),drop=FALSE]
    browser()
    # original endpoint name -> anemia endpoint name
    names(temp) <- anemia.endpoints[names(temp),2]
    temp <- cbind(raw.lel[,1:6],temp)
    temp <- cbind(temp,raw.lel[,c("Mortality_Incr","BodyWeight_Decr")])
    set.1 <- names(temp)[1:6]
    set.2 <- c("Mortality_Incr","BodyWeight_Decr")
    set.3 <- unique(anemia.endpoints[,2])
    set.3 <- set.3[is.element(set.3,names(temp))]
    # NOTE(review): when several original endpoints map to the same anemia
    # endpoint name, selecting by name keeps only the first such column -
    # confirm that this is intended rather than e.g. the minimum LEL.
    temp2 <- temp[,c(set.1,set.2,set.3)]
    file <- paste("../anemia_files/anemia_lel_",species,"_",type,".xlsx",sep="")
    write.xlsx(temp2,file)
    browser()
  }
}
#--------------------------------------------------------------------------------------
#
# Generate the full data file for a syndrome
#
#--------------------------------------------------------------------------------------
syndrome.data <- function(study_type="CHR",syndrome="anemia",endpoint.file="../ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog")) {
  # Collects, from the four ../ToxRefDB/all_data_<species>_<type>.xlsx files
  # (rat CHR, mouse CHR, dog CHR, rat SUB), every row whose endpoint_final is
  # one of the candidate endpoints or one of Mortality_Incr,
  # Mortality_Offspring_Incr, BodyWeight_Decr, and writes the combined table
  # to ../output/syndrome_chem_data_<syndrome>.xlsx. Pauses in browser() at
  # the end.
  #
  # study_type, species.list: accepted for compatibility but not used; the
  #                           four species/study-type pairs are fixed.
  # syndrome:      used only in the output file name.
  # endpoint.file: xlsx file with an "endpoint" column listing the candidate
  #                endpoints.
  print.current.function()
  effect.data <- read.xlsx(endpoint.file)
  effect.list <- c(effect.data[,"endpoint"],"Mortality_Incr","Mortality_Offspring_Incr","BodyWeight_Decr")

  study.species <- c("rat","mouse","dog","rat")
  study.types <- c("CHR","CHR","CHR","SUB")
  pieces <- vector("list",length(study.species))
  for(k in seq_along(study.species)) {
    infile <- paste("../ToxRefDB/all_data_",study.species[k],"_",study.types[k],".xlsx",sep="")
    temp <- read.xlsx(infile)
    pieces[[k]] <- temp[is.element(temp[,"endpoint_final"],effect.list),]
  }
  all.data <- do.call(rbind,pieces)

  # The file holds all four studies and is read back by syndrome.matrix() and
  # syndrome.matrix.numeric() as syndrome_chem_data_<syndrome>.xlsx. The old
  # name also embedded the last study type processed ("SUB"), so the readers
  # could not find it.
  outfile <- paste("../output/syndrome_chem_data_",syndrome,".xlsx",sep="")
  # openxlsx spells the argument rowNames (row.names is not one of its arguments)
  write.xlsx(all.data,file=outfile, rowNames=F)
  browser()
}
#--------------------------------------------------------------------------------------
#
# Generate the matrix of chemicals by endpoints
#
#--------------------------------------------------------------------------------------
syndrome.matrix <- function(syndrome="anemia",endpoint.file="../ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog"),do.prep=T) {
  # Builds a chemical x endpoint matrix of letter codes and writes it to
  # ../output/matrix_chem_data_anemia.xlsx, then pauses in browser().
  # Columns rat.CHR, mouse.CHR, dog.CHR and rat.SUB are set to "Y" when the
  # chemical has rows for that study. Each endpoint cell collects one code
  # per study in which the endpoint was seen:
  #   R/S/M/D   rat CHR / rat SUB / mouse CHR / dog CHR
  #   lowercase endpoint dose is at or above the study's lowest
  #             BodyWeight_Decr dose
  #   <letter>x endpoint dose is at or above the study's lowest
  #             Mortality_Incr dose
  # A study without Mortality_Incr / BodyWeight_Decr rows uses 1000000 as
  # the threshold. The "anemia_class" column is created but left empty.
  #
  # syndrome:      used to locate ../output/syndrome_chem_data_<syndrome>.xlsx
  # endpoint.file: xlsx file with an "endpoint" column
  # species.list:  accepted but not used
  # do.prep:       when TRUE read the inputs and store them in the globals
  #                effect.data, effect.list and all.data; when FALSE those
  #                globals must already exist.
  print.current.function()
  if(do.prep) {
    effect.data <- read.xlsx(endpoint.file)
    effect.list <- effect.data[,"endpoint"]
    effect.data <<- effect.data
    effect.list <<- effect.list
    infile <- paste("../output/syndrome_chem_data_",syndrome,".xlsx",sep="")
    all.data <- read.xlsx(infile)
    all.data <<- all.data
  }
  # study columns are named <species>.<study_type>
  study.cols <- c("rat.CHR","mouse.CHR","dog.CHR","rat.SUB")
  study.letters <- c(rat.CHR="R",mouse.CHR="M",dog.CHR="D",rat.SUB="S")
  name.list <- c("CODE","Name",study.cols,"anemia_class",effect.list)
  code.list <- sort(unique(all.data[,"CODE"]))
  nchem <- length(code.list)
  cat("nchem: ",nchem,"\n")
  flush.console()

  mat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  mat[] <- ""
  names(mat) <- name.list
  mat[,"CODE"] <- code.list
  rownames(mat) <- code.list
  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- all.data[is.element(all.data[,"CODE"],code),]
    mat[i,"Name"] <- temp[1,"chemical_name"]
    cat(mat[i,"Name"] ,"\n"); flush.console()

    # which of the four studies exist for this chemical
    study.key <- paste(temp[,"species"],temp[,"study_type"],sep=".")
    present <- study.cols[is.element(study.cols,study.key)]
    for(s in present) mat[i,s] <- "Y"

    # lowest mortality and body-weight doses per study
    mort <- setNames(rep(1000000,length(study.cols)),study.cols)
    bw <- mort
    for(s in present) {
      rows <- temp[study.key==s,]
      d <- rows[is.element(rows[,"endpoint_final"],"Mortality_Incr"),"dose"]
      if(length(d)>0) mort[s] <- min(d)
      d <- rows[is.element(rows[,"endpoint_final"],"BodyWeight_Decr"),"dose"]
      if(length(d)>0) bw[s] <- min(d)
      # debugging trap for unexpected dose values
      if(mort[[s]]==Inf || is.nan(mort[[s]]) || bw[[s]]==Inf || is.nan(bw[[s]]) ) browser()
    }

    # temp stays a data frame even for a single row: converting it with
    # as.matrix() turned the doses into strings, which made the threshold
    # comparisons below lexicographic.
    for(r in seq_len(nrow(temp))) {
      endpoint <- temp[r,"endpoint_final"]
      s <- study.key[r]
      # rows from other studies or other endpoints contribute nothing
      if(!is.element(endpoint,effect.list) || !is.element(s,study.cols)) next
      # NOTE(review): the dose is the minimum over every row of this chemical
      # with this endpoint, regardless of species/study type, and is then
      # compared with this study's thresholds - confirm that is intended.
      dose <- min(temp[is.element(temp[,"endpoint_final"],endpoint),"dose"])
      sl <- study.letters[[s]]
      if(dose>=bw[[s]]) sl <- tolower(sl)
      if(dose>=mort[[s]]) sl <- paste(tolower(sl),"x",sep="")
      # append the code only if the cell does not already contain it
      init <- mat[i,endpoint]
      if(length(grep(sl,init))==0) init <- paste(init,sl,sep="")
      mat[i,endpoint] <- init
    }
  }
  outfile <- paste("../output/matrix_chem_data_anemia.xlsx",sep="")
  # openxlsx spells the argument rowNames (row.names is not one of its arguments)
  write.xlsx(mat,file=outfile, rowNames=F)
  browser()
}
#--------------------------------------------------------------------------------------
#
# Generate the numeric (scored) matrix of chemicals by endpoints
#
#--------------------------------------------------------------------------------------
syndrome.matrix.numeric <- function(syndrome="anemia",endpoint.file="../ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog"),do.prep=T) {
  # Builds a chemical x endpoint matrix of scores in [0,1] and writes it to
  # ../output/matrix_chem_data_numeric_anemia.xlsx, then pauses in browser().
  # Columns rat.CHR, mouse.CHR, dog.CHR and rat.SUB are set to "Y" when the
  # chemical has rows for that study. For every endpoint, each study in which
  # the endpoint was seen contributes once:
  #   2  endpoint dose below the study's lowest BodyWeight_Decr dose
  #   1  at or above the BodyWeight_Decr dose but below the Mortality_Incr dose
  #   0  at or above the study's lowest Mortality_Incr dose
  # The sum is divided by 2 * (number of studies available for the chemical).
  # A study without Mortality_Incr / BodyWeight_Decr rows uses 1000000 as the
  # threshold. The "anemia_class" column is created but left empty.
  #
  # syndrome:      used to locate ../output/syndrome_chem_data_<syndrome>.xlsx
  # endpoint.file: xlsx file with an "endpoint" column
  # species.list:  accepted but not used
  # do.prep:       when TRUE read the inputs and store them in the globals
  #                effect.data, effect.list and all.data; when FALSE those
  #                globals must already exist.
  print.current.function()
  if(do.prep) {
    effect.data <- read.xlsx(endpoint.file)
    effect.list <- effect.data[,"endpoint"]
    effect.data <<- effect.data
    effect.list <<- effect.list
    infile <- paste("../output/syndrome_chem_data_",syndrome,".xlsx",sep="")
    all.data <- read.xlsx(infile)
    all.data <<- all.data
  }
  # study columns are named <species>.<study_type>
  study.cols <- c("rat.CHR","mouse.CHR","dog.CHR","rat.SUB")
  name.list <- c("CODE","Name",study.cols,"anemia_class",effect.list)
  code.list <- sort(unique(all.data[,"CODE"]))
  nchem <- length(code.list)
  cat("nchem: ",nchem,"\n")
  flush.console()

  mat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  mat[,1:7] <- ""
  names(mat) <- name.list
  mat[,"CODE"] <- code.list
  rownames(mat) <- code.list
  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- all.data[is.element(all.data[,"CODE"],code),]
    mat[i,"Name"] <- temp[1,"chemical_name"]
    cat(mat[i,"Name"] ,"\n"); flush.console()

    # which of the four studies exist for this chemical
    study.key <- paste(temp[,"species"],temp[,"study_type"],sep=".")
    present <- study.cols[is.element(study.cols,study.key)]
    for(s in present) mat[i,s] <- "Y"
    denom <- length(present)

    # lowest mortality and body-weight doses per study
    mort <- setNames(rep(1000000,length(study.cols)),study.cols)
    bw <- mort
    for(s in present) {
      rows <- temp[study.key==s,]
      d <- rows[is.element(rows[,"endpoint_final"],"Mortality_Incr"),"dose"]
      if(length(d)>0) mort[s] <- min(d)
      d <- rows[is.element(rows[,"endpoint_final"],"BodyWeight_Decr"),"dose"]
      if(length(d)>0) bw[s] <- min(d)
      # debugging trap for unexpected dose values
      if(mort[[s]]==Inf || is.nan(mort[[s]]) || bw[[s]]==Inf || is.nan(bw[[s]]) ) browser()
    }

    # Score every (study, endpoint) pair exactly once. The earlier
    # grep(sl,init) test was carried over from the letter-code version and
    # skipped a study whenever its score happened to appear in the digits of
    # the running total (e.g. two studies scoring 2 gave 2 instead of 4).
    # temp also stays a data frame for single-row chemicals so that doses are
    # compared as numbers, not strings.
    score <- setNames(rep(0,length(effect.list)),effect.list)
    pairs <- unique(data.frame(study=study.key,endpoint=temp[,"endpoint_final"],stringsAsFactors=FALSE))
    for(r in seq_len(nrow(pairs))) {
      s <- pairs[r,"study"]
      endpoint <- pairs[r,"endpoint"]
      if(!is.element(endpoint,effect.list) || !is.element(s,study.cols)) next
      # NOTE(review): the dose is the minimum over every row of this chemical
      # with this endpoint, regardless of species/study type, and is then
      # compared with this study's thresholds - confirm that is intended.
      dose <- min(temp[is.element(temp[,"endpoint_final"],endpoint),"dose"])
      sl <- 2
      if(dose>=bw[[s]]) sl <- 1
      if(dose>=mort[[s]]) sl <- 0
      score[[endpoint]] <- score[[endpoint]]+sl
    }
    # normalise by the maximum attainable score; guard against no studies
    if(denom>0) score <- score/(2*denom)
    for(ep in effect.list) mat[i,ep] <- score[[ep]]
  }
  outfile <- paste("../output/matrix_chem_data_numeric_anemia.xlsx",sep="")
  # openxlsx spells the argument rowNames (row.names is not one of its arguments)
  write.xlsx(mat,file=outfile, rowNames=F)
  browser()
}
###############################################################################################
###############################################################################################
###############################################################################################
###############################################################################################
###############################################################################################
###############################################################################################
###############################################################################################
###############################################################################################
#--------------------------------------------------------------------------------------
#
# run all for one study
#
#--------------------------------------------------------------------------------------
run.study <- function(species,study_type,do.D=F,do.E=F,do.F=F) {
  # Runs the selected analysis stages for one species / study type.
  #   do.D: per-study chemical heatmap, correlation heatmap and histogram
  #   do.E: syndrome LEL matrices followed by the "all" correlation and
  #         chemical heatmaps
  #   do.F: SUB vs CHR correlation analyses (take no species/study arguments)
  print.current.function()
  flush.console()

  # stage D
  if (do.D) {
    syndrome.chem.hm(species, study_type, TRUE)
    syndrome.coor.hm(species, study_type, TRUE)
    syndrome.hist(species, study_type, TRUE)
  }

  # stage E
  if (do.E) {
    syndrome.lel(species, study_type, FALSE)
    syndrome.allcoor.hm(species, study_type, TRUE, TRUE)
    syndrome.allchem.hm(species, study_type, TRUE)
  }

  # stage F
  if (do.F) {
    SUB.CHR.syndrome.correlations()
    SUB.CHR.effect.correlations()
    SUB.CHR.correlations.roc()
  }
}
#--------------------------------------------------------------------------------------
#
# create the syndrome LEL matrices (hit, min, max, pos, tot) for one species / study type
#
#--------------------------------------------------------------------------------------
syndrome.lel <- function(species,study_type,do.print=F) {
  # Build chemical x syndrome summary matrices from the endpoint-level LEL matrix.
  #
  # Reads:
  #   ../syndromes/syndromes_{species}_{study_type}.xlsx
  #       endpoint-to-syndrome map; only rows with useme==1 are used, and the
  #       columns syndrome1..syndrome5 and endpoint are read
  #   ../ToxRefDB/lel_mat_{species}_{study_type}.xlsx
  #       columns 1-6 are kept as chemical annotation (must include CODE),
  #       columns 7.. are indexed by endpoint name
  # Writes (to ../output/, one file per matrix, annotation columns prepended):
  #   syndrome_lelmat_*      1 if at least nmin endpoints of the syndrome are non-NA, else 0
  #   syndrome_lelmat_min_*  smallest non-NA value among the syndrome's endpoints
  #   syndrome_lelmat_max_*  largest non-NA value among the syndrome's endpoints
  #   syndrome_lelmat_pos_*  number of non-NA endpoints
  #   syndrome_lelmat_tot_*  number of endpoints assigned to the syndrome
  # do.print : if TRUE, echo each syndrome and its endpoint list once
  print.current.function()
  flush.console()
  syn.cols <- paste("syndrome",1:5,sep="")

  infile <- paste("../syndromes/syndromes_",species,"_",study_type,".xlsx",sep="")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  # all distinct syndrome names across the five assignment columns (sort drops NA)
  slist <- sort(unique(unlist(syndromes[,syn.cols],use.names=FALSE)))
  nsyndrome <- length(slist)

  infile <- paste("../ToxRefDB/lel_mat_",species,"_",study_type,".xlsx",sep="")
  lel.mat <- read.xlsx(infile)
  rownames(lel.mat) <- lel.mat[,"CODE"]
  chem.mat <- lel.mat[,1:6]
  lel.mat <- lel.mat[,7:dim(lel.mat)[2]]
  nchem <- dim(chem.mat)[1]
  code.list <- chem.mat[,"CODE"]

  # minimum number of non-NA endpoints for a syndrome to count as hit
  nmin <- 1#syndromes.min[syndrome,"min_effect"]

  # The endpoint list of a syndrome does not depend on the chemical, so it is
  # built once per syndrome here instead of once per chemical x syndrome cell.
  effects.by.syndrome <- lapply(slist,function(syndrome) {
    member <- rep(FALSE,nrow(syndromes))
    for(cn in syn.cols) member <- member | is.element(syndromes[,cn],syndrome)
    effect.list <- sort(unique(syndromes[member,"endpoint"]))
    if(do.print) {
      cat("[",syndrome,"]\n",sep="")
      print(effect.list)
      cat(length(effect.list),":",nmin,"\n")
    }
    flush.console()
    effect.list
  })

  syn.hit <- matrix(nrow=nchem,ncol=nsyndrome)
  rownames(syn.hit) <- code.list
  colnames(syn.hit) <- slist
  syn.hit[] <- NA
  syn.max <- syn.hit
  syn.min <- syn.hit
  syn.pos <- syn.hit
  syn.tot <- syn.hit
  syn.hit[] <- 0

  # seq_len() keeps both loops empty when there are no syndromes or no chemicals
  for(i in seq_len(nsyndrome)) {
    effect.list <- effects.by.syndrome[[i]]
    for(j in seq_len(nchem)) {
      lel <- lel.mat[j,effect.list]
      syn.tot[j,i] <- length(lel)
      lel <- lel[!is.na(lel)]
      if(length(lel)>=nmin) {
        syn.hit[j,i] <- 1
        syn.min[j,i] <- min(lel)
        syn.max[j,i] <- max(lel)
        syn.pos[j,i] <- length(lel)
      }
    }
  }

  # file-name tag -> matrix; "" is the plain hit matrix
  results <- list(list(tag="",mat=syn.hit),
                  list(tag="min_",mat=syn.min),
                  list(tag="max_",mat=syn.max),
                  list(tag="pos_",mat=syn.pos),
                  list(tag="tot_",mat=syn.tot))
  for(res in results) {
    out <- cbind(chem.mat,as.data.frame(res$mat,stringsAsFactors=FALSE))
    outfile <- paste("../output/syndrome_lelmat_",res$tag,species,"_",study_type,".xlsx",sep="")
    write.xlsx(out,file=outfile, row.names=FALSE)
  }
}
#--------------------------------------------------------------------------------------
#
# Generate the network file for the lel correlations
#
#--------------------------------------------------------------------------------------
syndrome.network <- function(study_type="CHR",syndrome="Hematopoiesis",species.list=c("rat","mouse","dog"),cutoff=0.5) {
  # Write an edge list (endpoint1, edge_type, endpoint2, TP, PPV) linking the
  # endpoints of one syndrome, pooled over species.
  #
  #   study_type   : value matched against the study_type column of ENDPOINT.FILE
  #                  and used in the input / output file names
  #   syndrome     : syndrome name matched against columns syndrome1..syndrome5
  #   species.list : one ../output/lel_correlations_{species}_{study_type}.xlsx
  #                  file is read per entry
  #   cutoff       : minimum PPV for an edge to be kept (TP must also be >= 5)
  # Output: ../output/ppv_network_{study_type}_{syndrome}.xlsx
  print.current.function()
  syndromes <- read.xlsx(ENDPOINT.FILE)
  syndromes <- syndromes[syndromes[,"useme"]==1,]
  replacements <- read.xlsx(ENDPOINT.MAP.FILE)

  # Apply the endpoint replacements in one step. match() takes the first
  # mapping when an endpoint is listed more than once (the row-by-row loop
  # errored in that case) and is a no-op for an empty table.
  idx <- match(syndromes[,"endpoint_final"],replacements[,"endpoint_final"])
  mapped <- !is.na(idx)
  syndromes[mapped,"endpoint_final"] <- replacements[idx[mapped],"replacement"]

  syndromes <- syndromes[is.element(syndromes[,"study_type"],study_type),]

  # endpoints assigned to the requested syndrome in any of the five columns
  member <- rep(FALSE,nrow(syndromes))
  for(cn in paste("syndrome",1:5,sep="")) member <- member | is.element(syndromes[,cn],syndrome)
  effect.list <- sort(unique(syndromes[member,"endpoint_final"]))

  edge.tables <- lapply(species.list,function(species) {
    infile <- paste("../output/lel_correlations_",species,"_",study_type,".xlsx",sep="")
    effect_coor <- read.xlsx(infile)
    # keep pairs where both endpoints belong to the syndrome
    keep <- is.element(effect_coor[,"endpoint1"],effect.list) &
      is.element(effect_coor[,"endpoint2"],effect.list)
    effect_coor <- effect_coor[keep,]
    effect_coor <- effect_coor[effect_coor[,"TP"]>=5,]
    effect_coor <- effect_coor[effect_coor[,"PPV"]>=cutoff,]
    # rep() keeps edge_type zero-length when nothing passes the filters;
    # assigning a scalar into a zero-row data frame column is an error
    data.frame(endpoint1=effect_coor[,"endpoint1"],
               edge_type=rep(paste("implies_",species,sep=""),nrow(effect_coor)),
               endpoint2=effect_coor[,"endpoint2"],
               TP=effect_coor[,"TP"],
               PPV=effect_coor[,"PPV"],
               stringsAsFactors=FALSE)
  })
  all.data <- do.call(rbind,edge.tables)

  outfile <- paste("../output/ppv_network_",study_type,"_",syndrome,".xlsx",sep="")
  write.xlsx(all.data,file=outfile, row.names=FALSE)
  # debugging pause, only entered in interactive sessions
  if(interactive()) browser()
}
#--------------------------------------------------------------------------------------
#
# do the heatmaps of the anemia matrix
#
#--------------------------------------------------------------------------------------
anemia.hm <- function(to.file=F) {
  # Draw a clustered heatmap of the anemia endpoint matrix.
  #
  #   to.file : if TRUE the plot goes to ../plots/anemia_hm.pdf, otherwise to
  #             the current graphics device
  # Reads ../output/anemia_endpoint_map.xlsx (endpoints are taken from column 1
  # where column 2 equals 2) and the dated numeric anemia matrix in ../output/.
  print.current.function()

  # Close the PDF device even if reading or plotting fails part-way through.
  pdf.open <- FALSE
  on.exit(if(pdf.open) dev.off(),add=TRUE)
  if(to.file) {
    file <- "../plots/anemia_hm.pdf"
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    pdf.open <- TRUE
  }

  map <- read.xlsx("../output/anemia_endpoint_map.xlsx")
  endpoint.list <- map[map[,2]==2,1]

  indata <- read.xlsx("../output/matrix_chem_data_numeric_anemia 2015-07-23.xlsx")
  rownames(indata) <- indata[,"CODE"]

  syndrome <- as.matrix(indata[,endpoint.list])
  rs <- rowSums(syndrome)
  cs <- colSums(syndrome)
  # drop chemicals (rows) and endpoints (columns) with essentially no signal
  # NOTE(review): rows use > 0.5 while columns use >= 0.5 - confirm this is intended
  syndrome <- syndrome[rs>0.5,cs>=0.5]
  result <- heatmap(t(syndrome),margins=c(2,20),scale="none",main="Anemia",symm=FALSE,
                    xlab="",ylab="",cexRow=0.8,cexCol=0.01,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE,na.rm=FALSE)

  if(to.file) {
    dev.off()
    pdf.open <- FALSE
  }
  # debugging pause, only entered in interactive sessions
  if(interactive()) browser()
}
#--------------------------------------------------------------------------------------
#
# calculate first order associations between anemia classes and ToxCast assays
#
#--------------------------------------------------------------------------------------
anemia.assoc <- function() {
  # For every anemia class (other than "Equivocal", "Negative" and "") and every
  # assay column, build a 2x2 table of class membership vs. assay activity and
  # append the TxT() summary to ../output/anemia_assoc.txt (also echoed to the
  # console).
  #
  # Uses the globals CODE.LIST, MAT.tested, MAT.Z.NORM and MAT.hitcall.
  # Assumes the columns of MAT.tested and MAT.Z.NORM are in the same order, since
  # the assay name is taken from one and the values from the other - TODO confirm.
  print.current.function()
  infile <- "../output/matrix_chem_data_numeric_anemia 2015-07-23.xlsx"
  indata <- read.xlsx(infile)
  rownames(indata) <- indata[,"CODE"]
  code.list <- indata[,"CODE"]
  code.list <- sort(code.list[is.element(code.list,CODE.LIST)])
  anemia <- indata[code.list,]
  mat.tested <- MAT.tested[code.list,]
  mat.z.norm <- MAT.Z.NORM[code.list,]
  nassay <- dim(MAT.hitcall)[2]
  nchem <- dim(anemia)[1]

  # TxT() is called once with dummy counts only to obtain the column titles
  outfile <- "../output/anemia_assoc.txt"
  s <- paste("anemia.class\tassay\t",TxT(1,2,3,4)$title,"\n",sep="")
  cat(s,file=outfile,append=FALSE)

  class.list <- sort(unique(anemia[,"anemia_class"]))
  class.list <- class.list[!is.element(class.list,c("Equivocal","Negative",""))]

  # seq_along() so that nothing is written when no positive class remains
  for(i in seq_along(class.list)) {
    aclass <- class.list[i]
    # 0 = not used, 1 = "Negative" chemical, 2 = chemical in the current class
    mask <- vector(mode="integer",length=nchem)
    mask[] <- 0
    mask[is.element(anemia[,"anemia_class"],"Negative")] <- 1
    mask[is.element(anemia[,"anemia_class"],aclass)] <- 2
    for(j in seq_len(nassay)) {
      assay <- names(mat.tested)[j]
      # restrict to chemicals flagged in the tested matrix for this assay
      mask2 <- mask*mat.tested[,j]
      use <- mask2>0
      y <- mask2[use]-1           # 0 = negative, 1 = in class
      # activity call: normalised Z of at least 2, missing values count as inactive
      x <- mat.z.norm[use,j]
      x[is.na(x)] <- 0
      x[x<2] <- 0
      x[x>0] <- 1

      a <- sum(x*y)
      b <- sum(x*(1-y))
      c <- sum((1-x)*y)
      d <- sum((1-x)*(1-y))
      # skip tables with a negative cell
      if(!(a<0 || b<0 || c<0 || d<0)) {
        txt <- TxT(a,b,c,d)
        s <- paste(aclass,"\t",assay,"\t",txt$sval,"\n",sep="")
        cat(s,file=outfile,append=TRUE)
        cat(s)
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# Join the first six columns of the anemia matrix to the physchem table by CODE
#
#--------------------------------------------------------------------------------------
anemia.physchem <- function() {
  # Combine the first six columns of the dated CHR anemia matrix with the
  # physchem table for the chemicals (CODE) present in both, sorted by CODE,
  # and write the result to ../output/anemia_physchem 2015-03-26.xlsx.
  print.current.function()

  anemia <- read.xlsx("../output/matrix_chem_data_CHR_anemia 2015-03-26.xlsx")
  rownames(anemia) <- anemia[,"CODE"]

  physchem <- read.xlsx("../physchem/toxcast_physchem_cellstress_DFT.xlsx")
  rownames(physchem) <- physchem[,"CODE"]

  # chemicals found in both tables, in sorted order
  in.both <- anemia[,"CODE"] %in% physchem[,"CODE"]
  code.list <- sort(anemia[in.both,"CODE"])

  # align both tables on the shared codes before binding them side by side
  anemia <- anemia[code.list,]
  physchem <- physchem[code.list,]
  all.data <- cbind(anemia[,1:6],physchem)

  write.xlsx(all.data,file="../output/anemia_physchem 2015-03-26.xlsx", row.names=F)
  browser()
}
#--------------------------------------------------------------------------------------
#
# t-test each predictor column of the anemia/physchem table: each anemia class vs. the "No anemia" chemicals
#
#--------------------------------------------------------------------------------------
anemia.ttest <- function() {
  # For each anemia class and each predictor column (column 35 onward of
  # ../output/anemia_physchem 2015-03-26.xlsx), run a two-sample t.test of the
  # class members against the "No anemia" chemicals and append one line per
  # test with a non-NA p-value to ../output/anemia_physchem_stats.txt.
  #
  # Classes with 2 or fewer members are skipped. Warnings are promoted to errors
  # while the function runs; the caller's warn option is restored on exit.
  print.current.function()
  infile <- "../output/anemia_physchem 2015-03-26.xlsx"
  anemia <- read.xlsx(infile)
  rownames(anemia) <- anemia[,"CODE"]
  # NOTE(review): assumes the table has at least 35 columns - confirm
  pred.list <- names(anemia)[35:dim(anemia)[2]]
  code.list <- anemia[,"CODE"]

  # All classes except the last in sort order. The original index expression
  # 1:length(x)-1 evaluates as 0:(n-1) and has the same effect.
  # NOTE(review): presumably the dropped class is meant to be the negative one - confirm
  class.list <- head(sort(unique(anemia[,"anemia_class"])),-1)

  # warn=2 is kept for the duration of the call only
  old.opts <- options(warn=2)
  on.exit(options(old.opts),add=TRUE)

  outfile <- "../output/anemia_physchem_stats.txt"
  s <- "aclass\tpred\tn.pos\tmean.pos\tsd.pos\tn.neg\tmean.neg\tsd.neg\tp.value\n"
  cat(s,file=outfile,append=FALSE)

  # the reference group is the same for every class
  code.list.neg <- code.list[is.element(anemia[,"anemia_class"],"No anemia")]

  for(aclass in class.list) {
    cat("\n============================\n")
    cat(aclass,"\n")
    cat("============================\n")

    code.list.pos <- code.list[is.element(anemia[,"anemia_class"],aclass)]
    if(length(code.list.pos)>2) {
      for(pred in pred.list) {
        x.pos <- as.numeric(anemia[code.list.pos,pred])
        x.neg <- as.numeric(anemia[code.list.neg,pred])
        x.pos <- x.pos[!is.na(x.pos)]
        x.neg <- x.neg[!is.na(x.neg)]

        n.pos <- length(x.pos)
        n.neg <- length(x.neg)
        # t.test() stops when either group has fewer than 2 observations
        if(n.pos<2 || n.neg<2) {
          cat(pred,": skipped (fewer than 2 observations in a group)\n")
          next
        }
        mean.pos <- mean(x.pos)
        sd.pos <- sd(x.pos)
        mean.neg <- mean(x.neg)
        sd.neg <- sd(x.neg)

        ret <- t.test(x.pos,x.neg)
        if(!is.na(ret$p.value)) {
          cat(pred,":",ret$p.value,"\n")
          s <- paste(aclass,pred,n.pos,format(mean.pos,digits=2),format(sd.pos,digits=2),n.neg,format(mean.neg,digits=2),format(sd.neg,digits=2),format(ret$p.value,digits=2),sep="\t")
          s <- paste(s,"\n",sep="")
          cat(s,file=outfile,append=TRUE)
        }
      }
    }
  }

}
#--------------------------------------------------------------------------------------
#
# 2x2 association statistics (via TxT) between anemia classes and thresholded assay / physchem-table values
#
#--------------------------------------------------------------------------------------
anemia.toxcast <- function() {
  # For "All" non-negative chemicals and then for each anemia class, compare the
  # class members with the "No anemia" chemicals on every column of the global
  # MAT.Z and on a fixed list of columns of the anemia/physchem table. Each
  # comparison is a 2x2 table (a = positives active, b = negatives active,
  # c = positives inactive, d = negatives inactive) summarised by TxT() and
  # appended to ../output/anemia_physchem_toxcast_stats.txt.
  #
  # Uses the globals MAT.Z and ASSAY.INFO. Groups with 2 or fewer positive
  # chemicals are skipped.
  print.current.function()
  infile <- "../output/matrix_chem_data_CHR_anemia 2015-03-26.xlsx"
  anemia <- read.xlsx(infile)
  rownames(anemia) <- anemia[,"CODE"]
  code.list <- anemia[,"CODE"]
  code.list <- code.list[is.element(code.list,rownames(MAT.Z))]
  anemia <- anemia[code.list,]

  infile <- "../output/anemia_physchem 2015-03-26.xlsx"
  toxcast_physchem <- read.xlsx(infile)
  rownames(toxcast_physchem) <- toxcast_physchem[,"CODE"]
  tp.list <- c("cytotoxicity_BLA","cytotoxicity_SRB","proliferation_decrease","ER_stress","apoptosis_up","microtubule_up","mitochondrial_disruption_up","oxidative_stress_up","cell_cycle_up","heat_shock","hypoxia","estrogen_receptor","androgen_receptor","ppar_signaling","ion_channel","CYP450","GPCR","AHR","PTPN","AChE","CCL2")

  # All classes except the last in sort order. The original index expression
  # 1:length(x)-1 evaluates as 0:(n-1) and has the same effect.
  # NOTE(review): presumably the dropped class is meant to be the negative one - confirm
  class.list <- head(sort(unique(anemia[,"anemia_class"])),-1)
  nclass <- length(class.list)

  outfile <- "../output/anemia_physchem_toxcast_stats.txt"
  # TxT() is called once with dummy counts only to obtain the column titles
  txt <- TxT(1,2,3,4)
  s <- paste("aclass\tassay\tbioprocess\tn.pos\tn.neg\t",txt$title,"\n",sep="")
  cat(s,file=outfile,append=FALSE)

  # Activity cut-offs applied to the positive and negative groups.
  # NOTE(review): the two groups use different cut-offs (3 vs 2), kept exactly
  # as in the original code - confirm this asymmetry is intended.
  cut.pos <- 3
  cut.neg <- 2

  # Drop NA, then count the values at or above the cut-off.
  count.active <- function(values,cut) {
    values <- values[!is.na(values)]
    values[values<cut] <- 0
    values[values>0] <- 1
    list(n=length(values),active=sum(values))
  }

  # Build the 2x2 table for one predictor and append one line to the output file.
  write.assoc <- function(aclass,assay,bioprocess,values.pos,values.neg) {
    pos <- count.active(values.pos,cut.pos)
    neg <- count.active(values.neg,cut.neg)
    n.a <- pos$active
    n.c <- pos$n-n.a
    n.b <- neg$active
    # inactive negatives = negatives minus active negatives; the physchem loop
    # previously subtracted the inactive positives here by mistake
    n.d <- neg$n-n.b
    txt <- TxT(n.a,n.b,n.c,n.d)
    s <- paste(aclass,"\t",assay,"\t",bioprocess,"\t",pos$n,"\t",neg$n,"\t",txt$sval,"\n",sep="")
    cat(s,file=outfile,append=TRUE)
  }

  # the reference group is the same for every class
  code.list.neg <- code.list[is.element(anemia[,"anemia_class"],"No anemia")]

  nassay <- dim(MAT.Z)[2]
  for(j in 0:nclass) {
    if(j==0) {
      # pooled group: every chemical that is not in the reference group
      aclass <- "All"
      code.list.pos <- code.list[!is.element(code.list,code.list.neg)]
    }
    else {
      aclass <- class.list[j]
      code.list.pos <- code.list[is.element(anemia[,"anemia_class"],aclass)]
    }
    cat("\n============================\n")
    cat(aclass,"\n")
    cat("============================\n")
    flush.console()
    if(length(code.list.pos)>2) {
      # columns of MAT.Z, labelled with their biological process
      for(i in seq_len(nassay)) {
        assay <- names(MAT.Z)[i]
        bioprocess <- ASSAY.INFO[assay,"biological_process"]
        write.assoc(aclass,assay,bioprocess,MAT.Z[code.list.pos,i],MAT.Z[code.list.neg,i])
      }
      # selected columns of the anemia/physchem table; the column name doubles
      # as the bioprocess label
      for(assay in tp.list) {
        write.assoc(aclass,assay,assay,toxcast_physchem[code.list.pos,assay],toxcast_physchem[code.list.neg,assay])
      }
    }
  }

}
