#--------------------------------------------------------------------------------------
#
# syndrome_V01.r - code to build syndrome models, for instance for anemia
#
# January 2016
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
options(java.parameters = "-Xmx1000m")
library(grDevices)
library(RColorBrewer)
library(stringr)
library(deSolve)
library(msm)
library(httk)
library(openxlsx)
library(e1071)
library(tree)

# Project helper scripts. Functions called below but not defined in this file
# (e.g. uniquify, TxT) presumably come from these - TODO confirm.
source("utils.R")
source("ToxRefDB_clean.R")
# Endpoint dictionary workbook. build.study.table reads its endpoint_raw,
# endpoint_final and useme columns; syndrome.network additionally reads
# study_type and syndrome1..syndrome5.
ENDPOINT.FILE <- "../ToxRefDB/toxrefdb_endpoint_2015_03_23.xlsx"
# Workbook mapping an endpoint_final value to its "replacement" name.
ENDPOINT.MAP.FILE <- "../ToxRefDB/endpoint_replacements.xlsx"
# The settings below are assigned with <<- ; they are not referenced by the
# functions visible in this part of the file.
INPUTDIR <<- "../input/"
VARMATDATE <<- "151020"
VARMATDIR <<- paste(INPUTDIR,"varmats_",VARMATDATE,"_internal/",sep="")
ASSAYDEFDATE <<- "151020"

#--------------------------------------------------------------------------------------
#
# run all for one study
#
#--------------------------------------------------------------------------------------
run.study <- function(species,study_type,do.A=FALSE,do.B=FALSE,do.C=FALSE,do.D=FALSE,do.E=FALSE,do.F=FALSE) {
  # Driver for one species / study type: runs the pipeline stages that are
  # switched on, in the fixed order A..F.
  #
  # species, study_type: passed unchanged to every stage that takes them.
  # do.A: filter the raw ToxRefDB export (prep.ToxRefDB, always re-reading it).
  # do.B: build.endpoint.aggregates().
  # do.C: build the study table, the LEL/hit matrices and the endpoint
  #       correlation file.
  # do.D: syndrome.chem.hm / syndrome.coor.hm / syndrome.hist.
  # do.E: syndrome.lel / syndrome.allcoor.hm / syndrome.allchem.hm.
  # do.F: the SUB.CHR.* comparisons (these take no arguments).
  #
  # TRUE/FALSE are spelled out because T and F are ordinary variables that
  # can be reassigned elsewhere in a session.
  cat("==========================================================================\n")
  cat("run.study\n")
  cat("==========================================================================\n")
  flush.console()
  if(do.A) prep.ToxRefDB(do.read=TRUE,species,study_type)
  if(do.B) build.endpoint.aggregates()

  if(do.C) {
    build.study.table(species,study_type)
    prep.lel.mats(species,study_type)
    generate.lel.correlations(species,study_type)
  }
  if(do.D) {
    syndrome.chem.hm(species,study_type,TRUE)
    syndrome.coor.hm(species,study_type,TRUE)
    syndrome.hist(species,study_type,TRUE)
  }
  if(do.E) {
    syndrome.lel(species,study_type,FALSE)
    syndrome.allcoor.hm(species,study_type,TRUE,TRUE)
    syndrome.allchem.hm(species,study_type,TRUE)
  }
  if(do.F) {
    SUB.CHR.syndrome.correlations()
    SUB.CHR.effect.correlations()
    SUB.CHR.correlations.roc()
  }
}
#--------------------------------------------------------------------------------------
#
# Prepare the ToxRefDB file
#
#--------------------------------------------------------------------------------------
prep.ToxRefDB <- function(do.read=TRUE,species,study_type) {
  # Filter the raw ToxRefDB export down to acceptable studies for the given
  # species / study type and write the result to an xlsx file.
  #
  # do.read:    when TRUE the raw workbook is read and cached in the global
  #             TOXREFDB.ALL; when FALSE the previously cached copy is used.
  # species:    value(s) matched against the "species" column.
  # study_type: value(s) matched against the "study_type" column.
  #
  # Prints the table dimensions after each filtering step and the number of
  # distinct CASRNs that remain.
  cat("==========================================================================\n")
  cat("prep.ToxRefDB\n")
  cat("==========================================================================\n")
  flush.console()
  if(do.read) {
    infile <- "../ToxRefDB/toxrefdb_study_tg_effect_endpoint_AUG2014_FOR_PUBLIC_RELEASE.xlsx"
    TOXREFDB.ALL <<- read.xlsx(infile)
  } else if(!exists("TOXREFDB.ALL")) {
    stop("prep.ToxRefDB: TOXREFDB.ALL is not loaded; call once with do.read=TRUE",call.=FALSE)
  }
  print(dim(TOXREFDB.ALL))

  # Membership test instead of recoding the column and comparing with 1:
  # rows whose usability_desc is NA are now dropped here rather than
  # surviving as all-NA rows that inflate the printed row count.
  acceptable <- c("Acceptable Guideline (post-1998)",
                  "Acceptable Guideline (pre-1998)",
                  "Acceptable Non-guideline")
  temp <- TOXREFDB.ALL[is.element(TOXREFDB.ALL[,"usability_desc"],acceptable),]
  print(dim(temp))

  keep <- is.element(temp[,"species"],species) & is.element(temp[,"study_type"],study_type)
  temp <- temp[keep,]
  print(dim(temp))

  casrn.list <- sort(uniquify(temp[,"chemical_casrn"]))
  nchem <- length(unique(casrn.list))
  cat("Total CASRN: ",nchem,"\n")
  # NOTE(review): this writes under "../ToxRefDB/" while build.study.table
  # reads the same file name under "ToxRefDB/" - confirm the two locations
  # are meant to differ.
  fname <- paste("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(temp,file=fname, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Build the study table
#
#--------------------------------------------------------------------------------------
build.study.table <- function(species,study_type) {
  # Annotate the filtered ToxRefDB rows with a chemical CODE, a composite
  # endpoint_raw key, and the endpoint_final / useme values looked up from the
  # endpoint dictionary; keep only rows with useme == 1 and write them out.
  #
  # species, study_type: only used to build the input and output file names.
  cat("==========================================================================\n")
  cat("build.study.table\n")
  cat("==========================================================================\n")
  flush.console()
  filename <- paste("ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
  temp <- read.xlsx(filename)

  # Layout kept from the original code: a copy of the first column is
  # appended, and the first column itself is renamed to CODE and overwritten
  # with the chemical code.
  # NOTE(review): the other added columns are named via the last position, so
  # renaming position 1 may be a slip - confirm before changing the layout.
  temp <- cbind(temp,temp[,1])
  names(temp)[1] <- "CODE"
  temp[,"CODE"] <- str_replace_all(paste("C",temp[,"chemical_casrn"],sep=""),"-","")
  temp[,"endpoint_raw"] <- ""
  temp[,"endpoint_final"] <- ""
  # NA means "not matched in the dictionary"; it is turned into 0 below.
  # (The column used to be initialised with numeric(0), a zero-length value.)
  temp[,"useme"] <- NA_real_
  all.data <- temp

  aggregates <- read.xlsx(ENDPOINT.FILE)
  rosetta <- aggregates[,c("endpoint_raw","endpoint_final","useme")]
  replacements <- read.xlsx(ENDPOINT.MAP.FILE)
  # Apply the endpoint_final -> replacement map; the first map entry is used
  # when a name is listed more than once.
  swap <- match(rosetta[,"endpoint_final"],replacements[,"endpoint_final"])
  swapped <- which(!is.na(swap))
  if(length(swapped)>0) {
    rosetta[swapped,"endpoint_final"] <- replacements[swap[swapped],"replacement"]
  }

  # Column as character with missing values blanked.
  field <- function(name) {
    x <- as.character(all.data[,name])
    x[is.na(x)] <- ""
    x
  }
  # Composite key: the fields joined by "_" in this fixed order. study_type is
  # taken from the data as-is (an NA is not blanked), and blanks are removed
  # from effect_target. Built on whole columns so the study_type argument is
  # no longer overwritten by per-row values.
  all.data[,"endpoint_raw"] <- paste(
    field("effect_type"),"_",
    all.data[,"study_type"],"_",
    str_replace_all(field("effect_target")," ",""),"_",
    field("effect_desc"),"_",
    field("direction"),"_",
    field("target_site"),"_",
    field("focal_diffuse"),"_",
    field("effect_category"),"_",
    field("endpoint_category"),"_",
    field("endpoint_type"),"_",
    field("endpoint_system"),"_",
    field("endpoint_target"),"_",
    field("endpoint_lifestage"),sep="")

  # First dictionary row with the same key supplies endpoint_final and useme.
  hit <- match(all.data[,"endpoint_raw"],rosetta[,"endpoint_raw"])
  found <- which(!is.na(hit))
  if(length(found)>0) {
    all.data[found,"endpoint_final"] <- rosetta[hit[found],"endpoint_final"]
    all.data[found,"useme"] <- rosetta[hit[found],"useme"]
  }

  mask <- all.data[,"useme"]
  mask[is.na(mask)] <- 0
  all.data <- all.data[mask==1,]
  outfile <- paste("ToxRefDB/all_data_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(all.data,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Build the per-chemical table and the LEL and hit matrices from the study table
#
#--------------------------------------------------------------------------------------
prep.lel.mats <- function(species,study_type) {
  # From the annotated study table build and write three workbooks:
  #   chemicals_*: one row per CODE with CASRN, Name, DSSTox_GSID and the
  #                min ldt / max hdt over that chemical's rows
  #   lel_mat_*:   chemicals x endpoint_final matrix of doses
  #   hit_mat_*:   same shape, 1 where a dose is present and 0 otherwise
  # Both matrices carry the six chemical columns in front.
  #
  # species, study_type: only used to build the file names.
  cat("==========================================================================\n")
  cat("prep.lel.mats\n")
  cat("==========================================================================\n")
  flush.console()
  filename <- paste("ToxRefDB/all_data_",species,"_",study_type,".xlsx",sep="")
  all.data <- read.xlsx(filename)
  code.list <- sort(unique(all.data[,"CODE"]))
  nchem <- length(code.list)
  if(nchem==0) stop("prep.lel.mats: no rows in ",filename,call.=FALSE)

  # Descriptive fields come from the first row of each chemical; ldt / hdt
  # are the plain min / max over its rows (an NA still propagates).
  first.row <- match(code.list,all.data[,"CODE"])
  code.factor <- factor(all.data[,"CODE"],levels=code.list)
  chem.data <- data.frame(
    CODE=code.list,
    CASRN=all.data[first.row,"chemical_casrn"],
    Name=all.data[first.row,"chemical_name"],
    DSSTox_GSID=all.data[first.row,"chemical_id"],
    ldt=as.vector(tapply(all.data[,"ldt"],code.factor,min)),
    hdt=as.vector(tapply(all.data[,"hdt"],code.factor,max)),
    stringsAsFactors=FALSE)
  rownames(chem.data) <- code.list
  outfile <- paste("ToxRefDB/chemicals_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(chem.data,file=outfile, row.names=FALSE)

  endpoint.list <- sort(unique(all.data[,"endpoint_final"]))
  nendpoint <- length(endpoint.list)
  # Each cell holds the LOWEST dose seen for that chemical / endpoint. The
  # previous row-by-row assignment kept whichever row happened to come last,
  # so the value depended on row order in the input file.
  # Assumes the dose column is numeric - TODO confirm.
  lowest.dose <- function(dose) {
    dose <- dose[!is.na(dose)]
    if(length(dose)==0) NA_real_ else min(dose)
  }
  endpoint.factor <- factor(all.data[,"endpoint_final"],levels=endpoint.list)
  lel.tab <- tapply(all.data[,"dose"],list(code.factor,endpoint.factor),lowest.dose)
  lel.mat <- as.data.frame(matrix(as.vector(lel.tab),nrow=nchem,ncol=nendpoint),stringsAsFactors=FALSE)
  names(lel.mat) <- endpoint.list
  rownames(lel.mat) <- code.list

  hit.mat <- lel.mat
  hit.mat[is.na(lel.mat)] <- 0
  hit.mat[!is.na(lel.mat)] <- 1
  lel.mat <- cbind(chem.data,lel.mat)
  hit.mat <- cbind(chem.data,hit.mat)

  outfile <- paste("ToxRefDB/lel_mat_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(lel.mat,file=outfile, row.names=FALSE)
  outfile <- paste("ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(hit.mat,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# calculate correlations between all endpoints 
#
#--------------------------------------------------------------------------------------
generate.lel.correlations <- function(species,study_type) {
  # For every pair of endpoints in the hit matrix, build the 2x2 table of
  # co-occurrence across chemicals, pass it to TxT() and append one line per
  # direction (e1 -> e2 and e2 -> e1) to a tab-delimited text file when the
  # odds ratio is > 2 and the p-value < 0.05. Pairs with fewer than two
  # shared hits are skipped; pairs with at least ten are echoed to the console.
  #
  # species, study_type: only used to build the file names.
  cat("==========================================================================\n")
  cat("generate.lel.correlations\n")
  cat("==========================================================================\n")
  outfile <- paste("output/lel_correlations_",species,"_",study_type,".txt",sep="")
  # A throw-away call, used only to obtain the header text.
  txt <- TxT(1,2,3,4)
  cat(paste("endpoint1\tendpoint2\t",txt$title,"\n",sep=""),file=outfile,append=FALSE)

  infile <- paste("ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  hit.mat <- read.xlsx(infile)
  rownames(hit.mat) <- hit.mat[,"CODE"]
  # The first six columns are the chemical descriptors written by
  # prep.lel.mats; the endpoints start at column 7.
  endpoint.cols <- seq_len(ncol(hit.mat))[-(1:6)]
  hits <- as.matrix(hit.mat[,endpoint.cols,drop=FALSE])
  endpoint.list <- colnames(hits)
  nendpoint <- length(endpoint.list)
  # Nothing to pair up; the header-only file has already been written.
  if(nendpoint<2) return(invisible(NULL))

  # Test one direction and, if significant, write and (optionally) echo it.
  # a = both hit, b = only "from" hit, c = only "to" hit, d = neither.
  report <- function(from,to,a,b,c,d) {
    txt <- TxT(a,b,c,d)
    if(txt$odds.ratio>2 && txt$p.value<0.05) {
      cat(paste(from,"\t",to,"\t",txt$sval,"\n",sep=""),file=outfile,append=TRUE)
      if(a>=10) {
        cat("================================\n")
        # Labelled in the direction being reported (the reverse direction
        # used to be printed with the forward label).
        cat(from," - ",to,"\n")
        cat("Examples: ",a,"\n")
        cat("OR: ",format(txt$odds.ratio,digits=2),"\n")
        cat("p.value: ",format(txt$p.value,digits=2),"\n")
        flush.console()
      }
    }
  }

  for(i in seq_len(nendpoint-1)) {
    e1 <- endpoint.list[i]
    b1 <- hits[,i]
    for(j in (i+1):nendpoint) {
      e2 <- endpoint.list[j]
      b2 <- hits[,j]
      both <- sum(b1*b2)
      if(both<2) next
      only1 <- sum(b1*(1-b2))
      only2 <- sum((1-b1)*b2)
      neither <- sum((1-b1)*(1-b2))
      report(e1,e2,both,only1,only2,neither)
      report(e2,e1,both,only2,only1,neither)
    }
  }
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Create the syndrome LEL matrices
#
#--------------------------------------------------------------------------------------
syndrome.lel <- function(species,study_type,do.print=FALSE) {
  # Collapse the chemical x endpoint LEL matrix to chemical x syndrome and
  # write five workbooks (each prefixed with the six chemical columns):
  #   syndrome_lelmat_*      1 if the chemical has a dose for at least nmin
  #                          endpoints of the syndrome, else 0
  #   syndrome_lelmat_min_*  lowest of those doses  (NA when not a hit)
  #   syndrome_lelmat_max_*  highest of those doses (NA when not a hit)
  #   syndrome_lelmat_pos_*  number of endpoints with a dose (NA when not a hit)
  #   syndrome_lelmat_tot_*  number of endpoints that define the syndrome
  #
  # species, study_type: only used to build the file names.
  # do.print: echo each syndrome and its endpoints (once per syndrome).
  cat("==========================================================================\n")
  cat("syndrome.lel\n")
  cat("==========================================================================\n")
  flush.console()
  infile <- paste("syndromes/syndromes_",species,"_",study_type,".xlsx",sep="")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  # A row of the syndromes sheet can assign its endpoint to up to five
  # syndromes; sort() also discards the NA entries of unused slots.
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- sort(unique(unlist(syndromes[,syndrome.cols],use.names=FALSE)))
  nsyndrome <- length(slist)

  infile <- paste("ToxRefDB/lel_mat_",species,"_",study_type,".xlsx",sep="")
  lel.mat <- read.xlsx(infile)
  rownames(lel.mat) <- lel.mat[,"CODE"]
  chem.mat <- lel.mat[,1:6]
  lel.vals <- as.matrix(lel.mat[,7:dim(lel.mat)[2],drop=FALSE])
  nchem <- dim(chem.mat)[1]
  code.list <- chem.mat[,"CODE"]

  syn.hit <- matrix(0,nrow=nchem,ncol=nsyndrome,dimnames=list(code.list,slist))
  syn.min <- matrix(NA,nrow=nchem,ncol=nsyndrome,dimnames=list(code.list,slist))
  syn.max <- syn.min
  syn.pos <- syn.min
  syn.tot <- syn.min

  nmin <- 1#syndromes.min[syndrome,"min_effect"]
  # The endpoint set of a syndrome does not depend on the chemical, so it is
  # resolved once per syndrome and applied to all chemicals at once.
  for(i in seq_len(nsyndrome)) {
    syndrome <- slist[i]
    member <- Reduce("|",lapply(syndrome.cols,function(column) is.element(syndromes[,column],syndrome)))
    effect.list <- sort(unique(syndromes[member,"endpoint"]))
    ne <- length(effect.list)
    if(do.print) {
      cat("[",syndrome,"]\n",sep="")
      print(effect.list)
      cat(ne,":",nmin,"\n")
      flush.console()
    }
    unknown <- setdiff(effect.list,colnames(lel.vals))
    if(length(unknown)>0) {
      stop("syndrome.lel: endpoints of syndrome '",syndrome,"' not found in the LEL matrix: ",
           paste(unknown,collapse=", "),call.=FALSE)
    }
    doses <- lel.vals[,effect.list,drop=FALSE]
    npos <- rowSums(!is.na(doses))
    syn.tot[,i] <- ne
    hit <- which(npos>=nmin)
    if(length(hit)>0) {
      syn.hit[hit,i] <- 1
      syn.min[hit,i] <- apply(doses[hit,,drop=FALSE],1,min,na.rm=TRUE)
      syn.max[hit,i] <- apply(doses[hit,,drop=FALSE],1,max,na.rm=TRUE)
      syn.pos[hit,i] <- npos[hit]
    }
  }

  syn.hit <- cbind(chem.mat,as.data.frame(syn.hit,stringsAsFactors=FALSE))
  syn.min <- cbind(chem.mat,as.data.frame(syn.min,stringsAsFactors=FALSE))
  syn.max <- cbind(chem.mat,as.data.frame(syn.max,stringsAsFactors=FALSE))
  syn.pos <- cbind(chem.mat,as.data.frame(syn.pos,stringsAsFactors=FALSE))
  syn.tot <- cbind(chem.mat,as.data.frame(syn.tot,stringsAsFactors=FALSE))

  outfile <- paste("output/syndrome_lelmat_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(syn.hit,file=outfile, row.names=FALSE)
  outfile <- paste("output/syndrome_lelmat_min_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(syn.min,file=outfile, row.names=FALSE)
  outfile <- paste("output/syndrome_lelmat_max_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(syn.max,file=outfile, row.names=FALSE)
  outfile <- paste("output/syndrome_lelmat_pos_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(syn.pos,file=outfile, row.names=FALSE)
  outfile <- paste("output/syndrome_lelmat_tot_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(syn.tot,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Test whether the syndrome and syndrome_min files match
#
#--------------------------------------------------------------------------------------
syndrome.test <- function(species,study_type,do.print=FALSE) {
  # Check that every syndrome named in the syndromes workbook (columns
  # syndrome1..syndrome5 of the rows with useme == 1) also appears in the
  # "syndrome" column of the syndromes_min workbook.
  #
  # species, study_type: only used to build the file names.
  # do.print: accepted for symmetry with the other functions; not used.
  #
  # Prints the missing names and then raises an error when any are missing;
  # otherwise prints a confirmation.
  cat("==========================================================================\n")
  cat("syndrome.test\n")
  cat("==========================================================================\n")
  flush.console()
  infile <- paste("syndromes/syndromes_",species,"_",study_type,".xlsx",sep="")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  infile <- paste("syndromes/syndromes_min_",species,"_",study_type,".xlsx",sep="")
  syndromes.min <- read.xlsx(infile)
  # Kept from the original: this assignment fails on duplicated syndrome names.
  rownames(syndromes.min) <- syndromes.min[,"syndrome"]

  # sort() drops the NA entries of unused syndrome slots; unique() keeps each
  # name once even when it occurs in several of the five columns.
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- sort(unique(unlist(syndromes[,syndrome.cols],use.names=FALSE)))

  slist.min <- syndromes.min[,"syndrome"]
  missing <- slist[!is.element(slist,slist.min)]
  if(length(missing)>0) {
    cat("Missing syndromes in syndromes.min:\n",missing,"\n")
    # Base R has no exit(); stop() is the explicit way to abort here.
    stop("syndrome.test: ",length(missing)," syndrome(s) missing from the syndromes_min file",call.=FALSE)
  }
  cat("All syndromes match\n")
}
#--------------------------------------------------------------------------------------
#
# Generate the network file for the lel correlations
#
#--------------------------------------------------------------------------------------
syndrome.network <- function(study_type="CHR",syndrome="Hematopoiesis",species.list=c("rat","mouse","dog"),cutoff=0.5) {
  # Write an edge list (endpoint1, edge_type, endpoint2, TP, PPV) linking the
  # endpoints of one syndrome, pooled over species.
  #
  # study_type:   selects the dictionary rows and the correlation files.
  # syndrome:     syndrome whose endpoints are kept (matched against
  #               syndrome1..syndrome5 of the endpoint dictionary).
  # species.list: species whose correlation files are read; each contributes
  #               edges of type "implies_<species>".
  # cutoff:       minimum PPV for an edge; TP must also be at least 5.
  #
  # Returns the edge table invisibly (the trailing browser() debugging stop
  # has been removed).
  syndromes <- read.xlsx(ENDPOINT.FILE)
  syndromes <- syndromes[syndromes[,"useme"]==1,]
  replacements <- read.xlsx(ENDPOINT.MAP.FILE)
  # Apply the endpoint_final -> replacement map (first entry wins).
  swap <- match(syndromes[,"endpoint_final"],replacements[,"endpoint_final"])
  swapped <- which(!is.na(swap))
  if(length(swapped)>0) {
    syndromes[swapped,"endpoint_final"] <- replacements[swap[swapped],"replacement"]
  }

  syndromes <- syndromes[is.element(syndromes[,"study_type"],study_type),]
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  member <- Reduce("|",lapply(syndrome.cols,function(column) is.element(syndromes[,column],syndrome)))
  effect.list <- sort(unique(syndromes[member,"endpoint_final"]))

  per.species <- vector("list",length(species.list))
  for(i in seq_along(species.list)) {
    species <- species.list[i]
    # NOTE(review): generate.lel.correlations writes a tab-delimited ".txt"
    # file with this stem; an ".xlsx" copy is expected here - confirm how it
    # is produced.
    infile <- paste("output/lel_correlations_",species,"_",study_type,".xlsx",sep="")
    effect_coor <- read.xlsx(infile)
    keep <- is.element(effect_coor[,"endpoint1"],effect.list) &
      is.element(effect_coor[,"endpoint2"],effect.list) &
      effect_coor[,"TP"]>=5 &
      effect_coor[,"PPV"]>=cutoff
    # which() drops rows whose TP or PPV is NA instead of turning them into
    # all-NA rows.
    effect_coor <- effect_coor[which(keep),]
    # Built in one step so that a species with no qualifying rows yields an
    # empty table instead of failing on the scalar edge_type assignment.
    per.species[[i]] <- data.frame(
      endpoint1=effect_coor[,"endpoint1"],
      edge_type=rep(paste("implies_",species,sep=""),nrow(effect_coor)),
      endpoint2=effect_coor[,"endpoint2"],
      TP=effect_coor[,"TP"],
      PPV=effect_coor[,"PPV"],
      stringsAsFactors=FALSE)
  }
  all.data <- do.call(rbind,per.species)

  outfile <- paste("output/ppv_network_",study_type,"_",syndrome,".xlsx",sep="")
  write.xlsx(all.data,file=outfile, row.names=FALSE)
  invisible(all.data)
}
#--------------------------------------------------------------------------------------
#
# Generate the full data file for a syndrome
#
#--------------------------------------------------------------------------------------
syndrome.data <- function(study_type="CHR",syndrome="anemia",endpoint.file="ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog")) {
  # Pool, over a fixed set of study tables, every row whose endpoint_final is
  # one of the candidate endpoints (column "endpoint" of endpoint.file) or one
  # of the mortality / body-weight endpoints, and write the result to
  # output/syndrome_chem_data_<syndrome>.xlsx.
  #
  # study_type, species.list: currently NOT used - the set of tables is fixed
  #   to rat/CHR, mouse/CHR, dog/CHR and rat/SUB, as in the original code.
  # syndrome: only used in the output file name.
  #
  # Returns the pooled table invisibly (the trailing browser() debugging stop
  # has been removed).
  effect.data <- read.xlsx(endpoint.file)
  effect.list <- c(effect.data[,"endpoint"],"Mortality_Incr","Mortality_Offspring_Incr","BodyWeight_Decr")

  arms <- list(c("rat","CHR"),c("mouse","CHR"),c("dog","CHR"),c("rat","SUB"))
  all.data <- NULL
  for(arm in arms) {
    infile <- paste("ToxRefDB/all_data_",arm[1],"_",arm[2],".xlsx",sep="")
    temp <- read.xlsx(infile)
    temp <- temp[is.element(temp[,"endpoint_final"],effect.list),]
    all.data <- rbind(all.data,temp)
  }

  # The file name used to include study_type, which by this point had been
  # overwritten with "SUB"; syndrome.matrix and syndrome.matrix.numeric read
  # "output/syndrome_chem_data_<syndrome>.xlsx", so that name is written here.
  outfile <- paste("output/syndrome_chem_data_",syndrome,".xlsx",sep="")
  write.xlsx(all.data,file=outfile, row.names=FALSE)
  invisible(all.data)
}
#--------------------------------------------------------------------------------------
#
# data for the PFAA test
#
#--------------------------------------------------------------------------------------
pfaa.data <- function(study_type="CHR",syndrome="anemia",endpoint.file="ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog")) {
  # Pull every row for a fixed list of seven CASRNs out of six study tables
  # and write them to output/PFAA_data.xlsx. The number of rows found in each
  # table is printed as "<species> : <study_type> : <n>".
  #
  # All four arguments are currently NOT used; the tables and the CASRN list
  # are fixed below, as in the original code.
  #
  # Returns the pooled table invisibly (the trailing browser() debugging stop
  # has been removed).
  casrn.list <- c("375-85-9","375-95-1","335-76-2","2058-94-8","307-55-1","335-67-1","307-24-4")

  arms <- list(c("rat","CHR"),c("mouse","CHR"),c("dog","CHR"),
               c("rat","SUB"),c("rat","DEV"),c("rabbit","DEV"))
  all.data <- NULL
  for(arm in arms) {
    arm.species <- arm[1]
    arm.study <- arm[2]
    infile <- paste("ToxRefDB/all_data_",arm.species,"_",arm.study,".xlsx",sep="")
    temp <- read.xlsx(infile)
    temp <- temp[is.element(temp[,"chemical_casrn"],casrn.list),]
    cat(arm.species,":",arm.study,":",dim(temp)[1],"\n"); flush.console()
    all.data <- rbind(all.data,temp)
  }

  outfile <- "output/PFAA_data.xlsx"
  write.xlsx(all.data,file=outfile, row.names=FALSE)
  invisible(all.data)
}
#--------------------------------------------------------------------------------------
#
# Generate the matrix of chemicals by endpoints
#
#--------------------------------------------------------------------------------------
syndrome.matrix <- function(syndrome="anemia",endpoint.file="ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog"),do.prep=TRUE) {
  # Build a chemical x candidate-endpoint table of short text flags and write
  # it to output/matrix_chem_data_anemia.xlsx.
  #
  # Each chemical row records which study arms exist ("Y" in rat.CHR,
  # mouse.CHR, dog.CHR, rat.SUB). Each endpoint cell accumulates one flag per
  # arm in which the endpoint was seen: R / S / M / D for rat CHR, rat SUB,
  # mouse CHR, dog CHR. The flag is lower-cased when the endpoint dose is at
  # or above that arm's lowest BodyWeight_Decr dose, and additionally gets an
  # "x" suffix when it is at or above that arm's lowest Mortality_Incr dose.
  #
  # syndrome:      names the input file output/syndrome_chem_data_<syndrome>.xlsx.
  # endpoint.file: workbook whose "endpoint" column lists the candidates.
  # species.list:  not used.
  # do.prep:       TRUE reads both inputs and caches effect.data, effect.list
  #                and all.data in the global environment; FALSE reuses the
  #                globals left by an earlier call.
  #
  # Returns the table invisibly (the trailing browser() debugging stop has
  # been removed).
  if(do.prep) {
    effect.data <- read.xlsx(endpoint.file)
    effect.list <- effect.data[,"endpoint"]
    effect.data <<- effect.data
    effect.list <<- effect.list
    infile <- paste("output/syndrome_chem_data_",syndrome,".xlsx",sep="")
    all.data <- read.xlsx(infile)
    all.data <<- all.data
  }
  name.list <- c("CODE","Name","rat.CHR","mouse.CHR","dog.CHR","rat.SUB","anemia_class",effect.list)
  code.list <- sort(unique(all.data[,"CODE"]))
  nchem <- length(code.list)
  cat("nchem: ",nchem,"\n")
  flush.console()

  mat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  mat[] <- ""
  names(mat) <- name.list
  mat[,"CODE"] <- code.list
  rownames(mat) <- code.list

  # Study arms: "<species>.<study_type>" is both the lookup key and the name
  # of the arm's presence column; arm.letter is the flag written for the arm.
  arm.column <- c("rat.CHR","rat.SUB","mouse.CHR","dog.CHR")
  arm.letter <- c("R","S","M","D")
  # Stand-in threshold for "effect not observed".
  no.effect <- 1000000

  # Lowest dose of one endpoint within a set of rows, or no.effect when the
  # endpoint is absent. An NA or infinite result used to drop into browser();
  # it now aborts with a message.
  lowest.dose <- function(rows,endpoint,where) {
    doses <- rows[is.element(rows[,"endpoint_final"],endpoint),"dose"]
    if(length(doses)==0) return(no.effect)
    lowest <- min(doses)
    if(is.na(lowest) || lowest==Inf) {
      stop("syndrome.matrix: unusable ",endpoint," dose for ",where,call.=FALSE)
    }
    lowest
  }

  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- all.data[is.element(all.data[,"CODE"],code),]
    mat[i,"Name"] <- temp[1,"chemical_name"]
    cat(mat[i,"Name"] ,"\n"); flush.console()

    # Arm of every row (NA when the row belongs to none of the four arms).
    row.arm <- match(paste(temp[,"species"],temp[,"study_type"],sep="."),arm.column)
    mort <- rep(no.effect,length(arm.column))
    bw <- rep(no.effect,length(arm.column))
    for(a in unique(row.arm[!is.na(row.arm)])) {
      mat[i,arm.column[a]] <- "Y"
      arm.rows <- temp[which(row.arm==a),]
      where <- paste(code,arm.column[a])
      mort[a] <- lowest.dose(arm.rows,"Mortality_Incr",where)
      bw[a] <- lowest.dose(arm.rows,"BodyWeight_Decr",where)
    }

    # temp stays a data frame even when it has a single row: converting it
    # with as.matrix() turned dose into text, so the threshold comparisons
    # below were made as strings.
    for(k in seq_len(nrow(temp))) {
      endpoint <- temp[k,"endpoint_final"]
      if(!is.element(endpoint,effect.list)) next
      # Defaults for rows outside the four arms; bw.dose used to keep the
      # value from a previous row in that case.
      sl <- ""
      mort.dose <- no.effect
      bw.dose <- no.effect
      a <- row.arm[k]
      if(!is.na(a)) {
        sl <- arm.letter[a]
        mort.dose <- mort[a]
        bw.dose <- bw[a]
      }
      # NOTE(review): this minimum is taken over ALL of the chemical's rows
      # for the endpoint, not only those of the current arm - confirm that
      # the cross-arm minimum is intended.
      dose <- min(temp[is.element(temp[,"endpoint_final"],endpoint),"dose"])
      if(dose>=bw.dose) sl <- tolower(sl)
      if(dose>=mort.dose) sl <- paste(tolower(sl),"x",sep="")
      # Append the flag unless the cell already contains it.
      init <- mat[code,endpoint]
      if(length(grep(sl,init))==0) init <- paste(init,sl,sep="")
      mat[code,endpoint] <- init
    }
  }
  # NOTE(review): the output name is fixed to "anemia" regardless of the
  # syndrome argument.
  outfile <- paste("output/matrix_chem_data_anemia.xlsx",sep="")
  write.xlsx(mat,file=outfile, row.names=FALSE)
  invisible(mat)
}
#--------------------------------------------------------------------------------------
#
# Generate the matrix of chemicals by endpoints
#
#--------------------------------------------------------------------------------------
syndrome.matrix.numeric <- function(syndrome="anemia",endpoint.file="ToxRefDB/anemia_candidate_endpoints.xlsx",species.list=c("rat","mouse","dog"),do.prep=T) {
  # Build the chemical x endpoint matrix of numeric scores and write it to
  # output/matrix_chem_data_numeric_anemia.xlsx.
  #
  # Arguments:
  #   syndrome      - selects the input file output/syndrome_chem_data_<syndrome>.xlsx
  #   endpoint.file - xlsx file whose "endpoint" column lists the endpoints that
  #                   become the score columns of the matrix
  #   species.list  - not used in the body; kept so existing calls still work
  #   do.prep       - if TRUE, read both input files and publish them as the
  #                   globals effect.data, effect.list and all.data; if FALSE,
  #                   effect.list and all.data must already exist as globals
  #
  # For every chemical (CODE) the study columns rat.CHR, rat.SUB, mouse.CHR and
  # dog.CHR are flagged "Y" when the chemical has rows for that species / study
  # type. Each data row whose endpoint is in effect.list proposes a score of 2,
  # lowered to 1 when the endpoint's minimum dose is at or above the study's
  # BodyWeight_Decr dose, and to 0 when it is at or above the Mortality_Incr
  # dose. The accumulated score is divided by 2 * (number of flagged studies).
  #
  # NOTE(review): the output file name is fixed to "anemia" regardless of the
  # syndrome argument.
  if(do.prep) {
    effect.data <- read.xlsx(endpoint.file)
    effect.list <- effect.data[,"endpoint"]
    effect.data <<- effect.data
    effect.list <<- effect.list
    infile <- paste("output/syndrome_chem_data_",syndrome,".xlsx",sep="")
    all.data <- read.xlsx(infile)
    all.data <<- all.data
  }

  # The species / study type combinations that are scored; the column name is
  # always <species>.<study_type>
  study.cols    <- c("rat.CHR","rat.SUB","mouse.CHR","dog.CHR")
  study.species <- c("rat","rat","mouse","dog")
  study.types   <- c("CHR","SUB","CHR","CHR")
  # Sentinel dose meaning "effect not observed in this study"
  no.effect.dose <- 1000000

  name.list <- c("CODE","Name","rat.CHR","mouse.CHR","dog.CHR","rat.SUB","anemia_class",effect.list)
  code.list <- sort(unique(all.data[,"CODE"]))
  nchem <- length(code.list)
  cat("nchem: ",nchem,"\n")
  flush.console()

  mat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  mat[,1:7] <- ""
  names(mat) <- name.list
  mat[,"CODE"] <- code.list
  rownames(mat) <- code.list
  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- all.data[is.element(all.data[,"CODE"],code),]
    mat[i,"Name"] <- temp[1,"chemical_name"]
    cat(mat[i,"Name"] ,"\n"); flush.console()

    # Lowest mortality and body-weight-decrease dose per study
    mort <- rep(no.effect.dose,length(study.cols))
    names(mort) <- study.cols
    bw <- mort
    denom <- 0
    for(k in seq_along(study.cols)) {
      in.study <- is.element(temp[,"species"],study.species[k]) & is.element(temp[,"study_type"],study.types[k])
      if(!any(in.study)) next
      mat[i,study.cols[k]] <- "Y"
      temp2 <- temp[in.study,]
      is.mort <- is.element(temp2[,"endpoint_final"],"Mortality_Incr")
      is.bw <- is.element(temp2[,"endpoint_final"],"BodyWeight_Decr")
      if(any(is.mort)) mort[k] <- min(temp2[is.mort,"dose"])
      if(any(is.bw)) bw[k] <- min(temp2[is.bw,"dose"])
      # interactive sanity stop on an unusable dose
      if(mort[[k]]==Inf || is.nan(mort[[k]]) || bw[[k]]==Inf || is.nan(bw[[k]]) ) browser()
      denom <- denom+1
    }

    for(j in seq_along(effect.list)) mat[code,effect.list[j]] <- as.numeric(0)
    # temp stays a data frame even for a single row: converting it to a
    # matrix would turn the doses into strings and make the dose comparisons
    # below lexical instead of numeric.
    for(r in seq_len(dim(temp)[1])) {
      endpoint <- temp[r,"endpoint_final"]
      study.key <- paste(temp[r,"species"],temp[r,"study_type"],sep=".")
      sl <- 0
      mort.dose <- no.effect.dose
      # reset on every row so that a row from an unscored study cannot reuse
      # the body-weight dose of the previous row
      bw.dose <- no.effect.dose
      if(is.element(study.key,study.cols)) {
        sl <- 2
        mort.dose <- mort[[study.key]]
        bw.dose <- bw[[study.key]]
      }

      if(is.element(endpoint,effect.list)) {
        # NOTE(review): the minimum is taken over all of this chemical's rows
        # for the endpoint, not only the rows of the current study - confirm
        dose <- min(temp[is.element(temp[,"endpoint_final"],endpoint),"dose"])
        if(dose>=bw.dose) sl <- 1
        if(dose>=mort.dose) sl <- 0

        init <- mat[code,endpoint]
        # NOTE(review): the score is added only when its digit does not
        # already occur in the running total (grep on the number as text);
        # this mirrors the string-building variant of this routine - confirm
        # that this is the intended accumulation rule
        if(length(grep(sl,init))==0) init <- init+sl
        mat[code,endpoint] <- init
      }
    }
    for(j in seq_along(effect.list)) mat[code,effect.list[j]] <- mat[code,effect.list[j]] /(2*denom)
  }
  outfile <- paste("output/matrix_chem_data_numeric_anemia.xlsx",sep="")
  write.xlsx(mat,file=outfile, row.names=F)
  # interactive stop so the finished matrix can be inspected
  browser()
}
#--------------------------------------------------------------------------------------
#
# do the heatmaps of the anemia matrix
#
#--------------------------------------------------------------------------------------
anemia.hm <- function(to.file=F) {
  # Draw a clustered heatmap of the numeric anemia matrix.
  #
  # Arguments:
  #   to.file - if TRUE, plot into plots/anemia_hm.pdf instead of the current
  #             graphics device
  #
  # Only endpoints whose value in the second column of
  # output/anemia_endpoint_map.xlsx equals 2 are used. Chemicals with a row sum
  # of at most 0.5 and endpoints with a column sum below 0.5 are dropped.
  cat("==========================================================================\n")
  cat("anemiahm\n")
  cat("==========================================================================\n")
  pdf.dev <- NULL
  if(to.file) {
    file <- paste("plots/anemia_hm.pdf",sep="")
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    pdf.dev <- dev.cur()
    # make sure the pdf device is closed even if reading or plotting fails
    on.exit(if(is.element(pdf.dev,dev.list())) dev.off(pdf.dev),add=TRUE)
  }
  infile <- paste("output/anemia_endpoint_map.xlsx",sep="")
  map <- read.xlsx(infile)
  endpoint.list <- map[map[,2]==2,1]

  infile <- paste("output/matrix_chem_data_numeric_anemia 2015-07-23.xlsx",sep="")
  indata <- read.xlsx(infile)
  rownames(indata) <- indata[,"CODE"]

  syndrome <- as.matrix(indata[,endpoint.list])
  rs <- rowSums(syndrome)
  cs <- colSums(syndrome)
  # drop=FALSE keeps a matrix even if a single row or column survives
  syndrome <- syndrome[rs>0.5,cs>=0.5,drop=FALSE]
  result <- heatmap(t(syndrome),margins=c(2,20),scale="none",main="Anemia",symm=F,
                    xlab="",ylab="",cexRow=0.8,cexCol=0.01,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)

  if(to.file) dev.off(pdf.dev)
  # interactive stop so the result can be inspected
  browser()
}
#--------------------------------------------------------------------------------------
#
# calculate first order associations between anemia classes and ToxCast assays
#
#--------------------------------------------------------------------------------------
anemia.assoc <- function() {
  # Write 2x2 association statistics between each anemia class and each assay
  # to output/anemia_assoc.txt (one tab-separated line per class / assay).
  #
  # Relies on objects defined outside this function: CODE.LIST, MAT.tested,
  # MAT.Z.NORM, MAT.hitcall and the helper TxT().
  #
  # For each class, chemicals of that class are the positives and chemicals
  # with anemia_class "Negative" are the negatives; only chemicals tested in
  # the assay are counted. An assay is called active for a chemical when its
  # normalized Z value is >= 2 (missing values count as inactive).
  cat("==========================================================================\n")
  cat("anemia.assoc\n")
  cat("==========================================================================\n")
  infile <- paste("output/matrix_chem_data_numeric_anemia 2015-07-23.xlsx",sep="")
  indata <- read.xlsx(infile)
  rownames(indata) <- indata[,"CODE"]
  code.list <- indata[,"CODE"]
  code.list <- sort(code.list[is.element(code.list,CODE.LIST)])
  anemia <- indata[code.list,]
  mat.tested <- MAT.tested[code.list,]
  mat.z.norm <- MAT.Z.NORM[code.list,]
  mat.hitcall <- MAT.hitcall[code.list,]
  nchem <- dim(anemia)[1]
  # NOTE(review): the assay count comes from the hit-call matrix while the
  # assay names and values come from the tested / Z matrices - presumably all
  # three share the same columns; confirm
  nassay <- dim(mat.hitcall)[2]
  # dummy call, only used to obtain the column titles
  txt <-TxT(1,2,3,4)
  s <- paste("anemia.class\tassay\t",txt$title,"\n",sep="")
  outfile <- "output/anemia_assoc.txt"
  cat(s,file=outfile,append=F)
  class.list <- sort(unique(anemia[,"anemia_class"]))
  class.list <- class.list[!is.element(class.list,c("Equivocal","Negative",""))]
  for(i in seq_along(class.list)) {
    aclass <- class.list[i]
    # 0 = ignore, 1 = negative, 2 = member of the current class
    mask <- rep(0,nchem)
    mask[is.element(anemia[,"anemia_class"],"Negative")] <- 1
    mask[is.element(anemia[,"anemia_class"],aclass)] <- 2
    for(j in seq_len(nassay)) {
      assay <- names(mat.tested)[j]
      tested <- mat.tested[,j]
      mask2 <- mask*tested
      use <- mask2>0
      # y: 1 for the current class, 0 for negatives
      y <- mask2[use]-1
      # x: 1 when the normalized Z value reaches 2, else 0
      z <- mat.z.norm[use,j]
      x <- as.numeric(!is.na(z) & z>=2)

      a <- sum(x*y)
      b <- sum(x*(1-y))
      c <- sum((1-x)*y)
      d <- sum((1-x)*(1-y))
      doit <- T
      if(a<0 || b<0 || c<0 || d<0) doit <- F
      if(doit) {
        txt <- TxT(a,b,c,d)
        s <- paste(aclass,"\t",assay,"\t",txt$sval,"\n",sep="")
        cat(s,file=outfile,append=T)
        cat(s)
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# Merge the anemia chemical matrix with the physchem / cell stress table
#
#--------------------------------------------------------------------------------------
anemia.physchem <- function() {
  # Join the first six columns of the CHR anemia matrix with the physchem /
  # cell stress table for the chemicals (CODE) present in both, sorted by
  # CODE, and write the result to output/anemia_physchem 2015-03-26.xlsx.
  anemia.tab <- read.xlsx("output/matrix_chem_data_CHR_anemia 2015-03-26.xlsx")
  rownames(anemia.tab) <- anemia.tab[,"CODE"]

  physchem.tab <- read.xlsx("physchem/toxcast_physchem_cellstress_DFT.xlsx")
  rownames(physchem.tab) <- physchem.tab[,"CODE"]

  # codes of the anemia table that also occur in the physchem table
  in.both <- is.element(anemia.tab[,"CODE"],physchem.tab[,"CODE"])
  shared.codes <- sort(anemia.tab[in.both,"CODE"])

  merged <- cbind(anemia.tab[shared.codes,1:6],physchem.tab[shared.codes,])

  write.xlsx(merged,file="output/anemia_physchem 2015-03-26.xlsx", row.names=F)
  # interactive stop so the merged table can be inspected
  browser()
}
#--------------------------------------------------------------------------------------
#
# t-tests of the physchem predictors for each anemia class vs. "No anemia"
#
#--------------------------------------------------------------------------------------
anemia.ttest <- function() {
  # For each anemia class, t-test every predictor column (column 35 onwards of
  # output/anemia_physchem 2015-03-26.xlsx) between the chemicals of that
  # class and the chemicals with anemia_class "No anemia". One tab-separated
  # line per class / predictor is written to output/anemia_physchem_stats.txt.
  #
  # The last entry of the sorted class list is not tested, and classes with
  # two or fewer chemicals are skipped.
  infile <- "output/anemia_physchem 2015-03-26.xlsx"
  anemia <- read.xlsx(infile)
  rownames(anemia) <- anemia[,"CODE"]
  pred.list <- names(anemia)[35:dim(anemia)[2]]
  code.list <- anemia[,"CODE"]

  class.list <- sort(unique(anemia[,"anemia_class"]))
  # drop the last class
  class.list <- head(class.list,-1)

  # warnings are promoted to errors while this function runs; the previous
  # setting is restored on exit instead of leaking into the session
  old.opts <- options(warn=2)
  on.exit(options(old.opts),add=TRUE)

  outfile <- "output/anemia_physchem_stats.txt"
  s <- "aclass\tpred\tn.pos\tmean.pos\tsd.pos\tn.neg\tmean.neg\tsd.neg\tp.value\n"
  cat(s,file=outfile,append=F)

  # the reference group is the same for every class
  code.list.neg <- code.list[is.element(anemia[,"anemia_class"],"No anemia")]
  for(j in seq_along(class.list)) {
    aclass <- class.list[j]
    cat("\n============================\n")
    cat(aclass,"\n")
    cat("============================\n")

    code.list.pos <- code.list[is.element(anemia[,"anemia_class"],aclass)]
    if(length(code.list.pos)>2) {
      for(i in seq_along(pred.list)) {
        pred <- pred.list[i]
        x.pos <- as.numeric(anemia[code.list.pos,pred])
        x.neg <- as.numeric(anemia[code.list.neg,pred])
        x.pos <- x.pos[!is.na(x.pos)]
        x.neg <- x.neg[!is.na(x.neg)]

        n.pos <- length(x.pos)
        n.neg <- length(x.neg)
        # t.test() stops with an error when either group has fewer than two
        # observations; skip such predictors instead of aborting the run
        if(n.pos<2 || n.neg<2) next

        mean.pos <- mean(x.pos)
        sd.pos <- sd(x.pos)
        mean.neg <- mean(x.neg)
        sd.neg <- sd(x.neg)

        ret <- t.test(x.pos,x.neg)
        if(!is.na(ret$p.value)) {
          cat(pred,":",ret$p.value,"\n")
          s <- paste(aclass,pred,n.pos,format(mean.pos,digits=2),format(sd.pos,digits=2),n.neg,format(mean.neg,digits=2),format(sd.neg,digits=2),format(ret$p.value,digits=2),sep="\t")
          s <- paste(s,"\n",sep="")
          cat(s,file=outfile,append=T)
        }
      }
    }
  }

}
#--------------------------------------------------------------------------------------
#
# 2x2 association statistics between anemia classes and ToxCast / cell stress readouts
#
#--------------------------------------------------------------------------------------
anemia.toxcast <- function() {
  # Write 2x2 association statistics between anemia classes and (a) every
  # column of the global MAT.Z and (b) a fixed list of columns of
  # output/anemia_physchem 2015-03-26.xlsx to
  # output/anemia_physchem_toxcast_stats.txt.
  #
  # Relies on objects defined outside this function: MAT.Z, ASSAY.INFO and the
  # helper TxT().
  #
  # Class "All" uses every chemical that is not "No anemia" as positives; each
  # other class (except the last entry of the sorted class list) uses its own
  # chemicals. The negatives are always the "No anemia" chemicals. Classes
  # with two or fewer positive chemicals are skipped.
  infile <- "output/matrix_chem_data_CHR_anemia 2015-03-26.xlsx"
  anemia <- read.xlsx(infile)
  rownames(anemia) <- anemia[,"CODE"]
  code.list <- anemia[,"CODE"]
  code.list <- code.list[is.element(code.list,rownames(MAT.Z))]
  anemia <- anemia[code.list,]

  infile <- "output/anemia_physchem 2015-03-26.xlsx"
  toxcast_physchem <- read.xlsx(infile)
  rownames(toxcast_physchem) <- toxcast_physchem[,"CODE"]
  tp.list <- c("cytotoxicity_BLA","cytotoxicity_SRB","proliferation_decrease","ER_stress","apoptosis_up","microtubule_up","mitochondrial_disruption_up","oxidative_stress_up","cell_cycle_up","heat_shock","hypoxia","estrogen_receptor","androgen_receptor","ppar_signaling","ion_channel","CYP450","GPCR","AHR","PTPN","AChE","CCL2")

  class.list <- sort(unique(anemia[,"anemia_class"]))
  # drop the last class
  class.list <- head(class.list,-1)
  nclass <- length(class.list)

  outfile <- "output/anemia_physchem_toxcast_stats.txt"
  # dummy call, only used to obtain the column titles
  txt <- TxT(1,2,3,4)
  s <- paste("aclass\tassay\tbioprocess\tn.pos\tn.neg\t",txt$title,"\n",sep="")
  cat(s,file=outfile,append=F)

  # A value counts as a hit when it reaches the cutoff.
  # NOTE(review): positives and negatives use different cutoffs (3 vs 2);
  # kept as found - confirm that the asymmetry is intended
  pos.cutoff <- 3
  neg.cutoff <- 2

  # Count hits in both groups (ignoring missing values), run TxT on the
  # resulting 2x2 table and append one line to the output file
  write.stats <- function(aclass,assay,bioprocess,pred.pos,pred.neg) {
    pred.pos <- pred.pos[!is.na(pred.pos)]
    pred.neg <- pred.neg[!is.na(pred.neg)]
    n.pos <- length(pred.pos)
    n.neg <- length(pred.neg)
    pos.hit <- sum(pred.pos>=pos.cutoff)
    pos.miss <- n.pos-pos.hit
    neg.hit <- sum(pred.neg>=neg.cutoff)
    # misses among the negatives: derived from the negative hits, not from
    # the positive misses
    neg.miss <- n.neg-neg.hit
    txt <- TxT(pos.hit,neg.hit,pos.miss,neg.miss)
    s <- paste(aclass,"\t",assay,"\t",bioprocess,"\t",n.pos,"\t",n.neg,"\t",txt$sval,"\n",sep="")
    cat(s,file=outfile,append=T)
  }

  nassay <- dim(MAT.Z)[2]
  code.list.neg <- code.list[is.element(anemia[,"anemia_class"],"No anemia")]
  for(j in 0:nclass) {
    if(j==0) {
      aclass <- "All"
      code.list.pos <- code.list[!is.element(code.list,code.list.neg)]
    }
    else {
      aclass <- class.list[j]
      code.list.pos <- code.list[is.element(anemia[,"anemia_class"],aclass)]
    }
    cat("\n============================\n")
    cat(aclass,"\n")
    cat("============================\n")
    flush.console()
    if(length(code.list.pos)>2) {
      for(i in seq_len(nassay)) {
        assay <- names(MAT.Z)[i]
        bioprocess <- ASSAY.INFO[assay,"biological_process"]
        write.stats(aclass,assay,bioprocess,MAT.Z[code.list.pos,i],MAT.Z[code.list.neg,i])
      }
      for(assay in tp.list) {
        # these columns have no separate bioprocess annotation
        write.stats(aclass,assay,assay,toxcast_physchem[code.list.pos,assay],toxcast_physchem[code.list.neg,assay])
      }
    }
  }

}
#--------------------------------------------------------------------------------------
#
# do the heatmaps of the first level syndromes 
#
#--------------------------------------------------------------------------------------
syndrome.coor.hm <- function(species,study_type,to.file=F) {
  # For every syndrome, draw a clustered heatmap of endpoint-to-endpoint
  # associations among the endpoints assigned to that syndrome.
  #
  # Arguments:
  #   species, study_type - select the rows of the endpoint file and the input
  #                         file output/lel_correlations_<species>_<study_type>.xlsx
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_coor_hm_<species>_<study_type>.pdf;
  #                         if FALSE, stop in browser() after each plot
  #
  # A cell holds the PPV of an endpoint pair when TP >= 5, p.value < 0.05 and
  # PPV > 0.1, else 0; the diagonal is 1. Endpoints without any such
  # association are dropped before plotting.
  cat("==========================================================================\n")
  cat("syndrome.coor.hm\n")
  cat("==========================================================================\n")
  if(to.file) {
    file <- paste("plots/syndrome_coor_hm_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  infile <- ENDPOINT.FILE
  syndromes <- read.xlsx(infile)
  filename <- ENDPOINT.MAP.FILE
  replacements <- read.xlsx(filename)
  # map endpoint names to their replacement names where one is defined
  for(i in seq_len(dim(syndromes)[1])) {
    endpoint <- syndromes[i,"endpoint_final"]
    if(is.element(endpoint,replacements[,"endpoint_final"])) {
      endpoint.replace <- replacements[is.element(replacements[,"endpoint_final"],endpoint),"replacement"]
      syndromes[i,"endpoint_final"] <- endpoint.replace
    }
  }

  syndromes <- syndromes[syndromes[,"useme"]==1,]
  syndromes <- syndromes[is.element(syndromes[,"species"],species),]
  syndromes <- syndromes[is.element(syndromes[,"study_type"],study_type),]

  # an endpoint can be assigned to up to five syndromes
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist <- sort(unique(slist))

  infile <- paste("output/lel_correlations_",species,"_",study_type,".xlsx",sep="")
  effect_coor <- read.xlsx(infile)

  for(i in seq_along(slist)) {
    syndrome <- slist[i]
    # endpoints that list this syndrome in any of the five syndrome columns
    in.syndrome <- rep(FALSE,dim(syndromes)[1])
    for(sc in syndrome.cols) in.syndrome <- in.syndrome | is.element(syndromes[,sc],syndrome)
    effect.list <- sort(unique(syndromes[in.syndrome,"endpoint_final"]))
    cat("syndrome: ",syndrome,"\n"); flush.console()

    # correlation rows where both endpoints belong to this syndrome
    keep <- is.element(effect_coor[,"endpoint1"],effect.list) & is.element(effect_coor[,"endpoint2"],effect.list)
    temp <- effect_coor[keep,]
    n <- length(effect.list)
    if(any(keep)) {
      simmat <- matrix(ncol=n,nrow=n)
      simmat[] <- 0
      rownames(simmat) <- effect.list
      colnames(simmat) <- effect.list
      for(j in seq_len(dim(temp)[1])) {
        e1 <- temp[j,"endpoint1"]
        e2 <- temp[j,"endpoint2"]
        p <- temp[j,"p.value"]
        ppv <- temp[j,"PPV"]
        tp <- temp[j,"TP"]
        if(tp>=5 && p<0.05 && ppv>0.1) simmat[e1,e2] <- ppv
      }
      diag(simmat) <- 1
      rs <- rowSums(simmat)
      cs <- colSums(simmat)
      # drop=FALSE: without it a single surviving row or column collapses the
      # matrix to a vector and the dimension checks below fail
      simmat <- simmat[rs>1,cs>1,drop=FALSE]
      cex.hm <- 0.7
      maxn <- max(dim(simmat)[1],dim(simmat)[2])
      if(maxn>20) cex.hm <- 0.5
      if(maxn>40) cex.hm <- 0.2
      if(dim(simmat)[1]>1 && dim(simmat)[2]>1) {
        result <- heatmap(simmat,margins=c(25,25),scale="none",main=paste(syndrome),symm=F,
                          xlab="",ylab="",cexRow=cex.hm,cexCol=cex.hm,col=brewer.pal(9,"Reds"),
                          hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)

        if(!to.file) browser()
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# do the histograms of hits in the syndromes 
#
#--------------------------------------------------------------------------------------
syndrome.hist <- function(species,study_type,to.file=F) {
  # For every syndrome with more than one endpoint, draw two bar plots:
  #   1. number of chemicals by number of positive endpoints of the syndrome
  #   2. for chemicals positive in exactly one endpoint ("singletons"), the
  #      percentage of them positive in each endpoint
  #
  # Arguments:
  #   species, study_type - select the rows of the endpoint file and the input
  #                         file ToxRefDB/hit_mat_<species>_<study_type>.xlsx
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_hist_<species>_<study_type>.pdf;
  #                         if FALSE, stop in browser() after each syndrome
  cat("==========================================================================\n")
  cat("syndrome.hist\n")
  cat("==========================================================================\n")
  if(to.file) {
    file <- paste("plots/syndrome_hist_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(3,2),mar=c(5,4,4,4))

  infile <- ENDPOINT.FILE
  syndromes <- read.xlsx(infile)
  filename <- ENDPOINT.MAP.FILE
  replacements <- read.xlsx(filename)
  # map endpoint names to their replacement names where one is defined
  for(i in seq_len(dim(syndromes)[1])) {
    endpoint <- syndromes[i,"endpoint_final"]
    if(is.element(endpoint,replacements[,"endpoint_final"])) {
      endpoint.replace <- replacements[is.element(replacements[,"endpoint_final"],endpoint),"replacement"]
      syndromes[i,"endpoint_final"] <- endpoint.replace
    }
  }

  syndromes <- syndromes[syndromes[,"useme"]==1,]
  syndromes <- syndromes[is.element(syndromes[,"species"],species),]
  syndromes <- syndromes[is.element(syndromes[,"study_type"],study_type),]

  # an endpoint can be assigned to up to five syndromes
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist <- sort(unique(slist))

  infile <- paste("ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  hit.mat <- read.xlsx(infile)

  # prefix stripped from the endpoint names used as bar labels
  prefix <- paste(study_type,"_",species,"_",sep="")

  for(i in seq_along(slist)) {
    syndrome <- slist[i]
    # endpoints that list this syndrome in any of the five syndrome columns
    in.syndrome <- rep(FALSE,dim(syndromes)[1])
    for(sc in syndrome.cols) in.syndrome <- in.syndrome | is.element(syndromes[,sc],syndrome)
    effect.list <- sort(unique(syndromes[in.syndrome,"endpoint_final"]))
    ne <- length(effect.list)
    cat("syndrome: ",syndrome,":",ne,"\n"); flush.console()
    if(ne>1) {
      temp <- hit.mat[,effect.list]

      # plot 1: histogram of the number of positive endpoints per chemical
      rs <- rowSums(temp)
      rs <- rs[rs>0]
      nchem.all <- length(rs)
      x <- seq(from=1,to=ne)
      y <- rep(0,ne)
      for(k in seq_along(rs)) y[rs[k]] <- y[rs[k]]+1
      barplot(y,main=syndrome,xlim=c(0,ne),names.arg=x,xlab="N(Endpoints)",ylab="N(Chemicals)")

      # plot 2: which endpoint is hit by the chemicals with exactly one hit
      rs <- rowSums(temp)
      temp2 <- temp[rs==1,]
      nchem.1 <- dim(temp2)[1]
      main <- paste("Singletons: ",nchem.1, " of ",nchem.all,sep="")
      cs <- 100*colSums(temp2)/dim(temp2)[1]
      index <- sort(cs,index.return=T)$ix
      cs <- cs[index]
      barplot(cs,horiz=T,xlim=c(0,100),xlab="%Chemicals Positive",names.arg=F,main=main)
      # write the endpoint name next to each bar
      for(k in seq_along(cs)) {
        endpoint <- names(cs)[k]
        endpoint <- str_replace_all(endpoint,prefix,"")
        # bar centres are spaced 1.2 apart
        label.y <- (k-0.5)*1.2
        label.x <- min(cs[k],10)

        text(label.x,label.y,endpoint,pos=4,cex=0.7)
      }
      if(!to.file) browser()
    }
  }
}
#--------------------------------------------------------------------------------------
#
# do the chemical x endpoint heatmaps for each first level syndrome
#
#--------------------------------------------------------------------------------------
syndrome.chem.hm <- function(species,study_type,to.file=F) {
  # For every syndrome with more than one endpoint, draw a clustered heatmap
  # of endpoints (rows) by chemicals (columns) from the hit matrix, restricted
  # to chemicals with at least one hit and endpoints with at least one hit.
  #
  # Arguments:
  #   species, study_type - select the rows of the endpoint file and the input
  #                         file ToxRefDB/hit_mat_<species>_<study_type>.xlsx
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_chem_hm_<species>_<study_type>.pdf;
  #                         if FALSE, stop in browser() after each plot
  cat("==========================================================================\n")
  cat("syndrome.chem.hm \n")
  cat("==========================================================================\n")
  if(to.file) {
    file <- paste("plots/syndrome_chem_hm_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  infile <- ENDPOINT.FILE
  syndromes <- read.xlsx(infile)
  filename <- ENDPOINT.MAP.FILE
  replacements <- read.xlsx(filename)
  # map endpoint names to their replacement names where one is defined
  for(i in seq_len(dim(syndromes)[1])) {
    endpoint <- syndromes[i,"endpoint_final"]
    if(is.element(endpoint,replacements[,"endpoint_final"])) {
      endpoint.replace <- replacements[is.element(replacements[,"endpoint_final"],endpoint),"replacement"]
      syndromes[i,"endpoint_final"] <- endpoint.replace
    }
  }

  syndromes <- syndromes[syndromes[,"useme"]==1,]
  syndromes <- syndromes[is.element(syndromes[,"species"],species),]
  syndromes <- syndromes[is.element(syndromes[,"study_type"],study_type),]

  # an endpoint can be assigned to up to five syndromes
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist <- sort(unique(slist))

  infile <- paste("ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  hit.mat <- read.xlsx(infile)

  for(i in seq_along(slist)) {
    syndrome <- slist[i]
    cat(syndrome,"\n"); flush.console()
    # endpoints that list this syndrome in any of the five syndrome columns
    in.syndrome <- rep(FALSE,dim(syndromes)[1])
    for(sc in syndrome.cols) in.syndrome <- in.syndrome | is.element(syndromes[,sc],syndrome)
    effect.list <- sort(unique(syndromes[in.syndrome,"endpoint_final"]))
    if(length(effect.list)>1) {

      temp <- hit.mat[,effect.list]
      rs <- rowSums(temp)
      temp <- temp[rs>0,]
      chems <- hit.mat[rs>0,"Name"]
      nchem <- length(chems)
      main <- paste(syndrome,":",nchem)
      if(nchem>1) {
        cs <- colSums(temp)
        keep.col <- cs>0
        if(length(cs[keep.col])>1) {
          temp <- temp[,keep.col]
          # the row labels must be filtered together with the columns of temp;
          # heatmap() takes labels by position, so passing the unfiltered
          # endpoint list would mislabel rows whenever an endpoint was dropped
          result <- heatmap(t(temp),margins=c(5,20),scale="none",main=main,
                            xlab="",ylab="",cexRow=0.7,cexCol=0.7,col=brewer.pal(9,"Reds"),
                            hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F,
                            labCol=chems,labRow=effect.list[keep.col])
          if(!to.file) browser()
        }
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# do the individual chemical plots 
#
#--------------------------------------------------------------------------------------
syndrome.chem.plots <- function(species,study_type,to.file=F) {
  # For every chemical that hits at least one syndrome, draw one panel showing,
  # per syndrome hit, the range from the lowest to the highest LEL of the
  # syndrome's endpoints on a log dose axis, together with the chemical's
  # ldt and hdt as vertical lines.
  #
  # Arguments:
  #   species, study_type - select the input files
  #                         syndromes/syndromes_<species>_<study_type>.xlsx,
  #                         syndromes/syndromes_min_<species>_<study_type>.xlsx,
  #                         ToxRefDB/lel_mat_<species>_<study_type>.xlsx and
  #                         ToxRefDB/hit_mat_<species>_<study_type>.xlsx
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_chem_plots_<species>_<study_type>.pdf;
  #                         if FALSE, stop in browser() after each chemical
  #
  # A chemical hits a syndrome when its number of positive endpoints reaches
  # the syndrome's min_effect value.
  cat("==========================================================================\n")
  cat("syndrome.chem.plots\n")
  cat("==========================================================================\n")
  if(to.file) {
    file <- paste("plots/syndrome_chem_plots_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(3,1),mar=c(5,4,4,2))

  infile <- paste("syndromes/syndromes_",species,"_",study_type,".xlsx",sep="")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]
  # an endpoint can be assigned to up to five syndromes
  syndrome.cols <- c("syndrome1","syndrome2","syndrome3","syndrome4","syndrome5")
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist <- sort(unique(slist))
  nsyndrome <- length(slist)

  # minimum number of positive endpoints required per syndrome
  infile <- paste("syndromes/syndromes_min_",species,"_",study_type,".xlsx",sep="")
  syndromes.min <- read.xlsx(infile)
  rownames(syndromes.min) <- syndromes.min[,"syndrome"]
  slist.min <- vector(mode="integer",length=nsyndrome)
  for(i in seq_along(slist)) slist.min[i] <- syndromes.min[slist[i],"min_effect"]

  # the first six columns describe the chemical, the rest are endpoints
  infile <- paste("ToxRefDB/lel_mat_",species,"_",study_type,".xlsx",sep="")
  lel.mat <- read.xlsx(infile)
  chems <- lel.mat[,1:6]
  lel.mat <- lel.mat[,7:dim(lel.mat)[2]]

  infile <- paste("ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  hit.mat <- read.xlsx(infile)
  # use the hit matrix's own width: lel.mat has already lost its six leading
  # columns, so its width would cut off the last six endpoint columns here
  hit.mat <- hit.mat[,7:dim(hit.mat)[2]]

  nchem <- dim(lel.mat)[1]
  dose.min <- 0.01
  dose.max <- 100000
  ymax <- 1
  # chemical x syndrome tables: hit flag, min / max LEL, number of endpoints
  # in the syndrome and number of endpoints with an LEL
  syn.hit <- matrix(nrow=nchem,ncol=nsyndrome)
  rownames(syn.hit) <- chems[,"CODE"]
  colnames(syn.hit) <- slist
  syn.hit[] <- 0
  syn.min <- syn.hit
  syn.max <- syn.hit
  syn.tot <- syn.hit
  syn.pos <- syn.hit
  for(i in seq_along(slist)) {
    syndrome <- slist[i]
    cat("prepare ",syndrome,"\n");flush.console()
    nmin <- slist.min[i]
    # endpoints that list this syndrome in any of the five syndrome columns
    in.syndrome <- rep(FALSE,dim(syndromes)[1])
    for(sc in syndrome.cols) in.syndrome <- in.syndrome | is.element(syndromes[,sc],syndrome)
    effect.list <- sort(unique(syndromes[in.syndrome,"endpoint"]))
    if(length(effect.list)>1) {
      temp <- hit.mat[,effect.list]
      rs <- rowSums(temp)
      rs[rs<nmin] <- 0
      rs[rs>0] <- 1
      syn.hit[,i] <- rs
      temp <- lel.mat[,effect.list]
      for(j in seq_len(nchem)) {
        if(syn.hit[j,i]==1) {
          lel.list <- temp[j,]
          syn.tot[j,i] <- length(lel.list)
          lel.list <- lel.list[!is.na(lel.list)]
          syn.min[j,i] <- min(lel.list)
          syn.max[j,i] <- max(lel.list)
          syn.pos[j,i] <- length(lel.list)
        }
      }
    }
  }
  nsyn.list <- rowSums(syn.hit)

  for(j in seq_len(nchem)) {
    code <- chems[j,"CODE"]
    cname <- chems[j,"Name"]
    nsyn <- nsyn.list[j]
    if(nsyn>0) {
      plot(0~0,type="n",ylim=c(0,ymax),xlim=c(dose.min,dose.max),log="x",xlab="Dose",ylab="",cex.axis=1,cex.lab=1,main=cname)
      ldt <- chems[j,"ldt"]
      hdt <- chems[j,"hdt"]
      lines(c(ldt,ldt),c(0,ymax),col="black",lwd=2)
      lines(c(hdt,hdt),c(0,ymax),col="black",lwd=2)

      # the syndromes hit are stacked evenly over the height of the panel
      delta <- 1/nsyn
      counter <- 1
      for(i in seq_along(slist)) {
        syndrome <- slist[i]
        hit <- syn.hit[code,syndrome]
        if(hit==1) {
          lel.min <- syn.min[code,syndrome]
          lel.max <- syn.max[code,syndrome]
          ntot <- syn.tot[code,syndrome]
          npos <- syn.pos[code,syndrome]
          y <- (counter-0.5)*delta
          counter <- counter+1
          # mortality syndromes in red, body weight syndromes in green
          col <- "black"
          col2 <- "gray"
          if(grepl("Mortality",syndrome,perl=T,ignore.case=T)) {
            col <- "red"
            col2 <- "red"
          }
          if(grepl("Body",syndrome,perl=T,ignore.case=T)) {
            col <- "green"
            col2 <- "green"
          }
          lines(c(lel.min,lel.max),c(y,y),lwd=2,col=col)
          points(lel.min,y,pch=23,cex=2,bg=col2)
          points(lel.max,y,pch=23,cex=2,bg=col2)
          text(hdt*1.1,y,paste(syndrome," : ",npos,"/",ntot,sep=""),pos=4,cex=1)
        }
      }
      if(!to.file) browser()
    }
  }
}
#--------------------------------------------------------------------------------------
#
# do the heatmap of all chemicals x all syndromes
#
#--------------------------------------------------------------------------------------
syndrome.allchem.hm <- function(species,study_type,to.file=F) {
  # Draw one clustered heatmap of syndromes (rows) by chemicals (columns) from
  # output/syndrome_lelmat_<species>_<study_type>.xlsx, leaving out chemicals
  # and syndromes whose sum is not positive.
  #
  # Arguments:
  #   species, study_type - select the input file and name the pdf
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_allchem_hm_<species>_<study_type>.pdf
  cat("==========================================================================\n")
  cat("syndrome.allchem.hm\n")
  cat("==========================================================================\n")
  if(to.file) {
    file <- paste("plots/syndrome_allchem_hm_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(5,4,4,2))

  infile <- paste("output/syndrome_lelmat_",species,"_",study_type,".xlsx",sep="")
  resmat <- read.xlsx(infile)

  # the first six columns describe the chemical, the rest are syndromes
  chem.data <- resmat[,1:6]
  resmat <- resmat[,7:dim(resmat)[2]]
  rownames(resmat) <- chem.data[,"Name"]
  rs <- rowSums(resmat)
  cs <- colSums(resmat)
  # drop=FALSE keeps a table even if a single column survives
  mat <- resmat[rs>0,cs>0,drop=FALSE]
  result <- heatmap(t(mat),margins=c(5,12),scale="none",
                    xlab="",ylab="",cexRow=0.4,cexCol=0.1,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)
}
#--------------------------------------------------------------------------------------
#
# do the heatmap of the syndrome x syndrome co-occurrence (odds ratio) matrix
#
#--------------------------------------------------------------------------------------
syndrome.allcoor.hm <- function(species,study_type,do.prep=F,to.file=F) {
  # Draw a clustered heatmap of syndrome-to-syndrome associations.
  #
  # Arguments:
  #   species, study_type - select the input file
  #                         output/syndrome_lelmat_<species>_<study_type>.xlsx
  #                         and name the pdf / plot title
  #   do.prep             - if TRUE, compute the association matrix and store
  #                         it in the global COORMAT; if FALSE, COORMAT from an
  #                         earlier call is plotted
  #   to.file             - if TRUE, plot into
  #                         plots/syndrome_allcoor_hm_<species>_<study_type>.pdf
  #
  # For each pair of syndromes a 2x2 table is built over the chemicals with a
  # value for both and passed to the helper TxT() (defined elsewhere). A pair
  # with at least 3 joint positives, p.value < 0.05 and odds ratio > 2 gets
  # the value (odds ratio - 1), capped at 10; all other pairs get 0.
  # NOTE(review): the table counts assume 0/1 entries in the input - confirm.
  cat("==========================================================================\n")
  cat("syndrome.allcoor.hm\n")
  cat("==========================================================================\n")
  if(do.prep) {
    infile <- paste("output/syndrome_lelmat_",species,"_",study_type,".xlsx",sep="")
    resmat <- read.xlsx(infile)

    # the first six columns describe the chemical, the rest are syndromes
    chem.data <- resmat[,1:6]
    resmat <- resmat[,7:dim(resmat)[2]]
    rownames(resmat) <- chem.data[,"Name"]

    syn.list <- colnames(resmat)
    nsyn <- length(syn.list)
    coormat <- matrix(ncol=nsyn,nrow=nsyn)
    rownames(coormat) <- syn.list
    colnames(coormat) <- syn.list
    coormat[] <- 0
    # all pairs i < j; seq_len() yields no pairs when there is one syndrome
    for(i in seq_len(nsyn-1)) {
      for(j in (i+1):nsyn) {
        s1 <- resmat[,i]
        s2 <- resmat[,j]
        # keep only chemicals with a value for both syndromes
        both <- !is.na(s1) & !is.na(s2)
        s1 <- s1[both]
        s2 <- s2[both]
        # need more than two chemicals to build a table
        if(length(s1)>2) {
          a <- sum(s1*s2)
          b <- sum((1-s1)*s2)
          c <- sum(s1*(1-s2))
          d <- sum((1-s1)*(1-s2))
          if(a>=3) {
            txt <- TxT(a,b,c,d)
            p.value <- txt$p.value
            or <- txt$odds.ratio
            if(p.value<0.05 && or>2) {
              coormat[i,j] <- or-1
              coormat[j,i] <- or-1
            }
          }
        }
      }
    }
    diag(coormat) <- 10
    coormat[coormat>10] <- 10
    COORMAT <<- coormat
  }
  if(to.file) {
    file <- paste("plots/syndrome_allcoor_hm_",species,"_",study_type,".pdf",sep="")
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    # close the pdf device on normal exit and on error alike
    on.exit(dev.off(),add=TRUE)
  }
  coormat <- COORMAT
  # drop syndromes without any association to another syndrome; the diagonal
  # is zeroed first so that it does not count, then restored
  diag(coormat) <- 0
  rs <- rowSums(coormat)
  # drop=FALSE keeps a matrix even if a single syndrome survives
  coormat <- coormat[rs>0,rs>0,drop=FALSE]
  diag(coormat) <- 10
  par(mfrow=c(1,1),mar=c(5,4,4,2))
  result <- heatmap(coormat,margins=c(8,8),scale="none",symm=T,
                    xlab="",ylab="",cexRow=0.4,cexCol=0.4,col=brewer.pal(9,"Reds"),main=paste(study_type,":",species),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=T,verbose=F,na.rm=F)
}
#--------------------------------------------------------------------------------------
#
# calculate correlations between syndromes for SUB and CHR 
#
#--------------------------------------------------------------------------------------
SUB.CHR.syndrome.correlations <- function() {
  # For each (SUB syndrome, CHR syndrome) pair listed in
  # syndromes/SUB_CHR_syndrome_comp.xlsx, build a 2x2 table from the matching
  # columns of syndromes/all_syndrome_lelmat.xlsx and write the TxT() summary
  # to the tab-delimited file syndromes/SUB_CHR_syndrome_correlations.txt.
  #
  # Pairs are skipped when either column is missing from the matrix or when
  # fewer than three chemicals are positive for both syndromes.
  # TxT() is defined outside this section; its title and sval fields are used.
  cat("==========================================================================\n")
  cat("SUB.CHR.syndrome.correlations\n")
  cat("==========================================================================\n")
  outfile <- "syndromes/SUB_CHR_syndrome_correlations.txt"
  # TxT() is called on dummy counts only to obtain the header text
  txt <- TxT(1,2,3,4)
  cat(paste("SUB.syndrome\tCHR.syndrome\t",txt$title,"\n",sep=""),file=outfile,append=FALSE)
  
  sub.chr <- read.xlsx("syndromes/SUB_CHR_syndrome_comp.xlsx")
  ncomp <- dim(sub.chr)[1]
  
  lel.mat <- read.xlsx("syndromes/all_syndrome_lelmat.xlsx")
  rownames(lel.mat) <- lel.mat[,"CODE"]
  
  # seq_len() yields an empty loop when the comparison table has no rows
  for(i in seq_len(ncomp)) {
    syn.sub <- sub.chr[i,1]
    syn.chr <- sub.chr[i,2]
    if(is.element(syn.sub,names(lel.mat)) && is.element(syn.chr,names(lel.mat))) {
      lel.sub <- lel.mat[,syn.sub]
      lel.chr <- lel.mat[,syn.chr]
      # use only chemicals with a value for both syndromes
      ok <- !is.na(lel.sub) & !is.na(lel.chr)
      lel.sub <- lel.sub[ok]
      lel.chr <- lel.chr[ok]
      
      a <- sum(lel.sub*lel.chr)
      if(a>=3) {
        b <- sum(lel.sub*(1-lel.chr))
        c <- sum((1-lel.sub)*lel.chr)
        d <- sum((1-lel.sub)*(1-lel.chr))
        txt <- TxT(a,b,c,d)
        s <- paste(syn.sub,"\t",syn.chr,"\t",txt$sval,"\n",sep="")
        cat(s,file=outfile,append=TRUE)
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# calculate correlations between effects for SUB and CHR 
#
#--------------------------------------------------------------------------------------
SUB.CHR.effect.correlations <- function() {
  # For each (SUB effect, CHR effect) pair listed in
  # syndromes/SUB_CHR_effect_comp.xlsx, build a 2x2 table from the matching
  # columns of ToxRefDB/LEL_MAT_SUB.xlsx and ToxRefDB/LEL_MAT_CHR.xlsx
  # (restricted to chemicals present in both) and write the TxT() summary to
  # the tab-delimited file syndromes/SUB_CHR_effect_correlations.txt.
  #
  # Missing values are treated as 0 and any value above 0 as 1. Pairs are
  # skipped when either column is missing or when fewer than three chemicals
  # are positive for both effects.
  # TxT() is defined outside this section; its title and sval fields are used.
  cat("==========================================================================\n")
  cat("SUB.CHR.effect.correlations\n")
  cat("==========================================================================\n")
  outfile <- "syndromes/SUB_CHR_effect_correlations.txt"
  # TxT() is called on dummy counts only to obtain the header text
  txt <- TxT(1,2,3,4)
  cat(paste("SUB.effect\tCHR.effect\t",txt$title,"\n",sep=""),file=outfile,append=FALSE)
  
  sub.chr <- read.xlsx("syndromes/SUB_CHR_effect_comp.xlsx")
  ncomp <- dim(sub.chr)[1]
  
  mat.sub <- read.xlsx("ToxRefDB/LEL_MAT_SUB.xlsx")
  rownames(mat.sub) <- mat.sub[,"CODE"]
  
  mat.chr <- read.xlsx("ToxRefDB/LEL_MAT_CHR.xlsx")
  rownames(mat.chr) <- mat.chr[,"CODE"]
  
  # align both matrices on the chemicals they share, in SUB order
  codes.sub <- mat.sub[,"CODE"]
  codes.chr <- mat.chr[,"CODE"]
  code.list <- codes.sub[is.element(codes.sub,codes.chr)]
  mat.sub <- mat.sub[code.list,]
  mat.chr <- mat.chr[code.list,]
  
  # seq_len() yields an empty loop when the comparison table has no rows
  for(i in seq_len(ncomp)) {
    # spaces in the effect names are replaced by "." before column lookup
    syn.sub <- str_replace_all(sub.chr[i,1]," ",".")
    syn.chr <- str_replace_all(sub.chr[i,2]," ",".")
    
    cat(syn.sub,":",syn.chr,"\n")
    if(is.element(syn.sub,names(mat.sub)) && is.element(syn.chr,names(mat.chr))) {
      lel.sub <- mat.sub[,syn.sub]
      lel.chr <- mat.chr[,syn.chr]
      lel.sub[is.na(lel.sub)] <- 0
      lel.chr[is.na(lel.chr)] <- 0
      lel.sub[lel.sub>0] <- 1
      lel.chr[lel.chr>0] <- 1
      
      a <- sum(lel.sub*lel.chr)
      if(a>=3) {
        b <- sum(lel.sub*(1-lel.chr))
        c <- sum((1-lel.sub)*lel.chr)
        d <- sum((1-lel.sub)*(1-lel.chr))
        txt <- TxT(a,b,c,d)
        s <- paste(syn.sub,"\t",syn.chr,"\t",txt$sval,"\n",sep="")
        cat(s,file=outfile,append=TRUE)
      }
    }
  }
}
#--------------------------------------------------------------------------------------
#
# make an ROC curve for the syndrome and effect correlations 
#
#--------------------------------------------------------------------------------------
SUB.CHR.correlations.roc <- function(to.file=FALSE) {
  # Plot sensitivity against 1-specificity for the SUB-to-CHR effect
  # correlations (red) and syndrome correlations (blue).
  #
  # to.file: if TRUE the plot is written to plots/SUB_CHR_ROC.pdf; otherwise
  #   it is drawn on the current device and browser() is entered afterwards
  #
  # Both inputs must provide "Spec" and "Sens" columns.
  # NOTE(review): SUB.CHR.effect.correlations and SUB.CHR.syndrome.correlations
  # write tab-delimited .txt files with these base names, while .xlsx files are
  # read here; presumably the workbooks are produced from the text files in a
  # separate step - confirm.
  cat("==========================================================================\n")
  cat("SUB.CHR.correlations.roc\n")
  cat("==========================================================================\n")
  coor.effect <- read.xlsx("syndromes/SUB_CHR_effect_correlations.xlsx")
  coor.syndrome <- read.xlsx("syndromes/SUB_CHR_syndrome_correlations.xlsx")
  if(to.file) {
    file <- "plots/SUB_CHR_ROC.pdf"
    pdf(file=file,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # make sure the PDF device is closed even if plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(5,4,4,2))
  plot(0~0,type="n",xlim=c(0,1),ylim=c(0,1),xlab="1-spec",ylab="sens",cex.lab=1.5,cex.axis=1.5,main="SUB-to-CHR Correlations")
  lines(c(0,1),c(0,1))
  # hand-drawn legend: one marker and one label per series
  points(0.5,0.2,pch=21,bg="red",cex=1)
  points(0.5,0.1,pch=21,bg="blue",cex=1)
  text(0.5,0.2,"Effect Correlation",pos=4,cex=1.5)
  text(0.5,0.1,"Syndrome Correlation",pos=4,cex=1.5)
  
  # one point per row; effects first so syndromes are drawn on top
  points(1-coor.effect[,"Spec"],coor.effect[,"Sens"],pch=21,bg="red",cex=1)
  points(1-coor.syndrome[,"Spec"],coor.syndrome[,"Sens"],pch=21,bg="blue",cex=1)
  
  # when plotting to screen, stop here so the plot can be inspected
  if(!to.file) browser()
  invisible(NULL)
}
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
#--------------------------------------------------------------------------------------
#
# do the individual chemical plots 
#
#--------------------------------------------------------------------------------------
syndrome.chem.plots.all <- function(to.file=FALSE) {
  # Combine the rat syndrome matrices of the four study types (CHR, SUB, DEV,
  # MGR) into one chemical-by-syndrome table and draw one dose-range plot per
  # chemical.
  #
  # to.file: if TRUE the plots are written to
  #   plots/syndrome_chem_plots_global.pdf; otherwise each plot is drawn on the
  #   current device and browser() is entered after it
  #
  # Inputs, per study type <ST>:
  #   syndromes/syndromes_rat_<ST>.xlsx             syndrome definitions (rows with useme>0)
  #   output/syndrome_lelmat_rat_<ST>.xlsx          hit matrix, keyed by CODE
  #   output/syndrome_lelmat_{min,max,pos,tot}_rat_<ST>.xlsx   keyed by first column
  # Side effects:
  #   the global SDATA.ALL is set to the combined chemical x syndrome matrix
  #   syndromes/all_syndrome_lelmat.xlsx is written
  #
  # Syndrome columns (column 7 onward) are renamed "<ST>_<syndrome>" so that
  # the same syndrome in different study types stays distinct.
  cat("==========================================================================\n")
  cat("syndrome.chem.plots.all\n")
  cat("==========================================================================\n")
  
  study.list <- c("CHR","SUB","DEV","MGR")
  
  # prepend "<study>_" to each name; the length check is needed because
  # paste() turns a zero-length vector into the single string "<study>_"
  add.prefix <- function(study,x) {
    if(length(x)==0) return(character(0))
    paste(study,"_",x,sep="")
  }
  
  # read one workbook and use the given column as row names
  read.keyed <- function(infile,key) {
    mat <- read.xlsx(infile)
    rownames(mat) <- mat[,key]
    mat
  }
  
  # load everything needed for one study type: the prefixed syndrome names in
  # use (slist) and the hit/min/max/pos/tot matrices with prefixed columns
  load.study <- function(study) {
    syndromes <- read.xlsx(paste("syndromes/syndromes_rat_",study,".xlsx",sep=""))
    syndromes <- syndromes[syndromes[,"useme"]>0,]
    slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
    slist <- add.prefix(study,sort(unique(slist)))
    
    mats <- list(
      hit=read.keyed(paste("output/syndrome_lelmat_rat_",study,".xlsx",sep=""),"CODE"),
      min=read.keyed(paste("output/syndrome_lelmat_min_rat_",study,".xlsx",sep=""),1),
      max=read.keyed(paste("output/syndrome_lelmat_max_rat_",study,".xlsx",sep=""),1),
      pos=read.keyed(paste("output/syndrome_lelmat_pos_rat_",study,".xlsx",sep=""),1),
      tot=read.keyed(paste("output/syndrome_lelmat_tot_rat_",study,".xlsx",sep=""),1)
    )
    # the new names are taken from the hit matrix and applied by position to
    # all five matrices
    ncols <- dim(mats[["hit"]])[2]
    if(ncols>=7) {
      idx <- 7:ncols
      newnames <- paste(study,"_",names(mats[["hit"]])[idx],sep="")
      for(k in names(mats)) names(mats[[k]])[idx] <- newnames
    }
    c(list(slist=slist),mats)
  }
  
  # line and marker fill colours for one syndrome: red for names containing
  # "Mortality", green for names containing "Body" (checked last, so it wins),
  # black/gray otherwise
  syndrome.colors <- function(syndrome) {
    col <- "black"
    col2 <- "gray"
    if(grepl("Mortality",syndrome,perl=TRUE,ignore.case=TRUE)) {
      col <- "red"
      col2 <- "red"
    }
    if(grepl("Body",syndrome,perl=TRUE,ignore.case=TRUE)) {
      col <- "green"
      col2 <- "green"
    }
    c(line=col,fill=col2)
  }
  
  dat <- lapply(study.list,load.study)
  names(dat) <- study.list
  
  dose.min <- 0.1
  dose.max <- 100000
  ymax <- 1
  
  # one row per chemical seen in any study type
  code.list <- sort(unique(unlist(lapply(dat,function(d) d[["hit"]][,"CODE"]),use.names=FALSE)))
  nchem <- length(code.list)
  chem.names <- c("CODE","CASRN","Name","CHR","SUB","DEV","MGR","LDT","HDT")
  # one column per name (a tenth, unnamed column used to be created here and
  # ended up in the output workbook)
  chem.mat <- as.data.frame(matrix(nrow=nchem,ncol=length(chem.names)),stringsAsFactors=FALSE)
  names(chem.mat) <- chem.names
  rownames(chem.mat) <- code.list
  chem.mat[,"CODE"] <- code.list
  # LDT and HDT are initialised here and not updated anywhere in this function
  chem.mat[,c(study.list,"LDT","HDT")] <- 0
  
  # flag which study types each chemical appears in; CASRN and Name are taken
  # from the last study type that lists the chemical
  for(study in study.list) {
    hit <- dat[[study]][["hit"]]
    codes <- hit[,"CODE"]
    if(length(codes)==0) next
    chem.mat[codes,study] <- 1
    chem.mat[codes,"CASRN"] <- hit[,"CASRN"]
    chem.mat[codes,"Name"] <- hit[,"Name"]
  }
  
  # combined hit matrix: NA where a chemical has no row for that study type
  syn.names <- unlist(lapply(dat,function(d) d[["slist"]]),use.names=FALSE)
  all.data <- matrix(nrow=nchem,ncol=length(syn.names))
  colnames(all.data) <- syn.names
  rownames(all.data) <- code.list
  all.data[] <- NA
  for(study in study.list) {
    hit <- dat[[study]][["hit"]]
    for(syndrome in dat[[study]][["slist"]]) {
      cat(syndrome,"\n"); flush.console()
      all.data[hit[,"CODE"],syndrome] <- hit[,syndrome]
    }
  }
  SDATA.ALL <<- all.data
  all.data <- as.data.frame(cbind(chem.mat,all.data),stringsAsFactors=FALSE)
  outfile <- "syndromes/all_syndrome_lelmat.xlsx"
  write.xlsx(all.data,file=outfile, row.names=FALSE)
  
  if(to.file) {
    file <- "plots/syndrome_chem_plots_global.pdf"
    pdf(file=file,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # make sure the PDF device is closed even if plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,1),mar=c(5,4,4,2))
  
  for(j in seq_len(nchem)) {
    code <- chem.mat[j,"CODE"]
    cname <- chem.mat[j,"Name"]
    # study types in which this chemical has a row, in CHR/SUB/DEV/MGR order
    tested <- study.list[unlist(chem.mat[j,study.list])==1]
    nsyn <- 0
    main <- paste(cname,"\n",sep="")
    
    # start from sentinels outside the plotted dose range and narrow them to
    # the lowest ldt / highest hdt over the study types present
    ldt <- 1e6
    hdt <- 1e-6
    # no LD50 value is loaded in this function; the placeholder lies left of
    # dose.min, so its red line falls outside the plotted range
    ld50 <- 1e-6
    for(study in tested) {
      hit <- dat[[study]][["hit"]]
      temp <- hit[code,dat[[study]][["slist"]]]
      nsyn <- nsyn + sum(temp)
      main <- paste(main,study)
      ldt.temp <- hit[code,"ldt"]
      hdt.temp <- hit[code,"hdt"]
      if(ldt.temp<ldt) ldt <- ldt.temp
      if(hdt.temp>hdt) hdt <- hdt.temp
    }
    #cat(cname,"\n")
    #flush.console()
    if(nsyn>0) {
      plot(0~0,type="n",ylim=c(0,ymax),xlim=c(dose.min,dose.max),log="x",xlab="Dose",ylab="",cex.axis=1,cex.lab=1,main=main)
      lines(c(ldt,ldt),c(0,ymax),col="black",lwd=2)
      lines(c(hdt,hdt),c(0,ymax),col="black",lwd=2)
      lines(c(ld50,ld50),c(0,ymax),col="red",lwd=2)
      
      # one horizontal band per positive syndrome, stacked bottom to top
      delta <- 1/nsyn
      counter <- 1
      for(study in tested) {
        d <- dat[[study]]
        for(syndrome in d[["slist"]]) {
          if(d[["hit"]][code,syndrome]==1) {
            lel.min <- d[["min"]][code,syndrome]
            lel.max <- d[["max"]][code,syndrome]
            ntot <- d[["tot"]][code,syndrome]
            npos <- d[["pos"]][code,syndrome]
            y <- (counter-0.5)*delta
            counter <- counter+1
            
            cols <- syndrome.colors(syndrome)
            lines(c(lel.min,lel.max),c(y,y),lwd=2,col=cols[["line"]])
            points(lel.min,y,pch=23,cex=1,bg=cols[["fill"]])
            points(lel.max,y,pch=23,cex=1,bg=cols[["fill"]])
            text(hdt*1.1,y,paste(syndrome," : ",npos,"/",ntot,sep=""),pos=4,cex=0.6)
          }
        }
      }
      # when plotting to screen, stop after each chemical so it can be inspected
      if(!to.file) browser()
    }
  }
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# build and plot the syndrome-by-syndrome co-occurrence heatmap across all study types
#
#--------------------------------------------------------------------------------------
syndrome.global.coor.hm <- function(do.prep=FALSE,to.file=FALSE) {
  # Build (optionally) and plot a symmetric syndrome-by-syndrome association
  # heatmap from the combined matrix held in the global SDATA.ALL.
  #
  # do.prep: if TRUE, recompute the association matrix from SDATA.ALL and
  #   store it in the global COORMAT; if FALSE, the existing COORMAT is plotted
  # to.file: if TRUE the plot is written to plots/syndrome_coor_hm_global.pdf
  #
  # For each pair of syndromes a 2x2 table is passed to TxT() (defined outside
  # this section); pairs with p.value<0.05 and odds.ratio>4 are stored as
  # odds.ratio-1, capped at 10.
  #
  # Returns the heatmap() result invisibly, or NULL (with a warning) when fewer
  # than two syndromes have any association.
  cat("==========================================================================\n")
  cat("syndrome.global.coor.hm\n")
  cat("==========================================================================\n")
  if(do.prep) {
    if(!exists("SDATA.ALL")) stop("SDATA.ALL is not defined; run syndrome.chem.plots.all first")
    resmat <- SDATA.ALL
    syn.list <- colnames(resmat)
    nsyn <- length(syn.list)
    coormat <- matrix(0,nrow=nsyn,ncol=nsyn,dimnames=list(syn.list,syn.list))
    # seq_len() gives an empty loop when there are fewer than two syndromes
    for(i in seq_len(max(nsyn-1,0))) {
      for(j in (i+1):nsyn) {
        s1 <- resmat[,i]
        s2 <- resmat[,j]
        # use only chemicals with a value for both syndromes
        ok <- !is.na(s1) & !is.na(s2)
        s1 <- s1[ok]
        s2 <- s2[ok]
        if(length(s1)>2) {
          a <- sum(s1*s2)
          b <- sum((1-s1)*s2)
          c <- sum(s1*(1-s2))
          d <- sum((1-s1)*(1-s2))
          if(a>=3) {
            txt <- TxT(a,b,c,d)
            p.value <- txt$p.value
            or <- txt$odds.ratio
            # isTRUE() skips the pair instead of failing if TxT returns NA/NaN
            if(isTRUE(p.value<0.05 && or>4)) {
              coormat[i,j] <- or-1
              coormat[j,i] <- or-1
            }
          }
        }
      }
    }
    diag(coormat) <- 10
    coormat[coormat>10] <- 10
    COORMAT <<- coormat
  }
  if(!exists("COORMAT")) stop("COORMAT is not defined; call syndrome.global.coor.hm with do.prep=TRUE first")
  
  # drop syndromes that are not associated with any other syndrome; the
  # diagonal is zeroed first so that self-association does not count
  coormat <- COORMAT
  diag(coormat) <- 0
  rs <- rowSums(coormat)
  coormat <- coormat[rs>0,rs>0,drop=FALSE]
  if(dim(coormat)[1]<2) {
    warning("syndrome.global.coor.hm: fewer than two associated syndromes; no heatmap drawn")
    return(invisible(NULL))
  }
  diag(coormat) <- 10
  
  if(to.file) {
    file <- "plots/syndrome_coor_hm_global.pdf"
    pdf(file=file,width=8,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # make sure the PDF device is closed even if plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(5,4,4,2))
  result <- heatmap(coormat,margins=c(8,8),scale="none",symm=TRUE,
                    xlab="",ylab="",cexRow=0.25,cexCol=0.25,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE,na.rm=FALSE)
  
  invisible(result)
}
#--------------------------------------------------------------------------------------
#
# Do the cancer QC
#
#--------------------------------------------------------------------------------------
cancer.QC <- function() {
  # Compare syndrome calls with the cancer endpoint columns of
  # QC/ToxRefDB_Phase_1_CancerEndpoints.xlsx and write one TxT() summary line
  # per (endpoint, syndrome) pair to the tab-delimited file QC/cancerQC.txt.
  #
  # Reads the global STUDY_TYPE to locate
  # output/syndrome_lelmat_<STUDY_TYPE>.xlsx. Only chemicals (CODE) present in
  # both workbooks are compared. TxT() is defined outside this section; its
  # title and sval fields are used.
  cat("==========================================================================\n")
  cat("cancer.QC\n")
  cat("==========================================================================\n")
  infile <- paste("output/syndrome_lelmat_",STUDY_TYPE,".xlsx",sep="")
  syndrome.mat <- read.xlsx(infile)
  rownames(syndrome.mat) <- syndrome.mat[,"CODE"]
  
  infile <- "QC/ToxRefDB_Phase_1_CancerEndpoints.xlsx"
  toxrefp1.mat <- read.xlsx(infile)
  rownames(toxrefp1.mat) <- toxrefp1.mat[,"CODE"]
  
  # keep the shared chemicals and put both tables in the same row order
  syndrome.mat <- syndrome.mat[is.element(syndrome.mat[,"CODE"],toxrefp1.mat[,"CODE"]),]
  toxrefp1.mat <- toxrefp1.mat[syndrome.mat[,"CODE"],]
  
  # tref.list[i] is compared with synd.list[i]
  # NOTE(review): the testes preneoplastic endpoint is paired with
  # "Testes.neoplasia" while the kidney and liver preneoplastic endpoints are
  # paired with the plain organ syndrome - confirm this is intended
  tref.list <- c("CHR_Rat_Kidney_2_PreneoplasticLesion",
                 "CHR_Rat_Liver_2_PreneoplasticLesion",
                 "CHR_Rat_Liver_3_NeoplasticLesion",
                 "CHR_Rat_Testes_2_PreneoplasticLesion",
                 "CHR_Rat_Testes_3_NeoplasticLesion",
                 "CHR_Rat_ThyroidGland_2_PreneoplasticLesion",
                 "CHR_Rat_ThyroidGland_3_NeoplasticLesion")
  
  synd.list <- c("Kidney",
                 "Liver",
                 "Liver.neoplasia",
                 "Testes.neoplasia",
                 "Testes.neoplasia",
                 "Thyroid.Gland",
                 "Thyroid.Gland")
  
  outfile <- "QC/cancerQC.txt"
  # TxT() is called on dummy counts only to obtain the header text
  txt <- TxT(1,2,3,4)
  cat(paste("Syndrome\tCancerClass\t",txt$title,"\n",sep=""),file=outfile,append=FALSE)
  
  for(i in seq_along(tref.list)) {
    trefi <- tref.list[i]
    syndi <- synd.list[i]
    
    trefv <- as.numeric(toxrefp1.mat[,trefi])
    syndv <- syndrome.mat[,syndi]
    # use only chemicals with a value in both columns
    ok <- !is.na(trefv) & !is.na(syndv)
    trefv <- trefv[ok]
    syndv <- syndv[ok]
    
    a <- sum(trefv*syndv)
    b <- sum((1-trefv)*syndv)
    c <- sum(trefv*(1-syndv))
    d <- sum((1-trefv)*(1-syndv))
    txt <- TxT(a,b,c,d)
    s <- paste(trefi,"\t",syndi,"\t",txt$sval,"\n",sep="")
    cat(s,file=outfile,append=TRUE)
  }
  # open the debugger for inspection only when someone is at the console
  if(interactive()) browser()
  
}
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
#--------------------------------------------------------------------------------------
#
# build the summary table 
#
#--------------------------------------------------------------------------------------
syndrome.summary <- function() {
  # Build a per-syndrome summary table and write it to
  # syndromes/syndrome_summary.xlsx.
  #
  # The syndrome list is the union of the "Syndrome" columns of
  # syndromes/syndromes_min_<ST>.xlsx for ST in CHR, SUB, DEV, MGR, with the
  # study-type prefixes removed. For each study type and syndrome the table
  # holds:
  #   <ST>.chems    number (and percent) of rows in
  #                 output/syndrome_lelmat_<ST>.xlsx with a value above 0
  #   <ST>.effects  number of rows in syndromes/syndromes_<ST>.xlsx that name
  #                 the syndrome in any of Syndrome1..Syndrome5
  # Cells stay empty when the lelmat workbook has no column for the syndrome.
  st.list <- c("CHR","SUB","DEV","MGR")
  
  # collect the syndrome names of all four study types (MGR was previously
  # left out because CHR was listed twice)
  slist <- unlist(lapply(st.list,function(study) {
    infile <- paste("syndromes/syndromes_min_",study,".xlsx",sep="")
    read.xlsx(infile)[,"Syndrome"]
  }),use.names=FALSE)
  # strip the study-type prefixes one after another
  for(prefix in paste(st.list,"_",sep="")) slist <- str_replace_all(slist,prefix,"")
  
  slist.unique <- sort(unique(slist))
  nunique <- length(slist.unique)
  name.list <- c("Syndrome","CHR.effects","SUB.effects","DEV.effects","MGR.effects","CHR.chems","SUB.chems","DEV.chems","MGR.chems")
  stable <- as.data.frame(matrix(nrow=nunique,ncol=length(name.list)),stringsAsFactors=FALSE)
  names(stable) <- name.list
  stable[] <- ""
  stable[,"Syndrome"] <- slist.unique
  
  for(study in st.list) {
    infile <- paste("syndromes/syndromes_",study,".xlsx",sep="")
    synmat <- read.xlsx(infile)
    infile <- paste("output/syndrome_lelmat_",study,".xlsx",sep="")
    lelmat <- read.xlsx(infile)
    for(j in seq_len(nunique)) {
      syndrome <- paste(study,"_",slist.unique[j],sep="")
      if(is.element(syndrome,colnames(lelmat))) {
        # missing counts as negative, anything above 0 as positive
        temp <- lelmat[,syndrome]
        temp[is.na(temp)] <- 0
        temp[temp>0] <- 1
        npos <- sum(temp)
        ntot <- length(temp)
        colname <- paste(study,".chems",sep="")
        stable[j,colname] <- paste(npos," (",format(100*npos/ntot,digits=1),"%)",sep="")
        
        # count definition rows that list this syndrome in any of the five slots
        nt <- 0
        for(k in 1:5) {
          nt <- nt + sum(is.element(synmat[,paste("Syndrome",k,sep="")],syndrome))
        }
        colname <- paste(study,".effects",sep="")
        stable[j,colname] <- nt
      }
    }
  }
  outfile <- "syndromes/syndrome_summary.xlsx"
  write.xlsx(stable,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Compare frequencies of hits above and below mortality 
#
#--------------------------------------------------------------------------------------
mortality.comp <- function(to.file=FALSE) {
  # For every selected rat chronic (CHR) syndrome, count the chemicals whose
  # syndrome LEL is below the chemical's mortality LEL versus at or above it,
  # print the counts, and scatter-plot one count against the other.
  #
  # A chemical that has a syndrome LEL but no mortality LEL is counted in the
  # "less than mortality" group.
  #
  # Args:
  #   to.file: if TRUE the plot is written to plots/CHR_mortality_comp.pdf;
  #            otherwise it is drawn on the current device and browser() is
  #            entered afterwards so the result can be inspected.
  #
  # Returns:
  #   NULL, invisibly. The function is called for its plot and console output.
  cat("==========================================================================\n")
  cat("mortality.comp\n")
  cat("==========================================================================\n")

  species <- "rat"
  study_type <- "CHR"
  infile <- paste0("syndromes/syndromes_",species,"_",study_type,".xlsx")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  # Pool the five syndrome columns; sort() also drops NA entries.
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  syndromes.CHR <- sort(unique(slist))
  nsyn <- length(syndromes.CHR)

  syn.min.CHR <- read.xlsx("output/syndrome_lelmat_min_rat_CHR.xlsx")
  rownames(syn.min.CHR) <- syn.min.CHR[,1]

  mort.mat <- as.data.frame(matrix(nrow=nsyn,ncol=3),stringsAsFactors=FALSE)
  rownames(mort.mat) <- syndromes.CHR
  mort.mat[,1] <- syndromes.CHR
  names(mort.mat) <- c("Syndrome","N.LT.Mortality","N.GE.Mortality")

  mort <- syn.min.CHR[,"Mortality"]
  has.mort <- !is.na(mort)
  options(warn=1)
  for(i in seq_len(nsyn)) {
    syndrome <- syndromes.CHR[i]
    syn.min <- syn.min.CHR[,syndrome]
    has.syn <- !is.na(syn.min)
    both <- has.syn & has.mort
    # Both values are non-missing here, so the comparison never yields NA.
    below <- syn.min[both] < mort[both]
    nlt <- sum(has.syn & !has.mort) + sum(below)
    nge <- sum(!below)
    cat(syndrome,":",nge,":",nlt,"\n")
    mort.mat[i,"N.LT.Mortality"] <- nlt
    mort.mat[i,"N.GE.Mortality"] <- nge
  }

  x <- mort.mat[,"N.GE.Mortality"]
  y <- mort.mat[,"N.LT.Mortality"]
  xmax <- 55
  if(to.file) {
    pdf(file="plots/CHR_mortality_comp.pdf",width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # Close the device even when a plotting call below fails.
    on.exit(dev.off(),add=TRUE)
  }
  plot(y~x,xlim=c(0,xmax),ylim=c(0,xmax),xlab="Syndrome LEL>= Mortality LEL",ylab="Syndrome LEL < Mortality LEL",cex.lab=1.5,cex.axis=1.5)
  # Identity line plus two parallel lines offset by 10 counts.
  lines(c(0,xmax),c(0,xmax))
  lines(c(10,xmax+10),c(0,xmax))
  lines(c(0,xmax),c(10,xmax+10))
  # Hand-placed colour legend.
  points(35,10,pch=21,bg="blue",cex=1.5)
  points(35,7,pch=21,bg="red",cex=1.5)
  points(35,4,pch=21,bg="gray",cex=1.5)
  text(35,10,"Preneoplasia",pos=4,cex=0.75)
  text(35,7,"Neoplasia",pos=4,cex=0.75)
  text(35,4,"Other",pos=4,cex=0.75)

  # One point per syndrome, coloured by class; no per-syndrome labels are drawn.
  is.neo <- grepl("_Neoplasia",syndromes.CHR,perl=TRUE)
  is.preneo <- grepl("_Preneoplasia",syndromes.CHR,perl=TRUE)
  fill <- ifelse(is.neo,"red",ifelse(is.preneo,"blue","gray"))
  points(x,y,pch=21,bg=fill,cex=1.5)

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Analyze hit rates as a function of mortality 
#
#--------------------------------------------------------------------------------------
mortality.hitrates <- function(to.file=FALSE) {
  # Compare, for rat chronic (CHR) studies, the number of active effects and
  # the number of active syndromes per chemical between chemicals with and
  # without a mortality hit. Draws two box plots, each titled with the p-value
  # of a one-sided t-test (mortality group greater).
  #
  # Args:
  #   to.file: if TRUE the plots are written to plots/CHR_mortality_hitrate.pdf;
  #            otherwise they are drawn on the current device and browser() is
  #            entered afterwards so the result can be inspected.
  #
  # Returns:
  #   NULL, invisibly. The function is called for its plots.
  cat("==========================================================================\n")
  cat("mortality.hitrates\n")
  cat("==========================================================================\n")

  species <- "rat"
  study_type <- "CHR"
  infile <- paste0("syndromes/syndromes_",species,"_",study_type,".xlsx")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  # Pool the five syndrome columns; sort() also drops NA entries.
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist.CHR <- sort(unique(slist))

  syn.hit.CHR <- read.xlsx("output/syndrome_lelmat_rat_CHR.xlsx")
  rownames(syn.hit.CHR) <- syn.hit.CHR[,"CODE"]

  eLEL.CHR <- read.xlsx("ToxRefDB/lel_mat_rat_CHR.xlsx")
  rownames(eLEL.CHR) <- eLEL.CHR[,1]
  # Keep columns 4 onward (presumably the first three are identifier
  # columns rather than effects -- TODO confirm against the input file).
  eLEL.CHR <- eLEL.CHR[,4:dim(eLEL.CHR)[2]]

  code.list <- syn.hit.CHR[,"CODE"]
  nchem <- length(code.list)
  hit.syn <- numeric(nchem)
  hit.effect <- numeric(nchem)
  mort.class <- numeric(nchem)
  for(i in seq_len(nchem)) {
    code <- code.list[i]
    hit.syn[i] <- sum(syn.hit.CHR[code,slist.CHR])
    # An effect counts as active when its LEL entry is not missing.
    hit.effect[i] <- sum(!is.na(eLEL.CHR[code,]))
    if(syn.hit.CHR[code,"Mortality"]==1) mort.class[i] <- 1
  }

  if(to.file) {
    pdf(file="plots/CHR_mortality_hitrate.pdf",width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # Close the device even when t.test() or boxplot() below fails.
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,1),mar=c(4,4,2,2))

  # Group sizes, printed above each box.
  n1 <- sum(mort.class)
  n0 <- length(mort.class)-n1

  res <- t.test(hit.effect[mort.class==1],hit.effect[mort.class==0],alternative="greater")
  boxplot(hit.effect~mort.class,names=c("No Mortality","Yes Mortality"),main=paste0("Effects\np-value: ",format(res$p.value,digits=2)),ylab="Number of Active Effects")
  text(1,50,n0)
  text(2,50,n1)

  res <- t.test(hit.syn[mort.class==1],hit.syn[mort.class==0],alternative="greater")
  boxplot(hit.syn~mort.class,names=c("No Mortality","Yes Mortality"),main=paste0("Syndromes\np-value: ",format(res$p.value,digits=2)),ylab="Number of Active Syndromes")
  text(1,25,n0)
  text(2,25,n1)

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Compare, by syndrome class, the fraction of hits at or above the mortality LEL
#
#--------------------------------------------------------------------------------------
mortality.comp2 <- function(to.file=FALSE) {
  # For every selected rat chronic (CHR) syndrome, compute the total number of
  # chemicals with a syndrome LEL and the fraction of those whose syndrome LEL
  # is at or above the chemical's mortality LEL. The table is written to
  # syndromes/syndrome_mortality_summary.xlsx and the fractions are box-plotted
  # by syndrome class (other / preneoplasia / neoplasia).
  #
  # A chemical with a syndrome LEL but no mortality LEL counts as "below
  # mortality". The "Mortality" entry itself is not evaluated and keeps a total
  # and fraction of 0 in class "Other".
  #
  # Args:
  #   to.file: if TRUE the plot is written to plots/CHR_mortality_comp2.pdf;
  #            otherwise it is drawn on the current device and browser() is
  #            entered afterwards so the result can be inspected.
  #
  # Returns:
  #   NULL, invisibly. The function is called for its file and plot output.
  cat("==========================================================================\n")
  cat("mortality.comp2\n")
  cat("==========================================================================\n")

  species <- "rat"
  study_type <- "CHR"
  infile <- paste0("syndromes/syndromes_",species,"_",study_type,".xlsx")
  syndromes <- read.xlsx(infile)
  syndromes <- syndromes[syndromes[,"useme"]==1,]

  # Pool the five syndrome columns; sort() also drops NA entries.
  slist <- c(syndromes[,"syndrome1"],syndromes[,"syndrome2"],syndromes[,"syndrome3"],syndromes[,"syndrome4"],syndromes[,"syndrome5"])
  slist.CHR <- sort(unique(slist))
  nsyn <- length(slist.CHR)

  syn.min.CHR <- read.xlsx("output/syndrome_lelmat_min_rat_CHR.xlsx")
  rownames(syn.min.CHR) <- syn.min.CHR[,1]

  syn.frac <- numeric(nsyn)
  syn.tot <- numeric(nsyn)
  # Syndrome class: 0 = other, 1 = preneoplasia, 2 = neoplasia.
  syn.class <- numeric(nsyn)

  mort <- syn.min.CHR[,"Mortality"]
  has.mort <- !is.na(mort)
  options(warn=1)

  for(i in seq_len(nsyn)) {
    syndrome <- slist.CHR[i]
    if(syndrome!="Mortality") {
      if(grepl("_Neoplasia",syndrome,perl=TRUE)) syn.class[i] <- 2
      else if(grepl("_Preneoplasia",syndrome,perl=TRUE)) syn.class[i] <- 1

      syn.min <- syn.min.CHR[,syndrome]
      has.syn <- !is.na(syn.min)
      both <- has.syn & has.mort
      # Both values are non-missing here, so the comparison never yields NA.
      below <- syn.min[both] < mort[both]
      nlt <- sum(has.syn & !has.mort) + sum(below)
      nge <- sum(!below)
      ntot <- nlt+nge
      syn.tot[i] <- ntot
      # A syndrome with no hits gets a fraction of 0 rather than 0/0 = NaN.
      syn.frac[i] <- if(ntot>0) nge/ntot else 0
    }
  }

  output <- data.frame(Syndrome=slist.CHR,TotalHits=syn.tot,HitsGEMortality=syn.frac,stringsAsFactors=FALSE)
  outfile <- "syndromes/syndrome_mortality_summary.xlsx"
  write.xlsx(output,file=outfile, row.names=F)

  if(to.file) {
    pdf(file="plots/CHR_mortality_comp2.pdf",width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # Close the device even when boxplot() below fails.
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(4,4,2,2))

  boxplot(syn.frac~syn.class,names=c("Other","Preneoplasia","Neoplasia"),main="fraction(chemicals active, dose>=Mortality)")
  # Group sizes, printed above each box.
  text(1,0.9,sum(syn.class==0))
  text(2,0.9,sum(syn.class==1))
  text(3,0.9,sum(syn.class==2))

  if(!to.file) browser()
  invisible(NULL)
}
