#--------------------------------------------------------------------------------------
#
# ToxRefDB_clean.R - code to prepare ToxRefDB data for use with Syndromes
#
# January 2016
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------


#--------------------------------------------------------------------------------------
#
# Prepare the ToxRefDB file
#
#--------------------------------------------------------------------------------------
prep.ToxRefDB <- function(do.read=TRUE,species,study_type) {
  # Filter the raw ToxRefDB export down to acceptable studies for the requested
  # species / study type and write the result to
  # ../ToxRefDB/toxrefdb_acceptable_<species>_<study_type>.xlsx
  #
  # do.read:    if TRUE, (re)load the raw workbook into the global TOXREFDB.ALL;
  #             if FALSE, reuse the TOXREFDB.ALL left by an earlier call
  # species:    value(s) of the "species" column to keep
  # study_type: value(s) of the "study_type" column to keep
  #
  # The dimensions of the table are printed after each filtering stage.
  print.current.function()
  if(do.read) {
    infile <- "../ToxRefDB/toxrefdb_study_tg_effect_endpoint_AUG2014_FOR_PUBLIC_RELEASE.xlsx"
    # Deliberately cached in the global environment so later calls can pass do.read=FALSE
    TOXREFDB.ALL <<- read.xlsx(infile)
  }
  print(dim(TOXREFDB.ALL))

  # Keep only the acceptable usability classes. %in% never returns NA, so rows
  # with a missing usability_desc are dropped here instead of turning into
  # all-NA phantom rows that inflate the count printed below.
  acceptable <- c("Acceptable Guideline (post-1998)",
                  "Acceptable Guideline (pre-1998)",
                  "Acceptable Non-guideline")
  temp <- TOXREFDB.ALL[TOXREFDB.ALL[,"usability_desc"] %in% acceptable,]
  print(dim(temp))

  # A row must match both the species and the study type
  keep <- temp[,"species"] %in% species & temp[,"study_type"] %in% study_type
  temp <- temp[keep,]
  print(dim(temp))

  casrn.list <- sort(uniquify(temp[,"chemical_casrn"]))
  nchem <- length(unique(casrn.list))
  cat("Total CASRN: ",nchem,"\n")
  fname <- paste("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(temp,file=fname, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Build the study table
#
#--------------------------------------------------------------------------------------
build.study.table <- function(species,study_type) {
  # Annotate the acceptable-study table for one species / study type with the
  # curated endpoint names and write the usable rows to
  # ../ToxRefDB/all_data_<species>_<study_type>.xlsx
  #
  # species, study_type: used only to build the input and output file names
  #
  # Steps:
  #   1. build a lookup ("rosetta") endpoint_raw -> endpoint_auto -> endpoint_final
  #      from the curated endpoint workbook and the anemia endpoint map
  #   2. build endpoint_raw for every study row from its 13 descriptor columns
  #   3. attach endpoint_auto / endpoint_final / useme from the first matching
  #      rosetta row and keep only the rows that matched
  print.current.function()

  # --- 1. lookup table -----------------------------------------------------
  aggregates <- read.xlsx("../ToxRefDB/toxrefdb_endpoint_2016_01_26.xlsx")
  rosetta <- aggregates[,c("endpoint_raw","endpoint_auto","useme")]
  rosetta <- rosetta[which(rosetta[,"useme"]>0),]

  map <- read.xlsx("../anemia_files/anemia_endpoints_2016_01_28.xlsx")
  # Kept from the original: setting row names fails on duplicated or missing
  # endpoint_auto values, which guarantees the match below is unambiguous
  rownames(map) <- map[,"endpoint_auto"]
  map.row <- match(rosetta[,"endpoint_auto"],map[,"endpoint_auto"])
  rosetta[["endpoint_final"]] <- as.character(map[map.row,"endpoint_final"])
  # endpoints that are absent from the map are not wanted
  rosetta <- rosetta[!is.na(rosetta[,"endpoint_final"]),]

  # --- 2. study rows -------------------------------------------------------
  filename <- paste("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
  temp <- read.xlsx(filename)
  if(is.null(temp) || nrow(temp)==0) stop("no study rows found in ",filename,call.=FALSE)

  # NOTE(review): the original renames the FIRST column to "CODE" and overwrites
  # it, after appending a copy of that column (auto-named "temp[, 1]") at the
  # end. That layout is preserved so the output workbook keeps the same columns.
  temp <- cbind(temp,temp[,1])
  names(temp)[1] <- "CODE"
  temp[,"CODE"] <- gsub("-","",paste("C",temp[,"chemical_casrn"],sep=""),fixed=TRUE)

  # NA descriptors become empty strings; study_type is intentionally left
  # as-is (an NA there is pasted as "NA"), matching the original behaviour
  blank.na <- function(x) {
    x <- as.character(x)
    x[is.na(x)] <- ""
    x
  }
  endpoint.raw <- paste(
    blank.na(temp[,"effect_type"]),
    as.character(temp[,"study_type"]),
    gsub(" ","",blank.na(temp[,"effect_target"]),fixed=TRUE),
    blank.na(temp[,"effect_desc"]),
    blank.na(temp[,"direction"]),
    blank.na(temp[,"target_site"]),
    blank.na(temp[,"focal_diffuse"]),
    blank.na(temp[,"effect_category"]),
    blank.na(temp[,"endpoint_category"]),
    blank.na(temp[,"endpoint_type"]),
    blank.na(temp[,"endpoint_system"]),
    blank.na(temp[,"endpoint_target"]),
    blank.na(temp[,"endpoint_lifestage"]),
    sep="_")

  # --- 3. attach curated names --------------------------------------------
  # match() returns the first matching rosetta row, as the original loop did
  hit <- match(endpoint.raw,rosetta[,"endpoint_raw"])
  found <- !is.na(hit)
  nrow <- nrow(temp)
  endpoint.auto <- rep("",nrow)
  endpoint.final <- rep("",nrow)
  # useme starts as NA (the original assigned numeric(0), which cannot be
  # stored in a populated column) and is filled only for matched rows
  useme <- rep(NA_real_,nrow)
  endpoint.auto[found] <- as.character(rosetta[hit[found],"endpoint_auto"])
  endpoint.final[found] <- as.character(rosetta[hit[found],"endpoint_final"])
  useme[found] <- rosetta[hit[found],"useme"]

  temp[,"endpoint_raw"] <- endpoint.raw
  temp[,"endpoint_auto"] <- endpoint.auto
  temp[,"endpoint_final"] <- endpoint.final
  temp[,"useme"] <- useme

  all.data <- temp[!is.na(useme) & useme>0,]
  outfile <- paste("../ToxRefDB/all_data_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(all.data,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# build the endpoint master table
#
#--------------------------------------------------------------------------------------
build.endpoint.master <- function() {
  # Collect the distinct endpoint descriptor combinations from the four
  # acceptable-study workbooks (rat CHR, rat SUB, mouse CHR, dog CHR) and write
  # them, stacked in that order, to ../ToxRefDB/toxrefdb_endpoint_master.xlsx
  print.current.function()

  studies <- data.frame(species=c("rat","rat","mouse","dog"),
                        type=c("CHR","SUB","CHR","CHR"),
                        stringsAsFactors=FALSE)

  descriptor.cols <- c("study_type","species","effect_type","effect_target",
                       "effect_desc","direction","target_site","focal_diffuse",
                       "effect_category","endpoint_category","endpoint_type",
                       "endpoint_system","endpoint_target","endpoint_lifestage")

  # Rows with these effect types are dropped ...
  excluded.effect.types <- c("Organ Weight","Pathology (Gross)","In-Life Observations")
  # ... unless the endpoint target is one of these, which is always kept
  always.kept.targets <- c("Mortality","BodyWeight")

  combined <- NULL
  for(k in seq_len(nrow(studies))) {
    species <- studies[k,"species"]
    study_type <- studies[k,"type"]
    cat(species,":",study_type,"\n"); flush.console()

    sheet <- read.xlsx(paste0("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx"))
    wanted <- !(sheet[,"effect_type"] %in% excluded.effect.types) |
      sheet[,"endpoint_target"] %in% always.kept.targets
    sheet <- sheet[wanted,]

    distinct <- unique(sheet[,descriptor.cols])
    print(dim(distinct))
    combined <- rbind(combined,distinct)
  }
  write.xlsx(combined,file="../ToxRefDB/toxrefdb_endpoint_master.xlsx",row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# build the clean endpoints
#
#--------------------------------------------------------------------------------------
build.endpoint.clean <- function() {
  # Derive a cleaned endpoint name for every row of the endpoint master table.
  #
  # Reads  ../ToxRefDB/toxrefdb_endpoint_master.xlsx
  # Writes ../ToxRefDB/toxrefdb_endpoint_clean_temp.xlsx, which is the input
  #        table plus two columns:
  #   endpoint_raw  - the 13 descriptor fields joined by "_" (NA -> "")
  #   endpoint_auto - the name assembled below from prefix / organ / severity /
  #                   description / lifestage / direction and then normalised
  #                   by a fixed, order-dependent list of string replacements
  print.current.function()
  
  filename <- paste("../ToxRefDB/toxrefdb_endpoint_master.xlsx",sep="")
  endpoint.table <- read.xlsx(filename)
  nendpoint <- dim(endpoint.table)[1]
  # eclean collects endpoint_auto, endpoint.raw collects endpoint_raw (one entry per row)
  eclean <- vector(length=nendpoint,mode="character")
  eclean[] <- ""
  endpoint.raw <- eclean
  
  # endpoint_target values that are used directly as the organ name
  organ.list <- c(
    "AdrenalGland","Artery","Blood","Bloodvessel","Bone","BoneMarrow","Brain","ClitoralGland","CoagulatingGland","Ear","Epididymis",
    "Esophagus","Eye","Gallbladder","HarderianGland","Heart","IntestineLarge","IntestineSmall","Kidney","LacrimalGland",
    "Larynx","Liver","Lung","LymphNode","MammaryGland","Nerve","Nose","OralMucosa","Ovary","Oviduct",
    "Pancreas","ParathyroidGland","Penis","Peritoneum","Pharynx","PituitaryGland","PreputialGland","Prostate","ReproductiveOutcome","Salivaryglands",
    "SeminalVesicle","SkeletalMuscle","Skin","Spinalcord","Spleen","Stomach","Testis","Thymus","ThyroidGland","TissueNOS",
    "Tongue","Tooth","Trachea","UncertainPrimarySite","Ureter","Urethra","UrinaryBladder","Uterus","Vagina","ZymbalsGland")
  
  
  # NOTE(review): 1:nendpoint iterates over c(1,0) if the master table is empty
  for(i in 1:nendpoint) {
    # Classification flags, reset for every row
    bw <- F
    cholinesterase <- F
    dev <- F
    devdelay <- F
    devlandmark <- F
    devmal <- F
    devneurotox <- F
    devnonrepro <- F
    devrepro <- F
    fetus <- F
    mort <- F
    neoplastic <- F
    neurotox <- F
    offspring <- F
    prolif <- F
    repro <- F
    repromating <- F
    reprononmating <- F
    reprooutcome <- F
    reproperf <- F
    sexdev <- F
    urinalysis <- F	
    
    # Descriptor fields of the current row
    effect_type	    	<- endpoint.table[i,"effect_type"]
    study_type	    	<- endpoint.table[i,"study_type"]
    effect_target		<- endpoint.table[i,"effect_target"]
    effect_desc			<- endpoint.table[i,"effect_desc"]
    direction			<- endpoint.table[i,"direction"]
    target_site			<- endpoint.table[i,"target_site"]
    focal_diffuse		<- endpoint.table[i,"focal_diffuse"]
    effect_category		<- endpoint.table[i,"effect_category"]
    endpoint_category	<- endpoint.table[i,"endpoint_category"]
    endpoint_type		<- endpoint.table[i,"endpoint_type"]
    endpoint_system		<- endpoint.table[i,"endpoint_system"]
    endpoint_target		<- endpoint.table[i,"endpoint_target"]
    endpoint_lifestage	<- endpoint.table[i,"endpoint_lifestage"]
    
    # Missing descriptors become empty strings (study_type is not checked)
    if(is.na(effect_type)) effect_type <- ""
    if(is.na(effect_target)) effect_target <- ""		
    if(is.na(effect_desc)) effect_desc <- ""
    if(is.na(direction)) direction <- ""
    if(is.na(target_site)) target_site <- ""
    if(is.na(focal_diffuse)) focal_diffuse <- ""		
    if(is.na(effect_category)) effect_category <- ""		
    if(is.na(endpoint_category)) endpoint_category <- ""
    if(is.na(endpoint_type)) endpoint_type <- ""		
    if(is.na(endpoint_system)) endpoint_system <- ""		
    if(is.na(endpoint_target)) endpoint_target <- ""		
    if(is.na(endpoint_lifestage)) endpoint_lifestage <- ""
    effect_target <- str_replace_all(effect_target," ","")
    
    # Raw key: same field order and separator as the endpoint_raw built in build.study.table
    endpoint.raw[i] <- paste(		
      effect_type,"_",	    	
      study_type,"_",	
      effect_target,"_",		
      effect_desc,"_",			
      direction,"_",			
      target_site,"_",			
      focal_diffuse,"_",		
      effect_category,"_",		
      endpoint_category,"_",
      endpoint_type,"_",		
      endpoint_system,"_",		
      endpoint_target,"_",		
      endpoint_lifestage,sep="")
    
    # Description = target_site, effect_desc, or "<target_site>_<effect_desc>" when both are present
    description <- ""
    if(nchar(target_site)>0) {
      description <- target_site
      if(nchar(effect_desc)>0) description <- paste(description,"_",effect_desc,sep="")
    }
    else description <- effect_desc
    
    # Set the classification flags from the category / system / type fields
    if(effect_category=="Developmental") dev <- T
    if(effect_type=="Developmental") dev <- T
    
    if(effect_category=="Offspring") offspring <- T
    
    if(effect_category=="Reproductive") repro <- T
    if(endpoint_system=="ReproductiveFemale") repro <- T
    if(endpoint_system=="ReproductiveMale") repro <- T
    
    if(endpoint_category=="DevelopmentalReproductive") devrepro <- T
    
    if(endpoint_system=="ReproductivePerformance") reproperf <- T
    
    if(endpoint_system=="ReproductiveOutcome") reprooutcome <- T
    
    if(endpoint_system=="SexualDevelopmentalLandmark") sexdev <- T
    
    if(endpoint_target=="CholinesteraseInhibition") cholinesterase <- T
    if(endpoint_type=="CholinesteraseInhibition") cholinesterase <- T
    
    if(endpoint_system=="OffspringSurvivalEarly") {mort <- T; offspring <- T}
    if(endpoint_system=="OffspringSurvivalLate") {mort <- T; offspring <- T}
    
    if(endpoint_system=="DevelopmentalLandmark") devlandmark <- T
    
    if(endpoint_type=="PathologyNeoplastic") neoplastic <- T
    
    if(endpoint_type=="PathologyProliferative") prolif <- T
    
    if(endpoint_type=="DevelopmentalDelay") devdelay <- T
    
    if(endpoint_type=="DevelopmentalMalformation") devmal <- T
    
    if(endpoint_type=="DevelopmentalNeurotoxicity") devneurotox <- T
    
    if(endpoint_type=="Neurotoxicity") neurotox <- T
    
    if(endpoint_type=="ReproductiveNonMating") reprononmating <- T
    
    if(endpoint_type=="ReproductiveMating") repromating <- T
    
    # Organ: endpoint_target when it is a known organ, otherwise effect_target
    organ <- ""
    if(is.element(endpoint_target,organ.list)) organ <- endpoint_target
    else organ <- effect_target
    
    # Special endpoint targets that set flags or override the organ
    if(endpoint_target=="Deadfetuses") {mort <- T;fetus <- T}
    else if(endpoint_target=="BodyWeight") {bw <- T}
    else if(endpoint_target=="CholinesteraseInhibition") {cholinesterase <- T}
    else if(endpoint_target=="Eyeopening") {organ <- "Eye"; devdelay <- T}
    else if(endpoint_target=="Intercurrentdeaths") {mort <- T}
    else if(endpoint_target=="JawHyoid") {organ <- "bone"}
    else if(endpoint_target=="Mortality") {mort <- T}
    else if(endpoint_target=="ReproductivePerformance") {reproperf <- T}
    else if(endpoint_target=="TotalLitterLoss") {mort <- T;fetus <- T}
    else if(endpoint_target=="Urinalysis") {urinalysis <- T}
    else if(endpoint_target=="Viabilityindex") {mort <- T;fetus <- T}
    
    # A non-organ endpoint_target is kept by prepending it to the description
    if(!is.element(endpoint_target,organ.list)) description <- paste(endpoint_target,"_",description,sep="")
    
    if(endpoint_lifestage=="fetal") fetus <- T	
    if(endpoint_lifestage=="juvenile") offspring <- T	
    
    # CHR / SUB studies and ClinicalChemistry / Hematology targets never get
    # a developmental or reproductive classification: clear all of those flags
    if(study_type=="CHR" || study_type=="SUB" || endpoint_target=="ClinicalChemistry"|| endpoint_target=="Hematology") {
      dev <- F
      devdelay <- F
      devlandmark <- F
      devmal <- F
      devneurotox <- F
      devnonrepro <- F
      devrepro <- F
      repro <- F
      repromating <- F
      reprononmating <- F
      reprooutcome <- F
      reproperf <- F
    }
    
    
    # Prefix: the first flag that is set in this priority order wins.
    # Cholinesterase rows deliberately get no prefix. The branches for the
    # repro / sexdev / devrepro flags are commented out, so those flags are
    # set above but currently have no effect on the name.
    prefix <- ""
    if(cholinesterase) prefix <- ""
    else if(urinalysis) prefix <- "Urinalysis_"
    else if(bw) prefix <- "BodyWeight_"
    else if(mort) prefix <- "Mortality_"
    else if(devmal) prefix <- "DevMalformation_"
    else if(devdelay) prefix <- "DevDelay_"
    else if(devneurotox) prefix <- "DevNeurotox_"
    #else if(devrepro) prefix <- "DevRepro_"
    else if(devnonrepro) prefix <- "DevNonRepro_"
    else if(devlandmark) prefix <- "DevLandmark_"
    else if(neurotox) prefix <- "Neurotox_"
    #else if(repromating) prefix <- "ReproMating_"
    #else if(reprononmating) prefix <- "ReproNonMating_"
    #else if(reprooutcome) prefix <- "ReproOutcome_"
    #else if(reproperf) prefix <- "ReproPerf_"
    #else if(sexdev) prefix <- "SexDev_"
    else if(dev) prefix <- "DevOther_"
    #else if(repro) prefix <- "ReproOther_"
    
    # Severity: proliferative takes precedence over neoplastic
    severity <- ""
    if(prolif) severity <- "Proliferative_"
    else if(neoplastic) severity <- "Neoplastic_"
    
    # Lifestage tag is skipped for malformation / delay endpoints; offspring wins over fetus
    lifestage <- ""
    if(!devmal && !devdelay) {
      if(offspring) lifestage <- "Offspring_"
      else if(fetus) lifestage <- "Fetus_"
    }
    # Direction is abbreviated; any other value is passed through unchanged
    newdir <- ""
    if(direction=="Increase") newdir <- "Incr"
    else if(direction=="Decrease") newdir <- "Decr"
    else newdir <- direction
    
    # Normalise organ spellings and group sub-structures under a parent organ
    if(organ=="bone") organ <- "Bone"
    if(organ=="Bladder") organ <- "UrinaryBladder"
    if(organ=="Aorta") organ <- "Heart_Aorta"
    if(organ=="Aorticarch") organ <- "Heart_Aorta"
    if(organ=="Greatvessels") organ <- "Bloodvessel"
    if(organ=="Limb") organ <- "Bone_Limb"
    if(organ=="Mouth/Jaw") organ <- "Bone_MouthJaw"
    if(organ=="Nasal") organ <- "Nose"
    if(organ=="Paw/Digit") organ <- "Bone_PawDigit"
    if(organ=="Presphenoid") organ <- "Bone_Presphenoid"
    if(organ=="Pulmonaryartery") organ <- "Bloodvessel_PulmonaryArtery"
    if(organ=="Radius") organ <- "Bone_Radius"
    if(organ=="Testes") organ <- "Testis"
    if(organ=="Ulna") organ <- "Bone_Ulna"
    # NOTE(review): "Zygomatic" is mapped under "Gonad_" while the other skeletal
    # terms here go under "Bone_" - confirm this is intended
    if(organ=="Zygomatic") organ <- "Gonad_Zygomatic"
    if(organ=="Ductusarteriosus") organ <- "Heart_Ductusarteriosus"
    if(organ=="Innominateartery") organ <- "Bloodvessel_Innominateartery"
    if(organ=="Interparietal") organ <- "Bone_Interparietal"
    if(organ=="Subclavianartery") organ <- "Bloodvessel_Subclavianartery"
    if(organ=="Cornea") organ <- "Eye_Cornea"
    #if(organ=="") organ <- 
    #if(organ=="") organ <- 
    #if(organ=="") organ <- 
    #if(organ=="") organ <- 
    #if(organ=="") organ <- 
    # Organ names of one or two characters are dropped from the final name
    organuse <- ""
    if(nchar(organ)>2) organuse <- paste(organ,"_",sep="")
    descuse <- ""
    if(nchar(description)>0) descuse <- paste(description,"_",sep="")
    
    
    # Body weight and mortality get a fixed short form; everything else is
    # prefix + organ + severity + description + lifestage + direction
    if(bw) endpoint <- paste("BodyWeight_",lifestage,newdir,sep="")
    else if(mort) endpoint <- paste("Mortality_",lifestage,newdir,sep="")
    else endpoint <- paste(prefix,organuse,severity,descuse,lifestage,newdir,sep="")
    #endpoint <- paste("[",prefix,"][",organuse,"][",prolifuse,"][",neoplasticuse,"][",descuse,"][",lifestage,"][",direction,"]",sep="")
    # Literal (fixed) clean-up replacements. They are applied in sequence, so
    # later patterns see the result of earlier ones - do not reorder.
    endpoint <- str_replace_all(endpoint,fixed(" "),"")
    endpoint <- str_replace_all(endpoint,fixed("0_"),"")
    endpoint <- str_replace_all(endpoint,fixed("ClinicalChemistry"),"")
    endpoint <- str_replace_all(endpoint,fixed("Hematology"),"")
    endpoint <- str_replace_all(endpoint,fixed("[NotInList]"),"")
    
    # "DevOther_<organ>" is reclassified as a malformation for the organs listed here
    endpoint <- str_replace_all(endpoint,fixed("DevOther_AdrenalGland"),"DevMalformation_AdrenalGland")
    # NOTE(review): "BloodVessel" (capital V) differs from the "Bloodvessel"
    # spelling used in organ.list; the lower-case variant is handled further down
    endpoint <- str_replace_all(endpoint,fixed("DevOther_BloodVessel"),"DevMalformation_BloodVessel")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Bone"),"DevMalformation_Bone")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Brain"),"DevMalformation_Brain")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Diaphragm"),"DevMalformation_Diaphragm")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Ear"),"DevMalformation_Ear")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Eye"),"DevMalformation_Eye")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Epididymis"),"DevMalformation_Epididymis")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Heart"),"DevMalformation_Heart")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Gallbladder"),"DevMalformation_Gallbladder")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Kidney"),"DevMalformation_Kidney")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Liver"),"DevMalformation_Liver")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Lung"),"DevMalformation_Lung")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Nose"),"DevMalformation_Nose")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Pancreas"),"DevMalformation_Pancreas")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Penis"),"DevMalformation_Penis")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Spleen"),"DevMalformation_Spleen")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Stomach"),"DevMalformation_Stomach")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Testes"),"DevMalformation_Testis")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Testis"),"DevMalformation_Testis")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Thymus"),"DevMalformation_Thymus")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_ThyroidGland"),"DevMalformation_ThyroidGland")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Trunk"),"DevMalformation_Trunk")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Ureter"),"DevMalformation_Ureter")
    # NOTE(review): the replacement text reads "UrinaryBadder" (missing "l") -
    # looks like a typo, but curated downstream files may already rely on it
    endpoint <- str_replace_all(endpoint,fixed("DevOther_UrinaryBladder"),"DevMalformation_UrinaryBadder")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Uterus"),"DevMalformation_Uterus")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Intestines"),"DevMalformation_Intestines")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Prostate"),"DevMalformation_Prostate")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_ReproPerf"),"ReproPerf")
    
    endpoint <- str_replace_all(endpoint,fixed("DevOther_SeminalVesicle"),"DevMalformation_SeminalVesicle")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Spinalcord"),"DevMalformation_Spinalcord")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Gonad"),"DevMalformation_Gonad")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_Bloodvessel"),"DevMalformation_Bloodvessel")
    #endpoint <- str_replace_all(endpoint,fixed("DevOther_"),"DevMalformation_")
    #endpoint <- str_replace_all(endpoint,fixed("DevOther_"),"DevMalformation_")
    
    
    # Shorten long system names and collapse repeated "<X>_<X>" fragments
    endpoint <- str_replace_all(endpoint,fixed("ReproductiveOutcome"),"ReproOutcome")
    endpoint <- str_replace_all(endpoint,fixed("ReproductivePerformance"),"ReproPerf")
    endpoint <- str_replace_all(endpoint,fixed("SexualDevelopmentalLandmark_"),"")
    endpoint <- str_replace_all(endpoint,fixed("EstrousCycle_EstrousCycle"),"EstrousCycle")
    endpoint <- str_replace_all(endpoint,fixed("Other_Other_"),"")
    endpoint <- str_replace_all(endpoint,fixed("Urinalysis_Urinalysis_Urinalysis_"),"Urinalysis_")
    endpoint <- str_replace_all(endpoint,fixed("CholinesteraseInhibition_"),"")
    endpoint <- str_replace_all(endpoint,fixed("Incisoreruption_Incisoreruption"),"IncisorEruption")
    endpoint <- str_replace_all(endpoint,fixed("AnogenitalDistance_AnogenitalDistance"),"AnogenitalDistance")
    endpoint <- str_replace_all(endpoint,fixed("PinnaUnfolding_PinnaUnfolding"),"PinnaUnfolding")
    endpoint <- str_replace_all(endpoint,fixed("DevelopmentalLandmark"),"DevLandmark")
    endpoint <- str_replace_all(endpoint,fixed("Eyeopening_Eyeopening"),"EyeOpening")
    endpoint <- str_replace_all(endpoint,fixed("NippleDevelopment_NippleDevelopment"),"NippleDevelopment")
    endpoint <- str_replace_all(endpoint,fixed("Preputialseparation_Preputialseparation"),"PreputialSeparation")
    endpoint <- str_replace_all(endpoint,fixed("Vaginalopening_Vaginalopening"),"VaginalOpening")
    endpoint <- str_replace_all(endpoint,fixed("ReproMating_ReproPerf"),"ReproPerf")
    endpoint <- str_replace_all(endpoint,fixed("DevOther_ReproOutcome"),"ReproOutcome")
    # Same pattern as applied earlier; here it catches "DevOther_ReproPerf"
    # created by the ReproductivePerformance -> ReproPerf shortening above
    endpoint <- str_replace_all(endpoint,fixed("DevOther_ReproPerf"),"ReproPerf")
    endpoint <- str_replace_all(endpoint,fixed("'"),"")
    endpoint <- str_replace_all(endpoint,fixed("Runts_Runts"),"Runts")
    endpoint <- str_replace_all(endpoint,fixed("Sexratio_Sexratio(%male)"),"SexRatio")
    endpoint <- str_replace_all(endpoint,fixed("Aborted_Aborted"),"Aborted")
    endpoint <- str_replace_all(endpoint,fixed("Birthindex_Birthindex"),"BirthIndex")
    endpoint <- str_replace_all(endpoint,fixed("Fertility_Fertility"),"Fertility")
    endpoint <- str_replace_all(endpoint,fixed("Gestationalinterval_Gestationalinterval"),"GestationalInterval")
    endpoint <- str_replace_all(endpoint,fixed("Implantations_Implantations"),"Implantations")
    endpoint <- str_replace_all(endpoint,fixed("Mating_Mating"),"Mating")
    endpoint <- str_replace_all(endpoint,fixed("Postimplantationloss_Postimplantationloss"),"PostimplantationLoss")
    endpoint <- str_replace_all(endpoint,fixed("Pregnancy_Pregnancy"),"Pregnancy")
    endpoint <- str_replace_all(endpoint,fixed("PregnancyIndex_PregnancyIndex"),"PregnancyIndex")
    endpoint <- str_replace_all(endpoint,fixed("Preimplantationloss_Preimplantationloss"),"PreimplantationLoss")
    endpoint <- str_replace_all(endpoint,fixed("PrematureDelivery_PrematureDelivery"),"PrematureDelivery")
    endpoint <- str_replace_all(endpoint,fixed("ReproPerf_ReproPerf"),"ReproPerf")
    endpoint <- str_replace_all(endpoint,fixed("Resorptions_Resorptions"),"Resorptions")
    endpoint <- str_replace_all(endpoint,fixed("DevDelay_DevLandmark"),"DevDelay")
    endpoint <- str_replace_all(endpoint,fixed("SpermMeasure_Sperm"),"Sperm")
    endpoint <- str_replace_all(endpoint,fixed("Spermmorphology_Sperm"),"Sperm_Morphology")
    endpoint <- str_replace_all(endpoint,fixed("SpermMeasure_Spermmotility"),"Sperm_Motility")
    endpoint <- str_replace_all(endpoint,fixed("Locomotion_Locomotion"),"Locomotion")
    endpoint <- str_replace_all(endpoint,fixed("Motoractivity_Motoractivity_Activity"),"Motoractivity")
    endpoint <- str_replace_all(endpoint,fixed("Motoractivity_Motoractivity_NOS"),"Motoractivity")
    endpoint <- str_replace_all(endpoint,fixed("Reflexes_Reflexes"),"Reflexes")
    endpoint <- str_replace_all(endpoint,fixed("GestationIndex_GestationIndex"),"GestationIndex")
    endpoint <- str_replace_all(endpoint,fixed("Mating_Postimplantationlos"),"Postimplantationlos")
    endpoint <- str_replace_all(endpoint,fixed("Postimplantationloss_ZonaFasciculata"),"")
    endpoint <- str_replace_all(endpoint,fixed("PregnancyIndex_Preimplantationloss"),"Preimplantationloss")
    endpoint <- str_replace_all(endpoint,fixed("Resorptions_ZonaFasciculata_Resorptions"),"Resorptions_ZonaFasciculata")
    
    # Collapse doubled underscores; two passes reduce runs of up to four to one
    endpoint <- str_replace_all(endpoint,fixed("__"),"_")
    endpoint <- str_replace_all(endpoint,fixed("__"),"_")
    
    # Strip one leading and one trailing underscore
    if(substr(endpoint,1,1)=="_") endpoint <- substr(endpoint,2,nchar(endpoint))
    if(substr(endpoint,nchar(endpoint),nchar(endpoint))=="_") endpoint <- substr(endpoint,1,(nchar(endpoint)-1))
    
    eclean[i] <- endpoint
  }
  
  # Append the two derived columns and write the table out
  endpoint.table <- cbind(endpoint.table,endpoint.raw)
  names(endpoint.table)[dim(endpoint.table)[2]] <- "endpoint_raw"
  endpoint.table <- cbind(endpoint.table,eclean)
  names(endpoint.table)[dim(endpoint.table)[2]] <- "endpoint_auto"
  
  filename <- paste("../ToxRefDB/toxrefdb_endpoint_clean_temp.xlsx",sep="")
  write.xlsx(endpoint.table,file=filename,row.names=F)
}
#--------------------------------------------------------------------------------------
#
# seed the anemia endpoint map: one identity row (endpoint_auto = endpoint_final) per endpoint_final with useme==1
#
#--------------------------------------------------------------------------------------
build.anemia.endpoints.0 <- function() {
  # Write a starting version of the anemia endpoint map: every distinct
  # endpoint_final flagged useme==1 in the curated endpoint workbook, sorted,
  # mapped to itself (endpoint_auto == endpoint_final)
  print.current.function()

  source.path <- "../ToxRefDB/toxrefdb_endpoint_2016_01_26.xlsx"
  target.path <- "../anemia_files/anemia_endpoints_temp.xlsx"

  curated <- read.xlsx(source.path)
  flagged <- curated[,"useme"]==1
  # sort() also discards the NA entries produced by rows whose useme is missing
  finals <- sort(unique(curated[flagged,"endpoint_final"]))

  seed <- as.data.frame(cbind(endpoint_auto=finals,endpoint_final=finals))
  write.xlsx(seed,target.path)
}
#--------------------------------------------------------------------------------------
#
# build the per-study chemical table and the LEL and hit matrices
#
#--------------------------------------------------------------------------------------
prep.lel.mats <- function(species,study_type) {
  # Build, for one species / study type, the chemical-by-study table and the
  # matching LEL and hit matrices.
  #
  # Reads  ../ToxRefDB/all_data_<species>_<study_type>.xlsx
  #        ../anemia_files/anemia_endpoints_2016_01_28.xlsx
  # Writes ../ToxRefDB/chemicals_<species>_<study_type>.xlsx  one row per CODE/study_id
  #        ../ToxRefDB/lel_mat_<species>_<study_type>.xlsx    "dose" per endpoint_final (NA = none)
  #        ../ToxRefDB/hit_mat_<species>_<study_type>.xlsx    1 where lel_mat has a value, else 0
  #
  # Both matrices carry the 8 chemical columns in front of the endpoint columns.
  print.current.function()

  filename <- paste("../ToxRefDB/all_data_",species,"_",study_type,".xlsx",sep="")
  all.data <- read.xlsx(filename)
  code.list <- sort(unique(all.data[,"CODE"]))

  name.list <- c("CODE","CASRN","Name","DSSTox_GSID","study_id","source_study_alphanumeric_id","ldt","hdt")

  # --- chemical / study table ---------------------------------------------
  # Rows are collected in a list and bound once instead of rbind-ing inside
  # the loop; seq_along() keeps the loops from running on empty input.
  rows <- list()
  keys <- character(0)
  for(i in seq_along(code.list)) {
    code <- code.list[i]
    temp <- all.data[is.element(all.data[,"CODE"],code),]
    study.id.list <- sort(unique(temp[,"study_id"]))
    for(j in seq_along(study.id.list)) {
      study.id <- study.id.list[j]
      temp2 <- temp[is.element(temp[,"study_id"],study.id),]
      rows[[length(rows)+1]] <- data.frame(
        CODE=code,
        CASRN=temp2[1,"chemical_casrn"],
        Name=temp2[1,"chemical_name"],
        DSSTox_GSID=temp2[1,"chemical_id"],
        study_id=temp2[1,"study_id"],
        source_study_alphanumeric_id=temp2[1,"source_study_alphanumeric_id"],
        ldt=min(temp2[,"ldt"]),
        hdt=max(temp2[,"hdt"]),
        stringsAsFactors=FALSE)
      keys <- c(keys,paste(code,"_",study.id,sep=""))
    }
  }
  if(length(rows)>0) {
    chem.data <- do.call(rbind,rows)
  }
  else {
    chem.data <- as.data.frame(matrix(nrow=0,ncol=length(name.list)),stringsAsFactors=FALSE)
    names(chem.data) <- name.list
  }
  # row key "<CODE>_<study_id>" ties the matrices to the chemical table
  rownames(chem.data) <- keys
  outfile <- paste("../ToxRefDB/chemicals_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(chem.data,file=outfile, row.names=FALSE)

  # --- LEL and hit matrices -----------------------------------------------
  filename <- "../anemia_files/anemia_endpoints_2016_01_28.xlsx"
  map <- read.xlsx(filename)
  # Kept from the original: fails on duplicated or missing endpoint_auto values
  rownames(map) <- map[,"endpoint_auto"]

  endpoint.list <- unique(map[,"endpoint_final"])
  lel <- matrix(NA,nrow=nrow(chem.data),ncol=length(endpoint.list))

  all.data <- all.data[is.element(all.data[,"endpoint_final"],map[,"endpoint_final"]),]
  row.idx <- match(paste(all.data[,"CODE"],"_",all.data[,"study_id"],sep=""),keys)
  col.idx <- match(all.data[,"endpoint_final"],endpoint.list)
  # Rows without a known chemical/study key or endpoint are skipped; the
  # original would have grown the matrix and then failed in cbind()
  ok <- !is.na(row.idx) & !is.na(col.idx) & !is.na(all.data[,"endpoint_final"])
  # NOTE(review): when several rows hit the same cell the LAST one wins, as in
  # the original loop - confirm that this, rather than the minimum dose, is intended
  if(any(ok)) lel[cbind(row.idx[ok],col.idx[ok])] <- all.data[ok,"dose"]
  hit <- (!is.na(lel))*1

  lel.mat <- as.data.frame(lel,stringsAsFactors=FALSE)
  hit.mat <- as.data.frame(hit,stringsAsFactors=FALSE)
  names(lel.mat) <- endpoint.list
  names(hit.mat) <- endpoint.list
  rownames(lel.mat) <- keys
  rownames(hit.mat) <- keys

  lel.mat <- cbind(chem.data,lel.mat)
  hit.mat <- cbind(chem.data,hit.mat)
  outfile <- paste("../ToxRefDB/lel_mat_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(lel.mat,file=outfile, row.names=FALSE)
  outfile <- paste("../ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(hit.mat,file=outfile, row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# calculate correlations between all endpoints 
#
#--------------------------------------------------------------------------------------
generate.lel.correlations <- function(species,study_type) {
  # Test every pair of endpoints in the hit matrix for co-occurrence and
  # record the significant ones.
  #
  # Reads  ../ToxRefDB/hit_mat_<species>_<study_type>.xlsx
  # Writes ../output/lel_correlations_<species>_<study_type>.txt
  #        (tab-delimited: endpoint1, endpoint2, then the columns named by TxT()$title)
  #
  # For a pair (e1,e2) the 2x2 counts are
  #   a = both hit, b = e1 only, c = e2 only, d = neither
  # Pairs with a>=2 are passed to TxT() in both orientations; an orientation
  # is written when odds.ratio>2 and p.value<0.05, and echoed to the console
  # when a>=10.
  print.current.function()
  outfile <- paste("../output/lel_correlations_",species,"_",study_type,".txt",sep="")
  # dummy call, only to obtain the header text for the statistics columns
  txt <- TxT(1,2,3,4)
  cat(paste("endpoint1\tendpoint2\t",txt$title,"\n",sep=""),file=outfile,append=FALSE)

  infile <- paste("../ToxRefDB/hit_mat_",species,"_",study_type,".xlsx",sep="")
  hit.mat <- read.xlsx(infile)
  # Drop the chemical / study columns by name. prep.lel.mats writes eight of
  # them, so the former positional cut at column 7 treated ldt and hdt as
  # endpoints. Row names are not set: CODE repeats when a chemical has more
  # than one study, and nothing below uses them.
  meta.cols <- c("CODE","CASRN","Name","DSSTox_GSID","study_id","source_study_alphanumeric_id","ldt","hdt")
  hit.mat <- hit.mat[,!is.element(names(hit.mat),meta.cols),drop=FALSE]
  endpoint.list <- names(hit.mat)
  nendpoint <- length(endpoint.list)
  # fewer than two endpoints: no pairs, only the header is written
  if(nendpoint<2) return(invisible(NULL))

  # Evaluate one orientation of a pair and report it if it is significant
  report.pair <- function(first,second,a,b,c,d) {
    txt <- TxT(a,b,c,d)
    if(txt$odds.ratio>2 && txt$p.value<0.05) {
      s <- paste(first,"\t",second,"\t",txt$sval,"\n",sep="")
      cat(s,file=outfile,append=TRUE)

      if(a>=10) {
        cat("================================\n")
        cat(first," - ",second,"\n")
        cat("Examples: ",a,"\n")
        cat("OR: ",format(txt$odds.ratio,digits=2),"\n")
        cat("p.value: ",format(txt$p.value,digits=2),"\n")
        flush.console()
      }
    }
  }

  for(i in seq_len(nendpoint-1)) {
    e1 <- endpoint.list[i]
    b1 <- hit.mat[,e1]
    for(j in (i+1):nendpoint) {
      e2 <- endpoint.list[j]
      b2 <- hit.mat[,e2]
      a <- sum(b1*b2)
      if(a>=2) {
        b <- sum(b1*(1-b2))
        c <- sum((1-b1)*b2)
        d <- sum((1-b1)*(1-b2))
        report.pair(e1,e2,a,b,c,d)
        # reverse orientation: the two off-diagonal counts swap places
        report.pair(e2,e1,a,c,b,d)
      }
    }
  }
  invisible(NULL)
}
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#--------------------------------------------------------------------------------------
#
# build the endpoint aggregate table
#
#--------------------------------------------------------------------------------------
build.endpoint.aggregates.v2 <- function() {
  # Collect the distinct effect/endpoint annotation combinations from each
  # per-study ToxRefDB extract and attach an automatically generated endpoint
  # name to every combination.
  #
  # Reads, for a fixed list of species / study type pairs,
  #   ../ToxRefDB/toxrefdb_acceptable_<species>_<study_type>.xlsx
  # and writes the stacked result to
  #   ../ToxRefDB/toxrefdb_endpoint_aggregated_temp.xlsx
  #
  # Rows whose effect_type is "Organ Weight", "Pathology (Gross)" or
  # "In-Life Observations" are dropped unless their endpoint_target is
  # "Mortality" or "BodyWeight". The generated name is stored in both the
  # endpoint_prelim and endpoint_auto columns.
  #
  # Takes no arguments; called for its side effects.
  print.current.function()

  species.list <- c("rat","rat","rat","rat","mouse","dog","rabbit")
  type.list <- c("CHR","DEV","MGR","SUB","CHR","CHR","DEV")
  nstudy <- length(type.list)

  name.list <- c(
    "study_type",
    "species",
    "effect_type",
    "effect_target",
    "effect_desc",
    "direction",
    "target_site",
    "focal_diffuse",
    "effect_category",
    "endpoint_category",
    "endpoint_type",
    "endpoint_system",
    "endpoint_target",
    "endpoint_lifestage"
  )

  all.result <- NULL

  for(istudy in seq_len(nstudy)) {
    species <- species.list[istudy]
    study_type <- type.list[istudy]
    cat(species,":",study_type,"\n"); flush.console()

    filename <- paste("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
    temp <- read.xlsx(filename)

    # exclude the three effect types, but always keep mortality / body weight rows
    excluded <- is.element(temp[,"effect_type"],
                           c("Organ Weight","Pathology (Gross)","In-Life Observations"))
    rescued <- is.element(temp[,"endpoint_target"],c("Mortality","BodyWeight"))
    temp <- temp[!excluded | rescued,]

    result <- unique(temp[,name.list])
    print(dim(result))

    # paste() turns zero-length inputs into "", so an empty subset would
    # otherwise yield a single spurious endpoint name; nothing to add here
    if(nrow(result)==0) next

    if(study_type=="DEV") {
      eprelim <- paste(
        result[,"effect_category"],"_",
        result[,"endpoint_type"],"_",
        result[,"endpoint_lifestage"],"_",
        result[,"effect_target"],"_",
        result[,"endpoint_target"],"_",
        result[,"effect_desc"],"_",
        result[,"target_site"],"_",
        result[,"direction"],
        sep="")

      syndromes <- paste(
        result[,"endpoint_type"],"_",
        result[,"endpoint_lifestage"],"_",
        result[,"endpoint_target"],
        sep="")
    }
    else if(study_type=="MGR") {
      # same as DEV but without effect_target
      eprelim <- paste(
        result[,"effect_category"],"_",
        result[,"endpoint_type"],"_",
        result[,"endpoint_lifestage"],"_",
        result[,"endpoint_target"],"_",
        result[,"effect_desc"],"_",
        result[,"target_site"],"_",
        result[,"direction"],
        sep="")

      syndromes <- paste(
        result[,"endpoint_type"],"_",
        result[,"endpoint_lifestage"],"_",
        result[,"endpoint_target"],
        sep="")
    }
    else {
      eprelim <- paste(
        result[,"endpoint_target"],"_",
        result[,"endpoint_type"],"_",
        result[,"effect_desc"],"_",
        result[,"target_site"],"_",
        result[,"direction"],
        sep="")

      syndromes <- paste(
        result[,"endpoint_target"],"_",
        result[,"endpoint_type"],
        sep="")
    }

    # fix.endpoint.v2 handles one string at a time; a separate index is used
    # so the study loop variable is not reused
    for(k in seq_along(eprelim)) {
      eprelim[k] <- fix.endpoint.v2(eprelim[k])
      syndromes[k] <- fix.endpoint.v2(syndromes[k])
    }

    result[,"endpoint_prelim"] <- eprelim
    result[,"endpoint_auto"] <- eprelim
    # NOTE(review): syndromes is only consumed by the disabled column below
    #result[,"endpoint"] <- eprelim
    #result[,"syndrome_prelim"] <- syndromes

    all.result <- rbind(all.result,result)
  }

  filename <- paste("../ToxRefDB/toxrefdb_endpoint_aggregated_temp.xlsx",sep="")
  write.xlsx(all.result,file=filename,row.names=F)

}
#--------------------------------------------------------------------------------------
#
# fix an endpoint
#
#--------------------------------------------------------------------------------------
fix.endpoint.v2 <- function(endpoint) {
  # Normalize one automatically generated endpoint name.
  #
  # endpoint: a single character string (the trailing `if` tests are scalar).
  # Returns the cleaned-up string.
  #
  # The string is passed through an ordered list of literal (fixed, not regex)
  # find/replace rules. Later rules operate on the output of earlier ones, so
  # the order of the table below is significant and repeated rules are
  # intentional. Afterwards one leading and one trailing underscore are
  # removed and, if the name starts with "Reproductive_", every occurrence of
  # "Reproductive_" is deleted.
  print.current.function()

  # pattern, replacement - applied top to bottom
  rule.table <- c(
    " ", "",
    "NA_", "_",
    "NOS_", "_",
    ",NOS", "",
    "(NOS)", "",
    "_0_", "_",
    "[NotInList]", "",
    "_adult_", "_",
    "Change,NOS", "",
    "Increase", "Incr",
    "Decrease", "Decr",
    "OtherSystemic", "Systemic",
    "PathologyNeoplastic", "Neoplastic",
    "PathologyNonProliferative", "NonProliferative",
    "PathologyProliferative", "Proliferative",
    "In-Life Observations", "InLife",
    "CholinesteraseInhibition", "Cholinesterase",
    "Pathology(Non-neoplastic)", "NonNeoplastic",
    "Pathology(Neoplastic)", "Neoplastic",
    "Pathology(Clinical)", "Clinical",
    "Urinalysis_Systemic_", "Urinalysis_",
    "ClinicalChemistry_Systemic_", "",
    "Systemic_ClinicalChemistry_", "",
    "ClinicalChemistry_", "",
    "Hematology_Systemic_", "",
    "Hematology_", "",
    "DevelopmentalDelay", "DevDelay",
    "DevelopmentalMalformation", "DevMalformation",
    "DevelopmentalNeurotoxicity_", "DevNeurotox_",
    "ReproductiveMating", "ReproMating",
    "ReproductiveNonMating", "ReproNonMating",
    "Gestationalinterval_Gestationalinterval", "Gestationalinterval",
    "Implantations_Implantations", "Implantations",
    "Littersize_Littersize", "Littersize",
    "LitterViability_LitterViability", "LitterViability",
    "Livebirthindex_Livebirthindex", "Livebirthindex",
    "LiveFetuses_LiveFetuses", "LiveFetuses",
    "Resorptions_Resorptions", "Resorptions",
    "Runts_Runts", "Runts",
    "Sexratio_Sexratio(%male)", "Sexratio",
    "TotalLitterLoss_TotalLitterLoss", "TotalLitterLoss",
    "BodyWeight_BodyWeight", "BodyWeight",
    "Cholinesterase_", "",
    "Developmental_DevDelay_", "DevDelay_",
    "Developmental_DevMalformation_", "DevMalformation_",
    "Developmental_NonProliferative_", "NonProliferative_",
    "Developmental_Proliferative_", "Proliferative_",
    "Developmental_ReproMating_", "ReproMating_",
    "Developmental_Systemic_BodyWeight_", "BodyWeight_",
    "Developmental_Systemic_Hematology_", "",
    "Maternal_", "",
    "Postimplantationloss_Postimplantationloss", "Postimplantationloss",
    "pregnancy_Pregnancy_Pregnancy", "pregnancy",
    "Preimplantationloss_Preimplantationloss", "Preimplantationloss",
    "PrematureDelivery_PrematureDelivery", "PrematureDelivery",
    "Systemic_BodyWeight", "BodyWeight",
    "BodyWeight_BodyWeight", "BodyWeight",
    "BodyWeightGain_Decr", "BodyWeight_Decr",
    "BodyWeightGain_Incr", "BodyWeight_Incr",
    "Systemic_Hematology_", "",
    "Developmental_", "DevOther_",
    "__", "_",
    "__", "_",
    "__", "_",
    "BodyWeightGain_Decr", "BodyWeight_Decr",
    "BodyWeightGain_Incr", "BodyWeight_Incr",
    "DevOther_DevNeurotox", "DevNeurotox",
    "/", "",
    "DevOther_Systemic_", "",
    "BodyWeight_Offspring(pup)Weight_Decr", "BodyWeight_Offspring_Decr",
    "BodyWeight_Offspring(pup)WeightGain_Decr", "BodyWeight_Offspring_Decr",
    "AnogenitalDistance_AnogenitalDistance", "AnogenitalDistance",
    "NippleDevelopment_NippleDevelopment", "NippleDevelopment",
    "Vaginalopening_Vaginalopening", "Vaginalopening",
    "SexualDevelopmentalLandmark", "SexDevLandmark",
    "DevelopmentalLandmark", "DevLandmark",
    "Coordination_Coordination", "Coordination",
    "Locomotion_Locomotion", "Locomotion",
    "DevOther_ThyroidStimulatingHormone(=Thyrotropin)(TSH)_Incr", "ThyroidStimulatingHormone(=Thyrotropin)(TSH)_Incr",
    "DevOther_Triiodothyronine(T3),Total_Decr", "Triiodothyronine(T3),Total_Decr",
    "NonProliferative_", "",
    "AdrenalGland_AdrenalGland", "AdrenalGland",
    "Bloodvessel_Bloodvessel", "Bloodvessel",
    "Brain_Brain", "Brain",
    "Kidney_Kidney", "Kidney",
    "Liver_Liver", "Liver",
    "Spinalcord_Spinalcord", "Spinalcord",
    "Ureter_Ureter", "Ureter",
    "IntestineLarge_IntestineLarge", "IntestineLarge",
    "IntestineSmall_IntestineSmall", "IntestineSmall",
    "Lung_Lung", "Lung",
    "Nose_Nose", "Nose",
    "Prostate_Prostate", "Prostate",
    "SeminalVesicle_SeminalVesicle", "SeminalVesicle",
    "Skin_Skin", "Skin",
    "Spleen_Spleen", "Spleen",
    "Stomach_Stomach", "Stomach",
    "Testes_Testis", "Testis",
    "Thymus_Thymus", "Thymus",
    "ThyroidGland_ThyroidGland", "ThyroidGland",
    "Trachea_Trachea", "Trachea",
    "UncertainPrimarySite_UncertainPrimarySite", "UncertainPrimarySite",
    "Uterus_Uterus", "Uterus",
    "Vagina_Vagina", "Vagina",
    "Reproductive_ReproMating", "ReproMating",
    "Offspring_DevDelay", "DevDelay",
    "Offspring_ReproMating", "ReproMating",
    "Deadfetuses_Deadfetuses", "Deadfetuses",
    "Offspring_Systemic_juvenile_BodyWeight_Fetalweight_Decr", "Fetalweight_Decr",
    "Offspring_Systemic_juvenile_BodyWeight_Offspring_Decr", "BodyWeight_Decr",
    "Offspring_Systemic_juvenile_Mortality_Mortality_Mortality_Incr", "Mortality_Incr",
    "Ovary_Ovary", "Ovary",
    "Proliferative_Prostate", "Prostate_Proliferative",
    "Proliferative_Liver", "Liver_Proliferative",
    "Proliferative_Kidney", "Kidney_Proliferative",
    "Proliferative_Spleen", "Spleen_Proliferative",
    "Proliferative_SeminalVesicle", "SeminalVesicle_Proliferative",
    "Reproductive_ReproNonMating", "ReproNonMating",
    "Deadfetuses_Deadfetuses", "Deadfetuses",
    "pregnancy_ReproductivePerformance", "pregnancy",
    "ReproductivePerformance", "ReproPerf",
    "EstrousCycle_EstrousCycle", "EstrousCycle",
    "ClinicalSigns_", "",
    "[Clinical]", "",
    "Systemic_", "",
    "Tissue_Tissue", "Tissue",
    "DevDelay_juvenile", "DevDelay",
    "DevMalformation_fetal", "DevMalformation",
    "DevOther_Bone", "DevMalformation_Bone",
    "DevOther_Limb", "DevMalformation_Bone_Limb",
    "DevOther_MouthJaw", "DevMalformation_Bone_MouthJaw",
    "DevOther_Eye", "DevMalformation_Eye",
    "DevOther_PawDigit", "DevMalformation_Bone_PawDigit",
    "DevOther_Radius", "DevMalformation_Bone_Radius",
    "DevOther_ReproPerf", "ReproPerf",
    "DevOther_ReproNonMating", "ReproNonMating",
    "DevOther_Testosterone_Decr", "Testosterone_Decr",
    "DevOther_Ulna", "DevMalformation_Bone_Ulna",
    "fetal_AdrenalGland", "AdrenalGland_fetal",
    "fetal_Brain", "Brain_fetal",
    "fetal_Liver", "Liver_fetal",
    "fetal_Kidney", "Kidney_fetal",
    "fetal_Spinalcord", "Spinalcord_fetal",
    "fetal_Ureter", "Ureterfetal",
    "Mortality_Mortality", "Mortality",
    "Mortality_Mortality", "Mortality",
    "Mortality_Mortality", "Mortality",
    "Mortality_Mortality", "Mortality",
    "MaternalWastage_Mortality_Died_Incr", "Mortality_MaternalWastage_Incr",
    "MaternalWastage_Mortality_DiedPregnant_Incr", "Mortality_MaternalWastage_DiedPregnant_Incr",
    "Offspring_Gonad", "Gonad_Offspring",
    "Offspring_juvenile_Uterus", "Uterus_Offspring",
    "Other_Othe", "Other",
    "Aborted_Aborted", "Aborted",
    "Birthindex_Birthindex", "Birthindex",
    "Fertility_Fertility", "Fertility",
    "Pregnancy_Pregnancy", "Pregnancy",
    "PregnancyIndex_", "",
    "BodyWeight_Damweight_Decr", "BodyWeight_Decr",
    "DevMalformation_Bone_DevMalformation_Sternebra", "DevMalformation_Bone_Sternebra",
    "Heart_Heart", "Heart",
    "Kidney_fetal", "DevOther_Kidney",
    "Liver_fetal", "DevOther_Liver",
    "MaternalWastage_Mortality_DiedNonpregnant_Incr", "Mortality_MaternalWastage_DiedNonPregnant_Incr",
    "Neurotoxicity_Motoractivity_Motoractivity_Decr", "NeuroTox_Motoractivity_Decr",
    "Gallbladder_Gallbladder", "Gallbladder_Gallbladder",
    "Proliferative_fetal_Gallbladder", "Gallbladder_Proliferative_fetal",
    "Proliferative_fetal_Lung", "Lung_Proliferative_fetal",
    "Proliferative_fetal_Spleen", "Spleen_Proliferative_fetal",
    "Proliferative_fetal_Thymus", "Thymus_Proliferative_fetal",
    "Proliferative_Liver_fetal_Proliferation", "Liver_Proliferative_fetal",
    "Proliferative_Nose", "Nose_Proliferative",
    "Proliferative_Stomach", "Stomach_Proliferative",
    "ReproMating_juvenile_OffspringSurvival", "ReproMating_OffspringSurvival",
    "OffspringSurvival-Early", "",
    "SpermMeasure_Sperm_Spermmotility", "SpermMotility",
    "Spermmorphology_Sperm_Abnormal", "Spermmorphology_Abnormal",
    "Urinalysis_Urinalysis", "Urinalysis",
    "Eyeopening_Eyeopening", "Eyeopening",
    "Incisoreruption_Incisoreruption", "IncisorEruption",
    "PinnaUnfolding_PinnaUnfolding", "PinnaUnfolding",
    "DevNeurotox_juvenile", "DevNeurotox",
    "DevOther_General_Runts", "ReproMating_pregnancy_ReproductiveOutcome_Runts_Incr",
    "Gallbladder_Gallbladder", "Gallbladder",
    "Neoplastic_Uterus", "Uterus_Neoplastic",
    "Anemia-_", "Anemia_",
    "Birthindex_ReproMating_Birthindex_Decr", "ReproMating_Birthindex_Decr",
    "BodyWeight_Offspring(pup)Weight_Decr", "BodyWeight_Offspring_Decr",
    "Preputialseparation_Preputialseparation", "Preputialseparation",
    "DevMalformation_Appendicular", "DevMalformation_Bone_Appendicular",
    "DevOther_Kidney_Smallrenalpapilla_Papilla_Incr", "DevOther_Kidney_Smallrenalpapilla_Incr",
    "Epididymis_Neoplastic_Mesothelioma_Mesothelium_Incr", "Epididymis_Neoplastic_Mesothelioma_Incr",
    "Gallbladder_Proliferative_fetal_Gallbladder_Hypoplasia_Incr", "Gallbladder_fetal_Proliferative_Hypoplasia_Incr",
    "Mortality_Died_Incr", "Mortality_Incr",
    "Offspring_Ablepharia", "DevMalformation_Eye_Ablepharia",
    "Offspring_Aglossia", "DevMalformation_Bone_Aglossia",
    "Offspring_Agnathia_Incr", "DevMalformation_Bone_Agnathia_Incr",
    "Offspring_Anophthalmia_Bilateral_Incr", "DevMalformation_Eye_Anophthalmia_Bilateral_Incr",
    "Offspring_Atrophy_Hepatocyte_Incr", "Liver_Offspring_Atrophy_Hepatocyte_Incr",
    "Offspring_Canal_Incr", "DevMalformation_Ear_Canal_Incr",
    "Offspring_Cleftpalate_Incr", "DevMalformation_Bone_MouthJaw_Cleftpalate_Incr",
    "Offspring_DevMalformation_Axial", "DevMalformation_Bone_Axial",
    "Offspring_DevMalformation_Cranial", "DevMalformation_Bone_Cranial",
    "Offspring_Hydronephrosis_Incr", "Kidney_Offspring_Hydronephrosis_Incr",
    "Offspring_Hydroureter_Incr", "Ureter_Offspring_Hydroureter_Incr",
    "Offspring_FullSupernumerary_Incr", "DevMalformation_Bone_FullSupernumerary_Incr",
    "Offspring_Exencephaly_Incr", "DevMalformation_Bone_Cranial_Exencephaly_Incr",
    "Offspring_Dilatedrenalpelvis_Incr", "DevMalformation_Kidney_Dilatedrenalpelvis_Incr",
    "Offspring_Dilatation_Pelvis_Incr", "DevMalformation_Bone_Dilatation_Pelvis_Incr",
    "Offspring_juvenile_AdrenalGland", "AdrenalGland_Offspring",
    "Offspring_juvenile_Alkalinephosphatase(ALPALK)_Incr", "Alkalinephosphatase(ALPALK)_Incr",
    "Offspring_juvenile_BodyWeight", "BodyWeight_Offspring",
    "Offspring_juvenile_Brain", "Brain_Offspring",
    "Offspring_juvenile_Eye", "Eye_Offspring",
    "Offspring_juvenile_Kidney", "Kidney_Offspring",
    "Offspring_juvenile_Liver", "Liver_Offspring",
    "Offspring_juvenile_Lung", "Lung_Offspring",
    "Offspring_juvenile_MammaryGland", "MammaryGland_Offspring",
    "Offspring_juvenile_Stomach", "Stomach_Offspring",
    "Offspring_juvenile_Testis", "Testis_Offspring",
    "Offspring_juvenile_ThyroidGland", "ThyroidGland_Offspring",
    "Parental_", "",
    "Lactationindex_Lactationindex", "Lactationindex",
    "Viabilityindex_Viabilityinde", "Viabilityindex",
    "Intercurrentdeaths_Intercurrentdeaths", "Intercurrentdeaths",
    "Sperm_ReproNonMating", "ReproNonMating_Sperm",
    "Offspring_Neurotoxicity_juvenile_", "Neurotox_juvenile_",
    "Offspring_Proliferative_juvenile_Kidney", "Kidney_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_Liver", "Liver_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_MammaryGland", "MammaryGland_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_PituitaryGland", "PituitaryGland_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_Spleen", "Spleen_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_Stomach", "Stomach_Proliferative_juvenile",
    "Offspring_Proliferative_juvenile_Ureter", "Ureter_Proliferative_juvenile",
    "Offspring_juvenile_", "",
    "Offspring_Retinafold_Incr", "DevMalformation_Eye_Retinafold_Incr",
    "Offspring_Openeye_Incr", "DevMalformation_Eye_Openeye_Incr",
    "Offspring_Microphthalmia_Bilateral_Incr", "DevMalformation_Eye_Microphthalmia_Bilateral_Incr",
    "Offspring_Microphthalmia_Incr", "DevMalformation_Eye_Microphthalmia_Incr",
    "Offspring_Macroglossia_Incr", "DevMalformation_Jaw_Macroglossia_Incr",
    "Offspring_Misshapen_Incr", "DevMalformation_Nose_Misshapen_Incr",
    "Offspring_Proboscis_Incr", "DevMalformation_Nose_Proboscis_Incr",
    "Offspring_spacebetweenbodywallandorgans_Incr", "DevOther_spacebetweenbodywallandorgans_Incr",
    "Offspring_Umbilicalhernia_Incr", "DevOther_Umbilicalhernia_Incr",
    "Offspring_Runts_Incr", "ReproMating_pregnancy_ReproductiveOutcome_Runts_Incr",
    "__", "_",
    "__", "_",
    "__", "_"
  )
  # odd positions hold the patterns, even positions the replacements
  find.list <- rule.table[c(TRUE,FALSE)]
  swap.list <- rule.table[c(FALSE,TRUE)]

  cleaned <- endpoint
  for(k in seq_along(find.list)) {
    cleaned <- str_replace_all(cleaned,fixed(find.list[k]),swap.list[k])
  }

  # trim a single underscore from either end
  if(startsWith(cleaned,"_")) {
    cleaned <- substr(cleaned,2,nchar(cleaned))
  }
  if(endsWith(cleaned,"_")) {
    cleaned <- substr(cleaned,1,(nchar(cleaned)-1))
  }
  # a name that still begins with "Reproductive_" loses every occurrence of it
  if(startsWith(cleaned,"Reproductive_")) {
    cleaned <- str_replace_all(cleaned,fixed("Reproductive_"),"")
  }

  cleaned
}
#--------------------------------------------------------------------------------------
#
# process the study data: build the per-chemical endpoint table and the preliminary syndrome table
#
#--------------------------------------------------------------------------------------
process.study.data <- function(species,study_type) {
  # Turn one per-study ToxRefDB extract into a per-chemical endpoint table and
  # a preliminary syndrome table.
  #
  # species, study_type: used to build the file names; study_type also selects
  #   the layout of the preliminary endpoint key ("DEV" and "MGR" share one
  #   layout, every other value uses the other).
  #
  # Reads
  #   ../ToxRefDB/toxrefdb_acceptable_<species>_<study_type>.xlsx
  #   ../ToxRefDB/endpoint_aggregates.xlsx  (columns endpoint_prelim, endpoint,
  #                                          useme, syndrome_prelim are used)
  # Writes
  #   ../syndromes/syndromes_<species>_<study_type>_PRELIM.xlsx
  #       distinct endpoints seen in at least 5 distinct chemical CODEs
  #   ../ToxRefDB/endpoints_by_chemicals_<species>_<study_type>.xlsx
  #       one row per retained input row
  #
  # Called for its side effects.
  cat("==========================================================================\n")
  cat("process.study.data\n")
  cat("==========================================================================\n")
  flush.console()
  filename <- paste("../ToxRefDB/toxrefdb_acceptable_",species,"_",study_type,".xlsx",sep="")
  raw <- read.xlsx(filename)

  # exclude the three effect types, but always keep mortality / body weight rows
  excluded <- is.element(raw[,"effect_type"],
                         c("Organ Weight","Pathology (Gross)","In-Life Observations"))
  rescued <- is.element(raw[,"endpoint_target"],c("Mortality","BodyWeight"))
  raw <- raw[!excluded | rescued,]
  nrow.raw <- nrow(raw)

  # chemical code: "C" + CASRN with the hyphens removed
  code.list <- paste("C",raw[,"chemical_casrn"],sep="")
  code.list <- str_replace_all(code.list,"-","")

  # preliminary endpoint key
  if(is.element(study_type,c("DEV","MGR"))) {
    # NOTE(review): there is no "_" between endpoint_lifestage and direction,
    # and the key ends in "_". Left as is because the keys are matched against
    # endpoint_aggregates.xlsx - confirm before changing.
    eprelim <- paste(
      raw[,"endpoint_type"],"_",
      raw[,"endpoint_target"],"_",
      raw[,"effect_desc"],"_",
      raw[,"target_site"],"_",
      raw[,"endpoint_lifestage"],
      raw[,"direction"],"_",
      sep="")
  }
  else {
    eprelim <- paste(
      raw[,"endpoint_target"],"_",
      raw[,"endpoint_type"],"_",
      raw[,"effect_desc"],"_",
      raw[,"target_site"],"_",
      raw[,"direction"],
      sep="")
  }
  # paste() turns zero-length inputs into "", which would give one spurious
  # key when no rows survived the filter
  if(nrow.raw==0) eprelim <- character(0)

  # fix.endpoint.v2 handles one string at a time
  for(k in seq_along(eprelim)) {
    eprelim[k] <- fix.endpoint.v2(eprelim[k])
  }

  filename <- paste("../ToxRefDB/endpoint_aggregates.xlsx",sep="")
  aggregates <- read.xlsx(filename)

  # look every key up in the aggregates table (first matching row wins);
  # keys without a match keep the preliminary name, useme 0 and syndrome NA
  elist <- eprelim
  slist <- rep(NA_character_,length(eprelim))
  ulist <- vector(mode="integer",length=length(eprelim))
  agg.row <- match(eprelim,aggregates[,"endpoint_prelim"])
  found <- !is.na(agg.row)
  if(any(found)) {
    elist[found] <- aggregates[agg.row[found],"endpoint"]
    ulist[found] <- aggregates[agg.row[found],"useme"]
    slist[found] <- aggregates[agg.row[found],"syndrome_prelim"]
  }

  result <- data.frame(
    CODE=code.list,
    CASRN=raw[,"chemical_casrn"],
    Name=raw[,"chemical_name"],
    DSSTox_GSID=raw[,"chemical_id"],
    study_type=raw[,"study_type"],
    species=raw[,"species"],
    strain=raw[,"strain"],
    ldt=raw[,"ldt"],
    hdt=raw[,"hdt"],
    gender=raw[,"gender"],
    dose=raw[,"dose"],
    effect_type=raw[,"effect_type"],
    endpoint_target=raw[,"endpoint_target"],
    effect_desc=raw[,"effect_desc"],
    direction=raw[,"direction"],
    target_site=raw[,"target_site"],
    endpoint_type=raw[,"endpoint_type"],
    endpoint_system=raw[,"endpoint_system"],
    endpoint_lifestage=raw[,"endpoint_lifestage"],
    endpoint_prelim=eprelim,
    endpoint=elist,
    useme=ulist,
    syndrome_prelim=slist,
    stringsAsFactors=FALSE)

  # endpoint summary: annotation columns, a chemical count and five syndrome
  # slots, of which only the first is pre-filled
  endpoint.cols <- c(
    "study_type",
    "endpoint_lifestage",
    "endpoint_type",
    "effect_type",
    "endpoint_system",
    "endpoint_target",
    "target_site",
    "effect_desc",
    "direction",
    "endpoint_prelim",
    "endpoint",
    "useme")
  blank <- rep("",nrow.raw)
  endpoints <- data.frame(
    result[,endpoint.cols],
    count=rep(as.numeric(0),nrow.raw),
    syndrome1=slist,
    syndrome2=blank,
    syndrome3=blank,
    syndrome4=blank,
    syndrome5=blank,
    stringsAsFactors=FALSE)

  endpoints <- unique(endpoints)
  # count the distinct chemicals that show each endpoint
  for(k in seq_len(nrow(endpoints))) {
    same.endpoint <- is.element(result[,"endpoint"],endpoints[k,"endpoint"])
    endpoints[k,"count"] <- length(unique(result[same.endpoint,"CODE"]))
  }

  endpoints <- endpoints[endpoints[,"count"]>=5,]
  outfile <- paste("../syndromes/syndromes_",species,"_",study_type,"_PRELIM.xlsx",sep="")
  write.xlsx(endpoints,file=outfile, row.names=F)

  outfile <- paste("../ToxRefDB/endpoints_by_chemicals_",species,"_",study_type,".xlsx",sep="")
  write.xlsx(result,file=outfile, row.names=F)

  cat("read in the toxref data\n")
  flush.console()
}
