#--------------------------------------------------------------------------------------
#
# dataPrep.R utilities for managing ToxCast data
#
# December 2012
# Richard Judson
#  
# US EPA
# Questions, comments to: judson.richard@epa.gov
#
#
#--------------------------------------------------------------------------------------
#library(grDevices)
#library(RColorBrewer)
library(stringr)
library(reshape2)
library(data.table)
source("utils.R")
# Export date stamp and the directory layout derived from it. Every value is
# assigned with <<- so that it is visible to all routines in this file.
toxcast.export.date <<- "2013_12_10"
HOMEDIR <<- paste0("../../data/export/ToxCast_",toxcast.export.date,"/")
FITSDIR <<- paste0(HOMEDIR,"assay_data/curve_fits/")
CHEMDIR <<- paste0(HOMEDIR,"chemical_info/")
ASSAYDIR <<- paste0(HOMEDIR,"assay_info/")
VARMATDIR <<- "varmats/"
#--------------------------------------------------------------------------------------
#
# This routine takes the output from the database and prepares the by-compound files
# where all files are lined up by unique structure
#
# Files created from the perl export script:
# ToxCast_Chemicals_{toxcast.export.date}.txt
# ToxCast_ToxRefDB_{toxcast.export.date}.txt
# ToxCast_assay_fit_{Source}_{toxcast.export.date}.txt
# ToxCast_conc_resp_fit_{Source}_{toxcast.export.date}.txt
#
# Files created by this set of routines
# ToxCast_AC50_matrix_bySample_{chemical set}_{toxcast.export.date}.txt
# ToxCast_AC50_matrix_byCASRN_{chemical set}_{toxcast.export.date}.txt
#
#--------------------------------------------------------------------------------------
# Run the whole preparation sequence for the E1K chemical set: the long-format
# file, the per-variable matrices, the level-8 censored AC50 matrix and finally
# the ER pathway files.
#
# toxcast.export.date  date stamp forwarded to every step
# do.read.data         forwarded to PrepByChemical only
# do.read.chems        forwarded to PrepByChemical only
#
# The later steps are always told to re-read their inputs.
prep.all<- function(toxcast.export.date="2013_12_10",do.read.data=T,do.read.chems=T) {
	er.assays <- c(
		"NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa",
		"OT_ER_ERaERa_0480","OT_ER_ERaERa_1440",
		"OT_ER_ERaERb_0480","OT_ER_ERaERb_1440",
		"OT_ER_ERbERb_0480","OT_ER_ERbERb_1440",
		"OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480",
		"ATG_ERa_TRANS_perc","ATG_ERE_CIS_perc",
		"Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist",
		"ACEA_T47D_80hr_Positive",
		"Tox21_ERa_BLA_Antagonist_ratio","Tox21_ERa_LUC_BG1_Antagonist")

	PrepByChemical(toxcast.export.date,chemset="E1K",
		do.read.data=do.read.data,do.read.chems=do.read.chems,do.temp=TRUE)
	PrepByChemical.matrix(toxcast.export.date,chemset="E1K",
		do.read.data=TRUE,do.read.chems=TRUE)
	Level7.AC50(toxcast.export.date,chemset="E1K")
	prepPath.v3(toxcast.export.date,chemset="E1K",do.read.data=TRUE,
		assay.list=er.assays,pathname="ER")
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50, T,B,Emax,... files, one row per chemical
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Build the long-format table with one row per (assay, chemical) pair and write
# it to {FITSDIR}chem_data_temp.txt, the file read by PrepByChemical.matrix.
#
# Arguments
#   toxcast.export.date  date stamp used in the chemical file name
#   chemset              one of "ToxCast", "E1K", "Tox21"; selects which
#                        chemicals (and, for Tox21, which assays) are kept
#   do.read.data         if TRUE, load DATA from ALLFITDATA.RData in the working
#                        directory; otherwise an already existing DATA is used
#   do.read.chems        if TRUE, (re)read the chemical table into global CHEMS
#   do.temp              if TRUE, rebuild the global long.data from DATA;
#                        otherwise an already existing long.data is reused
#
# Side effects: may assign the globals CHEMS and long.data; overwrites
# chem_data_temp.txt.
#
# Returns (invisibly) the de-duplicated data.table that was written.
PrepByChemical <- function(toxcast.export.date="2013_12_10",chemset="E1K",do.read.data=T,do.read.chems=T,do.temp=T) {

	# Fail early with a clear message; an unknown chemset would otherwise leave
	# the chemical mask undefined further down.
	if(!is.element(chemset,c("ToxCast","E1K","Tox21"))) {
		stop("chemset must be one of ToxCast, E1K, Tox21; got: ",chemset,call.=FALSE)
	}

	cat("==========================================================================\n")
	cat("Prepare the AC50ByChemical file long format ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	if(do.read.data) {
		# ALLFITDATA.RData is a cached copy of DATA, i.e. the per-vendor
		# ToxCast_assay_fit_{vendor}_{date}.txt files bound together with
		# rbindlist() (see PrepByChemical.old for the direct read).
		load("ALLFITDATA.RData")
		cat("DATA read in\n")
		flush.console()
		DATA <- as.data.frame(DATA)
	}
	if(do.read.chems) {
		file <- paste0(CHEMDIR,"ToxCast_Generic_Chemicals_",toxcast.export.date,".txt")
		CHEMS <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		cat("CHEMS read in\n")
		flush.console()
	}

	# The Tox21 assay list comes from a file; otherwise every assay in DATA is used.
	if(chemset=="Tox21") {
		file <- paste0(ASSAYDIR,"Tox21_assays.txt")
		tox21.assays <- read.table(file,header=FALSE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		assay.list <- as.character(tox21.assays[,1])
	} else {
		assay.list <- sort(uniquify(as.character(DATA[,"assay_name"])))
	}

	# 0/1 membership flags are summed and capped at 1 to form the chemical mask.
	mask <- switch(chemset,
		ToxCast=CHEMS[,"Phase_I"]+CHEMS[,"Phase_II"],
		E1K=CHEMS[,"Phase_I"]+CHEMS[,"Phase_II"]+CHEMS[,"E1K"],
		Tox21=CHEMS[,"Tox21"])
	mask[mask>1] <- 1
	name.frame <- CHEMS[mask==1,c("CODE","CASRN","ShortName")]
	code.list <- name.frame[,"CODE"]
	nchem <- nrow(name.frame)
	nassay <- length(assay.list)
	cat("nchem: ",nchem,"\n")
	cat("nassay: ",nassay,"\n")
	flush.console()

	if(do.temp) {
		temp.0 <- DATA[is.element(DATA[,"assay_name"],assay.list),]
		# Turn the CASRN into the chemical CODE: drop "-", " " and "_", prefix "C".
		stripped <- str_replace_all(temp.0[,"chemical_casrn"],"[-_ ]","")
		temp.0[,"chemical_casrn"] <- sprintf("C%s",stripped)
		long.data.0 <- temp.0[is.element(temp.0[,"chemical_casrn"],code.list),]

		# Hit-call cascade: a missing call counts as "TRUE"; AC50 >= 1000000 forces
		# level 6 to "FALSE"; a "FALSE" at one level forces "FALSE" at the next.
		atemp <- long.data.0[,"AC50"]
		f6 <- long.data.0[,"level6_ac50_hitcall"]
		f6[is.na(f6)] <- "TRUE"
		f6[atemp>=1000000] <- "FALSE"
		long.data.0[,"level6_ac50_hitcall"] <- f6
		atemp[f6=="FALSE"] <- 1000000
		f7 <- long.data.0[,"level7_ac50_hitcall"]
		f7[is.na(f7)] <- "TRUE"
		f7[f6=="FALSE"] <- "FALSE"
		long.data.0[,"level7_ac50_hitcall"] <- f7
		f8 <- long.data.0[,"level8_ac50_hitcall"]
		f8[is.na(f8)] <- "TRUE"
		f8[f7=="FALSE"] <- "FALSE"
		long.data.0[,"level8_ac50_hitcall"] <- f8
		atemp[atemp>1000000] <- 1000000
		long.data.0[,"AC50"] <- atemp
		# Kept global so that a later call with do.temp=FALSE can reuse it.
		long.data <<- long.data.0
	}
	cat("long.data created\n")
	print(dim(long.data))
	flush.console()

	# Keep, for every (assay, chemical) pair, the row with the smallest AC50.
	# NA AC50 values are sorted last so that they never win over a real value
	# (keying with setkeyv would have placed them first).
	long.dt <- as.data.table(long.data)
	setorderv(long.dt,c("assay_name","chemical_casrn","AC50"),na.last=TRUE)
	long.dt <- unique(long.dt,by=c("assay_name","chemical_casrn"))
	long.dt <- long.dt[,list(Assay=assay_name,CODE=chemical_casrn,Name=chemical_name,AC50,AC50_mod,W,T,Emax,max_conc,level7_ac50_hitcall,level8_ac50_hitcall)]

	# Write header and rows; unquoted because the reader uses quote="".
	file <- paste0(FITSDIR,"chem_data_temp.txt")
	write.table(long.dt,file=file,sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
	invisible(long.dt)
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50, T,B,Emax,... files, one row per chemical
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
PrepByChemical.old <- function(toxcast.export.date="2013_12_10",chemset="E1K",do.read.data=T,do.read.chems=T,do.temp=T) {
	# Legacy, loop-based builder of {FITSDIR}chem_data_temp.txt.
	#
	# toxcast.export.date  date stamp used in the input file names
	# chemset              "ToxCast", "E1K" or "Tox21"; any other value leaves `mask`
	#                      undefined and the function fails when `mask` is first used
	# do.read.data         if TRUE, read the per-vendor fit files into the global DATA
	# do.read.chems        if TRUE, read the chemical table into the global CHEMS
	# do.temp              if TRUE, rebuild the global long.data; otherwise the
	#                      existing global long.data is used
	#
	# Output: a tab-delimited file with one line per (assay, chemical) pair that
	# has data, written by appending line by line.

	cat("==========================================================================\n")
	cat("Prepare the AC50ByChemical file long format ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	if(chemset!="Tox21") vendors <- c("ACEA","Apredica","Attagene","Bioseek","Novascreen","Odyssey Thera","NCGC")
	else vendors <- c("NCGC")
	if(do.read.data) {
		# Read one fit file per vendor and stack them by row.
		fit.data <- NULL
		for(i in 1:length(vendors)) {	
			file <- paste(FITSDIR,"ToxCast_assay_fit_",vendors[i],"_",toxcast.export.date,".txt",sep="")			
			temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="")
			print(dim(temp))
			cat("Read in ...",file,"\n")
			flush.console()
			fit.data <- rbind(fit.data,temp)
		}
		DATA <<- fit.data
		cat("DATA read in\n")
		flush.console()
	}
	if(do.read.chems) {
		file <- paste(CHEMDIR,"ToxCast_Generic_Chemicals_",toxcast.export.date,".txt",sep="")			
		CHEMS <<- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="",comment.char="")
		cat("CHEMS read in\n")
		flush.console()
	}

	# The Tox21 assay list is always read but only used when chemset is "Tox21".
	file <- paste(ASSAYDIR,"Tox21_assays.txt",sep="")			
	tox21.assays <- read.table(file,header=F,sep="\t",stringsAsFactors=F,quote="",comment.char="")
	assay.list <- as.character(DATA[,"assay_name"])
	assay.list <- sort(uniquify(assay.list))
	#assay.list <- assay.list[1:length(assay.list)]
	if(chemset=="Tox21") assay.list <- as.character(tox21.assays[,1])
	p1.list <- CHEMS[,"Phase_I"]					
	p2.list <- CHEMS[,"Phase_II"]
	e1k.list <- CHEMS[,"E1K"]
	tox21.list <- CHEMS[,"Tox21"]
	
	# The membership columns are summed and capped at 1 to give the chemical mask.
	if(chemset=="ToxCast") {
		mask <- p1.list+p2.list
		mask[mask>1] <- 1
	}
	if(chemset=="E1K") {
		mask <- p1.list+p2.list+e1k.list
		mask[mask>1] <- 1
	}
	if(chemset=="Tox21") {
		mask <- tox21.list
		mask[mask>1] <- 1
	}
	name.frame <- CHEMS[mask==1,c("CODE","CASRN","ShortName")]	
	code.list <- name.frame[,"CODE"]
	casrn.list <- name.frame[,"CASRN"]
	nchem <- dim(name.frame)[1]
	nassay <- length(assay.list)
	cat("nchem: ",nchem,"\n")
	cat("nassay: ",nassay,"\n")
	flush.console()
	#nassay <- 10
	#nchem <- 100
	#code.list <- code.list[1:nchem]
	#assay.list <- assay.list[1:nassay]
	if(do.temp) {
		temp.0 <- DATA[is.element(DATA[,"assay_name"],assay.list),]
		# Convert the CASRN into the chemical CODE: remove "-", " " and "_",
		# then prefix with "C".
		temp.1 <- temp.0[,"chemical_casrn"]
		temp.1 <- str_replace_all(temp.1,"-","")
		temp.1 <- str_replace_all(temp.1," ","")
		temp.1 <- str_replace_all(temp.1,"_","")
		temp.2 <- temp.1
		temp.2[] <- "C"
		temp.3 <- paste(temp.2,temp.1,sep="")	
		temp.0[,"chemical_casrn"] <- temp.3
		long.data.0 <- temp.0[is.element(temp.0[,"chemical_casrn"],code.list),]
		# Hit-call cascade: a missing call becomes "TRUE"; AC50 >= 1000000 forces
		# level 6 to "FALSE"; "FALSE" at one level forces "FALSE" at the next.
		atemp <- long.data.0[,"AC50"]
		f6 <- long.data.0[,"level6_ac50_hitcall"]
		f6[is.na(f6)] <- "TRUE"
		f6[atemp>=1000000] <- "FALSE"
		long.data.0[,"level6_ac50_hitcall"] <- f6
		atemp[f6=="FALSE"] <- 1000000
		f7 <- long.data.0[,"level7_ac50_hitcall"]
		f7[is.na(f7)] <- "TRUE"
		f7[f6=="FALSE"] <- "FALSE"
		long.data.0[,"level7_ac50_hitcall"] <- f7
		f8 <- long.data.0[,"level8_ac50_hitcall"]
		f8[is.na(f8)] <- "TRUE"
		f8[f7=="FALSE"] <- "FALSE"
		long.data.0[,"level8_ac50_hitcall"] <- f8
		# NOTE(review): md is assigned here but not used afterwards.
		md <- long.data.0[,"AC50_mod"]
		atemp[atemp>1000000] <- 1000000
		long.data.0[,"AC50"] <- atemp
		long.data <<- long.data.0
	}
	cat("long.data created\n")
	print(dim(long.data))
	flush.console()
	file <- paste(FITSDIR,"chem_data_temp.txt",sep="")
	s <- "Assay\tCODE\tName\tAC50\tAC50_mod\tW\tT\tEmax\tmax_conc\tlevel7_ac50_hitcall\tlevel8_ac50_hitcall\n"
	
	# The header overwrites any previous file; data lines are appended below.
	cat(file=file,s,append=F)

	counter <- 0
	for(i in 1:nassay) {
		assay <- assay.list[i]
		cat("===================================\n",assay,"\n",i,"of",nassay,"\n===================================\n")
		flush.console()
		atemp <- long.data[is.element(long.data[,"assay_name"],assay),]
		cat("dim(atemp): ",dim(atemp),"\n")
		flush.console()
		if(dim(atemp)[1]>0) {
			for(j in 1:nchem) {
				code <- code.list[j]
				btemp <- atemp[is.element(atemp[,"chemical_casrn"],code),]
				if(dim(btemp)[1]>0) {
					# With several rows for the pair, use the one with the smallest AC50.
					iuse <- 1
					if(dim(btemp)[1]>1) iuse <- sort(btemp[,"AC50"],index.return=T)$ix[1]
					s <- paste(assay,"\t",code,"\t",btemp[iuse,"chemical_name"],"\t",btemp[iuse,"AC50"],"\t",btemp[iuse,"AC50_mod"],"\t",btemp[iuse,"W"],"\t",btemp[iuse,"T"],"\t",btemp[iuse,"Emax"],"\t", btemp[iuse,"max_conc"],"\t", btemp[iuse,"level7_ac50_hitcall"],"\t", btemp[iuse,"level8_ac50_hitcall"],"\n",sep="")
					#ac50.mat[code,assay] <- btemp[iuse,"AC50"]
					#mod.mat[code,assay] <- btemp[iuse,"AC50_mod"]
					#w.mat[code,assay] <- btemp[iuse,"W"]
					#t.mat[code,assay] <- btemp[iuse,"T"]
					#emax.mat[code,assay] <- btemp[iuse,"Emax"]
					#maxconc.mat[code,assay] <- btemp[iuse,"max_conc"]
					cat(file=file,s,append=T)
					#cat(s)
					#browser()
				}
			}
		}
	}
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50, T,B,Emax,... files, one row per chemical
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Turn the long-format file {FITSDIR}chem_data_temp.txt into one chemical-by-assay
# matrix per variable and write each as
# {FITSDIR}ToxCast_ResultMatrix_{chemset}_{variable}_{date}.txt.
#
# Arguments
#   toxcast.export.date  date stamp used in the input/output file names
#   chemset              one of "ToxCast", "E1K", "Tox21"; selects the chemical rows
#   do.read.data         if TRUE, read chem_data_temp.txt into the global DATA.LONG;
#                        otherwise an already existing DATA.LONG is used
#   do.read.chems        if TRUE, (re)read the chemical table into the global CHEMS
#
# Every output has the CODE, CASRN and ShortName columns followed by one column
# per assay; chemicals without data get NA in every assay column.
PrepByChemical.matrix <- function(toxcast.export.date="2013_12_10",chemset="E1K",do.read.data=T,do.read.chems=T) {

	# An unknown chemset would otherwise leave the chemical mask undefined.
	if(!is.element(chemset,c("ToxCast","E1K","Tox21"))) {
		stop("chemset must be one of ToxCast, E1K, Tox21; got: ",chemset,call.=FALSE)
	}

	cat("==========================================================================\n")
	cat("Prepare the AC50ByChemical Matrices ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	if(do.read.data) {
		file <- paste0(FITSDIR,"chem_data_temp.txt")
		# comment.char="" so that a "#" inside a chemical name does not cut the row.
		temp <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		print(dim(temp))
		cat("Read in ...",file,"\n")
		DATA.LONG <<- temp
		cat("DATA.LONG read in\n")
		flush.console()
	}
	if(do.read.chems) {
		file <- paste0(CHEMDIR,"ToxCast_Generic_Chemicals_",toxcast.export.date,".txt")
		CHEMS <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		cat("CHEMS read in\n")
		flush.console()
	}
	assay.list <- sort(uniquify(as.character(DATA.LONG[,"Assay"])))

	# 0/1 membership flags are summed and capped at 1 to form the chemical mask.
	mask <- switch(chemset,
		ToxCast=CHEMS[,"Phase_I"]+CHEMS[,"Phase_II"],
		E1K=CHEMS[,"Phase_I"]+CHEMS[,"Phase_II"]+CHEMS[,"E1K"],
		Tox21=CHEMS[,"Tox21"])
	mask[mask>1] <- 1
	name.frame <- CHEMS[mask==1,c("CODE","CASRN","ShortName")]
	code.list <- name.frame[,"CODE"]
	nchem <- nrow(name.frame)
	nassay <- length(assay.list)
	cat("nchem: ",nchem,"\n")
	cat("nassay: ",nassay,"\n")
	flush.console()

	var.list <- c("AC50","AC50_mod","W","T","Emax","max_conc","level7_ac50_hitcall","level8_ac50_hitcall")
	for(variable in var.list) {
		cat("Prep data for ",variable,"\n")
		flush.console()
		# DATA.LONG is a plain data.frame, so call the reshape2 method explicitly
		# instead of relying on whichever attached package exports dcast last.
		wide <- reshape2::dcast(DATA.LONG,CODE~Assay,value.var=variable)
		# Exact lookup of each code. Indexing data-frame rows by name would match
		# row names partially and could return another chemical's row.
		rows <- match(code.list,wide[,"CODE"])
		wide.mat <- wide[rows,-1,drop=FALSE]
		result <- cbind(name.frame,wide.mat)
		file <- paste0(FITSDIR,"ToxCast_ResultMatrix_",chemset,"_",variable,"_",toxcast.export.date,".txt")
		write.table(result,file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep="\t")
		cat("Data written for ",variable,"\n")
		flush.console()
	}
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Create the AC50 matrix in which every entry whose level8 hit call is FALSE is set to 1000000
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Combine the AC50 result matrix with the level8 hit-call matrix: every AC50
# whose hit call is FALSE is replaced by 1000000, and the result is written to
# {FITSDIR}ToxCast_ResultMatrix_{chemset}_AC50_level8_{date}.txt.
#
# toxcast.export.date  date stamp used in the file names
# chemset              chemical-set label used in the file names
#
# NOTE(review): the function name says Level7 but the level8 hit calls are used.
Level7.AC50 <- function(toxcast.export.date="2013_12_10",chemset="E1K") {

	cat("==========================================================================\n")
	cat("Prepare the AC50ByChemical Matrices ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()

	# Path of the result-matrix file carrying the given tag.
	matrix.file <- function(tag) {
		paste0(FITSDIR,"ToxCast_ResultMatrix_",chemset,"_",tag,"_",toxcast.export.date,".txt")
	}
	ac50.table <- read.table(matrix.file("AC50"),header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"")
	hit.table <- read.table(matrix.file("level8_ac50_hitcall"),header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"")

	# The first three columns identify the chemical; the rest are assay columns.
	amat <- ac50.table[,4:ncol(ac50.table)]
	hit.codes <- hit.table[,4:ncol(hit.table)]
	# Recode the hit calls: missing -> -1, TRUE -> 1, FALSE -> 0.
	hit.codes[is.na(hit.codes)] <- -1
	hit.codes[hit.codes=="TRUE"] <- 1
	hit.codes[hit.codes=="FALSE"] <- 0

	amat[hit.codes==0] <- 1000000
	amat <- cbind(ac50.table[,1:3],amat)

	write.table(amat,file=matrix.file("AC50_level8"),row.names=FALSE,append=FALSE,quote=TRUE,sep="\t")
}
#--------------------------------------------------------------------------------------
#
# Prepare the special pathway files
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
#prepPath.v3 <- function(toxcast.export.date="2013_12_10",chemset="E1K",do.read.data=T,
#	assay.list=c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa",
#	"OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440",
#	"OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480",
#	"ATG_ERa_TRANS_perc","ATG_ERE_CIS_perc",
#	"Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist",
#	"ACEA_T47D_80hr_Positive",
#	"Tox21_ERa_BLA_Antagonist_ratio","Tox21_ERa_LUC_BG1_Antagonist"),
#	pathname="ER") {

#"NVS_NR_cAR",
# Extract the columns of one pathway from each per-variable result matrix and
# write them to {FITSDIR}Pathway_{pathname}_{chemset}_{variable}_{date}.txt.
#
# toxcast.export.date  date stamp used in the file names
# chemset              chemical-set label used in the file names
# do.read.data         not used by this routine
# assay.list           assay columns that make up the pathway
# pathname             pathway label used in the output file names
#
# The rows to keep are decided once, from the first variable (AC50): a row is
# dropped when any of its pathway columns is NA or negative. The same row
# selection is then applied to every other variable.
prepPath.v3 <- function(toxcast.export.date="2013_12_10",chemset="E1K",do.read.data=T,
	assay.list=c("NVS_NR_hAR","NVS_NR_rAR",
			"OT_AR_ARSRC1_0480","OT_AR_ARSRC1_0960",
			"ATG_AR_TRANS_perc",
			"Tox21_AR_BLA_Agonist_ratio","Tox21_AR_LUC_MDAKB2_Agonist",
			"Tox21_AR_BLA_Antagonist_ratio","Tox21_AR_LUC_MDAKB2_Antagonist"),
	pathname="AR") {

	cat("==========================================================================\n")
	cat("Prepare the ",pathname,"file ...\n")
	cat("==========================================================================\n")
	var.list <- c("AC50","AC50_mod","W","T","Emax","max_conc","level7_ac50_hitcall","level8_ac50_hitcall")

	drop.row <- NULL
	for(variable in var.list) {
		in.file <- paste0(FITSDIR,"ToxCast_ResultMatrix_",chemset,"_",variable,"_",toxcast.export.date,".txt")
		full.table <- read.table(in.file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"'")
		id.cols <- full.table[,1:3]
		assay.cols <- full.table[,assay.list]

		if(is.null(drop.row)) {
			# First variable only: flag rows holding a missing or negative value.
			flagged <- is.na(assay.cols) | assay.cols<0
			drop.row <- rowSums(flagged)>0
			cat("Number of full rows: ",length(drop.row)-sum(drop.row)," out of ",length(drop.row),"\n")
			flush.console()
		}

		kept <- cbind(id.cols,assay.cols)[!drop.row,]
		out.file <- paste0(FITSDIR,"Pathway_",pathname,"_",chemset,"_",variable,"_",toxcast.export.date,".txt")
		write.table(kept,file=out.file,row.names=FALSE,append=FALSE,quote=TRUE,sep="\t")
		cat("Data written for variable: ",variable,"\n")
	}
}
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
########################################################################################
#--------------------------------------------------------------------------------------
#
# Create the full input AC50, T,B,Emax,... files, one row per chemical
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Prefix each of the six value matrices with the chemical name columns and write
# them as {FITSDIR}ToxCast_{label}_Matrix_{chemset}_{date}.txt.
#
# name.frame, ac50.mat, t.mat, w.mat, emax.mat, mod.mat and maxconc.mat are not
# created here; they must already exist in an enclosing environment.
# do.read.data, do.read.chems and do.temp are accepted but not used.
# The routine ends in browser(), i.e. it stops in the debugger when interactive.
PrepByChemical.2 <- function(toxcast.export.date="2013_12_06",chemset="E1K",do.read.data=F,do.read.chems=T,do.temp=F) {

	# Write one matrix as a quoted, tab-delimited file named after `label`.
	dump.matrix <- function(mat,label) {
		out.file <- paste0(FITSDIR,"ToxCast_",label,"_Matrix_",chemset,"_",toxcast.export.date,".txt")
		write.table(mat,file=out.file,row.names=FALSE,append=FALSE,quote=TRUE,sep="\t")
	}

	# All matrices are prefixed first, so a missing input fails before any write.
	ac50.mat <- cbind(name.frame,ac50.mat)
	t.mat <- cbind(name.frame,t.mat)
	w.mat <- cbind(name.frame,w.mat)
	emax.mat <- cbind(name.frame,emax.mat)
	mod.mat <- cbind(name.frame,mod.mat)
	maxconc.mat <- cbind(name.frame,maxconc.mat)

	dump.matrix(ac50.mat,"AC50")
	dump.matrix(t.mat,"T")
	dump.matrix(w.mat,"W")
	dump.matrix(emax.mat,"Emax")
	dump.matrix(mod.mat,"AC50_mod")
	dump.matrix(maxconc.mat,"max_conc")
	cat("Assay data written ...\n")
	browser()
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50 files, one row per sample
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
AC50ByChemical_old <- function(toxcast.export.date="2013_12_13",chemset="Tox21",do.read.data=T) {
	# Legacy routine: collapse the by-sample AC50 matrix to one row per CASRN and
	# write it as ToxCast_AC50_Matrix_byChemical_{chemset}_{date}.txt in FITSDIR.
	#
	# toxcast.export.date  date stamp used in the file names
	# chemset              chemical-set label used in the file names
	# do.read.data         if TRUE, read the by-sample matrix into the global DATA;
	#                      otherwise the existing global DATA is used

	cat("==========================================================================\n")
	cat("Prepare the AC50ByChemical file ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	# NOTE(review): vendors is assigned but not used in this routine.
	vendors <- c("ACEA","Apredica","Attagene","Bioseek","NCGC","Novascreen","Odyssey Thera")
	if(do.read.data) {
		file <- paste(FITSDIR,"ToxCast_AC50_Matrix_bySample_",chemset,"_",toxcast.export.date,".txt",sep="")			
		temp <- read.table(file,header=T,sep="\t",stringsAsFactors=F)
		DATA <<- temp
	}
	casrn.list <- uniquify(as.character(DATA[,"CASRN"]))
	# Columns 6 onward of DATA are treated as the assay columns.
	assay.list <- names(DATA)[6:dim(DATA)[2]]
	nchem <- length(casrn.list)
	nassay <- length(assay.list)
	newdata <- as.data.frame(matrix(nrow=nchem,ncol=4+nassay,data=NA))
	names(newdata) <- c("CODE","CASRN_NODASH","CASRN","Name",assay.list)
	newdata[,"CASRN"] <- casrn.list
	# CASRN_NODASH is the CASRN without "-", " " and "_"; CODE is that value
	# prefixed with "C".
	code.list <- casrn.list
	code.list <- str_replace_all(code.list,"-","")
	code.list <- str_replace_all(code.list," ","")
	code.list <- str_replace_all(code.list,"_","")
	newdata[,"CASRN_NODASH"] <- code.list
	temp <- code.list
	temp[] <- "C"
	temp2 <- paste(temp,code.list,sep="")
	newdata[,"CODE"] <- temp2
	
	for(i in 1:nchem) {
		casrn <- casrn.list[i]
		temp <- DATA[is.element(DATA[,"CASRN"],casrn),]
		# The name is taken from the first row found for this CASRN.
		cname <- temp[1,"Name"]
		cat(i,cname,"\n")
		flush.console()
		newdata[i,"Name"] <- cname
		temp <- temp[,6:dim(temp)[2]]
		# Missing values are replaced by 2000000 before the per-column reduction.
		temp[is.na(temp)] <- 2000000
		# colMin is not defined in this file; presumably the column-wise minimum
		# over the samples of this chemical -- TODO confirm against utils.R.
		tempmin <- colMin(temp)
		newdata[i,5:(4+nassay)] <- tempmin
	}
	print( dim(newdata))
	file <- paste(FITSDIR,"ToxCast_AC50_Matrix_byChemical_",chemset,"_",toxcast.export.date,".txt",sep="")			
	write.table(newdata, file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep = "\t")
	cat("Assay data written ...\n")
}
#--------------------------------------------------------------------------------------
#
# Prepare the special pathway files
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Run prepPath.v2 for the ER, AR and PPAR pathways in turn.
#
# toxcast.export.date  date stamp forwarded to prepPath.v2. The default is
#                      "2013_03_22", the date that was previously hard-coded in
#                      every call (the former default, "2013_03_33", was never
#                      used and is not a valid date).
# do.read.data         forwarded to prepPath.v2
# do.read.chems        forwarded to prepPath.v2
prepPathAll <- function(toxcast.export.date="2013_03_22",do.read.data=T,do.read.chems=T) {
	pathways <- list(
		ER=c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa",
			"OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440",
			"OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480",
			"ATG_ERa_TRANS_perc","ATG_ERE_CIS_perc",
			"Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist",
			"ACEA_T47D_80hr_Positive",
			"Tox21_ERa_BLA_Antagonist_ratio","Tox21_ERa_LUC_BG1_Antagonist"),
		AR=c("ATG_AR_TRANS_perc",
			"NVS_NR_cAR",
			"NVS_NR_hAR",
			"NVS_NR_rAR",
			"OT_AR_ARE_LUC_Agonist_1440",
			"OT_AR_ARSRC1_0480",
			"OT_AR_ARSRC1_0960",
			"Tox21_AR_BLA_Agonist_ratio",
			"Tox21_AR_BLA_Antagonist_ratio",
			"Tox21_AR_LUC_MDAKB2_Agonist",
			"Tox21_AR_LUC_MDAKB2_Antagonist"),
		PPAR=c("ATG_PPRE_CIS_perc",
			"ATG_PPARg_TRANS_perc",
			"ATG_PPARd_TRANS_perc",
			"ATG_PPARa_TRANS_perc",
			"ATG_PPRE_CIS",
			"ATG_PPARg_TRANS",
			"ATG_PPARd_TRANS",
			"ATG_PPARa_TRANS",
			"ATG_RXRa_TRANS_perc",
			"ATG_RXRb_TRANS_perc",
			"ATG_RARa_TRANS_perc",
			"ATG_RARb_TRANS_perc",
			"ATG_RARg_TRANS_perc",
			"ATG_DR4_LXR_CIS_perc",
			"NVS_NR_hRAR_Antagonist",
			"NVS_NR_hRARa_Agonist",
			"NVS_NR_hPPARg",
			"NVS_NR_hPPARa",
			"NVS_ENZ_hMMP1",
			"NVS_ENZ_hMMP1_Activator",
			"OT_PPARg_PPARgSRC1_1440",
			"OT_NURR1_NURR1RXRa_0480",
			"OT_NURR1_NURR1RXRa_1440",
			"BSK_BE3C_MMP1_down",
			"BSK_BE3C_MMP1_up",
			"BSK_hDFCGF_MMP1_down",
			"BSK_hDFCGF_MMP1_up",
			"Tox21_PPARg_BLA_Agonist_ch1",
			"Tox21_PPARg_BLA_Agonist_ratio"))

	# One prepPath.v2 run per pathway, in the order ER, AR, PPAR.
	for(pathname in names(pathways)) {
		prepPath.v2(toxcast.export.date=toxcast.export.date,
			do.read.data=do.read.data,do.read.chems=do.read.chems,
			assay.list=pathways[[pathname]],
			pathname=pathname)
	}
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Prepare the AhR file
#
#
#--------------------------------------------------------------------------------------
# Build the AhR pathway files with prepPath.v2, using the 2013_03_22 export,
# re-reading the chemical table but not the assay fit data.
prepAhR <- function() {
	ahr.assays <- c("Tox21_AhR","ATG_Ahr_CIS_perc")
	prepPath.v2(
		toxcast.export.date="2013_03_22",
		do.read.data=FALSE,
		do.read.chems=TRUE,
		assay.list=ahr.assays,
		pathname="AhR")
}
#--------------------------------------------------------------------------------------
#
# Prepare the special pathway files
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
# Build the chemical-by-assay matrices (AC50, T, W, Emax, AC50_mod, max_conc) for
# one pathway and write each to
# {FITSDIR}{pathname}_pathway_{label}_Matrix_{date}.txt.
#
# Arguments
#   toxcast.export.date  date stamp used in the fit-file and output file names
#   do.read.data         if TRUE, read the per-vendor fit files into the global DATA
#   do.read.chems        if TRUE, read the chemical master file and set the
#                        globals CHEMS, NAME.FRAME and CODE.LIST
#   do.intermediate      if TRUE, rebuild the global DATA.TEMP (rows of DATA that
#                        belong to assay.list); otherwise reuse the existing one
#   assay.list           assays (matrix columns) that make up the pathway
#   pathname             pathway label used in the output file names
#
# For each (chemical, assay) pair the row with the smallest AC50 is used. An AC50
# of 1000000 or more, or a FALSE hit call at level 6, 7 or 8, gives AC50 = 1000000.
# Only chemicals whose AC50 row is complete (no NA) and has a positive sum are
# written.
prepPath.v2 <- function(toxcast.export.date="2013_12_04",do.read.data=T,do.read.chems=T,do.intermediate=T,
	assay.list=c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa",
	"OT_ER_ERaERa_0480","OT_ER_ERaERa_1440","OT_ER_ERaERb_0480","OT_ER_ERaERb_1440","OT_ER_ERbERb_0480","OT_ER_ERbERb_1440",
	"OT_ERa_EREGFP_0120","OT_ERa_EREGFP_0480",
	"ATG_ERa_TRANS_perc","ATG_ERE_CIS_perc",
	"Tox21_ERa_BLA_Agonist_ratio","Tox21_ERa_LUC_BG1_Agonist",
	"ACEA_T47D_80hr_Positive",
	"Tox21_ERa_BLA_Antagonist_ratio","Tox21_ERa_LUC_BG1_Antagonist"),
	pathname="ER") {

	cat("==========================================================================\n")
	cat("Prepare the ",pathname,"file ...\n")
	cat("==========================================================================\n")
	vendors <- c("ACEA","Attagene","Novascreen","Odyssey Thera","NCGC")
	if(do.read.data) {
		# Read every vendor file first, then bind once instead of growing a frame.
		fit.list <- vector("list",length(vendors))
		for(i in seq_along(vendors)) {
			file <- paste0(FITSDIR,"ToxCast_assay_fit_",vendors[i],"_",toxcast.export.date,".txt")
			fit.list[[i]] <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="")
			print(dim(fit.list[[i]]))
			cat("Read in ...",file,"\n")
			flush.console()
		}
		DATA <<- do.call(rbind,fit.list)
	}
	if(do.read.chems) {
		file <- paste0(CHEMDIR,"ToxCastChemNamesMaster_2013_09_06.txt")
		CHEMS <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")

		# One row per unique CODE, taking CASRN and ShortName from the first row
		# that carries the code; double quotes are removed from the name.
		code.list <- sort(uniquify(CHEMS[,"CODE"]))
		first.row <- match(code.list,CHEMS[,"CODE"])
		name.frame <- data.frame(
			CODE=code.list,
			CASRN=CHEMS[first.row,"CASRN"],
			Name=str_replace_all(CHEMS[first.row,"ShortName"],"\"",""),
			stringsAsFactors=FALSE)
		NAME.FRAME <<- name.frame
		CODE.LIST <<- code.list
	}
	nchem <- length(CODE.LIST)
	nassay <- length(assay.list)

	# Six NA-filled matrices with chemicals as rows and assays as columns.
	blank <- matrix(NA,nrow=nchem,ncol=nassay,dimnames=list(CODE.LIST,assay.list))
	ac50.mat <- blank
	t.mat <- blank
	w.mat <- blank
	emax.mat <- blank
	mod.mat <- blank
	maxconc.mat <- blank

	if(do.intermediate) {
		DATA.TEMP <<- DATA[is.element(DATA[,"assay_name"],assay.list),]
	}
	temp <- DATA.TEMP

	# The three NVS ER binding assays start out as inactive (1000000). Only the
	# ones present in assay.list are touched, so other pathways do not fail with
	# a "subscript out of bounds" error.
	preset <- intersect(c("NVS_NR_bER","NVS_NR_hER","NVS_NR_mERa"),assay.list)
	if(length(preset)>0) ac50.mat[,preset] <- 1000000

	for(j in seq_len(nassay)) {
		assay <- assay.list[j]
		cat("Fill in assay:",assay,"\n")
		flush.console()
		atemp <- temp[is.element(temp[,"assay_name"],assay),]
		for(i in seq_len(nchem)) {
			code <- CODE.LIST[i]
			casrn <- NAME.FRAME[i,"CASRN"]
			btemp <- atemp[is.element(atemp[,"chemical_casrn"],casrn),]
			if(nrow(btemp)>0) {
				# Row with the smallest AC50; fall back to the first row when
				# every AC50 is NA.
				iuse <- which.min(btemp[,"AC50"])
				if(length(iuse)==0) iuse <- 1
				ac50 <- btemp[iuse,"AC50"]
				ac50.mod <- btemp[iuse,"AC50_mod"]
				wval <- btemp[iuse,"W"]
				tval <- btemp[iuse,"T"]
				emax <- btemp[iuse,"Emax"]
				maxconc <- btemp[iuse,"max_conc"]

				# A missing hit call counts as TRUE.
				l6 <- btemp[iuse,"level6_ac50_hitcall"]
				l7 <- btemp[iuse,"level7_ac50_hitcall"]
				l8 <- btemp[iuse,"level8_ac50_hitcall"]
				if(is.na(l6)) l6 <- "TRUE"
				if(is.na(l7)) l7 <- "TRUE"
				if(is.na(l8)) l8 <- "TRUE"
				if(!is.na(ac50) && ac50>=1000000) ac50 <- 1000000
				if(l6=="FALSE" || l7=="FALSE" || l8=="FALSE") {
					ac50 <- 1000000
					tval <- 0
				}
				if(is.na(tval)) tval <- 0
				if(is.na(wval)) wval <- 1000000

				ac50.mat[code,assay] <- ac50
				t.mat[code,assay] <- tval
				w.mat[code,assay] <- wval
				emax.mat[code,assay] <- emax
				mod.mat[code,assay] <- ac50.mod
				maxconc.mat[code,assay] <- maxconc
			}
		}
	}

	# Keep chemicals whose AC50 row has no NA and a positive sum.
	row.total <- rowSums(ac50.mat)
	row.total[is.na(row.total)] <- 0
	keep <- row.total>0
	name.frame <- NAME.FRAME[keep,]

	outputs <- list(AC50=ac50.mat,T=t.mat,W=w.mat,Emax=emax.mat,AC50_mod=mod.mat,max_conc=maxconc.mat)
	for(label in names(outputs)) {
		# drop=FALSE keeps the matrix shape when one row or one assay remains.
		out <- cbind(name.frame,outputs[[label]][keep,,drop=FALSE])
		file <- paste0(FITSDIR,pathname,"_pathway_",label,"_Matrix_",toxcast.export.date,".txt")
		write.table(out,file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep="\t")
	}
	cat("Assay data written ...\n")
	# The former trailing browser() call was removed so that the routine can run
	# unattended, e.g. several times in a row from prepPathAll.
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Merge the ToxCast and Tox21 data for the E1K set
#
#--------------------------------------------------------------------------------------
merge.ToxCast.Tox21 <- function(toxcast.export.date="2013_12_04",do.read.data=FALSE,do.read.chems=FALSE) {
	# Build chemical x assay matrices (AC50, AC50 before the level 7 hit-call override,
	# AC50 modifier, T, W and Emax) from the per-vendor assay fit tables and write one
	# tab-delimited file per matrix into input/.
	#
	# toxcast.export.date: date tag embedded in every input and output file name
	# do.read.data:  if TRUE, read the vendor fit files and store them in the global DATA;
	#                otherwise the existing global DATA is used
	# do.read.chems: if TRUE, read the chemical file and rebuild the globals CHEMS,
	#                CODE.LIST and NAME.FRAME; otherwise the existing globals are used
	#
	# Returns NULL invisibly; the results are the files written and the globals set.

	# All files for this routine live under input/; these locals shadow the file-level
	# directory globals of the same names.
	FITSDIR <- "input/"
	CHEMDIR <- "input/"
	vendors <- c("ACEA","Apredica","Attagene","Bioseek","Novascreen","Odyssey Thera","NCGC")
	if(do.read.data) {
		# collect the per-vendor tables first and bind once instead of growing a frame
		fit.list <- vector("list",length(vendors))
		for(i in seq_along(vendors)) {
			file <- paste(FITSDIR,"ToxCast_assay_fit_",vendors[i],"_",toxcast.export.date,".txt",sep="")
			temp <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="")
			print(dim(temp))
			cat("Read in ...",file,"\n")
			flush.console()
			fit.list[[i]] <- temp
		}
		DATA <<- do.call(rbind,fit.list)
	}
	if(do.read.chems) {
		file <- paste(CHEMDIR,"ToxCastTox21_Chemicals_",toxcast.export.date,".txt",sep="")
		CHEMS <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		cat("Read in ...",file,"\n")
		# keep chemicals flagged in at least one of the phase / E1K membership columns
		membership <- CHEMS[,"Phase_I_V1"] + CHEMS[,"Phase_I_V2"] + CHEMS[,"Phase_IIa"] + CHEMS[,"Phase_IIb"] + CHEMS[,"Phase_IIc"] + CHEMS[,"E1K"]
		chems <- CHEMS[membership>0,]

		code.list <- sort(uniquify(chems[,"CODE"]))
		CODE.LIST <<- code.list

		# one row per code, described by the first chemical record carrying that code
		first.row <- match(code.list,chems[,"CODE"])
		casrn <- chems[first.row,"CASRN"]
		name.frame <- as.data.frame(matrix(nrow=length(code.list),ncol=3))
		names(name.frame) <- c("CODE","CASRN","Name")
		name.frame[,"CODE"] <- code.list
		# the first character of the stored CASRN is dropped
		# NOTE(review): presumably a leading marker character in the export - confirm
		name.frame[,"CASRN"] <- substr(casrn,2,nchar(casrn))
		name.frame[,"Name"] <- chems[first.row,"Name"]
		NAME.FRAME <<- name.frame
	}
	cat("CODE.LIST:",length(CODE.LIST),"\n")
	flush.console()
	nchem <- length(CODE.LIST)
	assay.list <- sort(uniquify(DATA[,"assay_name"]))
	# the first assay name in sort order is discarded
	# NOTE(review): presumably an empty / placeholder assay name - confirm
	assay.list <- assay.list[-1]
	nassay <- length(assay.list)

	# all six matrices start as the same NA-filled, code x assay template
	blank <- matrix(nrow=nchem,ncol=nassay)
	row.names(blank) <- CODE.LIST
	colnames(blank) <- assay.list
	ac50.mat <- blank
	ac50_l6.mat <- blank
	ac50_mod.mat <- blank
	t.mat <- blank
	w.mat <- blank
	emax.mat <- blank

	for(j in seq_len(nassay)) {
		assay <- assay.list[j]
		cat("Fill in assay:",assay,"\n")
		flush.console()
		atemp <- DATA[is.element(DATA[,"assay_name"],assay),]
		for(i in seq_len(nchem)) {
			code <- CODE.LIST[i]
			casrn <- NAME.FRAME[i,"CASRN"]
			btemp <- atemp[is.element(atemp[,"chemical_casrn"],casrn),]
			if(dim(btemp)[1]==0) next

			# Use the record with the smallest AC50. order() ranks NA last and returns
			# positions in btemp itself, unlike sort(..., index.return=TRUE), whose
			# positions refer to the vector after NA removal.
			iuse <- order(btemp[,"AC50"])[1]
			ac50 <- btemp[iuse,"AC50"]
			# a missing AC50 stays NA in the output instead of aborting the run
			if(!is.na(ac50) && ac50>=1000000) ac50 <- 1000000
			ac50_l6 <- ac50
			ac50.mod <- btemp[iuse,"AC50_mod"]
			if(is.na(ac50.mod)) ac50.mod <- "=="

			wval <- btemp[iuse,"W"]
			if(is.na(wval)) wval <- 1000000

			tval <- btemp[iuse,"T"]
			if(is.na(tval)) tval <- 1000000

			emaxval <- btemp[iuse,"Emax"]
			if(is.na(emaxval)) emaxval <- 1000000

			# a level 7 hit call of "FALSE" overrides the AC50 with 1000000;
			# a missing hit call leaves the AC50 untouched
			l7 <- btemp[iuse,"level7_ac50_hitcall"]
			if(!is.na(l7) && l7=="FALSE") ac50 <- 1000000

			ac50.mat[code,assay] <- ac50
			ac50_l6.mat[code,assay] <- ac50_l6
			ac50_mod.mat[code,assay] <- ac50.mod
			t.mat[code,assay] <- tval
			w.mat[code,assay] <- wval
			emax.mat[code,assay] <- emaxval
		}
	}

	# prefix the chemical identifier columns and write one matrix to its own file
	write.matrix <- function(mat,label) {
		file <- paste(FITSDIR,"ToxCast_Tox21_ALL_",label,"_",toxcast.export.date,".txt",sep="")
		write.table(cbind(NAME.FRAME,mat), file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep = "\t")
	}
	write.matrix(ac50.mat,"AC50_Matrix")
	write.matrix(ac50_l6.mat,"AC50_Matrix_LEVEL_6")
	write.matrix(ac50_mod.mat,"AC50_mod_Matrix")
	write.matrix(t.mat,"T_Matrix")
	write.matrix(w.mat,"W_Matrix")
	write.matrix(emax.mat,"Emax_Matrix")
	cat("Assay data written ...\n")
	# the debugging browser() call that used to end this routine has been removed so
	# that it can run unattended
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Load all of the ToxCast data for further use
#
#--------------------------------------------------------------------------------------
load.bySample <- function(infile) {
	# Read the tab-delimited, headed table in infile and publish it as the global
	# TX.sample. Column names are kept verbatim and strings stay character.
	rule <- "=========================================================================="
	for(line in c(rule,"Load by sample",rule)) {
		print(line,quote=FALSE)
	}
	TX.sample <<- read.table(file=infile,header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE)
	print("Sample data loaded")
	flush.console()
}
#--------------------------------------------------------------------------------------
#
# Load the chemical information table
#
#--------------------------------------------------------------------------------------
load.chems <- function(infile) {
	# Read the tab-delimited, headed chemical table ../data/<infile> and publish it as
	# the global CHEMS. Column names are kept verbatim, "#" is not treated as a comment
	# character and strings stay character.
	rule <- "=========================================================================="
	print(rule,quote=FALSE)
	print("Load chemicals",quote=FALSE)
	print(rule,quote=FALSE)
	chem.path <- paste0("../data/",infile)
	CHEMS <<- read.table(file=chem.path,header=TRUE,sep="\t",stringsAsFactors=FALSE,check.names=FALSE,comment.char="")
	print("Chemical data loaded")
	flush.console()
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50 file, one row per compound or generic chemical
#
#--------------------------------------------------------------------------------------
AC50ByCompound <- function(date.string) {
	# Collapse the by-sample table in the global TX.sample to one row per chemical name
	# and write it to ../data/ToxCast_AC50_byCompound_<date.string>.txt.
	#
	# date.string: date tag embedded in the output file name
	#
	# Reads the globals TX.sample (columns 7 onward are treated as assay columns) and
	# CHEMS. For each name, the identifiers come from the first CHEMS record whose 6th
	# column equals the name, and each assay value is the column-wise minimum over the
	# TX.sample rows that share that record's CASRN.
	# NOTE(review): CHEMS[,6] is addressed by position and is assumed to be the chemical
	# name column - confirm against the chemical file layout.

	print("==========================================================================",quote=FALSE)
	print(paste("Prepare the AC50 by compound file ...",sep=""),quote=FALSE)
	print("==========================================================================",quote=FALSE)
	flush.console()

	name.list <- sort(uniquify(as.character(TX.sample[,"Name"])))
	nassay <- dim(TX.sample)[2]-6
	if(nassay<1) stop("TX.sample has no assay columns (expected them from column 7 onward)")
	assay.cols <- 6+seq_len(nassay)
	out.cols <- 4+seq_len(nassay)
	assay.list <- names(TX.sample)[assay.cols]
	nchem <- length(name.list)

	data <- as.data.frame(matrix(data=NA,nrow=nchem,ncol=(nassay+4)))
	names(data) <- c("DSSTOX_GSID","CASRN","Name","inchi_key",assay.list)
	for(i in seq_len(nchem)) {
		cname <- name.list[i]
		# which() discards NA comparisons, so records with a missing name can never be
		# picked up as the "first match"
		chem.row <- which(CHEMS[,6]==cname)[1]
		casrn <- as.character(CHEMS[chem.row,"CASRN"])
		data[i,1] <- as.character(CHEMS[chem.row,"DSSTOX_GSID"])
		data[i,2] <- casrn
		data[i,3] <- cname
		data[i,4] <- as.character(CHEMS[chem.row,"inchi_key"])

		# rows with a missing CASRN are ignored rather than turning into all-NA rows
		sample.rows <- which(TX.sample[,"CASRN"]==casrn)
		# drop=FALSE keeps a data frame even when there is a single assay column
		temp <- TX.sample[sample.rows,assay.cols,drop=FALSE]
		nsample <- dim(temp)[1]
		if(nsample==1) {
			data[i,out.cols] <- temp[1,]
		} else if(nsample>1) {
			# the last row of the column-wise running minimum is the column minimum
			x <- cummin(temp)
			data[i,out.cols] <- x[dim(x)[1],]
		}
		# with no matching sample the assay cells keep their initial NA
		if(i%%100==0)	cat("Chemicals processed: ",i," out of ",nchem,"\n")
		flush.console()
	}

	write.table(data, file =paste("../data/ToxCast_AC50_byCompound_",date.string,".txt",sep=""), row.names=FALSE, append = FALSE, quote = TRUE, sep = "\t")
	print("Assay data written ...")
	flush.console()

}
#--------------------------------------------------------------------------------------
#
# Create the full input LEC files, one row per sample
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
LECBySample <- function(toxcast.export.date="2013_01_03",chemset="E1K",do.read.data=TRUE,do.read.chems=TRUE) {
	# Build the sample x assay LEC matrix for one chemical set and write it to
	# ToxCast_LEC_Matrix_bySample_NOT_FOR_PUBLIC_RELEASE_<chemset>_<date>.txt.
	#
	# toxcast.export.date: date tag embedded in every input and output file name
	# chemset:       one of "ToxCast", "E1K" or "Tox21"; selects the vendors read, the
	#                chemical membership columns used and (for Tox21) the assay list
	# do.read.data:  if TRUE, read the vendor fit files into the global DATA;
	#                otherwise the existing global DATA is used
	# do.read.chems: if TRUE, read the chemical file into the global CHEMS;
	#                otherwise the existing global CHEMS is used
	#
	# Values written besides the LEC itself:
	#   2000000  no usable record for that sample / assay pair
	#   1000700  LEC strictly between 0 and 1000000 whose level7_lec_hitcall is "FALSE"
	#   1000999  records whose AC50_mod is "<>"
	# Returns NULL (early, after printing "Bad LEC", if an LEC above 2000000 is found).

	if(!is.element(chemset,c("ToxCast","E1K","Tox21"))) {
		stop("chemset must be one of ToxCast, E1K or Tox21, got: ",chemset)
	}
	cat("==========================================================================\n")
	cat("Prepare the LECBySample file ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	# these locals shadow the file-level directory globals of the same names
	FITSDIR <- "../assay_data/curve_fits/"
	CHEMDIR <- "../chemical_info/"
	ASSAYDIR <- "../assay_info/"
	if(chemset!="Tox21") {
		vendors <- c("ACEA","Apredica","Attagene","Bioseek","Novascreen","Odyssey Thera")
	} else {
		vendors <- c("NCGC")
	}
	if(do.read.data) {
		fit.list <- vector("list",length(vendors))
		for(i in seq_along(vendors)) {
			file <- paste(FITSDIR,"ToxCast_assay_fit_NOT_FOR_PUBLIC_RELEASE_",vendors[i],"_",toxcast.export.date,".txt",sep="")
			temp <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="")
			print(dim(temp))
			cat("Read in ...",file,"\n")
			flush.console()
			fit.list[[i]] <- temp
		}
		DATA <<- do.call(rbind,fit.list)
	}
	if(do.read.chems) {
		file <- paste(CHEMDIR,"ToxCastTox21_Chemicals_",toxcast.export.date,".txt",sep="")
		CHEMS <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
	}

	# Tox21 uses the assay list file; every other set uses the assays present in DATA
	if(chemset=="Tox21") {
		file <- paste(ASSAYDIR,"Tox21_assays.txt",sep="")
		tox21.assays <- read.table(file,header=FALSE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		assay.list <- as.character(tox21.assays[,1])
	} else {
		assay.list <- sort(uniquify(as.character(DATA[,"assay_name"])))
	}

	# membership flag per chemical record, capped at 1
	if(chemset=="Tox21") {
		mask <- CHEMS[,"Tox21"]
	} else {
		mask <- CHEMS[,"Phase_I_V1"]+CHEMS[,"Phase_I_V2"]+CHEMS[,"Phase_IIa"]+CHEMS[,"Phase_IIb"]+CHEMS[,"Phase_IIc"]
		if(chemset=="E1K") mask <- mask+CHEMS[,"E1K"]
	}
	mask[mask>1] <- 1
	NAMES <- CHEMS[mask==1,c("DSSTOX_GSID","SNSID","Sample_ID","CASRN","Name")]

	# keep the first record of every sample id (repeated ids would be rejected as
	# duplicate row names of the matrix below)
	NAMES <- NAMES[!duplicated(NAMES[,"Sample_ID"]),]
	sample.list <- NAMES[,"Sample_ID"]

	nchem <- length(sample.list)
	nassay <- length(assay.list)

	DMAT <- as.data.frame(matrix(nrow=nchem,ncol=nassay))
	row.names(DMAT) <- sample.list
	names(DMAT) <- assay.list

	# restrict the fit records to the assays and samples of this matrix
	temp <- DATA[is.element(DATA[,"assay_name"],assay.list),]
	temp <- temp[is.element(temp[,"sample_id"],sample.list),]

	# flag LECs in (0,1000000) whose level 7 hit call is "FALSE"
	lec.raw <- temp[,"LEC"]
	demote <- temp[,"level7_lec_hitcall"]=="FALSE" & lec.raw>0 & lec.raw<1000000
	demote[is.na(demote)] <- FALSE
	temp[demote,"LEC"] <- 1000700

	# flag records with the "<>" modifier. The ">=" and "<=" modifiers are not rescaled:
	# the original scaling factors (5 and 1/10) were disabled, leaving a factor of 1.
	lec.values <- temp[,"LEC"]
	lec.values[which(temp[,"AC50_mod"]=="<>")] <- 1000999

	for(i in seq_len(nassay)) {
		assay <- names(DMAT)[i]
		cat("===================================\n",assay,"\n",i,"of",nassay,"\n===================================\n")
		flush.console()
		dtemp <- matrix(2000000,nrow=nchem,ncol=1)
		row.names(dtemp) <- row.names(DMAT)
		# select the records of this assay with a mask as long as temp itself
		in.assay <- is.element(temp[,"assay_name"],assay)
		sids <- temp[in.assay,"sample_id"]
		lecs <- lec.values[in.assay]
		# records are applied in file order, so the last record of a sample wins
		for(j in seq_along(sids)) {
			sid <- sids[j]
			lec <- lecs[j]
			if(!is.na(sid) && !is.na(lec)) {
				if(lec>2000000) {
					cat("Bad LEC\n")
					return()
				}
				dtemp[sid,1] <- lec
			}
		}
		DMAT[,assay] <- dtemp[,1]
	}

	cat("finished building the matrix, now trim empty columns\n")
	flush.console()
	# NOTE(review): cells without data hold 2000000 rather than NA or 0, so this only
	# removes columns that contain no positive value at all - confirm this is intended.
	temp <- DMAT
	temp[is.na(temp)] <- 0
	temp[temp>0] <- 1
	cs <- colSums(temp)
	# drop=FALSE keeps the assay name when a single column survives
	DMAT <- DMAT[,cs>0,drop=FALSE]

	DMAT <- cbind(NAMES[seq_len(nchem),],DMAT)
	print( dim(DMAT))
	file <- paste(FITSDIR,"ToxCast_LEC_Matrix_bySample_NOT_FOR_PUBLIC_RELEASE_",chemset,"_",toxcast.export.date,".txt",sep="")
	write.table(DMAT, file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep = "\t")
	cat("Assay data written ...\n")
}
#--------------------------------------------------------------------------------------
#
# Create the full input LEC files, one row per chemical (unique CASRN)
#
# chemset=(ToxCast,E1K,Tox21)
#
#--------------------------------------------------------------------------------------
LECByChemical <- function(toxcast.export.date="2013_02_13",chemset="Tox21",do.read.data=TRUE) {
	# Collapse the by-sample LEC matrix to one row per unique CASRN and write it to
	# ToxCast_LEC_Matrix_byChemical_NOT_FOR_PUBLIC_RELEASE_<chemset>_<date>.txt.
	#
	# toxcast.export.date: date tag embedded in the input and output file names
	# chemset:      chemical set tag embedded in the input and output file names
	# do.read.data: if TRUE, read the by-sample LEC matrix file into the global DATA;
	#               otherwise the existing global DATA is used
	#
	# Columns 6 onward of DATA are treated as assay columns. Each output row carries
	# CODE ("C" followed by the CASRN without "-", " " and "_"), CASRN_NODASH, CASRN,
	# the Name of the first matching sample, and colMin() of the assay columns over all
	# samples of that CASRN after missing values are replaced by 2000000.

	cat("==========================================================================\n")
	cat("Prepare the LECByChemical file ...\n")
	cat("==========================================================================\n")
	cat("Read in assay data ...\n")
	flush.console()
	# this local shadows the file-level directory global of the same name
	FITSDIR <- "../assay_data/curve_fits/"
	if(do.read.data) {
		file <- paste(FITSDIR,"ToxCast_LEC_Matrix_bySample_NOT_FOR_PUBLIC_RELEASE_",chemset,"_",toxcast.export.date,".txt",sep="")
		DATA <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE)
	}
	if(dim(DATA)[2]<6) stop("DATA has no assay columns (expected them from column 6 onward)")
	assay.cols <- 6:dim(DATA)[2]
	casrn.list <- uniquify(as.character(DATA[,"CASRN"]))
	assay.list <- names(DATA)[assay.cols]
	nchem <- length(casrn.list)
	nassay <- length(assay.list)
	newdata <- as.data.frame(matrix(nrow=nchem,ncol=4+nassay,data=NA))
	names(newdata) <- c("CODE","CASRN_NODASH","CASRN","Name",assay.list)
	newdata[,"CASRN"] <- casrn.list
	# strip dashes, blanks and underscores in one pass
	code.list <- gsub("[-_ ]","",casrn.list)
	newdata[,"CASRN_NODASH"] <- code.list
	# sprintf returns a zero-length result for zero-length input
	newdata[,"CODE"] <- sprintf("C%s",code.list)

	for(i in seq_len(nchem)) {
		casrn <- casrn.list[i]
		temp <- DATA[is.element(DATA[,"CASRN"],casrn),]
		cname <- temp[1,"Name"]
		cat(i,cname,"\n")
		flush.console()
		newdata[i,"Name"] <- cname
		# drop=FALSE keeps a data frame even when there is a single assay column
		temp <- temp[,assay.cols,drop=FALSE]
		temp[is.na(temp)] <- 2000000
		newdata[i,4+seq_len(nassay)] <- colMin(temp)
	}
	print( dim(newdata))
	file <- paste(FITSDIR,"ToxCast_LEC_Matrix_byChemical_NOT_FOR_PUBLIC_RELEASE_",chemset,"_",toxcast.export.date,".txt",sep="")
	write.table(newdata, file=file,row.names=FALSE,append=FALSE,quote=TRUE,sep = "\t")
	cat("Assay data written ...\n")
}

