#--------------------------------------------------------------------------------------
#
# Filter the hit matrix
#
#--------------------------------------------------------------------------------------
filter.hits <- function() {
    # Zero out hit calls whose curve top does not reach a per-source cutoff.
    #
    # Reads the globals MAT.hitcall, MAT.T and ASSAY.INFO (columns "Source"
    # and "Assay") and the helper uniquify(). The filtered matrix is stored in
    # the global MAT.hitcall.FILTERED. For every assay one line is printed:
    # assay name, number of hits before filtering, number of hits removed.
    #
    # A hit is kept only when the matching top value is present, positive and
    # at least the cutoff of the assay's source. Negative hit calls become 0;
    # hit calls that were NA stay NA.
    cat("==========================================================================\n")
    cat("filter.hits\n")
    cat("==========================================================================\n")

    # Minimum top value per assay source; sources not listed use 0.
    source.cutoff <- c(
        Cytotoxicity = 50,
        ATG_CIS = 1.5, ATG_TRANS = 1.5,
        ATG_cytotox = 50,
        ACEA = 50,
        APR_up = 2, APR_dn = 2,
        BSK_up = 0.2, BSK_down = 0.2,
        NVS_ADME = 50, NVS_ADME_Activator = 50,
        NVS_ENZ = 50, NVS_ENZ_Activator = 50,
        NVS_GPCR = 50, NVS_IC = 50, NVS_MP = 50, NVS_NR = 50, NVS_TR = 50,
        Tox21_BLA_Agonist = 50, Tox21_BLA_viability = 50,
        Tox21_BLA_Antagonist = 50, Tox21_LUC_Agonist = 50,
        Tox21_LUC_Antagonist = 50, Tox21_QC = 50,
        OT = 50
    )

    hfiltered <- MAT.hitcall
    source.list <- sort(uniquify(ASSAY.INFO[,"Source"]))

    # seq_along() keeps both loops from running when a list is empty
    for(i in seq_along(source.list)) {
        src <- source.list[i]
        src.key <- as.character(src)
        cutoff <- 0
        if(src.key %in% names(source.cutoff)) cutoff <- source.cutoff[[src.key]]

        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],src),"Assay"]
        for(j in seq_along(assay.list)) {
            assay <- assay.list[j]
            top <- MAT.T[,assay]
            hit.orig <- MAT.hitcall[,assay]

            # missing, non-positive or sub-cutoff tops invalidate the hit
            too.weak <- is.na(top) | top <= 0 | top < cutoff

            hit.new <- hit.orig
            hit.new[!is.na(hit.new) & hit.new < 0] <- 0
            hit.new[too.weak] <- 0
            hit.new[is.na(hit.orig)] <- NA

            hits.old <- sum(pmax(hit.orig,0),na.rm=TRUE)
            hits.new <- sum(hit.new,na.rm=TRUE)

            hfiltered[,assay] <- hit.new
            cat(assay,":",hits.old,":",hits.old - hits.new,"\n")
            flush.console()
        }
    }
    MAT.hitcall.FILTERED <<- hfiltered
}
#--------------------------------------------------------------------------------------
#
# Scale the top
#
#--------------------------------------------------------------------------------------
scale.top <- function() {
    # Rescale the top values of every assay by a per-source factor.
    #
    # Reads the globals MAT.T, MAT.tested, MAT.hitcall and ASSAY.INFO (columns
    # "Source" and "Assay") and the helper uniquify(). The scaled matrix is
    # stored in the global MAT.T.SCALED and one "source : factor" line is
    # printed per source.
    #
    # Sources whose name starts with NVS, ACEA, Tox21 or OT keep a factor of 1.
    # Every other source is scaled so that the 20th entry of the 5%-step
    # quantile vector (the 95% point) of its positive, tested, active top
    # values maps to 100. When that quantile is unavailable the factor is 1.
    cat("==========================================================================\n")
    cat("scale.top\n")
    cat("==========================================================================\n")

    tscale <- MAT.T
    source.list <- sort(uniquify(ASSAY.INFO[,"Source"]))
    fixed.prefixes <- c("NVS","ACEA","Tox21","OT")

    # seq_along() keeps the loop from running on an empty source list
    for(i in seq_along(source.list)) {
        src <- source.list[i]
        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],src),"Assay"]

        # substring() recycles over the prefix lengths, one comparison per prefix
        if(any(substring(src,1,nchar(fixed.prefixes))==fixed.prefixes)) {
            scaler <- 1
        }
        else {
            temp <- MAT.T[,assay.list]
            tested <- MAT.tested[,assay.list]
            hits <- MAT.hitcall[,assay.list]
            # push everything that must not enter the quantile below zero
            temp[temp<0] <- 0
            temp[is.na(temp)] <- -1
            temp[tested==0] <- -1
            temp[hits==0] <- -1
            tlist <- as.numeric(temp[temp>0])
            scaler <- 100/quantile(tlist,probs=seq(0,1,0.05))[20]
        }
        if(is.na(scaler)) scaler <- 1

        for(j in seq_along(assay.list)) {
            assay <- assay.list[j]
            tscale[,assay] <- MAT.T[,assay]*scaler
        }
        cat(src,":",scaler,"\n")
        flush.console()
    }
    MAT.T.SCALED <<- tscale
}
#--------------------------------------------------------------------------------------
#
# build the heatmap for the biological processes by chemicals filtered by z
#
# --------------------------------------------------------------------------------------
bio.z.hm <- function(zcut=3,zset="LO",cutoff=0,do.all=T,to.file=F,nlevel=50) {
    # Heatmap of chemicals x biological processes, filtered by Z-score.
    #
    # zcut    Z-score threshold.
    # zset    "LO" keeps entries with Z <= zcut, "HI" keeps Z >= zcut,
    #         "ALL" keeps every entry with a Z value.
    # cutoff  fractions below this value are set to 0 before plotting.
    # do.all  TRUE uses every biological process in ASSAY.INFO, FALSE a fixed
    #         subset; three process names are always excluded.
    # to.file TRUE draws into a PDF under ../plots, FALSE draws on the current
    #         device and stops in browser() at the end.
    # nlevel  largest cut height for the cluster table (Level_1 is always
    #         written).
    #
    # Reads the globals CHEMS, MAT.Z, MAT.hitcall.FILTERED and ASSAY.INFO.
    # Writes two CSV files under ../plots: the per-chemical fraction of
    # retained hits per process, and the cluster membership per cut height.

    code.list <- CHEMS[is.element(CHEMS[,"Phase_I"],1),"CODE"]
    code.list <- c(code.list,CHEMS[is.element(CHEMS[,"Phase_II"],1),"CODE"])
    code.list <- sort(unique(code.list))
    nchem <- length(code.list)

    # turn the Z matrix into a 0/1 indicator of "passes the Z filter"
    temp <- MAT.Z[code.list,]
    if(zset=="LO") {
        temp[is.na(temp)] <- 1000000
        temp[temp>zcut] <- 0
        temp[temp!=0] <- 1
    }
    else if(zset=="HI") {
        temp[is.na(temp)] <- -1000000
        temp[temp<zcut] <- 0
        temp[temp>0] <- 1
    }
    else if(zset=="ALL") {
        temp[is.na(temp)] <- -1000000
        temp[temp> -1000] <- 1
        temp[temp<0] <- 0
    }
    # only filtered hits may count
    htemp <- MAT.hitcall.FILTERED[code.list,]
    temp[is.na(htemp)] <- 0
    temp[htemp==0] <- 0

    bio.list <- sort(unique(ASSAY.INFO[,"biological_process"]))
    if(!do.all) {
        bio.list <- c(
            "cytotoxicity BLA",
            "cytotoxicity SRB",
            "ER stress",
            "apoptosis up",
            "heat shock",
            "hypoxia",
            "oxidative stress up",
            "proliferation decrease",
            "mitochondrial disruption down",
            "microtubule up")
    }
    bio.list <- bio.list[!is.element(bio.list,c("cytotox other","assay QC","enzyme anti-blocking"))]

    # fraction of each process's assays that pass, per chemical
    nprocess <- length(bio.list)
    dtemp <- matrix(0,ncol=nprocess,nrow=nchem)
    rownames(dtemp) <- code.list
    for(i in seq_along(bio.list)) {
        bp <- bio.list[i]
        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],bp),"Assay"]
        assay.list <- assay.list[is.element(assay.list,colnames(temp))]
        slice <- as.matrix(temp[,assay.list])
        denom <- dim(slice)[2]
        dtemp[,i] <- as.numeric(rowSums(slice) / denom)
    }
    colnames(dtemp) <- bio.list

    if(to.file) {
        fname <- paste("../plots/bioZ_",zcut,"_",zset,"_subset_hm.pdf",sep="")
        if(do.all) fname <- paste("../plots/bioZ_",zcut,"_",zset,"_all_hm.pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
        # close the device even when plotting or writing fails
        on.exit(dev.off(),add=TRUE)
    }
    dtemp[dtemp<cutoff] <- 0
    result <- heatmap(t(as.matrix(dtemp)),margins=c(10,10),scale="none",main=paste("Biological process / Z: ",zcut,":",zset),
                      xlab="",ylab="",cexCol=0.1,cexRow=0.8,col=brewer.pal(9,"Reds"),
                      hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)

    meta.cols <- c("CODE","Name","use_category","structure_category","structure_super_category")
    chems <- CHEMS[code.list,]
    output <- cbind(chems[,meta.cols],dtemp)
    outfile <- paste("../plots/assay_class_zfrac_",zcut,"_",zset,"_subset.csv",sep="")
    if(do.all) outfile <- paste("../plots/assay_class_zfrac_",zcut,"_",zset,"_all.csv",sep="")
    write.csv(output,file=outfile, row.names=FALSE)

    # Cluster table. The metadata is looked up by chemical code so that each
    # row describes the same chemical as its cluster assignment (cutree()
    # returns assignments in the row order of dtemp, i.e. code.list).
    cl <- hclust(d=dist(dtemp),method="ward.D")
    clout <- chems[,meta.cols]
    for(hcut in seq_len(max(1,nlevel))) {
        clout[[paste("Level_",hcut,sep="")]] <- as.vector(cutree(cl,h=hcut))
        cat("Finished preping clusters for hcut: ",hcut,"\n")
        flush.console()
    }
    outfile <- paste("../plots/chem_clusters_",zcut,"_",zset,"_subset.csv",sep="")
    if(do.all) outfile <- paste("../plots/chem_clusters_",zcut,"_",zset,"_all.csv",sep="")
    write.csv(clout,file=outfile, row.names=FALSE)

    if(!to.file) browser()
    invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# build the heatmap for the assay technologies by chemicals filtered by z
#
# --------------------------------------------------------------------------------------
promiscuity.hm <- function(zcut=3,zset="LO",cutoff=0,to.file=F) {
    # Heatmap of chemicals x assay sources, filtered by Z-score.
    #
    # zcut    Z-score threshold.
    # zset    "LO" keeps entries with Z <= zcut, "HI" keeps Z >= zcut,
    #         "ALL" keeps every entry with a Z value.
    # cutoff  fractions below this value are set to 0 before plotting.
    # to.file TRUE draws into a PDF under ../plots, FALSE draws on the current
    #         device and stops in browser() at the end.
    #
    # Reads the globals CHEMS, MAT.Z, MAT.hitcall.FILTERED and ASSAY.INFO and
    # writes the per-chemical fraction of retained hits per source to
    # ../plots/source_zfrac_<zcut>_<zset>.csv.

    code.list <- CHEMS[is.element(CHEMS[,"Phase_I"],1),"CODE"]
    code.list <- c(code.list,CHEMS[is.element(CHEMS[,"Phase_II"],1),"CODE"])
    code.list <- sort(unique(code.list))
    nchem <- length(code.list)

    # turn the Z matrix into a 0/1 indicator of "passes the Z filter"
    temp <- MAT.Z[code.list,]
    if(zset=="LO") {
        temp[is.na(temp)] <- 1000000
        temp[temp>zcut] <- 0
        temp[temp!=0] <- 1
    }
    else if(zset=="HI") {
        temp[is.na(temp)] <- -1000000
        temp[temp<zcut] <- 0
        temp[temp>0] <- 1
    }
    else if(zset=="ALL") {
        temp[is.na(temp)] <- -1000000
        temp[temp>-1000] <- 1
        temp[temp<0] <- 0
    }
    # only filtered hits may count
    htemp <- MAT.hitcall.FILTERED[code.list,]
    temp[is.na(htemp)] <- 0
    temp[htemp==0] <- 0

    excluded <- c("NVS_ADME_Activator","NVS_ENZ_Activator","ATG_cytotox","Tox21_QC","ACEA")
    source.list <- sort(unique(ASSAY.INFO[,"Source"]))
    source.list <- source.list[!is.element(source.list,excluded)]

    # fraction of each source's assays that pass, per chemical
    nprocess <- length(source.list)
    dtemp <- matrix(0,ncol=nprocess,nrow=nchem)
    rownames(dtemp) <- code.list
    for(i in seq_along(source.list)) {
        src <- source.list[i]
        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],src),"Assay"]
        assay.list <- assay.list[is.element(assay.list,colnames(temp))]
        slice <- as.matrix(temp[,assay.list])
        denom <- dim(slice)[2]
        dtemp[,i] <- as.numeric(rowSums(slice) / denom)
    }
    colnames(dtemp) <- source.list

    if(to.file) {
        fname <- paste("../plots/source_",zcut,"_",zset,"_hm.pdf",sep="")
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
        # close the device even when plotting or writing fails
        on.exit(dev.off(),add=TRUE)
    }
    dtemp[dtemp<cutoff] <- 0
    # rows of this plot are assay sources, so the title says so
    result <- heatmap(t(as.matrix(dtemp)),margins=c(10,10),scale="none",main=paste("Assay source / Z: ",zcut,":",zset),
                      xlab="",ylab="",cexCol=0.1,cexRow=0.8,col=brewer.pal(9,"Reds"),
                      hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)

    chems <- CHEMS[code.list,]
    output <- cbind(chems[,c("CODE","Name","use_category","structure_category","structure_super_category","target_gene")],dtemp)
    outfile <- paste("../plots/source_zfrac_",zcut,"_",zset,".csv",sep="")
    write.csv(output,file=outfile, row.names=FALSE)

    if(!to.file) browser()
    invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# merge the trend files
#
# --------------------------------------------------------------------------------------
trend.merge <- function(to.file=F) {
    # Merge six fraction tables from ../plots into one table and plot it.
    #
    # The first table (source fractions, LO) supplies the six leading metadata
    # columns; its data columns get the suffix "_LO". The data columns of the
    # other five tables are appended with their own suffix. The merged table
    # is written to ../plots/trend_set.csv and drawn as a heatmap.
    #
    # to.file TRUE draws into ../plots/trend_set_hm.pdf, FALSE draws on the
    #         current device and stops in browser() at the end.

    read.fractions <- function(infile) {
        read.csv(infile,header=TRUE,stringsAsFactors=FALSE,quote="\"",comment.char="")
    }
    n.meta <- 6

    output <- read.fractions("../plots/source_zfrac_3_LO.csv")
    rownames(output) <- output[,"CODE"]
    data.cols <- setdiff(seq_len(ncol(output)),seq_len(n.meta))
    names(output)[data.cols] <- paste(names(output)[data.cols],"_LO",sep="")

    # file, column suffix, index of the first data column in that file
    extra <- list(
        list("../plots/source_zfrac_5_HI.csv","_HI",7),
        list("../plots/source_zfrac_3_ALL.csv","_ALL",7),
        list("../plots/assay_class_zfrac_3_LO_all.csv","_LO",6),
        list("../plots/assay_class_zfrac_5_HI_all.csv","_HI",6),
        list("../plots/assay_class_zfrac_5_ALL_all.csv","_ALL",6)
    )
    for(spec in extra) {
        tab <- read.fractions(spec[[1]])
        names(tab) <- paste(names(tab),spec[[2]],sep="")
        # drop=FALSE keeps a single data column a named column
        output <- cbind(output,tab[,-seq_len(spec[[3]]-1),drop=FALSE])
    }

    outfile <- "../plots/trend_set.csv"
    write.csv(output,file=outfile, row.names=FALSE)

    if(to.file) {
        fname <- "../plots/trend_set_hm.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
        # close the device even when plotting fails
        on.exit(dev.off(),add=TRUE)
    }

    # numeric copy of the data columns; this is what gets plotted, so a
    # column read as text cannot turn the whole matrix into character
    dtemp <- output[,-seq_len(n.meta),drop=FALSE]
    rtemp <- matrix(0,nrow=nrow(dtemp),ncol=ncol(dtemp),
                    dimnames=list(rownames(dtemp),colnames(dtemp)))
    for(i in seq_len(ncol(dtemp))) {
        rtemp[,i] <- as.numeric(dtemp[,i])
    }

    result <- heatmap(t(rtemp),margins=c(10,10),scale="none",main="Trend Plot",
                      xlab="",ylab="",cexCol=0.1,cexRow=0.2,col=brewer.pal(9,"Reds"),
                      hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)

    if(!to.file) browser()
    invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# plot the APR no-recovery concentration vs. cytotox
#
# --------------------------------------------------------------------------------------
apr.vs.cytotox <- function(to.file=F) {
    # Scatter plot of the APR no-recovery concentration against the cytotox
    # value of the same chemical, both on log axes, with error bars.
    #
    # Reads APR/apr_norecover.txt (columns CODE, rs_norecov_mn, rs_norecov_sd)
    # and output/cytotox_dist.txt (columns CODE, cytotox.median,
    # cytotox.mad.global). Chemicals without a cytotox value are skipped.
    #
    # to.file TRUE draws into plots/apr_vs_cytotox.pdf, FALSE draws on the
    #         current device and stops in browser() at the end.

    # read both inputs before opening the device so a missing file does not
    # leave an empty PDF behind
    apr <- read.table("APR/apr_norecover.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
    ctemp <- read.table("output/cytotox_dist.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
    rownames(ctemp) <- ctemp[,"CODE"]

    if(to.file) {
        fname <- "plots/apr_vs_cytotox.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
        # close the device even when plotting fails
        on.exit(dev.off(),add=TRUE)
    }

    plot(1~1,xlim=c(0.01,1000),ylim=c(0.01,1000),xlab="Burst Center (uM)",ylab="APR No-recover conc (uM)",cex.lab=1,cex.axis=1,log="xy",main="APR no-recovery vs. Cytotox",type="n")

    # seq_len() keeps the loop from running on an empty table
    for(i in seq_len(nrow(apr))) {
        code <- apr[i,"CODE"]
        val <- apr[i,"rs_norecov_mn"]
        val.sd <- apr[i,"rs_norecov_sd"]
        bc.log <- ctemp[code,"cytotox.median"]
        bc.sd <- ctemp[code,"cytotox.mad.global"]

        # same transform as the centre: 1e6 * 10^(-x)
        bc.min <- 1000000*10^(-(bc.log+bc.sd))
        bc.max <- 1000000*10^(-(bc.log-bc.sd))
        bc <- 1000000*10^(-bc.log)

        if(!is.na(bc)) {
            points(val~bc)
            lines(c(bc.min,bc.max),c(val,val))
            lines(c(bc,bc),c(val-val.sd,val+val.sd))
        }
    }
    # identity line
    lines(c(0.001,1000),c(0.001,1000))

    if(!to.file) browser()
    invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# load chemicals
#
# QC=OK
#--------------------------------------------------------------------------------------
old.load.chemicals <- function() {
    # Load the chemical table and keep the rows named in NAME.MAT.
    # Sets the globals CODE.LIST (the CODE column of NAME.MAT) and CHEMS (the
    # chemical table indexed by those codes, in that order).
    chem.file <- "input/ToxCast_Generic_Chemicals_2013_12_10.txt"
    chem.table <- read.table(chem.file,header=T,sep="\t",stringsAsFactors=F,quote="",comment="")
    cat("loaded Chemical Information\n")
    flush.console()
    cat("Original size: ",dim(chem.table),"\n")

    wanted.codes <- NAME.MAT[,"CODE"]
    CODE.LIST <<- wanted.codes

    # index rows by chemical code so they can be selected by name
    rownames(chem.table) <- chem.table[,"CODE"]
    selected <- chem.table[wanted.codes,]
    CHEMS <<- selected
    cat("Final size: ",dim(selected),"\n")
}
#--------------------------------------------------------------------------------------
#
# z-score: load the data
#
# QC=OK
#--------------------------------------------------------------------------------------
old.load.zscore <- function() {
    # Load the two-sided Z-score table, index it by chemical code, keep the
    # columns named in ASSAY.LIST and store it in the global Z.ALL.ALL.2SIDED.
    banner <- "==========================================================================\n"
    cat(banner)
    cat("load zscore\n")
    cat(banner)
    flush.console()

    zscores <- read.table("output/zscore_all_all_2sided.txt",header=T,sep="\t",stringsAsFactors=F,quote="",comment="")
    rownames(zscores) <- zscores[,"CODE"]
    Z.ALL.ALL.2SIDED <<- zscores[,ASSAY.LIST]
}
#--------------------------------------------------------------------------------------
#
# z-score normed: load the data
#
# QC=OK
#--------------------------------------------------------------------------------------
load.zscore.norm <- function() {
    # Load the normalised Z-score matrix as read from disk and store it in the
    # global MAT.Z.NORM. Row names are whatever read.table() assigns; they are
    # not reset here.
    banner <- "==========================================================================\n"
    cat(banner)
    cat("load zscore normed\n")
    cat(banner)
    flush.console()

    MAT.Z.NORM <<- read.table("output/zscore_matrix_norm.txt",header=T,sep="\t",stringsAsFactors=F,quote="",comment="")
}
#--------------------------------------------------------------------------------------
#
# calculate the most potent and specific hits
#
# QC=OK
#--------------------------------------------------------------------------------------
potent.specific <- function(cutoff=5) {
    # Tabulate the most potent and specific chemical/gene hits.
    #
    # cutoff  minimum gene score (entry of GMATRIX) for a chemical/gene pair
    #         to be reported.
    #
    # Reads the globals GMATRIX (chemicals x genes), CHEMS, ASSAY.INFO and
    # MAT.Z.NORM, plus the helper uniquify() and the stringr functions. For
    # every retained pair it computes a Z-score summary over the gene's assays
    # and compares the gene with the chemical's IntendedTarget text to label
    # the pair "Exact Match" or one of the "Crosstalk ..." classes. Four
    # tab-delimited files are written under output/: the pair table, a summary
    # per match class, a summary per gene and a summary per chemical. The
    # function ends in browser().
    cat("==========================================================================\n")
    cat("potent.specific\n")
    cat("==========================================================================\n")
    flush.console()

    # TRUE when `pattern` (a regular expression) matches any element of x
    has.match <- function(pattern,x) length(grep(pattern,x))>0
    # TRUE when both the target token and the gene match some entry of lst
    both.in <- function(target,gene,lst) has.match(target,lst) && has.match(gene,lst)
    # Split an IntendedTarget string into upper-case tokens. The longer
    # "Ion channel Na" phrase is replaced before "Ion channel", otherwise it
    # can never match. Empty and NA tokens are dropped: an empty pattern
    # matches everything and would label target-less chemicals as GPCR.
    target.tokens <- function(txt) {
        txt <- str_replace_all(txt,"ion channel","ion_channel")
        txt <- str_replace_all(txt,"Ion channel Na","ION_CHANNEL")
        txt <- str_replace_all(txt,"Ion channel","ION_CHANNEL")
        tokens <- str_split(toupper(txt)," ")[[1]]
        tokens <- tokens[!is.element(tokens,c(" ","]","[","(?)"))]
        tokens[!is.na(tokens) & str_length(tokens)>0]
    }

    # ---- chemical/gene pairs whose gene score reaches the cutoff ----
    code.list <- c()
    gene.list <- c()
    gs.list <- c()
    for(gene in names(GMATRIX)) {
        score <- GMATRIX[,gene]
        # missing scores are not hits
        keep <- !is.na(score) & score>=cutoff
        code.list <- c(code.list,rownames(GMATRIX)[keep])
        gene.list <- c(gene.list,rep(gene,sum(keep)))
        gs.list <- c(gs.list,score[keep])
    }
    n <- length(code.list)
    if(n==0) {
        cat("No gene scores at or above cutoff:",cutoff,"\n")
        return(invisible(NULL))
    }

    z.list <- numeric(n)
    combo.list <- numeric(n)
    match.list <- character(n)
    target.class.list <- character(n)
    cname.list <- CHEMS[code.list,"ShortName"]
    target.list <- CHEMS[code.list,"IntendedTarget"]
    use.list <- CHEMS[code.list,"UseCategory"]
    str.list <- CHEMS[code.list,"StructureCategory"]

    # ---- gene families used for the crosstalk classes ----
    # (PPAR, PTGS, GABA, MMP, SLC and ACHE crosstalk classes are not reported)
    nr.list <- c("ESR1","ESR2","AR","PGR","NR3C1","NR3C2")
    gpcr.list <- sort(uniquify(toupper(ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],"Novascreen_GPCR"),"Gene_Process"])))
    gpcr.list <- c(gpcr.list,"HTR1B","DRD3","BDKRB1","ADRA2A","DRD2","HTR2A")
    gpcr.list <- sort(uniquify(gpcr.list))
    ic.list <- sort(uniquify(toupper(ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],"Novascreen_IC"),"Gene_Process"])))
    ic.list <- c(ic.list,sort(uniquify(toupper(ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],"Novascreen_LGIC"),"Gene_Process"]))))
    ic.list <- c(ic.list,"SIGMAR1","ION_CHANNEL","KCNJx","KCNJX","KCNJ1")
    enz.list <- sort(uniquify(toupper(ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],"Novascreen_ENZ"),"Gene_Process"])))
    dna.list <- c("DNA","TP53","H2AFX")
    mito.list <- c("mitochondrial","TSPO","mitochondria","Mitochondria","MITOCHONDRIA")

    for(i in seq_len(n)) {
        code <- code.list[i]
        gene <- toupper(gene.list[i])
        if(gene=="HLA.DRA") gene <- "HLA-DRA"
        assay.list <- ASSAY.INFO[is.element(toupper(ASSAY.INFO[,"Gene_Process"]),gene),"Assay"]

        # "_Activator" and "_up" assays are left out of the denominator
        # (an assay carrying both tags is counted twice, as before)
        slice <- sum(grepl("_Activator",assay.list)) + sum(grepl("_up",assay.list))
        denom <- length(assay.list)-slice
        if(denom==0) denom <- 1
        z <- MAT.Z.NORM[code,assay.list]
        z[is.na(z)] <- 0

        z.list[i] <- sum(as.numeric(z))/denom
        combo.list[i] <- z.list[i]+gs.list[i]

        tlist <- target.tokens(target.list[i])

        # the first token that yields a match decides the label
        for(target in tlist) {
            not.excluded <- target!="AR" && target!="RNA"
            if(has.match(target,gpcr.list) && not.excluded) {
                target.class.list[i] <- "GPCR"
            }

            matched <- TRUE
            # one chain: an exact match is never overwritten by a crosstalk class
            if(has.match(target,gene) || has.match(gene,tlist)) {
                match.list[i] <- "Exact Match"
            }
            else if(both.in(target,gene,dna.list)) {
                match.list[i] <- "Exact Match"
            }
            else if(both.in(target,gene,mito.list)) {
                match.list[i] <- "Exact Match"
            }
            else if(both.in(target,gene,gpcr.list)) {
                if(not.excluded) {
                    match.list[i] <- "Crosstalk GPCR"
                    target.class.list[i] <- "GPCR"
                }
                else matched <- FALSE
            }
            else if(both.in(target,gene,nr.list)) {
                match.list[i] <- "Crosstalk NR"
                target.class.list[i] <- "NR"
            }
            else if(both.in(target,gene,enz.list)) {
                match.list[i] <- "Crosstalk ENZ"
                target.class.list[i] <- "ENZ"
            }
            else if(both.in(target,gene,ic.list)) {
                match.list[i] <- "Crosstalk IC"
                target.class.list[i] <- "IC"
            }
            else matched <- FALSE

            if(matched) break
        }
    }

    # ---- pair table ----
    results <- as.data.frame(cbind(code.list,cname.list,use.list,str.list,target.list,target.class.list,gene.list,as.numeric(gs.list),z.list,combo.list,match.list),stringsAsFactors=FALSE)
    names(results) <- c("CODE","Name","UseCategory","StructureCategory","IntendedTarget","TargetClass","Gene","GeneScore","Zmean","PotentSelective","MatchType")
    outfile <- paste("output/potent_specific_",cutoff,".txt",sep="")
    write.table(results,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")

    # ---- summary per match class ----
    class.list <- c("Exact Match","Crosstalk GPCR","Crosstalk NR","Crosstalk IC","Crosstalk ENZ")
    nclass <- length(class.list)
    ntotal <- dim(results)[1]
    res.sum <- as.data.frame(matrix(nrow=nclass+2,ncol=5))
    names(res.sum) <- c("Class","Description","Chemicals","Hits","Percent")
    for(i in seq_along(class.list)) {
        my.class <- class.list[i]
        temp <- results[is.element(results[,"MatchType"],my.class),]
        res.sum[i,"Class"] <- my.class
        res.sum[i,"Hits"] <- dim(temp)[1]
        res.sum[i,"Percent"] <- format(100*dim(temp)[1]/ntotal,digits=2)
        res.sum[i,"Chemicals"] <- length(uniquify(temp[,"CODE"]))
    }
    # unmatched pairs, split by whether the chemical has any intended target
    temp <- results[is.element(results[,"MatchType"],""),]
    temp.other.target <- temp[!is.element(temp[,"IntendedTarget"],""),]
    temp.no.target <- temp[is.element(temp[,"IntendedTarget"],""),]
    res.sum[nclass+1,"Class"] <- "Other Target"
    res.sum[nclass+2,"Class"] <- "No Target"
    res.sum[nclass+1,"Hits"] <- dim(temp.other.target)[1]
    res.sum[nclass+2,"Hits"] <- dim(temp.no.target)[1]
    res.sum[nclass+1,"Percent"] <- format(100*dim(temp.other.target)[1]/ntotal,digits=2)
    res.sum[nclass+2,"Percent"] <- format(100*dim(temp.no.target)[1]/ntotal,digits=2)
    res.sum[nclass+1,"Chemicals"] <- length(uniquify(temp.other.target[,"CODE"]))
    res.sum[nclass+2,"Chemicals"] <- length(uniquify(temp.no.target[,"CODE"]))
    print(res.sum)
    outfile <- paste("output/potent_specific_summary_",cutoff,".txt",sep="")
    write.table(res.sum,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")

    # ---- summary per gene ----
    gene.list <- as.character(sort(uniquify(results[,"Gene"])))
    ngene <- length(gene.list)
    gene.sum <- as.data.frame(matrix(nrow=ngene,ncol=3))
    gene.sum[,1] <- gene.list
    names(gene.sum) <- c("Gene","Hits","MeanGS")
    for(i in seq_along(gene.list)) {
        temp <- results[is.element(results[,"Gene"],gene.list[i]),]
        gene.sum[i,"Hits"] <- dim(temp)[1]
        gene.sum[i,"MeanGS"] <- mean(as.numeric(temp[,"GeneScore"]))
    }
    outfile <- paste("output/potent_specific_gene_summary_",cutoff,".txt",sep="")
    write.table(gene.sum,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")

    # ---- summary per chemical ----
    code.list <- as.character(sort(uniquify(results[,"CODE"])))
    nchem <- length(code.list)
    chem.sum <- as.data.frame(matrix(nrow=nchem,ncol=7))
    chem.sum[,1] <- code.list
    names(chem.sum) <- c("CODE","Name","UseCategory","StructureCategory","IntendedTarget","Hits","MeanGS")
    for(i in seq_along(code.list)) {
        code <- code.list[i]
        temp <- results[is.element(results[,"CODE"],code),]
        chem.sum[i,"Hits"] <- dim(temp)[1]
        chem.sum[i,"MeanGS"] <- mean(as.numeric(temp[,"GeneScore"]))
        chem.sum[i,"Name"] <- CHEMS[code,"ShortName"]
        chem.sum[i,"UseCategory"] <- CHEMS[code,"UseCategory"]
        chem.sum[i,"StructureCategory"] <- CHEMS[code,"StructureCategory"]
        chem.sum[i,"IntendedTarget"] <- CHEMS[code,"IntendedTarget"]
    }
    outfile <- paste("output/potent_specific_chemical_summary_",cutoff,".txt",sep="")
    write.table(chem.sum,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")

    browser()
}
#--------------------------------------------------------------------------------------
#
# summarize for a target class
#
# QC=OK
#--------------------------------------------------------------------------------------
target.class.summary <- function(target.class="GPCR",cutoff=5) {
    # List the chemicals whose IntendedTarget text names a member of a target
    # class.
    #
    # target.class  class to look for. Only "GPCR" is implemented; any other
    #               value selects no chemicals.
    # cutoff        accepted for call compatibility; not used.
    #
    # Reads the globals CHEMS and ASSAY.INFO, plus the helper uniquify() and
    # the stringr functions. Prints the number of matching chemicals and their
    # short names, then stops in browser().
    cat("==========================================================================\n")
    cat("target.class.summary\n")
    cat("==========================================================================\n")
    flush.console()

    gpcr.list <- sort(uniquify(toupper(ASSAY.INFO[is.element(ASSAY.INFO[,"Source"],"Novascreen_GPCR"),"Gene_Process"])))
    gpcr.list <- c(gpcr.list,"HTR1B","DRD3","BDKRB1","ADRA2A","DRD2","HTR2A")
    gpcr.list <- sort(uniquify(gpcr.list))

    # Split an IntendedTarget string into upper-case tokens. The longer
    # "Ion channel Na" phrase is replaced before "Ion channel", otherwise it
    # can never match. Empty and NA tokens are dropped so that a missing
    # target cannot break the comparisons below.
    target.tokens <- function(txt) {
        txt <- str_replace_all(txt,"ion channel","ion_channel")
        txt <- str_replace_all(txt,"Ion channel Na","ION_CHANNEL")
        txt <- str_replace_all(txt,"Ion channel","ION_CHANNEL")
        tokens <- str_split(toupper(txt)," ")[[1]]
        tokens <- tokens[!is.element(tokens,c(" ","]","[","(?)"))]
        tokens[!is.na(tokens) & str_length(tokens)>0]
    }

    code.list <- CHEMS[,"CODE"]
    mask <- rep(FALSE,length(code.list))
    if(target.class=="GPCR") {
        for(i in seq_along(code.list)) {
            tlist <- target.tokens(CHEMS[code.list[i],"IntendedTarget"])
            for(target in tlist) {
                # "AR" and "RNA" are skipped even when they match an entry
                if(target!="AR" && target!="RNA" && length(grep(target,gpcr.list))>0) {
                    mask[i] <- TRUE
                    break
                }
            }
        }
    }

    code.list <- code.list[mask]
    cat("Chemicals targeting class:",target.class,":",length(code.list),"\n")
    name.list <- CHEMS[code.list,"ShortName"]
    print(name.list)

    browser()
}
#--------------------------------------------------------------------------------------
#
# Emax x Z-score
#
# QC=OK
#--------------------------------------------------------------------------------------
emax.z <- function(to.file=F) {
    # For every assay source with "gene" or "functional" assays, draw a
    # histogram of Emax and a scatter of Emax against the normalised Z-score.
    # Reads the globals ASSAY.INFO, EMAXMAT and MAT.Z.NORM and the helper
    # uniquify(). With to.file=TRUE the plots go to plots/emax_z.pdf.
    # browser() is entered after each histogram, after each scatter when not
    # writing to file, and at the end when not writing to file.
    banner <- "==========================================================================\n"
    cat(banner)
    cat("emax.z\n")
    cat(banner)
    flush.console()
    if(to.file) {
        pdf(file="plots/emax_z.pdf",width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    par(mfrow=c(3,2),mar=c(6,6,4,4))

    # flag the assay types to keep (1) and to drop (0); other values stay as is
    type.flag <- ASSAY.INFO[,"Type"]
    type.flag[type.flag %in% c("gene","functional")] <- 1
    type.flag[type.flag %in% c("cytotox","cytotox_fail","proliferation")] <- 0
    kept.assays <- ASSAY.INFO[type.flag==1,]

    set.names <- sort(uniquify(kept.assays[,"Source"]))
    for(k in 1:length(set.names)) {
        aset <- set.names[k]
        cat(aset,"\n")
        in.set <- kept.assays[kept.assays[,"Source"] %in% aset,"Assay"]
        in.set <- in.set[in.set %in% names(EMAXMAT)]
        emax <- EMAXMAT[,in.set]
        zval <- MAT.Z.NORM[,in.set]
        if(aset!="ACEA") {
            # flatten both tables to plain numeric vectors
            zval <- as.numeric(as.matrix(zval))
            emax <- as.numeric(as.matrix(emax))
        }

        # first drop entries without a Z-score ...
        has.z <- !is.na(zval)
        emax <- emax[has.z]
        zval <- zval[has.z]
        cat(length(zval),"\n")
        # ... then entries without an Emax
        has.e <- !is.na(emax)
        zval <- zval[has.e]
        emax <- emax[has.e]
        cat(length(zval),"\n")

        etemp <- emax
        ztemp <- zval
        hist(etemp)

        browser()
        if(length(ztemp)>0) {
            plot(etemp~ztemp,xlab="Z",ylab="Emax",main=aset,cex.lab=1.5,cex.axis=1.5)
            if(!to.file) browser()
        }
    }
    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# Plot the RTK data
#
# QC=OK
#--------------------------------------------------------------------------------------
rtk.auc.plot <- function(to.file=F,ymin=0.00,ymax=0.2) {
	# Plot, one row per chemical, the highest dose tested (hdt) against the
	# cytotoxicity "burst" range after that range has been divided by the
	# chemical's entry in vLiver.rat.peak.values.
	#
	# Arguments:
	#   to.file  if TRUE the plot is written to
	#            plots/rtk_auc_with_burst_withToxRefDB.pdf; otherwise it is drawn
	#            on the current device and browser() is called at the end.
	#   ymin     lower limit of the y axis.
	#   ymax     kept for backward compatibility but ignored: the upper y limit
	#            is always recomputed as (number of plotted chemicals + 3).
	#
	# Inputs read from disk: output/cytotox_dist.txt, RTK/CHR_MTD.txt and
	# ../RatAUC/ratvalues-better-073012.RData. The RData file is expected to
	# define vLiver.rat.peak.values (TODO confirm). The global CHEMS table and
	# str_replace_all() must already be available.
	cat("==========================================================================\n")
	cat("rtk.plot\n")
	cat("==========================================================================\n")
	flush.console()
	file <- "output/cytotox_dist.txt"
	cytotox.data <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
	rownames(cytotox.data) <- cytotox.data[,"CODE"]
	file <- "RTK/CHR_MTD.txt"
	trdata <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
	load("../RatAUC/ratvalues-better-073012.RData")
	if(to.file) {
		fname <- paste("plots/rtk_auc_with_burst_withToxRefDB.pdf",sep="")
		pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
	}
	par(mfrow=c(1,1),mar=c(6,6,4,4))

	nchem <- length(vLiver.rat.peak.values)
	code.list <- c()
	cname.list <- c()
	burst.mean.list <- c()
	burst.max.list <- c()
	burst.min.list <- c()
	peak.mtd.list <- c()
	peak.lel.list <- c()
	ache.list <- c()

	# seq_len() so that an empty peak-value vector simply yields no chemicals
	for(i in seq_len(nchem)) {
		casrn <- names(vLiver.rat.peak.values)[i]
		# chemical code = "C" followed by the CASRN with the hyphens removed
		code <- paste("C",str_replace_all(casrn,"-",""),sep="")
		peak <- vLiver.rat.peak.values[i]

		# the chemical must be present in all three tables
		if(!is.element(code,CHEMS[,"CODE"])) next
		if(!is.element(code,cytotox.data[,"CODE"])) next
		if(!is.element(casrn,trdata[,"chemical_casrn"])) next

		temp <- CHEMS[is.element(CHEMS[,"CODE"],code),]
		cname <- temp[1,"ShortName"]
		target <- temp[1,"IntendedTarget"]
		# flag chemicals whose intended target mentions ACHE
		ache <- !is.na(target) && grepl("ACHE",target)

		burst.mean.log <- cytotox.data[code,"cytotox.mean"]
		burst.mad.log <- cytotox.data[code,"cytotox.sd.global"]
		# only chemicals with a cytotoxicity value of at least 4 are plotted
		if(is.na(burst.mean.log) || burst.mean.log<4) next

		# back-transform 10^(-x) and scale by 1e6 (presumably -log10 molar to
		# micromolar -- confirm); the range is mean -/+ 3 * cytotox.sd.global
		burst.mean <- 10**(-burst.mean.log)*1000000
		burst.max <- 10**(-(burst.mean.log-3*burst.mad.log))*1000000
		burst.min <- 10**(-(burst.mean.log+3*burst.mad.log))*1000000

		temp.2 <- trdata[is.element(trdata[,"chemical_casrn"],casrn),]
		if(dim(temp.2)[1]==0 || is.na(burst.mean)) next

		peak.mtd <- max(temp.2[,"hdt"])
		peak.lel <- as.numeric(min(temp.2[,"lel_dose"]))

		# rescale the burst range by the chemical's peak value
		burst.mean <- burst.mean / peak
		burst.max <- burst.max / peak
		burst.min <- burst.min / peak

		code.list <- c(code.list,code)
		cname.list <- c(cname.list,cname)
		burst.mean.list <- c(burst.mean.list,burst.mean)
		burst.max.list <- c(burst.max.list,burst.max)
		burst.min.list <- c(burst.min.list,burst.min)
		peak.mtd.list <- c(peak.mtd.list,peak.mtd)
		peak.lel.list <- c(peak.lel.list,peak.lel)
		ache.list <- c(ache.list,ache)
	}
	##############################################################
	nhit <- length(code.list)
	# NOTE: this overrides whatever the caller passed as ymax
	ymax <- nhit+3

	# rows are drawn in order of increasing highest dose tested
	index <- integer(0)
	if(nhit>0) index <- sort(peak.mtd.list,index.return=TRUE)$ix
	plot(0~0,cex.lab=1.5,log="x",cex.axis=1.5,xlim=c(0.001,10000),ylim=c(ymin,ymax),xlab="Dose (mg/kg/day)", ylab="",type="n",main="")
	for(i in seq_len(nhit)) {
		k <- index[i]
		cname <- cname.list[k]
		ache <- ache.list[k]
		burst.max <- burst.max.list[k]
		burst.min <- burst.min.list[k]
		peak.mtd <- peak.mtd.list[k]

		# gray: dose inside the burst range; red: below it; green: above it;
		# yellow: below it and either ACHE-targeted or Abamectin
		color <- "gray"
		if(peak.mtd <burst.min) color <- "red"
		if(peak.mtd >burst.max) color <- "green"
		if(color=="red" && ache) color <- "yellow"
		if(color=="red" && cname=="Abamectin") color <- "yellow"
		y <- i
		points(peak.mtd,y,pch=21,bg=color,cex=2)
		# horizontal whisker for the burst range with end caps
		lines(c(burst.min,burst.max),c(y,y),lwd=1)
		lines(c(burst.min,burst.min),c(y-0.2,y+0.2),lwd=1)
		lines(c(burst.max,burst.max),c(y-0.2,y+0.2),lwd=1)

		# label only the chemicals that fall outside the burst range
		if(peak.mtd<burst.min) text(peak.mtd,y,cname,pos=2,cex=0.8)
		if(peak.mtd>burst.max) text(peak.mtd,y,cname,pos=4,cex=0.8)
	}
	# hand-drawn legend in the upper-left corner
	text(1,ymax*0.99,paste("Total Chemicals: ",nhit),pos=4)
	xlegend <- 1e-3
	points(xlegend,ymax*0.95,pch=21,bg="gray",cex=2)
	points(xlegend,ymax*0.90,pch=21,bg="green",cex=2)
	points(xlegend,ymax*0.85,pch=21,bg="red",cex=2)
	points(xlegend,ymax*0.8,pch=21,bg="yellow",cex=2)
	text(xlegend*0.5,ymax,"Conc @MTD",pos=4)
	text(xlegend,ymax*0.95,"~burst",pos=4)
	text(xlegend,ymax*0.9,">burst",pos=4)
	text(xlegend,ymax*0.85,"<burst",pos=4)
	text(xlegend,ymax*0.8,"<burst, Neurological-Limited",pos=4)
	if(to.file) dev.off()
	else browser()

}
#--------------------------------------------------------------------------------------
#
# export the hits in a long format
#
# QC=OK
#--------------------------------------------------------------------------------------
export.chem.hits <- function(set.name="spill_analogs",code.list=c("C10042598","C104767","C108930","C111273","C111706","C111875","C112301","C112425","C112538","C112709","C112721","C116029","C123513","C143088","C2433149","C3452979","C36653824","C5349519","C629765","C70568604","C8000417","C96413","C98522")) {
	# Write one tab-delimited line per (chemical, hit assay) pair.
	#
	# Arguments:
	#   set.name   label appended to the output file name; when NA every
	#              chemical in CHEMS is exported to
	#              output/chemical_hits_long.txt and code.list is ignored.
	#   code.list  chemical codes to export when set.name is not NA.
	#
	# Uses the globals CHEMS, MAT.hitcall, MAT.AC50, EMAXMAT, AC50MODMAT,
	# MAT.Z.NORM and ASSAY.INFO. Each line is also echoed to the console.
	cat("==========================================================================\n")
	cat("export.chem.hits\n")
	cat("==========================================================================\n")
	flush.console()
	fname <- "output/chemical_hits_long.txt"
	if(!is.na(set.name)) fname <- paste("output/chemical_hits_long_",set.name,".txt",sep="")

	# open the output once (truncating it) instead of re-opening it per row
	out <- file(fname,open="w")
	on.exit(close(out),add=TRUE)
	s <- "CODE\tCASRN\tName\tShortName\tAssaysTested\tHits\tHitRatio\tCategory\tIntendedTarget\tGene\tAssay\tmodifier\tAC50\tEmax\tZ\n"
	cat(s,file=out)

	if(is.na(set.name)) code.list <- CHEMS[,"CODE"]
	for(i in seq_along(code.list)) {
		code <- code.list[i]
		casrn <- CHEMS[code,"CASRN"]
		cname <- CHEMS[code,"Name"]
		sname <- CHEMS[code,"ShortName"]
		target <- CHEMS[code,"IntendedTarget"]
		ccat <- CHEMS[code,"StructureCategory"]
		tline <- MAT.hitcall[code,]

		# assays tested = non-missing hit calls; hits = sum of the hit calls
		ntry <- sum(!is.na(tline))
		hits <- tline
		hits[is.na(tline)] <- 0
		nhit <- sum(hits)

		hit.ratio <- nhit / max(1,ntry)
		if(nhit>0) {
			# take the assay names from the hit-call row itself so the mask
			# and the names always refer to the same columns
			alist <- names(tline)[unlist(hits)==1]
			for(j in seq_along(alist)) {
				assay <- alist[j]
				gene <- ASSAY.INFO[is.element(ASSAY.INFO[,"Assay"],assay),"Gene_Process"]
				ac50 <- MAT.AC50[code,assay]
				emax <- EMAXMAT[code,assay]
				modifier <- AC50MODMAT[code,assay]
				z <- MAT.Z.NORM[code,assay]
				s <- paste(code,"\t",casrn,"\t",cname,"\t",sname,"\t",ntry,"\t",nhit,"\t",format(hit.ratio,digits=2),"\t",ccat,"\t",target,"\t",gene,"\t",assay,"\t",modifier,"\t",format(ac50,digits=3),"\t",format(emax,digits=3),"\t",format(z,digits=2),"\n",sep="")
				cat(s)
				cat(s,file=out)
				flush.console()
			}
		}
	}
}
#--------------------------------------------------------------------------------------
#
# make a set of summary calculations
#
# >>> run once per update of assay data
#
#--------------------------------------------------------------------------------------
calc.1 <- function(do.load=F) {
	# Driver that runs the summary calculations and plots one after another.
	# Set do.load to TRUE to call load.data() first. Every step below is
	# defined elsewhere in the project and is run in the listed order.
	if (do.load) {
		load.data()
	}

	# hit distributions (all genes, then ESR1 only), Z-scores, assay summary
	hit.dist(to.file = TRUE, target.gene = NA)
	hit.dist(to.file = TRUE, target.gene = "ESR1")
	load.zscore()
	assay.summary(to.file = TRUE)
	export.chem.hits()

	# gene-score calculations and plots
	genescore.hits(do.prep = TRUE)
	load.genescore()
	genescore.FDA(do.prep = TRUE)
	genescore.plots.1(to.file = TRUE)
	calc.genescore.matrix()
	load.genescore.matrix()
	genescore.rank()
	genescore.hm(to.file = TRUE)

	rtk.auc.plot(to.file = TRUE)
	promiscuity.by.category()
	source.summary.boxplot(to.file = TRUE)

	# One-off index builds; disabled because they only need to be done once:
	#   build.toxrefdb.index(do.read=T)
	#   build.toxrefdb.type(do.read=T,type="CHR",species="rat")
	toxcast.toxrefdb.assoc(do.read = TRUE)
	gene.chemsim(do.read = TRUE, cutoff1 = 1, cutoff2 = 7)
	gene.stats.summary(do.read = TRUE, to.file = TRUE)

	# heat maps for the different phase selections
	phases.i.ii <- c("Phase_I_V2", "Phase_IIa", "Phase_IIb", "Phase_IIc")
	hm.assay(phase.list = phases.i.ii, hm.name = "ToxCast Phase I and II", to.file = TRUE)
	hm.assay(phase.list = c("Phase_I_V2"), hm.name = "ToxCast Phase I", to.file = TRUE)
	hm.assay(phase.list = c(phases.i.ii, "E1K"), hm.name = "ToxCast E1K", to.file = TRUE)
	hm.cytotox(phase.list = phases.i.ii, hm.name = "Cytotox", to.file = TRUE)

	mw.dist(do.prep = TRUE, to.file = TRUE)
	hm.chemsim(to.file = TRUE)
	bruns.watson(to.file = TRUE)
	plot.hit.dist(do.prep = TRUE, to.file = TRUE)
	cytotox.dist(to.file = TRUE)

	# Optional, currently disabled examples:
	#   one.genescore.plot(do.prep=T,to.file=T,min.score=7,gene="CYP19A1")
	#   genescore.hm.small(to.file=T,cutoff=10,cex.col=0.1,title="Mercury_Tin",code.list=c("C1118463","C1461229","C1461252","C2155706","C4342363","C683181","C753731","C76879","C587859","C62384","C7487947"))
	#   genescore.hm.small.bytarget(to.file=T,cutoff=5,cex.col=1,cex.row=0.5,title="Mitochondria",gene.list=c("TSPO","MITOFUNCTION"))
	#   assay.set(aname="PPAR",min.hit=2,assay.list=c("ATG_PPARa_TRANS","ATG_PPRE_CIS","NVS_NR_hPPARa","ATG_PPARd_TRANS","ATG_PPARg_TRANS","NVS_NR_hPPARg","Tox21_PPARg_BLA_Agonist_ratio","OT_PPARg_PPARgSRC1_1440"))
	#   promiscuity.features(do.prep=T)
	#   promiscuity.by.category()
	#   sa.neighbor()
	#   edsp21()
}
#--------------------------------------------------------------------------------------
#
# Calculate promiscuity features
#
# QC=OK
#--------------------------------------------------------------------------------------
promiscuity.features <- function(do.prep=F) {
# NOTE: this function is currently disabled. It prints a reminder and returns
# NULL immediately, so everything after the return() below is unreachable
# until these two lines are removed.
print("add gene selective")
return()
	# --- unreachable while the early return above is in place ---
	#
	# Intended behaviour, as written: for every structural feature column,
	# compare the hit fraction of chemicals that have the feature with the hit
	# fraction of those that do not, and write one line per feature to
	# output/promiscuity_features.txt.
	#
	# do.prep: when TRUE, re-read the feature matrix and the per-chemical hit
	# table and cache them in the globals SDATA, HF and HF.selective; when
	# FALSE those globals must already exist.
	if(do.prep) {
		file <- "structure_input/ToxCast_SMARTS_REDUCED_matrix_2013_03_05.txt"
		sdata <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
		# order the chemicals by row name so both tables share one row order
		chemset <- sort(rownames(sdata))
		sdata <- sdata[chemset,]
		SDATA <<- sdata

		file <- "output/by_chemical_hit_dist.txt"
		cdata <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
		rownames(cdata) <- cdata[,"CODE"]
		cdata <- cdata[chemset,]

		# hit fraction = hits / assays tested (all hits, then selective hits)
		HF <<- cdata[,"Hits"] / cdata[,"AssaysTested"]
		HF.selective <<- cdata[,"SelectiveHits"] / cdata[,"AssaysTested"]
	}

	# lookup table: column 1 = feature id, column 2 = SMARTS, column 3 = name
	file <- "structure_input/SMARTS_index.txt"
	sindex <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"",comment="")

	# drop chemicals without a hit fraction from all three objects
	hf <- HF[!is.na(HF)]
	hfs <- HF.selective[!is.na(HF)]
	sdata <- SDATA[!is.na(HF),]
	fout <- "output/promiscuity_features.txt"
	s <- paste("FeatureID\tSMARTS\tFeatureName\tNchem\tmean.in\tsd.in\tmean.out\tsd.out\tp.value\tmean.in.selective\tsd.in.selective\tmean.out.selective\tsd.out.selective\tp.value.selective\n")
	cat(s,file=fout,append=F)
	nfeature <- dim(sdata)[2]
	for(i in 1:nfeature) {
		fpid <- names(sdata)[i]
		smarts <- sindex[is.element(sindex[,1],fpid),2]
		fpname <- sindex[is.element(sindex[,1],fpid),3]
		# number of chemicals carrying the feature (column is summed, so it is
		# presumably coded 0/1 -- confirm)
		count <- sum(sdata[,i])
		# features present in fewer than 5 chemicals are skipped
		if(count>=5) {
			invals <- hf[sdata[,i]==1]
			mean.in <- mean(invals)
			sd.in <- sd(invals)
			outvals <- hf[sdata[,i]==0]
			mean.out <- mean(outvals)
			sd.out <- sd(outvals)
			# one-sided test: are hit fractions larger with the feature?
			p <- wilcox.test(invals,outvals,alternative="greater")$p.value

			# same comparison using the selective hit fraction
			invals.s <- hfs[sdata[,i]==1]
			mean.in.s <- mean(invals.s)
			sd.in.s <- sd(invals.s)
			outvals.s <- hfs[sdata[,i]==0]
			mean.out.s <- mean(outvals.s)
			sd.out.s <- sd(outvals.s)
			p.s <- wilcox.test(invals.s,outvals.s,alternative="greater")$p.value

			s <- paste(fpid,"\t",smarts,"\t",fpname,"\t",count,"\t",format(mean.in,digits=3),"\t",format(sd.in,digits=3),"\t",format(mean.out,digits=3),"\t",format(sd.out,digits=3),"\t",format(p,digits=3),"\t",format(mean.in.s,digits=3),"\t",format(sd.in.s,digits=3),"\t",format(mean.out.s,digits=3),"\t",format(sd.out.s,digits=3),"\t",format(p.s,digits=3),"\n",sep="")
			cat(s,file=fout,append=T)
			cat(s)
		}
		#browser()
	}
	#browser()
}
#--------------------------------------------------------------------------------------
#
# Calculate promiscuity statistics by chemotype
#
# QC=OK
#--------------------------------------------------------------------------------------
promiscuity.chemotypes <- function(do.prep=F) {
# NOTE: this function is currently disabled. It prints a reminder and returns
# NULL immediately, so everything after the return() below is unreachable
# until these two lines are removed.
print("add gene selective")
return()
	# --- unreachable while the early return above is in place ---
	#
	# Intended behaviour, as written: for every chemotype column, compare the
	# hit fraction of chemicals in the chemotype with that of chemicals outside
	# it, and write one line per chemotype to
	# output/promiscuity_by_chemotype.txt.
	#
	# do.prep: when TRUE, re-read the chemotype table and the per-chemical hit
	# table and cache them in the globals SDATA, HF and HF.selective; when
	# FALSE those globals must already exist. Note these are the same global
	# names that promiscuity.features() writes.
	if(do.prep) {
		file <- "structure_input/Altimira_Chemotypes_2013_02_04.txt"
		sdata <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
		rownames(sdata) <- sdata[,"CODE"]
		# keep columns 5 onward (the first four are presumably identifier
		# columns -- confirm against the input file)
		sdata <- sdata[,5:dim(sdata)[2]]
		# order the chemicals by row name so both tables share one row order
		chemset <- sort(rownames(sdata))
		sdata <- sdata[chemset,]
		SDATA <<- sdata

		file <- "output/by_chemical_hit_dist.txt"
		cdata <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")
		rownames(cdata) <- cdata[,"CODE"]
		cdata <- cdata[chemset,]

		# hit fraction = hits / assays tested (all hits, then selective hits)
		HF <<- cdata[,"Hits"] / cdata[,"AssaysTested"]
		HF.selective <<- cdata[,"SelectiveHits"] / cdata[,"AssaysTested"]
	}

	#file <- "structure_input/SMARTS_index.txt"
	#sindex <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"",comment="")

	# drop chemicals without a hit fraction from all three objects
	hf <- HF[!is.na(HF)]
	hfs <- HF.selective[!is.na(HF)]
	sdata <- SDATA[!is.na(HF),]
	fout <- "output/promiscuity_by_chemotype.txt"
	s <- paste("FeatureName\tNchem\tmean.in\tsd.in\tmean.out\tsd.out\tp.value\tmean.in.selective\tsd.in.selective\tmean.out.selective\tsd.out.selective\tp.value.selective\n")
	cat(s,file=fout,append=F)
	nfeature <- dim(sdata)[2]
	for(i in 1:nfeature) {
		# the column name doubles as the chemotype label in the output
		fpid <- names(sdata)[i]
		#smarts <- sindex[is.element(sindex[,1],fpid),2]
		#fpname <- sindex[is.element(sindex[,1],fpid),3]
		# number of chemicals in the chemotype (column is summed, so it is
		# presumably coded 0/1 -- confirm)
		count <- sum(sdata[,i])
		# chemotypes with fewer than 5 chemicals are skipped
		if(count>=5) {
			invals <- hf[sdata[,i]==1]
			mean.in <- mean(invals)
			sd.in <- sd(invals)
			outvals <- hf[sdata[,i]==0]
			mean.out <- mean(outvals)
			sd.out <- sd(outvals)
			# one-sided test: are hit fractions larger inside the chemotype?
			p <- wilcox.test(invals,outvals,alternative="greater")$p.value

			# same comparison using the selective hit fraction
			invals.s <- hfs[sdata[,i]==1]
			mean.in.s <- mean(invals.s)
			sd.in.s <- sd(invals.s)
			outvals.s <- hfs[sdata[,i]==0]
			mean.out.s <- mean(outvals.s)
			sd.out.s <- sd(outvals.s)
			p.s <- wilcox.test(invals.s,outvals.s,alternative="greater")$p.value

			s <- paste(fpid,"\t",count,"\t",format(mean.in,digits=3),"\t",format(sd.in,digits=3),"\t",format(mean.out,digits=3),"\t",format(sd.out,digits=3),"\t",format(p,digits=3),"\t",format(mean.in.s,digits=3),"\t",format(sd.in.s,digits=3),"\t",format(mean.out.s,digits=3),"\t",format(sd.out.s,digits=3),"\t",format(p.s,digits=3),"\n",sep="")
			cat(s,file=fout,append=T)
			cat(s)
		}
		#browser()
	}
	#browser()
}
#--------------------------------------------------------------------------------------
#
# Calculate promiscuity stats for the structure classes using gene score
#
# QC=OK
#--------------------------------------------------------------------------------------
promiscuity.by.category.by.genescore <- function(cutoff=5) {
    # Compare, per structure category, the fraction of gene scores that reach
    # `cutoff` for chemicals inside the category versus all other chemicals.
    #
    # Arguments:
    #   cutoff  minimum GeneScore counted as a hit; also part of the output
    #           file name output/promiscuity_by_category_<cutoff>.txt.
    #
    # Uses the globals CODE.LIST, CHEMS and GENEDATA and the helper uniquify().
    # One line is written (and echoed to the console) for every category in
    # which both the "in" and the "out" group have more than 4 chemicals.
    nchem <- length(CODE.LIST)
    hf <- rep(NA_real_,nchem)
    s.class <- rep(NA_character_,nchem)
    for(i in seq_len(nchem)) {
        code <- CODE.LIST[i]
        s.class[i] <- as.character(CHEMS[code,"StructureCategory"])
        temp.gs <- GENEDATA[is.element(GENEDATA[,"CODE"],code),"GeneScore"]
        # count scores at or above the cutoff; a missing score is not a hit
        top <- sum(temp.gs>=cutoff,na.rm=TRUE)
        bot <- length(temp.gs)
        # chemicals with 200 or fewer gene scores get no hit fraction
        if(bot>200) {
            hf[i] <- top/bot
        }
    }
    # keep only chemicals that have both a category and a hit fraction
    keep <- !is.na(s.class) & !is.na(hf)
    hf <- hf[keep]
    s.class <- s.class[keep]

    fout <- paste("output/promiscuity_by_category_",cutoff,".txt",sep="")
    s <- paste("Category\tNchem\tmean_HitRatio\tSD_HitRatio\tZ-value\tp-hot\tp-cold\n")
    cat(s,file=fout,append=F)
    cat.set <- sort(uniquify(CHEMS[,"StructureCategory"]))
    for(i in seq_along(cat.set)) {
        category <- cat.set[i]
        print(category)
        in.category <- is.element(s.class,category)
        ratio.in <- hf[in.category]
        ratio.out <- hf[!in.category]

        if(length(ratio.in)>4 && length(ratio.out)>4) {
            cmean.in <- mean(ratio.in)
            cmean.out <- mean(ratio.out)
            csd.in <- sd(ratio.in)
            csd.out <- sd(ratio.out)
            n.in <- length(ratio.in)
            # distance of the category mean from the rest, in SDs of the rest
            z.value <- (cmean.in-cmean.out)/csd.out
            # one-sided tests: "hot" = category higher, "cold" = category lower
            p.hot <- wilcox.test(ratio.in,ratio.out,alternative="greater")$p.value
            p.cold <- wilcox.test(ratio.in,ratio.out,alternative="less")$p.value
            s <- paste(category,"\t",n.in,"\t",format(cmean.in,digits=3),"\t",format(csd.in,digits=3),"\t",format(z.value,digits=3),"\t",format(p.hot,digits=3),"\t",format(p.cold,digits=3),"\n",sep="")
            cat(s,file=fout,append=T)
            cat(s)
        }
    }
}
#--------------------------------------------------------------------------------------
#
# Look for structure category correlations with BSK SVM scores
#
#--------------------------------------------------------------------------------------
bsk.category <- function(do.prep.1=T,do.prep.2=F,do.calc=F) {
	# Relate structure categories to the BSK SVM score columns.
	#
	# Arguments (three independent stages):
	#   do.prep.1  read the SVM table and the sample table, attach chemical
	#              code, name, category and a Z value to every SVM row and
	#              store the result in the global BSK.SVM.1.
	#   do.prep.2  keep the rows of BSK.SVM.1 with Z >= 3, collapse them to
	#              one row per chemical (column-wise colMax() over columns
	#              11:34) and store the result in the global BSK.SVM.2.
	#   do.calc    for every SVM column and every category except the first
	#              in sorted order, test whether scores are higher inside the
	#              category; results go to input/bsk_svm_correlation.txt.
	#
	# Uses the globals CHEMS and CYTOTOX and the helpers uniquify() and
	# colMax(). The function always ends in browser().
	if(do.prep.1) {
		file <- "input/BSK_Supplemental_Table_8.txt"
		bsk.svm <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
		print(dim(bsk.svm))
		file <- "input/ToxCast_Samples_2014_01_13.txt"
		samples <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		print(dim(samples))

		bsk.svm[,"Concentration"] <- bsk.svm[,"Concentration"]/1000

		tx.list <- bsk.svm[,"TXCode"]
		conc.list <- bsk.svm[,"Concentration"]

		# first sample row matching each TXCode (NA when there is none)
		hit <- match(tx.list,samples[,"sample_id"])
		code.list <- samples[hit,"CODE"]
		name.list <- samples[hit,"ShortName"]
		category.list <- CHEMS[code.list,"StructureCategory"]

		# Z = (-log10(conc/1e6) - cytotox.mean) / cytotox.sd.global, with a
		# missing cytotox.mean replaced by 3 and a missing Z replaced by 0
		cmean <- CYTOTOX[code.list,"cytotox.mean"]
		cmean[is.na(cmean)] <- 3
		csd <- CYTOTOX[code.list,"cytotox.sd.global"]
		logconc <- -log10(conc.list/1000000)
		z.list <- as.numeric((logconc-cmean)/csd)
		z.list[is.na(z.list)] <- 0

		bsk.svm <- cbind(z.list,bsk.svm)
		bsk.svm <- cbind(category.list,bsk.svm)
		bsk.svm <- cbind(name.list,bsk.svm)
		bsk.svm <- cbind(code.list,bsk.svm)
		names(bsk.svm)[1:4] <- c("CODE","Name","Category","Z")
		BSK.SVM.1 <<- bsk.svm
	}
	if(do.prep.2) {
		z <- BSK.SVM.1[,"Z"]
		bsk.svm.2 <- BSK.SVM.1[!is.na(z) & z>=3,]
		code.list <- sort(uniquify(bsk.svm.2[,"CODE"]))
		# one row per chemical: its first row, with columns 11:34 replaced by
		# the colMax() of all of that chemical's rows
		best.rows <- lapply(seq_along(code.list),function(i) {
			temp <- bsk.svm.2[is.element(bsk.svm.2[,"CODE"],code.list[i]),]
			temp.row <- temp[1,]
			temp.row[,11:34] <- colMax(temp[,11:34])
			temp.row
		})
		# an empty selection yields an empty table with the same columns
		if(length(best.rows)>0) bsk.svm.3 <- do.call(rbind,best.rows)
		else bsk.svm.3 <- bsk.svm.2[0,]
		BSK.SVM.2 <<- bsk.svm.3
	}

	if(do.calc) {
		# report the size of the table that is analysed below
		cat("BSK.SVM: ",dim(BSK.SVM.2),"\n")
		cat.list <- sort(uniquify(BSK.SVM.2[,"Category"]))
		svm.list <- names(BSK.SVM.2)[11:34]
		nsvm <- length(svm.list)
		ncat <- length(cat.list)
		fout <- "input/bsk_svm_correlation.txt"
		s <- paste("SVM.Class\tCategory\tN.in\tmean.in\tN.out\tmean.out\tp.value\n")
		cat(s,file=fout,append=F)

		clist <- as.character(BSK.SVM.2[,"Category"])
		for(i in seq_len(nsvm)) {
			svm <- svm.list[i]
			print(svm)
			temp <- BSK.SVM.2[,svm]
			# NOTE(review): the first category in sorted order is skipped on
			# purpose here (presumably a blank/unassigned label) -- confirm
			for(j in seq_len(ncat)[-1]) {
				scat <- cat.list[j]
				print(scat)
				in.cat <- is.element(clist,scat)
				in.val <- temp[in.cat]
				out.val <- temp[!in.cat]
				n.in <- length(in.val)
				mean.in <- mean(in.val)
				n.out <- length(out.val)
				mean.out <- mean(out.val)
				# one-sided test: are scores higher inside the category?
				p.value <- wilcox.test(in.val,out.val,alternative="greater")$p.value
				s <- paste(svm,"\t",scat,"\t",n.in,"\t",format(mean.in,digits=2),"\t",n.out,"\t",format(mean.out,digits=2),"\t",format(p.value,digits=3),"\n",sep="")
				cat(s,file=fout,append=T)
				cat(s)
			}
		}
	}
	# NOTE(review): unconditional interactive pause kept from the original
browser()
}
#--------------------------------------------------------------------------------------
#
# Calculate promiscuity stats for the structure classes
#
# QC=OK
#--------------------------------------------------------------------------------------
promiscuity.by.category <- function() {
# NOTE: this function is currently disabled. It prints a reminder and returns
# NULL immediately, so everything after the return() below is unreachable
# until these two lines are removed.
print("add gene selective")
return()
	# --- unreachable while the early return above is in place ---
	#
	# Intended behaviour, as written: for every structure category, compare
	# HitRatio and SelectiveHitRatio of the chemicals in the category with all
	# other chemicals and write the results to
	# output/promiscuity_by_category.txt.
	#
	# NOTE(review): the read of `cdata` is commented out, so `cdata` is not
	# defined inside this function; if the code is re-enabled it must either
	# exist as a global or the read below must be restored.
	#file <- "input/by_chemical_hit_dist.txt"
	#cdata <- read.table(file,header=T,sep="\t",stringsAsFactors=F,quote="\"")

	fout <- "output/promiscuity_by_category.txt"
	s <- paste("BurstClass\tCategory\tType\tNchem\tmean_HitRatio\tSD_HitRatio\tZ-value\tp-hot\tp-cold\n")
	cat(s,file=fout,append=F)
	cat.set <- sort(uniquify(cdata[,"StructureCategory"]))
	for(i in 1:length(cat.set)) {
		category <- cat.set[i]
		print(category)
		# split the chemicals into those inside and outside the category
		temp.in <- cdata[is.element(cdata[,"StructureCategory"],category),]
		temp.out <- cdata[!is.element(cdata[,"StructureCategory"],category),]



		# first pass: all hits (written with BurstClass "All")
		ratio.in <- temp.in[,"HitRatio"]
		ratio.out <- temp.out[,"HitRatio"]

		# both groups need more than 4 chemicals to be reported
		if(length(ratio.in)>4 && length(ratio.out)>4) {
			#print(length(ratio.in))
			#print(length(ratio.out))
			cmean.in <- mean(ratio.in)
			cmean.out <- mean(ratio.out)
			csd.in <- sd(ratio.in)
			csd.out <- sd(ratio.out)
			nchem <- length(ratio.in)
			# distance of the category mean from the rest, in SDs of the rest
			z.value <- (cmean.in-cmean.out)/csd.out
			# one-sided tests: "hot" = category higher, "cold" = category lower
			p.hot <- wilcox.test(ratio.in,ratio.out,alternative="greater")$p.value
			p.cold <- wilcox.test(ratio.in,ratio.out,alternative="less")$p.value
			s <- paste("All\t",category,"\tCategory\t",nchem,"\t",format(cmean.in,digits=3),"\t",format(csd.in,digits=3),"\t",format(z.value,digits=3),"\t",format(p.hot,digits=3),"\t",format(p.cold,digits=3),"\n",sep="")
			cat(s,file=fout,append=T)
			cat(s)
		}
		# second pass: selective hits (written with BurstClass "Selective")
		ratio.in <- temp.in[,"SelectiveHitRatio"]
		ratio.out <- temp.out[,"SelectiveHitRatio"]
		if(length(ratio.in)>4 && length(ratio.out)>4) {
			cmean.in <- mean(ratio.in)
			cmean.out <- mean(ratio.out)
			csd.in <- sd(ratio.in)
			csd.out <- sd(ratio.out)
			nchem <- length(ratio.in)
			z.value <- (cmean.in-cmean.out)/csd.out
			p.hot <- wilcox.test(ratio.in,ratio.out,alternative="greater")$p.value
			p.cold <- wilcox.test(ratio.in,ratio.out,alternative="less")$p.value
			s <- paste("Selective\t",category,"\tCategory\t",nchem,"\t",format(cmean.in,digits=3),"\t",format(csd.in,digits=3),"\t",format(z.value,digits=3),"\t",format(p.hot,digits=3),"\t",format(p.cold,digits=3),"\n",sep="")
			cat(s,file=fout,append=T)
			cat(s)
		}
	}

}
#--------------------------------------------------------------------------------------
#
# plot the range of hits as a function of MW
#
# QC=OK
#--------------------------------------------------------------------------------------
mw.dist <- function(do.prep=F,to.file=F) {
	# Box plots of the per-chemical hit fraction by molecular-weight bin.
	#
	# Arguments:
	#   do.prep  when TRUE, recompute the hit fractions and MW bins from
	#            MAT.hitcall, MAT.Z.NORM and CHEMSTRUCT and cache them in the
	#            globals FRACTION.HIT, FRACTION.HIT.PS, GROUPS, mw.min and
	#            mw.max; when FALSE those globals must already exist.
	#   to.file  if TRUE the plot is written to plots/mw_hit_dist.pdf;
	#            otherwise it is drawn on the current device and browser() is
	#            called.
	#
	# Row i of CHEMSTRUCT is assumed to describe the same chemical as row i of
	# MAT.hitcall (the bins are assigned by position).

	if(do.prep) {
		# 1 where the value is positive, 0 otherwise; missing and negative
		# values are not hits and must not subtract from the row totals
		temp.hit <- MAT.hitcall
		temp.hit[is.na(temp.hit)] <- 0
		temp.hit[temp.hit<0] <- 0
		temp.hit[temp.hit>0] <- 1
		temp.hit.ps <- MAT.Z.NORM
		temp.hit.ps[is.na(temp.hit.ps)] <- 0
		temp.hit.ps[temp.hit.ps<0] <- 0
		temp.hit.ps[temp.hit.ps>0] <- 1

		# denominator: number of assays with a hit call (at least 1)
		bot <- rowSums(!is.na(MAT.hitcall))
		bot[bot==0] <- 1

		fraction.hit <- rowSums(temp.hit)/bot
		fraction.hit.ps <- rowSums(temp.hit.ps)/bot

		mw.min <<- c(10,50,100,150,200,250,300,400,500,750,1000)
		mw.max <<- c(   50,100,150,200,250,300,400,500,750,1000,10000)

		# bin j holds mw.min[j] < MW <= mw.max[j]; chemicals outside every
		# bin, or with a missing MW, stay in group 0 and are not plotted
		groups <- fraction.hit
		groups[] <- 0
		mw <- CHEMSTRUCT[seq_along(groups),"MW"]
		bin <- cut(mw,breaks=c(mw.min[1],mw.max),labels=FALSE)
		groups[!is.na(bin)] <- bin[!is.na(bin)]

		FRACTION.HIT <<- fraction.hit
		FRACTION.HIT.PS <<- fraction.hit.ps
		GROUPS <<- groups
	}
	keep <- GROUPS>0
	fraction.hit <- FRACTION.HIT[keep]
	fraction.hit.ps <- FRACTION.HIT.PS[keep]
	# a factor with one level per bin keeps empty bins on the axis, so the
	# box positions always line up with the mw.min labels and the counts
	ngroups <- length(mw.min)
	groups <- factor(GROUPS[keep],levels=seq_len(ngroups))
	counts <- tabulate(GROUPS[keep],nbins=ngroups)
	if(to.file) {
		fname <- paste("plots/mw_hit_dist.pdf",sep="")
		pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
	}
	par(mfrow=c(2,1),mar=c(4,4,2,2))
	ymax <- 0.4
	boxplot(fraction.hit~groups,xlab="MW range",ylab="Hit Fraction",names=mw.min,cex.axis=1.2,cex.lab=1.2,ylim=c(0,ymax),main="All Hits")
	# number of chemicals in each bin, printed along the top of the panel
	for(i in seq_len(ngroups)) text(i,ymax,paste(counts[i]),pos=1)

	boxplot(fraction.hit.ps~groups,xlab="MW range",ylab="Hit Fraction",names=mw.min,cex.axis=1.2,cex.lab=1.2,ylim=c(0,ymax),main="Potent and Selective Hits")
	for(i in seq_len(ngroups)) text(i,ymax,paste(counts[i]),pos=1)


	if(to.file) dev.off()
	else browser()
}
#--------------------------------------------------------------------------------------
#
# Load the phase I data
#
#--------------------------------------------------------------------------------------
loadPhaseI <- function() {
    # Read the seven Phase I assay files, bind their data columns side by
    # side, and publish the result in three globals:
    #   PHASE_I_CHEMS  columns 1:3 of the first file, one row per chemical
    #   PHASE_I_DATA   columns 4..n of every file, bound column-wise
    #   PHASE_I_CODES  chemical codes: "C" + column 2 with hyphens removed
    # Rows whose code already occurred earlier are dropped. All files are
    # assumed to list the same chemicals in the same row order, because the
    # data columns are combined by position.
    cat("==========================================================================\n")
    cat("load phase I data ...\n")
    cat("==========================================================================\n")
    flush.console()
    mydate <- "20100129"
    techlist <- c("Novascreen","ACEA","Attagene","BioSeek","Cellumen","CellzDirect","NCGC")
    chems <- NULL
    blocks <- vector("list",length(techlist))
    for(i in seq_along(techlist)) {
        file <- paste("../input/Phase_I/ToxCast_",techlist[i],"_",mydate,".txt",sep="")
        temp <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"")
        print(file)
        print(dim(temp))
        # the chemical identifier columns are taken from the first file only
        if(i==1) chems <- temp[,1:3]
        # drop=FALSE keeps a data frame even when a file has one data column
        blocks[[i]] <- temp[,4:dim(temp)[2],drop=FALSE]
    }
    adata <- do.call(cbind,blocks)

    # paste() returns "C" for zero-length input, so guard the empty case
    casrn <- chems[,2]
    code.list <- character(0)
    if(length(casrn)>0) code.list <- paste("C",gsub("-","",casrn,fixed=TRUE),sep="")

    # keep the first occurrence of every code
    keep <- !duplicated(code.list)
    chems <- chems[keep,,drop=FALSE]
    adata <- adata[keep,,drop=FALSE]
    code.list <- code.list[keep]
    rownames(chems) <- code.list
    rownames(adata) <- code.list
    PHASE_I_CHEMS <<- chems
    PHASE_I_DATA <<- adata
    PHASE_I_CODES <<- code.list
}
#--------------------------------------------------------------------------------------
#
# compare the old and new minimum AC50 values
#
#--------------------------------------------------------------------------------------
comp.old.new <- function(to.file=F) {
	# Scatter plots comparing the Phase I data (PHASE_I_DATA) with the current
	# AC50 matrix (MAT.AC50) for the chemicals present in both PHASE_I_CODES
	# and CODE.LIST. For the full assay set and then for each assay-name
	# pattern pair, four panels are drawn: the per-chemical minimum value and
	# the 2nd, 3rd and 4th elements of quantile() of the transformed values.
	#
	# to.file: if TRUE the panels go to ../plots/toxcast_old_new.pdf;
	#          otherwise they are drawn on the current device and browser() is
	#          called at the end.
	#
	# Relies on the helper rowMin() defined elsewhere (presumably the per-row
	# minimum -- confirm).

   	if(to.file) {
	   fname <- "../plots/toxcast_old_new.pdf"
		pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
   	}
	par(mfrow=c(2,2),mar=c(4,4,2,2))

	# chemicals common to the old and the new data set
	code.list.old <- PHASE_I_CODES
	code.list <- code.list.old[is.element(code.list.old,CODE.LIST)]
	nchem <- length(code.list)

############################
	# --- all assays ---
	data.old <- PHASE_I_DATA[code.list,]
	data.new <- MAT.AC50[code.list,]

	# missing values are replaced by 1000000 so they never win the minimum
	data.old[is.na(data.old)] <- 1000000
	data.new[is.na(data.new)] <- 1000000

	min.old <- rowMin(data.old)
	min.new <- rowMin(data.new)

	plot(min.new~min.old,log="xy",xlim=c(1e-4,100),ylim=c(1e-4,100),main="Minimum Values: All",xlab="Phase I",ylab="Phase II")
	# identity line
	lines(c(1e-5,1e5),c(1e-5,1e5))

############################
	q2.old <- vector(length=nchem,mode="numeric")
	q2.old[] <- 0
	q2.new <- q2.old
	q3.new <- q2.old
	q4.new <- q2.old
	q3.old <- q2.old
	q4.old <- q2.old

	# -log10(value/1e6); the 1000000 placeholders map to 0 and are removed by
	# the temp>0 filter below
	log.old <- -log10(data.old/1000000)
	log.new <- -log10(data.new/1000000)

	for(i in 1:nchem) {
		temp <- log.old[i,]
		# default quantile() output: elements 2, 3, 4 are the 25%, 50%, 75%
		# quantiles of the positive values in the row
		x <- quantile(temp[temp>0])
		q2.old[i] <- x[2]
		q3.old[i] <- x[3]
		q4.old[i] <- x[4]
		temp <- log.new[i,]
		x <- quantile(temp[temp>0])
		q2.new[i] <- x[2]
		q3.new[i] <- x[3]
		q4.new[i] <- x[4]
	}
	plot(q2.new~q2.old,xlim=c(4,8),ylim=c(4,8),main="log(quantile 2) Values: All",xlab="Phase I",ylab="Phase II")
	lines(c(0,8),c(0,8))
	plot(q3.new~q3.old,xlim=c(4,8),ylim=c(4,8),main="log(quantile 3) Values: All",xlab="Phase I",ylab="Phase II")
	lines(c(0,8),c(0,8))
	plot(q4.new~q4.old,xlim=c(4,8),ylim=c(4,8),main="log(quantile 4) Values: All",xlab="Phase I",ylab="Phase II")
	lines(c(0,8),c(0,8))

############################
	# --- per assay family: old and new name patterns are paired by position ---
	prefix.list.old <- c("NVS","ATG","BSK","CLM","NCGC")
	prefix.list.new <- c("NVS","ATG","BSK","APR","Tox21")
	for(j in 1:length(prefix.list.old)) {
		prefix.old <- prefix.list.old[j]
		prefix.new <- prefix.list.new[j]
		assay.old <- colnames(PHASE_I_DATA)
		assay.new <- colnames(MAT.AC50)
		# grep() matches the pattern anywhere in the column name, not only at
		# the start
		useme.old <- grep(prefix.old,assay.old)
		useme.new <- grep(prefix.new,assay.new)
		assay.old.sub <- assay.old[useme.old]
		assay.new.sub <- assay.new[useme.new]
		data.old <- PHASE_I_DATA[code.list,assay.old.sub]
		data.new <- MAT.AC50[code.list,assay.new.sub]

		data.old[is.na(data.old)] <- 1000000
		data.new[is.na(data.new)] <- 1000000

		min.old <- rowMin(data.old)
		min.new <- rowMin(data.new)

		plot(min.new~min.old,log="xy",xlim=c(1e-4,100),ylim=c(1e-4,100),main=paste("Minimum Values: ",prefix.new),xlab="Phase I",ylab="Phase II")
		lines(c(1e-5,1e5),c(1e-5,1e5))

############################
		# same quantile comparison as above, restricted to this family; note
		# the wider axis range (2 to 8) used for these panels
		q2.old <- vector(length=nchem,mode="numeric")
		q2.old[] <- 0
		q2.new <- q2.old
		q3.new <- q2.old
		q4.new <- q2.old
		q3.old <- q2.old
		q4.old <- q2.old

		log.old <- -log10(data.old/1000000)
		log.new <- -log10(data.new/1000000)

		for(i in 1:nchem) {
			temp <- log.old[i,]
			x <- quantile(temp[temp>0])
			q2.old[i] <- x[2]
			q3.old[i] <- x[3]
			q4.old[i] <- x[4]
			temp <- log.new[i,]
			x <- quantile(temp[temp>0])
			q2.new[i] <- x[2]
			q3.new[i] <- x[3]
			q4.new[i] <- x[4]
		}
		plot(q2.new~q2.old,xlim=c(2,8),ylim=c(2,8),main=paste("log(quantile 2) Values:",prefix.new),xlab="Phase I",ylab="Phase II")
		lines(c(0,8),c(0,8))
		plot(q3.new~q3.old,xlim=c(2,8),ylim=c(2,8),main=paste("log(quantile 3) Values:",prefix.new),xlab="Phase I",ylab="Phase II")
		lines(c(0,8),c(0,8))
		plot(q4.new~q4.old,xlim=c(2,8),ylim=c(2,8),main=paste("log(quantile 4) Values:",prefix.new),xlab="Phase I",ylab="Phase II")
		lines(c(0,8),c(0,8))
	}


	if(to.file) dev.off()
	else browser()
}
#--------------------------------------------------------------------------------------
#
# Model the burst - make an example plot
#
# QC=OK
#--------------------------------------------------------------------------------------
burst.model <- function(to.file=F,df=4,ncp=1,yscale=10,cmin=0.25,cmax=1.5) {
    # Draw a schematic figure of the cytotoxicity "burst": five stacked rows,
    # each with a density curve, a gray band, and a red bracket, plus fixed
    # annotation text.
    #
    # Arguments:
    #   to.file   if TRUE the figure is written to plots/burst_model.pdf;
    #             otherwise it is drawn on the current device and browser()
    #             is called at the end.
    #   df, ncp   degrees of freedom and non-centrality passed to dchisq()
    #             for the density curve.
    #   yscale    multiplier applied to the density before plotting.
    #   cmin,cmax offsets that set the two ends of the red bracket in each
    #             row: 10^-(cmin+i)*1e6 and 10^-(cmax+i)*1e6 for row i.
    cat("==========================================================================\n")
    cat("burst.modelt\n")
    cat("==========================================================================\n")
    flush.console()
    if(to.file) {
        fname <- "plots/burst_model.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=T)
    }
    # xmax is used here only to build the x grid below; the same name is
    # reassigned inside the loop with a different meaning
    xmax <- 10
    par(mfrow=c(1,1),mar=c(4,4,2,2))
    # empty frame with a log-scaled concentration axis
    plot(1~1,xlim=c(1e-4,1e4),ylim=c(0,11),cex.lab=1.5,cex.axis=1.5,type="n",xlab="Concentration (uM)",ylab="Number of hits",log="x")

    x <- seq(-10,xmax*5,by=xmax/100)
    # one row per i; row i occupies y from 2*(i-1) to 2*i
    for(i in 1:5) {
        xmin <- 10**(-(cmin+i))*1000000
        xmax <- 10**(-(cmax+i))*1000000
        # xmid is the geometric midpoint of xmin and xmax
        xmid <- 10**(-0.5*(cmin+cmax+2*i))*1000000
        # gray band from xmax to the right edge of the plot
        rect(xmax,2*(i-1),1e5,2*i-0.5,col="gray")
        # density curve, shifted by 5 grid units per row and lifted onto the
        # row's baseline
        y <- dchisq(10+x-5*i, df=df, ncp=ncp, log = F)
        yp <- yscale*y + 2*(i-1)
        xp <- 10**(-(x/5+2+0.30))*1000000
        lines(yp~xp,lwd=2)
                                        #browser()
        # row baseline
        lines(c(1e-4,1e4),c(2*(i-1),2*(i-1)),col="black")

        # red bracket: horizontal bar from xmin to xmax and a vertical tick
        # at xmid
        lines(c(xmin,xmax),c(2*i-0.5,2*i-0.5),col="red",lwd=5)
        lines(c(xmid,xmid),c(2*(i-1)+1,2*i-0.5),col="red",lwd=5)

        # arrow in row 4 only, pointing from xmid towards lower concentration
        if(i==4) arrows(x0=xmid,y0=7.1,x1=xmid/100,lwd=3,length=0.1)
    }

    # thick vertical line at 100 and an arrow from it towards 1e-2
    lines(c(100,100),c(0,10),lwd=6)
    arrows(x0=100,y0=10,x1=1e-2,lwd=6)
    text(6e-3,10.5,"Concentration Range Tested",pos=4,cex=1.3)
    text(2e-2,5.5,"Cytotoxicity Range",cex=1.3,pos=4,col="red")
    text(0.3,8.5,"Burst",cex=1.3,pos=4,col="black")
    text(0.01,6.75,"Z (log units)",cex=1.3,pos=4,col="black")
    text(1e-4,9,"Chemical A: Most Potent",cex=1.1,pos=4,col="black")
    text(1e-4,1,"Chemical E: Least Potent",cex=1.1,pos=4,col="black")
    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# assay source summary
#
# QC=OK
#--------------------------------------------------------------------------------------
assay.source.count <- function() {
    # Count, per assay source, how many assays are present as names of
    # MAT.AC50, print the table and write it to
    # ../output/assay_source_count.txt (tab separated).
    #
    # Reads the globals ASSAY.INFO and MAT.AC50; returns nothing useful.
    cat("==========================================================================\n")
    cat("assay.source.count\n")
    cat("==========================================================================\n")
    flush.console()

    # keep only ASSAY.INFO rows whose biological_process column equals 1
    # NOTE(review): other code in this file treats biological_process as a
    # label (e.g. "assay QC") - confirm that comparing it to 1 is intended
    mask <- ASSAY.INFO[,"biological_process"]
    atemp <- ASSAY.INFO[mask==1,]

    assay.set.list <- sort(uniquify(atemp[,"Source"]))
    nset <- length(assay.set.list)
    asum <- as.data.frame(matrix(nrow=nset,ncol=3))
    asum[,1] <- assay.set.list

    # seq_len() so that an empty source list yields an empty table instead of
    # the bogus iterations (i = 1, 0) that 1:nset would produce
    for(i in seq_len(nset)) {
        aset <- assay.set.list[i]
        cat(aset,"\n")
        assay.list <- atemp[is.element(atemp[,"Source"],aset),"Assay"]
        # only count assays that also appear in names(MAT.AC50)
        assay.list <- assay.list[is.element(assay.list,names(MAT.AC50))]
        asum[i,2] <- length(assay.list)
        #if(aset=="Novascreen_ADME") asum[i,3] <- "Duplicate in activator direction"
        #if(aset=="Novascreen_ENZ") asum[i,3] <- "Duplicate in activator direction"
    }
    names(asum) <- c("Source","Assays","Note")
    print(asum)
    outfile <- "../output/assay_source_count.txt"
    write.table(asum,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# load the structure similarity matrix
#
# QC=OK
#--------------------------------------------------------------------------------------
load.tanim.structure <- function() {
    # Read the tab-separated structure similarity table and publish it as the
    # global TANIM.STRUCTURE, then report its dimensions on the console.
    rule <- "==========================================================================\n"
    cat(rule)
    cat("load.tanim.structure\n")
    cat(rule)
    flush.console()

    # quoting and comment characters are disabled so the file is read verbatim
    TANIM.STRUCTURE <<- read.table("../structure_input/ToxCast_Tanimoto_matrix_REDUCED_2013_03_05.txt",
                                   header=TRUE,sep="\t",stringsAsFactors=FALSE,
                                   quote="",comment.char="")
    cat("Structure similarity matrix: ",dim(TANIM.STRUCTURE),"\n")
}
#--------------------------------------------------------------------------------------
#
# Plot the distribution of hits gene-wise for targets with good reference chemicals
#
# QC=OK
#-------------------------------------------------------------------------------------
genescore.refchem.plot <- function(to.file=FALSE) {
    # Horizontal box plot of GENEDATA GeneScore values, one box per gene, for
    # genes that also occur as a token in CHEMS[,"target_gene"].  Scores of
    # chemicals annotated with that gene are overlaid as red stars, and a
    # coloured square at the left edge flags how the best annotated-chemical
    # score compares with the box statistics.
    #
    # to.file : TRUE  -> write to ../plots/gene_refchem_plot.pdf
    #           FALSE -> draw on the current device and stop in browser()
    #
    # Reads the globals CHEMS and GENEDATA.
    cat("==========================================================================\n")
    cat("genescore.refchem.plot\n")
    cat("==========================================================================\n")
    flush.console()

    # tokens found in target_gene that are not treated as gene symbols
    junk.tokens <- c("","[","]", "(?)","14","A")

    # every distinct upper-cased, space-separated token of target_gene
    chemGene.0 <- sort(uniquify(toupper(CHEMS[,"target_gene"])))
    chemGene <- unlist(str_split(chemGene.0," "))
    chemGene <- chemGene[!is.element(chemGene,junk.tokens)]
    chemGene <- sort(uniquify(chemGene))

    # parallel vectors: one (chemical code, gene token) pair per usable token
    tokens <- str_split(toupper(CHEMS[,"target_gene"])," ")
    cg.chems <- rep(CHEMS[,"CODE"],times=vapply(tokens,length,integer(1)))
    cg.genes <- unlist(tokens)
    keep <- !is.element(cg.genes,c(junk.tokens,"UNKNOWN"))
    cg.chems <- cg.chems[keep]
    cg.genes <- cg.genes[keep]

    # genes that have both assay data and at least one annotated chemical
    assayGene <- sort(uniquify(toupper(GENEDATA[,"Gene"])))
    assayGene <- assayGene[is.element(assayGene,chemGene)]
    print(assayGene)

    assayGene <- assayGene[!is.element(assayGene,c("HRH2"))]

    temp <- GENEDATA[is.element(GENEDATA[,"Gene"],assayGene),]

    az <- temp[,"GeneScore"]
    gene.group <- temp[, "Gene"]

    # drop zero, infinite and missing scores in one pass
    keep <- is.finite(az) & az!=0
    gene.group <- gene.group[keep]
    az <- az[keep]

    # cap the scores at gsmax
    gsmax <- 9
    az[az>gsmax] <- gsmax
    ngene <- length(uniquify(gene.group))

    if(to.file) {
        fname <- "../plots/gene_refchem_plot.pdf"
        pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    }
    par(mfrow=c(1,1),mar=c(4,8,2,2))
    box.res <- boxplot(az~gene.group,xlab="Gene Score (uM)",ylab="",main=paste(ngene," Genes"),cex.axis=1,cex.lab=1.2,horizontal=TRUE,las=1,ylim=c(0.001,10),log="x")

    # Use the group order reported by boxplot() itself so that row i of the
    # plot, column i of box.res$stats and the gene looked up below always
    # refer to the same gene.
    gene.order <- box.res$names

    # vertical guide lines at every decade from 1e6 down to 1e-3
    for(v in 10^(6:-3)) lines(c(v,v),c(-100,100),col="gray")

    for(i in seq_along(gene.order)) {
        gene <- gene.order[i]
        temp <- GENEDATA[is.element(GENEDATA[,"Gene"],gene),]

        # rows of this gene that belong to chemicals annotated with it
        code.list <- cg.chems[is.element(cg.genes,gene)]
        temp.1 <- temp[is.element(temp[,"CODE"],code.list),]
        z <- temp.1[,"GeneScore"]
        if(length(z)>0) points(z,rep(i,length(z)),pch="*",col="red",cex=2)

        # best annotated-chemical score; missing scores are ignored and the
        # large negative sentinel is kept when there is none
        maxz <- max(c(-10000000,z),na.rm=TRUE)

        x.flag <- 1e-3
        # stats[3,] is the median, stats[2,] the lower hinge of the box
        # NOTE(review): both "above" branches draw green - confirm whether the
        # middle case was meant to use a different colour
        if(maxz > box.res$stats[3,i]) points(x.flag,i,pch=22,bg="green",cex=2)
        else if(maxz > box.res$stats[2,i]) points(x.flag,i,pch=22,bg="green",cex=2)
        else points(x.flag,i,pch=22,bg="red",cex=2)
    }

    if(to.file) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# Calculate some stats on the genescore
#
#-------------------------------------------------------------------------------------
genescore.stats <- function(do.prep=TRUE,to.file=FALSE) {
    # Summarise GeneScore per gene and plot the maximum score against the
    # fraction of positive scores.
    #
    # do.prep : TRUE  -> rebuild the per-gene summary from GENEDATA and CHEMS,
    #                    write it to ../output/genescore_stats_1.txt and store
    #                    it in the global GENE.SUMMARY
    #           FALSE -> reuse the existing global GENE.SUMMARY
    # to.file : TRUE  -> write the plot to ../plots/genescore_2.pdf
    #           FALSE -> draw on the current device and stop in browser()
    cat("==========================================================================\n")
    cat("genescore.stats\n")
    cat("==========================================================================\n")
    flush.console()
    if(do.prep) {
        # (the chemical/gene token tables that used to be built here were never
        # read by this function and have been removed)
        assayGene <- sort(uniquify(toupper(GENEDATA[,"Gene"])))
        #assayGene <- assayGene[!is.element(assayGene,c("PROLIFERATION","NUCLEARSIZE","MITOFUNCTION","MITOTICARREST","CYTOTOX","CELLCYCLEARREST","",""))]
        ngene <- length(assayGene)
        col.list <- c("gene","ntry","nhit.total","nhit.pos","max.genescore","max.code","max.chemname","max.intendedtarget","max.StructureCategory","max.UseCategory","target.match")
        geneSummary <- as.data.frame(matrix(nrow=ngene,ncol=length(col.list)))
        rownames(geneSummary) <- assayGene
        names(geneSummary) <- col.list

        for(i in seq_len(ngene)) {
            gene <- assayGene[i]
            temp <- GENEDATA[is.element(GENEDATA[,"Gene"],gene),]
            # three chemical codes are excluded from the summary
            temp <- temp[!is.element(temp[,"CODE"],c("C8018017","C12427382","C36673162")),]
            stemp <- temp[,"GeneScore"]

            # row holding the largest GeneScore for this gene
            index <- sort(temp[,"GeneScore"],index.return=TRUE,decreasing=TRUE)$ix[1]
            geneSummary[gene,"gene"] <- gene
            geneSummary[gene,"ntry"] <- length(stemp)
            # NOTE(review): nhit.total and nhit.pos use the same expression;
            # confirm whether nhit.total was meant to count all non-zero scores
            geneSummary[gene,"nhit.total"] <- length(stemp[stemp>0])
            geneSummary[gene,"nhit.pos"] <- length(stemp[stemp>0])
            geneSummary[gene,"max.genescore"] <- temp[index,"GeneScore"]
            geneSummary[gene,"max.code"] <- temp[index,"CODE"]
            geneSummary[gene,"max.chemname"] <- temp[index,"ShortName"]
            geneSummary[gene,"max.intendedtarget"] <- temp[index,"IntendedTarget"]
            # CHEMS is indexed by chemical code here
            geneSummary[gene,"max.StructureCategory"] <- CHEMS[temp[index,"CODE"],"StructureCategory"]
            geneSummary[gene,"max.UseCategory"] <- CHEMS[temp[index,"CODE"],"UseCategory"]

            # "T" when the gene is one of the space-separated intended targets
            # of the top-scoring chemical
            geneSummary[gene,"target.match"] <- "F"
            x <- str_split(toupper(temp[index,"IntendedTarget"])," ")
            if(is.element(gene,x[[1]])) geneSummary[gene,"target.match"] <- "T"
            print(gene)
                                        #browser()
        }
        outfile <- "../output/genescore_stats_1.txt"
        write.table(geneSummary,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")
        GENE.SUMMARY <<- geneSummary
    }
    if(to.file) {
        fname <- "../plots/genescore_2.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    }
    par(mfrow=c(1,1),mar=c(5,5,2,2))

    # genes with more than 800 tries and at least one positive score
    temp <- GENE.SUMMARY[GENE.SUMMARY[,"ntry"]>800,]
    temp <- temp[temp[,"nhit.pos"]>0,]
    x <- temp[,"nhit.pos"]/temp[,"ntry"]
    y <- temp[,"max.genescore"]
    z <- temp[,"max.UseCategory"]
    g <- temp[,"gene"]
    ylog <- 10**(-y) * 1000000
    plot(ylog~x,type="p",cex.lab=1.5,cex.axis=1.5,xlab="f(GeneScore>0)",ylab="Max(GeneScore)",xlim=c(0.001,1),log="xy")

    # colour the points by the use category of the top-scoring chemical
    is.pharma <- is.element(z,"Pharmaceutical")
    points(ylog[is.pharma]~x[is.pharma],pch=21,bg="green")
    pesticide.list <- c("Insecticide/Chemical intermediate" ,"Fungicide/Crop protection", "Fungicide/antimicrobial","degradate of endosulfan (CASRN 115-29-7)" ,"Degradate of Aldicarb (CASRN 116-06-3)","Herbicide","Insecticide","Biocide","Bactericide","Fungicide","Microbicide","microbiocide","Rodenticide")
    is.pest <- is.element(z,pesticide.list)
    points(ylog[is.pest]~x[is.pest],pch=21,bg="red")

    # label genes whose positive fraction is at least 0.1
    for(i in seq_along(g)) {
        if(x[i]>=0.1) {
            label <- g[i]
            if(label=="NR1I2") label <- "NR1I2 (PXR)"
            if(label=="NFE2L2") label <- "NFE2L2 (NRF2)"
            text(labels=label,x=x[i],y=ylog[i],pos=4)
        }
    }

    # hand-drawn legend with the number of genes per category
    npharma <- length(y[is.pharma])
    npest <- length(y[is.pest])
    ntot <- length(y)
    nother <- ntot-npest-npharma
    points(x=0.001,y=1e-7,pch=21,bg="green",cex=1.2)
    text(labels=paste("Pharmaceutical: ",npharma),x=0.001,y=1e-7,pos=4,cex=1.1)
    points(x=0.001,y=3.5e-7,pch=21,bg="red",cex=1.2)
    text(labels=paste("Pesticide: ",npest),x=0.001,y=3.5e-7,pos=4,cex=1.1)
    points(x=0.001,y=1e-6,pch=21,cex=1.2,col="black")
    text(labels=paste("Other: ",nother),x=0.001,y=1e-6,pos=4,cex=1.1)

    if(to.file) dev.off()
    else browser()

}
#--------------------------------------------------------------------------------------
#
# Examine the distribution of cytotox hits
#
# QC=OK
#--------------------------------------------------------------------------------------
cytotox.dist <- function(do.prep=TRUE,to.file=FALSE) {
    # Three pages of plots comparing hit rates and stress-assay potency with
    # cytotoxicity:
    #   1. box plot of percent hits for three groups of chemicals
    #   2. scatter of four stress-assay AC50s against the cytotoxicity value
    #   3. 2x2 histograms of AC50(stress)/AC50(cytotox) for the same assays
    #
    # do.prep : TRUE  -> recompute the per-chemical vectors and store them in
    #                    the globals ZPM, ZPM.CLASS, PERC.Z.IN, PERC.Z.OUT and
    #                    ZT.ALL
    #           FALSE -> reuse those globals from an earlier call
    # to.file : TRUE  -> write to plots/cytotox_dist_summary.pdf
    #           FALSE -> draw on the current device and stop in browser()
    #
    # Reads output/by_chemical_hit_dist.txt and the globals CYTOTOX, MAT.AC50
    # and MAT.Z.NORM; the histograms are drawn by the external hist.log().
    cat("==========================================================================\n")
    cat("cytotox.dist\n")
    cat("==========================================================================\n")
    flush.console()
    file <- "output/by_chemical_hit_dist.txt"
    temp <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"")
    rownames(temp) <- temp[,"CODE"]
    if(to.file) {
        fname <- "plots/cytotox_dist_summary.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    }
    par(mfrow=c(1,1),mar=c(4,4,3,3))

    # chemicals tested in more than 500 assays
    temp.1 <- temp[temp[,"AssaysTested"]>500,]
    nchem <- dim(temp.1)[1]

    # the four stress assays, with the colour, legend label and panel title
    # used for each of them further down
    stress.assays <- c("APR_OxidativeStress_24h_up","ATG_NRF2_ARE_CIS","ATG_MRE_CIS","ATG_HSE_CIS")
    stress.bg <- c("red","green","blue","gray")
    stress.label <- c("Oxidative Stress:","NRF2:","MRE:","HSE:")
    stress.title <- c("Oxidative Stress","NRF2","MRE","HSE")

    # CYTOTOX extended by five columns (6..10): the cytotoxicity value in uM
    # and the AC50 of each stress assay, then restricted to the chemicals above
    stress <- CYTOTOX
    for(k in 1:5) stress <- cbind(stress,stress[,5])
    names(stress)[6:10] <- c("cytotox",stress.assays)
    stress[,"cytotox"] <- 1000000 * 10**(-stress[,"cytotox.mean"])
    for(a in stress.assays) stress[,a] <- MAT.AC50[,a]
    stress <- stress[temp.1[,"CODE"],]

    if(do.prep) {
        zpm <- c()
        zpm.class <- c()
        zt.all <- c()
        perc.z.in <- c()
        perc.z.out <- c()
        for(i in seq_len(nchem)) {
            code <- temp.1[i,"CODE"]
            burst.mean <- temp.1[code,"Burst.median"]
            burst.sd <- temp.1[code,"Burst.mad"]
            y <- 100*temp.1[code,"HitRatio"]
            denom <- temp.1[code,"AssaysTested"]
            if(!is.na(burst.mean) && !is.na(burst.sd)) {
                zpm <- c(zpm,y)
                # a burst median of exactly 3 puts the chemical in class 0
                # (labelled "Cytotox(-)" on the box plot)
                if(burst.mean==3) zpm.class <- c(zpm.class,0)
                else {
                    zpm.class <- c(zpm.class,1)
                    zt <- MAT.Z.NORM[code,]
                    zt <- zt[!is.na(zt)]
                    zt.all <- c(zt.all,zt)
                    # percent of the tested assays with Z at/above and below 3
                    perc.out <- 100*length(zt[zt>=3])/denom
                    perc.in <- 100*length(zt[zt<3])/denom
                    perc.z.in <- c(perc.z.in,perc.in)
                    perc.z.out <- c(perc.z.out,perc.out)
                }
            }
        }
        ZPM <<- zpm
        ZPM.CLASS <<- zpm.class
        PERC.Z.IN <<- perc.z.in
        PERC.Z.OUT <<- perc.z.out
        ZT.ALL <<- zt.all
    }

    #-------------------------------------------------------------------------
    # page 1: percent hits by class
    #-------------------------------------------------------------------------
    h.0 <- ZPM[ZPM.CLASS==0]
    h.1 <- ZPM[ZPM.CLASS==1]
    n.0 <- length(h.0)
    n.1 <- length(h.1)
    result.01 <- t.test(h.0,h.1,alternative="less")

    zpm <- ZPM
    zpm.class <- ZPM.CLASS
    perc.z.out <- PERC.Z.OUT
    h.2 <- PERC.Z.OUT
    result.02 <- t.test(h.0,h.2,alternative="less")

    # the Z>=3 percentages are appended as a third class (2)
    perc.z.out.class <- perc.z.out
    perc.z.out.class[] <- 2
    zpm <- c(zpm,perc.z.out)
    zpm.class <- c(zpm.class,perc.z.out.class)
    result <- boxplot(zpm~zpm.class,xlab="",ylab="%Hit (total)",names=c("Cytotox(-)","Cytotox(+)","Cytotox(+),Z>3"),cex.axis=1.2,cex.lab=1.2,ylim=c(0,35))
    text(labels=paste("Cytotox(-): ",n.0,"chemicals"),x=0.5,y=20,pos=4,cex=1.1)
    text(labels=paste("Cytotox(+): ",n.1,"chemicals"),x=0.5,y=17,pos=4,cex=1.1)
    text(labels=paste("p: ",format(result.01$p.value,digits=2)),x=2,y=36,pos=1,cex=1.1)
    text(labels=paste("p: ",format(result.02$p.value,digits=2)),x=3,y=36,pos=1,cex=1.1)
    for(k in 1:3) {
        text(labels=paste("median: ",format(result$stats[3,k],digits=2),"%",sep=""),x=k,y=34,pos=1,cex=1.1)
    }

    #-------------------------------------------------------------------------
    # page 2: stress AC50 against the cytotoxicity value
    #-------------------------------------------------------------------------
    x <- stress[,"cytotox"]
    y <- stress[,"APR_OxidativeStress_24h_up"]
    plot(y~x,log="xy",xlab="Median Cytotoxicity AC50 (uM)",ylab="Stress AC50(uM)",xlim=c(0.1,200),ylim=c(0.1,200),cex.lab=1.2,cex.axis=1.2,type="n")
    # identity line and the two lines one decade either side of it
    lines(c(0.01,1000),c(0.01,1000),lwd=2)
    lines(c(0.1,1000),c(0.01,100),lwd=1)
    lines(c(0.01,100),c(0.1,1000),lwd=1)

    # chemicals whose cytotoxicity value lies in (0, 1000)
    mask <- x
    mask[mask>=1000] <- 0
    mask[mask>0] <- 1
    denom <- x[mask==1]

    # NOTE(review): missing values are not removed here, so NA comparisons
    # are counted in the numerator below - confirm that this is intended
    frac.above <- numeric(length(stress.assays))
    for(k in seq_along(stress.assays)) {
        y <- stress[,stress.assays[k]]
        points(y~x,pch=21,bg=stress.bg[k],cex=1)
        num <- y[mask==1]
        frac.above[k] <- length(num[num>denom])/length(denom)
    }

    # hand-drawn legend with the fraction for each assay
    text(x=0.08,y=150,"% Hits AC50 > cytotox median",cex=1.2,pos=4)
    legend.y <- c(100,70,50,35)
    for(k in seq_along(stress.assays)) {
        text(x=0.1,y=legend.y[k],paste(stress.label[k],format(100*frac.above[k],digits=3),"%"),pos=4)
        points(x=0.1,y=legend.y[k],pch=21,bg=stress.bg[k],cex=1.5)
    }

    #-------------------------------------------------------------------------
    # page 3: histograms of AC50(stress)/AC50(cytotox), one panel per assay
    #-------------------------------------------------------------------------
    par(mfrow=c(2,2),mar=c(4,4,3,3))

    # 1 where a value is present and in (0, 1000), 0 where it is missing,
    # zero or >= 1000 (same arithmetic as the masks this replaces)
    in.range <- function(v) {
        m <- v
        m[m>=1000] <- 0
        m[m>0] <- 1
        m[is.na(v)] <- 0
        m
    }

    # 15 break points doubling from 0.01
    breaksA <- 0.01*2^(0:14)

    x <- stress[,"cytotox"]
    mask1 <- in.range(x)
    for(k in seq_along(stress.assays)) {
        y <- stress[,stress.assays[k]]
        mask <- mask1*in.range(y)
        denom <- x[mask==1]
        num <- y[mask==1]
        z <- num/denom
        z <- z[!is.na(z)]

        # hist() needs every value strictly inside the outer breaks
        z <- z[z<breaksA[length(breaksA)]]
        z <- z[z>breaksA[1]]
        xA <- hist(z,breaks=breaksA,plot=FALSE)
        ymax <- max(xA$counts)
        hist.log(breaksA,xA$counts,ylim=c(0,ymax),xlab="AC50(stress)/AC50(Cytotox)",ylab="Hits",main=stress.title[k],1000000,1000000,1000000)
        # markers at a ratio of 1 and one decade either side
        lines(c(1,1),c(0,ymax),lwd=2,col="red")
        lines(c(10,10),c(0,ymax),lwd=1,col="red")
        lines(c(0.1,0.1),c(0,ymax),lwd=1,col="red")
    }

    if(to.file) graphics.off()
    else browser()
}

#--------------------------------------------------------------------------------------
#
# biological process specificity
#
#--------------------------------------------------------------------------------------
biological.process.specificity <- function(do.prep=FALSE,to.file=FALSE,to.file2=FALSE) {
    # Build a chemical x assay 0/1 "specific hit" matrix, roll it up by
    # biological process, and draw heat maps of both.
    #
    # do.prep  : TRUE  -> recompute the chemical x assay matrix, store it in
    #                     the global BPSPEC, write
    #                     ../plots/assay_specificity.csv and draw the assay
    #                     level heat map
    #            FALSE -> reuse the global BPSPEC from an earlier call
    # to.file  : assay-level heat map goes to
    #            ../plots/assay_specificity_hm.pdf instead of the screen
    # to.file2 : process-level heat map goes to
    #            ../plots/bioprocess_specificity_hm.pdf instead of the screen
    #
    # Reads the globals CHEMS, ASSAY.INFO, MAT.Z.NORM, MAT.T.SCALED,
    # MAT.hitcall and MAT.tested, and calls the external drug.target().
    cat("==========================================================================\n")
    cat("biological.process.specificity\n")
    cat("==========================================================================\n")

    # chemicals flagged as Phase_I or Phase_II
    code.list <- CHEMS[is.element(CHEMS[,"Phase_I"],1),"CODE"]
    code.list <- c(code.list,CHEMS[is.element(CHEMS[,"Phase_II"],1),"CODE"])
    code.list <- sort(unique(code.list))
    nchem <- length(code.list)

    if(do.prep) {
        zs <- MAT.Z.NORM[code.list,]
        tp <- MAT.T.SCALED[code.list,]
        ht <- MAT.hitcall[code.list,]
        tested <- MAT.tested[code.list,]

        # missing values count as "no signal"; untested or non-hit
        # chemical/assay pairs are zeroed by the multiplications
        zs[is.na(zs)] <- 0
        tp[is.na(tp)] <- 0
        ht[is.na(ht)] <- 0
        zs <- zs*tested
        tp <- tp*tested
        ht <- ht*tested

        zs <- zs*ht
        tp <- tp*ht

        # a pair is specific when Z >= 5 and the scaled T is >= 50
        zs[zs<5] <- 0
        zs[zs>0] <- 1
        tp[tp<50] <- 0
        tp[tp>0] <- 1
        spec <- zs*tp

        BPSPEC <<- spec
        # only chemicals and assays with at least one specific hit are
        # written out and plotted
        rs <- rowSums(spec)
        spec <- spec[rs>0,]
        cs <- colSums(spec)
        spec <- spec[,cs>0]
        cnames <- CHEMS[code.list,]
        cnames <- cnames[rs>0,]
        outdata <- cbind(cnames,spec)
        outfile <- "../plots/assay_specificity.csv"
        write.csv(outdata,file=outfile, row.names=FALSE)

        if(to.file) {
            fname <- "../plots/assay_specificity_hm.pdf"
            pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
        }
        result <- heatmap(t(as.matrix(spec)),margins=c(10,10),scale="none",main="Assay Specificity",
                          xlab="",ylab="",cexCol=0.1,cexRow=0.3,col=brewer.pal(9,"Reds"),
                          hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE)
        if(to.file) dev.off()
        else browser()
    }

    # roll the assay-level matrix up to one column per biological process
    source.list <- sort(uniquify(ASSAY.INFO[,"biological_process"]))
    source.list <- source.list[!is.element(source.list,"assay QC")]
    nsource <- length(source.list)
    result1 <- matrix(nrow=nchem,ncol=nsource)
    rownames(result1) <- code.list
    colnames(result1) <- source.list
    for(i in seq_len(nsource)) {
        sourcei <- source.list[i]
        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],sourcei),"Assay"]
        # drop=FALSE keeps a single-assay selection two-dimensional, so a
        # process with exactly one assay is counted instead of being left at 0
        result1[,i] <- rowSums(BPSPEC[,assay.list,drop=FALSE])
    }

    spec <- result1
    rs <- rowSums(result1)
    result1 <- cbind(rs,result1)
    result1 <- as.data.frame(result1)
    names(result1)[1] <- "Total"
    cnames <- CHEMS[code.list,]
    outdata <- cbind(cnames,result1)
    outfile <- "../plots/bioprocess_specificity.csv"
    write.csv(outdata,file=outfile, row.names=FALSE)

    # presence/absence matrix restricted to non-empty rows and columns
    rs <- rowSums(spec)
    cs <- colSums(spec)
    spec <- spec[rs>0,cs>0,drop=FALSE]
    spec[spec>0] <- 1

    # side colours for the processes: red when drug.target() returns 1
    rowcolors <- colnames(spec)
    for(i in seq_along(rowcolors)) {
        type <- drug.target(rowcolors[i])
        if(type==1) rowcolors[i] <- "red"
        else rowcolors[i] <- "white"
    }

    # side colours for the chemicals, from use_super_category; isTRUE() sends
    # a missing category to "white" instead of aborting the if()
    colcolors <- rownames(spec)
    for(i in seq_along(colcolors)) {
        code <- colcolors[i]
        cclass <- CHEMS[code,"use_super_category"]
        if(isTRUE(str_detect(cclass,ignore.case("Pharmaceutical")))) colcolors[i] <- "red"
        else if(isTRUE(str_detect(cclass,ignore.case("Pesticide")))) colcolors[i] <- "black"
        else if(isTRUE(str_detect(cclass,ignore.case("Food/Flavor/Fragrance")))) colcolors[i] <- "green"
        else colcolors[i] <- "white"
    }

    if(to.file2) {
        fname <- "../plots/bioprocess_specificity_hm.pdf"
        pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    }
    result <- heatmap(t(as.matrix(spec)),margins=c(10,10),scale="none",main="Biological Process Specificity",
                      xlab="",ylab="",cexCol=0.1,cexRow=1,col=brewer.pal(9,"Reds"),
                      hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE,
                      RowSideColors=rowcolors,ColSideColors=colcolors)
    if(to.file2) dev.off()
    else browser()
}
#--------------------------------------------------------------------------------------
#
# do the heatmap of the assay coverage
#
# QC=OK
#--------------------------------------------------------------------------------------
assay.coverage.hm <- function(to.file=FALSE,cex.col=0.1,cex.row=0.1,max.chem=1700) {
    # Clustered heat map of the global MAT.tested matrix.
    #
    # to.file  : TRUE  -> write to ../plots/assay_coverage_heatmap.pdf
    #            FALSE -> draw on the current device and stop in browser()
    # cex.col, cex.row : label sizes passed to heatmap() as cexCol / cexRow
    # max.chem : only the first max.chem rows of MAT.tested are plotted
    #            (default 1700, the previously hard-coded value)
    cat("==========================================================================\n")
    cat("assay.coverage.hm\n")
    cat("==========================================================================\n")
    flush.console()

    if(to.file) {
        file <- "../plots/assay_coverage_heatmap.pdf"
        pdf(file=file,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    }

    # never index past the last row when MAT.tested is shorter than max.chem
    nrow.use <- min(max.chem,dim(MAT.tested)[1])
    mat <- MAT.tested[seq_len(nrow.use),,drop=FALSE]
    result <- heatmap(mat,margins=c(5,5),scale="none",main=paste("Assay Coverage",dim(mat)[1]," chemicals, ",dim(mat)[2]," assays"),
                      xlab="Assays",ylab="Chemicals",cexCol=cex.col,cexRow=cex.row,col=brewer.pal(9,"Reds"),
                      hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)

    # note: graphics.off() closes every open device, not just the PDF
    if(to.file) graphics.off()
    else browser()
}
