#--------------------------------------------------------------------------------------
#
# run all of the diagnostics functions
#
#--------------------------------------------------------------------------------------
# Driver that regenerates every diagnostic output in a fixed order.
#
# do.prep: when TRUE, cell.stress.ranges() is run first to rebuild the
#          cell-stress range table before any figure is produced.
#
# Every downstream function is asked to write its output to file.
run.diagnostics <- function(do.prep=T) {
  print.current.function()
  if (do.prep) {
    cell.stress.ranges()
  }
  chemical.hitdist(to.file = TRUE, target.gene = "ESR1")         # Figure 2
  chemical.hitdist(to.file = TRUE, target.gene = NA)
  source.Zdist.2(to.file = TRUE)                                 # Figure 3
  target.analysis(to.file = TRUE)                                # Figure 4
  bioprocess.quadrant.trellis.Z(do.prep = TRUE, to.file = TRUE)  # Figure 5,6
  bioclass.hm(to.file = TRUE)                                    # Figure 7
  cellfree.vs.cellbased(to.file = TRUE)                          # Figure 8
  bruns.watson(to.file = TRUE)
  cytotox.boxplot(TRUE)
  chemical.hit.dist.summary(do.prep = TRUE, to.file = TRUE)
}
#--------------------------------------------------------------------------------------
#
# Get the cytotoxicity ranges
#
#--------------------------------------------------------------------------------------
# Build the per-chemical hit-count / AC50-range table for a fixed list of
# biological processes plus one "All" group covering every assay.
#
# For each group and each chemical (row of CHEMS) three columns are added:
#   <name>.nhit.uM  number of hits among the assays the chemical was tested in
#   <name>.min.uM   minimum AC50 over those hits
#   <name>.max.uM   maximum AC50 over those hits
# Chemicals without any hit in a group get the placeholder 1000 for min and max.
#
# Reads the globals CHEMS, ASSAY.INFO, MAT.tested, MAT.hitcall, MAT.AC50 and
# VARMATDATE. Side effects: writes ../input/cell_stress_ranges_<VARMATDATE>.xlsx
# and stores the table (row names = CODE) in the global CELL.STRESS.RANGES.
cell.stress.ranges <- function() {
  print.current.function()

  nchem <- dim(CHEMS)[1]

  result <- as.data.frame(cbind(CHEMS[,"CODE"],CHEMS[,"Name"]))
  names(result) <- c("CODE","Name")

  # biological_process values in ASSAY.INFO ...
  bio.list <- c("cytotoxicity BLA","cytotoxicity SRB","proliferation decrease","apoptosis up","ER stress",
    "microtubule up","heat shock","hypoxia","inflammation up","mitochondrial disruption up","oxidative stress up",
    "CCL2","cell cycle up","cellular adhesion up","chemokine up","coagulation up","CYP450","enzyme blocking",
    "estrogen receptor","extracellular matrix up","GPCR","growth factor up","ion channel","oncogene",
    "ppar signaling","PTPN","transporter",
    "AHR","AChE")
  # ... and the column-name prefix used for each of them (same order).
  name.list <- c("cytotoxBLA","cytotoxSRB","prolifDown","apoptosis","ERstress","microtubule","heatShock","hypoxia",
    "inflammation","mitochondria","oxidativeStress",
    "CCL2","cellCycleUp","cellularAdhesionUp","chemokineUp","coagulationUp","CYP450","enzymeBlocking","estrogenReceptor",
    "extracellularMatrixUp","GPCR","growthFactorUp","ionChannel","oncogene","PPAR","PTPN","transporter",
    "AHR","AChE")
  stopifnot(length(bio.list)==length(name.list))

  # Hit count and AC50 range of every chemical over one set of assays.
  summarize.assay.set <- function(assay.set) {
    n <- numeric(nchem)
    minval <- rep(1000,nchem)   # placeholder kept when the chemical has no hit
    maxval <- rep(1000,nchem)
    for(i in seq_len(nchem)) {
      tested <- MAT.tested[i,assay.set]
      hit <- MAT.hitcall[i,assay.set]
      ac50 <- MAT.AC50[i,assay.set]
      # only assays the chemical was tested in count; missing or negative
      # hit calls are treated as inactive
      hit <- hit[tested==1]
      hit[is.na(hit)] <- 0
      hit[hit<0] <- 0
      ac50 <- ac50[tested==1]
      ac50 <- ac50[hit==1]
      nhit <- sum(hit)
      if(nhit>0) {
        minval[i] <- min(ac50)
        maxval[i] <- max(ac50)
      }
      n[i] <- nhit
    }
    data.frame(n=n,minval=minval,maxval=maxval)
  }

  # Append the three statistics columns to the table under the given prefix.
  append.stats <- function(tbl,stats,prefix) {
    names(stats) <- paste0(prefix,c(".nhit.uM",".min.uM",".max.uM"))
    cbind(tbl,stats)
  }

  for(b in seq_along(bio.list)) {
    print(bio.list[b])
    flush.console()
    assay.set <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],bio.list[b]),"assay"]
    result <- append.stats(result,summarize.assay.set(assay.set),name.list[b])
  }

  # same statistics over every assay
  result <- append.stats(result,summarize.assay.set(ASSAY.INFO[,"assay"]),"All")

  outfile <- paste("../input/cell_stress_ranges_",VARMATDATE,".xlsx",sep="")
  write.xlsx(result,outfile)
  rownames(result) <- result[,"CODE"]
  CELL.STRESS.RANGES <<- result
}

#--------------------------------------------------------------------------------------
#
# Calculate the hit distribution by chemical
#
# QC=OK
#--------------------------------------------------------------------------------------
# Plot (and optionally tabulate) the AC50 hit distribution of each chemical.
#
# Arguments:
#   to.file      TRUE: histograms are written to a PDF under ../plots/ and all
#                graphics devices are closed when the function exits.
#                FALSE: the function stops in browser() after every plotted
#                chemical and once more at the end.
#   target.gene  NA (default): the summary table ../output/chemical_hitdist.txt
#                is (re)written, one row per plotted chemical.
#                Otherwise: an ASSAY.INFO "intended_target" value; the AC50s of
#                its assays are overlaid as triangles coloured by source group
#                and no table is written.
#
# Only chemicals with more than 5 hits (AC50 < 1e6) outside the cytotoxicity
# assays are plotted / tabulated.
#
# Reads the globals CHEMS, CYTOTOX, ASSAY.INFO, MAT.AC50 and MAT.logAC50.
# Side effects: sets options(warn=1), assigns the global CYTOTOX.ASSAYS and
# changes par(mfrow, mar) on the active device.
chemical.hitdist <- function(to.file=FALSE,target.gene=NA) {
  print.current.function()
  options(warn=1)
  # kept from the original: emits an (empty) warning immediately
  warning(immediate.=TRUE,call.=TRUE)
  flush.console()

  target.gene <- as.character(target.gene)
  by.target <- !is.na(target.gene)
  nchem <- dim(CHEMS)[1]
  file <- "../output/chemical_hitdist.txt"
  header <- "CODE\tCASRN\tName\tIntendedTarget\tStructureCategory\tUseCategory\tUseSuperCategory\tAssaysTested\tHits\tHitRatio\tSelectiveHits\tSelectiveHitRatio\tCytotoxAssaysTested\tCytotoxHits\tMinAC50\tCytotoxMedian\tCytotoxMin\n"
  if(!by.target) cat(file=file,header,append=FALSE)

  if(to.file) {
    fname <- "../plots/chemical_hit_dist.pdf"
    if(by.target) fname <- paste("../plots/chemical_hit_dist_",target.gene,".pdf",sep="")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the device(s) even when plotting fails part-way through
    on.exit(graphics.off(),add=TRUE)
  }
  par(mfrow=c(3,2),mar=c(4,4,2,2))

  target.assay.list <- NA
  if(by.target) target.assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"intended_target"],target.gene),"assay"]
  cytotox.assay.set <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],c("cytotoxicity SRB","cytotoxicity BLA","proliferation decrease")),"assay"]
  CYTOTOX.ASSAYS <<- cytotox.assay.set

  # 1 = column is not a cytotoxicity assay, 0 = cytotoxicity assay
  mask <- vector(mode="integer",length=dim(MAT.logAC50)[2])
  mask[] <- 1
  mask[is.element(names(MAT.logAC50),cytotox.assay.set)] <- 0

  # marker fill colour per assay source group; anything else stays white
  source.colors <- c(NVS_NR="black",OT="green",
                     Tox21_BLA_Agonist="gray",Tox21_BLA_Antagonist="gray",
                     Tox21_LUC_Agonist="gray",Tox21_LUC_Antagonist="gray",
                     ATG_CIS="violet",ATG_TRANS="violet",ACEA="red",
                     BSK_up="orange",BSK_down="orange",
                     Apr_up="yellow",Apr_dn="yellow")

  for(i in seq_len(nchem)) {
    code <- CHEMS[i,"CODE"]
    casrn <- CHEMS[i,"CASRN"]
    cname <- CHEMS[i,"Name"]
    target <- CHEMS[i,"target_gene"]
    ccat <- CHEMS[i,"structure_category"]
    ucat <- CHEMS[i,"use_category"]
    uscat <- CHEMS[i,"use_super_category"]

    cytotox.median <- as.numeric(as.character(CYTOTOX[code,"cytotox_median_um"]))
    cytotox.min <- as.numeric(as.character(CYTOTOX[code,"cytotox_lower_bound_um"]))
    if(is.na(cytotox.median)) {
      # defaults used when no cytotoxicity median is available for the chemical
      cytotox.median <- 1000
      cytotox.min <- 100
    }
    if(by.target) {
      # NOTE(review): `doit` is assigned here but never read anywhere in this
      # function, so chemicals without activity in the target assays are NOT
      # skipped. Left unchanged; confirm whether a filter was intended.
      target.temp <- MAT.logAC50[code,target.assay.list]
      target.temp[is.na(target.temp)] <- 0
      if(sum(target.temp)==0) doit <- FALSE
    }

    # AC50s over the non-cytotoxicity assays: NA entries are dropped, values
    # are floored at 1e-3, and entries below 1e6 are counted as hits
    tempA <- MAT.AC50[code,mask==1]
    tempA <- tempA[!is.na(tempA)]
    tempA[tempA<1E-3] <- 1E-3
    ntry <- length(tempA)
    tempA <- tempA[tempA<1000000]
    nhit <- length(tempA)

    if(nhit>5) {
      hit.ratio <- nhit/ntry
      nhit.selective <- length(tempA[tempA<cytotox.min])
      hit.ratio.selective <- nhit.selective / ntry
      tempA <- sort(tempA)
      minAC50 <- min(tempA)

      tempA.cytotox <- MAT.AC50[code,cytotox.assay.set]
      tempA.cytotox <- tempA.cytotox[!is.na(tempA.cytotox)]
      cytotox.ntry <- length(tempA.cytotox)
      tempA.cytotox <- tempA.cytotox[tempA.cytotox<1000000]
      cytotox.nhit <- length(tempA.cytotox)

      # geometric bin edges (ratio 1.2): 66 edges to start with, extended until
      # they pass the largest AC50
      breaksA.min <- 1e-3
      if(minAC50<breaksA.min) {
        breaksA.min <- breaksA.min/100
      }
      breaksA <- breaksA.min
      for(k in 1:65) breaksA <- c(breaksA,1.2*breaksA[length(breaksA)])
      while(max(breaksA)<=max(tempA)) {
        breaksA <- c(breaksA,1.2*breaksA[length(breaksA)])
      }
      print(code)
      xA <- hist(tempA,breaks=breaksA,plot=FALSE)
      ymax <- 1.5*max(xA$counts)
      if(ymax<5) ymax <- 5
      # NOTE(review): cytotox.max is not defined in this function; the call
      # only works if hist.log never evaluates it or a global of that name
      # exists. Left as in the original.
      hist.log(breaksA,xA$counts,ylim=c(0,ymax),xlab="AC50 (uM)",ylab="Hits",main=paste(casrn,":",cname),cytotox.median, cytotox.min, cytotox.max)

      # summary lines stacked downward from the top-left corner
      eps <- 0.08
      xpmin <- breaksA.min
      labels <- c(paste("ntry=",ntry,sep=""),
                  paste("nhit=",nhit,sep=""),
                  paste("nhit (Z>3)=",nhit.selective,sep=""),
                  paste("cytotox median=",format(cytotox.median,digits=2),sep=""),
                  paste("cytotox min=",format(cytotox.min,digits=2),sep=""),
                  paste("cytotox try=",cytotox.ntry,sep=""),
                  paste("cytotox hit=",cytotox.nhit,sep=""))
      text(rep(xpmin,length(labels)),ymax*(1-seq_along(labels)*eps),labels,pos=4)

      # one red star per cytotoxicity-assay hit
      if(length(tempA.cytotox)>0) {
        points(tempA.cytotox,rep(ymax*0.6,length(tempA.cytotox)),pch="*",col="red",cex=2)
      }

      s <- paste0(paste(code,casrn,cname,target,ccat,ucat,uscat,ntry,nhit,
                        format(hit.ratio,digits=2),nhit.selective,
                        format(hit.ratio.selective,digits=2),
                        cytotox.ntry,cytotox.nhit,format(minAC50,digits=3),
                        format(cytotox.median,digits=2),format(cytotox.min,digits=2),
                        sep="\t"),"\n")
      cat(i,":",s)
      if(!by.target) cat(file=file,s,append=TRUE)
      flush.console()

      if(by.target) {
        # overlay the AC50 of every assay for the requested target
        for(g in seq_along(target.assay.list)) {
          assay.g <- target.assay.list[g]
          ac50 <- MAT.AC50[code,assay.g]
          asource <- as.character(ASSAY.INFO[is.element(ASSAY.INFO[,"assay"],assay.g),"source_group"])[1]
          color <- "white"
          if(!is.na(asource) && asource %in% names(source.colors)) color <- source.colors[[asource]]
          # random vertical jitter so markers with similar AC50 do not overlap
          yval <- ymax*0.5*(1+0.2*rnorm(1,0.1))
          points(ac50,yval,pch=24,bg=color,fg="black",cex=2)
        }
      }
      if(!to.file) browser()
    }
  }

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Z-score distribution histograms by assay source group
# (always uses the normalized Z matrix MAT.Z.NORM; there is no zmode argument)
# QC=OK
#--------------------------------------------------------------------------------------
# Histogram the normalized Z-scores of hits for every assay source group.
#
# For each source group with more than one assay, two panels are drawn: one
# for chemicals with at least 2 cytotoxicity hits (CYTOTOX "nhit" >= 2) and
# one for chemicals with fewer. Both share the y-range of the first panel and
# carry red vertical lines at Z = 3 and Z = 0. Only Phase_I / Phase_II
# chemicals are used, and only Z values strictly between -5 and 15.
#
# to.file: TRUE writes ../plots/source_Zdist_2.pdf and closes all graphics
#          devices on exit; FALSE stops in browser() after every source group
#          and at the end.
#
# Reads the globals ASSAY.INFO, CHEMS, CYTOTOX, MAT.Z.NORM, MAT.Z, MAT.hitcall,
# MAT.tested and MAT.AC50.
source.Zdist.2 <- function(to.file=FALSE) {
  print.current.function()
  if(to.file) {
    fname <- paste("../plots/source_Zdist_2.pdf",sep="")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the device(s) even when plotting fails part-way through
    on.exit(graphics.off(),add=TRUE)
  }
  par(mfrow=c(4,2),mar=c(4,4,2,2))

  source.list <- sort(uniquify(ASSAY.INFO[,"source_group"]))

  # keep Z only where the chemical was tested, was called a hit and has a
  # non-zero raw Z
  z <- MAT.Z.NORM
  z[MAT.hitcall==0] <- NA
  z[MAT.tested==0] <- NA
  z[MAT.Z==0] <- NA

  ctemp <- CHEMS[,c("CODE","Phase_I","Phase_II")]
  mask <- ctemp[,"Phase_I"]+ctemp[,"Phase_II"]
  code.list <- ctemp[mask>0,"CODE"]

  code.cytotox.no <- CYTOTOX[CYTOTOX[,"nhit"]<2,"CODE"]
  code.cytotox.yes <- CYTOTOX[CYTOTOX[,"nhit"]>=2,"CODE"]

  code.cytotox.no <- code.cytotox.no[is.element(code.cytotox.no,code.list)]
  code.cytotox.yes <- code.cytotox.yes[is.element(code.cytotox.yes,code.list)]
  cat("chems no cytotox:",length(code.cytotox.no),"\n")
  cat("chems yes cytotox:",length(code.cytotox.yes),"\n")

  xmax <- 15
  xmin <- -5
  breaks <- seq(-6,xmax,by=0.2)

  # non-missing Z values of the given chemicals/assays inside (xmin, xmax)
  z.in.window <- function(codes,assays) {
    v <- as.numeric(as.matrix(z[codes,assays]))
    v <- v[!is.na(v)]
    v <- v[v> xmin]
    v[v< xmax]
  }
  # red reference lines at Z = 3 and Z = 0
  mark.thresholds <- function(ymax) {
    lines(c(3,3),c(0,ymax),lwd=2,col="red")
    lines(c(0,0),c(0,ymax),lwd=2,col="red")
  }

  for(i in seq_along(source.list)) {
    source.group <- source.list[i]
    assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"source_group"],source.group),"assay"]
    assay.list <- assay.list[is.element(assay.list,colnames(MAT.AC50))]
    cat(source.group," : ",length(assay.list),"\n")
    flush.console()
    if(length(assay.list)>1) {
      # panel 1: chemicals with cytotoxicity
      ztemp <- z.in.window(code.cytotox.yes,assay.list)
      x <- hist(ztemp,xlim=c(xmin,xmax),main=paste("Z distribution for",source.group),cex.lab=1.2,cex.axis=1.2,ylab="Chemical/Assay Hits",xlab="Z-score",breaks=breaks,freq=TRUE)

      # two-component normal mixture fit on the binned counts; the call is
      # kept from the original although its result is not used further here
      mymix <- cbind(x$breaks[1:length(x$counts)],x$counts)
      fit <- mix(mymix,mixparam(mu=c(0,xmax),sigma=c(2,2)),"norm")

      ymax <- max(x$counts)
      mark.thresholds(ymax)

      # panel 2: chemicals without cytotoxicity, same y-range as panel 1
      ztemp <- z.in.window(code.cytotox.no,assay.list)
      x <- hist(ztemp,xlim=c(xmin,xmax),ylim=c(0,ymax),main=paste("Z distribution for",source.group),cex.lab=1.2,cex.axis=1.2,ylab="Chemical/Assay Hits",xlab="Z-score",breaks=breaks,freq=TRUE)
      mark.thresholds(ymax)

      if(!to.file) browser()
    }
  }
  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# chemical target analysis
#
#--------------------------------------------------------------------------------------
# Compare normalized Z-scores of chemicals in assays for their annotated target
# gene against the Z-scores of all other well-tested chemicals in those assays.
#
# Chemical/target pairs come from ../input/chemical_targets.xlsx (columns
# CODE and target_gene). Assays whose name contains "antagonist" or
# "activator" (case-insensitive) are excluded. "Other" chemicals are those in
# CODE.LIST with more than 500 tested assays that are not in the target file.
# A non-NA entry of MAT.Z.NORM is counted as a hit.
#
# Prints the hit fraction of both groups, runs a one-sided t-test
# (target > others) and draws a two-box boxplot annotated with the medians
# and the p-value.
#
# to.file: TRUE writes ../plots/target_analysis.pdf; FALSE plots on the
#          current device and stops in browser() afterwards.
target.analysis <- function(to.file=FALSE) {
  print.current.function()

  assay.temp <- ASSAY.INFO[,c("assay","intended_target")]
  assay.temp[,"intended_target"] <- toupper(assay.temp[,"intended_target"])
  fname <- "../input/chemical_targets.xlsx"
  tchems <- read.xlsx(fname)

  assay.temp <- assay.temp[is.element(assay.temp[,"intended_target"],tchems[,"target_gene"]),]
  excluded <- grepl("antagonist",assay.temp[,"assay"],ignore.case=TRUE) |
              grepl("activator",assay.temp[,"assay"],ignore.case=TRUE)
  assay.temp <- assay.temp[!excluded,]

  gene.list <- sort(unique(assay.temp[,"intended_target"]))
  tchems <- tchems[is.element(tchems[,"target_gene"],gene.list),]

  # Z of each target chemical in every assay for its own target gene
  target.z <- vector("list",nrow(tchems))
  for(i in seq_len(nrow(tchems))) {
    gene <- tchems[i,"target_gene"]
    code <- tchems[i,"CODE"]
    assays <- assay.temp[is.element(assay.temp[,"intended_target"],gene),"assay"]
    target.z[[i]] <- as.numeric(as.matrix(MAT.Z.NORM[code,assays,drop=FALSE]))
  }
  target.z <- unlist(target.z)
  hit.list <- as.numeric(!is.na(target.z))
  z.list <- target.z[!is.na(target.z)]

  code.list.in <- sort(unique(tchems[,"CODE"]))

  rs <- rowSums(MAT.tested)
  code.list <- CODE.LIST[rs>500]
  cat("code.list: ",length(code.list),"\n")
  code.list.out <- code.list[!is.element(code.list,code.list.in)]
  assay.list <- sort(unique(assay.temp[,"assay"]))

  # Z of every other chemical in the same assays, taken as one block instead
  # of growing vectors element by element (transposed to keep the
  # chemical-by-chemical order)
  other.z <- as.numeric(t(as.matrix(MAT.Z.NORM[code.list.out,assay.list,drop=FALSE])))
  hit.list.out <- as.numeric(!is.na(other.z))
  z.list.out <- other.z[!is.na(other.z)]

  cat("Target hit mean:",mean(hit.list),"\n")
  cat("Other hit mean: ",mean(hit.list.out),"\n")

  ret <- t.test(z.list,z.list.out,alternative="greater")
  p.value <- ret$p.value
  groups <- c(rep(1,length(z.list)),rep(2,length(z.list.out)))
  vals <- c(z.list,z.list.out)

  if(to.file) {
    file <- paste("../plots/target_analysis.pdf",sep="")
    pdf(file=file,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the PDF even when plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(5,5,4,2))

  boxplot(vals~groups,xlab="",ylab="Z-score",names=c("Target Class","Others"),cex.axis=1.2,cex.lab=1.2,ylim=c(-10,40),main="")
  text(1,37,format(median(z.list),digits=2))
  text(2,37,format(median(z.list.out),digits=2))
  text(0.5,37,"median:",pos=4)
  text(0.5,40,"p-value:",pos=4)
  text(1,40,format(p.value,digits=2))
  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# compile the data on the chemicals with targets
#
#--------------------------------------------------------------------------------------
# Compile one row per (chemical, assay) pair for every chemical that has a
# target gene annotated in CHEMS and every assay whose
# "intended_target_gene_symbol" equals that gene.
#
# Each row holds CODE, CASRN, Name, target_gene, assay and the corresponding
# entries of MAT.hitcall, MAT.AC50, MAT.T.SCALED (column "T") and MAT.Z.NORM
# (column "Z"). The table is written to
# ../output/target_gene_chemical_assay.xlsx after an interactive browser()
# stop.
#
# NOTE(review): this function matches on the ASSAY.INFO column
# "intended_target_gene_symbol" whereas the other functions in this file use
# "intended_target" -- confirm this is intentional.
#
# Reads the globals NCHEM, CHEMS, ASSAY.INFO and the MAT.* matrices.
target.dmat <- function() {
  print.current.function()

  name.list <- c("CODE","CASRN","Name","target_gene","assay","hitcall","AC50","T","Z")
  # single-row template that is refilled for every chemical/assay pair
  arow <- as.data.frame(matrix(ncol=length(name.list),nrow=1))
  names(arow) <- name.list
  # rows are collected in a list and bound once at the end, instead of
  # rbind()-ing onto a growing data frame inside the loop
  rows <- list()
  for(i in seq_len(NCHEM)) {
    code <- CHEMS[i,"CODE"]
    casrn <- CHEMS[i,"CASRN"]
    cname <- CHEMS[i,"Name"]
    gene <- CHEMS[i,"target_gene"]
    if(!is.na(gene)) {
      assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"intended_target_gene_symbol"],gene),"assay"]

      for(j in seq_along(assay.list)) {
        assay <- assay.list[j]
        cat(code,":",gene,":",assay,"\n")
        arow[1,"CODE"] <- code
        arow[1,"CASRN"] <- casrn
        arow[1,"Name"] <- cname
        arow[1,"target_gene"] <- gene
        arow[1,"assay"] <- assay
        arow[1,"hitcall"] <- MAT.hitcall[code,assay]
        arow[1,"AC50"] <- MAT.AC50[code,assay]
        arow[1,"T"] <- MAT.T.SCALED[code,assay]
        arow[1,"Z"] <- MAT.Z.NORM[code,assay]
        rows[[length(rows)+1]] <- arow
      }
    }
  }
  # NULL when no chemical/assay pair was found, as in the original
  dmat <- do.call(rbind,rows)
  # interactive stop before the file is written (kept from the original)
  browser()
  file <- "../output/target_gene_chemical_assay.xlsx"
  write.xlsx(dmat,file)

}
#--------------------------------------------------------------------------------------
#
# chemical target analysis, version 2 (works from the filtered
# target-gene / chemical / assay table in ../output/)
#
#--------------------------------------------------------------------------------------
# Target analysis based on ../output/target_gene_chemical_assay_FILTERED.xlsx
# (columns used: CODE, target_gene, assay, hitcall, Z).
#
# 1. Writes a per-assay summary (gene, number of chemicals, hits, hit rate) to
#    ../output/target_gene_chemical_assay_summary.xlsx.
# 2. Prints the overall hit rate of the target pairs and of all non-NA
#    entries of MAT.hitcall.
# 3. Compares the Z-scores of the target pairs with all remaining non-NA
#    MAT.Z.NORM entries in the same assays (one-sided t-test, target > rest)
#    and draws an annotated two-box boxplot.
#
# to.file: TRUE writes ../plots/target_analysis_2.pdf; FALSE plots on the
#          current device and stops in browser() afterwards.
target.analysis.2 <- function(to.file=FALSE) {
  print.current.function()

  file <- "../output/target_gene_chemical_assay_FILTERED.xlsx"
  dmat <- read.xlsx(file)
  assay.list <- sort(unique(dmat[,"assay"]))
  name.list <- c("assay","gene","chemicals","hits","rate")
  resmat <- as.data.frame(matrix(nrow=length(assay.list),ncol=length(name.list)))
  names(resmat) <- name.list
  for(i in seq_along(assay.list)) {
    assay <- assay.list[i]
    temp <- dmat[is.element(dmat[,"assay"],assay),]
    nchem <- dim(temp)[1]
    hits <- sum(temp[,"hitcall"])
    resmat[i,"assay"] <- assay
    resmat[i,"gene"] <- temp[1,"target_gene"]
    resmat[i,"chemicals"] <- nchem
    resmat[i,"hits"] <- hits
    resmat[i,"rate"] <- hits/nchem
  }
  file <- "../output/target_gene_chemical_assay_summary.xlsx"
  write.xlsx(resmat,file)

  samples.target <- dim(dmat)[1]
  hits.target <- sum(dmat[,"hitcall"])
  hitrate.target <- hits.target/samples.target
  cat("TARGET : total trys:",samples.target," hits:",hits.target," overall rate:",format(hitrate.target,digits=2),"\n")

  temp <- as.vector(as.matrix(MAT.hitcall))
  temp <- temp[!is.na(temp)]
  samples.total <- length(temp)
  hits.total <- sum(temp)
  hitrate.total <- hits.total/samples.total
  cat("TOTAL : total trys:",samples.total," hits:",hits.total," overall rate:",format(hitrate.total,digits=2),"\n")

  z.target <- dmat[,"Z"]
  z.target <- z.target[!is.na(z.target)]
  # background: the same assays with every target chemical/assay pair blanked
  temp <- MAT.Z.NORM[,assay.list]
  for(i in seq_len(dim(dmat)[1])) {
    assay <- dmat[i,"assay"]
    code <- dmat[i,"CODE"]
    temp[code,assay] <- NA
  }
  z.total <- as.vector(as.matrix(temp))
  z.total <- z.total[!is.na(z.total)]
  g.all <- c(rep(1,length(z.target)),rep(2,length(z.total)))
  z.all <- c(z.target,z.total)

  ret <- t.test(z.target,z.total,alternative="greater")
  p.value <- ret$p.value

  if(to.file) {
    file <- paste("../plots/target_analysis_2.pdf",sep="")
    pdf(file=file,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the PDF even when plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(5,5,4,2))

  boxplot(z.all~g.all,xlab="",ylab="Z-score",names=c("Target Class","Others"),cex.axis=1.2,cex.lab=1.2,ylim=c(-10,40),main="")
  text(1,37,format(median(z.target),digits=2))
  text(2,37,format(median(z.total),digits=2))
  text(0.5,37,"median:",pos=4)
  text(0.5,40,"p-value:",pos=4)
  text(1,40,format(p.value,digits=2))
  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# trellis plot on Z
#
#--------------------------------------------------------------------------------------
# Boxplots of the fraction of active assays per chemical, by use class, for
# "All Assays" and for every biological process, split into High-Z and Low-Z
# hits.
#
# Arguments:
#   do.prep  TRUE: rebuild the long table (one row per chemical x process x
#            Z class), store it in the global RTABLE and write it to
#            ../plots/bioprocess_use_z.xlsx. FALSE: reuse the existing RTABLE.
#   to.file  TRUE: plot to ../plots/bioprocess_quadrant_trellis_Z.pdf;
#            FALSE: plot on the current device and stop in browser() after
#            every process.
#
# Only Phase_I / Phase_II chemicals are used. A chemical/assay pair counts as
# active when it was tested, was called a hit and has a non-NA normalized Z;
# Low-Z keeps Z <= 3 and High-Z keeps Z > 3. Processes with 2 or fewer assays
# are skipped during table building, and only processes with more than 5
# table rows are plotted. Each panel shows, per use class, the number of
# chemicals and a Wilcoxon p-value against the "A.Food" class (blue = lower,
# red = higher, "NS" when neither one-sided test is below 0.05).
bioprocess.quadrant.trellis.Z <- function(do.prep=TRUE,to.file=FALSE) {
  print.current.function()

  code.list <- CHEMS[is.element(CHEMS[,"Phase_I"],1),"CODE"]
  code.list <- c(code.list,CHEMS[is.element(CHEMS[,"Phase_II"],1),"CODE"])
  code.list <- sort(unique(code.list))

  tested <- MAT.tested[code.list,]
  z <- MAT.Z.NORM[code.list,]
  hit <- MAT.hitcall[code.list,]

  bioproc.list <- sort(unique(ASSAY.INFO[,"biological_process"]))
  useclass.list <- c("FoodFlavorFragrance","Solvent","Surfactant","Other","Microbicide","Herbicide","Pesticide","Pharmaceutical")
  # plot labels, sorted the same way boxplot() orders the groups
  useclass.plot.names <- sort(vapply(useclass.list,use.sub,character(1),USE.NAMES=FALSE))

  nuseclass <- length(useclass.list)
  nbioproc <- length(bioproc.list)
  block.names <- c("CODE","Name","structure_category","use_category","use_super_category","BioProcess","Z.class","fraction.active")

  # Rows for one (assay set, Z class) combination: for every use class, the
  # fraction of the given assays in which each chemical is active.
  use.class.blocks <- function(Q,assay.list,bioproc,zclass) {
    out <- NULL
    for(iu in seq_len(nuseclass)) {
      useclass <- useclass.list[iu]
      code.list.useclass <- CHEMS[is.element(CHEMS[,"use_super_category"],useclass),"CODE"]
      code.list.useclass <- code.list.useclass[is.element(code.list.useclass,code.list)]
      temp <- Q[code.list.useclass,assay.list]

      # skipped when the subset has lost its dimensions
      if(!is.null(dim(temp))) {
        rs <- as.numeric(rowSums(temp)/dim(temp)[2])
        block <- as.data.frame(matrix(ncol=8,nrow=length(code.list.useclass)))
        block[,1] <- code.list.useclass
        block[,2] <- CHEMS[code.list.useclass,"Name"]
        block[,3] <- CHEMS[code.list.useclass,"structure_category"]
        block[,4] <- CHEMS[code.list.useclass,"use_category"]
        block[,5] <- use.sub(useclass)
        block[,6] <- bioproc
        block[,7] <- zclass
        block[,8] <- rs
        names(block) <- block.names
        out <- rbind(out,block)
      }
    }
    out
  }

  if(do.prep) {
    rtable <- NULL

    # activity indicator shared by both Z classes: positive where the chemical
    # was tested, called a hit and has a non-NA Z; 0 elsewhere
    Q.base <- z
    Q.base[Q.base<=0] <- 0.001
    Q.base[tested<=0] <- 0
    Q.base[hit<=0] <- 0
    Q.base[is.na(Q.base)] <- 0

    for(zclass in c("Low-Z","High-Z")) {
      Q <- Q.base
      if(zclass=="Low-Z") {
        Q[z>3] <- 0
      }
      else {
        # was `z<3`, which counted Z == 3 in both classes; High-Z is now
        # strictly Z > 3 so the two classes do not overlap
        Q[z<=3] <- 0
      }
      Q[Q>0] <- 1

      rtable <- rbind(rtable,use.class.blocks(Q,ASSAY.LIST,"All Assays",zclass))
      for(ibp in seq_len(nbioproc)) {
        bioproc <- bioproc.list[ibp]
        assay.list <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],bioproc),"assay"]
        if(length(assay.list)>2) {
          rtable <- rbind(rtable,use.class.blocks(Q,assay.list,bioproc,zclass))
        }
      }
    }
    RTABLE <<- rtable
    csv.file <- "../plots/bioprocess_use_z.xlsx"
    write.xlsx(RTABLE,csv.file)
  }

  cex <- 0.9

  # One boxplot panel (fraction active by use class) for a process / Z class,
  # annotated with the group sizes and p-values against "A.Food".
  draw.panel <- function(rows,bioproc,zclass,ymax) {
    tempxx <- rows[is.element(rows[,"Z.class"],zclass),]
    fraction.hit <- tempxx[,"fraction.active"]
    groups <- tempxx[,"use_super_category"]
    main <- paste(bioproc,zclass)
    boxplot(fraction.hit~groups,xlab="",ylab="Fraction Assays Active / Chemical",cex.axis=cex,cex.lab=cex,ylim=c(0,ymax),main=main,las=2)

    x <- fraction.hit[is.element(groups,"A.Food")]   # reference group
    for(g in seq_len(nuseclass)) {
      gname <- useclass.plot.names[g]
      y <- fraction.hit[is.element(groups,gname)]
      p.lt <- wilcox.test(y,x,alternative="less")$p.value
      p.gt <- wilcox.test(y,x,alternative="greater")$p.value
      pstring <- "NS"
      col <- "black"
      if(is.nan(p.lt)) p.lt <- 1
      if(is.nan(p.gt)) p.gt <- 1
      if(min(p.lt,p.gt)<0.05) {
        if(p.lt<p.gt) {pstring <- format(p.lt,digits=2); col <- "blue" }
        else {pstring <- format(p.gt,digits=2); col <- "red" }
      }
      text(g,ymax,length(y),pos=1,cex=cex,col="black")
      # alternate the label height so neighbouring p-values do not collide
      text(g,ymax*(0.9-0.05*(g%%2)),pstring,pos=1,cex=cex,col=col)
    }
  }

  pdf.file <- "../plots/bioprocess_quadrant_trellis_Z.pdf"
  if(to.file) {
    pdf(file=pdf.file,width=7.5,height=7.5,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the PDF even when plotting fails
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,2),mar=c(6,4,1,1))
  for(bp in 0:nbioproc) {
    if(bp==0) {
      bioproc <- "All Assays"
      ymax <- 0.4
    }
    else {
      bioproc <- bioproc.list[bp]
      ymax <- 1.2
    }
    print(bioproc)
    temp <- RTABLE[is.element(RTABLE[,"BioProcess"],bioproc),]
    if(dim(temp)[1]>5) {
      draw.panel(temp,bioproc,"High-Z",ymax)
      draw.panel(temp,bioproc,"Low-Z",ymax)
      if(!to.file) browser()
    }
  }
  invisible(NULL)
}
# Map a use super-category to its plot label. The letter prefix of each label
# fixes the alphabetical order of the groups. A value that is not in the
# table is printed and returned unchanged.
use.sub <- function(useclass) {
  relabel <- c(FoodFlavorFragrance = "A.Food",
               Solvent             = "B.Solvent",
               Other               = "C.Other",
               Surfactant          = "D.Surfactant",
               Herbicide           = "E.Herbicide",
               Microbicide         = "F.Microbicide",
               Pesticide           = "G.Pesticide",
               Pharmaceutical      = "H.Drug")
  hit <- useclass == names(relabel)
  if (any(hit)) {
    return(relabel[[which(hit)]])
  }
  print(useclass)
  useclass
}
#--------------------------------------------------------------------------------------
#
# bioclass analysis
#
#--------------------------------------------------------------------------------------
# Heatmap and hierarchical clustering of chemicals over the cell-stress /
# cytotoxicity assays.
#
# Chemicals: entries of CODE.LIST with more than 500 tested assays.
# Assays: all assays of nine fixed biological processes; each process gets its
# own side-bar colour. Every cell of MAT.Z.NORM is coded as 0 (NA), 1 (Z <= 3)
# or 2 (Z > 3) before plotting and clustering.
#
# The chemicals are clustered (Euclidean distance, ward.D); cluster
# memberships for tree cuts at heights 1..50 are written, with chemical
# annotations and the coded matrix, to
# ../output/cell_stress_cytotox_clusters.xlsx.
#
# to.file: TRUE draws the heatmap into ../plots/bioclass_hm.pdf; FALSE draws
#          on the current device and stops in browser() at the end.
bioclass.hm <- function(to.file=FALSE) {
  print.current.function()

  bp.list <- c("proliferation decrease","cytotoxicity SRB","cytotoxicity BLA","apoptosis up","oxidative stress up","ER stress","heat shock","mitochondrial disruption up","microtubule up")
  # display labels in the same order as bp.list (the original had SRB/BLA
  # swapped and a typo); not used by the plot below
  bp.list.names <- c("Proliferation decrease","Cytotoxicity SRB","Cytotoxicity BLA","Apoptosis Induction","Oxidative Stress","ER Stress","Heat Shock","Mitochondrial Disruption","Microtubule Disruption")
  bp.colors <- c("orange","red","violet","blue","gray","yellow","green","black","cyan")

  # assays of every process, with the matching side-bar colour per assay
  assay.list <- NULL
  assay.color <- NULL
  for(i in seq_along(bp.list)) {
    assays <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],bp.list[i]),"assay"]
    assay.list <- c(assay.list,assays)
    assay.color <- c(assay.color,rep(bp.colors[i],length(assays)))
  }

  rs <- rowSums(MAT.tested)
  code.list <- CODE.LIST[rs>500]

  # NOTE(review): the original first selected only chemicals with
  # cytotox_median_um < 1000 and then immediately overwrote that selection
  # with the full list; the dead assignment was dropped, behaviour unchanged.
  z1 <- MAT.Z.NORM[code.list,assay.list]
  z <- z1
  z[is.na(z)] <- -1000000
  z[z<=3] <- 1
  z[z>3] <- 2
  z[is.na(z1)] <- 0

  if(to.file) {
    file <- paste("../plots/bioclass_hm.pdf",sep="")
    pdf(file=file,width=8,height=8,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the PDF even when clustering or the xlsx export fails
    on.exit(dev.off(),add=TRUE)
  }
  result <- heatmap(t(as.matrix(z)),margins=c(5,10),scale="none",main="",
                    xlab="",ylab="",cexCol=0.005,cexRow=0.5,col=brewer.pal(9,"Reds"),
                    hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE,
                    RowSideColors=assay.color,Rowv=NA)

  dmat <- as.matrix(z)
  cl <- hclust(d=dist(dmat),method="ward.D")
  nlevel <- 50

  # annotation columns, built in one step instead of cell by cell;
  # assumes the rows of z are named by chemical CODE
  codes <- rownames(dmat)
  clout <- data.frame(CODE=codes,
                      Name=CHEMS[codes,"Name"],
                      use_category=CHEMS[codes,"use_category"],
                      use_super_category=CHEMS[codes,"use_super_category"],
                      structure_category=CHEMS[codes,"structure_category"],
                      row.names=codes,stringsAsFactors=FALSE)

  # one membership column per tree-cut height
  for(hcut in 1:nlevel) {
    clout[[paste("Level_",hcut,sep="")]] <- as.vector(cutree(cl,h=hcut))
    cat("Finished preping clusters for hcut: ",hcut,"\n")
    flush.console()
  }

  clout <- cbind(clout,z)

  fname <- paste("../output/cell_stress_cytotox_clusters.xlsx",sep="")
  write.xlsx(clout,fname)

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# plot, per chemical, the low-Z (Z<=3) hits in cell-free vs. cell-based assays
#
#--------------------------------------------------------------------------------------
cellfree.vs.cellbased <- function(to.file=FALSE) {
  # Scatter plot with one point per chemical: scaled count of low-Z (Z<=3)
  # hits in cell-based assays (y) against cell-free assays (x). Points are
  # coloured by the chemical's use_super_category; selected chemicals are
  # numbered and listed by name on the right-hand side of the plot.
  #
  # to.file: if TRUE the plot is written to ../plots/cellfree_vs_cellbased.pdf,
  #          otherwise it is drawn on the current device and browser() is
  #          entered at the end.
  #
  # Reads the globals ASSAY.INFO, MAT.Z.NORM, MAT.hitcall, MAT.tested, CHEMS.
  print.current.function()
  if(to.file) {
    fname <- "../plots/cellfree_vs_cellbased.pdf"
    pdf(file=fname,width=8,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close the pdf device even if an error interrupts the plotting below
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(1,1),mar=c(4,4,2,2))

  source.list <- sort(uniquify(ASSAY.INFO[,"source_group"]))

  # keep only chemicals whose MAT.tested row sum exceeds 500
  Z <- MAT.Z.NORM
  H <- MAT.hitcall
  tested <- MAT.tested
  rs <- rowSums(tested)
  Z <- Z[rs>500,]
  H <- H[rs>500,]
  tested <- tested[rs>500,]

  # blank out untested / inactive cells, then reduce Z to a 0/1 indicator
  # that is 1 only where a Z-score is present and <= 3
  Z[tested==0] <- NA
  Z[H==0] <- NA
  low.z <- !is.na(Z) & Z<=3
  Z[] <- 0
  Z[low.z] <- 1

  # drop chemicals and assays without any low-Z hit
  rs <- rowSums(Z)
  cs <- colSums(Z)
  Z <- Z[rs>0,cs>0,drop=FALSE]

  # assays from these source groups are treated as cell-free, all others as cell-based
  cellfree.sources <- c("NVS_ADME","NVS_ADME_Activator","NVS_ENZ","NVS_ENZ_Activator",
                        "NVS_GPCR","NVS_IC","NVS_MP","NVS_NR","NVS_TR")
  assay.list.cellbased <- NULL
  assay.list.cellfree <- NULL
  for(i in seq_along(source.list)) {
    src <- source.list[i]
    assay.list <- as.character(ASSAY.INFO[is.element(ASSAY.INFO[,"source_group"],src),"assay"])
    if(is.element(src,cellfree.sources)) {
      assay.list.cellfree <- c(assay.list.cellfree,assay.list)
    }
    else {
      assay.list.cellbased <- c(assay.list.cellbased,assay.list)
    }
  }
  # colnames() works whether Z is a matrix or a data frame
  assay.list.cellfree <- assay.list.cellfree[is.element(assay.list.cellfree,colnames(Z))]
  assay.list.cellbased <- assay.list.cellbased[is.element(assay.list.cellbased,colnames(Z))]
  Z.cellfree <- Z[,assay.list.cellfree,drop=FALSE]
  Z.cellbased <- Z[,assay.list.cellbased,drop=FALSE]
  n.cellbased <- rowSums(Z.cellbased)
  n.cellfree <- rowSums(Z.cellfree)

  # NOTE(review): both denominators are the number of chemicals (rows), not the
  # number of assays in each group. Kept unchanged because the axis limits and
  # the labelling thresholds below are tuned to this scaling -- confirm the
  # intended denominator before changing it.
  nx <- dim(Z.cellfree)[1]
  ny <- dim(Z.cellbased)[1]
  f.cellbased <- as.double(n.cellbased) / nx
  f.cellfree <- as.double(n.cellfree) / ny

  plot(f.cellbased~f.cellfree,xlab="Fraction Hits Z<3 Cell-free",ylab="Fraction Hits Z<3 Cell-based",
       xlim=c(0,0.3),ylim=c(0,0.3),cex.lab=1.1,cex.axis=1.1)
  lines(c(0,0.2),c(0,0.2))
  lines(c(0.2,0.2),c(0,0.5))

  # assay counts, placed in the top-left corner of the 0..0.3 plotting region
  text(0,0.3,paste("Cell-free assays: ",dim(Z.cellfree)[2]),pos=4)
  text(0,0.285,paste("Cell-based assays: ",dim(Z.cellbased)[2]),pos=4)

  # fill colour for each use_super_category; also drives the legend order
  category.colors <- c(Surfactant="red",Microbicide="blue",Pharmaceutical="white",
                       Pesticide="cyan",Herbicide="yellow",Other="gray")

  # hand-drawn legend
  x <- 0.22
  y <- 0.05
  dy <- 5/nx
  for(category in names(category.colors)) {
    points(x,y,bg=category.colors[[category]],pch=21,cex=1.5)
    text(x,y,category,pos=4)
    y <- y-dy
  }

  # overlay coloured points; chemicals whose category is missing or not in
  # category.colors keep the plain symbol drawn by plot()
  codes <- rownames(Z.cellfree)
  cname <- as.character(CHEMS[codes,"Name"])
  supercat <- as.character(CHEMS[codes,"use_super_category"])
  bg <- unname(category.colors[supercat])
  known <- !is.na(bg)
  points(f.cellfree[known],f.cellbased[known],bg=bg[known],pch=21,cex=1.5)

  # number the chemicals of interest and list their names
  pharma.or.microbicide <- is.element(supercat,c("Pharmaceutical","Microbicide"))
  doit <- (f.cellfree>0.14) |
    (f.cellfree>f.cellbased & f.cellbased>0.05) |
    (f.cellbased<0.05 & f.cellfree>(f.cellbased*1.5) & pharma.or.microbicide)
  counter <- 0
  for(i in which(doit)) {
    counter <- counter+1
    # a few labels are moved below/above their point to avoid overlaps
    label.pos <- 4
    if(counter==11) label.pos <- 1
    else if(counter==8 || counter==4) label.pos <- 3
    text(f.cellfree[i],f.cellbased[i],counter,pos=label.pos,cex=0.8)
    text(0.2,0.3-counter*0.01,paste(counter,":",cname[i]),pos=4,cex=0.6)
  }

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# plot the range of Bruns Watson calls vs promiscuity
#
# QC=OK
#--------------------------------------------------------------------------------------
bruns.watson <- function(to.file=FALSE) {
  # Box plots of per-chemical hit fractions grouped by the Bruns-Watson result
  # (clean / dirty / fail) read from ../BrunsWatson/BrunsWatsonOutput.txt.
  #
  # For every chemical flagged Phase_I or Phase_II in CHEMS:
  #   hf.lo = (# non-NA Z-scores < 3) / (# assays tested)
  #   hf.hi = (# non-NA Z-scores > 5) / (# assays tested)
  # One-sided Wilcoxon tests compare the classes pairwise and the p-values are
  # written onto the plots.
  #
  # to.file: if TRUE the plot is written to ../plots/bruns_watson.pdf,
  #          otherwise it is drawn on the current device and browser() is
  #          entered at the end.
  #
  # Reads the globals CHEMS, MAT.Z.NORM and MAT.tested.
  print.current.function()
  file <- "../BrunsWatson/BrunsWatsonOutput.txt"
  bdata <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
  print(dim(bdata))

  code.list <- CHEMS[is.element(CHEMS[,"Phase_I"],1),"CODE"]
  code.list <- c(code.list,CHEMS[is.element(CHEMS[,"Phase_II"],1),"CODE"])
  code.list <- sort(unique(code.list))
  nchem <- length(code.list)

  hf.hi <- rep(NA_real_,nchem)
  hf.lo <- rep(NA_real_,nchem)
  for(i in seq_len(nchem)) {
    code <- code.list[i]
    temp <- MAT.Z.NORM[code,]
    temp <- temp[!is.na(temp)]
    n.lo <- sum(temp<3)
    n.hi <- sum(temp>5)

    tested <- MAT.tested[code,]
    tested[is.na(tested)] <- 0
    tested[tested<0] <- 0
    denom <- sum(tested)

    # leave the fractions NA when nothing was tested (avoids NaN/Inf)
    if(denom>0) {
      hf.hi[i] <- n.hi/denom
      hf.lo[i] <- n.lo/denom
    }
  }

  # class code per chemical: 0=clean, 1=dirty, 2=fail, NA when the chemical is
  # absent from bdata or has any other result. match() uses the first row
  # when a Title occurs more than once.
  class.codes <- c(clean=0,dirty=1,fail=2)
  res <- as.character(bdata[match(code.list,bdata[,"Title"]),"Result"])
  bw.class <- unname(class.codes[res])

  classified <- !is.na(bw.class)
  hf.hi <- hf.hi[classified]
  hf.lo <- hf.lo[classified]
  bw.class <- bw.class[classified]

  p.20.lo <- wilcox.test(hf.lo[bw.class==2],hf.lo[bw.class==0],alternative="greater")$p.value
  p.21.lo <- wilcox.test(hf.lo[bw.class==2],hf.lo[bw.class==1],alternative="greater")$p.value
  p.10.lo <- wilcox.test(hf.lo[bw.class==1],hf.lo[bw.class==0],alternative="greater")$p.value

  p.20.hi <- wilcox.test(hf.hi[bw.class==2],hf.hi[bw.class==0],alternative="greater")$p.value
  p.21.hi <- wilcox.test(hf.hi[bw.class==2],hf.hi[bw.class==1],alternative="greater")$p.value
  p.10.hi <- wilcox.test(hf.hi[bw.class==1],hf.hi[bw.class==0],alternative="greater")$p.value

  if(to.file) {
    fname <- "../plots/bruns_watson.pdf"
    pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close only the device opened here, and do so even on error
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,1),mar=c(4,4,2,2))
  ymax <- 0.5
  ytop <- 0.45
  delta <- 0.05

  boxplot(hf.lo~bw.class,xlab="Bruns-Watson Class",ylab="Hit Fraction Lo-Z",names=c("clean","dirty","fail"),cex.axis=1.2,cex.lab=1.2,ylim=c(0,ymax),main="Actual vs. Predicted Promiscuity, Gene Hits")
  text(1.2,ytop,paste("p(fail:clean):",format(p.20.lo,digits=3)),pos=4)
  text(2.2,ytop,paste("p(fail:dirty):",format(p.21.lo,digits=3)),pos=4)
  text(1.2,ytop-delta,paste("p(dirty:clean):",format(p.10.lo,digits=3)),pos=4)

  boxplot(hf.hi~bw.class,xlab="Bruns-Watson Class",ylab="Hit Fraction Hi-Z",names=c("clean","dirty","fail"),cex.axis=1.2,cex.lab=1.2,ylim=c(0,ymax),main="Actual vs. Predicted Promiscuity, Gene Hits")
  text(1.2,ytop,paste("p(fail:clean):",format(p.20.hi,digits=3)),pos=4)
  text(2.2,ytop,paste("p(fail:dirty):",format(p.21.hi,digits=3)),pos=4)
  text(1.2,ytop-delta,paste("p(dirty:clean):",format(p.10.hi,digits=3)),pos=4)

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# boxplots as a function of cytotox hits
#
#--------------------------------------------------------------------------------------
cytotox.boxplot <- function(to.file=FALSE) {
  # Two box plots keyed on the number of cytotoxicity-assay hits per chemical:
  #   1. fraction of non-cytotoxicity assays hit, with a through-the-origin
  #      linear fit, its R-squared and the number of chemicals per group;
  #   2. log(AC50) values of the active cytotoxicity assays.
  #
  # to.file: if TRUE the plots go to ../plots/cytotox_boxplot.pdf, otherwise
  #          they are drawn on the current device and browser() is entered
  #          after each plot.
  #
  # Reads the globals CODE.LIST, ASSAY.LIST, CYTOTOX.ASSAYS, MAT.hitcall,
  # MAT.tested and MAT.logAC50.
  print.current.function()

  if(to.file) {
    fname <- "../plots/cytotox_boxplot.pdf"
    pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close only the device opened here, and do so even on error
    on.exit(dev.off(),add=TRUE)
  }

  par(mfrow=c(1,1),mar=c(4,4,2,2))
  ymax <- 0.35
  code.list <- CODE.LIST
  cyto.list <- CYTOTOX.ASSAYS
  non.cyto.list <- ASSAY.LIST[!is.element(ASSAY.LIST,CYTOTOX.ASSAYS)]

  # 0/1 hit matrix: missing, negative and untested entries count as no hit
  hitmat <- MAT.hitcall
  hitmat[is.na(hitmat)] <- 0
  hitmat[hitmat<0] <- 0
  hitmat[is.na(MAT.tested)] <- 0
  hitmat[MAT.tested==0] <- 0
  hitmat[hitmat>0] <- 1
  hitmat.cyto <- hitmat[code.list,cyto.list]
  hitmat.non.cyto <- hitmat[code.list,non.cyto.list]
  groups <- rowSums(hitmat.cyto)
  values <- rowSums(hitmat.non.cyto) / length(non.cyto.list)

  # regression through the origin
  result <- lm(values~groups+0)
  slope <- unname(coef(result)[1])

  # draw each box at its group value (0,1,2,...) rather than at 1..k so that
  # the fitted line and the count labels below line up with the right box
  group.levels <- sort(unique(groups))
  boxplot(values~groups,at=group.levels,xlab="Number of Cytotoxity Hits",ylab="Fraction of Hits",cex.axis=1.2,cex.lab=1.2,ylim=c(0,ymax),main="")
  lines(c(0,40),c(0,40*slope))
  r.squared <- summary(result)$r.squared
  text(x=2,y=0.25,paste("R-squared =",format(r.squared,digits=2)),pos=4,cex=1.5)
  # number of chemicals in each group, written just above the fitted line
  for(i in 0:max(groups)) {
    text(i,0.1+slope*i,sum(groups==i),pos=4)
  }
  if(!to.file) browser()

  # second plot: log(AC50) of the cytotoxicity assays. Missing or untested
  # entries are set to 6, and only values below 6 are plotted.
  hitmat <- MAT.logAC50
  hitmat[is.na(hitmat)] <- 6
  hitmat[is.na(MAT.tested)] <- 6
  hitmat[MAT.tested==0] <- 6
  hitmat <- as.matrix(hitmat[code.list,cyto.list])
  hit.count <- groups
  # hit.count has one entry per row and is recycled down the columns
  active <- hitmat<6 & hit.count>0
  values <- hitmat[active]
  groups <- hit.count[row(hitmat)[active]]
  boxplot(values~groups,xlab="Number of Cytotoxity Hits",ylab="log(AC50) for Cytotox assays",cex.axis=1.2,cex.lab=1.2,ylim=c(-3,3),main="")
  if(!to.file) browser()

  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# plot histograms of hits by chemical and assay
#
# QC=OK
#--------------------------------------------------------------------------------------
chemical.hit.dist.summary <- function(do.prep=TRUE,to.file=FALSE) {
  # Histograms of the fraction of tested assays hit per chemical and of the
  # fraction of tested chemicals hit per assay.
  #
  # do.prep: if TRUE, recompute the two fraction vectors from MAT.tested and
  #          MAT.hitcall and store them in the globals fraction.hit.by.chem
  #          and fraction.hit.by.assay; if FALSE, reuse those globals.
  # to.file: if TRUE the plot is written to
  #          ../plots/chemical_hit_dist_summary.pdf, otherwise it is drawn on
  #          the current device and browser() is entered at the end.
  print.current.function()
  if(do.prep) {
    tested <- MAT.tested
    tested[tested<0] <- 0
    tested[is.na(tested)] <- 0

    # 0/1 hit matrix, zeroed wherever the pair was not tested
    hit <- MAT.hitcall
    hit[is.na(hit)] <- 0
    hit[hit>0] <- 1
    hit[hit<0] <- 0
    hit[tested==0] <- 0

    # denominators of 0 are replaced by 1 so untested rows/columns give 0, not NaN
    n.tested <- rowSums(tested)
    n.tested[n.tested==0] <- 1
    fraction.hit.by.chem <<- rowSums(hit)/n.tested

    n.tested <- colSums(tested)
    n.tested[n.tested==0] <- 1
    fraction.hit.by.assay <<- colSums(hit)/n.tested
  }
  else if(!exists("fraction.hit.by.chem") || !exists("fraction.hit.by.assay")) {
    # fail before a pdf device is opened, with a message that says what to do
    stop("fraction.hit.by.chem / fraction.hit.by.assay not found; call with do.prep=TRUE first",call.=FALSE)
  }
  if(to.file) {
    fname <- "../plots/chemical_hit_dist_summary.pdf"
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close only the device opened here, and do so even on error
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(2,1),mar=c(4,4,2,2))
  # fractions lie in [0,1]; use bins of width 0.02 for both histograms
  xmax <- 1
  breaks <- seq(0,xmax,by=0.02)
  hist(fraction.hit.by.chem,xlab="Fraction Hits",ylab="Chemicals",cex.axis=1.2,cex.lab=1.2,main="By Chemical",freq=TRUE,breaks=breaks,ylim=c(0,1000))
  hist(fraction.hit.by.assay,xlab="Fraction Hits",ylab="Assays",cex.axis=1.2,cex.lab=1.2,main="By Assay",freq=TRUE,breaks=breaks,ylim=c(0,500))

  if(!to.file) browser()
  invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Plot, for each assay, a histogram of the Z-scores of the active chemicals
# QC=OK
#--------------------------------------------------------------------------------------
assay.Zdist <- function(to.file=FALSE) {
  # For every assay in ASSAY.LIST with at least one hit, draw a histogram of
  # the normalised Z-scores of the hits (8 panels per page), annotated with
  # the number of chemicals tested (ntry) and the number of hits (nhit).
  #
  # to.file: if TRUE the plots go to ../plots/assay_Zdist.pdf, otherwise they
  #          are drawn on the current device and browser() is entered after
  #          every histogram and once more at the end.
  #
  # Reads the globals ASSAY.LIST, ASSAY.INFO, MAT.Z.NORM, MAT.Z, MAT.hitcall.
  print.current.function()
  if(to.file) {
    fname <- "../plots/assay_Zdist.pdf"
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # close only the device opened here, and do so even on error
    on.exit(dev.off(),add=TRUE)
  }
  par(mfrow=c(4,2),mar=c(4,4,2,2))

  # Z-scores, blanked where the hit call is 0 or the raw Z is 0
  z <- MAT.Z.NORM
  z[MAT.hitcall==0] <- NA
  z[MAT.Z==0] <- NA

  # keep chemicals that have a (non-NA) hit call in at least 200 assays
  chem.keep <- rowSums(!is.na(MAT.hitcall))>=200

  for(i in seq_along(ASSAY.LIST)) {
    assay <- ASSAY.LIST[i]

    src <- ASSAY.INFO[is.element(ASSAY.INFO[,"assay"],assay),"source_group"][1]
    print(assay)
    flush.console()
    if(is.na(src)) {
      cat("No source: ",assay,"\n")
      browser()
    }
    hitcall <- MAT.hitcall[,assay][chem.keep]
    ztemp <- z[,assay][chem.keep]

    # a chemical counts as tested when its hit call is not NA, and as a hit
    # when the call is positive (negative calls are not hits)
    was.tested <- !is.na(hitcall)
    ntest <- sum(was.tested)
    nhit <- sum(hitcall[was.tested]>0)

    if(nhit>0) {
      xmax <- 6
      xmin <- -6
      # restrict to the plotted window so every value falls inside the breaks
      ztemp <- ztemp[!is.na(ztemp)]
      ztemp <- ztemp[ztemp> -5]
      ztemp <- ztemp[ztemp< 8]
      if(length(ztemp)>0) {
        if(max(ztemp)>6) xmax <- 8
        breaks <- seq(-6,xmax,by=0.2)
        x <- hist(ztemp,xlim=c(xmin,xmax),main=assay,cex.lab=1.2,cex.axis=1.2,ylab="Chemicals",xlab="Z-score",breaks=breaks,freq=TRUE)
        ymax <- max(x$counts)
        eps <- 0.1
        text(-6,ymax*(1-1*eps),paste("ntry=",ntest,sep=""),pos=4)
        text(-6,ymax*(1-2*eps),paste("nhit=",nhit,sep=""),pos=4)
        # red marker at Z = 0
        lines(c(0,0),c(0,ymax/4),lwd=2,col="red")
        if(!to.file) browser()
      }
    }
  }
  if(!to.file) browser()
  invisible(NULL)
}
