#--------------------------------------------------------------------------------------
#
# toxcast_driver_v12.R - code to analyze the ToxCast data
#
# November 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
options(java.parameters = "-Xmx1000m")
library(grDevices)
library(RColorBrewer)
library(stringr)
library(mixdist)
library(class)
library(lattice)
library(openxlsx)
source("utils.R")
source("diagnostics_v01.R")
#source("genescore_v01.R")
#source("pathway_v01.R")
#source("burst_v01.R")
#source("rat_cancer_model_ivive.R")
# Directory holding the pipeline CSV/XLSX exports read by prep_matrices() and
# load_assay_defs(). File names are appended with no separator, so the value
# must end with "/".
VARMATDIR <<- "input/varmats_141121/"
# Date stamp embedded in the input file names; used as the default `suffix`
# argument of run.all(), load_and_scale() and prep_matrices().
VARMATDATE <<- "141121"
#--------------------------------------------------------------------------------------
#
# Run all analyses
#
#--------------------------------------------------------------------------------------
# Top-level driver: rebuild the global data structures and optionally run the
# gene-score analysis.
#
# suffix:       date stamp of the input files, passed on to load_and_scale()
# do.genescore: if TRUE, also call genescore_driver(), ref_chems() and
#               ref_chems_summary(). None of these is defined in this file.
#               NOTE(review): presumably they come from genescore_v01.R, whose
#               source() line is commented out at the top of this file -
#               confirm it is sourced before enabling this flag.
run.all <- function(suffix=VARMATDATE,do.genescore=FALSE) {
    load_and_scale(suffix)
    #run_diagnostics(do.prep=T)
    if(do.genescore) {
        genescore_driver()
        ref_chems()
        ref_chems_summary()
    }
    #print_summary_stats()
}
#--------------------------------------------------------------------------------------
#
# load all of the data and build scaled matrices
#
# CHEMS - matrix of all chemical information
# CODE.LIST - list of unique chemical codes, rownames of all matrices
# NCHEM - number of chemicals
#
# ASSAY.INFO - assay matrix
# ASSAY.LIST - vector of unique assay names - colnames of all matrices
# NASSAY - number of assays
#
# CYTOTOX - matrix of cytotox parameters for each chemical
#
# Matrices (NCHEM x NASSAY)
#
# MAT.AC50 (uM)
# MAT.AC50_loss
# MAT.ACB MAT.ACC  MAT.AC10
# MAT.hitcall
# MAT.logAC50 MAT.logAC50_loss
# MAT.min_conc  MAT.max_conc
# MAT.model
# MAT.Emax
# MAT.T
# MAT.T.SCALED (T scaled so that 95%-ile of hits have T=100%)
# MAT.tested
# MAT.W
# MAT.Z
# MAT.Z.NORM (shifted so that the first peak of the Z distribution is at ~0)
#
# Each time you use a matrix, you need to make sure that you are appropriately checking
# the tested and hitcall matrix
#
#--------------------------------------------------------------------------------------
# Load every input file and derive the shifted / scaled matrices.
#
# suffix: date stamp of the input files, passed on to prep_matrices()
#
# The steps depend on each other through globals, so the order matters:
#   prep_matrices()      creates the MAT.* matrices, ASSAY.LIST and NASSAY
#   load_assay_defs()    needs ASSAY.LIST, creates ASSAY.INFO
#   shift_zscore()       needs MAT.Z, MAT.tested, MAT.hitcall and ASSAY.INFO
#   scale_top_by_assay() needs MAT.T, MAT.tested, MAT.hitcall, ASSAY.INFO, NASSAY
# source_Zdist() is defined outside this file.
# NOTE(review): presumably it writes output/source_Zshifts_original.txt, which
# shift_zscore() reads - confirm.
load_and_scale <- function(suffix=VARMATDATE) {
	prep_matrices(suffix)
	load_assay_defs()
	source_Zdist(to.file=TRUE,zmode="original")
	shift_zscore()
	scale_top_by_assay()
}
#--------------------------------------------------------------------------------------
#
# Create the full input AC50, T,B,Emax,... files, one row per chemical
#
#--------------------------------------------------------------------------------------
# Build all global chemical tables and chemical x assay matrices from the
# pipeline exports in VARMATDIR.
#
# suffix: date stamp embedded in the input file names (default VARMATDATE)
#
# Globals written: PIPELINE.SAMPLES, PIPELINE.CHEMS, ALL.FLAGS, CHEMS,
# CODE.LIST, NCHEM, CYTOTOX, MAT.AC50, MAT.logAC50, MAT.AC50_loss,
# MAT.logAC50_loss, MAT.Emax, MAT.model, MAT.hitcall, MAT.T, MAT.W, MAT.Z,
# MAT.min_conc, MAT.max_conc, MAT.AC10, MAT.ACC, MAT.ACB, MAT.tested,
# ASSAY.LIST, NASSAY, GENE.FAMILY (plus OLD.CHEMS via load_old_chemical_defs()).
# File written: input/ToxCast_Chems_Master_<suffix>.txt
prep_matrices <- function(suffix=VARMATDATE) {
    # Read "<VARMATDIR><stem><suffix>.csv"; extra arguments go to read.csv
    read_varmat <- function(stem,...) {
        file <- paste0(VARMATDIR,stem,suffix,".csv")
        read.csv(file,header=TRUE,stringsAsFactors=FALSE,quote="\"",comment.char="",...)
    }
    # Read one "AllResults_<stem>_Matrix_<suffix>.csv" export; the first CSV
    # column supplies the row names
    read_matrix <- function(stem) {
        read_varmat(paste0("AllResults_",stem,"_Matrix_"),row.names=1)
    }
    # Drop literal double-quote characters left inside chemical names
    strip_quotes <- function(x) str_replace_all(x,"\"","")
    # Replace every NA cell of mat by value
    fill_na <- function(mat,value) {
        mat[is.na(mat)] <- value
        mat
    }

    cat("==========================================================================\n")
    cat("Prepare the matrices ...\n")
    cat("==========================================================================\n")
    cat("Read in chemical data ...\n")
    flush.console()

    #
    # chemicals as delivered by the pipeline: one row per unique code, taking
    # the identifiers from the first sample row carrying that code
    #
    samples <- read_varmat("Chemical_Summary_")
    PIPELINE.SAMPLES <<- samples
    code.list <- sort(unique(samples[,"code"]))
    first.row <- match(code.list,samples[,"code"])
    chems <- data.frame(CODE=code.list,
                        CASRN=samples[first.row,"casn"],
                        Name=samples[first.row,"chnm"],
                        DSSTox_GSID=paste0("DSSTox_",samples[first.row,"chid"]),
                        stringsAsFactors=FALSE)
    rownames(chems) <- chems[,"CODE"]
    PIPELINE.CHEMS <<- chems
    cat("Dimension of PIPELINE.CHEMS: ",dim(chems),"\n")
    flush.console()

    flags <- read_varmat("AllResults_flags_")
    flags[,"chnm"] <- strip_quotes(flags[,"chnm"])
    ALL.FLAGS <<- flags

    load_old_chemical_defs()

    #
    # create the unified chemicals set: pipeline identifiers plus the
    # annotation columns carried over from OLD.CHEMS
    #
    annot.cols <- c("target_gene","toxcast_reference_target_gene","use_category",
                    "use_super_category","structure_category","structure_super_category",
                    "Phase_I","Phase_II","E1K","Tox21")
    unified <- chems
    for(col in annot.cols) unified[[col]] <- rep(NA,nrow(unified))
    rownames(unified) <- unified[,"CODE"]
    NCHEM <<- nrow(unified)

    # Look the codes up explicitly. Indexing OLD.CHEMS[code,] with an unknown
    # row name returns a single all-NA row, so a "did I get exactly one row"
    # test cannot tell known from unknown chemicals.
    old.row <- match(unified[,"CODE"],rownames(OLD.CHEMS))
    known <- !is.na(old.row)
    if(any(known)) {
        for(col in annot.cols) {
            unified[[col]][known] <- OLD.CHEMS[old.row[known],col]
        }
    }
    # Defaults for chemicals without an old definition; target_gene,
    # toxcast_reference_target_gene and structure_super_category stay NA
    if(any(!known)) {
        unified[!known,"use_category"] <- "unknown"
        unified[!known,"use_super_category"] <- "Other"
        unified[!known,"structure_category"] <- "unknown"
        unified[!known,"Phase_I"] <- 0
        unified[!known,"Phase_II"] <- 0
        unified[!known,"E1K"] <- 0
        unified[!known,"Tox21"] <- 0
    }
    unified[,"Name"] <- strip_quotes(unified[,"Name"])

    CHEMS <<- unified
    CODE.LIST <<- unified[,"CODE"]

    outfile <- paste0("input/ToxCast_Chems_Master_",suffix,".txt")
    write.table(unified,file=outfile, row.names=FALSE, append=FALSE, quote=TRUE, sep = "\t")
    cat("CHEMS read in\n")
    flush.console()

    #
    # assays
    #
    cat("Read in assay data ...\n")

    cyto <- read_varmat("AllResults_cyto_dist_")
    cyto <- cyto[,2:11]
    names(cyto) <- c("CASRN","Name","CODE","cytotox_median_raw","cytotox_mad","nhit","global_mad","cytotox_median_log", "cytotox_median_um","cytotox_lower_bound_um")
    rownames(cyto) <- cyto[,"CODE"]
    cyto[,"Name"] <- strip_quotes(cyto[,"Name"])
    CYTOTOX <<- cyto

    temp.tested <- read_matrix("tested")
    temp.hit <- read_matrix("hitc")
    temp.log_ac50 <- read_matrix("modl_ga")
    temp.log_ac10 <- read_matrix("modl_ac10")
    temp.log_acc <- read_matrix("modl_acc")
    temp.log_acb <- read_matrix("modl_acb")
    temp.log_loss_ac50 <- read_matrix("modl_la")
    temp.emax <- read_matrix("max_med")
    temp.w <- read_matrix("modl_gw")
    temp.loss_w <- read_matrix("modl_lw")
    temp.t <- read_matrix("modl_tp")
    temp.modl <- read_matrix("modl")
    temp.z <- read_matrix("zscore")
    temp.log_cmin <- read_matrix("logc_min")
    temp.log_cmax <- read_matrix("logc_max")

    cat("all assay files read in\n")
    flush.console()

    # Missing potency values get the placeholder log value 6 (10^6 after
    # exponentiation below); every other missing value becomes 0
    temp.log_ac10 <- fill_na(temp.log_ac10,6)
    temp.log_ac50 <- fill_na(temp.log_ac50,6)
    temp.log_loss_ac50 <- fill_na(temp.log_loss_ac50,6)
    temp.log_acc <- fill_na(temp.log_acc,6)
    temp.log_acb <- fill_na(temp.log_acb,6)
    temp.w <- fill_na(temp.w,0)
    temp.emax <- fill_na(temp.emax,0)
    temp.loss_w <- fill_na(temp.loss_w,0)
    temp.t <- fill_na(temp.t,0)
    temp.z <- fill_na(temp.z,0)
    temp.modl <- fill_na(temp.modl,0)
    temp.log_cmax <- fill_na(temp.log_cmax,0)
    temp.log_cmin <- fill_na(temp.log_cmin,0)
    temp.tested <- fill_na(temp.tested,0)
    cat("NA fixed\n")
    flush.console()

    # Non-hits get the same placeholders for the fitted curve parameters
    no.hit <- temp.hit<=0
    temp.log_ac10[no.hit] <- 6
    temp.log_ac50[no.hit] <- 6
    temp.log_loss_ac50[no.hit] <- 6
    temp.log_acc[no.hit] <- 6
    temp.log_acb[no.hit] <- 6
    temp.w[no.hit] <- 0
    temp.loss_w[no.hit] <- 0
    temp.t[no.hit] <- 0
    cat("hit matrix applied\n")

    flush.console()
    # The inputs are log10 values; convert to linear units
    temp.ac10 <- 10^temp.log_ac10
    temp.ac50 <- 10^temp.log_ac50
    temp.loss_ac50 <- 10^temp.log_loss_ac50
    temp.acc <- 10^temp.log_acc
    temp.acb <- 10^temp.log_acb
    temp.cmax <- 10^temp.log_cmax
    temp.cmin <- 10^temp.log_cmin
    cat("exponentiation\n")
    flush.console()

    # Untested chemical/assay pairs are NA in every matrix except the tested
    # matrix itself
    untested <- temp.tested<=0
    temp.ac10[untested] <- NA
    temp.ac50[untested] <- NA
    temp.acc[untested] <- NA
    temp.acb[untested] <- NA
    temp.loss_ac50[untested] <- NA
    temp.log_ac10[untested] <- NA
    temp.log_acc[untested] <- NA
    temp.emax[untested] <- NA
    temp.w[untested] <- NA
    temp.loss_w[untested] <- NA
    temp.t[untested] <- NA
    temp.modl[untested] <- NA
    temp.z[untested] <- NA
    temp.hit[untested] <- NA
    temp.cmax[untested] <- NA
    temp.cmin[untested] <- NA
    temp.log_loss_ac50[untested] <- NA
    temp.log_ac50[untested] <- NA

    cat("test matrix applied\n")
    flush.console()

    MAT.AC50 <<- temp.ac50
    MAT.logAC50 <<- temp.log_ac50
    cat("AC50: ",dim(temp.ac50),"\n")

    MAT.AC50_loss <<- temp.loss_ac50
    MAT.logAC50_loss <<- temp.log_loss_ac50
    cat("AC50_loss: ",dim(temp.loss_ac50),"\n");flush.console()

    MAT.Emax <<- temp.emax
    cat("Emax: ",dim(temp.emax),"\n");flush.console()

    MAT.model <<- temp.modl
    cat("model: ",dim(temp.modl),"\n");flush.console()

    MAT.hitcall <<- temp.hit
    cat("hitcall: ",dim(temp.hit),"\n");flush.console()

    MAT.T <<- temp.t
    cat("T: ",dim(temp.t),"\n");flush.console()

    MAT.W <<- temp.w
    cat("W: ",dim(temp.w),"\n");flush.console()

    MAT.Z <<- temp.z
    cat("Z: ",dim(temp.z),"\n");flush.console()

    MAT.min_conc <<- temp.cmin
    cat("min_conc: ",dim(temp.cmin),"\n");flush.console()

    MAT.max_conc <<- temp.cmax
    cat("max_conc: ",dim(temp.cmax),"\n");flush.console()

    MAT.AC10 <<- temp.ac10
    cat("AC10: ",dim(temp.ac10),"\n");flush.console()

    MAT.ACC <<- temp.acc
    cat("ACC: ",dim(temp.acc),"\n");flush.console()

    MAT.ACB <<- temp.acb
    cat("ACB: ",dim(temp.acb),"\n");flush.console()

    MAT.tested <<- temp.tested
    cat("tested: ",dim(temp.tested),"\n");flush.console()

    # From here on the matrices define the chemical and assay universe; CHEMS
    # is re-ordered / subset to the matrix rows
    matrix.codes <- rownames(temp.ac50)
    CODE.LIST <<- matrix.codes
    NCHEM <<- length(matrix.codes)
    CHEMS <<- unified[matrix.codes,]

    ASSAY.LIST <<- colnames(temp.ac50)
    NASSAY <<- length(colnames(temp.ac50))

    file <- "input/gene_family.csv"
    gf <- read.csv(file,header=TRUE,stringsAsFactors=FALSE,quote="\"",comment.char="")
    GENE.FAMILY <<- gf
}
#--------------------------------------------------------------------------------------
#
# reload the assay definitions
#
#--------------------------------------------------------------------------------------
# Read the assay annotation workbook from VARMATDIR, key it by the "Assay"
# column, and publish the rows for ASSAY.LIST (in ASSAY.LIST order) as the
# global ASSAY.INFO.
load_assay_defs <- function() {
	assay.defs <- read.xlsx(paste0(VARMATDIR,"Assay_Summary_modified_150320.xlsx"))
	rownames(assay.defs) <- assay.defs[,"Assay"]
	ASSAY.INFO <<- assay.defs[ASSAY.LIST,]
}
#--------------------------------------------------------------------------------------
#
# reload the chemical definitions
#
#--------------------------------------------------------------------------------------
# Read the generic-chemical annotation CSV, key it by its CODE column, and
# publish it as the global OLD.CHEMS.
load_old_chemical_defs <- function() {
	old.chems <- read.csv("input/ToxCast_GenericChemicals_2014_11_24.csv",
	                      header=TRUE,stringsAsFactors=FALSE,quote="\"",comment.char="")
	rownames(old.chems) <- old.chems[,"CODE"]
	OLD.CHEMS <<- old.chems
}
#--------------------------------------------------------------------------------------
#
# z-score: shift the data
#
# QC=OK
#--------------------------------------------------------------------------------------
# Shift the Z-scores of every assay by the per-source offset read from
# output/source_Zshifts_original.txt (columns Source and Center1).
#
# Reads globals:  MAT.Z, MAT.tested, MAT.hitcall, ASSAY.INFO
# Writes global:  MAT.Z.NORM - MAT.Z minus the shift of the assay's source,
#                 with NA wherever the Z-score, tested flag or hitcall is
#                 missing or the pair is untested / not a hit
# Writes file:    output/zscore_matrix_norm.txt
#
# An assay whose source has no usable shift (no row, or an NA Center1) is left
# unshifted. If a source has several rows the first one is used and a warning
# is raised, instead of recycling the shifts down the chemical column.
shift_zscore <- function() {
    cat("==========================================================================\n")
    cat("shift zscore\n")
    cat("==========================================================================\n")
    flush.console()
    file <- "output/source_Zshifts_original.txt"

    zshift <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
    ztemp <- MAT.Z

    # Cells that must end up NA. Every NA produced by the two comparisons is
    # already covered by the matching is.na() term, so the mask has no NAs.
    blank <- is.na(ztemp) | is.na(MAT.tested) | is.na(MAT.hitcall) |
             MAT.tested<=0 | MAT.hitcall<=0

    for(assay in colnames(MAT.Z)) {
        assay.source <- ASSAY.INFO[is.element(ASSAY.INFO[,"Assay"],assay),"Source"]
        shift <- zshift[is.element(zshift[,"Source"],assay.source),"Center1"]
        shift <- shift[!is.na(shift)]
        if(length(shift)==0) shift <- 0
        if(length(shift)>1) {
            warning("multiple Center1 shifts for assay ",assay,"; using the first",call.=FALSE)
            shift <- shift[1]
        }
        cat(assay,":",assay.source,":",shift,"\n")
        ztemp[,assay] <- ztemp[,assay] - shift
    }
    ztemp[blank] <- NA
    MAT.Z.NORM <<- ztemp
    outfile <- "output/zscore_matrix_norm.txt"
    write.table(ztemp,file=outfile, row.names=TRUE, append=FALSE, quote=FALSE, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# Scale the top
#
#--------------------------------------------------------------------------------------
# Rescale the curve tops (MAT.T) assay by assay and publish the result as the
# global MAT.T.SCALED.
#
# Reads globals: MAT.T, MAT.tested, MAT.hitcall, ASSAY.INFO, NASSAY
#
# Assays whose Source starts with NVS, ACEA, Tox21 or OT are left unscaled.
# For every other assay the factor is 100 divided by the 95th percentile of
# the positive tops (capped at 200) among tested hits; the factor falls back
# to 1 when it cannot be computed and is capped at 200. Scaled values above
# 200 are clipped to 200.
#
# Stops with an explicit message when an ASSAY.INFO row lacks an Assay name or
# a Source, since the matrix column could not be addressed in that case.
scale_top_by_assay <- function() {
    cat("==========================================================================\n")
    cat("scale_top_by_assay\n")
    cat("==========================================================================\n")

	exempt.prefixes <- c("NVS","ACEA","Tox21","OT")
	tscale <- MAT.T

	for(i in seq_len(NASSAY)) {
		assay <- ASSAY.INFO[i,"Assay"]
		source <- ASSAY.INFO[i,"Source"]
		if(is.na(assay) || is.na(source)) {
			stop("ASSAY.INFO row ",i," has no Assay/Source definition; cannot scale MAT.T",call.=FALSE)
		}
		# substring() recycles over the prefix lengths, so this compares the
		# start of the source name against each exempt prefix
		if(any(substring(source,1,nchar(exempt.prefixes))==exempt.prefixes)) {
			scaler <- 1
		}
		else {
			top <- MAT.T[,assay]
			tested <- MAT.tested[,assay]
			hits <- MAT.hitcall[,assay]
			# keep positive tops, dropping pairs flagged as untested or non-hit
			keep <- !is.na(top) & top>0 & !(tested %in% 0) & !(hits %in% 0)
			tlist <- pmin(as.numeric(top[keep]),200)
			# element 20 of the 5%-step quantiles is the 95th percentile
			scaler <- 100/quantile(tlist,probs=seq(0,1,0.05))[20]
		}
		if(is.na(scaler)) scaler <- 1
		if(scaler>200) scaler <- 200
		tscale[,assay] <- MAT.T[,assay]*scaler
		if(scaler!=1) {
			cat(assay,":",scaler,"\n")
			flush.console()
		}
	}
	tscale[tscale>200] <- 200
	MAT.T.SCALED <<- tscale
}
#--------------------------------------------------------------------------------------
#
# print the summary stats
#
#--------------------------------------------------------------------------------------
# Assemble a plain-text summary report, write it to
# output/toxcast_summary_stats.txt and echo it to the console.
#
# Reads globals: MAT.AC50, CHEMS, ASSAY.INFO, MAT.Z.NORM, MAT.T.SCALED,
#                MAT.hitcall, MAT.tested, CYTOTOX
#
# Report sections: matrix dimensions; chemical counts per use / structure
# (super)category; assay counts per source, intended target and biological
# process; hit counts split into four quadrants (Z >= 3 vs < 3, scaled top
# >= 50 vs < 50); hit ratios of chemicals with (nhit >= 2) and without
# (nhit < 2) cytotoxicity. The quadrant and cytotox sections only use
# chemicals whose Phase_I + Phase_II flag sum is positive.
print_summary_stats <- function() {
    cat("==========================================================================\n")
    cat("print_summary_stats\n")
    cat("==========================================================================\n")
    file <- "output/toxcast_summary_stats.txt"
    rule <- "===================================================\n"

    # Number of distinct non-NA values plus one "<value>\t<count>\n" line each
    tally <- function(values) {
        groups <- sort(unique(values))
        if(length(groups)==0) return(list(n=0L,text=""))
        counts <- vapply(groups,function(g) sum(is.element(values,g)),integer(1))
        list(n=length(groups),text=paste0(groups,"\t",counts,"\n",collapse=""))
    }
    # One report section: rule, title, count label and the tally lines
    section <- function(title,label,values) {
        tl <- tally(values)
        paste0(rule,title,"\n",label,tl$n,"\n\n",tl$text)
    }
    # Copy of mat with NA and negative cells set to 0
    clean_flags <- function(mat) {
        mat[is.na(mat)] <- 0
        mat[mat<0] <- 0
        mat
    }
    # Mean and SD over chemicals of (number of hits)/(number tested)
    hit_ratio <- function(codes) {
        hits <- clean_flags(MAT.hitcall[codes,])
        tests <- clean_flags(MAT.tested[codes,])
        frac <- rowSums(hits)/rowSums(tests)
        # a chemical with no tested assay gives 0/0; leave it out of the stats
        list(mean=mean(frac,na.rm=TRUE),sd=sd(frac,na.rm=TRUE))
    }

    s <- paste0(rule,"ToxCast Summary Statistics\n",rule,
                "Number of assays: ",dim(MAT.AC50)[2],"\n",
                "Number of chemicals: ",dim(MAT.AC50)[1],"\n")

    # which() drops chemicals whose phase flags are NA instead of turning them
    # into NA codes
    phase.sum <- CHEMS[,"Phase_I"]+CHEMS[,"Phase_II"]
    code.list <- CHEMS[which(phase.sum>0),"CODE"]
    s <- paste0(s,"Number of chemicals in Phase I,II: ",length(code.list),"\n",
                "===================================================\n\n")

    # The first section has its rule below the title, unlike the later ones
    use.tally <- tally(CHEMS[,"use_category"])
    s <- paste0(s,"stats on different chemical use categories\n",rule,
                "Number of use categories: ",use.tally$n,"\n\n",use.tally$text)

    s <- paste0(s,
        section("stats on different chemical use supercategories",
                "Number of use super_categories: ",CHEMS[,"use_super_category"]),
        section("stats on different chemical structure categories",
                "Number of structure categories: ",CHEMS[,"structure_category"]),
        section("stats on different chemical structure supercategories classes",
                "Number of structure super_categories: ",CHEMS[,"structure_super_category"]),
        section("stats on different assay sources",
                "Number of assay sources: ",ASSAY.INFO[,"Source"]),
        section("stats on different assay genes",
                "Number of assay gene targets: ",ASSAY.INFO[,"intended_target"]),
        section("stats on different assay biological processes",
                "Number of assay biological_processes: ",ASSAY.INFO[,"biological_process"]))

    s <- paste0(s,rule,"stats on total hits and hits in 4 quadrants\n")

    zmat <- MAT.Z.NORM[code.list,]
    tmat <- MAT.T.SCALED[code.list,]
    testmat <- clean_flags(MAT.tested[code.list,])
    hitmat <- clean_flags(MAT.hitcall[code.list,])
    hitmat[testmat==0] <- 0

    # A cell counts only if it was tested and called a hit
    active <- testmat>0 & hitmat>0
    z.ok <- active & !is.na(zmat)
    t.ok <- active & !is.na(tmat)
    # the is.na() guards come first so that NA comparisons cannot leak through
    z.hi <- z.ok & zmat>=3
    z.lo <- z.ok & zmat<3      # includes hits with zero or negative Z
    t.hi <- t.ok & tmat>=50
    t.lo <- t.ok & tmat>0 & tmat<50

    sq1 <- sum(t.hi & z.hi)
    sq2 <- sum(t.lo & z.hi)
    sq3 <- sum(t.hi & z.lo)
    sq4 <- sum(t.lo & z.lo)
    ntested <- sum(testmat)
    nhit <- sum(hitmat)
    nall <- dim(testmat)[1]*dim(testmat)[2]

    s <- paste0(s,
        "Total Cells: ",nall,"\n",
        "Tested:      ",ntested," : ",format(ntested/nall,digits=2),"\n",
        "Hits:        ",nhit," : ",format(nhit/ntested,digits=2),"\n",
        "Z.hi x T.hi: ",sq1," : ",format(sq1/nhit,digits=2),"\n",
        "Z.hi x T.lo: ",sq2," : ",format(sq2/nhit,digits=2),"\n",
        "Z.lo x T.hi: ",sq3," : ",format(sq3/nhit,digits=2),"\n",
        "Z.lo x T.lo: ",sq4," : ",format(sq4/nhit,digits=2),"\n")

    s <- paste0(s,rule,"stats on total with and without cytotox\n")

    code.cytotox.no <- CYTOTOX[CYTOTOX[,"nhit"]<2,"CODE"]
    code.cytotox.yes <- CYTOTOX[CYTOTOX[,"nhit"]>=2,"CODE"]
    code.cytotox.no <- code.cytotox.no[is.element(code.cytotox.no,code.list)]
    code.cytotox.yes <- code.cytotox.yes[is.element(code.cytotox.yes,code.list)]
    s <- paste0(s,
        "Number of chemicals without cytotox: ",length(code.cytotox.no),"\n",
        "Number of chemicals with cytotox:    ",length(code.cytotox.yes),"\n")

    ratio.no <- hit_ratio(code.cytotox.no)
    s <- paste0(s,"Mean hit ratio and SD for chemicals without cytotox:  ",
                format(ratio.no$mean,digits=2)," : ",format(ratio.no$sd,digits=2),"\n")

    ratio.yes <- hit_ratio(code.cytotox.yes)
    s <- paste0(s,"Mean hit ratio and SD for chemicals with cytotox:     ",
                format(ratio.yes$mean,digits=2)," : ",format(ratio.yes$sd,digits=2),"\n")
    s <- paste0(s,rule)

    cat(file=file,s,append=FALSE)
    cat(s)
}
