#--------------------------------------------------------------------------------------
#
# look at assay-assay correlation as a function of Z
#
# cutoff - this is the minimum similarity between two assays in the z-matrix to use in clustering
# threshold - this is the minimum fraction of the assays in a cluster to call a chemical positive for that cluster
# --------------------------------------------------------------------------------------
zcorr.all <- function(cutoff=0.5,threshold=0.75) {
	# Run the three stages (zcorr -> zclust -> zclust.chem) for the low-Z window and
	# then for the high-Z window, writing all results to file.
	#
	# cutoff    - forwarded to zcorr, zclust and zclust.chem
	# threshold - forwarded to zclust.chem
	#
	# For each window: the zmin given to zcorr, the zmin given to zclust/zclust.chem,
	# and the zmax shared by all three stages.
	windows <- list(
		list(corr.zmin=-100, clust.zmin=0, zmax=3),
		list(corr.zmin=5,    clust.zmin=5, zmax=100)
	)
	for(w in windows) {
		zcorr(zmin=w$corr.zmin, zmax=w$zmax, to.file=TRUE, cutoff=cutoff)
		zclust(zmin=w$clust.zmin, zmax=w$zmax, cutoff=cutoff)
		zclust.chem(zmin=w$clust.zmin, zmax=w$zmax, cutoff=cutoff, threshold=threshold, to.file=TRUE)
		flush.console()
	}
}
#--------------------------------------------------------------------------------------
#
# look at assay-assay correlation as a function of Z
#
# --------------------------------------------------------------------------------------
zcorr <- function(zmin=-100,zmax=1,to.file=FALSE,cutoff) {
	# Build the assay-by-assay similarity matrix for hits whose normalized Z lies in the
	# inclusive window [zmin,zmax], then draw it as a heatmap (raw and discretized).
	#
	# zmin, zmax - Z window; a chemical/assay cell counts as a hit when zmin <= Z <= zmax
	# to.file    - TRUE: write the long-format table, the matrices and both heatmaps (PDF) under burst/
	#              FALSE: draw on the current device and stop in browser() after each heatmap
	# cutoff     - similarities below this value are set to 0 (the rest to 1) for the discretized heatmap
	#
	# Uses the globals MAT.AC50 and MAT.Z.NORM; prepMatrices() is called when MAT.AC50 does not
	# exist and MAT.Z.NORM is read from output/zscore_matrix_norm.txt (and cached globally) when
	# it does not exist.  burst/sim_mat_<zmin>_<zmax>.txt is always written.  A negative zmin is
	# reported as 0 in every output file name.

	# Draw one symmetric similarity heatmap.  When a PDF name is given the plot goes to that
	# file and the device is closed even if heatmap() fails.
	draw.heatmap <- function(mat,pdf.file=NULL) {
		if(!is.null(pdf.file)) {
			pdf(file=pdf.file,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
			on.exit(dev.off(),add=TRUE)
		}
		heatmap(mat,margins=c(10,10),scale="none",main=paste("Z-corr: ",zmin,":",zmax),
				xlab="",ylab="",cexCol=0.25,cexRow=0.25,col=brewer.pal(9,"Reds"),symm=TRUE,
				hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)
		invisible(NULL)
	}

	if(!exists("MAT.AC50")) prepMatrices()
	if(!exists("MAT.Z.NORM")) {
		file <- "output/zscore_matrix_norm.txt"
		MAT.Z.NORM <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
	}
	file <- "../input/AssayList.txt"
	assays <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
	rownames(assays) <- assays[,"Assay"]
	assays <- assays[names(MAT.AC50),]

	# cytotoxicity assays are excluded from the similarity matrix and only used as a row filter
	is.cytotox <- assays[,"Type"] %in% c("cytotox","cytotox_fail")
	zmat <- MAT.Z.NORM[,!is.cytotox,drop=FALSE]

	# keep chemicals with an AC50 (non-NA) in more than 10 cytotoxicity assays
	n.cytotox.hits <- rowSums(!is.na(MAT.AC50[,is.cytotox,drop=FALSE]))
	zmat <- zmat[n.cytotox.hits>10,,drop=FALSE]
	cat("Dimension of MAT.Z.NORM: ",dim(zmat),"\n")

	# 1 = Z inside the window, 0 = outside the window or missing
	bin <- (!is.na(zmat) & zmat<=zmax & zmat>=zmin)*1
	cat("sum of MAT.Z.NORM: ",sum(bin),"\n")
	bin <- bin[rowSums(bin)>2,,drop=FALSE]
	bin <- bin[,colSums(bin)>2,drop=FALSE]
	cat("Dimension of MAT.Z: ",dim(bin),"\n")
	if(ncol(bin)<2) stop("zcorr: fewer than two assays have more than 2 hits in the Z window ",zmin,":",zmax,call.=FALSE)

	# similarity = 1 - binary distance between assay hit profiles
	dmat <- dist(t(bin),method="binary",diag=TRUE,upper=TRUE)
	simmat <- 1-as.matrix(dmat)
	zmin.name <- max(zmin,0)
	fname <- paste0("burst/sim_mat_",zmin.name,"_",zmax,".txt")
	write.table(simmat,file=fname, row.names=TRUE, append=FALSE, quote=FALSE, sep = "\t")

	if(to.file) {
		# long format: one row per assay pair with similarity > 0, ordered by Assay.1 then Assay.2.
		# which() walks a matrix column by column, so the transpose yields that row-wise order.
		assay.names <- colnames(simmat)
		tsim <- t(simmat)
		pairs <- unname(which(tsim>0,arr.ind=TRUE))
		simmat.cast <- data.frame(Assay.1=assay.names[pairs[,2]],
								  Assay.2=assay.names[pairs[,1]],
								  Similarity=tsim[pairs],
								  Zmin=rep(zmin,nrow(pairs)),
								  Zmax=rep(zmax,nrow(pairs)),
								  stringsAsFactors=FALSE)
		fname <- paste0("burst/zcorr_long_",zmin.name,"_",zmax,".txt")
		write.table(simmat.cast,file=fname, row.names=TRUE, append=FALSE, quote=FALSE, sep = "\t")

		fname <- paste0("burst/zcorr_",zmin.name,"_",zmax,".txt")
		write.table(simmat,file=fname, row.names=TRUE, append=FALSE, quote=FALSE, sep = "\t")
		draw.heatmap(simmat,paste0("burst/zcorr_",zmin.name,"_",zmax,"_hm.pdf"))
	}
	else {
		draw.heatmap(simmat)
		browser()
	}

	# discretize at the cutoff and keep assays similar to at least one assay besides themselves
	simmat.2 <- simmat
	simmat.2[simmat.2<cutoff] <- 0
	simmat.2[simmat.2>0] <- 1
	keep <- rowSums(simmat.2)>1
	simmat.2 <- simmat.2[keep,keep,drop=FALSE]

	if(to.file) {
		fname <- paste0("burst/zcorr_",zmin.name,"_",zmax,"_discrete_",cutoff,".txt")
		write.table(simmat.2,file=fname, row.names=TRUE, append=FALSE, quote=FALSE, sep = "\t")
		draw.heatmap(simmat.2,paste0("burst/zcorr_",zmin.name,"_",zmax,"_discrete_hm_",cutoff,".pdf"))
	}
	else {
		draw.heatmap(simmat.2)
		browser()
	}
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# do the clustering to get assays that are co-regulated
#
# --------------------------------------------------------------------------------------
zclust <- function(zmin=0,zmax=2,cutoff) {
	# Cluster assays from the similarity matrix stored in burst/sim_mat_<zmin>_<zmax>.txt.
	#
	# zmin, zmax - Z window; only used to build the input and output file names
	# cutoff     - similarities below this value are set to 0, the remaining positive ones to 1
	#
	# Assays that are not similar to at least one assay besides themselves are dropped.  The
	# rest are clustered with hclust (ward.D) on 1 - discretized similarity and cut with
	# cutreeDynamic.  Writes burst/cluster_members_cutree_<zmin>_<zmax>_<cutoff>.txt with the
	# columns "assay" and "cluster".
	file <- paste0("burst/sim_mat_",zmin,"_",zmax,".txt")
	simmat <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")

	simmat.2 <- simmat
	simmat.2[simmat.2<cutoff] <- 0
	simmat.2[simmat.2>0] <- 1
	keep <- rowSums(simmat.2)>1
	simmat.2 <- simmat.2[keep,keep,drop=FALSE]
	# hclust needs at least two objects; fail with a message that names the cause
	if(nrow(simmat.2)<2) stop("zclust: fewer than two assays remain at cutoff ",cutoff," for ",file,call.=FALSE)

	dmat <- as.matrix(1-simmat.2)
	hres <- hclust(as.dist(dmat),method="ward.D")
	result1 <- cutreeDynamic(hres,cutHeight=1,minClusterSize=1,method="hybrid",distM=dmat)
	output <- data.frame(assay=names(simmat.2),cluster=as.vector(result1),stringsAsFactors=FALSE)
	file <- paste0("burst/cluster_members_cutree_",zmin,"_",zmax,"_",cutoff,".txt")
	write.table(output,file=file, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# find the chemicals responsible for the clusters
#
# --------------------------------------------------------------------------------------
zclust.chem <- function(zmin=0,zmax=2,cutoff,threshold,to.file=FALSE) {
	# For every assay cluster, list the chemicals that are hits in enough of its assays.
	#
	# zmin, zmax - Z window; a cell is a hit when Z < zmax and, unless zmin is 0, Z > zmin
	#              (with zmin == 0 there is no lower bound, so negative Z values also count)
	# cutoff     - only used to locate burst/cluster_members_cutree_<zmin>_<zmax>_<cutoff>.txt
	# threshold  - a chemical is kept for a cluster when it is a hit in MORE than
	#              threshold * (number of assays in the cluster) assays
	# to.file    - TRUE: one heatmap PDF per cluster under burst/
	#              FALSE: draw on the current device and stop in browser() after each heatmap
	#
	# Uses the globals MAT.Z.NORM (chemicals x assays) and CHEMS (rows looked up by the row
	# names of MAT.Z.NORM).  A cluster contributes only when more than one chemical passes.
	# Writes burst/zclust_chem_<zmin>_<zmax>_chemicals_<cutoff>_<threshold>.txt; when nothing
	# passes the file holds just the header.

	# Draw the chemical x assay hit heatmap of cluster i; the PDF device is closed even when
	# heatmap() fails.
	draw.cluster.heatmap <- function(mat,row.labels,i) {
		if(to.file) {
			fname <- paste0("burst/zclust_chem_",zmin,"_",zmax,"_hm_",cutoff,"_",threshold,"_",i,".pdf")
			pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
			on.exit(dev.off(),add=TRUE)
		}
		heatmap(mat,margins=c(10,10),scale="none",main=paste("Z-cluster-chem: ",zmin,":",zmax," cl: ",i),
				labRow=row.labels,xlab="",ylab="",cexCol=0.5,cexRow=0.5,col=brewer.pal(9,"Reds"),
				hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=TRUE)
		invisible(NULL)
	}

	file <- paste0("burst/cluster_members_cutree_",zmin,"_",zmax,"_",cutoff,".txt")
	cassay <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
	ncl <- if(nrow(cassay)>0) max(cassay[,"cluster"]) else 0
	z.range <- paste(zmin," to ",zmax,sep="")

	pieces <- list()
	for(i in seq_len(max(ncl,0))) {
		assay.set <- cassay[cassay[,"cluster"] %in% i,"assay"]
		if(length(assay.set)==0) next

		# drop=FALSE keeps single-assay clusters two-dimensional
		temp <- MAT.Z.NORM[,assay.set,drop=FALSE]
		chem.ids <- rownames(temp)
		zvals <- as.matrix(temp)

		# logical hit mask; avoids recoding values in place, where the 0/1 codes could be
		# mistaken for Z values (e.g. zmax <= 1 or a negative zmin)
		hit <- !is.na(zvals) & zvals<zmax
		if(zmin!=0) hit <- hit & zvals>zmin

		nmin <- threshold*ncol(hit)
		positive <- rowSums(hit)>nmin

		if(sum(positive)>1) {
			chem.list <- CHEMS[chem.ids[positive],,drop=FALSE]
			pieces[[length(pieces)+1]] <- cbind(chem.list,cluster=i,z.range=z.range,stringsAsFactors=FALSE)

			# heatmap() needs at least two rows and two columns, so single-assay clusters
			# are reported in the table but not plotted
			if(ncol(hit)>1) {
				draw.cluster.heatmap(hit[positive,,drop=FALSE]*1,chem.list[,"short_name"],i)
				if(!to.file) browser()
			}
		}
	}

	if(length(pieces)>0) {
		output <- do.call(rbind,pieces)
	}
	else {
		output <- data.frame(CHEMS[0,,drop=FALSE],cluster=integer(0),z.range=character(0),stringsAsFactors=FALSE)
	}
	names(output) <- c(names(CHEMS),"cluster","z-range")
	fname <- paste0("burst/zclust_chem_",zmin,"_",zmax,"_chemicals_",cutoff,"_",threshold,".txt")
	write.table(output,file=fname, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")
}
#--------------------------------------------------------------------------------------
#
# summarize the chemical-by-cluster results: heatmap plus entity and SIF network files
#
# --------------------------------------------------------------------------------------
zclust.summary <- function(cutoff=0.5,threshold=0.75,to.file=FALSE) {
	# Combine the low-Z (0 to 3) and high-Z (5 to 100) zclust.chem results into one
	# chemical x cluster membership heatmap and export the network as text files.
	#
	# cutoff, threshold - used to locate the zclust.chem / zclust output files under burst/
	# to.file           - TRUE: heatmap goes to burst/zclust_summary_hm_<cutoff>_<threshold>.pdf
	#                     FALSE: draw on the current device and stop in browser() at the end
	#
	# Uses the globals CHEMS and ASSAYS.  Writes burst/entities.txt (entity<TAB>type) and
	# burst/clusters.sif (source<TAB>relation<TAB>target).

	# Read one zclust.chem table, keep rows with Phase_I + Phase_II > 0 and prefix the cluster ids.
	read.chem.table <- function(zmin,zmax,prefix) {
		fname <- paste0("burst/zclust_chem_",zmin,"_",zmax,"_chemicals_",cutoff,"_",threshold,".txt")
		tab <- read.table(fname,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		mask <- tab[,"Phase_I"]+tab[,"Phase_II"]
		tab <- tab[mask>0,]
		# guard: relabelling an empty table must not create a junk row
		if(nrow(tab)>0) tab[,"cluster"] <- paste(prefix,tab[,"cluster"])
		tab
	}

	# Build the "assay is_member_of cluster" lines for one zclust output file.
	read.member.lines <- function(zmin,zmax,prefix) {
		file <- paste0("burst/cluster_members_cutree_",zmin,"_",zmax,"_",cutoff,".txt")
		cassay <- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
		sprintf("%s\tis_member_of\t%s %s",as.character(cassay[,"assay"]),prefix,as.character(cassay[,"cluster"]))
	}

	temp.lo <- read.chem.table(0,3,"Lo-Z")
	temp.hi <- read.chem.table(5,100,"Hi-Z")
	temp <- rbind(temp.lo,temp.hi)

	# chemical x cluster membership matrix (1 = chemical is listed for that cluster)
	ch.list <- sort(unique(temp[,"short_name"]))
	cl.list <- sort(unique(temp[,"cluster"]))
	ncl <- length(cl.list)
	mat <- matrix(0,nrow=length(ch.list),ncol=ncl,dimnames=list(ch.list,cl.list))
	idx <- cbind(match(temp[,"short_name"],ch.list),match(temp[,"cluster"],cl.list))
	idx <- idx[!is.na(idx[,1]) & !is.na(idx[,2]),,drop=FALSE]
	mat[idx] <- 1

	if(to.file) {
		# own file name; the previous name reused the per-cluster pattern of zclust.chem with a
		# stale loop index and could overwrite one of those heatmaps
		fname <- paste0("burst/zclust_summary_hm_",cutoff,"_",threshold,".pdf")
		pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
		# as before the device is closed when the function ends, now also when it fails
		on.exit(dev.off(),add=TRUE)
	}
	heatmap(mat,margins=c(2,10),scale="none",main=paste("Chemicals x Clusters"),
			xlab="",ylab="",cexCol=0.5,cexRow=0.5,col=brewer.pal(9,"Reds"),
			hclustfun=function(x) hclust(d=dist(x),method="ward.D"),keep.dendro=TRUE,verbose=FALSE)

	# entity table: chemicals, assays, genes/processes and clusters.
	# sprintf returns character(0) for empty input, so empty tables add no lines.
	gene.list <- sort(unique(ASSAYS[,"Gene_Process"]))
	cluster.type <- ifelse(substr(cl.list,1,2)=="Lo","Cluster Lo-Z","Cluster Hi-Z")
	entity.lines <- c(
		"entity\ttype",
		sprintf("%s\tchemical",as.character(CHEMS[,"short_name"])),
		sprintf("%s\tassay",as.character(ASSAYS[,"Assay"])),
		sprintf("%s\tgene",as.character(gene.list)),
		sprintf("%s\t%s",as.character(cl.list),cluster.type)
	)
	writeLines(entity.lines,"burst/entities.txt")

	# network: chemical -> cluster, assay -> gene/process (when known), assay -> cluster
	has.gene <- !is.na(ASSAYS[,"Gene_Process"])
	sif.lines <- c(
		sprintf("%s\tactivates\t%s",as.character(temp[,"short_name"]),as.character(temp[,"cluster"])),
		sprintf("%s\tmeasures\t%s",as.character(ASSAYS[has.gene,"Assay"]),as.character(ASSAYS[has.gene,"Gene_Process"])),
		read.member.lines(0,3,"Lo-Z"),
		read.member.lines(5,100,"Hi-Z")
	)
	writeLines(sif.lines,"burst/clusters.sif")

	if(!to.file) browser()
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# compare the mean W of low-Z and high-Z hits for each assay
#
# --------------------------------------------------------------------------------------
WxZ <- function(zcut1=3,zcut2=5,to.file=FALSE) {
	# Per assay, compare the mean W of low-Z hits (Z <= zcut1) with the mean W of high-Z
	# hits (Z >= zcut2) and plot one against the other with +/- 1 sd bars.
	#
	# zcut1   - upper Z bound of the low-Z group (x axis)
	# zcut2   - lower Z bound of the high-Z group (y axis)
	# to.file - TRUE: plot to plots/WxZ.pdf
	#           FALSE: draw on the current device and stop in browser() at the end
	#
	# Uses the globals ASSAY.LIST, MAT.Z.NORM, MAT.W and ASSAYS (row names = assay names, column
	# "Source").  Assays where any of the four statistics is NA are not plotted.  The two groups
	# are selected independently, so they only overlap when zcut2 <= zcut1.

	# point colour per assay source; sources not listed here (or NA) get ""
	source.colors <- c(
		"ACEA"="orange",
		"Apredica_down"="yellow", "Apredica_up"="yellow",
		"Odyssey Thera"="green",
		"Attagene_cis"="blue", "Attagene_trans"="blue",
		"BioSeek_down"="violet", "BioSeek_up"="violet",
		"Tox21_LUC"="gray", "Tox21_BLA"="gray",
		"Tox21_LUC_viability"="gray", "Tox21_BLA_viability"="gray",
		"Novascreen_ADME"="red", "Novascreen_ADME_act"="red",
		"Novascreen_ENZ"="red", "Novascreen_ENZ_act"="red",
		"Novascreen_GPCR"="red", "Novascreen_IC"="red",
		"Novascreen_Misc"="red", "Novascreen_NR"="red", "Novascreen_TR"="red"
	)

	nassay <- length(ASSAY.LIST)
	result <- matrix(0,nrow=nassay,ncol=4,dimnames=list(ASSAY.LIST,NULL))

	color <- unname(source.colors[as.character(ASSAYS[ASSAY.LIST,"Source"])])
	color[is.na(color)] <- ""

	# logical group masks instead of recoding Z in place with 1/10/100, which could be
	# mistaken for real Z values when the cut-offs are changed
	low.z <- !is.na(MAT.Z.NORM) & MAT.Z.NORM<=zcut1
	high.z <- !is.na(MAT.Z.NORM) & MAT.Z.NORM>=zcut2

	for(i in seq_len(nassay)) {
		assay <- ASSAY.LIST[i]
		w <- MAT.W[,assay]
		w.in <- w[low.z[,assay]]
		w.out <- w[high.z[,assay]]
		result[i,] <- c(mean(w.in),sd(w.in),mean(w.out),sd(w.out))
	}

	if(to.file) {
		fname <- paste("plots/WxZ.pdf",sep="")
		pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
		# close the device when the function ends, including when plotting fails
		on.exit(dev.off(),add=TRUE)
	}

	# plot only assays with all four statistics available
	complete <- rowSums(is.na(result))==0
	x <- result[complete,1]
	y <- result[complete,3]
	sd.x <- result[complete,2]
	sd.y <- result[complete,4]
	color <- color[complete]

	plot(y~x,type="p",xlim=c(0,8),ylim=c(0,8),
		 xlab=paste0("|W(Z<",zcut1,")|"),ylab=paste0("|W(Z>",zcut2,")|"),
		 cex.lab=1.2,cex.axis=1.2,main="WxZ")
	for(i in seq_along(x)) {
		lines(c(x[i]-sd.x[i],x[i]+sd.x[i]),c(y[i],y[i]))
		lines(c(x[i],x[i]),c(y[i]-sd.y[i],y[i]+sd.y[i]))
		points(y[i]~x[i],bg=color[i],pch=21)
	}
	lines(c(0,8),c(0,8))

	# legend, top-left, one entry per vendor
	legend.colors <- c("ACEA"="orange","Apredica"="yellow","Attagene"="blue","BioSeek"="violet",
					   "Novascreen"="red","Odyssey Thera"="green","Tox21"="gray")
	legend.y <- 8-0.4*(seq_along(legend.colors)-1)
	text(0.25,legend.y,names(legend.colors),pos=4)
	points(rep(0,length(legend.y)),legend.y,pch=21,bg=legend.colors,cex=2)

	if(!to.file) browser()
	invisible(NULL)
}


