#--------------------------------------------------------------------------------------
#
# toxcast_driver_v12.R - code to analyze the ToxCast data
#
# November 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
#--------------------------------------------------------------------------------------
options(java.parameters = "-Xmx1000m")
library(grDevices)
library(RColorBrewer)
library(stringr)
library(mixdist)
library(class)
library(lattice)
library(openxlsx)
library(DBI)
library(RMySQL)

source("utils.R")

# Database connection settings. `<<-` is used so the bindings are created in
# (or updated in) an enclosing/global scope rather than a local one.
# NOTE(review): SERVER is assigned twice in a row; the second value
# ("au.epa.gov") overwrites the first, so the IP address is never used.
SERVER <<- "134.67.216.114"
SERVER <<- "au.epa.gov"
DB <<- "dev_physchemdb"
USER <<- "rjudson"
# NOTE(review): credential is hard-coded in source; consider loading it from
# the environment or a file kept out of version control.
PASSWORD <<- "password"

# Location and date stamp of the variable-matrix input files.
# NOTE(review): neither value is referenced in the visible part of this file;
# presumably they are consumed by code in utils.R or later sections - confirm.
VARMATDIR <<- "../input/varmats_151020_internal/"
VARMATDATE <<- "151020"
#--------------------------------------------------------------------------------------
#
# create the unique set of alerts and their short codes
#
#--------------------------------------------------------------------------------------
# Reads ../alerts/toolbox_alerts_raw_Tox21.xlsx, treats every column from the
# 27th onward as an alert source, and writes a dictionary with one row per
# (source, alert) pair to ../alerts/toolbox_alerts_dictionary_Tox21.xlsx.
#
# Dictionary columns:
#   source      - column name of the alert source in the raw file
#   source.code - "source.NN", numbered in column order (zero padded to 2 digits)
#   alert       - one distinct alert string; cells are split on "|"
#   alert.code  - "<source.code>.alert.NN", numbered in sorted alert order
#
# Returns the dictionary data frame invisibly.
# Stops with an error if the raw file has fewer than 27 columns.
alert.dictionary.parser <- function() {
	print.current.function()
	mat.1 <- read.xlsx("../alerts/toolbox_alerts_raw_Tox21.xlsx")

	# Alert sources start at a fixed column position in the raw file. Guard the
	# range explicitly: 27:ncol would silently count backwards on a narrow sheet.
	first.alert.col <- 27
	if(ncol(mat.1) < first.alert.col) {
		stop("raw alert file has ",ncol(mat.1)," columns; expected at least ",first.alert.col,call.=FALSE)
	}
	alert.sources <- names(mat.1)[first.alert.col:ncol(mat.1)]
	source.codes <- sprintf("source.%02d",seq_along(alert.sources))

	# Build one data frame per source and bind them once at the end instead of
	# growing the result with rbind() inside a nested loop.
	per.source <- lapply(seq_along(alert.sources),function(i) {
		cells <- mat.1[,alert.sources[i]]
		# as.character() keeps this a character(0) (not NULL) for empty input;
		# sort() drops NA entries that come from empty cells.
		alerts <- sort(unique(as.character(unlist(str_split(cells,"\\|")))))
		# sprintf() returns character(0) for zero alerts, so a source whose
		# column is entirely missing simply contributes no rows.
		data.frame(
			source=rep(alert.sources[i],length(alerts)),
			source.code=rep(source.codes[i],length(alerts)),
			alert=alerts,
			alert.code=sprintf("%s.alert.%02d",source.codes[i],seq_along(alerts)),
			stringsAsFactors=FALSE
		)
	})
	dict.1 <- do.call(rbind,per.source)

	write.xlsx(dict.1,"../alerts/toolbox_alerts_dictionary_Tox21.xlsx")
	invisible(dict.1)
}
#--------------------------------------------------------------------------------------
#
# create the chemical by alert file
#
#--------------------------------------------------------------------------------------
# Builds the chemical-by-alert indicator table.
#
# Reads the alert dictionary (../alerts/toolbox_alerts_dictionary_Tox21.xlsx)
# and the raw alert file (../alerts/toolbox_alerts_raw_Tox21.xlsx), then writes
# ../alerts/toolbox_alerts_binary_Tox21.xlsx with one row per chemical:
# the CODE, CASRN and Name columns copied from the raw file, followed by one
# 0/1 column per dictionary alert.code. A cell is 1 when the chemical's raw
# value for that alert's source column (split on "|") contains the alert.
#
# The name of each chemical is printed as it is processed.
# Returns the resulting data frame invisibly.
alert.file.parser <- function() {
	print.current.function()

	dict <- read.xlsx("../alerts/toolbox_alerts_dictionary_Tox21.xlsx")
	mat.in <- read.xlsx("../alerts/toolbox_alerts_raw_Tox21.xlsx")
	nchem <- nrow(mat.in)

	# Accumulate flags in a numeric matrix addressed by row position; this is
	# far cheaper than per-cell data frame assignment and does not depend on
	# CODE values being unique, non-missing row names.
	alert.codes <- dict[,"alert.code"]
	flags <- matrix(0,nrow=nchem,ncol=length(alert.codes),dimnames=list(NULL,alert.codes))

	# Only sources that actually exist as columns of the raw file can match.
	# The per-source dictionary slices do not change between chemicals, so
	# they are computed once up front.
	source.list <- intersect(sort(unique(dict[,"source"])),names(mat.in))
	dict.by.source <- lapply(source.list,function(s) {
		dict[dict[,"source"] %in% s,c("alert","alert.code"),drop=FALSE]
	})

	for(i in seq_len(nchem)) {
		cat(mat.in[i,"Name"],"\n")
		flush.console()
		for(j in seq_along(source.list)) {
			val.list <- str_split(mat.in[i,source.list[j]],"\\|")[[1]]
			lookup <- dict.by.source[[j]]
			hits <- lookup[lookup[,"alert"] %in% val.list,"alert.code"]
			if(length(hits)>0) flags[i,hits] <- 1
		}
	}

	# cbind() on data frames keeps the alert codes as column names unchanged.
	mat <- cbind(mat.in[,c("CODE","CASRN","Name"),drop=FALSE],as.data.frame(flags))
	write.xlsx(mat,"../alerts/toolbox_alerts_binary_Tox21.xlsx")
	invisible(mat)
}
