#--------------------------------------------------------------------------------------
#
# utils.R utilities for managing ToxCast data
#
# November 2015
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov
#
#
#--------------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------
#
# run a query
#
#-----------------------------------------------------------------------------------
run.query <- function(query,db) {
	con <- dbConnect(drv=RMySQL::MySQL(),user=USER,password=PASSWORD,host=SERVER,dbname=db)
	rs <- dbSendQuery(con, query)
	d1 <- dbFetch(rs, n = -1) 
	#print(d1)
	#flush.console()
	dbHasCompleted(rs)
	dbClearResult(rs)
	dbDisconnect(con)
	return(d1)
}
#-----------------------------------------------------------------------------------
#
# test the interaction with the DB
#
#-----------------------------------------------------------------------------------
test.con <- function() {
	#ret <- run.query("SELECT distinct Db from db where Db like 'actor%'","mysql")
	#print(ret)
	ret <- run.query("show databases",DB)
	print(ret)
	ret <- run.query("show tables",DB)
	print(ret)
	ret <- run.query("desc pred_epi",DB)
	print(ret[,"Field"])
	#ret <- run.query("desc assay_category_cv",DB)
	#print(ret)
	#ret <- run.query("select category, description from assay_category_cv",DB)
	#print(ret)
	#ret <- run.query("desc assay_component",DB)
	#print(ret)
	
}
#--------------------------------------------------------------------------------------
#
# get info for a code
#
#--------------------------------------------------------------------------------------
probe.code <- function(code) {
	print(CHEMS[code,"Name"])
	temp <- MAT.logAC50[code,]
	name.list <- names(temp)
	mask <- temp
	mask[] <- 1
	mask[is.na(temp)] <- 0
	mask[temp==6] <- 0
	temp <- temp[mask==1]
	name.list <- name.list[mask==1]
	mat <- as.data.frame(cbind(name.list,as.numeric(temp)),stringsAsFactors=F)
	names(mat) <- c("assay","logAC50")
	mat <- mat[order(as.numeric(mat$logAC50)),]	
	print(mat)
}

#--------------------------------------------------------------------------------------
#
# get the name of the current functio nand pretty print it
#
#--------------------------------------------------------------------------------------
print.current.function <- function(comment.string=NA) {
	cat("=========================================\n")
	curcall <- sys.call(sys.parent(n=1))[[1]]
	cat(curcall,"\n")
	if(!is.na(comment.string))	cat(comment.string,"\n")
	cat("=========================================\n")
	flush.console()
}
#-----------------------------------------------------------------------------------
#
# do an or on a chunk of a matrix
#
#-----------------------------------------------------------------------------------
aORb <<- function(V,M) {
    res <- c()
    for(i in 1:dim(M)[1]) res <- c(res,sum(V|M[i,]))
    return(res)
}
#--------------------------------------------------------------------------------------
#
# uniquify - take a list and get the unique set
#
#--------------------------------------------------------------------------------------
uniquify <- function(x) {
	temp <- duplicated(x)
	y <- x[!temp]
	return(y)
}
#--------------------------------------------------------------
#
# calculate the paramteres for a 2 x 2 matrix
#
#--------------------------------------------------------------
TxT<<- function(tp,fp,fn,tn,do.p=TRUE) {

	sens<-tp/(tp+fn)
	spec<-tn/(tn+fp)
	ppv<-tp/(tp+fp)
	npv<-tn/(tn+fn)

	relative.risk <- (tp/(tp+fp)) / (fn/(tn+fn))
	odds.ratio <- (tp*tn)/(fp*fn)

	accuracy <- (tp+tn)/(tp+tn+fp+fn)
	x<-matrix(data=NA, nrow=2, ncol=2)
	x[1,1]<-tp
	x[1,2]<-fp
	x[2,1]<-fn
	x[2,2]<-tn
	if(is.infinite(relative.risk))  relative.risk <- 1000000
	if(is.infinite(odds.ratio))  odds.ratio <- 1000000
	if(is.infinite(sens))  sens <- 0
	if(is.infinite(spec))  spec <- 0
	if(is.nan(relative.risk))  relative.risk <- 1
	if(is.nan(odds.ratio))  odds.ratio <- 1
	if(is.nan(sens))  sens <- 0
	if(is.nan(spec))  spec <- 0
	if(is.na(sens))  sens <- 0
	if(is.na(spec))  spec <- 0
	or.ci.lwr <- 0
	or.ci.upr <- 1000000
	if(odds.ratio<1000000 && tp>0 && tn>0 && fp>0 && fn>0) {
		lnor <- log(odds.ratio)
		selnor <- sqrt(1/tp+1/tn+1/fp+1/fn)
		ln.upr <- lnor+1.96*selnor
		ln.lwr <- lnor-1.96*selnor
		or.ci.lwr <- exp(ln.lwr)
		or.ci.upr <- exp(ln.upr)
	}
	p.value<-1
	if(do.p==TRUE) {
		if(sens>0 && spec>0) {
			c<-fisher.test(x)
			p.value <- c$p.value
		}
	}
	ba <- 0.5*(sens+spec)

	sval<-paste(tp,"\t",fp,"\t",fn,"\t",tn,"\t",format(sens,digits=3),"\t",format(spec,digits=3),"\t",format(ba,digits=3),"\t",format(accuracy,digits=3),"\t",format(relative.risk,digits=3),"\t",format(odds.ratio,digits=3),"\t",format(or.ci.lwr,digits=3),"\t",format(or.ci.upr,digits=3),"\t",format(ppv,digits=3),"\t",format(npv,digits=3),"\t",format(p.value,digits=3),sep="")
	title<-paste("TP\tFP\tFN\tTN\tSens\tSpec\tBA\tAcrcy\tRelRsk\tOR\tCI.OR.LWR\tCI.OR.UPR\tPPV\tNPV\tp.value",sep="")
  	r<<-list(a=tp,b=fp,c=fn,d=tn,sens=sens,spec=spec,ba=ba,accuracy=accuracy,relative.risk=relative.risk,odds.ratio=odds.ratio,or.ci.lwr=or.ci.lwr,or.ci.upr=or.ci.upr,ppv=ppv,npv=npv,p.value=p.value,sval=sval,title=title)

	r
}
#-----------------------------------------------------------------------------------
#
# chisq on a single variable
#   used as the inner function for factor selection
#
#-----------------------------------------------------------------------------------
chisq_1Var <- function(x,Class) {
	y <- Class[,1]
	a <-sum(x*y)
	b <- sum(x*(1-y))
	c <- sum((1-x)*y)
	d <- sum((1-x)*(1-y))
	txt <- TxT(a,b,c,d)
	return(txt)
}
#-----------------------------------------------------------------------------------
#
# minimum by col
#
#-----------------------------------------------------------------------------------
colMin <- function(x) {
	ret <- apply(x,FUN=min,MARGIN=2)
}
#-----------------------------------------------------------------------------------
#
# maximum by col
#
#-----------------------------------------------------------------------------------
colMax <- function(x) {
	ret <- apply(x,FUN=max,MARGIN=2)
}
#-----------------------------------------------------------------------------------
#
# median by col
#
#-----------------------------------------------------------------------------------
colMed <- function(x) {
  ret <- apply(x,FUN=median,MARGIN=2)
}
#-----------------------------------------------------------------------------------
#
# minimum by row
#
#-----------------------------------------------------------------------------------
rowMin <- function(x) {
	ret <- apply(x,FUN=min,MARGIN=1)
}
#-----------------------------------------------------------------------------------
#
# minimum by row
#
#-----------------------------------------------------------------------------------
rowMax <- function(x) {
	ret <- apply(x,FUN=max,MARGIN=1)
}
#-----------------------------------------------------------------------------------
#
# minimum by row
#
#-----------------------------------------------------------------------------------
rowSum <- function(x) {
	ret <- apply(x,FUN=sum,MARGIN=1)
}
#-----------------------------------------------------------------------------------
#
# median by row
#
#-----------------------------------------------------------------------------------
rowMed <- function(x) {
	ret <- apply(x,FUN=median,MARGIN=1)
}
#-----------------------------------------------------------------------------------
#
# mean by row
#
#-----------------------------------------------------------------------------------
rowMean <- function(x) {
  ret <- apply(x,FUN=mean,MARGIN=1)
}
