#--------------------------------------------------------------------------------------
#
# rat_cancer_model_ivive.R - export data for rat cancer model paper
#
# December 2014
# Richard Judson
#
# US EPA
# Questions, comments to: judson.richard@epa.gov, 919-541-3085
#
# Order of processing
#

#--------------------------------------------------------------------------------------
options(java.parameters = "-Xmx1000m")
library(grDevices)
library(RColorBrewer)
library(stringr)
library(deSolve)
library(msm)
library(httk)
#library(fingerprint)
#library(e1071)
#library(diptest)
#library(xlsx)
#library(xlsxjars)
#library(pheatmap)
#library(nnclust)
#library(ggplot2)
#library(fmsb)
#library(mixdist)
source("utils.R")


#Acetochlor
#Ametryn
#Carbaryl
#Cyclanilide
#Cyfluthrin
#PFOA
#2,5-Pyridinedicarboxylic acid di-n-propyl ester (2,5-PCADPE)
#Simazine
#Tebufenpyrad
#Vinclozolin
#DEHP
#Bisphenol A
#Cyprodinil
#Flusilazole
#Indoxacarb
#Triclosan
#WY-14643
#Lipopolysaccharide
#Flutamide
#Estragole
#Ethyl methanesulfonate

# Chemical codes for the IVIVE chemical set. Each code is "C" followed by a CASRN
# with the dashes removed (the same form rat.ivive.read.data builds before it
# filters on this list).
# NOTE(review): there are 19 codes here but 21 chemical names in the comment list
# above - confirm which chemicals were intentionally left out.
CODE.LIST.IVIVE <- c("C34256821","C834128","C80057","C63252","C113136779","C68359375","C121552612","C117817","C136458","C140670","C85509199","C13311847","C173584446","C335671","C50892234","C122349","C119168773","C3380345","C50471448")

#--------------------------------------------------------------------------------------
#
# make the ToxCast hit table
#
#--------------------------------------------------------------------------------------
rat.ivive.toxcast.hit.table <- function() {
	# Write one tab-delimited row for every chemical/assay pair whose hit call is 1,
	# for each chemical code in CODE.LIST.IVIVE, to
	# ../output/rat_cancer_model_data.txt (the file is overwritten).
	#
	# Reads the globals CHEMS, MAT.hitcall, MAT.AC50, MAT.Z.NORM and ASSAY.INFO,
	# none of which is defined in this file; they must already be loaded.
	# MAT.hitcall[code,] is expected to keep two dimensions (e.g. a data frame row).
	#
	# Returns NULL invisibly; the output file is the only result.
	cat("==========================================================================\n")
	cat("rat.ivive.toxcast.hit.table\n")
	cat("==========================================================================\n")
	flush.console()
	outfile <- "../output/rat_cancer_model_data.txt"

	# open the file once rather than re-opening it in append mode for every row,
	# and make sure it is closed even if a lookup below fails
	con <- file(outfile,open="w")
	on.exit(close(con),add=TRUE)
	cat("CODE\tCASRN\tName\tAssay\tGene\tBioProcess\tAC50\tZ\n",file=con)

	for(code in CODE.LIST.IVIVE) {
		casrn <- CHEMS[code,"CASRN"]
		cname <- CHEMS[code,"Name"]
		hit.row <- MAT.hitcall[code,]
		assay.names <- names(hit.row)
		# seq_len() yields an empty loop (not c(1,0)) when there are no assay columns
		for(j in seq_len(dim(hit.row)[2])) {
			# skip missing hit calls and anything that is not an active call
			if(!isTRUE(hit.row[1,j]==1)) next
			assay <- assay.names[j]
			# annotation is only looked up for the rows that are actually written
			gene <- ASSAY.INFO[assay,"intended_target"]
			bioprocess <- ASSAY.INFO[assay,"biological_process"]
			ac50 <- MAT.AC50[code,assay]
			z <- MAT.Z.NORM[code,assay]
			s <- paste0(code,"\t",casrn,"\t",cname,"\t",assay,"\t",gene,"\t",bioprocess,"\t",format(ac50,digits=2),"\t",format(z,digits=2),"\n")
			cat(s,file=con)
		}
	}
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# read in the data
#
#--------------------------------------------------------------------------------------
rat.ivive.read.data <- function() {
	# Read the two input tables used by the IVIVE analysis and publish them as
	# globals:
	#   RAT.3STUDIES - ../ToxRefDB/toxrefdb_acceptable_rat_3studies.txt with columns
	#                  2 and 3 renamed to CASRN and Name, a leading CODE column
	#                  ("C" + CASRN without dashes), restricted to CODE.LIST.IVIVE
	#   RAT.LD50     - ../LD50/ToxCastInII_Tox21_Chemicals_LD50_Dow_to_EPA.txt,
	#                  indexed by CODE and reordered to CODE.LIST.IVIVE (codes
	#                  that are absent from the file become all-NA rows)
	# Returns NULL invisibly.
	cat("==========================================================================\n")
	cat("rat.ivive.read.data\n")
	cat("==========================================================================\n")
	flush.console()

	temp <- read.table(file="../ToxRefDB/toxrefdb_acceptable_rat_3studies.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
	names(temp)[2] <- "CASRN"
	names(temp)[3] <- "Name"
	temp <- cbind(temp[,"CASRN"],temp)
	names(temp)[1] <- "CODE"
	# Build all codes in one vectorized step. sprintf() returns a zero-length
	# result for zero-length input, so an empty table stays empty instead of
	# gaining a stray "C" entry (which paste() would produce).
	casrn.raw <- as.character(temp[,"CODE"])
	temp[,"CODE"] <- sprintf("C%s",str_replace_all(casrn.raw,"-",""))
	temp <- temp[is.element(temp[,"CODE"],CODE.LIST.IVIVE),]
	RAT.3STUDIES <<- temp
	cat("read in the toxref data\n")
	flush.console()

	temp <- read.table(file="../LD50/ToxCastInII_Tox21_Chemicals_LD50_Dow_to_EPA.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="",comment.char="")
	rownames(temp) <- temp[,"CODE"]
	temp <- temp[CODE.LIST.IVIVE,]
	RAT.LD50 <<- temp
	cat("read in the LD50 data\n")
	flush.console()
	# debugging pause; only meaningful when a user is at the console
	if(interactive()) browser()
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# prep the master list data
#
#--------------------------------------------------------------------------------------
rat.ivive.prep.master.list <- function() {
	# Assemble the per-chemical master table and publish it as the global
	# MASTER.LIST.IVIVE (row names = CODE); the number of rows is published as
	# NCHEM.IVIVE.
	#
	# Chemicals are the CASRNs present in both RAT.3STUDIES and RAT.LD50 (both
	# globals are set by rat.ivive.read.data). For each chemical the table holds:
	#   LD50                      - RAT.LD50[code,"Rat_LD50_Oral"]
	#   Css.rat.httk              - calc_analytic_css() for a daily dose of 1,
	#                               output in uM, rat pbtk model, when the CASRN
	#                               is returned by get_cheminfo(); otherwise NA
	#   Css, Css.class            - Predicted.Css and Triage.Call from
	#                               ../httk/css_summary.txt
	#   Cytotox.med, Cytotox.min  - cyto_pt_um and lower_bnd_um from
	#                               ../input/AllResults_cyto_dist_141222.txt
	# The remaining columns are created as NA here and filled in later by
	# rat.ivive.plot.hit.dist.
	# Returns NULL invisibly.
	cat("==========================================================================\n")
	cat("prep.master.list\n")
	cat("==========================================================================\n")
	flush.console()

	cytotox.summary <- read.table(file="../input/AllResults_cyto_dist_141222.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
	rownames(cytotox.summary) <- cytotox.summary[,"CODE"]

	httk.summary <- read.table(file="../httk/css_summary.txt",header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")

	casrn.list <- sort(unique(RAT.3STUDIES[,"CASRN"]))
	casrn.list <- casrn.list[is.element(casrn.list,RAT.LD50[,"CASRN"])]
	nchem <- length(casrn.list)

	code.list <- rep(NA_character_,nchem)
	cname.list <- rep(NA_character_,nchem)
	css.class.list <- rep(NA_character_,nchem)
	ld50.list <- rep(NA_real_,nchem)
	css.list <- rep(NA_real_,nchem)
	css.paper.list <- rep(NA_real_,nchem)
	cyto.med.list <- rep(NA_real_,nchem)
	cyto.min.list <- rep(NA_real_,nchem)

	# the list of chemicals httk has data for does not change inside the loop,
	# so fetch it once instead of once per chemical
	httk.casrn <- get_cheminfo()

	# seq_len() gives an empty loop when no chemical qualifies
	for(i in seq_len(nchem)) {
		casrn <- casrn.list[i]
		code <- paste0("C",str_replace_all(casrn,"-",""))
		code.list[i] <- code
		# name is taken from the first study row for this code
		cname.list[i] <- RAT.3STUDIES[match(code,RAT.3STUDIES[,"CODE"]),"Name"]
		ld50.list[i] <- RAT.LD50[code,"Rat_LD50_Oral"]
		if(is.element(casrn,httk.casrn)) {
			#css <- get_wetmore_css(chem.cas=casrn,species="Rat", which.quantile = 0.50,suppress.messages=T)
			parms <- parameterize_pbtk(chem.cas=casrn,species="Rat",default.to.human=T,
										tissuelist=list(Liver=c("Liver"),Kidney=c("Kidney"),Lung=c("Lung"),Gut=c("Gut")),
										force.human.clint.fub = F,clint.pvalue.threshold=0.05,fu.hep.correct=TRUE)
			css <- calc_analytic_css(chem.cas=casrn,daily.dose=1,output.units='uM',model = 'pbtk',species='Rat',concentration='plasma',suppress.messages=T,fu.hep.correct=T,recalc.blood2plasma=F,parameters=parms)
			css.list[i] <- as.numeric(css)
		}
		# first matching row of the httk summary, if any
		httk.row <- match(casrn,httk.summary[,"CAS"])
		if(!is.na(httk.row)) {
			css.class.list[i] <- httk.summary[httk.row,"Triage.Call"]
			css.paper.list[i] <- httk.summary[httk.row,"Predicted.Css"]
		}
		# first matching row of the cytotoxicity summary, if any
		cyto.row <- match(code,cytotox.summary[,"CODE"])
		if(!is.na(cyto.row)) {
			cyto.med.list[i] <- cytotox.summary[cyto.row,"cyto_pt_um"]
			cyto.min.list[i] <- cytotox.summary[cyto.row,"lower_bnd_um"]
		}
	}

	colname.list <- c("CODE","CASRN","Name","LD50","Css.rat.httk","Css","Css.class","Cytotox.med","Cytotox.min","rat.cyto.med","rat.cyto.min","alpha","conc0","conc1","death.width","hscale","hits.chr","hits.dev","hits.mgr")
	# start from an all-NA frame so the columns that are filled in later
	# (rat.cyto.*, fit parameters, hit counts) are present from the outset
	temp <- as.data.frame(matrix(nrow=nchem,ncol=length(colname.list)))
	names(temp) <- colname.list
	NCHEM.IVIVE <<- nchem
	temp[,"CODE"] <- code.list
	temp[,"CASRN"] <- casrn.list
	temp[,"Name"] <- cname.list
	temp[,"LD50"] <- ld50.list
	temp[,"Css.rat.httk"] <- css.list
	temp[,"Css"] <- css.paper.list
	temp[,"Css.class"] <- css.class.list
	temp[,"Cytotox.med"] <- cyto.med.list
	temp[,"Cytotox.min"] <- cyto.min.list

	rownames(temp) <- code.list
	MASTER.LIST.IVIVE <<- temp
	# debugging pause; only meaningful when a user is at the console
	if(interactive()) browser()
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Calculate and plot the hit distribution by chemical
#
# QC=OK
#--------------------------------------------------------------------------------------
rat.ivive.plot.hit.dist <- function(istart=1,to.file=FALSE) {
	# For each chemical in rows istart..NCHEM.IVIVE of MASTER.LIST.IVIVE:
	#   1. histogram the RAT.3STUDIES "dose" values for that CASRN on bins whose
	#      edges grow by a factor of 1.5, and draw it with hist.log
	#   2. mark the LD50 (red vertical line) when it is available
	#   3. fit hit.count.theory to the histogram with optim (L-BFGS-B), using
	#      hit.error.func as the objective, and overlay the fitted curve
	#   4. when both a cytotoxicity value and a rat Css are available, draw the
	#      range cyto.min/css .. cyto.med/css as a green horizontal bar
	# Fit parameters, per-study-type row counts and the rat cytotoxicity range are
	# stored in a copy of MASTER.LIST.IVIVE that is published as the global
	# HIT.SUMMARY.IVIVE and written to ../output/rat_ivive_hit_summary.csv.
	#
	# istart  - first row of MASTER.LIST.IVIVE to process
	# to.file - TRUE: plot to ../plots/rat_ivive_by_chemical_hit_dist.pdf;
	#           FALSE: plot to the current device and pause after each chemical
	#
	# Side effects: sets the globals X.EXP and Y.EXP (read by hit.error.func)
	# for every chemical that is fitted.
	cat("==========================================================================\n")
	cat("plot.hit.dist\n")
	cat("==========================================================================\n")
	flush.console()

	if(to.file) {
		fname <- "../plots/rat_ivive_by_chemical_hit_dist.pdf"
		pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
		# close the PDF even if a plot or fit fails part-way through
		on.exit(dev.off(),add=TRUE)
	}
	par(mfrow=c(3,2),mar=c(4,4,2,2))

	# overall row counts per study type, printed with the CHR/type ratio
	study.type <- RAT.3STUDIES[,"study_type"]
	n.chr <- sum(is.element(study.type,"CHR"))
	n.dev <- sum(is.element(study.type,"DEV"))
	n.mgr <- sum(is.element(study.type,"MGR"))
	r.chr <- n.chr / n.chr
	r.dev <- n.chr / n.dev
	r.mgr <- n.chr / n.mgr

	cat("CHR: ",n.chr,":",format(r.chr,digits=2),"\n")
	cat("DEV: ",n.dev,":",format(r.dev,digits=2),"\n")
	cat("MGR: ",n.mgr,":",format(r.mgr,digits=2),"\n")
	flush.console()
	hit.sum <- MASTER.LIST.IVIVE

	# istart:NCHEM.IVIVE would count backwards when istart is past the end
	index.list <- if(istart<=NCHEM.IVIVE) seq(from=istart,to=NCHEM.IVIVE,by=1) else integer(0)

	# Every chemical is processed; the earlier restriction to
	# Css.class=="On the Order" was already unconditionally overridden.
	for(i in index.list) {
		code <- MASTER.LIST.IVIVE[i,"CODE"]
		casrn <- MASTER.LIST.IVIVE[i,"CASRN"]
		cname <- MASTER.LIST.IVIVE[i,"Name"]
		ld50 <- as.numeric(MASTER.LIST.IVIVE[i,"LD50"])

		cat("========================================================\n")
		cat(i,":",code,":",cname,"\n")

		temp <- RAT.3STUDIES[is.element(RAT.3STUDIES[,"CASRN"],casrn),]
		n.chr <- sum(is.element(temp[,"study_type"],"CHR"))
		n.dev <- sum(is.element(temp[,"study_type"],"DEV"))
		n.mgr <- sum(is.element(temp[,"study_type"],"MGR"))
		cat("Counts for chr,dev,mgr: ",n.chr,n.dev,n.mgr,"\n")
		hit.sum[i,"hits.chr"] <- n.chr
		hit.sum[i,"hits.dev"] <- n.dev
		hit.sum[i,"hits.mgr"] <- n.mgr

		# Only positive, finite doses can be placed on the log-spaced bins; an NA
		# dose would otherwise make the bin-range test below fail.
		dose <- temp[,"dose"]
		dose <- dose[is.finite(dose) & dose>0]
		if(length(dose)==0) {
			cat("no positive finite dose values - skipping\n")
			flush.console()
			next
		}

		# lowest bin edge: 1e-4, lowered in steps of 100 until it is below the data
		breaks.min <- 1e-4
		while(min(dose)<breaks.min) breaks.min <- breaks.min/100
		# 51 edges, each 1.5x the previous one, extended until the data are covered
		breaksA <- numeric(51)
		breaksA[1] <- breaks.min
		for(j in 2:51) breaksA[j] <- 1.5*breaksA[j-1]
		while(max(breaksA)<max(dose)) {
			breaksA <- c(breaksA,1.5*breaksA[length(breaksA)])
		}
		xA <- hist(dose,breaks=breaksA,plot=FALSE)
		ymax <- 1.5*max(xA$counts)

		hist.log(breaksA,xA$counts,ylim=c(0,ymax),xlab="LEL (mg/kg)",ylab="Hits",main=paste(casrn,":",cname))
		if(!is.na(ld50)) lines(c(ld50,ld50),c(0,ymax),col="red",lwd=3)

		# hit.error.func reads the histogram from these two globals
		bin.left <- breaksA[1:(length(breaksA)-1)]
		X.EXP <<- bin.left
		Y.EXP <<- xA$counts

		# Parameters are c(alpha, log10(conc0), log10(conc1), death.width, hscale).
		# log10(conc1) starts at the LD50 when that is a usable positive number;
		# an LD50 of 0 would otherwise give a non-finite start value.
		conc1.start <- if(is.finite(ld50) && ld50>0) log10(ld50) else -2
		start <- c(1,         0,   conc1.start,  1,    1)
		lwr <-   c(0.1,      -6,  -2,            0.1, 0.1)
		upr <-   c(4,         2,   4,            2,   100)
		res <- optim(par=start,fn=hit.error.func,method="L-BFGS-B",lower=lwr,upper=upr,control=list(maxit=2000))
		alpha <- res$par[1]
		conc0 <- 10**res$par[2]
		conc1 <- 10**res$par[3]
		death.width <- res$par[4]
		hscale <- res$par[5]
		hit.sum[i,"alpha"] <- alpha
		hit.sum[i,"conc0"] <- conc0
		hit.sum[i,"conc1"] <- conc1
		hit.sum[i,"death.width"] <- death.width
		hit.sum[i,"hscale"] <- hscale

		# overlay the fitted curve, evaluated at the left edge of every bin
		hits.theory <- vapply(bin.left,function(conc) hit.count.theory(conc,alpha,conc0,conc1,death.width,hscale),numeric(1))
		lines(bin.left,hits.theory)

		css.rat <- as.numeric(MASTER.LIST.IVIVE[i,"Css.rat.httk"])
		css.class <- MASTER.LIST.IVIVE[i,"Css.class"]
		cyto.med <- as.numeric(MASTER.LIST.IVIVE[i,"Cytotox.med"])
		cyto.min <- as.numeric(MASTER.LIST.IVIVE[i,"Cytotox.min"])
		if(!is.na(cyto.med) && !is.na(css.rat)) {
			# cytotoxicity concentrations divided by the rat Css
			rat.med <- cyto.med / css.rat
			rat.min <- cyto.min / css.rat
			lines(c(rat.min,rat.med),c(ymax*0.8,ymax*0.8),col="green",lwd=3)
			hit.sum[i,"rat.cyto.med"] <- rat.med
			hit.sum[i,"rat.cyto.min"] <- rat.min
		}
		if(!is.na(css.class)) {
			text(x=1e-2,y=ymax*0.9,paste("Css.class=",css.class),pos=4)
		}
		flush.console()
		# pause after each chemical when plotting to the screen
		if(!to.file && interactive()) browser()
	}
	HIT.SUMMARY.IVIVE <<- hit.sum
	outfile <- "../output/rat_ivive_hit_summary.csv"
	write.csv(hit.sum,file=outfile,row.names=FALSE)
}
#--------------------------------------------------------------------------------------
#
# Summary plot of LD50 (points) and IVIVE burst range (lines) against the in vivo burst
#
#--------------------------------------------------------------------------------------
hit.summary <- function(to.file=FALSE) {
	# Log-log scatter plot of LD50 (y) against the fitted conc1 (x, labelled
	# "In Vivo Burst") for every row of the global HIT.SUMMARY. Each chemical
	# with a known Css.class also gets a vertical bar at x=conc1 spanning
	# rat.cyto.med..rat.cyto.min, coloured by class:
	#   "On the Order"        - green
	#   ">3.2x Overestimated" - yellow
	#   ">10x Overestimated"  - red
	# The diagonal y=x is drawn for reference.
	#
	# to.file - TRUE: plot to plots/hit_summary.pdf; FALSE: current device
	#
	# NOTE(review): this reads the global HIT.SUMMARY, whereas
	# rat.ivive.plot.hit.dist in this file publishes HIT.SUMMARY.IVIVE - confirm
	# which table is intended.
	if(to.file) {
		fname <- "plots/hit_summary.pdf"
		pdf(file=fname,width=7,height=7,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
		# close the PDF even if plotting fails
		on.exit(dev.off(),add=TRUE)
	}
	par(mfrow=c(1,1),mar=c(4,4,2,2))
	x <- HIT.SUMMARY[,"conc1"]
	y <- HIT.SUMMARY[,"LD50"]
	plot(y~x,log="xy",xlim=c(0.01,10000),ylim=c(0.01,10000),ylab="LD50 (points) / IVIVE Burst (lines)",xlab="In Vivo Burst",cex.lab=1.5,cex.axis=1.5)

	class.colors <- c("On the Order"="green",">3.2x Overestimated"="yellow",">10x Overestimated"="red")
	for(css.class in names(class.colors)) {
		rows <- HIT.SUMMARY[is.element(HIT.SUMMARY[,"Css.class"],css.class),]
		# each class is looped over its own row count; seq_len() gives an empty
		# loop when the class has no rows
		for(i in seq_len(nrow(rows))) {
			x0 <- rows[i,"conc1"]
			y1 <- rows[i,"rat.cyto.med"]
			y2 <- rows[i,"rat.cyto.min"]
			# both ends of the bar must be known, not just the median end
			if(!is.na(x0) && !is.na(y1) && !is.na(y2)) lines(c(x0,x0),c(y1,y2),col=class.colors[[css.class]],lwd=3)
		}
	}
	points(y~x,pch=21,bg="black")
	lines(c(0.01,10000),c(0.01,10000))
	invisible(NULL)
}
#--------------------------------------------------------------------------------------
#
# Error function for the model
#
#--------------------------------------------------------------------------------------
hit.error.func <- function(x) {
	# Objective for the optim fit: mean squared difference between the observed
	# histogram (globals X.EXP = bin positions, Y.EXP = counts) and
	# hit.count.theory evaluated at the same positions.
	#
	# x - parameter vector c(alpha, log10(conc0), log10(conc1), death.width, hscale)
	#
	# Only bins with a positive count are compared, plus one extra bin: the
	# first bin of the run of zeros at the upper end of the histogram (or the
	# last bin itself when the histogram does not end in a zero).
	alpha <- x[1]
	conc0 <- 10**x[2]
	conc1 <- 10**x[3]
	death.width <- x[4]
	hscale <- x[5]

	# walk down from the last bin while the counts are zero
	zero.index <- length(Y.EXP)
	for(k in length(Y.EXP):1) {
		if(Y.EXP[k]!=0) break
		zero.index <- k
	}

	occupied <- Y.EXP>0
	conc.list <- c(X.EXP[occupied],X.EXP[zero.index])
	resp.list <- c(Y.EXP[occupied],Y.EXP[zero.index])

	predicted <- vapply(conc.list,function(conc) hit.count.theory(conc,alpha,conc0,conc1,death.width,hscale),numeric(1))
	sq.err <- (predicted-resp.list)**2
	# accumulate in bin order
	total <- 0
	for(term in sq.err) total <- total + term
	total / length(resp.list)
}
#--------------------------------------------------------------------------------------
#
# theoretical distribution of hits 
#
# assumes a growing exponential
# growth constant = alpha
# center point = conc0
# death point = conc1
# method = 1, 2 or 3 (default 3)
#--------------------------------------------------------------------------------------
hit.count.theory <- function(conc,alpha,conc0,conc1,death.width,hscale,method=3) {
	# Model value for the number of hits at concentration conc.
	#
	# conc        - concentration(s); may be a vector
	# alpha       - exponential scale (method 1) or Hill exponent of the rise (method 3)
	# conc0       - concentration at which the rise is centred
	# conc1       - concentration at which the fall-off is centred
	# death.width - Hill exponent (methods 1, 2) or log10 width of the Gaussian
	#               (method 3) of the fall-off
	# hscale      - overall multiplier
	# method      - 1: exponential in log10(conc) times (1 - Hill at conc1)
	#               2: Hill at conc0 (exponent 0.2) times (1 - Hill at conc1) times
	#                  log10(conc/conc0), set to 0 below conc0
	#               3: Hill at conc0 (exponent alpha) times a Gaussian in
	#                  log10(conc) centred on conc1
	#
	# Returns a numeric vector the same length as conc; stops with an error for
	# any other method.
	log.conc  <- log10(conc)
	log.conc0 <- log10(conc0)
	log.conc1 <- log10(conc1)

	if(method==1) {
		val1 <- exp((log.conc-log.conc0)/alpha)
		val2 <- hill(conc,conc1,wid=death.width)
		val <- hscale * val1 * (1-val2)
	}
	else if(method==2) {
		val1 <- hill(conc,conc0,wid=0.2)
		val2 <- hill(conc,conc1,wid=death.width)
		val <- hscale * val1 * (1-val2) * (log.conc-log.conc0)
		# element-wise cut-off so that a vector of concentrations is handled too
		val[which(conc<conc0)] <- 0
	}
	else if(method==3) {
		val1 <- hill(conc,conc0,wid=alpha)
		val2 <- exp(-(log.conc-log.conc1)**2/death.width**2)
		val <- hscale * val1 * val2
	}
	else {
		stop("hit.count.theory: unsupported method ",method," (expected 1, 2 or 3)",call.=FALSE)
	}
	return(val)
}
#--------------------------------------------------------------------------------------
#
# Test the theoretical distribution
#
#--------------------------------------------------------------------------------------
test.theory <- function(alpha=1,conc0=1e-3,conc1=1000,death.width=1,hscale=1) {
	# Visual check of hit.count.theory: evaluate it on 41 points that start at
	# 1e-2 and grow by a factor of 1.5, then draw the values with hist.log.
	# The arguments are passed straight through to hit.count.theory.
	cat("==========================================================================\n")
	cat("test.theory\n")
	cat("==========================================================================\n")
	flush.console()

	# geometric grid, built by repeated multiplication
	n.pts <- 41
	grid <- numeric(n.pts)
	grid[1] <- 1e-2
	for(k in 2:n.pts) grid[k] <- 1.5*grid[k-1]

	model <- vapply(grid,function(conc) hit.count.theory(conc,alpha,conc0,conc1,death.width,hscale),numeric(1))
	top <- max(model)
	hist.log(grid,model,ylim=c(0,top),xlab="LEL (mg/kg)",ylab="Hits",main="")
}
#--------------------------------------------------------------------------------------
#
# calculate a Hill function
#
#--------------------------------------------------------------------------------------
hill <- function(x,mid,wid,bot=0,top=1) {
	# Hill curve: rises from bot to top as x increases, reaching the halfway
	# point at x = mid; wid is the exponent applied to both x and mid.
	x.pow <- x^wid
	frac <- x.pow/(x.pow + mid^wid)
	bot + (top-bot)*frac
}
#--------------------------------------------------------------------------------------
#
# plot a histogram on a log scale 
#
#--------------------------------------------------------------------------------------
hist.log <- function(x,y,ylim,xlab,ylab,main) {
	# Draw a bar chart on a logarithmic x axis.
	# x    - bin edges; bar i spans x[i]..x[i+1]
	# y    - bar heights, one per bar
	# ylim, xlab, ylab, main - passed on to plot()
	bar.idx <- seq_len(length(y))
	# empty frame first (type="n"), then one rectangle per bar down to y=0
	plot(x[bar.idx],y,type="n",col="gray40",lwd=2.5,log="x",xlab=xlab,ylim=ylim,ylab=ylab,main=main,cex.lab=1.2,cex.axis=1.2)
	rect(x[bar.idx],y,x[bar.idx+1],0)
	invisible(NULL)
}
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#######################################################################################
#--------------------------------------------------------------------------------------
#
# Do the first filter
#
#--------------------------------------------------------------------------------------
filter.1 <- function(file="ToxRefDB/toxrefdb_study_tg_effect_summary.txt",do.read=TRUE) {
	# Reduce the ToxRefDB effect summary to rat CHR/DEV/MGR rows from studies with
	# an "Acceptable ..." data usability, keep only the chemicals (by
	# chemical_casrn) that have all three study types, publish the result as the
	# global RAT.3STUDIES and write it to
	# ToxRefDB/toxrefdb_acceptable_rat_3studies.txt.
	#
	# file    - tab-delimited input file
	# do.read - TRUE: read `file` into the global TEMP first;
	#           FALSE: reuse the TEMP left by an earlier call
	#
	# The dimensions of the table are printed after each filtering step.
	cat("==========================================================================\n")
	cat("filter.1\n")
	cat("==========================================================================\n")
	flush.console()
	if(do.read) {
		TEMP <<- read.table(file,header=TRUE,sep="\t",stringsAsFactors=FALSE,quote="\"",comment.char="")
	}
	print(dim(TEMP))

	# is.element() never returns NA, so rows with a missing value in a filter
	# column are dropped instead of turning into all-NA rows
	acceptable <- c("Acceptable Guideline (post-1998)","Acceptable Guideline (pre-1998)","Acceptable Non-guideline")
	temp <- TEMP[is.element(TEMP[,"data_usability_desc"],acceptable),]
	print(dim(temp))

	temp <- temp[is.element(temp[,"species"],"rat"),]
	print(dim(temp))

	temp <- temp[is.element(temp[,"study_type"],c("CHR","DEV","MGR")),]
	print(dim(temp))

	# debugging pause; only meaningful when a user is at the console
	if(interactive()) browser()
	casrn.list <- sort(uniquify(temp[,"chemical_casrn"]))
	cat("original casrn list: ",length(casrn.list),"\n")
	# only CHR, DEV and MGR rows remain, so three distinct study types means the
	# chemical has all of them
	has.all.three <- vapply(casrn.list,function(casrn) {
		study.list <- sort(uniquify(temp[is.element(temp[,"chemical_casrn"],casrn),"study_type"]))
		length(study.list)==3
	},logical(1),USE.NAMES=FALSE)
	casrn.list <- casrn.list[has.all.three]
	temp2 <- temp[is.element(temp[,"chemical_casrn"],casrn.list),]
	print(dim(temp2))
	if(interactive()) browser()
	RAT.3STUDIES <<- temp2
	outfile <- "ToxRefDB/toxrefdb_acceptable_rat_3studies.txt"
	write.table(temp2,file=outfile, row.names=FALSE, append=FALSE, quote=FALSE, sep = "\t")
}
