#--------------------------------------------------------------------------------------
#
# Generate the data matrix
#
#--------------------------------------------------------------------------------------
BB.dmat <- function() {
  # Build the PPARG / PPARA / RXR data matrix for the chemicals listed in
  # ../BB_PPAR/BB_PPARG_chems.xlsx and write it to ../BB_PPAR/PPARG.xlsx.
  #
  # Arguments: none.
  # Reads globals: MAT.logAC50, MAT.T.SCALED, MAT.Z.NORM, ASSAY.INFO, CHEMS.
  #   NOTE(review): assumes the MAT.* objects and CHEMS are data frames whose
  #   row names are chemical codes - confirm against where they are loaded.
  # Side effects: writes the xlsx file, then stops in browser() so the result
  #   can be inspected interactively.
  #
  # Summary columns, one row per chemical:
  #   <group>.hit.fraction  number of assays in the group with logAC50 < 6,
  #                         divided by the number of assays in the group
  #   <group>.hit.hiZ       fraction of the group's non-NA Z values that are
  #                         >= 3 (0 when every Z value in the group is NA)
  print.current.function()

  file <- "../BB_PPAR/BB_PPARG_chems.xlsx"
  BBchems <- read.xlsx(file)

  # Assay groups used for the summary columns.
  pparg.list <- c("ATG_PPRE_CIS_up","ATG_PPARg_TRANS_up","NVS_NR_hPPARg",
                  "TOX21_PPARg_BLA_Agonist_ratio","TOX21_PPARg_BLA_antagonist_ratio",
                  "OT_PPARg_PPARgSRC1_0480","OT_PPARg_PPARgSRC1_1440")
  ppara.list <- c("ATG_PPRE_CIS_up","ATG_PPARa_TRANS_up","NVS_NR_hPPARa")
  rxr.list <- c("ATG_RXRa_TRANS_up","ATG_RXRb_TRANS_up",
                "OT_NURR1_NURR1RXRa_0480","OT_NURR1_NURR1RXRa_1440")

  # Full set of assay columns exported; the order fixes the output column order.
  assay.list <- c("ATG_PPRE_CIS_up","ATG_PPARg_TRANS_up","NVS_NR_hPPARg",
                  "TOX21_PPARg_BLA_Agonist_ratio","TOX21_PPARg_BLA_antagonist_ratio",
                  "ATG_PPARa_TRANS_up",
                  "NVS_NR_hPPARa",
                  "ATG_RXRa_TRANS_up",
                  "ATG_RXRb_TRANS_up",
                  "OT_NURR1_NURR1RXRa_0480",
                  "OT_NURR1_NURR1RXRa_1440",
                  "OT_PPARg_PPARgSRC1_0480",
                  "OT_PPARg_PPARgSRC1_1440",
                  "LTEA_HepaRG_ACOX1_up",
                  "LTEA_HepaRG_CYP4A11_up",
                  "LTEA_HepaRG_CYP7A1_up",
                  "LTEA_HepaRG_FABP1_up",
                  "LTEA_HepaRG_HIF1A_up",
                  "LTEA_HepaRG_HMGCS2_up",
                  "LTEA_HepaRG_LPL_up")

  code.list <- BBchems[,"CODE"]
  nchem <- length(code.list)
  ac50mat <- MAT.logAC50[code.list,assay.list]
  tmat <- MAT.T.SCALED[code.list,assay.list]
  zmat <- MAT.Z.NORM[code.list,assay.list]

  # Fraction of a group's assays with logAC50 below 6. NA entries are NOT
  # counted as hits (na.rm=TRUE); the denominator stays the full group size.
  # NOTE(review): 6 is presumably the "inactive" placeholder value - confirm.
  hit.fraction <- function(assays) {
    nhit <- rowSums(ac50mat[,assays,drop=FALSE] < 6, na.rm=TRUE)
    unname(nhit) / length(assays)
  }

  # Fraction of a group's non-NA Z values that are >= 3; 0 if none are non-NA.
  hiz.fraction <- function(assays) {
    z <- zmat[,assays,drop=FALSE]
    ntested <- unname(rowSums(!is.na(z)))
    nhigh <- unname(rowSums(z >= 3, na.rm=TRUE))
    ifelse(ntested > 0, nhigh / ntested, 0)
  }

  name.list <- c("PPARG.hit.fraction","PPARG.hit.hiZ","PPARA.hit.fraction",
                 "PPARA.hit.hiZ","RXR.hit.fraction","RXR.hit.hiZ")
  summat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  names(summat) <- name.list
  summat[["PPARG.hit.fraction"]] <- hit.fraction(pparg.list)
  summat[["PPARG.hit.hiZ"]] <- hiz.fraction(pparg.list)
  summat[["PPARA.hit.fraction"]] <- hit.fraction(ppara.list)
  summat[["PPARA.hit.hiZ"]] <- hiz.fraction(ppara.list)
  summat[["RXR.hit.fraction"]] <- hit.fraction(rxr.list)
  summat[["RXR.hit.hiZ"]] <- hiz.fraction(rxr.list)

  # Tag each value column with its source so the three blocks stay
  # distinguishable after cbind (must happen after the summaries above, which
  # index by the untagged assay names).
  names(ac50mat) <- paste0(names(ac50mat),".logAC50")
  names(tmat) <- paste0(names(tmat),".T")
  names(zmat) <- paste0(names(zmat),".Z")

  # logAC50 values for every assay annotated with a cytotoxicity-type process.
  cytotox.assay.set <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],c("cytotoxicity SRB","cytotoxicity BLA","proliferation decrease")),"assay"]
  atemp <- MAT.logAC50[code.list,cytotox.assay.set]
  chems <- CHEMS[code.list,]

  dmat <- cbind(chems,summat,ac50mat,zmat,tmat,atemp)

  # Prepend the columns of the input spreadsheet.
  dmat <- cbind(BBchems,dmat)
  file <- "../BB_PPAR/PPARG.xlsx"
  write.xlsx(dmat,file)
  browser()
}
#--------------------------------------------------------------------------------------
#
# Generate the data matrix for the hard-coded pyrethroid chemical list
#
#--------------------------------------------------------------------------------------
BB.pyrethroids <- function() {
  # Build the PPARG / PPARA / RXR data matrix for a fixed list of twelve
  # chemical codes and write it to ../BB_PPAR/PPARG_pyrethroids.xlsx.
  #
  # Arguments: none.
  # Reads globals: MAT.logAC50, MAT.T.SCALED, MAT.Z.NORM, ASSAY.INFO, CHEMS.
  #   NOTE(review): assumes the MAT.* objects and CHEMS are data frames whose
  #   row names are chemical codes - confirm against where they are loaded.
  # Side effects: writes the xlsx file, then stops in browser() so the result
  #   can be inspected interactively.
  #
  # Summary columns, one row per chemical:
  #   <group>.hit.fraction  number of assays in the group with logAC50 < 6,
  #                         divided by the number of assays in the group
  #   <group>.hit.hiZ       fraction of the group's non-NA Z values that are
  #                         >= 3 (0 when every Z value in the group is NA)
  print.current.function()

  # Assay groups used for the summary columns.
  pparg.list <- c("ATG_PPRE_CIS_up","ATG_PPARg_TRANS_up","NVS_NR_hPPARg",
                  "TOX21_PPARg_BLA_Agonist_ratio","TOX21_PPARg_BLA_antagonist_ratio",
                  "OT_PPARg_PPARgSRC1_0480","OT_PPARg_PPARgSRC1_1440")
  ppara.list <- c("ATG_PPRE_CIS_up","ATG_PPARa_TRANS_up","NVS_NR_hPPARa")
  rxr.list <- c("ATG_RXRa_TRANS_up","ATG_RXRb_TRANS_up",
                "OT_NURR1_NURR1RXRa_0480","OT_NURR1_NURR1RXRa_1440")

  # Full set of assay columns exported; the order fixes the output column order.
  assay.list <- c("ATG_PPRE_CIS_up","ATG_PPARg_TRANS_up","NVS_NR_hPPARg",
                  "TOX21_PPARg_BLA_Agonist_ratio","TOX21_PPARg_BLA_antagonist_ratio",
                  "ATG_PPARa_TRANS_up",
                  "NVS_NR_hPPARa",
                  "ATG_RXRa_TRANS_up",
                  "ATG_RXRb_TRANS_up",
                  "OT_NURR1_NURR1RXRa_0480",
                  "OT_NURR1_NURR1RXRa_1440",
                  "OT_PPARg_PPARgSRC1_0480",
                  "OT_PPARg_PPARgSRC1_1440",
                  "LTEA_HepaRG_ACOX1_up",
                  "LTEA_HepaRG_CYP4A11_up",
                  "LTEA_HepaRG_CYP7A1_up",
                  "LTEA_HepaRG_FABP1_up",
                  "LTEA_HepaRG_HIF1A_up",
                  "LTEA_HepaRG_HMGCS2_up",
                  "LTEA_HepaRG_LPL_up")

  code.list <- c("C10453868","C122008859","C23031369","C28434006","C39515418","C52315078","C52645531","C584792","C66230044","C68359375","C79538322","C82657043")
  nchem <- length(code.list)
  ac50mat <- MAT.logAC50[code.list,assay.list]
  tmat <- MAT.T.SCALED[code.list,assay.list]
  zmat <- MAT.Z.NORM[code.list,assay.list]

  # Fraction of a group's assays with logAC50 below 6. NA entries are NOT
  # counted as hits (na.rm=TRUE); the denominator stays the full group size.
  # NOTE(review): 6 is presumably the "inactive" placeholder value - confirm.
  hit.fraction <- function(assays) {
    nhit <- rowSums(ac50mat[,assays,drop=FALSE] < 6, na.rm=TRUE)
    unname(nhit) / length(assays)
  }

  # Fraction of a group's non-NA Z values that are >= 3; 0 if none are non-NA.
  hiz.fraction <- function(assays) {
    z <- zmat[,assays,drop=FALSE]
    ntested <- unname(rowSums(!is.na(z)))
    nhigh <- unname(rowSums(z >= 3, na.rm=TRUE))
    ifelse(ntested > 0, nhigh / ntested, 0)
  }

  name.list <- c("PPARG.hit.fraction","PPARG.hit.hiZ","PPARA.hit.fraction",
                 "PPARA.hit.hiZ","RXR.hit.fraction","RXR.hit.hiZ")
  summat <- as.data.frame(matrix(nrow=nchem,ncol=length(name.list)))
  names(summat) <- name.list
  summat[["PPARG.hit.fraction"]] <- hit.fraction(pparg.list)
  summat[["PPARG.hit.hiZ"]] <- hiz.fraction(pparg.list)
  summat[["PPARA.hit.fraction"]] <- hit.fraction(ppara.list)
  summat[["PPARA.hit.hiZ"]] <- hiz.fraction(ppara.list)
  summat[["RXR.hit.fraction"]] <- hit.fraction(rxr.list)
  summat[["RXR.hit.hiZ"]] <- hiz.fraction(rxr.list)

  # Tag each value column with its source so the three blocks stay
  # distinguishable after cbind (must happen after the summaries above, which
  # index by the untagged assay names).
  names(ac50mat) <- paste0(names(ac50mat),".logAC50")
  names(tmat) <- paste0(names(tmat),".T")
  names(zmat) <- paste0(names(zmat),".Z")

  # logAC50 values for every assay annotated with a cytotoxicity-type process.
  cytotox.assay.set <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],c("cytotoxicity SRB","cytotoxicity BLA","proliferation decrease")),"assay"]
  atemp <- MAT.logAC50[code.list,cytotox.assay.set]
  chems <- CHEMS[code.list,]

  dmat <- cbind(chems,summat,ac50mat,zmat,tmat,atemp)

  file <- "../BB_PPAR/PPARG_pyrethroids.xlsx"
  write.xlsx(dmat,file)
  browser()
}


#--------------------------------------------------------------------------------------
#
# Calculate the hit distribution by chemical
#
# QC=OK
#--------------------------------------------------------------------------------------
BB.analysis <- function(to.file=FALSE,target.gene=NA) {
  # Plot, for every chemical in CHEMS with more than 5 hits, a histogram of its
  # AC50 values across the non-cytotoxicity assays, and write one summary row
  # per plotted chemical to ../output/chemical_hitdist.txt.
  #
  # Arguments:
  #   to.file      if TRUE, plots go to a PDF under ../plots/ and the function
  #                runs unattended; if FALSE, plots go to the current device
  #                and browser() is called after each chemical and at the end.
  #   target.gene  NA (default) or a gene name matched against
  #                ASSAY.INFO[,"intended_target"]. When given, the AC50 of each
  #                assay for that gene is overlaid as a coloured triangle, the
  #                PDF name gets the gene as a suffix, and the summary text
  #                file is NOT written.
  #
  # Reads globals: CHEMS, CYTOTOX, ASSAY.INFO, MAT.AC50, MAT.logAC50.
  # Side effects: sets options(warn=1); assigns the global CYTOTOX.ASSAYS;
  #   changes par(mfrow, mar); writes the text file and/or the PDF.
  #
  # NOTE(review): an earlier version computed a `doit` flag from the target
  # gene's logAC50 values but never consulted it, so every chemical is
  # processed regardless of target.gene. That behaviour is kept here; wire up
  # a skip if filtering was intended.
  print.current.function()
  options(warn=1)
  flush.console()
  target.gene <- as.character(target.gene)
  nchem <- dim(CHEMS)[1]
  file <- "../output/chemical_hitdist.txt"
  s <- "CODE\tCASRN\tName\tIntendedTarget\tStructureCategory\tUseCategory\tUseSuperCategory\tAssaysTested\tHits\tHitRatio\tSelectiveHits\tSelectiveHitRatio\tCytotoxAssaysTested\tCytotoxHits\tMinAC50\tCytotoxMedian\tCytotoxMin\n"
  if(is.na(target.gene)) cat(file=file,s,append=FALSE)
  if(to.file) {
    fname <- "../plots/chemical_hit_dist.pdf"
    if(!is.na(target.gene)) fname <- paste0("../plots/chemical_hit_dist_",target.gene,".pdf")
    pdf(file=fname,width=7,height=10,pointsize=12,bg="white",paper="letter",pagecentre=TRUE)
    # Close the device even if an error interrupts the loop, so the PDF is
    # never left open.
    on.exit(graphics.off(), add=TRUE)
  }
  par(mfrow=c(3,2),mar=c(4,4,2,2))

  # Assays for the requested gene (loop-invariant, so looked up once).
  agset <- NULL
  if(!is.na(target.gene)) agset <- ASSAY.INFO[is.element(ASSAY.INFO[,"intended_target"],target.gene),"assay"]

  cytotox.assay.set <- ASSAY.INFO[is.element(ASSAY.INFO[,"biological_process"],c("cytotoxicity SRB","cytotoxicity BLA","proliferation decrease")),"assay"]
  CYTOTOX.ASSAYS <<- cytotox.assay.set

  # Columns that are NOT cytotoxicity assays. The selector is built from the
  # column names of MAT.logAC50 and applied to MAT.AC50.
  # NOTE(review): this assumes both matrices have identical columns - confirm.
  non.cytotox <- !is.element(names(MAT.logAC50),cytotox.assay.set)

  # Marker fill colour per assay source group; unknown groups stay white.
  source.colors <- c(NVS_NR="black",
                     OT="green",
                     Tox21_BLA_Agonist="gray",
                     Tox21_BLA_Antagonist="gray",
                     Tox21_LUC_Agonist="gray",
                     Tox21_LUC_Antagonist="gray",
                     ATG_CIS="violet",
                     ATG_TRANS="violet",
                     ACEA="red",
                     BSK_up="orange",
                     BSK_down="orange",
                     Apr_up="yellow",
                     Apr_dn="yellow")

  for(i in seq_len(nchem)) {
    code <- CHEMS[i,"CODE"]
    casrn <- CHEMS[i,"CASRN"]
    cname <- CHEMS[i,"Name"]
    target <- CHEMS[i,"target_gene"]
    ccat <- CHEMS[i,"structure_category"]
    ucat <- CHEMS[i,"use_category"]
    uscat <- CHEMS[i,"use_super_category"]

    cytotox.median <- as.numeric(as.character(CYTOTOX[code,"cytotox_median_um"]))
    cytotox.min <- as.numeric(as.character(CYTOTOX[code,"cytotox_lower_bound_um"]))
    # No cytotoxicity estimate for this chemical: fall back to fixed values.
    if(is.na(cytotox.median)) {
      cytotox.median <- 1000
      cytotox.min <- 100
    }

    # AC50 values over the non-cytotoxicity assays: drop NA entries, floor the
    # remaining values at 1E-3, and count a value below 1000000 as a hit.
    tempA <- MAT.AC50[code,non.cytotox]
    tempA <- tempA[!is.na(tempA)]
    tempA[tempA<1E-3] <- 1E-3
    ntry <- length(tempA)
    tempA <- tempA[tempA<1000000]
    nhit <- length(tempA)
    hit.ratio <- nhit/ntry

    # "Selective" hits are those below the cytotoxicity lower bound.
    nhit.selective <- length(tempA[tempA<cytotox.min])
    hit.ratio.selective <- nhit.selective / ntry
    tempA <- sort(tempA)

    # Same bookkeeping for the cytotoxicity assays themselves.
    cytotox.tempA <- MAT.AC50[code,cytotox.assay.set]
    cytotox.tempA <- cytotox.tempA[!is.na(cytotox.tempA)]
    cytotox.ntry <- length(cytotox.tempA)

    if(nhit>5) {
      # Computed only here: min() on an empty vector would warn for every
      # chemical without hits.
      minAC50 <- min(tempA)

      # Geometric histogram breaks (ratio 1.2), extended until they cover the
      # largest AC50. Because values are floored at 1E-3 above, the widening
      # branch below cannot trigger with the current floor; it is kept in case
      # the floor changes.
      breaksA.min <- 1e-3
      if(minAC50<breaksA.min) {
        breaksA.min <- breaksA.min/100
      }
      breaksA <- breaksA.min
      for(k in 1:65) breaksA <- c(breaksA,1.2*breaksA[length(breaksA)])
      while(max(breaksA)<=max(tempA)) {
        breaksA <- c(breaksA,1.2*breaksA[length(breaksA)])
      }
      print(code)
      xA <- hist(tempA,breaks=breaksA,plot=FALSE)
      ymax <- 1.5*max(xA$counts)
      if(ymax<5) ymax <- 5
      # NOTE(review): cytotox.max is not defined anywhere in this function; it
      # resolves only if a global of that name exists or hist.log never
      # evaluates the argument. Left unchanged - confirm against hist.log.
      hist.log(breaksA,xA$counts,ylim=c(0,ymax),xlab="AC50 (uM)",ylab="Hits",main=paste(casrn,":",cname),cytotox.median, cytotox.min, cytotox.max)
      eps <- 0.08
      xpmin <- breaksA.min
      text(xpmin,ymax*(1-1*eps),paste0("ntry=",ntry),pos=4)
      text(xpmin,ymax*(1-2*eps),paste0("nhit=",nhit),pos=4)
      text(xpmin,ymax*(1-3*eps),paste0("nhit (Z>3)=",nhit.selective),pos=4)
      text(xpmin,ymax*(1-4*eps),paste0("cytotox median=",format(cytotox.median,digits=2)),pos=4)
      text(xpmin,ymax*(1-5*eps),paste0("cytotox min=",format(cytotox.min,digits=2)),pos=4)

      # Cytotoxicity hits are drawn as red stars at a fixed height.
      tempA.cytotox <- cytotox.tempA[cytotox.tempA<1000000]
      cytotox.nhit <- length(tempA.cytotox)
      text(xpmin,ymax*(1-6*eps),paste0("cytotox try=",cytotox.ntry),pos=4)
      text(xpmin,ymax*(1-7*eps),paste0("cytotox hit=",cytotox.nhit),pos=4)
      for(j in seq_along(tempA.cytotox)) {
        points(tempA.cytotox[j],ymax*0.6,pch="*",col="red",cex=2)
      }

      # One tab-separated summary row, in the column order of the header.
      s <- paste(code,casrn,cname,target,ccat,ucat,uscat,
                 ntry,nhit,format(hit.ratio,digits=2),
                 nhit.selective,format(hit.ratio.selective,digits=2),
                 cytotox.ntry,cytotox.nhit,
                 format(minAC50,digits=3),
                 format(cytotox.median,digits=2),
                 format(cytotox.min,digits=2),
                 sep="\t")
      s <- paste0(s,"\n")
      cat(i,":",s)
      if(is.na(target.gene)) cat(file=file,s,append=TRUE)
      flush.console()

      if(!is.na(target.gene)) {
        # Overlay each target-gene assay as a triangle, coloured by its source
        # group, with a random vertical offset so markers do not stack.
        for(g in seq_along(agset)) {
          assay.g <- agset[g]
          ac50 <- MAT.AC50[code,assay.g]
          asource <- ASSAY.INFO[is.element(ASSAY.INFO[,"assay"],assay.g),"source_group"]
          pch <- 24
          color <- unname(source.colors[as.character(asource)])
          if(is.na(color)) color <- "white"
          yval <- ymax*0.5*(1+0.2*rnorm(1,0.1))
          points(ac50,yval,pch=pch,bg=color,fg="black",cex=2)
        }
      }
      # Interactive mode: pause after each plotted chemical.
      if(!to.file) browser()
    }
  }

  # The PDF device (if any) is closed by the on.exit handler registered above.
  if(!to.file) browser()
}
