<!---
  **Parameters set in preceding CFM files:

  RIGHT_NOW - date of run time
  RUN_TS - string of runtime formatted as "yyyy-mm-dd_HHMMSS"

  DATAFILENAME = "ecotox_explore_data_" & RUN_TS & ".csv" - file name for the csv data source
  RSCRIPTFILENAME = "ecotox_explore_data_" & RUN_TS & ".r" - r script file name

  APPLIEDFILTERSSTR - explore filters used to generate the dataset, formatted to fit this script
  ABBREVIATEDFILTERSSTR - explore filters summary, truncated for brevity
  YAXISLINEVAL - value for the y-axis line as generated by the Explore module, "NA" if not set

  X_VARIABLES - struct for all allowed x categories
  Y_VARIABLES - struct for all allowed y categories
  Z_VARIABLES - struct for all allowed z categories

  x_cat - selected x category
  y_cat - selected y category
  z_cat - selected z category

  PLOTTABLECOUNT - number of records that are able to be plotted
  TOTALCOUNT - total number of records in the dataset
  ROWCOUNT - number of rows in datatable, if more than 3000, only first 3000 are included

  **Column names from data source:
    "Test_Num"
    "CAS_Num"
    "Chemical_Name"
    "Species_Group"
    "Species_Name"
    "Common_Name"
    "Class"
    "Order"
    "Family"
    "Genus"
    "Effect"
    "Measurement"
    "Endpoint"
    "Duration_Std"
    "Conc_Type"
    "Conc_Mean_Std"
    "Conc_Units_Std"
    "Pub_Year"
    "Ecoref_Num"
    "Citation"
--->

<cfoutput>
<!--- Look up the data column and the axis label for the selected y category.
      By default the label is the category name followed by "\n(" unit ")";
      the label is later written into an R string literal (y.cat), where R reads
      the backslash-n as a line break. --->
<cfset y_datacol = Y_VARIABLES[y_cat]["datacol"]>
<cfset y_description = Y_VARIABLES[y_cat]["name"] & "\n(" & Y_VARIABLES[y_cat]["unit"] & ")">
<!--- For the Duration_Std column the unit stays on the same line as the name. --->
<cfif y_datacol EQ "Duration_Std">
  <cfset y_description = Y_VARIABLES[y_cat]["name"] & " (" & Y_VARIABLES[y_cat]["unit"] & ")">
</cfif>

<!--- Data column and display name for the selected x-axis (x_cat) and legend (z_cat) categories. --->
<cfset x_datacol = X_VARIABLES[x_cat]["datacol"]>
<cfset x_description = X_VARIABLES[x_cat]["name"]>
<cfset z_datacol = Z_VARIABLES[z_cat]["datacol"]>
<cfset z_description = Z_VARIABLES[z_cat]["name"]>	

##########################################################################################################################################################################
#### 
#### Purpose: Provide option to use exported data file from ECOTOX Knowledgebase Explore module 
####   to create customizable, high-quality figures comparable to display on Plot View.
####   Intended for users with basic knowledge of R, with layout/colors that can be customized.
####
#### File: #RSCRIPTFILENAME#, created on #RIGHT_NOW#
#### 
#### Selections from ECOTOX Explore Plot View:
####   Y-axis: "#Y_VARIABLES[y_cat]["name"]#"
####   X-axis: "#x_description#"
####   Legend Category: "#z_description#"
####   Display Line: #YAXISLINEVAL# #Y_VARIABLES[y_cat]["unit"]#
#### 
#### Applied Filters (abbreviated - for full list, see end of script):
#ABBREVIATEDFILTERSSTR#
####
#### #PLOTTABLECOUNT# Plottable Records -- #TOTALCOUNT# Total Records<cfif ROWCOUNT GTE 3000> (downloaded table and plot include first 3,000 records)</cfif>
####
#### NOTES: R script and data table file (.csv) must be unzipped into common directory/folder.
####   Data records are included if they can be converted to
####   Standardized Concentration Units (unit ratio equivalent to ppm)
####   with display and export limited to a maximum of 3,000 records. 
####
##########################################################################################################################################################################

#### Packages and working directory ---- 

## Install packages (if not already installed) and read in libraries.
## requireNamespace() only checks whether a package is available (it does not
## attach the package or print a warning when it is missing); library() then
## loads the package and stops with an error if the installation did not succeed.

if (!requireNamespace("this.path", quietly = TRUE)) install.packages("this.path")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")

library("ggplot2")
library("dplyr")

## Set working directory to be the source file location (for this script),
## so the data file is read from, and the figures are saved to, the script folder.
## this.path is only needed for this one call, so it is used via :: and not attached.
setwd(this.path::here())

#### Data file (.csv) from ECOTOX Explore ----
dat.name <- "#DATAFILENAME#"
      ## default data file name, change it here if the data export was saved under a different name
dat <- read.csv(dat.name) ## read in .csv data file

#### Select fields used in the plot - filled in based on selections in Explore ----

## Categories with: [1] Field name; [2] Custom Label - Full list of options included at end of script

y.cat <- c("#y_datacol#", "#y_description#")
x.cat <- c("#x_datacol#", "#x_description#")
l.cat <- c("#z_datacol#", "#z_description#")

## Stop early with a readable message if a selected field name (or the
## concentration column used for ordering the data) is not a column of the data file,
## e.g. after a typo when customizing the categories above
needed.fields <- unique(c(y.cat[1], x.cat[1], l.cat[1], "Conc_Mean_Std"))
missing.fields <- setdiff(needed.fields, names(dat))
if (length(missing.fields) > 0) {
  stop("Field(s) not found in ", dat.name, ": ",
       paste(missing.fields, collapse = ", "), call. = FALSE)
}

#### Data manipulation ----

## Make ** x.cat ** AND ** l.cat ** as factors with specified order based on concentration:
## rows are sorted by concentration first, then the factor levels follow the order
## in which each category first appears in the sorted data
dat <- arrange(dat, Conc_Mean_Std)

dat[[x.cat[1]]] <- factor(dat[[x.cat[1]]], levels = unique(dat[[x.cat[1]]]))
dat[[l.cat[1]]] <- factor(dat[[l.cat[1]]], levels = unique(dat[[l.cat[1]]]))

## Make Effect groups as factors with specified order (different than concentration)
effect.groups <- c("Biochemistry", "Enzyme(s)", "Hormone(s)", "Genetics", "Cell(s)", "Histology", "Accumulation",  
                   "Immunological", "Intoxication", "Physiology", "Injury", "Development", "Growth", "Morphology", 
                   "Behavior", "Avoidance", "Feeding behavior", "Reproduction", "Mortality", "Population",
                   "Ecosystem process", "Multiple", "Unspecified")

## Effect values present in the data but missing from the list above are appended
## after the listed groups; otherwise factor() would silently turn them into NA
extra.effects <- setdiff(as.character(unique(dat$Effect)), c(effect.groups, NA))

dat$Effect <- factor(dat$Effect, levels = c(effect.groups, extra.effects))


#### GRAPH preferences ----

## Colors (6 colors)
col6 <- c("##D8D97AFF", "##95C36EFF", "##74C8C3FF", "##5A97C1FF", "##295384FF", "##0A2E57FF")
## col6 <- c("##D72000FF", "##381A61FF",  "##1BB6AFFF","##CCBFFFFF", "##FFAD0AFF", "black") ##alt colors

## Color plots: cycling 6 colors against 5 shapes gives 30 unique shape/color combos;
## both vectors are recycled to a length of 600
fill.values1 <- rep(col6, times = 100)
shp.values1 <- rep(c(15, 16, 17, 18, 20), times = 120)

## Greyscale (b&w) plots: one cycle is 30 unique points, recycled to a length of 600
##   - first 5:  fillable shapes 25:21 with dark grey fill
##   - next 20:  shapes 20:17 and 15:0, black only (16 skipped, too similar to 19)
##   - last 5:   fillable shapes 25:21 with light grey fill
fill.cycle2 <- rep(c("grey45", "black", "grey75"), times = c(5, 20, 5))
shp.cycle2 <- c(25:17, 15:0, 25:21)
fill.values2 <- rep(fill.cycle2, times = 20)
shp.values2 <- rep(shp.cycle2, times = 20)

## Custom theme for plots: returns a ggplot2 theme object that is added on top of theme_bw()

ecotox_theme <- function() {
  ## text settings shared by the plot title and both axis titles
  title.text <- element_text(size = 18)
  ## x tick labels are rotated 45 degrees and right/top aligned
  x.tick.text <- element_text(size = 16, angle = 45, vjust = 1, hjust = 1)

  theme(
    ## titles and axis text
    plot.title = title.text,
    axis.title.x = title.text,
    axis.title.y = title.text,
    axis.text.x = x.tick.text,
    axis.text.y = element_text(size = 16),
    ## legend text
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    ## grid lines
    panel.grid.major.x = element_blank(), ##exclude this line if prefer major lines for x
    panel.grid.minor.y = element_blank()
  )
}

#### Plots similar to ECOTOX EXPLORE ----
##   X-axis (categorical), Y-axis (continuous), Legend Category (Categorical)

#### Simple ggplot - color ----
## The legend field is mapped to fill, shape and color; all three guides get the same title
p.a <- ggplot(
  dat,
  aes(
    x = .data[[x.cat[1]]],
    y = .data[[y.cat[1]]],
    fill = .data[[l.cat[1]]],
    shape = .data[[l.cat[1]]],
    color = .data[[l.cat[1]]]
  )
) +
  ## jittered, semi-transparent points
  geom_point(
    position = position_jitter(width = 0.15),
    size = 3,
    alpha = 0.65
  ) +
  scale_shape_manual(values = shp.values1) + ## 5 levels for shapes
  scale_color_manual(values = fill.values1) + ## 6 levels for colors => 30 unique shape/color combos
  ## axis titles, plot title and default legend names
  labs(
    x = paste0("\n", x.cat[2]),
    y = paste0("\n", y.cat[2]),
    title = paste0(y.cat[2], "\nx ", x.cat[2], ",\ncategorized by ", l.cat[2]),
    color = l.cat[1],
    fill = l.cat[1],
    shape = l.cat[1]
  ) +
  ## single-column legends titled with the custom legend label
  guides(
    fill = guide_legend(title = l.cat[2], ncol = 1),
    col = guide_legend(title = l.cat[2], ncol = 1),
    shape = guide_legend(title = l.cat[2], ncol = 1)
  ) +
  theme_bw() +
  ecotox_theme()


## if statements to adjust plots based on data

## 1) have Conc on a log scale and Duration (Days) on standard scale
    if (y.cat[1] == "Conc_Mean_Std") { 
      p.a <- p.a + scale_y_log10(breaks = 10^(-10:10),
              labels = scales::label_number(10^(-10:10))) ##default is conc. value
              ##labels = function(x) format(x, scientific = TRUE)) ##option for sci. notation
    }


## 2) change the size of x-axis and legend text when >25 categories,
## and if >50 categories (x-axis) or >30 (legend) remove the tick labels / legend
## and add the number of unique categories to the axis title / plot title instead
    n.x.cat <- n_distinct(dat[[x.cat[1]]])
    n.l.cat <- n_distinct(dat[[l.cat[1]]])
    
    if (n.x.cat > 25) { 
      p.a <- p.a + theme(axis.text.x = element_text(size = 8, angle = 45, vjust = 1, hjust = 1))
    }
    
    if (n.l.cat > 25) { 
      p.a <- p.a + theme(legend.text = element_text(size = 8))
    }

    if (n.x.cat > 50) { 
      p.a <- p.a + xlab(paste0("\n", x.cat[2], " (", n.x.cat, " unique)")) + 
        theme(axis.text.x = element_blank())
    }
    
    ## title text matches the base title of the plot, plus the count of legend categories
    if (n.l.cat > 30) { 
      p.a <- p.a + labs(title = paste0(y.cat[2], "\nx ", x.cat[2], ",\ncategorized by ", l.cat[2], 
        " (", n.l.cat, " unique)")) +
        theme(legend.position = "none")
    }


## 3) add horizontal reference line based on "Display Concentration/Duration Line" entered in Explore,
##    if no entry in Explore it will be NA
    
    disp.l <- #YAXISLINEVAL#

    if (!is.na(disp.l)) {
      p.a <- p.a + geom_hline(yintercept = disp.l, ##concentration/duration of interest 
                  color = "blue", linetype = "dashed", linewidth = 1) 
      ## Add text to reference line - add if desired
      ##p.a <- p.a + annotate(geom="label", x = n.x.cat - n.x.cat/10, 
      ##           y = disp.l, ##y-axis location for text, add offset if needed
      ##           label = "Conc. of interest", ##customize annotation as needed
      ##           label.size=NA, fill="##FFFFFFD9", ##white fill with 85% opaque
      ##           color = "blue", size = 4)
    }

p.a


#### Simple ggplot - greyscale ----
## The legend field is mapped to shape and fill; point outlines are always black
p.b <- ggplot(
  dat,
  aes(
    x = .data[[x.cat[1]]],
    y = .data[[y.cat[1]]],
    shape = .data[[l.cat[1]]],
    fill = .data[[l.cat[1]]]
  )
) +
  ## jittered, semi-transparent points
  geom_point(
    color = "black",
    position = position_jitter(width = 0.15),
    size = 3,
    alpha = 0.65
  ) +
  scale_shape_manual(values = shp.values2) + ## shapes cycle through 30 entries
  scale_fill_manual(values = fill.values2) + ## 5 dark grey fill + 20 black only + 5 light grey fill => 30 unique shape points
  ## axis titles, plot title and default legend names
  labs(
    x = paste0("\n", x.cat[2]),
    y = paste0("\n", y.cat[2]),
    title = paste0(y.cat[2], "\nx ", x.cat[2], ",\ncategorized by ", l.cat[2]),
    color = l.cat[1],
    fill = l.cat[1],
    shape = l.cat[1]
  ) +
  ## single-column legends titled with the custom legend label
  guides(
    fill = guide_legend(title = l.cat[2], ncol = 1),
    col = guide_legend(title = l.cat[2], ncol = 1),
    shape = guide_legend(title = l.cat[2], ncol = 1)
  ) +
  theme_bw() +
  ecotox_theme()

## Adjust the greyscale plot based on the data

## 1) Concentration goes on a log10 axis; Duration (days) keeps the standard axis
if (y.cat[1] == "Conc_Mean_Std") {
  p.b <- p.b +
    scale_y_log10(
      breaks = 10^(-10:10),
      labels = scales::label_number(10^(-10:10))
    )
}

## 2) Shrink x-axis / legend text when there are more than 25 categories; with more than
##    50 x categories drop the tick labels, and with more than 30 legend categories drop
##    the legend, reporting the number of unique categories in the title text instead
n.x.cat <- n_distinct(dat[[x.cat[1]]])
n.l.cat <- n_distinct(dat[[l.cat[1]]])

if (n.x.cat > 25) {
  p.b <- p.b +
    theme(axis.text.x = element_text(size = 8, angle = 45, vjust = 1, hjust = 1))
}

if (n.l.cat > 25) {
  p.b <- p.b +
    theme(legend.text = element_text(size = 8))
}

if (n.x.cat > 50) {
  p.b <- p.b +
    xlab(paste0("\n", x.cat[2], " (", n.x.cat, " unique)")) +
    theme(axis.text.x = element_blank())
}

if (n.l.cat > 30) {
  p.b <- p.b +
    labs(title = paste0(y.cat[2], "\nx ", x.cat[2], ",\ncategorized by ", l.cat[2],
                        " (", n.l.cat, " unique)")) +
    theme(legend.position = "none")
}

## 3) Horizontal reference line at the Display Concentration/Duration Line value entered
##    in Explore (disp.l, set in the color plot section; NA when nothing was entered)
if (!is.na(disp.l)) {
  p.b <- p.b +
    geom_hline(
      yintercept = disp.l, ##concentration/duration of interest
      color = "black",
      linetype = "dashed",
      linewidth = 1
    )
  ## Add text to reference line  - add if desired
  ##p.b <- p.b + annotate(geom="label", x = n.x.cat - n.x.cat/10, 
  ##              y = disp.l, ##y-axis location for text, add offset if needed  
  ##              label = "Conc. of interest", ##customize annotation as needed
  ##              label.size=NA, fill="##FFFFFFD9", ##white fill with 85% opaque
  ##              color = "black", size = 4)
}

p.b


#### EXPORT plots as .jpeg ----

## Time stamp (to the minute) added to the file names; exporting again within the
## same minute overwrites the earlier files
st <- format(Sys.time(), "%Y-%m-%d_%H.%M")

p.a.name <- paste0("ECOTOXfigcolor", st, ".jpeg") ##customize jpeg file name here for color figure
p.b.name <- paste0("ECOTOXfig", st, ".jpeg") ##customize jpeg file name here for grey scale figure

## Files are written to the working directory; the ggsave() argument is spelled out
## as "filename" rather than relying on partial matching of "file"
ggsave(filename = p.a.name, plot = p.a, width = 13, height = 9, units = "in", dpi = 300)
ggsave(filename = p.b.name, plot = p.b, width = 13, height = 9, units = "in", dpi = 300)


##########################################################################################################################################################################
#### END
##########################################################################################################################################################################

#### List of options for fields - Categories with: [1] Field name; [2] Custom Label
##
## For y.cat:
##       c("Conc_Mean_Std", "Concentration Mean (Std) \n(Unit ratio equivalent to ppm)")
##       c("Duration_Std", "Duration (Std) (days)")
##
## For x.cat AND l.cat: 
##       c("Species_Group", "Species Group")
##       c("Class", "Class")
##       c("Order", "Order")
##       c("Family", "Family")
##       c("Genus", "Genus")
##       c("Species_Name", "Species")
##       c("Chemical_Name", "Chemical")
##       c("Effect", "Effect Group")
##       c("Measurement", "Effect Measurement")
##       c("Endpoint", "Endpoint")
##       c("Ecoref_Num", "Reference")
##       c("Test_Num", "Test Number")
########

#### List of filters selected in Explore prior to export
##
#APPLIEDFILTERSSTR#
########

</cfoutput>