#!/usr/bin/perl
#
# Filename   : moves2smkEF.pl
# Author     : Catherine Seppanen, UNC
# Version    : 1.10
# Description: Generate SMOKE input emission factor lookup tables from MOVES2014 MySQL tables.
#            : Version 1.0 of this script was based on moves2smk_EF_v0.38.pl for processing
#            : MOVES2010b MySQL tables.
# Updates    : Version 1.1 - added support for rate-per-hour processing
#            : Version 1.2 - added support for SCC aggregation
#            : Version 1.3 - made formula processing less strict regarding existing emission factors and missing pollutants to work with SCC aggregation
#            : Version 1.4 - fix column name handling for CAS numbers as output pollutant names
#            : Version 1.5 - added support for process-specific output pollutants
#            : Version 1.6 - added support for rate-per-start processing
#            : Version 1.6.1 - improve formula processing time
#            : Version 1.7 - added support for applying NOx humidity corrections
#            : Version 1.8 - added support for rate-per-hour-oni processing
#            : Version 1.8.1 - apply NOx humidity corrections in RPHO mode
#            : Version 1.9 - added flag for indicating if NOx humidity adjustments were applied in MOVES
#            : Version 1.10 - added checks for scenario ID
#
# Usage: moves2smkEF.pl [-u <mysql user>] [-p <mysql password>]
#                            [-r RPD|RPV|RPP|RPH|RPS|RPHO]
#                            [--formulas <PollutantFormulasFile>] 
#                            [--fuel_agg <FuelTypeMappingFile>] 
#                            [--src_agg <SourceTypeMappingFile>] 
#                            [--road_agg <RoadTypeMappingFile>] 
#                            [--proc_agg <ProcessTypeMappingFile>] 
#                            [--moves_adjusted_nox Y|N]
#                            [--adjust_nox]
#                            [--pressures <CountyBarometricPressureFile>]
#                            <InputDBList> <PollutantMappingFile> [<OutputPath>]
# where
#   mysql user - MySQL user with table creation and write privileges in the MOVES databases
#   mysql password - password for the MySQL user (if needed)
#   RPD|RPV|RPP|RPH|RPS|RPHO - optional type of emission factors to process (rate-per-distance, rate-per-vehicle, rate-per-profile, rate-per-hour, rate-per-start, rate-per-hour-oni); if not specified, script will process all six types
#   PollutantFormulasFile - list of formulas used to calculate additional emission factors
#   FuelTypeMappingFile - list of MOVES fuel type IDs and corresponding aggregated fuel type ID
#   SourceTypeMappingFile - list of MOVES source type IDs and corresponding aggregated source type ID
#   RoadTypeMappingFile - list of MOVES road type IDs and corresponding aggregated road type ID
#   ProcessTypeMappingFile - list of MOVES process type IDs and corresponding aggregated process type ID
#   moves_adjusted_nox - flag to indicate if NOx humidity adjustments were applied in MOVES
#   adjust_nox - flag to indicate if NOx humidity corrections should be applied
#   CountyBarometricPressureFile - list of barometric pressure values for each county (required if using adjust_nox)
#   InputDBList - list of MySQL database names to process (generated by runspec_generator.pl MOVES preprocessor)
#   PollutantMappingFile - list of MOVES pollutant IDs and corresponding pollutant name; only listed pollutants will be included in output EF tables
#   OutputPath - optional; overrides the output path specified in InputDBList

use strict;
use warnings 'FATAL' => 'all';
use DBI;
use Getopt::Long;

# debug flag: when 0, the temporary *_smoke tables are dropped after each
# database is processed; a leading "debug" line in <InputDBList> sets it to 1
my $debug = 0;

#================================================================================================
# Process command line arguments

my $sqlUser = '';
my $sqlPass = '';
my $runType = '';
my $formulaFile = '';
our $moves_adjusted_nox = '';
our $adjust_nox = 0;
# initialize every file option to an empty string; assigning a single '' to the
# list would only initialize the first variable and leave the others undefined
my ($fuelAggFile, $srcAggFile, $roadAggFile, $procAggFile, $pressureFile) = ('') x 5;

# usage text shown for unknown options and for missing positional arguments
my $usageText = <<END;
Usage: $0 [-u <mysql user>] [-p <mysql password>] [-r RPD|RPV|RPP|RPH|RPS|RPHO]
  [--formulas <PollutantFormulasFile>] 
  [--fuel_agg <FuelTypeMappingFile>] 
  [--src_agg <SourceTypeMappingFile>] 
  [--road_agg <RoadTypeMappingFile>] 
  [--proc_agg <ProcessTypeMappingFile>] 
  [--moves_adjusted_nox Y|N]
  [--adjust_nox]
  [--pressures <CountyBarometricPressureFile>]
  <InputDBList> <PollutantMappingFile> [<OutputPath>]
END

# GetOptions returns false (after printing its own diagnostic) when it sees an
# unknown or malformed option; stop instead of silently ignoring the problem
GetOptions('user|u:s' => \$sqlUser, 
           'pass|p:s' => \$sqlPass, 
           'runtype|r:s' => \$runType, 
           'formulas=s' => \$formulaFile,
           'fuel_agg=s' => \$fuelAggFile, 
           'src_agg=s' => \$srcAggFile, 
           'road_agg=s' => \$roadAggFile, 
           'proc_agg=s' => \$procAggFile,
           'moves_adjusted_nox=s' => \$moves_adjusted_nox,
           'adjust_nox' => \$adjust_nox,
           'pressures=s' => \$pressureFile) or die $usageText;

# an empty run type means "process every type"; anything else must be a known type
my %validRunTypes = map { $_ => 1 } qw(RPD RPV RPP RPH RPS RPHO);
if ($runType && !$validRunTypes{$runType})
{
  die "Please specify a valid type after '-r': RPD, RPV, RPP, RPH, RPS, or RPHO. To run all types do not use '-r' argument.\n";
}

# the database list and pollutant mapping files are required
(scalar(@ARGV) >= 2) or die $usageText;

if ($moves_adjusted_nox && $moves_adjusted_nox ne 'Y' && $moves_adjusted_nox ne 'N')
{
  die "Please specify either Y or N for --moves_adjusted_nox.\n";
}

if ($moves_adjusted_nox eq 'Y' && $adjust_nox)
{
  die "--adjust_nox cannot be used when the NOx humidity adjustments have already been applied by MOVES (--moves_adjusted_nox is set to Y).\n";
}

# --adjust_nox only makes sense for unadjusted MOVES output, so imply N
if (!$moves_adjusted_nox && $adjust_nox)
{
  print "WARNING: Automatically setting --moves_adjusted_nox to N since --adjust_nox was specified.\n";
  $moves_adjusted_nox = 'N';
}

# remaining positional arguments; $outDir is optional and may stay undefined
my ($dbFile, $pollutantFile, $outDir) = @ARGV;

#================================================================================================
# Read the input database list file generated from the MOVES Driver Script preprocessor 

my $dbFH;
open($dbFH, "<", $dbFile) or die "Unable to open input file of database names: $dbFile\n";

# first line: either the optional "debug" keyword or the host line
my $line = <$dbFH>;
die "Input file of database names is empty: $dbFile\n" unless defined $line;
chomp($line);
if ($line =~ /^\s*debug\s*$/i)
{
  $debug = 1;
  $line = <$dbFH>;
  die "Missing host line in input file of database names: $dbFile\n" unless defined $line;
  chomp($line);
}
# this line is appended verbatim to the DBI data source name when connecting
my $hostname = $line;

# next line: output path (used only when no path was given on the command line)
$line = <$dbFH>;
die "Missing output path line in input file of database names: $dbFile\n" unless defined $line;
chomp($line);
$outDir = $line unless $outDir;
die "Output path was not specified in the input\n" unless $outDir;
$outDir =~ tr|\\|/|; # convert slashes
$outDir .= '/' unless $outDir =~ m|/$|; # append slash

# remaining lines: one database name per line
my @dbList;
while ($line = <$dbFH>)
{
  chomp($line);
  next unless $line && $line =~ /\S/; # skip blank and whitespace-only lines
  push(@dbList, $line);
}

close ($dbFH);

#================================================================================================
# Read file mapping MOVES pollutant IDs to output names

my $pollFH;
open($pollFH, "<", $pollutantFile) or die "Unable to open pollutant mapping file: $pollutantFile\n";

# lookup tables filled from the mapping file:
#   %keptPollMap     - MOVES pollutant ID => output pollutant name
#   %keptPollNames   - set of every valid output pollutant name
#   %keptPollProcMap - MOVES pollutant ID => list of {name, list} records for
#                      outputs restricted to specific process IDs
our (%keptPollMap, %keptPollNames, %keptPollProcMap);

while (my $entry = <$pollFH>)
{
  chomp($entry);

  # expected layout of a data line:
  #   1,"Total Gaseous Hydrocarbons","THC_INV","mass"
  #   88,"NonHAPTOG","EXH_NHTOG","mass","1 2 15 16"
  # column 1 is the unquoted integer MOVES2014 pollutant ID, columns 2 and 3 are
  # the quoted MOVES2014 and SMOKE pollutant names, and the optional quoted
  # column 5 is a space-separated list of MOVES2014 process IDs
  my ($mapID, $mapName, $procs) = ($entry =~ /^(\d+),"[^"]+","([^"]+)",[^,]+,?(?:"([^"]+)")?$/);

  # anything that does not look like a data line is ignored
  next if !$mapID || !$mapName;

  # a non-empty process list makes this a process-specific output pollutant
  my @procIDs = $procs ? split(' ', $procs) : ();
  if (@procIDs)
  {
    push(@{$keptPollProcMap{$mapID}}, {'name' => $mapName, 'list' => [@procIDs]});
  }
  else
  {
    $keptPollMap{$mapID} = $mapName;
  }

  $keptPollNames{$mapName} = 1;
}

close ($pollFH);

#================================================================================================
# Read pollutant formulas file

# list of parsed formulas; each entry is a hash with
#   'outputName' - name of the pollutant the formula calculates
#   'terms'      - list of {'factor', 'inputName'} hashes to be summed
our @formulas;

if ($formulaFile)
{
  my $formFH;
  open($formFH, "<", $formulaFile) or die "Unable to open pollutant formulas file: $formulaFile\n";
  
  while (my $line = <$formFH>)
  {
    chomp($line);
    $line =~ s/\s+//g; # remove whitespace
    next unless $line; # skip blank lines
  
    # example formulas:
    #   POC = 0.47 * PM25TIRE_INV
    #   PMFINE = PM25_INV - PEC - POC - PNO3 - PSO4
    # general format:
    #   [outputName] = [term 1] + [term 2] + [term 3]
    #   where each term is [factor (optional)] * [inputName]
    # factors will be stored as negative values if term should be subtracted
    
    my @terms; # list of terms
    my ($factor, $name) = (1, undef); # new term with default factor of 1
  
    # split formula on equals sign into output and input
    my ($output, $input) = split('=', $line, 2);

    # reject lines with no equals sign or no output name here, where the
    # offending line can be reported, rather than failing later on
    die "Invalid formula \"$line\"\n" unless defined $input && defined $output && length($output);
    
    # split input on plus and minus signs; capture operation character
    for my $piece (split(/([\+\-])/, $input))
    {
      if ($piece eq '+')
      {
        # create new term with default factor of 1
        ($factor, $name) = (1, undef);
      }
      elsif ($piece eq '-')
      {
        # create new term with default factor of -1
        ($factor, $name) = (-1, undef);
      }
      elsif (length($piece))
      {
        # split term into factor and pollutant name on multiplication symbol
        for my $component (split(/\*/, $piece))
        {
          # match numeric factor
          if ($component =~ /^(?:\d+|\d+\.\d*|\.\d+)$/)
          {
            $factor *= $component;
          }
          elsif (length($component))
          {
            # make sure pollutant is in list of kept pollutants
            die "Unknown pollutant \"$component\" in formula \"$line\"\n" unless $keptPollNames{$component};
            
            # check for multiple pollutants in same formula term
            die "Invalid formula \"$line\"\n" if defined $name;
  
            $name = $component;
          }
        }

        # every term must name an input pollutant; a purely numeric term
        # would otherwise be stored with an undefined input name
        die "Invalid formula \"$line\"\n" unless defined $name;

        # add term to list of terms
        push(@terms, {'factor' => $factor, 'inputName' => $name});
      }
    }
    # add formula to list of formulas
    push(@formulas, {'outputName' => $output, 'terms' => \@terms});
  }
  
  close ($formFH);
}

#================================================================================================
# Process SCC aggregation files

# SQL expression that yields the output SCC; the plain SCC column is used
# unless at least one aggregation file was supplied
my $scc_sql = 'SCC';

if (grep { $_ } ($fuelAggFile, $srcAggFile, $roadAggFile, $procAggFile))
{
  # rebuild the SCC from the literal '22' prefix followed by one two-character
  # piece per component, each taken from the given position in the input SCC
  my @sccParts = ("'22'");
  for my $spec (['fuel', $fuelAggFile, 3],
                ['source', $srcAggFile, 5],
                ['road', $roadAggFile, 7],
                ['process', $procAggFile, 9])
  {
    my ($kind, $file, $position) = @$spec;
    push(@sccParts, BuildAggregationSQL($kind, $file, $position));
  }

  $scc_sql = 'CONCAT(' . join(', ', @sccParts) . ')';
}

#================================================================================================
# Read county barometric pressure file

our %countyPressureMap; # map of county IDs to barometric pressure value

# the pressure file is only read when NOx humidity corrections were requested
if ($adjust_nox)
{
  die "County barometric pressure file is required to apply NOx adjustments\n" unless $pressureFile;
  
  my $presFH;
  open($presFH, "<", $pressureFile) or die "Unable to open county barometric pressure file: $pressureFile\n";

  while (my $line = <$presFH>)
  {
    chomp($line);

    # skip blank lines; they would otherwise produce an undefined county ID
    next unless $line =~ /\S/;
    
    # cdbname,countyID,stateID,countyName,altitude,GPAFract,barometricPressure,barometricPressureCV
    my @pieces = split(',', $line);
    my ($countyID, $pressure) = @pieces[1, 6];

    # a non-blank line without a second column cannot be a valid record
    die "Invalid line in county barometric pressure file (missing county ID): $line\n" unless defined $countyID;
    
    $countyPressureMap{$countyID} = $pressure;
  }
  
  close ($presFH);
}

#================================================================================================
# Remove existing list files

# Each entry: run type, table name fragment, and whether the type also produces
# a humidity-adjusted "_adj" table when --adjust_nox is used. Output list files
# are opened in append mode, so stale ones must be removed before processing.
for my $typeInfo (['RPD', 'distance', 1], ['RPV', 'vehicle', 0], ['RPP', 'profile', 0],
                  ['RPH', 'hour', 1], ['RPS', 'start', 0], ['RPHO', 'houroni', 1])
{
  my ($thisType, $thisName, $hasAdjusted) = @$typeInfo;
  next unless !$runType || $runType eq $thisType;

  my @tableNames = ("rateper${thisName}_smoke");
  # the adjusted tables get their own list file, which is also appended to
  push(@tableNames, "rateper${thisName}_smoke_adj") if $adjust_nox && $hasAdjusted;

  for my $tableName (@tableNames)
  {
    my $listfile = "${outDir}mrclist.$tableName.lst";
    next unless -e $listfile;
    unlink($listfile) or die "Couldn't remove existing list file $listfile: $!\n";
  }
}

#================================================================================================
# Loop through list of input databases

# database handle shared with the subroutines below through the package variable
our $dbh;
for my $db (@dbList)
{
  # make connection to database
  my $connectionInfo = "dbi:mysql:$db;$hostname";

  # connect as the MySQL user given with -u; when no user was supplied fall back
  # to the account that used to be hard-coded so existing invocations still work
  my $dbUser = length($sqlUser) ? $sqlUser : 'ec2-user';
  $dbh = DBI->connect($connectionInfo, $dbUser, $sqlPass) or die "Could not connect to database: $db\n";
  
  printf "\n%s Processing database %s...\n\n", scalar(localtime()), $db;

  # remove temporary tables if they exist
  my $sth = $dbh->prepare(<<END);
DROP TABLE IF EXISTS rateperdistance_smoke,
                     rateperdistance_smoke_adj,
                     ratepervehicle_smoke,
                     rateperprofile_smoke,
                     rateperhour_smoke,
                     rateperhour_smoke_adj,
                     rateperstart_smoke,
                     rateperhouroni_smoke,
                     rateperhouroni_smoke_adj
END
  $sth->execute() or die 'Error executing query: ' . $sth->errstr;
  
  printf "  - Completed drop tables at %s\n", scalar(localtime());

  #================================================================================================
  # Process rate per distance factors

  if (!$runType || $runType eq 'RPD')
  {
    printf "\n  Starting rate per distance processing...\n";

    # build where clause to select hours with unique temperatures
    my $whereClause = BuildRPDWhereClause();
    
    printf "  - Creating rateperdistance_smoke at %s\n", scalar(localtime());
    
    # create table with columns for each pollutant
    my $sql = <<END;
  CREATE TABLE rateperdistance_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID, 
         yearID, 
         monthID,
         IF(LENGTH(linkID) = 9, 
            SUBSTR(linkID, 1, 5), 
            SUBSTR(linkID, 1, 4)) AS FIPS,
         $scc_sql AS agg_scc, 
         avgSpeedBinID, 
         temperature, 
         relHumidity
END

    my $pollQuery = BuildPollutantQuery('rateperdistance', 'ratePerDistance');
    $sql .= ", $pollQuery" if $pollQuery;

    # road type 1 is excluded here; it is handled by the RPHO section below
    $sql .= <<END;
    FROM rateperdistance
   WHERE $whereClause
     AND $scc_sql IS NOT NULL
     AND roadTypeID != 1
     AND MOVESScenarioID LIKE 'rd_%'
GROUP BY MOVESScenarioID, yearID, monthID,
         FIPS, agg_scc, avgSpeedBinID, temperature, relHumidity
ORDER BY temperature ASC, 
         agg_scc ASC, 
         avgSpeedBinID ASC
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;
    
    printf "  - Completed rateperdistance_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('rateperdistance_smoke');
    
    ProcessFormulas('rateperdistance_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('rateperdistance_smoke', $db, $outDir, $headerListRef);
    
    if ($adjust_nox)
    {
      # create copy of processed table to apply humidity adjustments
      $sth = $dbh->prepare(<<END);
CREATE TABLE rateperdistance_smoke_adj
(SELECT * FROM rateperdistance_smoke)
END
      $sth->execute() or die 'Error executing query: ' . $sth->errstr;
      ApplyHumidityAdjustment('rateperdistance_smoke_adj', $pollsInTableRef);
      GenerateOutput('rateperdistance_smoke_adj', $db, $outDir, $headerListRef);
    }
  }

  #================================================================================================
  # Process rate per vehicle factors

  if (!$runType || $runType eq 'RPV')
  {
    printf "\n  Starting rate per vehicle processing...\n";

    # create table with columns for each pollutant
    my $sql = <<END;
  CREATE TABLE ratepervehicle_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID, 
         yearID, 
         monthID, 
         dayID, 
         hourID, 
         IF(LENGTH(zoneID) = 6,
            SUBSTR(zoneID, 1, 5),
            SUBSTR(zoneID, 1, 4)) AS FIPS,
         $scc_sql AS agg_scc, 
         temperature
END

    my $pollQuery = BuildPollutantQuery('ratepervehicle', 'ratePerVehicle');
    $sql .= ", $pollQuery" if $pollQuery;

    $sql .= <<END;
    FROM ratepervehicle
   WHERE $scc_sql IS NOT NULL
     AND MOVESScenarioID LIKE 'rv_%'
GROUP BY MOVESScenarioID, yearID, monthID, dayID, hourID, 
         FIPS, agg_scc, temperature 
ORDER BY temperature ASC, 
         dayID ASC, 
         agg_scc ASC, 
         hourID ASC
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;

    printf "  - Completed ratepervehicle_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('ratepervehicle_smoke');
    
    ProcessFormulas('ratepervehicle_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('ratepervehicle_smoke', $db, $outDir, $headerListRef);
  }

  #================================================================================================
  # Process rate per profile factors

  if (!$runType || $runType eq 'RPP')
  {
    printf "\n  Starting rate per profile processing...\n";

    # create table with columns for each pollutant; the month and FIPS code are
    # both extracted from temperatureProfileID
    my $sql = <<END;
  CREATE TABLE rateperprofile_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID, 
         yearID, 
         IF(LENGTH(temperatureProfileID) = 10,
            SUBSTR(temperatureProfileID, 7, 2),
            SUBSTR(temperatureProfileID, 6, 2)) AS monthID, 
         dayID, 
         hourID, 
         IF(LENGTH(temperatureProfileID) = 10,
            SUBSTR(temperatureProfileID, 1, 5),
            SUBSTR(temperatureProfileID, 1, 4)) AS FIPS, 
         $scc_sql AS agg_scc,
         temperature
END

    # the rates in the rateperprofile table are read from its ratePerVehicle column
    my $pollQuery = BuildPollutantQuery('rateperprofile', 'ratePerVehicle');
    $sql .= ", $pollQuery" if $pollQuery;

    $sql .= <<END;
    FROM rateperprofile 
   WHERE $scc_sql IS NOT NULL
     AND MOVESScenarioID LIKE 'rp_%'
GROUP BY MOVESScenarioID, yearID, monthID, dayID, hourID, 
         FIPS, agg_scc, temperature 
ORDER BY MOVESScenarioID ASC, 
         dayID ASC, 
         agg_scc ASC, 
         hourID, 
         temperature
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;

    printf "  - Completed rateperprofile_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('rateperprofile_smoke');
    
    ProcessFormulas('rateperprofile_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('rateperprofile_smoke', $db, $outDir, $headerListRef);
  }

  #================================================================================================
  # Process rate per hour factors

  if (!$runType || $runType eq 'RPH')
  {
    printf "\n  Starting rate per hour processing...\n";

    # create table with columns for each pollutant
    my $sql = <<END;
  CREATE TABLE rateperhour_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID, 
         yearID, 
         monthID,
         IF(LENGTH(linkID) = 9, 
            SUBSTR(linkID, 1, 5), 
            SUBSTR(linkID, 1, 4)) AS FIPS,
         $scc_sql AS agg_scc,
         temperature,
         relHumidity
END

    my $pollQuery = BuildPollutantQuery('rateperhour', 'ratePerHour');
    $sql .= ", $pollQuery" if $pollQuery;

    # only a single hour/day combination is read from the rateperhour table
    $sql .= <<END;
    FROM rateperhour
   WHERE hourID = 1
     AND dayID = 2 
     AND $scc_sql IS NOT NULL
     AND MOVESScenarioID LIKE 'rv_%'
GROUP BY MOVESScenarioID, yearID, monthID,
         FIPS, agg_scc, temperature, relHumidity
ORDER BY temperature ASC, 
         agg_scc ASC
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;

    printf "  - Completed rateperhour_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table;
    # the second argument leaves relHumidity out of the output header
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('rateperhour_smoke', 1);
    
    ProcessFormulas('rateperhour_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('rateperhour_smoke', $db, $outDir, $headerListRef);
    
    if ($adjust_nox)
    {
      # create copy of processed table to apply humidity adjustments
      $sth = $dbh->prepare(<<END);
CREATE TABLE rateperhour_smoke_adj
(SELECT * FROM rateperhour_smoke)
END
      $sth->execute() or die 'Error executing query: ' . $sth->errstr;
      ApplyHumidityAdjustment('rateperhour_smoke_adj', $pollsInTableRef);
      GenerateOutput('rateperhour_smoke_adj', $db, $outDir, $headerListRef);
    }
  }

  #================================================================================================
  # Process rate per start factors

  if (!$runType || $runType eq 'RPS')
  {
    printf "\n  Starting rate per start processing...\n";

    # create table with columns for each pollutant
    my $sql = <<END;
  CREATE TABLE rateperstart_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID,
         yearID,
         monthID,
         dayID,
         hourID,
         IF(LENGTH(zoneID) = 6,
            SUBSTR(zoneID, 1, 5),
            SUBSTR(zoneID, 1, 4)) AS FIPS,
         $scc_sql AS agg_scc,
         temperature
END

    my $pollQuery = BuildPollutantQuery('rateperstart', 'ratePerStart');
    $sql .= ", $pollQuery" if $pollQuery;

    $sql .= <<END;
    FROM rateperstart
   WHERE $scc_sql IS NOT NULL
     AND MOVESScenarioID LIKE 'rv_%'
GROUP BY MOVESScenarioID, yearID, monthID, dayID, hourID,
         FIPS, agg_scc, temperature
ORDER BY temperature ASC,
         dayID ASC,
         agg_scc ASC,
         hourID ASC
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;

    printf "  - Completed rateperstart_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('rateperstart_smoke');
    
    ProcessFormulas('rateperstart_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('rateperstart_smoke', $db, $outDir, $headerListRef);
  }

  #================================================================================================
  # Process rate per hour off-network idle factors
  
  if (!$runType || $runType eq 'RPHO')
  {
    printf "\n  Starting rate per hour off-network idle processing...\n";

    # build where clause to select hours with unique temperatures
    # NOTE(review): this clause is built but never added to the query below, so
    # rows are not restricted to hours with unique temperatures the way the RPD
    # query is - confirm whether that is intentional before changing it
    my $whereClause = BuildRPDWhereClause();
    
    printf "  - Creating rateperhouroni_smoke at %s\n", scalar(localtime());
    
    # create table with columns for each pollutant
    my $sql = <<END;
  CREATE TABLE rateperhouroni_smoke
               (id INT PRIMARY KEY AUTO_INCREMENT)
  SELECT MOVESScenarioID,
         yearID,
         monthID,
         IF(LENGTH(linkID) = 9,
            SUBSTR(linkID, 1, 5),
            SUBSTR(linkID, 1, 4)) AS FIPS,
         $scc_sql AS agg_scc,
         temperature,
         relHumidity
END

    # these factors come from the road type 1 rows of the rateperdistance table
    my $pollQuery = BuildPollutantQuery('rateperdistance', 'ratePerDistance');
    $sql .= ", $pollQuery" if $pollQuery;

    $sql .= <<END;
    FROM rateperdistance
   WHERE $scc_sql IS NOT NULL
     AND roadTypeID = 1
     AND MOVESScenarioID LIKE 'rd_%'
GROUP BY MOVESScenarioID, yearID, monthID,
         FIPS, agg_scc, temperature, relHumidity
ORDER BY temperature ASC,
         agg_scc ASC
END
    $sth = $dbh->prepare($sql);
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;
    
    printf "  - Completed rateperhouroni_smoke at %s\n", scalar(localtime());

    # build list of columns for output file header and pollutants in table;
    # the second argument leaves relHumidity out of the output header
    my ($headerListRef, $pollsInTableRef) = BuildHeaderList('rateperhouroni_smoke', 1);
    
    ProcessFormulas('rateperhouroni_smoke', $pollsInTableRef, $headerListRef);

    # generate output files for each reference county and fuel month
    GenerateOutput('rateperhouroni_smoke', $db, $outDir, $headerListRef);
    
    if ($adjust_nox)
    {
      # create copy of processed table to apply humidity adjustments
      $sth = $dbh->prepare(<<END);
CREATE TABLE rateperhouroni_smoke_adj
(SELECT * FROM rateperhouroni_smoke)
END
      $sth->execute() or die 'Error executing query: ' . $sth->errstr;
      ApplyHumidityAdjustment('rateperhouroni_smoke_adj', $pollsInTableRef);
      GenerateOutput('rateperhouroni_smoke_adj', $db, $outDir, $headerListRef);
    }
  }

  #================================================================================================
  # Clean up temporary tables (kept for inspection when debug mode is on)

  if (!$debug)
  {
    $sth = $dbh->prepare(<<END);
DROP TABLE IF EXISTS rateperdistance_smoke,
                     rateperdistance_smoke_adj,
                     ratepervehicle_smoke,
                     rateperprofile_smoke,
                     rateperhour_smoke,
                     rateperhour_smoke_adj,
                     rateperstart_smoke,
                     rateperhouroni_smoke,
                     rateperhouroni_smoke_adj
END
    $sth->execute() or die 'Error executing query: ' . $sth->errstr;
  }

}

#================================================================================================
# Subroutines

# Build SQL to map input SCCs to output SCCs
#
# Arguments:
#   $aggType   - label for the kind of ID being mapped; only used in the
#                error message when the file cannot be opened
#   $aggFile   - path of the mapping file; when empty or undefined no mapping
#                is applied
#   $substrPos - position of the two-character ID within the SCC column
#
# Returns a SQL expression: the plain two-character substring when there is no
# mapping file, otherwise a CASE expression that translates each listed input
# ID to its output ID and yields NULL for IDs that are not listed.
#
# Mapping lines must start with "<inputID>,<outputID>," where both IDs are one
# or two digits; all other lines are ignored. Dies if the file cannot be opened.
sub BuildAggregationSQL
{
  my ($aggType, $aggFile, $substrPos) = @_;
  
  return "SUBSTR(SCC, $substrPos, 2)" unless $aggFile;
  
  my $aggFH;
  open($aggFH, "<", $aggFile) or die "Unable to open $aggType aggregation file: $aggFile\n";
  
  my $sql = "CASE SUBSTR(SCC, $substrPos, 2) ";
  while (my $line = <$aggFH>)
  {
    chomp($line);
  
    my ($inputID, $outputID) = ($line =~ /^(\d\d?),(\d\d?),/);

    # skip lines without data; test for a successful match rather than for
    # truth so that an ID of 0 is not mistaken for a missing value
    next unless defined $inputID && defined $outputID;
    
    # IDs are compared as zero-padded two-character strings
    $sql .= sprintf("WHEN '%02d' THEN '%02d' ", $inputID, $outputID);
  }
  
  $sql .= 'ELSE NULL END';
  
  close ($aggFH);
  
  return $sql;
}

# Generate where clause to select hours with unique temperatures for rate-per-distance processing
#
# Reads the distinct (hourID, temperature, monthID) combinations from the
# rateperdistance table through the global database handle and returns a SQL
# condition of the form
#   ( 0 OR ( monthID = M1 AND ... ) OR ( monthID = M2 AND ... ) )
# with one alternative per month found. With no rows the result is " ( 0 )",
# which selects nothing. Dies if the query fails.
sub BuildRPDWhereClause
{
  our ($dbh);
  
  # the rate per distance table stores factors for different temperatures in different hours
  # if more than 24 temperatures are used, then multiple MOVES runs will be made
  my $sth = $dbh->prepare(<<END);
  SELECT DISTINCT hourID, temperature, monthID
    FROM rateperdistance
ORDER BY monthID, temperature, hourID
END
  $sth->execute() or die 'Error executing query: ' . $sth->errstr;
    
  # per-month statistics, gathered in query order:
  #   recNo    - number of distinct (hour, temperature) rows for the month
  #   lastTemp - highest temperature seen (rows arrive sorted by temperature)
  #   maxHr    - lowest hour at which that highest temperature occurs
  my $lastMonth = 0;
  my $monthPos = -1;
  my @monthStats;
  while (my ($hour, $temp, $month) = $sth->fetchrow_array())
  {
    if ($month != $lastMonth) {
      # first row of a new month: start a fresh statistics record
      $monthPos++;
      $monthStats[$monthPos]{'month'} = $month;
      $monthStats[$monthPos]{'recNo'} = 0;
      $monthStats[$monthPos]{'lastTemp'} = -999.;
      $monthStats[$monthPos]{'maxHr'} = 1;
      $lastMonth = $month;
    }
    $monthStats[$monthPos]{'recNo'}++;
    if ($temp != $monthStats[$monthPos]{'lastTemp'}) 
    {
      $monthStats[$monthPos]{'lastTemp'} = $temp;
      $monthStats[$monthPos]{'maxHr'} = $hour;
    }
  }

  my $whereClause = " ( 0";
  for my $monthStat (@monthStats)
  {
    $whereClause .= " OR ( monthID = " . $monthStat->{'month'};
    if ($monthStat->{'recNo'} == 24)
    {
      # exactly 24 rows: keep the hours up to the first one at the highest temperature
      $whereClause .= " AND hourID <= " . $monthStat->{'maxHr'};
    }
    else
    {
      # keep every lower temperature plus a single hour at the highest one; the
      # alternatives are parenthesized as a group because AND binds tighter
      # than OR, which would otherwise let the second alternative match rows
      # from any month
      $whereClause .= " AND ( temperature < " . $monthStat->{'lastTemp'} .
                      " OR ( temperature = " . $monthStat->{'lastTemp'} . " AND hourID = " . $monthStat->{'maxHr'} . " ) )";
    }
    $whereClause .= " )";
  }
  $whereClause .= " )";
  
  return $whereClause;
}

# Generate SQL to translate MOVES pollutant IDs to kept pollutant names
#
# Arguments:
#   $tableName  - table whose distinct pollutant IDs are looked up
#   $columnName - column holding the rate that is summed for each pollutant
#
# Returns a comma-separated list of "SUM(IF(...)) AS `name`" select
# expressions, one per kept output pollutant found in the table, or an empty
# string when the table holds no kept pollutant. Dies if the query fails.
sub BuildPollutantQuery
{
  our ($dbh);
  my ($tableName, $columnName) = @_;
  
  my $idQuery = $dbh->prepare(<<END);
SELECT DISTINCT pollutantID
  FROM $tableName
 ORDER BY pollutantID
END
  $idQuery->execute() or die 'Error executing query: ' . $idQuery->errstr;
  
  my @columns;
  while (my ($id) = $idQuery->fetchrow_array())
  {
    # pollutant kept for all processes
    push(@columns, "SUM(IF(PollutantID = $id, $columnName, NULL)) AS `$keptPollMap{$id}`\n")
      if exists $keptPollMap{$id};

    # process-specific outputs: one column per entry, limited to the listed
    # process IDs (matched against characters 9-10 of the SCC)
    next unless exists $keptPollProcMap{$id};
    for my $entry (@{$keptPollProcMap{$id}})
    {
      my $inList = join(',', map { sprintf("'%02d'", $_) } @{$entry->{'list'}});
      push(@columns, "SUM(IF(PollutantID = $id AND SUBSTR(SCC, 9, 2) IN ($inList), $columnName, NULL)) AS `$entry->{'name'}`\n");
    }
  }
  
  return join(',', @columns);
}

# Build list of columns for output file header and list of pollutants in table
#
# Arguments:
#   $tableName            - table whose columns are listed
#   $ignoreHumidityColumn - when true, the relHumidity column is left out
#
# Returns two references: an array of column names in table order (the id
# column is never included) and a hash whose keys are the columns that are
# valid output pollutant names. Dies if the query fails.
sub BuildHeaderList
{
  our ($dbh, %keptPollNames);
  my ($tableName, $ignoreHumidityColumn) = @_;

  my $colQuery = $dbh->prepare(<<END);
 SHOW COLUMNS 
 FROM $tableName
WHERE Field != 'id'
END
  $colQuery->execute() or die 'Error executing query: ' . $colQuery->errstr;

  my @columns;
  my %pollutants;

  # the first value of every result row is the column name
  while (my ($field) = $colQuery->fetchrow_array())
  {
    if ($ignoreHumidityColumn && $field eq 'relHumidity')
    {
      next;
    }

    push(@columns, $field);
    if ($keptPollNames{$field})
    {
      $pollutants{$field} = 1;
    }
  }
  
  return (\@columns, \%pollutants);
}

# Determine which formulas apply to table, add columns for new pollutants,
# and calculate formula outputs
#
# Arguments:
#   $tableName       - table to update
#   $pollsInTableRef - hash reference whose keys are the pollutant columns
#                      present in the table
#   $headerListRef   - array reference of output column names; the name of
#                      every column added here is appended to it
#
# A formula applies only when all of its input pollutants are present in the
# table. Each applicable formula adds its result to the current value of its
# output column, with NULL values treated as 0. Returns nothing; dies if a
# statement fails.
sub ProcessFormulas
{
  our ($dbh, @formulas);
  my ($tableName, $pollsInTableRef, $headerListRef) = @_;

  return unless scalar(@formulas);
  
  my @validFormulas; # list of indexes into global formulas array
  my @newPolls;      # list of new pollutants created by formulas
  my %queuedPolls;   # new pollutants already queued for column creation

  for my $index (0..(scalar(@formulas) - 1))
  {
    # loop through formula terms and check if all are applicable
    my $missing = 0;
    for my $termRef (@{$formulas[$index]{'terms'}})
    {
      my $inputPollName = $termRef->{'inputName'};
      # check if input pollutant is present in table; if not, formula isn't valid
      unless (exists $pollsInTableRef->{$inputPollName})
      {
        $missing = 1;
        last;
      }
    }
    next if $missing;

    push(@validFormulas, $index);

    # an output that is not yet in the table needs a new column; queue it only
    # once even when several formulas share the output, since adding the same
    # column twice would make the second ALTER TABLE fail
    my $outputPollName = $formulas[$index]{'outputName'};
    next if $pollsInTableRef->{$outputPollName};
    next if $queuedPolls{$outputPollName}++;
    push(@newPolls, $outputPollName);
  }
  
  if (scalar(@validFormulas))
  {
    # add columns for any new pollutants
    for my $outputPollName (sort @newPolls)
    {
      my $sth = $dbh->prepare(<<END);
ALTER TABLE $tableName
 ADD COLUMN (`$outputPollName` double)
END
      $sth->execute() or die 'Error executing query: ' . $sth->errstr;
      
      # add column to header list
      push(@$headerListRef, $outputPollName);
    }
    
    for my $index (@validFormulas)
    {
      my $formulaRef = $formulas[$index];
      my $outputPollName = $formulaRef->{'outputName'};
      
      # start from the current value so that formulas sharing an output accumulate
      my $sql = <<END;
UPDATE $tableName
   SET `$outputPollName` = IFNULL(`$outputPollName`, 0)
END
      
      for my $termRef (@{$formulaRef->{'terms'}})
      {
        my $inputPollName = $termRef->{'inputName'};
        my $factor = $termRef->{'factor'};
        $sql .= " + $factor * IFNULL(`$inputPollName`, 0)";
      }
      
      my $sth = $dbh->prepare($sql);
      $sth->execute() or die 'Error executing query: ' . $sth->errstr;
    }
  }
  
  printf "  - Completed formula processing at %s\n", scalar(localtime());
}

# Generate output files for each reference county and fuel month
#
# For every distinct FIPS/monthID pair in the table, writes a CSV file named
# <tableName>_<dbName>_<FIPS>_<monthID>.csv containing:
#   - a NUM_TEMP_BIN header line (count of distinct temperature values, or of
#     distinct MOVESScenarioID values for rateperprofile_smoke)
#   - an optional HUMIDITY_ADJUSTED_NOX header line
#   - the column header line and the data rows ordered by id (NULL -> 0)
# and appends a "<FIPS> <monthID> <file name>" line to the list file
# mrclist.<tableName>.lst.
#
# Globals:
#   $dbh                - database handle
#   $adjust_nox         - when true, HUMIDITY_ADJUSTED_NOX is Y for tables
#                         whose name ends in _adj and N otherwise
#   $moves_adjusted_nox - when true (and $adjust_nox is not), its value is
#                         written as the HUMIDITY_ADJUSTED_NOX flag
# Arguments:
#   $tableName     - table to export
#   $dbName        - database name, used in the output file name
#   $outDir        - output directory prefix; it is concatenated directly with
#                    the file name, so it must end with a path separator
#   $headerListRef - array ref of column names to export, in order
# Returns nothing. Dies on file or database errors.
sub GenerateOutput
{
  our ($dbh, $adjust_nox, $moves_adjusted_nox);
  my ($tableName, $dbName, $outDir, $headerListRef) = @_;
  
  my $listfile = "${outDir}mrclist.$tableName.lst";
  open(my $listFH, ">>", $listfile) or die "Unable to open output list file: $listfile\n";

  my $loopSql = <<END;
  SELECT DISTINCT FIPS, monthID 
    FROM $tableName
ORDER BY FIPS, monthID
END
  my $loop_sth = $dbh->prepare($loopSql) or die 'Error preparing query: ' . $dbh->errstr;
  $loop_sth->execute() or die 'Error executing query: ' . $loop_sth->errstr;
  
  # The per-file queries only differ in their bind values, so prepare them once.
  # The bin count is restricted to the county AND month being written so the
  # NUM_TEMP_BIN header describes the rows that end up in that file.
  my $colName = ($tableName eq 'rateperprofile_smoke' ? 'MOVESScenarioID' : 'temperature');
  my $countSql = <<END;
SELECT COUNT(DISTINCT($colName)) 
  FROM $tableName 
 WHERE FIPS = ?
   AND monthID = ?
END
  my $count_sth = $dbh->prepare($countSql) or die 'Error preparing query: ' . $dbh->errstr;
  
  my $colList = join(',', map {qq|`$_`|} @{$headerListRef});
  my $dataSql = <<END;
  SELECT $colList
    FROM $tableName 
   WHERE FIPS = ?
     AND monthID = ?
ORDER BY id
END
  my $data_sth = $dbh->prepare($dataSql) or die 'Error preparing query: ' . $dbh->errstr;
  
  while (my ($fips, $month) = $loop_sth->fetchrow_array())
  {
    # open the output file
    my $outfile = "${tableName}_${dbName}_${fips}_$month.csv";
    my $outdirfile = $outDir . $outfile;
    unlink($outdirfile);
    
    open(my $outFH, ">", $outdirfile) or die "Unable to open output file: $outdirfile\n";

    # generate the NUM_TEMP_BIN header
    $count_sth->execute($fips, $month) or die 'Error executing query: ' . $count_sth->errstr;
    while (my ($count) = $count_sth->fetchrow_array())
    {
      print $outFH "NUM_TEMP_BIN $count\n";
    }
    
    if ($adjust_nox)
    {
      my $adjusted = $tableName =~ /_adj$/ ? 'Y' : 'N';
      print $outFH "HUMIDITY_ADJUSTED_NOX $adjusted\n";
    }
    elsif ($moves_adjusted_nox)
    {
      print $outFH "HUMIDITY_ADJUSTED_NOX $moves_adjusted_nox\n";
    }

    # generate the header line
    print $outFH join(',', @{$headerListRef}) . "\n";

    # output data; NULL values are written as 0
    $data_sth->execute($fips, $month) or die 'Error executing query: ' . $data_sth->errstr;
    while (my @data = $data_sth->fetchrow_array())
    {
      print $outFH join(',', map { defined($_) ? $_ : 0 } @data) . "\n";
    }

    # buffered write errors (e.g. a full disk) only surface at close
    close($outFH) or die "Unable to close output file: $outdirfile\n";

    print $listFH "$fips $month $outfile\n";
  }

  close($listFH) or die "Unable to close output list file: $listfile\n";
  
  # pass the table name as an argument so a '%' in it cannot act as a format
  printf "  - Completed %s export at %s\n", $tableName, scalar(localtime());
}

# Apply humidity adjustments to NOx emission factors
#
# Builds a temporary table (humidity_calcs) holding a humidity ratio for each
# distinct FIPS/temperature/relHumidity combination in the table, scales the
# kept NOx-related pollutant columns by 1 - (hratio - 75) * coefficient, and
# then recalculates formula outputs that depend on those pollutants.
#
# Globals:
#   $dbh               - database handle
#   @formulas          - formula list (hashes with 'outputName' and 'terms')
#   %countyPressureMap - barometric pressure keyed by county FIPS
#   %keptPollMap       - output pollutant name keyed by pollutant ID
# Arguments:
#   $tableName       - table to adjust; the queries use its FIPS, temperature,
#                      relHumidity and agg_scc columns
#   $pollsInTableRef - hash ref keyed by pollutant names originally in the
#                      table (formula-created pollutants are not in it)
# Returns nothing. Dies if a statement cannot be prepared or executed.
sub ApplyHumidityAdjustment
{
  # %keptPollMap is listed with the other package globals so the sub does not
  # depend on a declaration elsewhere in the file.
  # NOTE(review): assumes %keptPollMap is a package variable like the others.
  our ($dbh, @formulas, %countyPressureMap, %keptPollMap);
  my ($tableName, $pollsInTableRef) = @_;
  
  # prepare and execute one statement, dying with the driver's message on failure
  my $runQuery = sub
  {
    my ($query, @binds) = @_;
    my $sth = $dbh->prepare($query) or die 'Error preparing query: ' . $dbh->errstr;
    $sth->execute(@binds) or die 'Error executing query: ' . $sth->errstr;
    return $sth;
  };
  
  # determine which humidity correction coefficient to use based on fuel type
  my $coefficient_sql = <<END1;
CASE
WHEN agg_scc LIKE '2201%' OR agg_scc LIKE '2205%' THEN 0.0038 -- gasoline
WHEN agg_scc LIKE '2202%' THEN 0.0026 -- diesel
ELSE 0
END
END1

  # determine which pollutants need updates
  # (IDs are presumably the MOVES pollutant IDs for NOx, NO, NO2 and HONO - TODO confirm)
  my @updates;
  my @validFormulas;
  my %formulaHandled; # formula indexes already queued or skipped
  for my $pollID (3, 32, 33, 34)
  {
    next unless exists $keptPollMap{$pollID};
    
    my $pollName = $keptPollMap{$pollID};
    push(@updates, <<END);
`$pollName` = `$pollName` * (1-((hratio-75)*$coefficient_sql))
END

    # check if this pollutant is used in any formulas
    for my $index (0..$#formulas)
    {
      # a formula using several adjusted pollutants only needs handling once
      next if $formulaHandled{$index};
      
      my $uses_pollutant = 0;
      my $valid = 1;
      for my $termRef (@{$formulas[$index]{'terms'}})
      {
        my $inputPollName = $termRef->{'inputName'};
        $uses_pollutant = 1 if $inputPollName eq $pollName;
        
        # make sure all terms in the formula apply
        unless (exists $pollsInTableRef->{$inputPollName})
        {
          $valid = 0;
          last;
        }
      }
      next unless $uses_pollutant && $valid;
      
      $formulaHandled{$index} = 1;
      
      # skip any formulas that update an existing pollutant since these can't be recalculated
      my $outputPollName = $formulas[$index]{'outputName'};
      if ($pollsInTableRef->{$outputPollName})
      {
        print "WARNING: Skipping formula for pollutant $outputPollName during NOx adjustment.\n";
      }
      else
      {
        push(@validFormulas, $index);
      }
    }
  }
  return unless scalar(@updates);
  
  # create table with humidity calculations for each temperature/humidity combination;
  # drop any copy left by an earlier call on this connection so the sub can run again
  $runQuery->(<<END);
DROP TEMPORARY TABLE IF EXISTS humidity_calcs
END

  $runQuery->(<<END);
CREATE TEMPORARY TABLE humidity_calcs
                       (FIPS VARCHAR(5),
                        temperature FLOAT,
                        relHumidity FLOAT,
                        tk FLOAT,
                        t0 FLOAT,
                        pv FLOAT,
                        hratio FLOAT)
END
  
  $runQuery->(<<END);
INSERT INTO humidity_calcs (FIPS, temperature, relHumidity)
SELECT DISTINCT FIPS, temperature, relHumidity
  FROM $tableName
END
  
  # intermediate temperature terms used by the pv calculation
  $runQuery->(<<END);
UPDATE humidity_calcs
   SET tk = 0.56*(temperature-32)+273,
       t0 = 374.27-0.56*(temperature-32)
END
  
  $runQuery->(<<END);
UPDATE humidity_calcs
   SET pv = (relHumidity/100)*6527.557*POW(10,(-t0/tk)*((3.2437+0.00588*t0+0.000000011702*POW(t0,3))/(1+0.00219*t0)))
END
  
  # use county-specific barometric pressure to calculate humidity ratio
  my $loop_sth = $runQuery->(<<END);
SELECT DISTINCT FIPS
  FROM humidity_calcs
END
  
  # The update is limited to the county whose pressure is bound; rows for
  # counties without a pressure keep a NULL hratio and are left out of the
  # join below, so they really are not adjusted. hratio is clamped to [21, 124].
  my $hratio_sql = <<END;
UPDATE humidity_calcs
   SET hratio = LEAST(GREATEST(4347.8*pv/(?-pv),21),124)
 WHERE FIPS = ?
END

  while (my ($fips) = $loop_sth->fetchrow_array())
  {
    unless (exists $countyPressureMap{$fips})
    {
      print "WARNING: County $fips not found in barometric pressures file. NOx emission factors won't be adjusted.\n";
      next;
    }
    
    $runQuery->($hratio_sql, $countyPressureMap{$fips}, $fips);
  }
  
  # update base emission factors
  my $sql = <<END;
UPDATE $tableName
  JOIN humidity_calcs
    ON $tableName.FIPS = humidity_calcs.FIPS
   AND $tableName.temperature = humidity_calcs.temperature
   AND $tableName.relHumidity = humidity_calcs.relHumidity
   AND humidity_calcs.hratio IS NOT NULL
   SET 
END

  $sql .= join(',', @updates);
  $runQuery->($sql);
  
  # update emission factors calculated from formulas; the output is rebuilt
  # from zero using the adjusted inputs
  # NOTE(review): assumes each output pollutant comes from a single formula - confirm
  for my $index (@validFormulas)
  {
    my $formulaRef = $formulas[$index];
    my $outputPollName = $formulaRef->{'outputName'};
    
    my $formulaSql = <<END;
UPDATE $tableName
   SET `$outputPollName` = 0
END
    
    for my $termRef (@{$formulaRef->{'terms'}})
    {
      my $inputPollName = $termRef->{'inputName'};
      my $factor = $termRef->{'factor'};
      $formulaSql .= " + $factor * IFNULL(`$inputPollName`, 0)";
    }
    
    $runQuery->($formulaSql);
  }
  
  printf "  - Completed NOx adjustments at %s\n", scalar(localtime());
}