#!/bin/csh -f
# Create day-specific emissions for CEM sources, written as a set of
# day-specific files, from a single base-year annual SMOKE-formatted (IDA)
# input dataset.
#
# You must have write permissions for the directory that you use this program in.
#  James Beidler <beidler.james@epa.gov> Revised: 10/10/06
#  Set to work for future year files using the new ORL format

# Define mysql configuration
# The user and password are passed to every mysql invocation in this script.
# The database named by mysql_db_name is dropped and recreated on every run.
setenv mysql_user dayspecific 
setenv mysql_pass non_cem_daily 
setenv mysql_db_name cem_daily

# Define output directory -- must be writable by user mysql
# The final per-month files are written here as
# <ida_out>/<file_prefix>_<MONTH>_cem.ida through shell redirection, so the
# user running this script needs write permission as well.
setenv ida_out emis_out
setenv file_prefix 2002cc

# The path to a temporary directory writable by user mysql
# The per-species query results are written here as
# <sql_tmp>/<file_prefix>_<SPECIES>_cem.csv through shell redirection.
setenv sql_tmp tmp

# Define input files
# Annual inventory, loaded as comma-separated text into table cem_tmp.
setenv orl_annual emis_2002/ptinv_ptipm_cap2002v2_02apr2007_v4_orl.txt
setenv orl_annual_head 15   # How many lines of header to ignore in orl_annual file

# Monthly fraction files, loaded as columns fips, state, month, frac.
setenv state_HEAT_month emis_2002/heat_month_2001-2003.csv
setenv state_NOX_month emis_2002/nox_month_2001-2003.csv
setenv state_SO2_month emis_2002/so2_month_2001-2003.csv

# County table; its fips and tz columns are used when writing daily records.
setenv county_fips emis_2002/Counties-with_time_zones.txt

# Fixed-width cemscan report; converted to CSV before it is loaded.
setenv cemscan_out emis_2002/cemsum.2002.txt
setenv cemscan_head 1   # How many lines of header to ignore in cemscan file

# Daily state-level files, loaded into the state_*_daily tables.
setenv state_HEAT_daily emis_2002/heat_date_2002.csv
setenv state_NOX_daily emis_2002/nox_date_2002.csv
setenv state_SO2_daily emis_2002/so2_date_2002.csv

### End of user configuration

# Convert cemscan to CSV
# The report is fixed width: a 6 character field, a 7 character field, then
# six 13 character fields starting at columns 14, 27, 40, 53, 66 and 79.
# Each field is handed to printf as a %s argument instead of being used as
# the format string, so a percent sign in the data is copied through
# unchanged rather than being read as a conversion specifier.
awk '{ printf "%s,%s,%s,%s,%s,%s,%s,%s\n", \
substr($0,1,6), substr($0,7,7), substr($0,14,13), substr($0,27,13), \
substr($0,40,13), substr($0,53,13), substr($0,66,13), substr($0,79,13) }' ${cemscan_out}  > cemscan.tmp

# Build the working database from scratch and bulk-load every input file.
# The here-document delimiter is unquoted, so the shell expands the
# configuration variables into the SQL text before mysql sees it.
#   1. Drop and recreate the database named by mysql_db_name.
#   2. Create the staging tables for the annual inventory, the monthly
#      fractions, the county table, the cemscan data and the daily files.
#   3. Load each file with LOAD DATA LOCAL INFILE.  For the annual inventory
#      only positional columns 1-7, 15 (sic), 22 (cas), 23 (ann_emis),
#      30 (orisid) and 31 (blrid) are kept; the rest go to @dummy variables.
#   4. Split cem_tmp into one table per pollutant code (CO, VOC, NOX, NH3,
#      PM10, PM2_5, SO2), with ann_emis renamed to the pollutant name.
# NOTE(review): the password is given on the mysql command line, where other
# users on the host may be able to see it in the process list.
# NOTE(review): index(emis(4)) puts a prefix length on a FLOAT column;
# confirm that the target MySQL version accepts this.
echo "Creating tables..."

mysql -u ${mysql_user} -p${mysql_pass} --local-infile -t <<STOP
-- Create mysql database 
DROP DATABASE IF EXISTS ${mysql_db_name} ;
CREATE DATABASE ${mysql_db_name} ;
-- Create the tables
USE ${mysql_db_name} ;
CREATE TABLE cem_tmp (region varchar(5), plantid varchar(16), pointid varchar(16), stackid varchar(13), segment varchar(10), plant varchar(40), scc varchar(11), sic int, cas varchar(8), ann_emis real(12,6), orisid varchar(7), blrid varchar(7));
CREATE TABLE state_HEAT_month (fips varchar(2), state varchar(30), month int, frac real(12,6), index(month, fips)) ;
CREATE TABLE state_NOX_month (fips varchar(2), state varchar(30), month int, frac real(12,6), index(month, fips)) ;
CREATE TABLE state_SO2_month (fips varchar(2), state varchar(30), month int, frac real(12,6), index(month, fips)) ;
CREATE TABLE county_fips (record int, state varchar(10), county varchar(40), tz varchar(8), nstz char, countypop int, fips varchar(5)) ;
CREATE TABLE cemscan (oris varchar(7), boiler varchar(7), NOX float, SO2 float, optime float, gload float, sload float, htinput float) ;
CREATE TABLE state_HEAT_daily (fips varchar(2), state varchar(10), dateandtime varchar(20), emis float, index(emis(4)), month int, index(month, fips)) ;
CREATE TABLE state_NOX_daily (fips varchar(2), state varchar(10), dateandtime varchar(20), emis float, index(emis(4)), month int, index(month, fips)) ;
CREATE TABLE state_SO2_daily (fips varchar(2), state varchar(10), dateandtime varchar(20), emis float, index(emis(4)), month int, index(month, fips)) ;
-- Import the tables
LOAD DATA LOCAL INFILE '${orl_annual}' INTO TABLE cem_tmp FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' IGNORE ${orl_annual_head} LINES (region, plantid, pointid, stackid, segment, plant, scc, @dummy8, @dummy9, @dummy10, @dummy11, @dummy12, @dummy13, @dummy14, sic, @dummy16, @dummy17, @dummy18, @dummy19, @dummy20, @dummy21, cas, ann_emis, @dummy24, @dummy25, @dummy26, @dummy27, @dummy28, @dummy29, orisid, blrid, @dummy32, @dummy33, @dummy34, @dummy35, @dummy36, @dummy37, @dummy38, @dummy39) ;
LOAD DATA LOCAL INFILE '${state_HEAT_month}' INTO TABLE state_HEAT_month FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
LOAD DATA LOCAL INFILE '${state_NOX_month}' INTO TABLE state_NOX_month FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
LOAD DATA LOCAL INFILE '${state_SO2_month}' INTO TABLE state_SO2_month FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
LOAD DATA LOCAL INFILE '${county_fips}' INTO TABLE county_fips FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
LOAD DATA LOCAL INFILE 'cemscan.tmp' INTO TABLE cemscan FIELDS TERMINATED BY ',' IGNORE ${cemscan_head} LINES ; 
LOAD DATA LOCAL INFILE '${state_HEAT_daily}' INTO TABLE state_HEAT_daily FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
LOAD DATA LOCAL INFILE '${state_NOX_daily}' INTO TABLE state_NOX_daily FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"';
LOAD DATA LOCAL INFILE '${state_SO2_daily}' INTO TABLE state_SO2_daily FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ;
-- Create tables for individual species and merge - do this because of the new ORL format
CREATE TABLE CO SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis CO FROM cem_tmp WHERE cas = "CO" ;
CREATE TABLE VOC SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis VOC FROM cem_tmp WHERE cas = "VOC" ;
CREATE TABLE NOX SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis NOX FROM cem_tmp WHERE cas = "NOX" ;
CREATE TABLE NH3 SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis NH3 FROM cem_tmp WHERE cas = "NH3" ;
CREATE TABLE PM10 SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis PM10 FROM cem_tmp WHERE cas = "PM10" ;
CREATE TABLE PM2_5 SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis PM2_5 FROM cem_tmp WHERE cas = "PM2_5" ;
CREATE TABLE SO2 SELECT region, plantid, pointid, stackid, segment, plant, scc, sic, orisid, blrid, ann_emis SO2 FROM cem_tmp WHERE cas = "SO2" ; 
\q
STOP

echo "Performing MySQL functions..."
# Keep only the annual inventory records that have a CEM counterpart.
# For every pollutant table, a record is kept when its ORIS id and boiler id,
# compared after trimming surrounding blanks, equal the oris and boiler
# columns of a cemscan row.  The result is stored as match_<species>.
foreach species (CO NOX VOC SO2 NH3 PM2_5 PM10)
mysql -u ${mysql_user} -p${mysql_pass} -t <<STOP
-- Step 2: Perform mysql calculations and functions
USE ${mysql_db_name} ;
CREATE TABLE match_${species}
  SELECT ${species}.*
  FROM ${species}
  INNER JOIN cemscan
     ON trim(${species}.orisid) = trim(cemscan.oris)
    AND trim(${species}.blrid) = trim(cemscan.boiler) ;
\q
STOP
end

# Monthly emissions for NOX and SO2, plus the helper tables used later.
#   NOX_month / SO2_month: each matched annual record is multiplied by the
#     monthly fraction of its state.  A record belongs to a state when its
#     region equals the state fips (left padded with 0 when it is a single
#     character) followed by exactly three more characters.
#   *_month_daily: sum of the daily state values per fips and month.
#   daysinmonth: number of daily NOX records per month for fips = 1.
# NOTE(review): daysinmonth presumably relies on state 1 having one record
# for every day of the year in the daily NOX file -- confirm against the data.
mysql -u ${mysql_user} -p${mysql_pass} -t <<STOP
USE ${mysql_db_name} ;
-- Step 3: Compute monthly emissions for all pollutants from sources matching the CEM data 
-- Create monthly emissions
CREATE TABLE NOX_month SELECT state_NOX_month.month, state_NOX_month.fips, match_NOX.region, match_NOX.plantid, match_NOX.pointid, match_NOX.stackid, match_NOX.segment, match_NOX.scc, match_NOX.sic, (match_NOX.NOX * state_NOX_month.frac) NOX FROM match_NOX, state_NOX_month WHERE (match_NOX.region LIKE CONCAT(IF(CHAR_LENGTH(state_NOX_month.fips) = 1, '0', ''), state_NOX_month.fips, '___')) ORDER BY state_NOX_month.month ;
CREATE TABLE SO2_month SELECT state_SO2_month.month, state_SO2_month.fips, match_SO2.region, match_SO2.plantid, match_SO2.pointid, match_SO2.stackid, match_SO2.segment, match_SO2.scc, match_SO2.sic, (match_SO2.SO2 * state_SO2_month.frac) SO2 FROM match_SO2, state_SO2_month WHERE (match_SO2.region LIKE CONCAT(IF(CHAR_LENGTH(state_SO2_month.fips) = 1, '0', ''), state_SO2_month.fips, '___')) ORDER BY state_SO2_month.month ;
ALTER TABLE NOX_month ADD INDEX(fips, month) ; 
ALTER TABLE SO2_month ADD INDEX(fips, month) ;
-- Create state-month totals
CREATE TABLE NOX_month_daily SELECT fips, state, sum(emis) NOX, month FROM state_NOX_daily GROUP BY fips, month;
CREATE TABLE SO2_month_daily SELECT fips, state, sum(emis) SO2, month FROM state_SO2_daily GROUP BY fips, month;
CREATE TABLE HEAT_month_daily SELECT fips, state, sum(emis) HEAT, month FROM state_HEAT_daily GROUP BY fips, month;
ALTER TABLE NOX_month_daily ADD INDEX(fips, month) ;
ALTER TABLE SO2_month_daily ADD INDEX(fips, month) ;
ALTER TABLE HEAT_month_daily ADD INDEX(fips, month) ;
-- Create days in month table
CREATE TABLE daysinmonth SELECT month, count(dateandtime) days FROM state_NOX_daily WHERE fips = 1 GROUP BY month;
\q
STOP

# Loop for HEAT data only
# Monthly fractions are configured only for HEAT, NOX and SO2, so the
# remaining species are split into months with the state HEAT fractions.
# Each pass creates <species>_month and indexes it on (fips, month).
# The SQL comment below must start with two dashes followed by a blank:
# MySQL does not treat three dashes in a row as a comment start, so the
# leading dash would otherwise become part of the CREATE TABLE statement.
foreach species (CO VOC NH3 PM10 PM2_5)
mysql -u ${mysql_user} -p${mysql_pass} -t <<STOP
USE ${mysql_db_name} ;
-- Continue step 3 monthly emissions with HEAT data
CREATE TABLE ${species}_month SELECT state_HEAT_month.month, state_HEAT_month.fips, match_${species}.region, match_${species}.plantid, match_${species}.pointid, match_${species}.stackid, match_${species}.segment, match_${species}.scc, match_${species}.sic, (match_${species}.${species} * state_HEAT_month.frac) ${species} FROM match_${species}, state_HEAT_month WHERE (match_${species}.region LIKE CONCAT(IF(CHAR_LENGTH(state_HEAT_month.fips) = 1, '0', ''), state_HEAT_month.fips, '___')) ORDER BY state_HEAT_month.month ;
ALTER TABLE ${species}_month ADD INDEX(fips, month) ;
\q
STOP
end

echo "Creating day-specific emissions by facility..."

# Loop for HEAT related species
# Daily value = monthly emissions * state daily heat / state monthly heat,
# truncated to 9 decimals.  When the state monthly total is not positive the
# monthly emissions are spread evenly over the days of the month instead.
# The test is strictly greater than zero: with a zero total the ratio would
# be a division by zero, which MySQL evaluates to NULL, and the outer
# non-negative filter would then silently drop the record.
# The tab separated mysql output is converted to commas and written to
# <sql_tmp>/<file_prefix>_<species>_cem.csv.
foreach HEAT_emis (CO VOC NH3 PM2_5)
if (-e ${sql_tmp}/${file_prefix}_${HEAT_emis}_cem.csv) then
        rm -f ${sql_tmp}/${file_prefix}_${HEAT_emis}_cem.csv
endif
#Compute day-specific emissions by facility
echo "SELECT ${HEAT_emis}_month.region, ${HEAT_emis}_month.plantid, ${HEAT_emis}_month.pointid, ${HEAT_emis}_month.stackid, ${HEAT_emis}_month.segment, '${HEAT_emis}', state_HEAT_daily.dateandtime, county_fips.tz,\
TRUNCATE(IF(HEAT_month_daily.HEAT > 0, ${HEAT_emis}_month.${HEAT_emis} * state_HEAT_daily.emis / HEAT_month_daily.HEAT, ${HEAT_emis}_month.${HEAT_emis} / daysinmonth.days), 9) ${HEAT_emis}, ${HEAT_emis}_month.scc\
FROM ${HEAT_emis}_month, HEAT_month_daily, daysinmonth, state_HEAT_daily, county_fips\
WHERE (((IF(HEAT_month_daily.HEAT > 0, ${HEAT_emis}_month.${HEAT_emis} * state_HEAT_daily.emis / HEAT_month_daily.HEAT, ${HEAT_emis}_month.${HEAT_emis} / daysinmonth.days)) >= 0))\
AND ${HEAT_emis}_month.month = HEAT_month_daily.month\
AND ${HEAT_emis}_month.fips = HEAT_month_daily.fips\
AND HEAT_month_daily.month = daysinmonth.month\
AND ${HEAT_emis}_month.month = state_HEAT_daily.month\
AND ${HEAT_emis}_month.fips = state_HEAT_daily.fips\
AND county_fips.fips = ${HEAT_emis}_month.region ;" | mysql -u ${mysql_user} -p${mysql_pass} -D ${mysql_db_name} | sed 's/\t/,/g' > ${sql_tmp}/${file_prefix}_${HEAT_emis}_cem.csv
end

# Run for NOX
# Daily value = monthly NOX * state daily NOX / state monthly NOX, truncated
# to 9 decimals.  When the state monthly total is not positive the monthly
# emissions are spread evenly over the days of the month instead.  The test
# is strictly greater than zero: with a zero total the ratio would be a
# division by zero, which MySQL evaluates to NULL, and the outer
# non-negative filter would then silently drop the record.
if (-e ${sql_tmp}/${file_prefix}_NOX_cem.csv) then
        rm -f ${sql_tmp}/${file_prefix}_NOX_cem.csv
endif
#Compute day-specific emissions by facility

echo "SELECT NOX_month.region, NOX_month.plantid, NOX_month.pointid, NOX_month.stackid, NOX_month.segment, 'NOX', state_NOX_daily.dateandtime, county_fips.tz,\
TRUNCATE(IF(NOX_month_daily.NOX > 0, NOX_month.NOX * state_NOX_daily.emis / NOX_month_daily.NOX, NOX_month.NOX / daysinmonth.days), 9) NOX, NOX_month.scc\
FROM NOX_month, NOX_month_daily, daysinmonth, state_NOX_daily, county_fips \
WHERE (((IF(NOX_month_daily.NOX > 0, NOX_month.NOX * state_NOX_daily.emis / NOX_month_daily.NOX, NOX_month.NOX / daysinmonth.days)) >= 0))\
AND NOX_month.month = NOX_month_daily.month\
AND NOX_month.month = daysinmonth.month\
AND NOX_month.month = state_NOX_daily.month\
AND NOX_month.fips = NOX_month_daily.fips\
AND NOX_month.fips = state_NOX_daily.fips\
AND county_fips.fips = NOX_month.region ;" | mysql -u ${mysql_user} -p${mysql_pass} -D ${mysql_db_name} | sed 's/\t/,/g' > ${sql_tmp}/${file_prefix}_NOX_cem.csv

# Run for SO2
# Daily value = monthly SO2 * state daily SO2 / state monthly SO2, truncated
# to 9 decimals.  When the state monthly total is not positive the monthly
# emissions are spread evenly over the days of the month instead.  The test
# is strictly greater than zero: with a zero total the ratio would be a
# division by zero, which MySQL evaluates to NULL, and the outer
# non-negative filter would then silently drop the record.
if (-e ${sql_tmp}/${file_prefix}_SO2_cem.csv) then
        rm -f ${sql_tmp}/${file_prefix}_SO2_cem.csv
endif
#Compute day-specific emissions by facility
echo "SELECT SO2_month.region, SO2_month.plantid, SO2_month.pointid, SO2_month.stackid, SO2_month.segment, 'SO2', state_SO2_daily.dateandtime, county_fips.tz,\
TRUNCATE(IF(SO2_month_daily.SO2 > 0, SO2_month.SO2 * state_SO2_daily.emis / SO2_month_daily.SO2, SO2_month.SO2 / daysinmonth.days), 9) SO2, SO2_month.scc\
FROM SO2_month, SO2_month_daily, daysinmonth, state_SO2_daily, county_fips\
WHERE (((IF(SO2_month_daily.SO2 > 0, SO2_month.SO2 * state_SO2_daily.emis / SO2_month_daily.SO2, SO2_month.SO2 / daysinmonth.days)) >= 0))\
AND SO2_month.month = SO2_month_daily.month\
AND SO2_month.fips = SO2_month_daily.fips\
AND SO2_month_daily.month = daysinmonth.month\
AND SO2_month.month = state_SO2_daily.month\
AND SO2_month.fips = state_SO2_daily.fips\
AND county_fips.fips = SO2_month.region ;" |  mysql -u ${mysql_user} -p${mysql_pass} -D ${mysql_db_name} | sed 's/\t/,/g' > ${sql_tmp}/${file_prefix}_SO2_cem.csv

# Run for PMC
# PMC is computed as PM10 minus PM2_5.  PMC_month pairs every PM10_month row
# with the PM2_5_month row that has the same month, fips, region, plant,
# point, stack, segment, scc and sic.
# Daily value = monthly PMC * state daily heat / state monthly heat, truncated
# to 9 decimals.  When the state monthly total is not positive the monthly
# value is spread evenly over the days of the month instead.  The test is
# strictly greater than zero: with a zero total the ratio would be a division
# by zero, which MySQL evaluates to NULL, and the outer non-negative filter
# would then silently drop the record.
# NOTE(review): the LEFT JOIN leaves PM2_5 as NULL for PM10 rows without a
# PM2_5 partner, which makes PMC NULL, so those rows are filtered out as
# well -- confirm whether they should instead be kept with PM2_5 taken as 0.
if (-e ${sql_tmp}/${file_prefix}_PMC_cem.csv) then
        rm -f ${sql_tmp}/${file_prefix}_PMC_cem.csv
endif
#Compute day-specific emissions by facility
echo "CREATE TABLE PMC_month SELECT PM10_month.*, PM2_5_month.PM2_5 FROM PM10_month\
LEFT JOIN PM2_5_month USING (month, fips, region, plantid, pointid, stackid, segment, scc, sic) ;\
SELECT PMC_month.region, PMC_month.plantid, PMC_month.pointid, PMC_month.stackid, PMC_month.segment, 'PMC', state_HEAT_daily.dateandtime, county_fips.tz,\
TRUNCATE(IF(HEAT_month_daily.HEAT > 0, (PMC_month.PM10 - PMC_month.PM2_5) * state_HEAT_daily.emis / HEAT_month_daily.HEAT, (PMC_month.PM10 - PMC_month.PM2_5) / daysinmonth.days), 9) PMC, PMC_month.scc\
FROM PMC_month, HEAT_month_daily, daysinmonth, state_HEAT_daily, county_fips\
WHERE (((IF(HEAT_month_daily.HEAT > 0, (PMC_month.PM10 - PMC_month.PM2_5) * state_HEAT_daily.emis / HEAT_month_daily.HEAT, (PMC_month.PM10 - PMC_month.PM2_5) / daysinmonth.days)) >= 0))\
AND PMC_month.month = HEAT_month_daily.month\
AND PMC_month.fips = HEAT_month_daily.fips\
AND HEAT_month_daily.month = daysinmonth.month\
AND PMC_month.month = state_HEAT_daily.month\
AND PMC_month.fips = state_HEAT_daily.fips\
AND county_fips.fips = PMC_month.region ;" |  mysql -u ${mysql_user} -p${mysql_pass} -D ${mysql_db_name} | sed 's/\t/,/g' > ${sql_tmp}/${file_prefix}_PMC_cem.csv

echo "Formatting species files..."
# Concat species files
# For every month, the records of all species whose date falls in that month
# are gathered into one temporary file.  The month is taken from characters
# 3-5 of the date field (field 7) only, so a month abbreviation that happens
# to occur inside a plant, point, stack or segment id cannot pull a record
# into the wrong monthly file.
foreach month (JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)
        if (-e ${ida_out}/${file_prefix}_${month}_cem.tmp) then
                rm -f ${ida_out}/${file_prefix}_${month}_cem.tmp
        endif
        foreach species (CO VOC NOX NH3 PMC PM2_5 SO2)
                awk -F, -v mon=${month} 'substr($7,3,3) == mon' ${sql_tmp}/${file_prefix}_${species}_cem.csv >> ${ida_out}/${file_prefix}_${month}_cem.tmp
        end

# Format species files and output
# Each CSV record becomes one fixed-width line: fields 1-6 left justified in
# widths 5, 15, 12, 12, 12 and 5, the date rewritten as MM/DD/YY, then
# fields 8-10 in widths 3, 18 (followed by one blank) and 10.
# The month number is the position of the abbreviation in the list of month
# names: index returns 1, 4, 7, ... 34, which maps to 1 through 12.
awk 'BEGIN { FS = ","; names = "JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDEC" } \
{ mm = (index(names, substr($7,3,3)) + 2) / 3; \
printf "%-5s%-15s%-12s%-12s%-12s%-5s", $1, $2, $3, $4, $5, $6; \
printf "%02d/%s/%s", mm, substr($7,1,2), substr($7,8,2); \
printf "%-3s%-18s %-10s\n", $8, $9, $10 }' ${ida_out}/${file_prefix}_${month}_cem.tmp > ${ida_out}/${file_prefix}_${month}_cem.ida
rm -f ${ida_out}/${file_prefix}_${month}_cem.tmp
end
rm -f cemscan.tmp
