# AddProfNRMoCo_20191120.plx
# Previously, there were two NR output tables: cixhp and notci
# Now, there is only one. No need to aggregate hp and combine the two.
#   Variable hpcat is eliminated.
# Previously, only nonhaptog had a profile number added.
# Now, both nonhaptog (88) and exh pm25 (110) have profiles added.
# Previously, nht was extracted into a separate table for adding profile.
#   I now judge that there is no efficiency benefit from the separation.
# I still create a new table, allnrequip, distinct from movesoutput, because
#   1. I drop some variables from movesoutput, and
#   2. Emissions are aggregated from day into month.
# This script is called from ProcessNR.plx, which has placed the correct
# tables in database postprocess.
# Hence, this script does not need to worry about naming.
# Like ProcessNR.plx, this script is designed to test the system locally
#   and also to work on Amazon.

# Announce start-up and capture the run configuration.
# $ARGV[0] names the platform: 'LocalWindows' selects the local test login,
# any other value selects the ec2-user login.
print "\nStarting AddProfNRMoCo.plx\n";
($os, $profilesdb) = ($ARGV[0], 'nrspecprofiles_moves2014b');
$start = time();
print "os = $os\n", "profilesdb = $profilesdb\n";

# Base mysql client command line; each step below redirects script.sql into it.
$mycmd = ($os eq 'LocalWindows')
	? 'mysql --user=moves --password=moves'
	: "mysql --user=ec2-user";
print "mycmd = $mycmd\n";

# CREATE TABLE ALLNREQUIP FROM MOVESOUTPUT AND CONVERT DAY TO MONTH
#
# Rebuilds postprocess.allnrequip from postprocess.movesoutput:
#   * keeps only the columns named in the CREATE TABLE below,
#   * keeps only rows with emissionquant > 0,
#   * collapses the per-dayid rows into one monthly total (monthemiss),
#   * adds an empty specprofileid column and two indexes.
#
# Month conversion: sum(dayid * emissionquant * days_in_month / 7).
# NOTE(review): this presumes dayid equals the number of such days in a week
#   (MOVES convention: 5 = weekdays, 2 = weekend days) -- confirm.
# NOTE(review): February is always treated as 28 days; leap years are not
#   handled. A monthid outside 1..12 yields a NULL monthemiss.
#
# The GROUP BY names every non-aggregated column that is selected. yearID and
# countyID must be included: without them, rows from different years or
# counties are summed into one row carrying an arbitrary yearID/countyID, and
# MySQL rejects the statement outright when ONLY_FULL_GROUP_BY is enabled.
$start = time();
$sql = <<'END_SQL';
use postprocess;
drop table if exists allnrequip;
create table allnrequip (
`yearID` smallint(5) unsigned DEFAULT NULL,
`monthID` smallint(5) unsigned DEFAULT NULL,
`countyID` int(10) unsigned DEFAULT NULL,
`pollutantID` smallint(5) unsigned DEFAULT NULL,
`processid` smallint DEFAULT NULL,
`fuelSubTypeID` smallint(5) unsigned DEFAULT NULL,
`SCC` char(10) DEFAULT NULL,
`engTechID` smallint(5) unsigned DEFAULT NULL,
`monthemiss` double DEFAULT NULL
);
insert into allnrequip (
	yearID,
	monthID,
	countyID,
	pollutantID,
	processid,
	fuelSubTypeID,
	SCC,
	engTechID,
	monthemiss
)
select
	yearID,
	monthID,
	countyID,
	pollutantID,
	processid,
	fuelSubTypeID,
	SCC,
	engTechID,
	sum(
		dayid * emissionquant
		* case
			when monthid in (1,3,5,7,8,10,12) then 31
			when monthid in (4,6,9,11) then 30
			when monthid = 2 then 28
		end
		/ 7
	) as monthemiss
from movesoutput
where emissionquant > 0
group by
	yearID,
	monthID,
	countyID,
	pollutantID,
	processid,
	fuelSubTypeID,
	SCC,
	engTechID
;
alter table allnrequip
	add column (specprofileid varchar(10) default null)
;
alter table allnrequip
	add index (pollutantid),
	add index (engtechid, processid, fuelsubtypeid)
;
END_SQL

# Write the batch to disk. Stop here if that fails: otherwise mysql would be
# fed whatever stale script.sql is left over from an earlier step or run.
open(my $create_fh, '>', 'script.sql') or die "Cannot write script.sql: $!\n";
print {$create_fh} $sql;
close($create_fh) or die "Cannot close script.sql: $!\n";

# Run the batch; report (but do not abort on) a non-zero mysql exit status.
`$mycmd < script.sql`;
warn "mysql failed while creating allnrequip (\$? = $?)\n" if $? != 0;
$end = time(); $elapsed = $end - $start; print "time create allequip =$elapsed\n";

# SET SPECPROFILEID
#
# Fills allnrequip.specprofileid by matching each row to a profile table on
# (engtechid, processid, fuelsubtypeid):
#   * pollutantid 110      -> <profilesdb>.nrpm25profile.nrpm25profileid
#   * pollutantid 88 or 87 -> <profilesdb>.nrtogprofile.nrtogprofileid
# and then deletes every row whose pollutantID is not 87 or 110.
#
# The profiles database name comes from $profilesdb (set at the top of the
# script) instead of being repeated literally, so the value that is printed
# at start-up is the value that is actually used.
#
# NOTE(review): rows with pollutantid 88 receive a profile and are then
#   removed by the final DELETE, while the file header and QA3 talk about
#   pollutants 88 and 110. Behaviour is preserved as-is here; confirm whether
#   the DELETE should keep 88 (or whether the 88 update is simply obsolete).
$start = time();
$sql = <<"END_SQL";
use postprocess;
update allnrequip a
inner join ${profilesdb}.nrpm25profile p
	on  a.engtechid = p.engtechid
	and a.processid = p.processid
	and a.fuelsubtypeid = p.fuelsubtypeid
set a.specprofileid = p.nrpm25profileid
where a.pollutantid = 110
;
update allnrequip a
inner join ${profilesdb}.nrtogprofile t
	on  a.engtechid = t.engtechid
	and a.processid = t.processid
	and a.fuelsubtypeid = t.fuelsubtypeid
set a.specprofileid = t.nrtogprofileid
where a.pollutantid in (88,87)
;
delete from allnrequip where pollutantID not in (87,110);
END_SQL

# Write the batch to disk. Stop here if that fails: otherwise mysql would be
# fed whatever stale script.sql is left over from the previous step.
open(my $update_fh, '>', 'script.sql') or die "Cannot write script.sql: $!\n";
print {$update_fh} $sql;
close($update_fh) or die "Cannot close script.sql: $!\n";

# Run the batch; report (but do not abort on) a non-zero mysql exit status.
`$mycmd < script.sql`;
warn "mysql failed while setting specprofileid (\$? = $?)\n" if $? != 0;
$end = time(); $elapsed = $end - $start; print "time set profileid = $elapsed\n";

# QA1 -- check of the day-to-month conversion (execution currently disabled).
# The batch holds two report queries:
#   1. movesoutput: sum(emissionquant) by dayid, monthid, pollutantid
#   2. allnrequip:  sum(monthemiss)    by monthid, pollutantid
# The original author noted that this check worked and confirmed the month
# calculation. The two lines that would write and run the batch are commented
# out, so this section only prints the intended output file name and leaves
# the query text in $sql.
($start, $outfile) = (time(), "ProcessNR.txt");
print "outfile = $outfile\n";
$sql = <<'END_QA1';

use postprocess;
select '';
select 
	'postprocess' db,
	'movesoutput' tbl,
	dayid,
	monthid,
	pollutantid,
	sum(emissionquant)
from movesoutput
group by
	dayid,
	monthid,
	pollutantid
order by
	dayid,
	monthid,
	pollutantid
;
select'';
select 
	'postprocess' db,
	'allnrequip' tbl,
	monthid,
	pollutantid,
	sum(monthemiss)
from allnrequip
group by
	monthid,
	pollutantid
order by
	monthid,
	pollutantid
;
END_QA1
# Disabled: write the batch and send its output to $outfile.
#open(out1,">script.sql"); print out1 $sql; close(out1);
#`$mycmd < script.sql > $outfile`;

# QA2 -- profile coverage summary (execution currently disabled).
# The batch reports row count and total monthemiss for allnrequip three ways:
# all rows, rows with no specprofileid, and rows with a specprofileid.
# The two lines that would write and run the batch are commented out, so this
# section only prints the intended output file name and leaves the query text
# in $sql.
$outfile = "ProcessNR_allnrequip.txt";
print "outfile = $outfile\n";
$sql = <<'END_QA2';

use postprocess;
select '';
select 'allnrequip' tbl, count(*),sum(monthemiss) from allnrequip;
select 'allnrequip noprofile' tbl, count(*),sum(monthemiss) from allnrequip where isnull(specprofileid);
select 'allnrequip profile' tbl, count(*),sum(monthemiss) from allnrequip where not isnull(specprofileid);
END_QA2
# Disabled: write the batch and send its output to $outfile.
#open(out1,">script.sql"); print out1 $sql; close(out1);
#`$mycmd < script.sql > $outfile`;

# QA3 -- list rows that should have a profile but do not (execution currently
# disabled), followed by the timing line for the whole QA section.
# The two lines that would write and run the batch are commented out, so this
# section only prints the intended output file name and leaves the query text
# in $sql.
# NOTE(review): the SET SPECPROFILEID step ends with
#   "delete from allnrequip where pollutantID not in (87,110)", so by the time
#   this query could run there are no pollutantid 88 rows left and pollutantid
#   87 rows are never checked. Confirm which pollutant list is intended before
#   re-enabling.
$outfile = "ProfilesMissing.txt";
print "outfile = $outfile\n";
$sql="
use postprocess;
SELECT * FROM postprocess.allnrequip
where pollutantid in(88,110) and isnull(specprofileid);
";
# Disabled: write the batch and send its output to $outfile.
#open(out1,">script.sql"); print out1 $sql; close(out1);
#`$mycmd < script.sql > $outfile`;

# Elapsed time since $start was last set (at the beginning of the QA1
# section). The redundant second "$end=time();" assignment has been removed.
$end=time(); $elapsed=$end-$start; print "time QA = $elapsed\n";
