'''
This file defines each data section in the raw NR data files,
including the target MySQL table name and which columns are present
in the data file (including fixed-width lengths).

It also defines a helper function to determine which data sections
should be read based on the filename.
'''

import os
import NR08a.DataColumns as DataColumns

class DataType(object):
    '''
    Base class describing one data section of a raw NR data file.

    Every attribute defaults to None; each subclass in this file
    overrides the ones it needs.
    '''
    # section: name of the data section (e.g. 'ACTIVITY').
    # table: name of the target MySQL table.
    section = table = None
    # columns: column definitions present in the raw data file.
    # mysql_columns: only set by the subclasses that also define
    # format_data, i.e. those whose raw layout is reshaped.
    columns = mysql_columns = None
    # Replaced by a classmethod in subclasses that reshape their rows.
    format_data = None


class Activity(DataType):
    '''
    Fixed-width layout of the ACTIVITY section (table: activity).

    Columns   Type       Contents
    1-10      character  SCC code
    12-51     character  Equipment description (not used)
    52-56     character  Region code
    57-66     character  (not used. was exhaust technology type)
    67-71     real       Minimum HP
    72-76     real       Maximum HP
    77-81     real       Load factor
    82-86     real       (not used)
    87-96     character  Activity level units
    97-106    real       Activity level
    107-116   real       Identifier for age adjustment curve
                         (DEFAULT=no adjustment)

    The ranges 57-66 and 82-86 have no column definition below.
    '''
    section = 'ACTIVITY'
    table = 'activity'

    _scc = DataColumns.SCC([1, 10])
    _description = DataColumns.Description([12, 51])
    _region = DataColumns.Region([52, 56])
    _minhp = DataColumns.MinHP([67, 71])
    _maxhp = DataColumns.MaxHP([72, 76])
    _lf = DataColumns.LF([77, 81])
    _units = DataColumns.Units([87, 96])
    _activity = DataColumns.Activity([97, 106])
    _ageadj = DataColumns.AgeAdj([107, 116])

    # Listed in the order the fields appear on each line.
    columns = [
        _scc,
        _description,
        _region,
        _minhp,
        _maxhp,
        _lf,
        _units,
        _activity,
        _ageadj,
    ]


class Allocation(DataType):
    '''
    Fixed-width layout of the INDICATORS section of an allocation
    file (table: allocation).

    Columns   Contents
    1-3       Indicator code
    6-10      FIPS code (can be global FIPS codes e.g. 06000 = all of CA)
    11-15     Subregion code (blank means is entire nation, state or county)
    16-20     Year of estimate or prediction
    21-40     Indicator value
    41-45     Blank (unused)
    46+       Optional Description (unused)

    The open-ended description is read here as columns 46-86.
    '''
    section = 'INDICATORS'
    table = 'allocation'

    _indicator = DataColumns.Indicator([1, 3])
    _fips = DataColumns.FIPS([6, 10])
    _subregion = DataColumns.SubRegion([11, 15])
    _year = DataColumns.Year([16, 20])
    _allocation = DataColumns.Allocation([21, 40])
    _description = DataColumns.Description([46, 86])

    # Listed in the order the fields appear on each line.
    columns = [
        _indicator,
        _fips,
        _subregion,
        _year,
        _allocation,
        _description,
    ]


class DailyTemps(DataType):
    '''
    Fixed-width layout of the DAYTEMPRVP section (table: dailytemps).

    Columns   Type       Contents
     1-2      integer    State FIPS (1 or 2 digits)
     3-6      character  State Abbreviation (Not Used)
     7-10     character  Parameter (TMAX, TMIN, TAVG, or RVP)
    12-13     integer    Month (1 - 12)
    14-199    real       31 daily values, 6 char wide, separated by spaces.

    A raw line carries one value per day of the month. format_data
    turns each line into one row per day, laid out as mysql_columns.

    NOTE(review): the 14-199 range above implies 6-character fields
    starting at column 14, while the day columns below start at
    column 15 (15-20, 21-26, ..., 195-200). Confirm against the raw
    files and the DataColumns range convention.
    '''
    section = 'DAYTEMPRVP'
    table = 'dailytemps'

    _statefips = DataColumns.StateFIPS([1, 2])
    _state = DataColumns.State([3, 6])
    _parameter = DataColumns.Parameter([7, 10])
    _month = DataColumns.Month([12, 13])
    _day = DataColumns.Day()  # this column doesn't exist in raw data
    _value = DataColumns.DailyTempValue()  # this column doesn't exist (as-is) in raw data
    # One raw column per day of the month: day0 .. day30.
    _days = [
        DataColumns.GenericColumn('day{}'.format(i), 'double', [15 + 6 * i, 20 + 6 * i])
        for i in range(31)
    ]

    # Raw layout: four identifying columns followed by the 31 day columns.
    columns = [_statefips, _state, _parameter, _month] + _days
    # Table layout after format_data has been applied.
    mysql_columns = [_statefips, _state, _parameter, _month, _day, _value]

    @classmethod
    def format_data(cls, data):
        '''
        Reshape rows holding 31 day values into one row per day.

        data is a non-empty list of rows ordered like `columns`. The
        result is a list of [statefips, state, parameter, month, day,
        value] lists with day running from 1. All rows for day 1 come
        first, then all rows for day 2, and so on.
        '''
        # Transpose: one list per raw column instead of one list per row.
        by_column = [[row[i] for row in data] for i in range(len(data[0]))]

        id_cols = by_column[:4]
        long_rows = []
        for day_number, values in enumerate(by_column[4:], start=1):
            long_rows.extend(
                [statefip, state, parameter, month, day_number, value]
                for value, statefip, state, parameter, month in zip(values, *id_cols)
            )
        return long_rows


class GrowthIndicators(DataType):
    '''
    Fixed-width layout of the INDICATORS section of a growth file
    (table: growthindicators).

    Columns   Contents
     1- 5     FIPS code (00000 = applies to entire nation)
                        (ss000 = applies to all of state ss)
     7-10     indicator code (arbitrary alphanumeric code)
    12-21     SCC code (2260004000 = applies to all 2-stroke lawn and garden)
                       (2600000000 = applies to all 2-stroke)
    23-27     beginning of HP range
    28-32     ending of HP range
    34-43     technology type (ALL = applies to all tech types)
    45-84     equipment description (not in file description, but the
              data exist)
    '''
    section = 'INDICATORS'
    table = 'growthindicators'

    _fips = DataColumns.FIPS([1, 5])
    _indicator = DataColumns.Indicator([7, 10])
    _scc = DataColumns.SCC([12, 21])
    _minhp = DataColumns.MinHP([23, 27])
    _maxhp = DataColumns.MaxHP([28, 32])
    _techtype = DataColumns.TechType([34, 43])
    _description = DataColumns.Description([45, 84])

    # Listed in the order the fields appear on each line.
    columns = [
        _fips,
        _indicator,
        _scc,
        _minhp,
        _maxhp,
        _techtype,
        _description,
    ]


class Growth(DataType):
    '''
    Fixed-width layout of the GROWTH section (table: growth).

    Columns   Contents
     1- 5     FIPS code (00000 = applies to entire nation)
                        (ss000 = applies to all of state ss)
     6-10     subregion code (blank = applies to all subregions)
    11-15     year of estimate (4-digit year)
    17-20     indicator code (arbitrary alphanumeric code)
    26-45     indicator value
    '''
    section = 'GROWTH'
    table = 'growth'

    _fips = DataColumns.FIPS([1, 5])
    _subregion = DataColumns.SubRegion([6, 10])
    _year = DataColumns.Year([11, 15])
    _indicator = DataColumns.Indicator([17, 20])
    _growth = DataColumns.Growth([26, 45])

    # Listed in the order the fields appear on each line.
    columns = [
        _fips,
        _subregion,
        _year,
        _indicator,
        _growth,
    ]


class GrowthScrappage(DataType):
    '''
    Fixed-width layout of the SCRAPPAGE section (table: growthscrappage).

    Each line holds two values:

    Columns   Contents
     1-9      fraction of useful life already used
    10-19     percentage of equipment scrapped
    '''
    section = 'SCRAPPAGE'
    table = 'growthscrappage'

    _ulf = DataColumns.UsefulLifeFraction([1, 9])
    _scrapped = DataColumns.PercentScrapped([10, 19])

    # Listed in the order the fields appear on each line.
    columns = [
        _ulf,
        _scrapped,
    ]


class GrowthAltScrappage(DataType):
    '''
    Layout of the ALTERNATE SCRAPPAGE section (table: growthaltscrappage).

    Similar in concept to GrowthScrappage, but there are a variable
    number of scrappage columns, with the name of the equipment type as
    the value in the first row of each column. Because the number of
    columns varies, format_data reshapes the table into 3 fixed columns
    (useful life fraction, equipment type, percent scrapped) before it
    can be written.

    Columns 1-9 hold the useful life fraction. Each scrappage column is
    a 10-character field placed directly after the previous one:
    10-19, 20-29, 30-39, ... Currently up to 98 scrappage columns are
    defined.
    '''
    _ulf = DataColumns.UsefulLifeFraction([1, 9])
    _equip = DataColumns.EquipmentType() # this column doesn't exist in raw data
    _scrapped = DataColumns.PercentScrapped() # this column doesn't exist (as-is) in raw data
    # The end of each field is start + 9. Using 19*i here made every
    # column after the first overlap its neighbours (20-38, 30-57, ...).
    _altscrap = [
        DataColumns.GenericColumn('percentScrapped{}'.format(i), 'double', [10*i, 10*i + 9])
        for i in range(1, 99)
    ]

    section = 'ALTERNATE SCRAPPAGE'
    table = 'growthaltscrappage'
    # Raw layout: the useful life fraction followed by every possible
    # scrappage column; unused ones are dropped by format_data.
    columns = [_ulf]
    columns.extend(_altscrap)
    # Table layout after format_data has been applied.
    mysql_columns = [_ulf, _equip, _scrapped]

    @classmethod
    def format_data(cls, data):
        '''
        Reshape the wide scrappage table into [ulf, equip, scrapped] rows.

        data is a non-empty list of rows ordered like `columns`. Its
        first row is a header: the cell above the useful life column is
        empty and the cell above each scrappage column names the
        equipment type. Reading stops at the first scrappage column
        whose header is the empty string.

        Returns a list of [ulf, equip, scrapped] lists, grouped by
        equipment type in column order.
        '''
        formatted_data = []

        # Change from a list of rows to a list of columns
        data = [[row[i] for row in data] for i in range(len(data[0]))]

        # Change from horizontal structure to vertical
        ulf_col = data[0][1:]  # skip the empty header cell
        for scrapped_col in data[1:]:
            equip = scrapped_col[0]
            if equip == '':
                break  # first unused column: no more curves in this file
            for ulf, scrapped in zip(ulf_col, scrapped_col[1:]):
                formatted_data.append([ulf, equip, scrapped])
        return formatted_data
       

class Population(DataType):
    '''
    Fixed-width layout of the POPULATION section (table: population).

    Columns     Contents
      1 -   5   FIPS code
      7 -  11   subregion code (used for subcounty estimates)
     13 -  16   year of population estimates
     18 -  27   SCC code (no globals accepted)
     29 -  68   equipment description (ignored)
     70 -  74   minimum HP range
     76 -  80   maximum HP range (ranges must match those internal to model)
     82 -  86   average HP in range (if blank model uses midpoint)
     88 -  92   expected useful life (in hours of use)
     93 - 102   flag for scrappage distribution curve (DEFAULT = standard curve)
    106 - 122   population estimate
    '''
    section = 'POPULATION'
    table = 'population'

    _fips = DataColumns.FIPS([1, 5])
    _subregion = DataColumns.SubRegion([7, 11])
    _year = DataColumns.Year([13, 16])
    _scc = DataColumns.SCC([18, 27])
    _description = DataColumns.Description([29, 68])
    _minhp = DataColumns.MinHP([70, 74])
    _maxhp = DataColumns.MaxHP([76, 80])
    _avghp = DataColumns.AverageHP([82, 86])
    _usefullife = DataColumns.ExpectedUsefulLife([88, 92])
    _scrapdistequip = DataColumns.ScrappageDistributionEquipment([93, 102])
    _pop = DataColumns.Population([106, 122])

    # Listed in the order the fields appear on each line.
    columns = [
        _fips,
        _subregion,
        _year,
        _scc,
        _description,
        _minhp,
        _maxhp,
        _avghp,
        _usefullife,
        _scrapdistequip,
        _pop,
    ]


class RetroFit(DataType):
    '''
    Fixed-width layout of the RETROFIT section (table: retrofit).

    Columns   Type       Contents
     1-4      integer    Retrofit Year start (first calendar year when
                         retrofits are done)
     6-9      integer    Retrofit Year end (last calendar year when
                         retrofits are done)
    11-14     integer    Model Year start (first model year equipment
                         that is receiving retrofit)
    16-19     integer    Model Year end (last model year equipment that
                         is receiving retrofit)
    21-30     character  SCC Code
    32-41     character  Exhaust Technology Type (e.g., Base, T0, T1,
                         T2, T2M)
    43-47     real       Minimum HP
    48-52     real       Maximum HP
    54-71     real       Annual Retrofit Fraction (0.0 - 1.0) OR Actual
                         Total Number Retrofitted (greater than 1.0)
    73-78     real       Retrofit Effectiveness. Range = 0.0 - 1.0
                         (zero is no benefit).
    80-89     character  Exhaust Pollutant affected by retrofit (HC,
                         CO, NOx, or PM)
    91-95     integer    Retrofit identifier (use same arbitrary number
                         for different pollutants being affected by
                         same retrofit)
    96+                  Can enter a description of specific retrofit
                         technology. Not read into model.

    The open-ended description is read here as columns 96-135.
    '''
    section = 'RETROFIT'
    table = 'retrofit'

    _yearstart = DataColumns.YearStart([1, 4])
    _yearend = DataColumns.YearEnd([6, 9])
    _mystart = DataColumns.ModelYearStart([11, 14])
    _myend = DataColumns.ModelYearEnd([16, 19])
    _scc = DataColumns.SCC([21, 30])
    _techtype = DataColumns.TechType([32, 41])
    _minhp = DataColumns.MinHP([43, 47])
    _maxhp = DataColumns.MaxHP([48, 52])
    _fraction = DataColumns.RetrofitValue([54, 71])
    _effectiveness = DataColumns.RetrofitEffectiveness([73, 78])
    _pollutant = DataColumns.Pollutant([80, 89])
    _identifier = DataColumns.RetrofitIdentifier([91, 95])
    _description = DataColumns.Description([96, 135])

    # Listed in the order the fields appear on each line.
    columns = [
        _yearstart,
        _yearend,
        _mystart,
        _myend,
        _scc,
        _techtype,
        _minhp,
        _maxhp,
        _fraction,
        _effectiveness,
        _pollutant,
        _identifier,
        _description,
    ]

class Regions(DataType):
    '''
    Fixed-width layout of the REGIONS section (table: region).

    Columns   Type       Contents
     1- 5     character  user defined region code
     6-45     character  region description (not used)
    46-50     character  state or county FIPs code
    51-70     character  state or county name (not used)
    '''
    section = 'REGIONS'
    table = 'region'

    _region = DataColumns.Region([1, 5])
    _description = DataColumns.Description([6, 45])
    _fips = DataColumns.FIPS([46, 50])
    _name = DataColumns.Name([51, 70])

    # Listed in the order the fields appear on each line.
    columns = [
        _region,
        _description,
        _fips,
        _name,
    ]

class MonthlyAdjFactors(DataType):
    '''
    Fixed-width layout of the MONTHLY section (table: monthlyadjfactors).

    Columns   Type       Contents
      1-  5   character  subregion code (blank = match all)
      7- 16   character  SCC code (global codes are acceptable)
     18- 51   character  equipment description (not used)
     52- 61   real       fraction of annual activity in January
     62- 71   real       fraction of annual activity in February
     72- 81   real       fraction of annual activity in March
     82- 91   real       fraction of annual activity in April
     92-101   real       fraction of annual activity in May
    102-111   real       fraction of annual activity in June
    112-121   real       fraction of annual activity in July
    122-131   real       fraction of annual activity in August
    132-141   real       fraction of annual activity in September
    142-151   real       fraction of annual activity in October
    152-161   real       fraction of annual activity in November
    162-171   real       fraction of annual activity in December

    A raw line carries twelve monthly fractions. format_data turns
    each line into one row per month, laid out as mysql_columns.
    '''
    section = 'MONTHLY'
    table = 'monthlyadjfactors'

    _subregion = DataColumns.SubRegion([1, 5])
    _scc = DataColumns.SCC([7, 16])
    _description = DataColumns.Description([18, 51])
    _month = DataColumns.Month()
    _adjfactor = DataColumns.AdjFactor()
    # One 10-character raw column per month: af0 (January) .. af11 (December).
    _afs = [
        DataColumns.GenericColumn('af{}'.format(i), 'double', [52 + 10 * i, 61 + 10 * i])
        for i in range(12)
    ]

    # Raw layout: three identifying columns followed by the 12 month columns.
    columns = [_subregion, _scc, _description] + _afs
    # Table layout after format_data has been applied.
    mysql_columns = [_subregion, _scc, _description, _month, _adjfactor]

    @classmethod
    def format_data(cls, data):
        '''
        Reshape rows holding 12 monthly factors into one row per month.

        data is a non-empty list of rows ordered like `columns`. The
        result is a list of [subregion, scc, description, month, factor]
        lists with month running from 1. All rows for month 1 come
        first, then all rows for month 2, and so on.
        '''
        # Transpose: one list per raw column instead of one list per row.
        by_column = [[row[i] for row in data] for i in range(len(data[0]))]

        id_cols = by_column[:3]
        long_rows = []
        for month_number, factors in enumerate(by_column[3:], start=1):
            long_rows.extend(
                [subregion, scc, description, month_number, factor]
                for factor, subregion, scc, description in zip(factors, *id_cols)
            )
        return long_rows


class DailyAdjFactors(DataType):
    '''
    Fixed-width layout of the DAILY section (table: dailyadjfactors).

    Columns   Type       Contents
     1-  5    character  subregion code (blank = match all)
     7- 16    character  SCC code (global codes are acceptable)
    18- 51    character  equipment description (not used)
    52- 61    real       fraction of weekly activity in typical weekday
    62- 71    real       fraction of weekly activity in typical weekend

    A raw line carries a weekday and a weekend fraction. format_data
    turns each line into one row per fraction, tagged with day ID 5
    for the weekday column and 2 for the weekend column.
    '''
    section = 'DAILY'
    table = 'dailyadjfactors'

    _subregion = DataColumns.SubRegion([1, 5])
    _scc = DataColumns.SCC([7, 16])
    _description = DataColumns.Description([18, 51])
    _dayID = DataColumns.DayID()
    _adjfactor = DataColumns.AdjFactor()
    # Two 10-character raw columns: af0 (weekday) and af1 (weekend).
    _afs = [
        DataColumns.GenericColumn('af{}'.format(i), 'double', [52 + 10 * i, 61 + 10 * i])
        for i in range(2)
    ]

    # Raw layout: three identifying columns followed by the two factors.
    columns = [_subregion, _scc, _description] + _afs
    # Table layout after format_data has been applied.
    mysql_columns = [_subregion, _scc, _description, _dayID, _adjfactor]

    @classmethod
    def format_data(cls, data):
        '''
        Reshape rows holding two factors into one row per factor.

        data is a non-empty list of rows ordered like `columns`. The
        result is a list of [subregion, scc, description, dayID, factor]
        lists. Rows built from the first factor column get dayID 5;
        rows built from any later factor column get dayID 2.
        '''
        # Transpose: one list per raw column instead of one list per row.
        by_column = [[row[i] for row in data] for i in range(len(data[0]))]

        id_cols = by_column[:3]
        long_rows = []
        for position, factors in enumerate(by_column[3:]):
            day_id = 5 if position == 0 else 2
            long_rows.extend(
                [subregion, scc, description, day_id, factor]
                for factor, subregion, scc, description in zip(factors, *id_cols)
            )
        return long_rows


class Fips(DataType):
    '''
    Fixed-width layout of the FIPS section (table: fips).

    Columns   Contents
     1- 5     FIPS code (all individual 5-digit county codes)
     7-10     Start year (4-digit. Blank default means all past years)
    12-15     End year (4-digit. Blank default means all future years)
    17-66     County name
    '''
    section = 'FIPS'
    table = 'fips'

    _fips = DataColumns.FIPS([1, 5])
    _yearstart = DataColumns.YearStart([7, 10])
    _yearend = DataColumns.YearEnd([12, 15])
    _countyname = DataColumns.CountyName([17, 66])

    # Listed in the order the fields appear on each line.
    columns = [
        _fips,
        _yearstart,
        _yearend,
        _countyname,
    ]


def DataTypesPicker(filename):
    '''
    Choose which data sections to read from a file, based on its name.

    Only the final path component of filename is considered, and the
    comparison is case-insensitive. Returns a list of DataType
    subclasses, or [None] when the name matches none of the known
    patterns.
    '''
    name = os.path.basename(filename).upper()

    if name == 'SEASON.DAT' or name.endswith('.SEA'):
        sections = [Regions, MonthlyAdjFactors, DailyAdjFactors]
    elif name.endswith('.POP'):
        sections = [Population]
    elif name.endswith('.GRW'):
        sections = [GrowthIndicators, Growth, GrowthScrappage, GrowthAltScrappage]
    elif name.endswith('.ALO'):
        sections = [Allocation]
    elif name == 'ACTIVITY.DAT' or name.endswith('.ACT'):
        sections = [Activity]
    elif name == 'DAYTMPRV.DAT':
        # Matched by exact name only; '.DAY' extension matching is not enabled.
        sections = [DailyTemps]
    elif name == 'RETROTST.DAT':
        # Matched by exact name only; '.RET' extension matching is not enabled.
        sections = [RetroFit]
    elif name == 'FIPS.DAT':
        sections = [Fips]
    else:
        sections = [None]
    return sections
