#!/usr/local/apps/anaconda/4.4.0/anaconda3/bin/python
#SBATCH --time 144:00:00
###SBATCH --nodes=6
#SBATCH --ntasks=288
#SBATCH --job-name=S11
#SBATCH -p ord
#SBATCH --gid=mod3eval
#SBATCH -o logs/CMAQv53_TS_108NHEMI_2020_288proc_%j.txt
###SBATCH --constraint=cascadelake
###SBATCH --constraint=broadwell

# On Atmos Requires
# module purge
# module load intel/18.0.1 intelmpi/2018.0.128
# module load netcdf-4.4.1/intel-18.0 pnetcdf-1.8.1/intel-18.0
# module load anaconda/4.4.0
import sys
import os
import pandas as pd
import datetime
import PseudoNetCDF as pnc
from glob import glob
import time
import argparse


# Command line: START and END are both required; they are handed to
# pd.date_range further down to build the list of days to simulate.
parser = argparse.ArgumentParser()
parser.add_argument('START', help='YYYY-MM-DD')
parser.add_argument('END', help='YYYY-MM-DD')
args = parser.parse_args()

# Make sure NPROCS is <= the slurm option --ntasks
# nc and nr are exported together as NPCOL_NPROW (presumably the number of
# processor columns and rows -- confirm against the model documentation);
# their product is the process count passed to mpirun -np.
nc = 16
nr = 18
NPROCS = int(nc) * int(nr)
# The value is written with a leading space: ' 16 18'
os.environ['NPCOL_NPROW'] = f' {nc} {nr}'

# Make sure this matches the slurm option --time
# Wall-clock limit in seconds; compared with the elapsed time after each
# simulated day to decide whether the job should resubmit itself.
slurm_time = 144 * 3600

def readopts(optpath, **kwds):
    """Read a YAML options template, fill it in and return the parsed result.

    The file at ``optpath`` is read as text, ``str.format(**kwds)`` is applied
    to substitute its ``{name}`` placeholders, and the resulting text is
    parsed as YAML.

    Parameters
    ----------
    optpath : str
        Path of the template file to read.
    **kwds
        Values made available to the template's ``{name}`` placeholders.

    Returns
    -------
    object
        Whatever the YAML document parses to (the callers in this file use
        it as a dict).

    Raises
    ------
    OSError
        If ``optpath`` cannot be opened.
    KeyError
        If the template references a placeholder that is not in ``kwds``.
    """
    import yaml

    # Context manager so the file handle is closed as soon as it is read
    with open(optpath) as optfile:
        template = optfile.read()

    # Loader is given explicitly: yaml.Loader is what yaml.load used by
    # default in old PyYAML releases, and recent releases refuse to run
    # without the argument.
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so
    # this must only be used on trusted option files. If the templates use
    # plain YAML only, yaml.safe_load would be the safer choice -- confirm.
    return yaml.load(template.format(**kwds), Loader=yaml.Loader)


def loadopts(optpath, **kwds):
    """Read options with readopts and export them to the process environment.

    Parameters
    ----------
    optpath : str
        Path of the options template, forwarded to ``readopts``.
    **kwds
        Template values, forwarded to ``readopts``.

    Returns
    -------
    The parsed options, after every entry has also been copied into
    ``os.environ``.
    """
    loaded = readopts(optpath, **kwds)
    # Every key/value pair becomes an environment variable of this process
    os.environ.update(**loaded)
    return loaded


def iscomplete(cgridpath, nextdate):
    """Tell whether the file at cgridpath ends at 000000 on nextdate's day.

    Parameters
    ----------
    cgridpath : str
        Path of the file to inspect; it is opened with PseudoNetCDF using
        the 'ioapi' format.
    nextdate : datetime-like
        Only its year and day of year (``%Y%j``) are used.

    Returns
    -------
    bool
        False when the file does not exist. Otherwise True only if the last
        TFLAG entry has a date equal to ``nextdate``'s YYYYJJJ and a time
        equal to 0.
    """
    if not os.path.exists(cgridpath):
        return False
    cgf = pnc.pncopen(cgridpath, format='ioapi')
    # Last time step, index 0 on the second axis: a (date, time) pair
    jday, hhmmss = cgf.variables['TFLAG'][-1, 0, :]
    # Plain int() conversion instead of eval(): evaluating a literal such as
    # int(010000) is a SyntaxError in Python 3, so the eval form crashed for
    # any nextdate whose %H%M%S had a leading zero followed by other digits.
    chkjday = int(nextdate.strftime('%Y%j'))
    # print(chkjday, jday, hhmmss, flush=True)
    return (chkjday == jday) and (hhmmss == 0)



# If overwrite is set to True, all outputs will be systematically overwritten
overwrite = False

# Load linux environment
linuxopts = loadopts('linux.env', WORKDIR=os.getcwd())


# Load CMAQ configuration environment
configopts = loadopts('config.env', WORKDIR=os.getcwd())


dates = pd.date_range(args.START, args.END)
firstdate = dates[0]
t0 = time.time()
cmaqtimes = []

# Main loop: one model run per day.
# For each day: build the day's options from the day.env and gsi.env
# templates, export them to the environment, skip the day if its output is
# already complete, check inputs and outputs, run the model through mpirun,
# move the logs, delete unwanted outputs and, when the remaining slurm time
# looks too short for another day, resubmit the job and stop.
for startdate in dates:
    today = datetime.datetime.now()
    print(f'Started {startdate:%Y-%m-%d} at: {today:%Y-%m-%dT%H:%M:%S}')
    # for IC
    prevdate = startdate + pd.Timedelta(-1, unit='D')

    # Load daily CMAQ input/output options
    runopts = readopts(
        'day.env', startdate=startdate, prevdate=prevdate, today=today,
        **configopts
    )
    runopts['INITIAL_RUN'] = ('Y' if startdate == firstdate else 'N')
    inputs = runopts.pop('inputs')
    outputs = runopts.pop('outputs')

    # GSI options
    gsiopts = readopts(
        'gsi.env', startdate=startdate, **configopts
    )
    # Make relative paths absolute. Empty or non-string values are left
    # untouched instead of raising on v[0].
    for k, v in gsiopts.items():
        if isinstance(v, str) and v.startswith('.'):
            gsiopts[k] = os.path.realpath(v)
    if gsiopts['DOGSI'] == 'F':  # don't do GSI
        gsiopts['gsistring'] = 'noGSI'
    elif gsiopts['DOGSI'] == 'T':  # do GSI
        os.makedirs(gsiopts['GSIOUT']+'/restart', exist_ok=True)
        gsiopts['gsistring'] = 'GSI'
        tomorrow = startdate + pd.Timedelta(days=1)
        now = startdate
        # Visits startdate+1h ... startdate+24h: `now` is advanced before it
        # is used, so the last hour handled is 00 of the following day.
        while now < tomorrow:
            # Keyword form instead of the deprecated '1H' unit alias
            now = now + pd.Timedelta(hours=1)
            aqmf = f'{gsiopts["TROPOMIDIR"]}/{now:tropomi.no2.PAL_.%Y%m%d%H}.bufr'
            if os.path.exists(aqmf):
                rstf = f'{now:aqm.%Y%m%d.t%Hz.gsi.ncf}'
                gsiopts[f'CTM_GSI_{now:%H}'] = '{GSIOUT}'.format(**gsiopts)+f'/restart/{rstf}'
                gsiopts[f'DOITIME{now:%H}'] = 'YES'
            else:
                gsiopts[f'DOITIME{now:%H}'] = 'NO'
    else:
        raise ValueError(
            'Did not understand input for DOGSI:'+
            f' {gsiopts["DOGSI"]}, need T or F'
        )

    # Update environment
    os.environ.update(**runopts)
    os.environ.update(**inputs)
    os.environ.update(**outputs)
    os.environ.update(**gsiopts)
    # Add IOAPI_LIB_DIR to LD_LIBRARY_PATH only once: appending it on every
    # iteration made the variable grow with each simulated day, and reading
    # it with [] failed when LD_LIBRARY_PATH was not set at all.
    ioapi_lib_dir = os.environ['IOAPI_LIB_DIR']
    ld_library_path = os.environ.get('LD_LIBRARY_PATH', '')
    if ioapi_lib_dir not in ld_library_path.split(':'):
        os.environ['LD_LIBRARY_PATH'] = (
            f'{ld_library_path}:{ioapi_lib_dir}'
            if ld_library_path else ioapi_lib_dir
        )
    print(gsiopts, flush=True)

    if not overwrite:
        nextdate = startdate + pd.Timedelta(1, unit='D')
        # Keep only the path; drop trailing options such as -v
        cgridpath = outputs['S_CGRID'].split()[0]
        if iscomplete(cgridpath, nextdate):
            print(f'{startdate:%F} complete', flush=True, file=sys.stdout)
            continue

    # Check that the required inputs exist
    misscount = 0
    for k, v in inputs.items():
        if (
            v != '' and '_LAB_' not in k
            and '_DATE_' not in k and not k.startswith('N_')
        ):
            tokens = v.split()
            if not tokens:
                # Whitespace-only value: nothing to check
                continue
            inpath = tokens[0]
            if not os.path.exists(inpath):
                misscount += 1
                print(f'Missing: {inpath}')

    if misscount > 0:
        raise IOError(f'Could not find {misscount} files')

    # Check for existing outputs
    outputexists = {}
    for k, v in outputs.items():
        outpath = v.split()[0]
        # Make sure destination folder exists; a bare file name has no
        # directory part and os.makedirs('') would raise.
        outdir = os.path.dirname(outpath)
        if outdir:
            os.makedirs(outdir, exist_ok=True)
        # Keep track of what outputs exist
        if os.path.exists(outpath):
            outputexists[k] = outpath

    if not overwrite and len(outputexists) > 0:
        # If not overwriting, list existing files and then err
        for k, outpath in outputexists.items():
            print(f'{k}={outpath} exists')

        raise IOError(f'{list(outputexists)} files exists')
    else:
        # If overwriting, remove any existing files
        for k, outpath in outputexists.items():
            os.remove(outpath)

    # Start Run CMAQ
    startcmaq = time.time()
    # '2>&1 |' is the portable spelling of '|&': os.system runs the command
    # through /bin/sh, which is not guaranteed to understand '|&'.
    cmd = (
        '( /usr/bin/time -p mpirun -np {NPROCS} {BLDDIR}/CCTM_v532.exe )'
        + ' 2>&1 | tee logs/buff_{EXECUTION_ID}.txt'
    ).format(NPROCS=NPROCS, **os.environ)
    os.system(cmd)
    endcmaq = time.time()
    # End Run CMAQ

    # Move logs to output/LOGS
    logpaths = sorted(glob('CTM_LOG_*'))
    os.makedirs(runopts['LOGDIR'], exist_ok=True)
    for logpath in logpaths:
        os.replace(logpath, os.path.join(runopts['LOGDIR'], logpath))

    print(
        f'Completed {startdate:%Y-%m-%d} at: '
        + f'{datetime.datetime.now():%Y-%m-%dT%H:%M:%S}'
    )

    # delete unwanted files
    # split() without argument ignores repeated or surrounding blanks, so no
    # empty file type is produced
    trash = configopts['TRASH_FILES'].split()
    trashpat = '{OUTDIR}/{startdate:%Y/%m/%d}/{OUTSTEM}_{startdate:%Y%m%d}'.format(**os.environ, startdate=startdate)
    for ftype in trash:
        if (ftype == 'CGRID'):  # don't delete CGRID during the run
            continue
        myf = startdate.strftime(f'{trashpat}_{ftype}.nc')
        if os.path.isfile(myf):
            os.remove(myf)

    # Never restart with overwrite==True
    # Leads to an infinite loop
    if not overwrite:
        # Consider resubmitting
        time_left = slurm_time - (time.time() - t0)
        cmaqtimes.append(endcmaq - startcmaq)
        maxcmaqtime = max(cmaqtimes)
        # Stop when the slowest day so far, plus a 10% margin, would not fit
        # in the remaining wall-clock time
        if time_left < (maxcmaqtime * 1.1):
            # Restart submission, but only if there are days left to run.
            # START and END are passed on because they are required
            # arguments; days already complete are skipped by the
            # iscomplete check above.
            # NOTE(review): assumes ./run.py is this script -- confirm.
            if startdate < dates[-1]:
                os.system(f'sbatch ./run.py {args.START} {args.END}')
            break
