# Reformat_CEDS_0.1
import numpy as np
import xarray as xr
import os
from datetime import datetime

InDir = '/storage1/fs1/rvmartin/Active/GEOS-Chem-shared/ExternalShare/CEDS01_PNNL_org/'
OutDir = '/ExtData/HEMCO/CEDS/v2023-04/'

# voc speciation: following convention in CEDSv2 to lump species for corresponding voc names
# voc definition is the same in CEDSv2 and HTAPv3
# NMVOC emissions are split into the following 23 species groups:
# VOC_id  VOC_name                     molecular weight     GEOS-Chem Species
# VOC1    alcohols                     46.2                 EOH
# VOC2    ethane                       30                   C2H6
# VOC3    propane                      44                   C3H8
# VOC4    butanes                      57.8                 ALK4
# VOC5    pentanes                     72                   ALK4
# VOC6    hexanes_plus_higher_alkanes  106.8                ALK4
# VOC7    ethene                       28                   C2H4
# VOC8    propene                      42                   PRPE
# VOC9    ethyne                       26                   C2H2
# VOC12   other_alkenes_and_alkynes    67                   -
# VOC13   benzene                      78                   BENZ
# VOC14   toluene                      92                   TOLU
# VOC15   xylene                       106                  XYLE
# VOC16   trimethylbenzenes            120                  -
# VOC17   other_aromatics              126.8                -
# VOC18   esters                       104.7                -
# VOC19   ethers                       81.5                 -
# VOC20   chlorinated_hydrocarbons     138.8                -
# VOC21   methanal                     30                   CH2O
# VOC22   other_alkanals               68.8                 ALD2
# VOC23   ketones                      75.3                 MEK
# VOC24   acids                        59.1                 HCOOH
# VOC25   other_voc                    68.9                 -
# It is assumed that isoprene (number 10) and terpenes (number 11) are not
# emitted in significant quantities by anthropogenic sources, and are not
# included in the anthropogenic split.

species_in = ['BC', 'CH4', 'CO', 'CO2', 'N2O', 'NH3', 'NMVOC', 'NOx', 'OC', 'SO2', \
    'VOC01-alcohols', 'VOC02-ethane', 'VOC03-propane', 'VOC04-butanes', 'VOC05-pentanes', 'VOC06-hexanes', \
    'VOC07-ethene', 'VOC08-propene', 'VOC09-ethyne', 'VOC13-benzene', 'VOC14-toluene', \
    'VOC15-xylene', \
    'VOC21-methanal', 'VOC22-other', 'VOC23-ketones', 'VOC24-acids']
species_out = ['BC', 'CH4', 'CO', 'CO2', 'N2O', 'NH3', 'NMVOC', 'NO', 'OC', 'SO2', \
    'EOH', 'C2H6', 'C3H8', 'ALK4_butanes', 'ALK4_pentanes', 'ALK4_hexanes', 'C2H4', \
    'PRPE', 'C2H2', 'BENZ', 'TOLU', 'XYLE', 'CH2O', 'ALD2', 'MEK', 'HCOOH']

encoding = dict(missing_value=1.e+20, _FillValue=1.e+20)
# ids = "0: Agriculture; 1: Energy; 2: Industrial; 3: Transportation; 4: Residential, Commercial, Other; \
#       5: Solvents production and application; 6: Waste; 7: International Shipping";
sectors_out = ["agr", "ene", "ind", "tra", "rco", "slv", "wst", "shp"]

fpath = os.path.join(InDir, '{}-em/{}-em-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21_gn_{}01-{}12.nc'.\
    format(species_in[0], species_in[0], 2019, 2019))
ds = xr.open_dataset(fpath)
lat = ds['lat']
lon = ds['lon']

for year in range(1980, 1982):
    path = os.path.join(OutDir, '{}'.format(year))
    isExist = os.path.exists(path)
    if not isExist:
        os.mkdir(os.path.join(OutDir, '{}'.format(year)))
    refyear = year
    day0 = datetime(year,1,1)
    time = []
    for mon in range(1,13):
        dayi = datetime(year, mon, 1)
        time.append((dayi - day0).days)
    time = np.array(time).astype('float')

    for speci in range(len(species_out)):
        ds_out = xr.Dataset({'lat': lat, 'lon': lon})

        if species_out[speci] == 'NO':
            factor = 30/46
        else:
            factor = 1

        if ("VOC" in species_in[speci]) & (species_in[speci] != 'NMVOC'):
            if species_in[speci] == 'VOC06-hexanes':
                fpath_in = os.path.join(InDir, '{}/{}-pl-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\
                    format(species_in[speci], species_in[speci], year, year))
                varname_in = '{}_pl_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" ))
            elif species_in[speci] == 'VOC22-other':
                fpath_in = os.path.join(InDir, '{}/{}-alka-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\
                    format(species_in[speci], species_in[speci], year, year))
                varname_in = '{}_alka_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" ))
            else:
                fpath_in = os.path.join(InDir, '{}/{}-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\
                    format(species_in[speci], species_in[speci], year, year))
                varname_in = '{}_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" ))

        else:
            fpath_in = os.path.join(InDir, '{}-em/{}-em-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21_gn_{}01-{}12.nc'.\
                format(species_in[speci], species_in[speci], year, year))
            varname_in = '{}_em_anthro'.format(species_in[speci])
        ds_in = xr.open_dataset(fpath_in)


        for seci in range(len(sectors_out)):
            varname_out = '{}_{}'.format(species_out[speci], sectors_out[seci])
            emisflux = ds_in[varname_in].values[:,seci,...].squeeze() * factor

            # convert to DataArray
            emisflux = xr.DataArray(emisflux, coords=[time, lat.values, lon.values], dims=['time', 'lat', 'lon'], \
                attrs=dict(long_name='{} emission in sector {}'.format(species_out[speci], sectors_out[seci]), \
                    units='kg/m2/s', encoding=encoding))
            ds_out[varname_out] = emisflux

        ds_out.time.attrs = dict(units='days since {}-01-01 00:00:00'.format(refyear), \
            delta_t='0000-01-00 00:00:00', axis='T', standard_name='Time', long_name='Time', calendar='standard')
        ds_out.lon.attrs = dict(units='degrees_east', axis='X', standard_name='Longitude', long_name='Longitude')
        ds_out.lat.attrs = dict(units='degrees_north', axis='Y', standard_name='Latitude', long_name='Latitude')
        ds_out.attrs = dict(Title='COARDS/netCDF file containing monthly emissions of {}'.format(species_out[speci]), Conventions = "COARDS")

        fpath_out = OutDir + '{}/CEDS_{}_0.1x0.1_{}.nc'.format(year, species_out[speci], year)
        ds_out.to_netcdf(fpath_out, engine='netcdf4', format='NETCDF4', unlimited_dims='time')