# EDGAR version 8 provides annual emissions files. They also provide temporal
# scaling factors for each month for each sector/year/country. Here, we apply
# the scale factors using a 0.1° x 0.1° country mask from GFEI (accounting for
# discrepancies with EDGAR). We also correct the EDGAR scale factors to account
# for the fact that there are a different number of days in each month. We
# generate the monthly files for each sector for 2010 to 2022.
import subprocess
import multiprocessing
import datetime
import calendar
import xarray as xr
import pandas as pd
import numpy as np
import os
import sys

# Usage: script.py <year> <output_root>
year = int(sys.argv[1])
out_dir = f"{sys.argv[2]}/EDGARv8/{year}"


def generate_edgarv8_monthly(year, sec):
    """Build a monthly CH4 emissions file [kg/m2/s] for one sector and year.

    Downloads the annual EDGAR v8 "emi" [tonnes/gridcell] and "flx" [kg/m2/s]
    files for this sector, distributes the annual mass across 12 months using
    EDGAR's per-country monthly scale factors (corrected for the number of
    days in each month), converts to a flux, and writes a COARDS-formatted
    netCDF file into ``out_dir``.

    Relies on module-level globals prepared in ``__main__``:
    ``country_codes``, ``edgar_countries``, ``gfei_cc``, ``monthly_sf``,
    and ``out_dir``.
    """
    # Download the emissions and fluxes files for this sector and year.
    # (ftype/zip_name/nc_name avoid shadowing the builtins type/zip/file.)
    for ftype in ["emi", "flx"]:
        zip_name = f"v8.0_FT2022_GHG_CH4_{year}_{sec}_{ftype}_nc.zip"
        nc_name = f"v8.0_FT2022_GHG_CH4_{year}_{sec}_{ftype}.nc"
        url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/"
               f"datasets/v80_FT2022_GHG/CH4/{sec}/{ftype}_nc/{zip_name}")
        subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
        subprocess.run(["unzip", "-q", "-j", zip_name, nc_name])
        subprocess.run(["rm", zip_name])

    # Copy the annual dataset so we can modify it
    with xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc") as ds0:
        ds = ds0.copy(deep=True)

    # Add country codes as a variable
    ds["country_codes"] = country_codes.copy(deep=True)

    # Create a time dimension of 12 months for the variable. This will copy the
    # emissions [tonnes/gridcell] to be the same in each of the 12 months as the
    # annual value. We'll scale it following EDGAR if there are scale factors.
    ds["emissions"] = ds["emissions"].expand_dims(dim={"time": 12}).copy()
    ds = ds.assign_coords(time=pd.date_range(f"{year}-01-01", f"{year}-12-01",
                                             freq="MS"))

    # Correction for the different number of days in each month. This is
    # invariant across countries, so compute it once here. (It was previously
    # computed inside the country loop but used after it, which would raise a
    # NameError if no EDGAR country appeared in the GFEI map.)
    days_per_month = np.array([calendar.monthrange(year, m)[1]
                               for m in range(1, 13)])
    correction_array = days_per_month/np.mean(days_per_month)

    for country in edgar_countries:
        # Make sure there are cells in the GFEI map for this country
        if country not in list(gfei_cc["Country_code_A3"]):
            continue

        # Figure out which code GFEI uses for this country
        country_code = (gfei_cc[gfei_cc["Country_code_A3"] == country]
                        ["ISO3166-1-numeric"].iloc[0])

        # Get the monthly scale factors for this country/sector/year
        sf = monthly_sf[(monthly_sf["Country_code_A3"] == country) &
                        (monthly_sf["EDGAR Sector"] == sec) &
                        (monthly_sf["Year"] == year)]
        if len(sf) == 1:
            sf = np.array(sf[["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]])[0]
            # Deal with rounding off of 1/12 in EDGAR scale factors
            if sf[0] == 0.08333 and sf[-1] == 0.08337:
                sf = np.ones((12))/12
        # If EDGAR doesn't provide a scale factor for this sector
        elif len(sf) == 0:
            sf = np.ones((12))/12
        # Fractions must sum to 1 (a multi-row match would also fail here)
        assert abs(np.sum(sf) - 1) < 1e-5

        # Correct for the different number of days in each month, then
        # renormalize so the annual total is conserved.
        sf = sf*correction_array
        sf = sf/np.sum(sf)

        # Scale the mass in each month
        for m in range(12):
            ds["emissions"][m, :, :] = np.where(
                ds["country_codes"] == country_code,
                ds["emissions"][m, :, :]*sf[m],
                ds["emissions"][m, :, :])

        # Replace country code with -9999 to know which cells have been scaled
        ds["country_codes"][:, :] = np.where(
            ds["country_codes"] == country_code,
            -9999.0, ds["country_codes"][:, :])

    # If the country code is not -9999, we haven't scaled these cells yet.
    # Use a constant emissions rate throughout the year.
    sf = np.ones((12))/12
    sf = sf*correction_array
    sf = sf/np.sum(sf)
    for m in range(12):
        ds["emissions"][m, :, :] = np.where(
            ds["country_codes"] != -9999.0,
            ds["emissions"][m, :, :]*sf[m],
            ds["emissions"][m, :, :])

    # Drop the country code variable
    ds = ds.drop_vars("country_codes")

    # Use the emi [tonnes/gridcell] and flx [kg/m2/s] to get [m2/gridcell]
    with xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc") as emi,\
         xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc") as flx:
        sec_in_year = (365 + calendar.isleap(year))*24*60*60
        m2_per_gridcell = emi["emissions"]*1e3/flx["fluxes"]/sec_in_year

    # Convert [tonnes/gridcell] --> [tonnes/m2]; cells with no emissions
    # produce NaN (0/0) and are set back to zero.
    ds["emissions"] = (ds["emissions"]/m2_per_gridcell).fillna(0.0)

    # Convert [tonnes/m2] --> [kg/m2/s]
    for m in range(12):
        seconds_in_this_month = days_per_month[m]*24*60*60
        ds["emissions"][m, :, :] = (ds["emissions"][m, :, :]*1e3 /
                                    seconds_in_this_month)

    # Formatting for COARDS
    ds = ds.rename({"emissions": "emi_ch4"})
    ds["emi_ch4"] = ds["emi_ch4"].assign_attrs({"units": "kg/m2/s",
                                                "long_name": "Emissions of CH4"})
    ds["lon"] = ds["lon"].assign_attrs({"units": "degrees_east",
                                        "axis": "X",
                                        "long_name": "longitude"})
    ds["lat"] = ds["lat"].assign_attrs({"units": "degrees_north",
                                        "axis": "Y",
                                        "long_name": "latitude"})
    ds["time"] = ds["time"].assign_attrs({"axis": "T",
                                          "long_name": "Time"})
    ds.attrs["history"] = (f"Prepared by Nicholas Balasus on "
                           f"{datetime.datetime.now().date()} from EDGAR v8.")
    ds.attrs["title"] = f"Monthly Emissions of CH4 for {sec} sector."
    ds.attrs["conventions"] = "COARDS"

    # Remove old files
    subprocess.run(["rm", f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc"])
    subprocess.run(["rm", f"v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc"])

    # Save the file
    os.makedirs(out_dir, exist_ok=True)
    out_file = f"{out_dir}/v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc"
    ds.to_netcdf(out_file, unlimited_dims=["time"],
                 encoding={"time": {"dtype": "float64"}})


if __name__ == "__main__":
    # The files "country_codes.csv" and "country_mask.csv" are from GFEIv2 and
    # tell us which country each 0.1° x 0.1° grid cell belongs to. Form them
    # into a xarray dataset here using a generic EDGAR v8 file as a template.
    zip_name = "v8.0_FT2022_GHG_CH4_2022_AGS_flx_nc.zip"
    nc_name = "v8.0_FT2022_GHG_CH4_2022_AGS_flx.nc"
    url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/"
           f"v80_FT2022_GHG/CH4/AGS/flx_nc/{zip_name}")
    subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
    subprocess.run(["unzip", "-q", "-j", zip_name, nc_name])
    subprocess.run(["rm", zip_name])
    with xr.open_dataset(nc_name) as ds:
        cc_csv = pd.read_csv("country_mask.csv", header=None)
        # Broadcast the CSV mask onto the EDGAR grid (lat, lon)
        country_codes = (ds["fluxes"]*0.0 + np.array(cc_csv))
        country_codes = country_codes.rename("country_code")
    subprocess.run(["rm", nc_name])

    # Treat South Sudan as Sudan to be consistent with EDGAR v8.
    country_codes = xr.where(country_codes == 728, 729, country_codes)

    # Download the monthly temporal scale factors for CH4 from EDGAR v8.
    zip_name = "EDGAR_temporal_profiles_r2a_CH4.zip"
    xlsx_name = "EDGAR_temporal_profiles_r2_CH4.xlsx"
    url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/"
           f"v80_FT2022_GHG/profiles/{zip_name}")
    subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
    subprocess.run(["unzip", "-q", "-j", zip_name, xlsx_name])
    subprocess.run(["rm", zip_name])

    # Read in all of the monthly scale factors.
    monthly_sf = pd.read_excel("EDGAR_temporal_profiles_r2_CH4.xlsx", header=1)
    edgar_countries = monthly_sf["Country_code_A3"].unique()
    subprocess.run(["rm", "EDGAR_temporal_profiles_r2_CH4.xlsx"])

    # Read in the GFEI country codes (code --> name).
    gfei_cc = pd.read_csv("country_codes.csv")

    # Generate all of the files, one worker per sector.
    sectors = ["ENE", "REF_TRF", "IND", "TNR_Aviation_CDS", "TNR_Aviation_CRS",
               "TNR_Aviation_LTO", "TRO", "TNR_Other", "TNR_Ship", "RCO",
               "PRO_COAL", "PRO_OIL", "PRO_GAS", "CHE", "IRO", "ENF", "MNM",
               "AWB", "AGS", "SWD_LDF", "SWD_INC", "WWT"]
    inputs = [(year, sec) for sec in sectors]
    with multiprocessing.Pool() as pool:
        pool.starmap(generate_edgarv8_monthly, inputs)
        # Graceful shutdown before the context manager's terminate().
        pool.close()
        pool.join()