# EDGAR version 8 provides annual emissions files. They also provide temporal
# scaling factors for each month for each sector/year/country. Here, we apply
# the scale factors using a 0.1° x 0.1° country mask from GFEI (accounting for
# discrepancies with EDGAR). We also correct the EDGAR scale factors to account
# for the fact that there are a different number of days in each month. We
# generate the monthly files for each sector for 2010 to 2022.
import subprocess
import multiprocessing
import datetime
import calendar
import xarray as xr
import pandas as pd
import numpy as np
import os
import sys

# Usage: script.py <year> <output_root>
year = int(sys.argv[1])
out_dir = f"{sys.argv[2]}/EDGARv8/{year}"


def generate_edgarv8_monthly(year, sec):
    """Build a monthly CH4 emissions file [kg/m2/s] for one sector and year.

    Downloads the annual EDGAR v8 "emi" [tonnes/gridcell] and "flx" [kg/m2/s]
    files for this sector, distributes the annual mass across 12 months using
    EDGAR's per-country monthly scale factors (corrected for the number of
    days in each month), converts to a flux, and writes a COARDS-formatted
    netCDF file into ``out_dir``.

    Relies on module-level globals prepared in ``__main__``:
    ``country_codes``, ``edgar_countries``, ``gfei_cc``, ``monthly_sf``,
    and ``out_dir``.
    """
    # Download the emissions and fluxes files for this sector and year.
    # (ftype/zip_name/nc_name avoid shadowing the builtins type/zip/file.)
    for ftype in ["emi", "flx"]:
        zip_name = f"v8.0_FT2022_GHG_CH4_{year}_{sec}_{ftype}_nc.zip"
        nc_name = f"v8.0_FT2022_GHG_CH4_{year}_{sec}_{ftype}.nc"
        url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/"
               f"datasets/v80_FT2022_GHG/CH4/{sec}/{ftype}_nc/{zip_name}")
        subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
        subprocess.run(["unzip", "-q", "-j", zip_name, nc_name])
        subprocess.run(["rm", zip_name])

    # Copy the annual dataset so we can modify it
    with xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc") as ds0:
        ds = ds0.copy(deep=True)

    # Add country codes as a variable
    ds["country_codes"] = country_codes.copy(deep=True)

    # Create a time dimension of 12 months for the variable. This will copy the
    # emissions [tonnes/gridcell] to be the same in each of the 12 months as the
    # annual value. We'll scale it following EDGAR if there are scale factors.
    ds["emissions"] = ds["emissions"].expand_dims(dim={"time": 12}).copy()
    ds = ds.assign_coords(time=pd.date_range(f"{year}-01-01", f"{year}-12-01",
                                             freq="MS"))

    # Correction for the different number of days in each month. This is
    # invariant across countries, so compute it once here. (It was previously
    # computed inside the country loop but used after it, which would raise a
    # NameError if no EDGAR country appeared in the GFEI map.)
    days_per_month = np.array([calendar.monthrange(year, m)[1]
                               for m in range(1, 13)])
    correction_array = days_per_month/np.mean(days_per_month)

    for country in edgar_countries:
        # Make sure there are cells in the GFEI map for this country
        if country not in list(gfei_cc["Country_code_A3"]):
            continue

        # Figure out which code GFEI uses for this country
        country_code = (gfei_cc[gfei_cc["Country_code_A3"] == country]
                        ["ISO3166-1-numeric"].iloc[0])

        # Get the monthly scale factors for this country/sector/year
        sf = monthly_sf[(monthly_sf["Country_code_A3"] == country) &
                        (monthly_sf["EDGAR Sector"] == sec) &
                        (monthly_sf["Year"] == year)]
        if len(sf) == 1:
            sf = np.array(sf[["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]])[0]
            # Deal with rounding off of 1/12 in EDGAR scale factors
            if sf[0] == 0.08333 and sf[-1] == 0.08337:
                sf = np.ones((12))/12
        # If EDGAR doesn't provide a scale factor for this sector
        elif len(sf) == 0:
            sf = np.ones((12))/12
        # Fractions must sum to 1 (a multi-row match would also fail here)
        assert abs(np.sum(sf) - 1) < 1e-5

        # Correct for the different number of days in each month, then
        # renormalize so the annual total is conserved.
        sf = sf*correction_array
        sf = sf/np.sum(sf)

        # Scale the mass in each month
        for m in range(12):
            ds["emissions"][m, :, :] = np.where(
                ds["country_codes"] == country_code,
                ds["emissions"][m, :, :]*sf[m],
                ds["emissions"][m, :, :])

        # Replace country code with -9999 to know which cells have been scaled
        ds["country_codes"][:, :] = np.where(
            ds["country_codes"] == country_code,
            -9999.0, ds["country_codes"][:, :])

    # If the country code is not -9999, we haven't scaled these cells yet.
    # Use a constant emissions rate throughout the year.
    sf = np.ones((12))/12
    sf = sf*correction_array
    sf = sf/np.sum(sf)
    for m in range(12):
        ds["emissions"][m, :, :] = np.where(
            ds["country_codes"] != -9999.0,
            ds["emissions"][m, :, :]*sf[m],
            ds["emissions"][m, :, :])

    # Drop the country code variable
    ds = ds.drop_vars("country_codes")

    # Use the emi [tonnes/gridcell] and flx [kg/m2/s] to get [m2/gridcell]
    with xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc") as emi,\
         xr.open_dataset(f"v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc") as flx:
        sec_in_year = (365 + calendar.isleap(year))*24*60*60
        m2_per_gridcell = emi["emissions"]*1e3/flx["fluxes"]/sec_in_year

    # Convert [tonnes/gridcell] --> [tonnes/m2]; cells with no emissions
    # produce NaN (0/0) and are set back to zero.
    ds["emissions"] = (ds["emissions"]/m2_per_gridcell).fillna(0.0)

    # Convert [tonnes/m2] --> [kg/m2/s]
    for m in range(12):
        seconds_in_this_month = days_per_month[m]*24*60*60
        ds["emissions"][m, :, :] = (ds["emissions"][m, :, :]*1e3 /
                                    seconds_in_this_month)

    # Formatting for COARDS
    ds = ds.rename({"emissions": "emi_ch4"})
    ds["emi_ch4"] = ds["emi_ch4"].assign_attrs({"units": "kg/m2/s",
                                                "long_name": "Emissions of CH4"})
    ds["lon"] = ds["lon"].assign_attrs({"units": "degrees_east",
                                        "axis": "X",
                                        "long_name": "longitude"})
    ds["lat"] = ds["lat"].assign_attrs({"units": "degrees_north",
                                        "axis": "Y",
                                        "long_name": "latitude"})
    ds["time"] = ds["time"].assign_attrs({"axis": "T",
                                          "long_name": "Time"})
    ds.attrs["history"] = (f"Prepared by Nicholas Balasus on "
                           f"{datetime.datetime.now().date()} from EDGAR v8.")
    ds.attrs["title"] = f"Monthly Emissions of CH4 for {sec} sector."
    ds.attrs["conventions"] = "COARDS"

    # Remove old files
    subprocess.run(["rm", f"v8.0_FT2022_GHG_CH4_{year}_{sec}_emi.nc"])
    subprocess.run(["rm", f"v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc"])

    # Save the file
    os.makedirs(out_dir, exist_ok=True)
    out_file = f"{out_dir}/v8.0_FT2022_GHG_CH4_{year}_{sec}_flx.nc"
    ds.to_netcdf(out_file, unlimited_dims=["time"],
                 encoding={"time": {"dtype": "float64"}})


if __name__ == "__main__":
    # The files "country_codes.csv" and "country_mask.csv" are from GFEIv2 and
    # tell us which country each 0.1° x 0.1° grid cell belongs to. Form them
    # into a xarray dataset here using a generic EDGAR v8 file as a template.
    zip_name = "v8.0_FT2022_GHG_CH4_2022_AGS_flx_nc.zip"
    nc_name = "v8.0_FT2022_GHG_CH4_2022_AGS_flx.nc"
    url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/"
           f"v80_FT2022_GHG/CH4/AGS/flx_nc/{zip_name}")
    subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
    subprocess.run(["unzip", "-q", "-j", zip_name, nc_name])
    subprocess.run(["rm", zip_name])
    with xr.open_dataset(nc_name) as ds:
        cc_csv = pd.read_csv("country_mask.csv", header=None)
        # Broadcast the CSV mask onto the EDGAR grid (lat, lon)
        country_codes = (ds["fluxes"]*0.0 + np.array(cc_csv))
        country_codes = country_codes.rename("country_code")
    subprocess.run(["rm", nc_name])

    # Treat South Sudan as Sudan to be consistent with EDGAR v8.
    country_codes = xr.where(country_codes == 728, 729, country_codes)

    # Download the monthly temporal scale factors for CH4 from EDGAR v8.
    zip_name = "EDGAR_temporal_profiles_r2a_CH4.zip"
    xlsx_name = "EDGAR_temporal_profiles_r2_CH4.xlsx"
    url = (f"https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/EDGAR/datasets/"
           f"v80_FT2022_GHG/profiles/{zip_name}")
    subprocess.run(["wget", "-nv", "--read-timeout=180", "--tries=0", url])
    subprocess.run(["unzip", "-q", "-j", zip_name, xlsx_name])
    subprocess.run(["rm", zip_name])

    # Read in all of the monthly scale factors.
    monthly_sf = pd.read_excel("EDGAR_temporal_profiles_r2_CH4.xlsx", header=1)
    edgar_countries = monthly_sf["Country_code_A3"].unique()
    subprocess.run(["rm", "EDGAR_temporal_profiles_r2_CH4.xlsx"])

    # Read in the GFEI country codes (code --> name).
    gfei_cc = pd.read_csv("country_codes.csv")

    # Generate all of the files, one worker per sector.
    sectors = ["ENE", "REF_TRF", "IND", "TNR_Aviation_CDS", "TNR_Aviation_CRS",
               "TNR_Aviation_LTO", "TRO", "TNR_Other", "TNR_Ship", "RCO",
               "PRO_COAL", "PRO_OIL", "PRO_GAS", "CHE", "IRO", "ENF", "MNM",
               "AWB", "AGS", "SWD_LDF", "SWD_INC", "WWT"]
    inputs = [(year, sec) for sec in sectors]
    with multiprocessing.Pool() as pool:
        pool.starmap(generate_edgarv8_monthly, inputs)
        # Graceful shutdown before the context manager's terminate().
        pool.close()
        pool.join()