# Reformat_CEDS_0.1 import numpy as np import xarray as xr import os from datetime import datetime InDir = '/storage1/fs1/rvmartin/Active/GEOS-Chem-shared/ExternalShare/CEDS01_PNNL_org/' OutDir = '/ExtData/HEMCO/CEDS/v2023-04/' # voc speciation: following convention in CEDSv2 to lump species for corresponding voc names # voc definition is the same in CEDSv2 and HTAPv3 # NMVOC emissions are split into the following 23 species groups: # VOC_id VOC_name molecular weight GEOS-Chem Species # VOC1 alcohols 46.2 EOH # VOC2 ethane 30 C2H6 # VOC3 propane 44 C3H8 # VOC4 butanes 57.8 ALK4 # VOC5 pentanes 72 ALK4 # VOC6 hexanes_plus_higher_alkanes 106.8 ALK4 # VOC7 ethene 28 C2H4 # VOC8 propene 42 PRPE # VOC9 ethyne 26 C2H2 # VOC12 other_alkenes_and_alkynes 67 - # VOC13 benzene 78 BENZ # VOC14 toluene 92 TOLU # VOC15 xylene 106 XYLE # VOC16 trimethylbenzenes 120 - # VOC17 other_aromatics 126.8 - # VOC18 esters 104.7 - # VOC19 ethers 81.5 - # VOC20 chlorinated_hydrocarbons 138.8 - # VOC21 methanal 30 CH2O # VOC22 other_alkanals 68.8 ALD2 # VOC23 ketones 75.3 MEK # VOC24 acids 59.1 HCOOH # VOC25 other_voc 68.9 - # It is assumed that isoprene (number 10) and terpenes (number 11) are not # emitted in significant quantities by anthropogenic sources, and are not # included in the anthropogenic split. species_in = ['BC', 'CH4', 'CO', 'CO2', 'N2O', 'NH3', 'NMVOC', 'NOx', 'OC', 'SO2', \ 'VOC01-alcohols', 'VOC02-ethane', 'VOC03-propane', 'VOC04-butanes', 'VOC05-pentanes', 'VOC06-hexanes', \ 'VOC07-ethene', 'VOC08-propene', 'VOC09-ethyne', 'VOC13-benzene', 'VOC14-toluene', \ 'VOC15-xylene', \ 'VOC21-methanal', 'VOC22-other', 'VOC23-ketones', 'VOC24-acids'] species_out = ['BC', 'CH4', 'CO', 'CO2', 'N2O', 'NH3', 'NMVOC', 'NO', 'OC', 'SO2', \ 'EOH', 'C2H6', 'C3H8', 'ALK4_butanes', 'ALK4_pentanes', 'ALK4_hexanes', 'C2H4', \ 'PRPE', 'C2H2', 'BENZ', 'TOLU', 'XYLE', 'CH2O', 'ALD2', 'MEK', 'HCOOH'] encoding = dict(missing_value=1.e+20, _FillValue=1.e+20) # ids = "0: Agriculture; 1: Energy; 2: Industrial; 3: Transportation; 4: Residential, Commercial, Other; \ # 5: Solvents production and application; 6: Waste; 7: International Shipping"; sectors_out = ["agr", "ene", "ind", "tra", "rco", "slv", "wst", "shp"] fpath = os.path.join(InDir, '{}-em/{}-em-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21_gn_{}01-{}12.nc'.\ format(species_in[0], species_in[0], 2019, 2019)) ds = xr.open_dataset(fpath) lat = ds['lat'] lon = ds['lon'] for year in range(1980, 1982): path = os.path.join(OutDir, '{}'.format(year)) isExist = os.path.exists(path) if not isExist: os.mkdir(os.path.join(OutDir, '{}'.format(year))) refyear = year day0 = datetime(year,1,1) time = [] for mon in range(1,13): dayi = datetime(year, mon, 1) time.append((dayi - day0).days) time = np.array(time).astype('float') for speci in range(len(species_out)): ds_out = xr.Dataset({'lat': lat, 'lon': lon}) if species_out[speci] == 'NO': factor = 30/46 else: factor = 1 if ("VOC" in species_in[speci]) & (species_in[speci] != 'NMVOC'): if species_in[speci] == 'VOC06-hexanes': fpath_in = os.path.join(InDir, '{}/{}-pl-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\ format(species_in[speci], species_in[speci], year, year)) varname_in = '{}_pl_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" )) elif species_in[speci] == 'VOC22-other': fpath_in = os.path.join(InDir, '{}/{}-alka-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\ format(species_in[speci], species_in[speci], year, year)) varname_in = '{}_alka_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" )) else: fpath_in = os.path.join(InDir, '{}/{}-em-speciated-VOC-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21-supplemental-data_gn_{}01-{}12.nc'.\ format(species_in[speci], species_in[speci], year, year)) varname_in = '{}_em_speciated_VOC_anthro'.format(species_in[speci].replace("-", "_" )) else: fpath_in = os.path.join(InDir, '{}-em/{}-em-anthro_input4MIPs_emissions_CMIP_CEDS-2021-04-21_gn_{}01-{}12.nc'.\ format(species_in[speci], species_in[speci], year, year)) varname_in = '{}_em_anthro'.format(species_in[speci]) ds_in = xr.open_dataset(fpath_in) for seci in range(len(sectors_out)): varname_out = '{}_{}'.format(species_out[speci], sectors_out[seci]) emisflux = ds_in[varname_in].values[:,seci,...].squeeze() * factor # convert to DataArray emisflux = xr.DataArray(emisflux, coords=[time, lat.values, lon.values], dims=['time', 'lat', 'lon'], \ attrs=dict(long_name='{} emission in sector {}'.format(species_out[speci], sectors_out[seci]), \ units='kg/m2/s', encoding=encoding)) ds_out[varname_out] = emisflux ds_out.time.attrs = dict(units='days since {}-01-01 00:00:00'.format(refyear), \ delta_t='0000-01-00 00:00:00', axis='T', standard_name='Time', long_name='Time', calendar='standard') ds_out.lon.attrs = dict(units='degrees_east', axis='X', standard_name='Longitude', long_name='Longitude') ds_out.lat.attrs = dict(units='degrees_north', axis='Y', standard_name='Latitude', long_name='Latitude') ds_out.attrs = dict(Title='COARDS/netCDF file containing monthly emissions of {}'.format(species_out[speci]), Conventions = "COARDS") fpath_out = OutDir + '{}/CEDS_{}_0.1x0.1_{}.nc'.format(year, species_out[speci], year) ds_out.to_netcdf(fpath_out, engine='netcdf4', format='NETCDF4', unlimited_dims='time')