#!/usr/bin/env python
'''
This Python script concatenates all of the netCDF files for individual
species in the BB4CMIP6/v2019-06/YYYY subfolders into one netCDF file
per year, named "BB4CMIP6/v2019-06/BB4CMIP6-Biomass_025x025_YYYY.nc".

This is more efficient because it minimizes the number of times a
netCDF file has to be opened.

Missing values in floating-point variables (which xarray decodes as
NaN) are replaced with zeroes before each yearly file is written.

Calling sequence:
    ./concatentate_files.py
'''

# Imports
from os.path import join
import xarray as xr
from xarray.coding.variables import SerializationWarning
import numpy as np
import warnings

# Suppress harmless run-time warnings (mostly about underflow in division)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=SerializationWarning)

# Main path (edit as necessary)
maindir = '/n/scratchlfs/jacob_lab/ryantosca/data/BB4CMIP6/v2019-06'

# Loop over years
for year in range(1800, 1850):
    print('Now processing {}'.format(year))

    # Open all files in each yearly subfolder into a single Dataset.
    # The "with" block guarantees the input files are closed again once
    # the yearly output has been written, so that file handles do not
    # pile up as the loop advances.
    infiles = join(maindir, str(year), '*.nc')
    with xr.open_mfdataset(infiles) as ds:

        # Keep all DataArray attributes
        with xr.set_options(keep_attrs=True):

            # Loop over all variables.  Iterate over a snapshot of the
            # names because the Dataset is modified inside the loop.
            for v in list(ds.data_vars):

                # Only floating-point variables can hold NaN; leave
                # integer, time, and string variables untouched.
                if not np.issubdtype(ds[v].dtype, np.floating):
                    continue

                # Xarray will try convert missing values to NaN's,
                # so we need to replace these with zeroes.  NOTE:
                # fillna() returns a new object, so the result has to
                # be assigned back into the Dataset to take effect.
                filled = ds[v].fillna(0.0)

                # Carry over the encoding read from the input files so
                # that the variable is written with the same settings.
                filled.encoding = ds[v].encoding
                ds[v] = filled

                # Debug print
                #print('{} : {} {}'.format(
                #    v, np.min(ds[v].values), np.max(ds[v].values)))

        # Write to the output file to the main path
        outfile = 'BB4CMIP6-Biomass_025x025_{}.nc'.format(year)
        outpath = join(maindir, outfile)
        ds.to_netcdf(outpath)