#!/usr/bin/env python
"""
Combine data from the various CMIP6 files into one file per year,
containing all species.  Reading one file per year instead of one file
per species per year is more computationally efficient.

NOTE: Some post-processing is required, to change the unit string to
"days since YYYY-01-01 00:00:00" and to chunk and compress the data.
Use script post_process_data.sh to do this.

Bob Yantosca (yantosca@seas.harvard.edu), 18 Dec 2019
"""

# Imports
from os.path import join

import numpy as np
import xarray as xr

# Main data directory (edit for your system)
rootdir = "/n/holylfs/EXTERNAL_REPOS/GEOS-CHEM/gcgrid/gcdata/ExtData/HEMCO/CMIP6/v2019-09"

# Species to be combined: one CMIP6 input file exists per species,
# all sharing the same filename template.
species = ["CH2Cl2", "CH3Br", "CH3Cl", "CH4", "CHCl3"]

# Data files (one per species)
files = [
    join(rootdir, f"CMIP6_GHG_surface_VMR_1750_2014_for_{s}.nc") for s in species
]

# Read everything into one dataset
ds = xr.open_mfdataset(files)

# Set the encoding types for the lon, lat, time fields so that the
# output files store these coordinates in double precision.
encoding = {
    "time": {"dtype": "double"},
    "lat": {"dtype": "double"},
    "lon": {"dtype": "double"},
}

# Loop over years, writing one combined (all-species) file per year
for year in range(1750, 2015):
    print(f"Processing year {year}")

    # keep_attrs=True preserves variable/global attributes through
    # the arithmetic on the time coordinate below.
    with xr.set_options(keep_attrs=True):

        # Date as of Jan 1st of this year (as a np.datetime64[D] object)
        date_Jan1 = np.datetime64(f"{year}-01-01", "D")

        # From the combined dataset, pick only data for this year.
        # The monthly timestamps fall mid-month, so Jan 16 .. Dec 16
        # brackets all twelve months of this year.
        ds2 = ds.sel(time=slice(f"{year}-01-16", f"{year}-12-16"))

        # Adjust the time units to be days since Jan 1st of this year
        # (post-processing then rewrites the unit string accordingly).
        ds2["time"] = ds2["time"].astype("datetime64[D]") - date_Jan1

        # Write data for this year to its own file
        ds2.to_netcdf(f"CMIP6_GHG_surface_VMR_{year}.nc", encoding=encoding)