Demo: ERA5 Climate download

Notebook #0: Data Download

UW Geospatial Data Analysis
CEE498/CEWA599
David Shean

import os
from glob import glob

Install necessary packages to open GRIB files (default ERA5 format) with xarray

https://github.com/ecmwf/cfgrib

While you wait, open a terminal and inspect the contents of the era5 directory, and review this information

%conda install -y -c conda-forge cfgrib

Download the prepared dataset sample

  • We could request the data directly from the CDS API, but this will require at least 5-10 minutes to fulfill, maybe hours

  • I downloaded some sample ERA5 datasets, created a zip file, staged and shared on Google Drive (accessible to anyone with link)

  • We can use the driveanon convenience package to easily download anonymously without authentication

#Install directly from github repo main branch
%pip install git+https://github.com/friedrichknuth/driveanon.git
import driveanon as da
import zipfile
%pwd
#Local directory that holds the downloaded zip archives and their extracted contents
outdir = 'era5_data'
#exist_ok=True replaces the separate existence check: no race between the check
#and the create, and a clear error if the path exists but is not a directory
os.makedirs(outdir, exist_ok=True)
def download_unzip(out_fn, blob_id=None):
    """Download a zip archive (if missing) and extract it next to itself.

    The archive is extracted into a directory named after the zip file
    without its extension (e.g. 'data/sample.zip' -> 'data/sample'). Both
    steps are skipped when their target already exists, so the function can
    be re-run safely.

    Parameters
    ----------
    out_fn : str
        Local path of the zip file.
    blob_id : str, optional
        Identifier passed to ``da.save`` to fetch the archive. When omitted,
        the module/notebook-level ``blob_id`` variable is used, which keeps
        existing ``download_unzip(out_fn)`` calls working.

    Raises
    ------
    NameError
        If a download is needed and no ``blob_id`` is available.
    """
    #Download
    if not os.path.exists(out_fn):
        if blob_id is None:
            #Fall back to the notebook-level variable used by existing callers
            blob_id = globals().get('blob_id')
        if blob_id is None:
            raise NameError("blob_id is not defined: pass blob_id=... or set the global variable")
        da.save(blob_id, filename=out_fn)
    #Extract to subdirectory
    extract_dir = os.path.splitext(out_fn)[0]
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(out_fn, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

Function to load and combine GRIB files into a single xarray Dataset and write it out as a NetCDF file

import os
from glob import glob
import xarray as xr
def grib2nc(out_fn, writeout=True, compress=False):
    """Combine GRIB files into one dataset and write it to a NetCDF file.

    The input files are all paths matching ``<out_fn without extension>*.grib``.
    They are opened with the cfgrib engine, concatenated along ``time``, and
    the auxiliary coordinates ``number``, ``surface``, ``step`` and
    ``valid_time`` are dropped. Nothing is done when ``out_fn`` already exists.

    Parameters
    ----------
    out_fn : str
        Output NetCDF path; its basename also selects the input GRIB files.
    writeout : bool, optional
        Write the combined dataset to ``out_fn``. When False the files are
        still opened and combined, but nothing is written.
    compress : bool, optional
        Use zlib compression (level 9) for every data variable when writing.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no GRIB file matches the pattern derived from ``out_fn``.
    """
    #Nothing to do if the output was already generated
    if os.path.exists(out_fn):
        return
    #Get all grib filenames sharing the output basename
    pattern = os.path.splitext(out_fn)[0]+'*.grib'
    paths = sorted(glob(pattern))
    if not paths:
        #Fail early with the pattern in the message instead of an opaque concat error
        raise ValueError(f"No GRIB files found matching '{pattern}'")
    datasets = []
    try:
        #Generate xarray dataset list, opening with cfgrib engine
        for p in paths:
            datasets.append(xr.open_dataset(p, engine='cfgrib'))
        #Concatenate all datasets along the time axis
        combined = xr.concat(datasets, dim='time')
        #Drop unnecessary coordinates (drop_vars replaces the deprecated drop;
        #errors='ignore' tolerates files that lack one of these coordinates)
        combined = combined.drop_vars(['number', 'surface', 'step', 'valid_time'], errors='ignore')
        if writeout:
            encoding = {}
            if compress:
                #Set up encoding parameters to use compression when writing netcdf file
                comp = dict(zlib=True, complevel=9)
                encoding = {var: comp for var in combined.data_vars}
            #Write out
            combined.to_netcdf(out_fn, encoding=encoding)
    finally:
        #Release the file handles held by the opened GRIB datasets
        for ds in datasets:
            ds.close()
#Identifier of the first sample archive; read as a global by download_unzip and passed to da.save
blob_id = '1Gwkg21LPKxvZsjwMrwVESGi2ZaVLQP58'
#Local path for the downloaded zip, inside outdir
out_fn = f'{outdir}/ecv-for-climate-change.zip'
#Download (if missing) and extract into a directory named after the zip
download_unzip(out_fn)
%pwd
#Extraction directory: the zip path without its extension
datadir = os.path.splitext(out_fn)[0]
#Change into the extracted directory so the relative filenames below resolve there
%cd $datadir
#Output NetCDF filenames; grib2nc combines the *.grib files that share each basename
fn_list = ['climatology_0.25g_ea_2t.nc', \
           '1month_anomaly_Global_ea_2t.nc', \
           '1month_mean_Global_ea_2t.nc']
#NOTE: the loop rebinds out_fn (previously the zip path) to each NetCDF filename
for out_fn in fn_list:
    grib2nc(out_fn)
#Go back up two directory levels (out of datadir and outdir)
%cd ../..
#Identifier of the second sample archive; read as a global by download_unzip and passed to da.save
blob_id = '1nWjcjlqzkSqi-3u2vXt-5ya4VRSKS7v5'
#Local path for the downloaded zip, inside outdir
out_fn = f'{outdir}/era5_WA_1979-2021_6hr.zip'
#Download (if missing) and extract into a directory named after the zip
download_unzip(out_fn)
#Extraction directory: the zip path without its extension
datadir = os.path.splitext(out_fn)[0]
#Change into the extracted directory; note there is no matching %cd back afterwards
%cd $datadir
#Output NetCDF filenames; grib2nc combines the *.grib files that share each basename
fn_list = ['era5_WA_1979-2021_6hr_2m_temperature.nc', \
           'era5_WA_1979-2021_6hr_total_precipitation.nc', 
           'era5_WA_1979-2021_6hr_snow_depth.nc']
#NOTE: the loop rebinds out_fn (previously the zip path) to each NetCDF filename
for out_fn in fn_list:
    grib2nc(out_fn)

Old files from 2018

Note: The following will take a few minutes; use the time to review the remainder of the lab