Demo: ERA5 Climate download¶
Notebook #0: Data Download¶
UW Geospatial Data Analysis
CEE498/CEWA599
David Shean
import os
from glob import glob
Install necessary packages to open GRIB files (default ERA5 format) with xarray¶
https://github.com/ecmwf/cfgrib
While you wait, open a terminal and inspect the contents of the era5 directory, and review this information
%conda install -y -c conda-forge cfgrib
Download the prepared dataset sample¶
We could request the data directly from the CDS API, but each request takes at least 5-10 minutes to fulfill, and sometimes hours
I downloaded some sample ERA5 datasets, created a zip file, staged and shared on Google Drive (accessible to anyone with link)
We can use the
driveanon convenience package to easily download anonymously without authentication
#Install directly from github repo main branch
%pip install git+https://github.com/friedrichknuth/driveanon.git
import driveanon as da
import zipfile
%pwd
#Local directory that holds the downloaded archives and their extracted contents
outdir = 'era5_data'
#exist_ok avoids the check-then-create race and makes re-running this cell safe
os.makedirs(outdir, exist_ok=True)
def download_unzip(out_fn, blob_id=None):
    """Download a zip archive with driveanon and extract it alongside itself.

    Each step is skipped when its output already exists, so the function can
    be re-run without repeating the download or the extraction.

    Parameters
    ----------
    out_fn : str
        Local path of the zip archive. Its contents are extracted into a
        directory with the same path minus the file extension.
    blob_id : str, optional
        File ID passed to ``da.save``. When omitted, the module-level
        ``blob_id`` variable is used, which is how existing callers supply it.

    Raises
    ------
    ValueError
        If the archive has to be downloaded but no file ID is available.
    """
    #Download
    if not os.path.exists(out_fn):
        if blob_id is None:
            #Backward compatibility: fall back to the notebook-level variable
            blob_id = globals().get('blob_id')
        if blob_id is None:
            raise ValueError(
                f"Cannot download '{out_fn}': pass blob_id or define a "
                "module-level blob_id first"
            )
        da.save(blob_id, filename=out_fn)
    #Extract to subdirectory named after the archive (extension removed)
    extract_dir = os.path.splitext(out_fn)[0]
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(out_fn, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
Function to load and combine grib files into a single xarray Dataset¶
Also creates a new, compressed netcdf (nc) file to store the data for future use
See relevant doc on opening and writing files: http://xarray.pydata.org/en/stable/io.html
import os
from glob import glob
import xarray as xr
def grib2nc(out_fn, writeout=True, compress=False):
    """Combine matching GRIB files into one netcdf file.

    All files matching ``<out_fn without extension>*.grib`` are opened with
    the cfgrib engine, concatenated along the ``time`` dimension, stripped of
    a few coordinates, and written to ``out_fn``. Nothing is done when
    ``out_fn`` already exists.

    Parameters
    ----------
    out_fn : str
        Path of the netcdf file to create. Its name without the extension is
        also the prefix used to find the input GRIB files.
    writeout : bool, optional
        Write the combined dataset to ``out_fn``. When False the inputs are
        still opened and combined, but nothing is written.
    compress : bool, optional
        Write every data variable with zlib compression at level 9.

    Returns
    -------
    None
        The combined dataset is not returned; open ``out_fn`` with
        ``xr.open_dataset`` afterwards to use it.

    Raises
    ------
    FileNotFoundError
        If no GRIB file matches the prefix derived from ``out_fn``.
    """
    #Nothing to do when the combined file has already been written
    if os.path.exists(out_fn):
        return

    #Get all grib filenames that share the output filename prefix
    prefix = os.path.splitext(out_fn)[0]
    paths = sorted(glob(prefix + '*.grib'))
    if not paths:
        #Fail early with a clear message instead of an obscure concat error
        raise FileNotFoundError(f"No GRIB files found matching '{prefix}*.grib'")

    datasets = []
    try:
        #Generate xarray dataset list, opening with cfgrib engine
        for p in paths:
            datasets.append(xr.open_dataset(p, engine='cfgrib'))
        #Concatenate all datasets along the time axis
        combined = xr.concat(datasets, dim='time')
        #Drop unnecessary coordinates; any that are absent are skipped
        combined = combined.drop_vars(
            ['number', 'surface', 'step', 'valid_time'], errors='ignore'
        )
        if writeout:
            encoding = {}
            if compress:
                #Set up encoding parameters to use compression when writing netcdf file
                comp = dict(zlib=True, complevel=9)
                encoding = {var: comp for var in combined.data_vars}
            #Write out
            combined.to_netcdf(out_fn, encoding=encoding)
    finally:
        #Release the file handles held by the opened GRIB datasets
        for ds in datasets:
            ds.close()
blob_id = '1Gwkg21LPKxvZsjwMrwVESGi2ZaVLQP58'
out_fn = f'{outdir}/ecv-for-climate-change.zip'
download_unzip(out_fn)
%pwd
datadir = os.path.splitext(out_fn)[0]
%cd $datadir
fn_list = ['climatology_0.25g_ea_2t.nc', \
'1month_anomaly_Global_ea_2t.nc', \
'1month_mean_Global_ea_2t.nc']
for out_fn in fn_list:
grib2nc(out_fn)
%cd ../..
blob_id = '1nWjcjlqzkSqi-3u2vXt-5ya4VRSKS7v5'
out_fn = f'{outdir}/era5_WA_1979-2021_6hr.zip'
download_unzip(out_fn)
datadir = os.path.splitext(out_fn)[0]
%cd $datadir
fn_list = ['era5_WA_1979-2021_6hr_2m_temperature.nc', \
'era5_WA_1979-2021_6hr_total_precipitation.nc',
'era5_WA_1979-2021_6hr_snow_depth.nc']
for out_fn in fn_list:
grib2nc(out_fn)
Old files from 2018¶
Note: The following will take a few minutes, take the time to review the remainder of the lab