10: Dask and rioxarray demo

UW Geospatial Data Analysis
CEE467/CEWA567
David Shean

These examples are incomplete, and some were not working. I ran out of time to test them, so they are preserved here for future reference.

# Connect to a Dask scheduler at a hard-coded local address.
# NOTE(review): the port looks session-specific (a different port appears,
# commented out, in a later cell) - confirm it matches the running scheduler.
from dask.distributed import Client

client = Client("tcp://127.0.0.1:36567")
# Bare expression as the last line of the cell so the notebook shows the client summary.
client

Client

Cluster

  • Workers: 4
  • Cores: 4
  • Memory: 8.59 GB
%%time
# Time the mean of the 't2m' variable; the recorded output is a single 0-d value.
# NOTE(review): wa_merge is not defined in the visible part of this notebook -
# presumably it comes from an earlier notebook; verify before running.
wa_merge['t2m'].mean()
CPU times: user 79 ms, sys: 51.3 ms, total: 130 ms
Wall time: 129 ms
<xarray.DataArray 't2m' ()>
array(8.996196, dtype=float32)
from dask.distributed import Client

# Alternative kept for reference: attach to a scheduler at a fixed address.
#client = Client("tcp://127.0.0.1:35733")
# Create a client with explicit worker/thread/memory settings; the recorded
# output reports 2 workers, 4 cores and 2.00 GB of memory.
client = Client(n_workers=2, threads_per_worker=2, memory_limit='1GB')
# Bare expression as the last line of the cell so the notebook shows the client summary.
client

Client

Cluster

  • Workers: 2
  • Cores: 4
  • Memory: 2.00 GB
# Imported for its side effect: provides the .hvplot accessor used below.
import hvplot.xarray
#This was pulled from the part 2 notebook, originally separate variables
# NOTE(review): wa_t is not defined in the visible part of this notebook -
# confirm where it comes from before running.
# Average over both spatial dimensions, then plot the result.
wa_t.mean(dim=("latitude", "longitude")).hvplot()
# Average over the time dimension, then plot the result with aspect=2.
wa_t.mean(dim="time").hvplot(aspect=2)
# Relative path to the DEM .tif in the 07_Raster2_DEMs_Warp_Clip_Sample directory.
dem_fn = '../07_Raster2_DEMs_Warp_Clip_Sample/WA_SRTMGL3_utm_gdalwarp_lzw.tif'
# Absolute path to the same file name; this assignment overrides the relative path above.
dem_fn = '/home/jovyan/gda_course_2021_solutions/modules/07_Raster2_DEMs_Warp_Clip_Sample/WA_SRTMGL3_utm_gdalwarp_lzw.tif'
# IPython shell shorthand: list the file to check it exists ($dem_fn expands the Python variable).
ls $dem_fn
/home/jovyan/gda_course_2021_solutions/modules/07_Raster2_DEMs_Warp_Clip_Sample/WA_SRTMGL3_utm_gdalwarp_lzw.tif
import rioxarray

See the rioxarray getting started guide: https://corteva.github.io/rioxarray/stable/getting_started/getting_started.html

# Chunk sizes along x and y; the repr below shows chunksize=(1, 1024, 1024).
chunks={'x': 1024, 'y': 1024}
#chunks='auto'
# NOTE(review): overview_level is assigned but never passed to open_rasterio
# in this cell - confirm whether it was meant to be used.
overview_level=0
# Open the DEM as a dask-backed DataArray named "SRTM" (see the repr below).
dem = rioxarray.open_rasterio(dem_fn, masked=True, default_name="SRTM", chunks=chunks)
# Bare expression as the last line of the cell so the notebook shows the DataArray repr.
dem
<xarray.DataArray 'SRTM' (band: 1, y: 5851, x: 8877)>
dask.array<open_rasterio-1fd172fd27c59599ee677e71323da560<this-array>, shape=(1, 5851, 8877), dtype=float64, chunksize=(1, 1024, 1024), chunktype=numpy.ndarray>
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 5.446e+06 5.446e+06 ... 5.043e+06 5.043e+06
  * x            (x) float64 3.647e+05 3.648e+05 ... 9.748e+05 9.749e+05
    spatial_ref  int64 0
Attributes:
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref
# Show the affine transform of the DEM grid.
dem.rio.transform()
Affine(68.74846185120099, 0.0, 364652.9635476386,
       0.0, -68.74846185120099, 5445635.966252405)
# Show the nodata value reported for the DEM (nan in the recorded output).
dem.rio.nodata
nan
# Load the array values; the recorded output shows an evaluated array repr
# instead of the dask array.
# NOTE(review): the recorded output is followed by a distributed.client
# "Failed to reconnect to scheduler" error - this cell may not run cleanly as-is.
dem.load()
<xarray.DataArray 'SRTM' (band: 1, y: 5851, x: 8877)>
array([[[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]]])
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 5.446e+06 5.446e+06 ... 5.043e+06 5.043e+06
  * x            (x) float64 3.647e+05 3.648e+05 ... 9.748e+05 9.749e+05
    spatial_ref  int64 0
Attributes:
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError
# Compute the array values; the recorded output shows the evaluated array repr.
dem.compute()
<xarray.DataArray 'SRTM' (band: 1, y: 5851, x: 8877)>
array([[[nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        ...,
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan],
        [nan, nan, nan, ..., nan, nan, nan]]])
Coordinates:
  * band         (band) int64 1
  * y            (y) float64 5.446e+06 5.446e+06 ... 5.043e+06 5.043e+06
  * x            (x) float64 3.647e+05 3.648e+05 ... 9.748e+05 9.749e+05
    spatial_ref  int64 0
Attributes:
    scale_factor:  1.0
    add_offset:    0.0
    grid_mapping:  spatial_ref
# Plot the DEM with xarray's .plot() method.
dem.plot()
# Plot the DEM through the hvplot accessor, naming the x and y dimensions explicitly.
dem.hvplot(x='x', y='y')