diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index 8519dcf1..8cd514c2 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -504,7 +504,9 @@ def reproject( Higher values increase accuracy at the cost of more pyproj calls. Set to 0 for exact per-pixel transforms matching GDAL/rasterio. chunk_size : int or (int, int) or None - Output chunk size for dask. Defaults to 512. + Output chunk size for dask. If None, defaults to 512 for the + standard dask path and 2048 for the in-memory streaming and + dask+cupy paths (chosen to amortize kernel launch overhead). name : str or None Name for the output DataArray. max_memory : int or str or None @@ -541,6 +543,22 @@ def reproject( spatial dims) are carried through to the output. Coords that are aligned to the input y or x dims are dropped because their values do not apply to the rebuilt grid. + + Examples + -------- + >>> import xarray as xr + >>> import numpy as np + >>> from xrspatial.reproject import reproject + >>> raster = xr.DataArray( + ... np.random.rand(64, 64), + ... dims=['y', 'x'], + ... coords={'y': np.linspace(50, 40, 64), + ... 'x': np.linspace(-5, 5, 64)}, + ... attrs={'crs': 'EPSG:4326'}, + ... ) + >>> result = reproject(raster, 'EPSG:3857') + >>> result.attrs['crs'].startswith(('PROJCRS', 'PROJCS')) + True """ _validate_raster(raster, func_name='reproject', name='raster', ndim=(2, 3)) @@ -1411,7 +1429,9 @@ def merge( strategy : str Merge strategy: 'first', 'last', 'mean', 'max', 'min'. chunk_size : int or (int, int) or None - Chunk size for dask output. + Output chunk size for dask. If None, defaults to 512 for the + standard dask path and 2048 for the in-memory streaming and + dask+cupy paths (chosen to amortize kernel launch overhead). transform_precision : int Control-grid subdivisions for the coordinate transform (default 16). Higher values increase accuracy at the cost of more pyproj calls. @@ -1437,6 +1457,27 @@ def merge( There is no streaming in-memory path; for very large output mosaics, pass dask-backed inputs (or rely on the automatic promotion to the dask path) so that each output chunk is computed independently. + + Examples + -------- + >>> import xarray as xr + >>> import numpy as np + >>> from xrspatial.reproject import merge + >>> tile_a = xr.DataArray( + ... np.full((32, 32), 1.0), dims=['y', 'x'], + ... coords={'y': np.linspace(50, 45, 32), + ... 'x': np.linspace(-5, 0, 32)}, + ... attrs={'crs': 'EPSG:4326'}, + ... ) + >>> tile_b = xr.DataArray( + ... np.full((32, 32), 2.0), dims=['y', 'x'], + ... coords={'y': np.linspace(50, 45, 32), + ... 'x': np.linspace(0, 5, 32)}, + ... attrs={'crs': 'EPSG:4326'}, + ... ) + >>> mosaic = merge([tile_a, tile_b], resolution=0.5) + >>> mosaic.shape[1] >= 32 + True """ if not rasters: raise ValueError("merge(): rasters list must not be empty") diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index d7e4b504..e7c240a6 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -2478,3 +2478,78 @@ def test_reproject_y_descending_dask(self): out = reproject(raster, 'EPSG:3857') y_out = out.coords['y'].values assert np.all(np.diff(y_out) < 0) + + +# --------------------------------------------------------------------------- +# Inf input and chunk_size / max_memory parameter coverage +# --------------------------------------------------------------------------- + +def test_reproject_handles_inf_input(): + """Reprojecting a raster with +/-Inf pixels must not crash. + + The output behavior is implementation-defined: Inf may propagate + or be coerced to NaN. We only assert that the call returns and + the spatial geometry is intact. + """ + from xrspatial.reproject import reproject + data = np.ones((32, 32), dtype=np.float64) + data[0, 0] = np.inf + data[1, 1] = -np.inf + raster = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(55, 45, 32), + 'x': np.linspace(-5, 5, 32)}, + attrs={'crs': 'EPSG:4326', 'nodata': np.nan}, + ) + result = reproject(raster, 'EPSG:32633') + assert result.ndim == 2 + assert result.shape[0] >= 1 and result.shape[1] >= 1 + + +@pytest.mark.skipif(not HAS_DASK, reason="dask required") +def test_reproject_chunk_size_tuple(): + """Tuple chunk_size should propagate to the output dask chunks.""" + from xrspatial.reproject import reproject + data = np.random.RandomState(0).rand(128, 128).astype(np.float64) + raster = xr.DataArray( + da.from_array(data, chunks=64), dims=['y', 'x'], + coords={'y': np.linspace(55, 45, 128), + 'x': np.linspace(-5, 5, 128)}, + attrs={'crs': 'EPSG:4326', 'nodata': np.nan}, + ) + out = reproject(raster, 'EPSG:32633', chunk_size=(64, 32)) + assert hasattr(out.data, 'chunks'), "expected dask-backed output" + row_chunks, col_chunks = out.data.chunks + # Allow the last chunk to be a remainder, but the leading chunk + # should match the requested size. + assert row_chunks[0] == 64 + assert col_chunks[0] == 32 + + +def test_reproject_max_memory_string_arg(): + """Reproject must accept human-readable max_memory strings.""" + from xrspatial.reproject import reproject + data = np.random.RandomState(0).rand(32, 32).astype(np.float64) + raster = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(55, 45, 32), + 'x': np.linspace(-5, 5, 32)}, + attrs={'crs': 'EPSG:4326', 'nodata': np.nan}, + ) + for mem in ('256MB', '1GB'): + out = reproject(raster, 'EPSG:32633', max_memory=mem) + assert out.ndim == 2 + + +def test_reproject_max_memory_int_arg(): + """Reproject must accept integer byte counts for max_memory.""" + from xrspatial.reproject import reproject + data = np.random.RandomState(0).rand(32, 32).astype(np.float64) + raster = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(55, 45, 32), + 'x': np.linspace(-5, 5, 32)}, + attrs={'crs': 'EPSG:4326', 'nodata': np.nan}, + ) + out = reproject(raster, 'EPSG:32633', max_memory=512 * 1024 * 1024) + assert out.ndim == 2