diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index 6188a879..cc1c3f75 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -1085,10 +1085,13 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
     # outweigh the thread-pool overhead. Uncompressed multi-tile reads also
     # benefit because numpy frombuffer + slice copies aren't free at large
     # tile sizes. Threshold (~64K decoded pixels per tile) was picked to
-    # avoid pool overhead on small 64x64 / 128x128 tile reads.
+    # avoid pool overhead on small 64x64 / 128x128 tile reads. The bound
+    # is inclusive so the default tile_size=256 (256*256 == 64*1024) lands
+    # on the parallel path -- a strict `>` excluded the most common tile
+    # size in practice (issue #1551).
     n_tiles = len(tile_jobs)
     tile_pixels = tw * th
-    use_parallel = (n_tiles > 1 and tile_pixels > 64 * 1024)
+    use_parallel = (n_tiles > 1 and tile_pixels >= 64 * 1024)
 
     def _decode_one(job):
         band_idx, tr, tc, tile_idx, tile_samples = job
diff --git a/xrspatial/geotiff/tests/test_parallel_decode_default_tile_1551.py b/xrspatial/geotiff/tests/test_parallel_decode_default_tile_1551.py
new file mode 100644
index 00000000..5529905f
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_parallel_decode_default_tile_1551.py
@@ -0,0 +1,146 @@
+"""Regression test for issue #1551.
+
+The parallel tile decode gate in ``_read_tiles`` previously used a strict
+``tile_pixels > 64 * 1024`` comparison, which excluded the default
+``tile_size=256`` (256 * 256 == 64 * 1024). Tiles at the default size are
+the most common in real GeoTIFFs, so the parallel path almost never ran.
+The fix is one character: ``>`` becomes ``>=``.
+
+These tests assert the dispatch by monkey-patching
+``concurrent.futures.ThreadPoolExecutor`` and confirming it gets
+constructed at the default tile_size. NOTE(review): this interception
+assumes ``_read_tiles`` resolves ``ThreadPoolExecutor`` via the module
+attribute at call time, not via ``from ... import ThreadPoolExecutor``.
+"""
+from __future__ import annotations
+
+import concurrent.futures
+import threading
+
+import numpy as np
+
+from xrspatial.geotiff._dtypes import tiff_dtype_to_numpy
+from xrspatial.geotiff._header import parse_all_ifds, parse_header
+from xrspatial.geotiff._reader import _read_tiles
+
+from .conftest import make_minimal_tiff
+
+
+class _PoolSpy:
+    """Drop-in replacement for ThreadPoolExecutor that records calls.
+
+    Wraps the real executor so tests still get correct decoded output;
+    only the construction is observed. Each instance appends its
+    construction (plus the calling thread id, for debugging) to the
+    list supplied by its test, so tests observe only their own calls.
+    """
+
+    def __init__(self, record, real_cls):
+        self._record = record
+        self._real_cls = real_cls
+        self._real = None
+
+    def __call__(self, *args, **kwargs):
+        self._record.append({
+            'args': args,
+            'kwargs': dict(kwargs),
+            'thread': threading.get_ident(),
+        })
+        self._real = self._real_cls(*args, **kwargs)
+        return self._real
+
+
+def _build_tiled_tiff(tile_size: int, tiles_across: int, tiles_down: int) -> bytes:
+    """Build a tiled TIFF with deterministic pixel content."""
+    width = tile_size * tiles_across
+    height = tile_size * tiles_down
+    pixel_data = np.arange(width * height, dtype=np.float32).reshape(height, width)
+    return make_minimal_tiff(
+        width, height, np.dtype('float32'),
+        pixel_data=pixel_data,
+        tiled=True,
+        tile_size=tile_size,
+    )
+
+
+def _decode(data: bytes) -> np.ndarray:
+    header = parse_header(data)
+    ifds = parse_all_ifds(data, header)
+    ifd = ifds[0]
+    dtype = tiff_dtype_to_numpy(ifd.bits_per_sample, ifd.sample_format)
+    return _read_tiles(data, ifd, header, dtype)
+
+
+def test_parallel_engages_at_default_tile_size_256(monkeypatch):
+    """At tile_size=256 with multiple tiles, parallel decode must run.
+
+    256 * 256 == 64 * 1024 exactly, so this is the boundary case the
+    old strict ``>`` excluded.
+    """
+    record: list[dict] = []
+    real_cls = concurrent.futures.ThreadPoolExecutor
+    monkeypatch.setattr(
+        concurrent.futures,
+        'ThreadPoolExecutor',
+        _PoolSpy(record, real_cls),
+    )
+
+    # 2x2 tile grid at the default tile_size=256 -> 4 tiles, each 64K pixels.
+    data = _build_tiled_tiff(tile_size=256, tiles_across=2, tiles_down=2)
+    arr = _decode(data)
+
+    # Output is correct.
+    assert arr.shape == (512, 512)
+    assert arr.dtype == np.float32
+    assert arr[0, 0] == 0.0
+    assert arr[-1, -1] == float(512 * 512 - 1)
+
+    # Most importantly, the parallel path engaged.
+    assert len(record) == 1, (
+        f"Expected ThreadPoolExecutor to be constructed exactly once, "
+        f"got {len(record)} (parallel path did not run for default tile_size)"
+    )
+
+
+def test_sequential_for_small_tiles(monkeypatch):
+    """At tile_size=128 (16K pixels), parallel must NOT run.
+
+    The threshold is meant to avoid pool overhead for small tiles, and
+    128*128 = 16384 < 64*1024. This guards against an over-eager fix.
+    """
+    record: list[dict] = []
+    real_cls = concurrent.futures.ThreadPoolExecutor
+    monkeypatch.setattr(
+        concurrent.futures,
+        'ThreadPoolExecutor',
+        _PoolSpy(record, real_cls),
+    )
+
+    data = _build_tiled_tiff(tile_size=128, tiles_across=4, tiles_down=4)
+    arr = _decode(data)
+
+    assert arr.shape == (512, 512)
+    assert len(record) == 0, (
+        f"Expected sequential decode below threshold, but "
+        f"ThreadPoolExecutor was constructed {len(record)} time(s)"
+    )
+
+
+def test_sequential_when_only_one_tile(monkeypatch):
+    """A single tile must stay on the sequential path even at large size."""
+    record: list[dict] = []
+    real_cls = concurrent.futures.ThreadPoolExecutor
+    monkeypatch.setattr(
+        concurrent.futures,
+        'ThreadPoolExecutor',
+        _PoolSpy(record, real_cls),
+    )
+
+    data = _build_tiled_tiff(tile_size=256, tiles_across=1, tiles_down=1)
+    arr = _decode(data)
+
+    assert arr.shape == (256, 256)
+    assert len(record) == 0, (
+        f"Single-tile reads must stay sequential, but the pool was "
+        f"constructed {len(record)} time(s)"
+    )