Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions docs/reference/sql/rs_frompath.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

title: RS_FromPath
description: Creates an out-of-database raster from a raster file path.
kernels:
- returns: raster
args:
- name: path
type: string
---

## Description

Loads raster metadata from the file at `path` and returns a raster whose bands
reference the source file as out-db bands.

This is useful when you want to work with rasters stored on disk without copying
their pixel data into the raster value itself.

## Examples

```sql
SELECT RS_BandPath(RS_FromPath('../../../submodules/sedona-testing/data/raster/test4.tiff'));
```
9 changes: 9 additions & 0 deletions rust/sedona-raster-gdal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ result_large_err = "allow"
[dependencies]
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
datafusion-common = { workspace = true }
datafusion-expr = { workspace = true }
lru = { workspace = true }
sedona-common = { workspace = true }
sedona-expr = { workspace = true }
sedona-functions = { workspace = true }
sedona-gdal = { workspace = true }
sedona-raster = { workspace = true }
sedona-schema = { workspace = true }
Expand All @@ -46,3 +50,8 @@ sedona-gdal = { workspace = true, features = ["gdal-sys"] }
sedona-testing = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread"] }

[[bench]]
harness = false
name = "rs_frompath"
path = "benches/rs_frompath.rs"
93 changes: 93 additions & 0 deletions rust/sedona-raster-gdal/benches/rs_frompath.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Benchmarks for RS_FromPath UDF.
//!
//! RS_FromPath creates out-db rasters from file paths, so these benchmarks use
//! raster fixture files provided by the `sedona-testing` crate rather than synthetic input.

use std::{hint::black_box, sync::Arc};

use arrow_array::{ArrayRef, StringArray};
use arrow_schema::DataType;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use datafusion_expr::ScalarUDF;
use sedona_schema::datatypes::SedonaType;
use sedona_testing::{data::test_raster, testers::ScalarUdfTester};

/// File names of the raster fixtures used as benchmark input.
///
/// Each name is resolved to a path with `test_raster` inside
/// `raster_path_array`; the list is cycled when more rows than fixtures are
/// requested.
// NOTE(review): "small" is taken from the constant's name; the actual fixture
// sizes are not visible here — confirm before relying on it.
const SMALL_RASTER_FIXTURES: &[&str] = &[
"test1.tiff",
"test2.tif",
"test3.tif",
"test4.tiff",
"test5.tiff",
];

/// Build a string array of `rows` raster fixture paths.
///
/// Every entry of `names` is resolved with `test_raster`; the resolved paths
/// are then repeated round-robin until `rows` values have been produced.
///
/// # Panics
///
/// Panics if `names` is empty or if any fixture cannot be resolved.
fn raster_path_array(names: &[&str], rows: usize) -> ArrayRef {
    assert!(
        !names.is_empty(),
        "benchmark fixture list must not be empty"
    );

    // Resolve each fixture once up front; the output column only borrows from this.
    let resolved: Vec<_> = names
        .iter()
        .map(|name| test_raster(name).unwrap())
        .collect();

    // Cycle through the resolved paths so row `i` gets path `i % resolved.len()`.
    let column: Vec<_> = resolved
        .iter()
        .map(|path| path.as_str())
        .cycle()
        .take(rows)
        .collect();

    Arc::new(StringArray::from(column))
}

/// Register the `rs_frompath` benchmark group.
///
/// Three inputs are measured, each reported with element throughput equal to
/// the number of rows in the input array:
///
/// - `single_small`: one row pointing at `test4.tiff`
/// - `mixed_small`: one row per entry in `SMALL_RASTER_FIXTURES`
/// - `batched_small`: 256 rows cycling through `SMALL_RASTER_FIXTURES`
fn bench_rs_frompath(c: &mut Criterion) {
    let udf: ScalarUDF = sedona_raster_gdal::rs_frompath_udf().into();
    let tester = ScalarUdfTester::new(udf, vec![SedonaType::Arrow(DataType::Utf8)]);

    // Inputs are built before the group is opened so fixture resolution is
    // never part of a measurement.
    let cases = [
        ("single_small", raster_path_array(&["test4.tiff"], 1)),
        (
            "mixed_small",
            raster_path_array(SMALL_RASTER_FIXTURES, SMALL_RASTER_FIXTURES.len()),
        ),
        (
            "batched_small",
            raster_path_array(SMALL_RASTER_FIXTURES, 256),
        ),
    ];

    let mut group = c.benchmark_group("rs_frompath");

    for (label, paths) in &cases {
        group.throughput(Throughput::Elements(paths.len() as u64));
        group.bench_with_input(BenchmarkId::new("fixtures", *label), paths, |b, input| {
            b.iter(|| black_box(tester.invoke_array(input.clone()).unwrap()))
        });
    }

    group.finish();
}

criterion_group!(benches, bench_rs_frompath);
criterion_main!(benches);
77 changes: 50 additions & 27 deletions rust/sedona-raster-gdal/src/gdal_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use sedona_gdal::gdal::Gdal;
use sedona_gdal::gdal_dyn_bindgen::{GDAL_OF_RASTER, GDAL_OF_READONLY, GDAL_OF_VERBOSE_ERROR};
use sedona_gdal::geo_transform::GeoTransform;
use sedona_gdal::mem::MemDatasetBuilder;
use sedona_gdal::raster::rasterband::RasterBand;
use sedona_gdal::raster::types::DatasetOptions;
use sedona_gdal::raster::types::GdalDataType;

Expand Down Expand Up @@ -268,37 +269,11 @@ pub unsafe fn raster_ref_to_gdal_mem<R: RasterRef + ?Sized>(
.band(src_band_index)
.map_err(|e| arrow_datafusion_err!(e))?;
let band_metadata = band.metadata();
let band_type = band_metadata.data_type()?;
if let Some(nodata_bytes) = band_metadata.nodata_value() {
let raster_band = dataset
.rasterband(dst_band_index)
.map_err(convert_gdal_err)?;
match band_type {
BandDataType::UInt64 => {
let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| {
exec_datafusion_err!("Invalid nodata byte length for UInt64")
})?;
let nodata = u64::from_le_bytes(nodata_bytes);
raster_band
.set_no_data_value_u64(Some(nodata))
.map_err(convert_gdal_err)?;
}
BandDataType::Int64 => {
let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| {
exec_datafusion_err!("Invalid nodata byte length for Int64")
})?;
let nodata = i64::from_le_bytes(nodata_bytes);
raster_band
.set_no_data_value_i64(Some(nodata))
.map_err(convert_gdal_err)?;
}
_ => {
let nodata = bytes_to_f64(nodata_bytes, &band_type)?;
raster_band
.set_no_data_value(Some(nodata))
.map_err(convert_gdal_err)?;
}
}
set_band_nodata_from_bytes(&raster_band, Some(nodata_bytes))?;
}
}

Expand All @@ -320,6 +295,54 @@ pub fn nodata_bytes_to_f64(nodata_bytes: Option<&[u8]>, band_type: &BandDataType
bytes_to_f64(bytes, band_type).ok()
}

/// Read the nodata value of a GDAL band and encode it as bytes.
///
/// The band's GDAL data type is first mapped to a [`BandDataType`]:
///
/// - `UInt64` / `Int64` bands are read through the dedicated 64-bit integer
///   getters and encoded as little-endian bytes.
/// - Every other type is read as `f64` and encoded with
///   [`nodata_f64_to_bytes`] for that band type.
///
/// Returns `Ok(None)` when the band reports no nodata value.
///
/// # Errors
///
/// Returns an error if the band's GDAL data type cannot be converted by
/// `gdal_to_band_data_type`.
pub fn band_nodata_to_bytes(band: &RasterBand<'_>) -> Result<Option<Vec<u8>>> {
    let band_type = gdal_to_band_data_type(band.band_type())?;

    let encoded = match band_type {
        BandDataType::UInt64 => band
            .no_data_value_u64()
            .map(|value| value.to_le_bytes().to_vec()),
        BandDataType::Int64 => band
            .no_data_value_i64()
            .map(|value| value.to_le_bytes().to_vec()),
        _ => band
            .no_data_value()
            .map(|value| nodata_f64_to_bytes(value, &band_type)),
    };

    Ok(encoded)
}
Comment thread
Kontinuation marked this conversation as resolved.

/// Apply a byte-encoded nodata value to a GDAL band.
///
/// The band's GDAL data type is mapped to a [`BandDataType`], which selects
/// both how `nodata_bytes` is decoded and which setter is called:
///
/// - `UInt64` / `Int64`: the bytes must be exactly 8 long and are decoded as
///   little-endian; the 64-bit integer setter is used.
/// - Any other type: the bytes are decoded with `bytes_to_f64` and the `f64`
///   setter is used.
///
/// Passing `None` forwards `None` to the setter matching the band type.
// NOTE(review): forwarding `None` presumably clears the band's nodata value —
// confirm against the sedona-gdal setter documentation.
///
/// # Errors
///
/// Returns an error if the band type cannot be converted, if the byte slice
/// has the wrong length or cannot be decoded, or if GDAL rejects the value.
pub fn set_band_nodata_from_bytes(
    band: &RasterBand<'_>,
    nodata_bytes: Option<&[u8]>,
) -> Result<()> {
    let band_type = gdal_to_band_data_type(band.band_type())?;

    match band_type {
        BandDataType::UInt64 => {
            let nodata = nodata_bytes
                .map(|raw| {
                    <[u8; 8]>::try_from(raw)
                        .map(u64::from_le_bytes)
                        .map_err(|_| exec_datafusion_err!("Invalid nodata byte length for UInt64"))
                })
                .transpose()?;
            band.set_no_data_value_u64(nodata).map_err(convert_gdal_err)
        }
        BandDataType::Int64 => {
            let nodata = nodata_bytes
                .map(|raw| {
                    <[u8; 8]>::try_from(raw)
                        .map(i64::from_le_bytes)
                        .map_err(|_| exec_datafusion_err!("Invalid nodata byte length for Int64"))
                })
                .transpose()?;
            band.set_no_data_value_i64(nodata).map_err(convert_gdal_err)
        }
        other => {
            let nodata = nodata_bytes
                .map(|raw| bytes_to_f64(raw, &other))
                .transpose()?;
            band.set_no_data_value(nodata).map_err(convert_gdal_err)
        }
    }
}

/// Convert a f64 nodata value into a byte vector appropriate for the given band type.
pub fn nodata_f64_to_bytes(nodata: f64, band_type: &BandDataType) -> Vec<u8> {
match band_type {
Expand Down
30 changes: 3 additions & 27 deletions rust/sedona-raster-gdal/src/gdal_dataset_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ use sedona_schema::raster::{BandDataType, StorageType};

use crate::gdal_common::{
band_data_type_to_gdal, bytes_to_f64, convert_gdal_err, normalize_outdb_source_path,
open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem, ToGdalGeoTransform,
open_gdal_dataset, raster_ref_to_gdal_empty, raster_ref_to_gdal_mem,
set_band_nodata_from_bytes, ToGdalGeoTransform,
};

/// A GDAL dataset constructed from a `RasterRef`.
Expand Down Expand Up @@ -281,32 +282,7 @@ impl GDALDatasetCache {
let vrt_band = vrt.rasterband(i).map_err(convert_gdal_err)?;

if let Some(nodata_bytes) = band_metadata.nodata_value() {
match band_type {
BandDataType::UInt64 => {
let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| {
exec_datafusion_err!("Invalid nodata byte length for UInt64")
})?;
let nodata = u64::from_le_bytes(nodata_bytes);
vrt_band
.set_no_data_value_u64(Some(nodata))
.map_err(convert_gdal_err)?;
}
BandDataType::Int64 => {
let nodata_bytes: [u8; 8] = nodata_bytes.try_into().map_err(|_| {
exec_datafusion_err!("Invalid nodata byte length for Int64")
})?;
let nodata = i64::from_le_bytes(nodata_bytes);
vrt_band
.set_no_data_value_i64(Some(nodata))
.map_err(convert_gdal_err)?;
}
_ => {
let nodata = bytes_to_f64(nodata_bytes, &band_type)?;
vrt_band
.set_no_data_value(nodata)
.map_err(convert_gdal_err)?;
}
}
set_band_nodata_from_bytes(&vrt_band, Some(nodata_bytes))?;
}

match band_metadata.storage_type()? {
Expand Down
8 changes: 5 additions & 3 deletions rust/sedona-raster-gdal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
//! - GDAL datatype and nodata conversion helpers
//! - path normalization for GDAL VSI-backed raster sources

// Temporary until https://github.com/apache/sedona-db/issues/804 is resolved.
#[allow(dead_code)]
pub mod register;

mod gdal_common;
// Temporary until https://github.com/apache/sedona-db/issues/804 is resolved.
#[allow(dead_code)]
mod gdal_dataset_provider;

mod rs_frompath;
mod utils;

#[cfg(test)]
Expand All @@ -42,4 +43,5 @@ pub use gdal_common::{
band_data_type_to_gdal, bytes_to_f64, gdal_to_band_data_type, gdal_type_byte_size,
nodata_bytes_to_f64, nodata_f64_to_bytes,
};
pub use utils::{append_as_indb_raster, dataset_to_indb_raster};
pub use rs_frompath::rs_frompath_udf;
pub use utils::{append_as_indb_raster, append_as_outdb_raster, dataset_to_indb_raster};
25 changes: 25 additions & 0 deletions rust/sedona-raster-gdal/src/register.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use sedona_expr::function_set::FunctionSet;

/// Build the [`FunctionSet`] holding the GDAL-backed functions defined in this crate.
///
/// The returned set contains the scalar UDF produced by
/// `crate::rs_frompath::rs_frompath_udf`.
pub fn default_function_set() -> FunctionSet {
    let mut gdal_functions = FunctionSet::new();
    gdal_functions.insert_scalar_udf(crate::rs_frompath::rs_frompath_udf());
    gdal_functions
}
Loading
Loading