Skip to content

Commit 55f7acb

Browse files
committed
[ISSUE-153] Add blocking poll to Python bindings
1 parent dab09b4 commit 55f7acb

2 files changed

Lines changed: 76 additions & 1 deletion

File tree

bindings/python/example/example.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,27 @@ async def main():
175175

176176
# TODO: support to_duckdb()
177177

178+
# Test the new poll() method for incremental reading
179+
print("\n--- Testing poll() method ---")
180+
log_scanner.subscribe(None, None)
181+
182+
# Poll with a timeout of 5000ms (5 seconds)
183+
# Note: poll() returns an empty table (not an error) on timeout
184+
try:
185+
poll_result = log_scanner.poll(5000)
186+
print(f"Number of rows: {poll_result.num_rows}")
187+
188+
if poll_result.num_rows > 0:
189+
poll_df = poll_result.to_pandas()
190+
print(f"Polled data:\n{poll_df}")
191+
else:
192+
print("Empty result (no records available)")
193+
# Empty table still has schema
194+
print(f"Schema: {poll_result.schema}")
195+
196+
except Exception as e:
197+
print(f"Error during poll: {e}")
198+
178199
except Exception as e:
179200
print(f"Error during scanning: {e}")
180201

bindings/python/src/table.rs

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
use crate::TOKIO_RUNTIME;
1919
use crate::*;
2020
use arrow::array::RecordBatch;
21-
use arrow_pyarrow::FromPyArrow;
21+
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
2222
use fluss::client::EARLIEST_OFFSET;
23+
use fluss::record::to_arrow_schema;
2324
use fluss::rpc::message::OffsetSpec;
2425
use pyo3_async_runtimes::tokio::future_into_py;
2526
use std::sync::Arc;
@@ -321,6 +322,59 @@ impl LogScanner {
321322
Ok(df)
322323
}
323324

325+
/// Poll for new records with the specified timeout
326+
///
327+
/// Args:
328+
/// timeout_ms: Timeout in milliseconds to wait for records
329+
///
330+
/// Returns:
331+
/// PyArrow Table containing the polled records
332+
///
333+
/// Note:
334+
/// - Returns an empty table (with correct schema) if no records are available
335+
/// - When timeout expires, returns an empty table (NOT an error)
336+
fn poll(&self, py: Python, timeout_ms: i64) -> PyResult<Py<PyAny>> {
337+
use std::time::Duration;
338+
339+
if timeout_ms < 0 {
340+
return Err(FlussError::new_err(format!(
341+
"timeout_ms must be non-negative, got: {timeout_ms}"
342+
)));
343+
}
344+
345+
let timeout = Duration::from_millis(timeout_ms as u64);
346+
let scan_records = py
347+
.detach(|| TOKIO_RUNTIME.block_on(async { self.inner.poll(timeout).await }))
348+
.map_err(|e| FlussError::new_err(e.to_string()))?;
349+
350+
// Convert records to Arrow batches per bucket
351+
let mut arrow_batches = Vec::new();
352+
for (_bucket, records) in scan_records.into_records_by_buckets() {
353+
let mut batches = Utils::convert_scan_records_to_arrow(records);
354+
arrow_batches.append(&mut batches);
355+
}
356+
if arrow_batches.is_empty() {
357+
return self.create_empty_table(py);
358+
}
359+
360+
Utils::combine_batches_to_table(py, arrow_batches)
361+
}
362+
363+
/// Create an empty PyArrow table with the correct schema
364+
fn create_empty_table(&self, py: Python) -> PyResult<Py<PyAny>> {
365+
let arrow_schema = to_arrow_schema(self.table_info.get_row_type());
366+
let py_schema = arrow_schema
367+
.to_pyarrow(py)
368+
.map_err(|e| FlussError::new_err(format!("Failed to convert schema: {e}")))?;
369+
370+
let pyarrow = py.import("pyarrow")?;
371+
let empty_table = pyarrow
372+
.getattr("Table")?
373+
.call_method1("from_batches", (vec![] as Vec<Py<PyAny>>, py_schema))?;
374+
375+
Ok(empty_table.into())
376+
}
377+
324378
fn __repr__(&self) -> String {
325379
format!("LogScanner(table={})", self.table_info.table_path)
326380
}

0 commit comments

Comments
 (0)